eras.pl (11484B)
#!/usr/bin/env -S swipl --quiet

% This is an assembler for the Eris SOC. The program takes two command
% line arguments: The first argument is the assembler program. The
% second argument is the target binary.
%
% Besides writing the target file, the assembler outputs the
% result. Lines in the output start with the current memory
% address. Opcodes are followed by their byte value and label
% references by their memory location, both in square brackets.
%
% The assembler knows one directive: ".set_address $NN" sets the
% current memory address to $NN. The ROM starts at address $80, so in
% order to assemble a program stored in ROM, ".set_address $80" should
% precede the actual code.
% NOTE(review): earlier documentation called this directive
% .start_of_rom; the parser in this file only handles .set_address.
%
% Copyright 2022 Gerd Beuster (gerd@frombelow.net). This is free software
% under the GNU GPL v3 license or any later version. See COPYING in
% the root directory for details.


%
% Load opcodes generated by microcode compiler.
%

:- consult("opcodes.pl").


:- use_module(library(dcg/basics)).

% assemble_file(+SourceFile, +TargetFile)
%
% The actual main predicate. Reads SourceFile, assembles it, writes
% the bytes to TargetFile and prints a listing to standard output.
% The output file is only written if the assembly was successful;
% otherwise the predicate fails after the error messages have been
% printed.
assemble_file(SourceFile, TargetFile) :-
    read_file_to_string(SourceFile, Source, []),
    string_codes(Source, Codes),
    % Step 1: Tokenize the input
    tokenize(1, _, Tokens, Codes, []),
    % Step 2: Assemble it
    assemble(Assembled, labels{}, Labels, 0, _, Tokens, []),
    % Step 3: De-reference labels
    dereferenceLabels(Assembled, Final, Labels),
    !,
    % Extract the plain byte sequence
    get_binary(Final, Binary),
    !,
    % Write result (only reached if no token assembled to 'error')
    check_for_errors(Binary),
    save_binary(TargetFile, Binary),
    write("$00: "),
    print_map(Final, []).

% save_binary(+TargetFile, +Binary)
%
% We write a hex file as output: every byte becomes two hex digits
% followed by a space. setup_call_cleanup/3 guarantees that the stream
% is closed even if writing raises an exception (e.g. a non-integer
% value in Binary).
save_binary(TargetFile, Binary) :-
    setup_call_cleanup(
        open(TargetFile, write, Stream),
        once(write_hex(Stream, Binary)),
        close(Stream)).

% write_hex(+Stream, +Bytes)
%
% Writes each element of Bytes to Stream as a zero-padded, two-digit
% hexadecimal number followed by a single space.
write_hex(_, []).
write_hex(Stream, [Byte|Bytes]) :-
    format(Stream, '~|~`0t~16R~2+ ', [Byte]),
    write_hex(Stream, Bytes).

% check_for_errors(+Binary)
%
% If the list of binary values resulting from assembly contains
% the atom 'error', then an error occurred in the assembly process.
% Succeeds only if no element of Binary is 'error'.
check_for_errors([]).
check_for_errors([error|_]) :- !, fail.
check_for_errors([_|R]) :- check_for_errors(R).

% Write source code together with binary representation to standard
% out. print_map//0 expects at least one token.
%
% Printing is a side effect, so the grammar must not be re-entered on
% backtracking: each token is printed exactly once, and the end of the
% list is handled by an explicit empty case instead of a second
% "print one command" clause (which would print the last token twice).
print_map --> print_command, print_map_rest.

print_map_rest --> print_command, !, print_map_rest.
print_map_rest --> [].

% print_command//0 prints a single token. The token type selects the
% layout; values are printed as two-digit hexadecimal numbers.
print_command --> [(newline, _, bytePosition(BytePos), _)],
    { format('\n$~|~`0t~16R~2+: ', [BytePos]) }.
print_command --> [(labelDefinition(L), _, _, _)],
    { format('~w:', L) }.
print_command --> [(opcode(P), _, _, assembly(A))],
    { format(' ~w [$~|~`0t~16R~2+]', [P, A]) }.
print_command --> [(number(N, direct), _, _, _)],
    { format(' $~|~`0t~16R~2+', [N]) }.
print_command --> [(number(N, immediate), _, _, _)],
    { format(' #$~|~`0t~16R~2+', [N]) }.
print_command --> [(number(N, indirect), _, _, _)],
    { format(' ($~|~`0t~16R~2+)', [N]) }.
print_command --> [(comment(C), _, _, _)],
    { format(' //~w', [C]) }.
print_command --> [(labelReference(E, direct), _, _, assembly(A))],
    { format(' :~w [$~|~`0t~16R~2+]', [E, A]) }.
print_command --> [(labelReference(E, immediate), _, _, assembly(A))],
    { format(' #:~w [$~|~`0t~16R~2+]', [E, A]) }.
print_command --> [(labelReference(E, indirect), _, _, assembly(A))],
    { format(' (:~w [$~|~`0t~16R~2+])', [E, A]) }.
print_command --> [(assembler_directive(D), _, _, _)],
    { format('.~w', [D]) }.

% get_binary(+Tokens, -Bytes)
%
% The result of the assembly is a list of tokens augmented by their
% binary representation. Here we remove everything but the binary
% representation from the list elements, resulting in a sequence of
% bytes. Tokens carrying the empty term assembly() contribute nothing.
get_binary([], []).
get_binary([(_, _, _, assembly(A))|R], [A|B]) :-
    get_binary(R, B).
get_binary([(_, _, _, assembly())|R], B) :-
    get_binary(R, B).


% Predicate assemble is the parser.
% It takes the tuples returned by the tokenizer and augments them by
% the corresponding byte representation in term assembly/1.
%
% When translating the tokens to their byte representation, a list of
% labels and their representation in memory is created. These
% variables have names like L0 and L1, for the labels before/after a
% token is processed. In order to associate the labels with their
% memory location, we keep track of the current memory location in
% variables named like B0 (before processing the next token) and B1
% (after processing the next token). The actual variable names may vary.
%
% assemble(-Assembled, +Labels0, -Labels, +Byte0, -Byte)//
%
% Parses one or more commands. We commit to the first command//5
% clause that matches: command//5 ends with a catch-all "unknown
% opcode" clause which prints an error message, so backtracking into
% it at the end of the input would report bogus errors for valid
% programs (and mis-assemble a trailing two-byte instruction).

assemble(A, L0, L2, B0, B2) -->
    command(F, L0, L1, B0, B1),
    !,
    assemble_rest(R, L1, L2, B1, B2),
    { append(F, R, A) }.

% assemble_rest//5: either the input is exhausted, or more commands
% follow.
assemble_rest([], L, L, B, B) --> [].
assemble_rest(A, L0, L1, B0, B1) --> assemble(A, L0, L1, B0, B1).

% Newlines and comments do not generate assembler code.
command([(newline, lineNumber(N), bytePosition(B), assembly())],
        L, L, B, B) -->
    [(newline, lineNumber(N))].
command([(comment(S), lineNumber(N), bytePosition(B), assembly())],
        L, L, B, B) -->
    [(comment(S), lineNumber(N))].
% Label references are replaced by the (1 byte) address of the label,
% therefore the byte counter advances by 1. The symbolic label is
% stored as the "byte code" in assembly/1. In the second pass, these
% references are resolved by the number representing the absolute
% position.
command([(labelReference(S, M), lineNumber(N), bytePosition(B), assembly(S))],
        L, L, B, B1) -->
    [(labelReference(S, M), lineNumber(N))],
    { B1 is B + 1 }.
% Numbers are very similar to label references. The difference is that
% we can already generate assembly code for them; no future
% de-referencing required.
command([(number(X, M), lineNumber(N), bytePosition(B), assembly(X))],
        L, L, B, B1) -->
    [(number(X, M), lineNumber(N))],
    { B1 is B + 1 }.
% A label definition names the current byte position. It emits no
% code; it only records the pair Label-Position in the label dict.
command([(labelDefinition(Name), lineNumber(Line), bytePosition(Pos), assembly())],
        LabelsIn, LabelsOut, Pos, Pos) -->
    [(labelDefinition(Name), lineNumber(Line))],
    { put_dict(Name, LabelsIn, Pos, LabelsOut) }.

% An opcode may be followed by an argument (a number or a label
% reference). The argument carries the addressing mode that selects
% the opcode byte, so opcode and argument are parsed as one unit.
% - Two byte opcodes: opcode followed by a number
command([(opcode(Mnemonic), lineNumber(OpLine), bytePosition(Pos), assembly(Byte)),
         (number(Value, Mode), lineNumber(ArgLine), bytePosition(ArgPos), assembly(Value))],
        Labels, Labels, Pos, Next) -->
    [(opcode(Mnemonic), lineNumber(OpLine)),
     (number(Value, Mode), lineNumber(ArgLine))],
    { opcode_to_byte(Mnemonic, Mode, Byte),
      ArgPos is Pos + 1,
      Next is Pos + 2 }.
% - Two byte opcodes: opcode followed by a label reference
command([(opcode(Mnemonic), lineNumber(OpLine), bytePosition(Pos), assembly(Byte)),
         (labelReference(Label, Mode), lineNumber(ArgLine), bytePosition(ArgPos), assembly(Label))],
        Labels, Labels, Pos, Next) -->
    [(opcode(Mnemonic), lineNumber(OpLine)),
     (labelReference(Label, Mode), lineNumber(ArgLine))],
    { opcode_to_byte(Mnemonic, Mode, Byte),
      ArgPos is Pos + 1,
      Next is Pos + 2 }.
% - One byte opcodes: no argument
command([(opcode(Mnemonic), lineNumber(OpLine), bytePosition(Pos), assembly(Byte))],
        Labels, Labels, Pos, Next) -->
    [(opcode(Mnemonic), lineNumber(OpLine))],
    { opcode_to_byte(Mnemonic, Byte),
      Next is Pos + 1 }.
% - Unknown opcodes: report the problem and mark the token with the
%   atom 'error'; the byte position does not advance.
command([(opcode(Mnemonic), lineNumber(OpLine), bytePosition(Pos), assembly(error))],
        Labels, Labels, Pos, Pos) -->
    [(opcode(Mnemonic), lineNumber(OpLine))],
    { format('ERROR: Unknown opcode/addressing mode "~w" (line ~w)\n', [Mnemonic, OpLine]) }.
% Compiler directive set_address changes the current byte address. It
% does not generate any assembly code on its own.
command([(assembler_directive("set_address"), lineNumber(LN0), bytePosition(B1),
          assembly()),
         (number(B2, direct), lineNumber(LN1), bytePosition(B1), assembly())],
        L, L, B1, B2) -->
    [(assembler_directive("set_address"), lineNumber(LN0)),
     (number(B2, direct), lineNumber(LN1))].

% dereferenceLabels(+Assembled, -Final, +Labels)
%
% At this point, the assembly code is a list of bytes interspersed
% with label references. Here we replace the label references by the
% corresponding memory location, looked up in the dict Labels.
%
% A reference to a label that was never defined is reported and its
% assembly/1 term is set to the atom 'error'. The error case must
% match the same 4-tuple shape (including bytePosition/1) as every
% other token; otherwise unknown labels slip through unresolved.
dereferenceLabels([], [], _).
dereferenceLabels([(labelReference(L, M), lineNumber(N), bytePosition(P),
                    assembly(L))|RL],
                  [(labelReference(L, M), lineNumber(N), bytePosition(P),
                    assembly(A))|R],
                  Labels) :-
    !,
    (   get_dict(L, Labels, B)
    ->  A = B
    ;   format('ERROR: Unknown label "~w" (line ~w)\n', [L, N]),
        A = error
    ),
    dereferenceLabels(RL, R, Labels).
% Every other token is passed through unchanged.
dereferenceLabels([F|R], [F|RD], L) :-
    dereferenceLabels(R, RD, L).

% The tokenizer knows the following tokens:
% newline, comment, assembler directive, labelReference,
% labelDefinition, number, and opcode. The different tokens are
% identified as follows:
% - Comments start with '//' and end at the end of the line.
% - Assembler directives begin with '.'.
% - Labels end with ':'.
% - Label references begin with ':'.
% - Numbers begin with '$'.
% - Everything else is an opcode.
% We store the line number of the token with the token for error reporting.

% LNum0 and LNum1 keep track of line numbers: LNum0 is the line number
% before parsing the next token, LNum1 is the line number after
% parsing the next token.
tokenize(LNum0, LNum1, [T]) --> token(LNum0, LNum1, T).
tokenize(LNum0, LNum2, [T|R]) -->
    token(LNum0, LNum1, T), tokenize(LNum1, LNum2, R).
% When we parse a new line, the line number is incremented.
token(LNum0, LNum1, (newline, lineNumber(LNum0))) -->
    % The token belongs to the line it terminates; LNum1 is the
    % number of the following line.
    whites, [10], {LNum1 is LNum0 + 1}.
% Parse assembler directives
token(LNum, LNum, (assembler_directive(A), lineNumber(LNum))) -->
    whites, [0'.], string_without("\n \t", L), { string_codes(A, L) }.
% Parse comments
token(LNum, LNum, (comment(C), lineNumber(LNum))) -->
    whites, [0'/], [0'/], string_without("\n", L), { string_codes(C, L) }.
% Label references - direct addressing
token(LNum, LNum, (labelReference(A, direct), lineNumber(LNum))) -->
    whites, [0':], string_without("\n \t", L),
    { string_codes(S, L), atom_string(A, S) }.
% Label references - immediate addressing
token(LNum, LNum, (labelReference(A, immediate), lineNumber(LNum))) -->
    whites, [0'#], [0':], string_without("\n \t", L),
    { string_codes(S, L), atom_string(A, S) }.
% Label references - indirect addressing
% string_without//2 is greedy, so the closing parenthesis must be part
% of the stop set; otherwise it is swallowed into the label name and
% the following [0')] can never match.
token(LNum, LNum, (labelReference(A, indirect), lineNumber(LNum))) -->
    whites, [0'(], [0':], string_without("\n \t)", L), [0')],
    { string_codes(S, L), atom_string(A, S) }.
% Label definitions
token(LNum, LNum, (labelDefinition(A), lineNumber(LNum))) -->
    whites, string_without("\r\n \t:", L), [0':],
    { string_codes(S, L), atom_string(A, S) }.
% Parse numbers - direct addressing mode
token(LNum, LNum, (number(N, direct), lineNumber(LNum))) -->
    whites, [0'$], xinteger(N).
% Parse numbers - immediate addressing mode
token(LNum, LNum, (number(N, immediate), lineNumber(LNum))) -->
    whites, [0'#], [0'$], xinteger(N).
% Parse numbers - indirect addressing mode
token(LNum, LNum, (number(N, indirect), lineNumber(LNum))) -->
    whites, [0'(], [0'$], xinteger(N), [0')].
% Everything not parsed yet is an opcode
% NOTE(review): string_without//2 may match zero characters, so this
% clause presumably also accepts an empty opcode (e.g. for trailing
% blanks at the end of the file) - confirm whether that is intended.
token(LNum, LNum, (opcode(P), lineNumber(LNum))) -->
    whites, string_without("\n \t", L), { string_codes(P, L) }.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

:- initialization(main, main).

% main(+Argv)
%
% Program entry point. With exactly two command line arguments the
% first is assembled into the second and a final newline is printed;
% with any other argument count a usage message is printed instead.
main(Argv) :-
    (   Argv = [Source, Binary]
    ->  assemble_file(Source, Binary),
        nl
    ;   writeln('Usage: eras.pl <SOURCE> <BINARY>')
    ).