eris2206

Documentation: http://frombelow.net/projects/eris2206/
Clone: git clone https://git.frombelow.net/eris2206.git
Log | Files | Refs | Submodules | README | LICENSE

eras.pl (11484B)


      1 #!/usr/bin/env -S swipl --quiet
      2 
      3 % This is an assembler for the Eris SOC. The program takes two command
      4 % line arguments: The first argument is the assembler program. Second
      5 % argument is the target binary.
      6 %
      7 % Besides writing the target file, the assembler prints an annotated
      8 % listing of the result. Lines in the output start with the current
      9 % memory address. Opcodes are followed by their byte value and label
     10 % references by their memory location, both in hexadecimal and in
     11 % square brackets.
     12 %
     13 % The assembler knows one directive: ".set_address $NN" sets the
     14 % current memory address to $NN. The ROM starts at address $80, so
     15 % in order to assemble a program stored in ROM, ".set_address $80"
     16 % should precede the actual code.
     17 %
     18 % Copyright 2022 Gerd Beuster (gerd@frombelow.net). This is free
     19 % software under the GNU GPL v3 license or any later version. See
     20 % COPYING in the root directory for details.
     21 
     22 
     23 %
     24 % Load opcodes generated by microcode compiler.
     25 %
     26 
     27 :- consult("opcodes.pl").
     28 
     29 
     30 :- use_module(library(dcg/basics)).
     31 
     32 % The actual main predicate: tokenizes and assembles SourceFile, writes
     33 % the resulting bytes to TargetFile and prints the listing. The output
     34 % file is only written if the assembly was successful; otherwise the
     35 % predicate fails.
     36 assemble_file(SourceFile, TargetFile) :-
     37     read_file_to_string(SourceFile, S, []),
     38     string_codes(S, L),
     39     % Step 1: Tokenize the input. We commit to the first tokenization:
     40     % retrying the tokenizer after a failed assembly only produces bogus
     41     % alternatives (such as empty opcodes) and need not terminate.
     42     once(tokenize(1, _, Parsed, L, [])),
     43     % Step 2: Assemble it. The parser fails on a token sequence it has
     44     % no rule for, e.g. an assembler directive it does not know.
     45     (   once(assemble(Assembled, labels{}, Labels, 0, _, Parsed, []))
     46     ->  true
     47     ;   format('ERROR: Assembly failed (bad assembler directive?)\n'),
     48         fail
     49     ),
     50     % Step 3: De-reference labels
     51     once(dereferenceLabels(Assembled, Final, Labels)),
     52     % Write result
     53     once(get_binary(Final, Binary)),
     54     check_for_errors(Binary),
     55     save_binary(TargetFile, Binary),
     56     write("$00: "),
     57     print_map(Final, []).
     51 
     52 % We write a hex file as output: every value in Binary is written to
     53 % TargetFile in hexadecimal, zero-padded to two digits and followed
     54 % by a space. setup_call_cleanup/3 closes the stream even if writing
     55 % raises an error (e.g. for a value that is not an integer).
     56 save_binary(TargetFile, Binary) :-
     57     setup_call_cleanup(
     58         open(TargetFile, write, S),
     59         write_hex(S, Binary),
     60         close(S)).
     61 % write_hex(+Stream, +Bytes): write all Bytes to Stream.
     62 write_hex(_, []).
     63 write_hex(S, [F|R]) :-
     64     format(S, '~|~`0t~16R~2+ ', [F]),
     65     write_hex(S, R).
     61 
     62 % Succeeds only if the list of binary values resulting from assembly
     63 % does not contain the atom 'error'. That atom marks a token for
     64 % which an error occurred in the assembly process.
     65 check_for_errors(Bytes) :-
     66     (   Bytes = []
     67     ->  true
     68     ;   Bytes = [Byte|Rest],
     69         Byte \= error,
     70         check_for_errors(Rest)
     71     ).
     67 
     68 % Write source code together with binary representation to standard
     69 % out. The cut commits to each token once it has been printed.
     70 % Without it, the failing recursive call at the end of the list
     71 % backtracks into a single-token alternative and prints the last
     72 % token a second time. An empty list prints nothing.
     73 print_map --> print_command, !, print_map.
     74 print_map --> [].
     72 % print_command//0 consumes one assembled token and prints it in
     73 % listing syntax. It fails for a token print_token/3 has no clause for.
     74 print_command -->
     75     [(Token, _, Position, Code)],
     76     { print_token(Token, Position, Code) }.
     77 % print_token(+Token, +Position, +Code): print Token. Position is the
     78 % third and Code the fourth component of the token tuple; they are
     79 % only inspected where the address or the byte value is printed.
     80 % - A newline starts a new listing line with the current address.
     81 print_token(newline, bytePosition(BytePos), _) :-
     82     format('\n$~|~`0t~16R~2+: ', [BytePos]).
     83 print_token(labelDefinition(Name), _, _) :-
     84     format('~w:', Name).
     85 % - Opcodes are followed by their byte value in square brackets.
     86 print_token(opcode(Name), _, assembly(Byte)) :-
     87     format(' ~w [$~|~`0t~16R~2+]', [Name, Byte]).
     88 % - Numbers, one clause per addressing mode.
     89 print_token(number(N, direct), _, _) :-
     90     format(' $~|~`0t~16R~2+', [N]).
     91 print_token(number(N, immediate), _, _) :-
     92     format(' #$~|~`0t~16R~2+', [N]).
     93 print_token(number(N, indirect), _, _) :-
     94     format(' ($~|~`0t~16R~2+)', [N]).
     95 print_token(comment(Text), _, _) :-
     96     format(' //~w', [Text]).
     97 % - Label references are followed by the resolved address, one
     98 %   clause per addressing mode.
     99 print_token(labelReference(Name, direct), _, assembly(Address)) :-
    100     format(' :~w [$~|~`0t~16R~2+]', [Name, Address]).
    101 print_token(labelReference(Name, immediate), _, assembly(Address)) :-
    102     format(' #:~w [$~|~`0t~16R~2+]', [Name, Address]).
    103 print_token(labelReference(Name, indirect), _, assembly(Address)) :-
    104     format(' (:~w [$~|~`0t~16R~2+])', [Name, Address]).
    105 print_token(assembler_directive(Name), _, _) :-
    106     format('.~w', [Name]).
     94 
     95 % The result of the assembly is a list of tokens augmented by their
     96 % binary representation. get_binary(+Tokens, -Bytes) keeps only that
     97 % representation: a token carrying assembly(Byte) contributes Byte,
     98 % a token carrying the empty term assembly() contributes nothing.
     99 get_binary([], []).
    100 get_binary([(_, _, _, Code)|Tokens], Bytes) :-
    101     (   Code = assembly(Byte)
    102     ->  Bytes = [Byte|Rest]
    103     ;   Code = assembly(),
    104         Bytes = Rest
    105     ),
    106     get_binary(Tokens, Rest).
    104 
    105 
    106 % Predicate assemble is the parser. It takes the tuples returned
    107 % by the tokenizer and augments them by the corresponding byte
    108 % representation in term assembly/1.
    109 %
    110 % When translating the tokens to their byte representation, a dict of
    111 % labels and their memory locations is created. The variables holding
    112 % it have names like L0 and L1, for the labels before/after a token
    113 % is processed. In order to associate the labels with their memory
    114 % location, we keep track of the current memory location along the
    115 % way in variables named like B0 (before processing the next token)
    116 % and B1 (after processing the next token). The actual variable names
    117 % may vary.
    118 %
    119 % The cut commits to the first command that matches. Without it, the
    120 % failing recursive call at the end of the token list backtracks into
    121 % the remaining command clauses: a final opcode is then reported as
    122 % unknown, and a final two-byte instruction is assembled as an error.
    123 assemble(A, L0, L2, B0, B2) -->
    124     command(F, L0, L1, B0, B1),
    125     !,
    126     (   assemble(R, L1, L2, B1, B2)
    127     ->  { append(F, R, A) }
    128     ;   % No further command follows: F was the last one.
    129         { A = F, L2 = L1, B2 = B1 }
    130     ).
    123 
    124 % Newlines and comments do not generate assembler code: the token is
    125 % passed on with the empty term assembly(), and neither the labels
    126 % nor the byte position change.
    127 command([(Token, lineNumber(N), bytePosition(B), assembly())],
    128         L, L, B, B) -->
    129     [(Token, lineNumber(N))],
    130     { (   Token = newline
    131       ;   Token = comment(_)
    132       ) }.
    131 % A label reference or a number that stands on its own occupies one
    132 % byte, therefore the byte counter advances by 1. For a number the
    133 % byte is known right away. For a label reference the symbolic label
    134 % is stored as the "byte code" in assembly/1; in the second pass this
    135 % reference is replaced by the number representing the absolute
    136 % position of the label.
    137 command([(Token, lineNumber(N), bytePosition(B), assembly(Value))],
    138         L, L, B, Next) -->
    139     [(Token, lineNumber(N))],
    140     { (   Token = labelReference(Value, _)
    141       ;   Token = number(Value, _)
    142       ),
    143       Next is B + 1 }.
    144 % Label definitions refer to the current byte position. We store the
    145 % association of label and byte position in the label dict; no byte
    146 % is generated and the byte position does not change.
    147 command([(labelDefinition(S), lineNumber(N), bytePosition(B), assembly())],
    148         L0, L1, B, B) -->
    149     [(labelDefinition(S), lineNumber(N))],
    150     { put_dict(S, L0, B, L1) }.
    153 
    154 % Opcodes may or may not be followed by an argument (number or label
    155 % reference). Since the argument indicates the addressing mode of the
    156 % opcode, we have to parse them together.
    157 % - Two byte opcodes: the opcode byte is looked up with
    158 %   opcode_to_byte/3 for the addressing mode M of the argument. The
    159 %   argument occupies the byte after the opcode. A number is its own
    160 %   byte code, a label reference is stored symbolically.
    161 command([(opcode(S), lineNumber(LN0), bytePosition(B), assembly(OpByte)),
    162          (Arg, lineNumber(LN1), bytePosition(ArgPos), assembly(X))],
    163         L, L, B, Next) -->
    164     [(opcode(S), lineNumber(LN0)), (Arg, lineNumber(LN1))],
    165     { (   Arg = number(X, M)
    166       ;   Arg = labelReference(X, M)
    167       ),
    168       opcode_to_byte(S, M, OpByte),
    169       ArgPos is B + 1,
    170       Next is B + 2 }.
    171 % - One byte opcodes: the opcode byte is looked up with
    172 %   opcode_to_byte/2 and no argument is consumed.
    173 command([(opcode(S), lineNumber(LN0), bytePosition(B), assembly(OpByte))],
    174         L, L, B, Next) -->
    175     [(opcode(S), lineNumber(LN0))],
    176     { opcode_to_byte(S, OpByte),
    177       Next is B + 1 }.
    180 % - Unknown opcodes: the opcode is reported together with its line
    181 %   number and gets the byte code 'error'. The byte position is not
    182 %   advanced and a following argument is not consumed.
    183 command([(opcode(Name), lineNumber(Line), bytePosition(Pos), assembly(error))],
    184         Labels, Labels, Pos, Pos) -->
    185     [(opcode(Name), lineNumber(Line))],
    186     { Args = [Name, Line],
    187       format('ERROR: Unknown opcode/addressing mode "~w" (line ~w)\n', Args) }.
    185 % Compiler directive set_address changes the current byte address to
    186 % the direct number that follows it. It does not generate any
    187 % assembly code on its own: both tokens carry the empty term
    188 % assembly() and the byte position before the change.
    189 command([(Directive, lineNumber(LN0), bytePosition(Old), assembly()),
    190          (Target, lineNumber(LN1), bytePosition(Old), assembly())],
    191         L, L, Old, New) -->
    192     [(Directive, lineNumber(LN0)), (Target, lineNumber(LN1))],
    193     { Directive = assembler_directive("set_address"),
    194       Target = number(New, direct) }.
    193 
    194 % At this point, the assembly code is a list of bytes interspersed
    195 % with label references. Here we replace the label references by the
    196 % corresponding memory location.
    197 %
    198 % dereferenceLabels(+Tokens, -Resolved, +Labels): Labels is the dict
    199 % of label definitions. A reference to a label that is not in Labels
    200 % is reported with its line number and gets the byte code 'error'.
    201 % All other tokens are copied unchanged.
    202 %
    203 % Known and unknown labels are handled in one clause that matches the
    204 % full four-element token. A separate clause without bytePosition/1
    205 % never matches a token, so unknown labels would pass through
    206 % unresolved and unreported.
    207 dereferenceLabels([], [], _).
    208 dereferenceLabels([(labelReference(L, M), lineNumber(N), bytePosition(P),
    209                     assembly(L))|RL],
    210                   [(labelReference(L, M), lineNumber(N), bytePosition(P),
    211                     assembly(B))|R],
    212                   Labels) :-
    213     !,
    214     (   get_dict(L, Labels, Address)
    215     ->  B = Address
    216     ;   format('ERROR: Unknown label "~w" (line ~w)\n', [L, N]),
    217         B = error
    218     ),
    219     dereferenceLabels(RL, R, Labels).
    220 dereferenceLabels([F|R], [F|RD], L) :-
    221     dereferenceLabels(R, RD, L).
    212 
    213 % The tokenizer knows the following tokens:
    214 % newline, assembler directive, comment, labelReference,
    215 % labelDefinition, number, and opcode. The different tokens are
    216 % identified as follows:
    217 % - Assembler directives start with '.'.
    218 % - Comments start with '//' and end at the end of the line.
    219 % - Labels end with ':'.
    220 % - Label references begin with ':'.
    221 % - Numbers begin with '$'.
    222 % - Label references and numbers are prefixed with '#' for immediate
    223 %   addressing and enclosed in round brackets for indirect addressing.
    224 % - Everything else is an opcode.
    225 % We store the line number of the token with the token for error reporting.
    226 %
    227 % LNum0 and LNum2 keep track of line numbers: LNum0 is the line number
    228 % before parsing the first token, LNum2 is the line number after
    229 % parsing the last token.
    230 %
    231 % The tokenizer is deterministic: it commits to the first token that
    232 % matches and stops at the end of the input, skipping trailing white
    233 % space. Otherwise white space after the last token would be turned
    234 % into an empty opcode, and backtracking into the tokenizer could
    235 % insert empty opcodes without bound.
    236 tokenize(LNum0, LNum2, [T|R]) -->
    237     token(LNum0, LNum1, T),
    238     !,
    239     tokenize_rest(LNum1, LNum2, R).
    240 % tokenize_rest//3 parses the tokens after the first one; the list is
    241 % empty if only white space is left.
    242 tokenize_rest(LNum, LNum, []) --> whites, eos, !.
    243 tokenize_rest(LNum0, LNum1, R) --> tokenize(LNum0, LNum1, R).
    229 % token(+LNum0, -LNum1, -Token)// parses one token, skipping leading
    230 % white space. LNum0 is the current line number; it is stored with
    231 % the token. The clauses are tried in the order given here, so the
    232 % opcode clause at the end catches whatever no other clause accepts.
    233 %
    234 % When we parse a new line, the line number is incremented.
    235 token(LNum0, LNum1, (newline, lineNumber(LNum0))) -->
    236     whites, [10], { LNum1 is LNum0 + 1 }.
    237 % Parse assembler directives
    238 token(LNum, LNum, (assembler_directive(A), lineNumber(LNum))) -->
    239     whites, [0'.], string_without("\n \t", L), { string_codes(A, L) }.
    240 % Parse comments
    241 token(LNum, LNum, (comment(C), lineNumber(LNum))) -->
    242     whites, [0'/], [0'/], string_without("\n", L), { string_codes(C, L) }.
    243 % Label references - direct addressing
    244 token(LNum, LNum, (labelReference(A, direct), lineNumber(LNum))) -->
    245     whites, [0':], string_without("\n \t", L),
    246     { atom_codes(A, L) }.
    247 % Label references - immediate addressing
    248 token(LNum, LNum, (labelReference(A, immediate), lineNumber(LNum))) -->
    249     whites, [0'#], [0':], string_without("\n \t", L),
    250     { atom_codes(A, L) }.
    251 % Label references - indirect addressing. The label name has to stop
    252 % at the closing bracket: string_without//2 takes as many codes as it
    253 % can, so without ')' in the stop set it swallows the bracket and
    254 % this clause never succeeds.
    255 token(LNum, LNum, (labelReference(A, indirect), lineNumber(LNum))) -->
    256     whites, [0'(], [0':], string_without("\n \t)", L), [0')],
    257     { atom_codes(A, L) }.
    258 % Label definitions
    259 token(LNum, LNum, (labelDefinition(A), lineNumber(LNum))) -->
    260     whites, string_without("\r\n \t:", L), [0':],
    261     { atom_codes(A, L) }.
    262 % Parse numbers - direct addressing mode
    263 token(LNum, LNum, (number(N, direct), lineNumber(LNum))) -->
    264     whites, [0'$], xinteger(N).
    265 % Parse numbers - immediate addressing mode
    266 token(LNum, LNum, (number(N, immediate), lineNumber(LNum))) -->
    267     whites, [0'#], [0'$], xinteger(N).
    268 % Parse numbers - indirect addressing mode
    269 token(LNum, LNum, (number(N, indirect), lineNumber(LNum))) -->
    270     whites, [0'(], [0'$], xinteger(N), [0')].
    271 % Everything not parsed yet is an opcode
    272 token(LNum, LNum, (opcode(P), lineNumber(LNum))) -->
    273     whites, string_without("\n \t", L), { string_codes(P, L) }.
    266 
    267 
    268 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    269 
    270 :- initialization(main, main).
    271 
    272 % Entry point: with exactly two command line arguments, assemble the
    273 % source file into the target file and finish the listing with a
    274 % newline. With any other arguments, print a usage message.
    275 main(Argv) :-
    276     (   Argv = [Source, Binary]
    277     ->  assemble_file(Source, Binary),
    278         nl
    279     ;   writeln('Usage: eras.pl <SOURCE> <BINARY>')
    280     ).