(module assembler mzscheme ;; --- util --- (require (lib "match.ss") (lib "pretty.ss") "composite.ss" "primitive.ss" "ns-base-stx.ss" "ns.ss" "ns-utils.ss" "base.ss" "dict.ss" "target.ss" ;; target dictionary format "list-utils.ss" "binary-utils.ss" ; "decoder.ss" "state.ss" "hhash.ss" "meta.ss" "asmgen-runtime.ss" ;; not really necessary here "macro-eval.ss" ) (provide assemble eval-macro) ;; FIXME: should it be here? ;; *** HIGH LEVEL ASSEMBLER *** ;; All code except the driver routine is side-effect free, and just ;; uses these parameters: ;; A note about dictionary: ;; dict-find access raw values from key ;; dict-find-code / dict-find-data ;; get address for code/data words in dict ;; The label dictionary. A combination of old labels and newly ;; generated ones. (define current-dict (make-parameter 'not-in-assembler)) (define (dict) (current-dict)) ;; The assembler pointers. (define current-pointers (make-parameter 'not-in-assembler)) (define (ptr@ tag) ;; (printf "ptr@\n") (hash-table-get (current-pointers) tag)) (define (ptr! tag value) ;; (printf "ptr!\n") (hash-table-put! (current-pointers) tag value)) (define (allot! tag n) ;; (printf "allot! ~a ~s\n" tag n) (ptr! tag (+ (ptr@ tag) n))) ;; Code for logging labels. It will be used to record phase errors ;; in the driver loop. (define current-log-label (make-parameter 'not-in-assembler)) (define (log-label symbol value) ((current-log-label) symbol value)) ;; What to do with undefined symbols? This is filled in by the ;; driver loop. (define current-undefined (make-parameter 'not-in-assembler)) (define (undefined label) ((current-undefined) label)) ;; DICTIONARY (define (ptr-push tag x) (let ((old (ptr@ tag))) (ptr! tag x) (dict-shadow-data (dict) tag old))) (define (ptr-pop tag) (ptr! tag (dict-find-data (dict) tag)) (dict-remove (dict) tag)) (define (label symbol value) (log-label symbol value) ;; phase error logging (dict-shadow-code (dict) symbol value)) (define (variable symbol value) ;; FIXME: no phase error logging? (dict-shadow-data (dict) symbol value)) (define (allot realm n) (allot! realm n) (dict)) ;; SYMBOL EVALUATION ;; The assembler will perform the final evaluation from symbolic ;; data to numbers. We use the convention that eval-xxx returns ;; #f if evaluation failed, so they can be used in an (or ...) ;; expression. ;; In the default context, symbols are resolved by looking in 3 ;; namespaces. (define (eval-generic-symbol sym) ;; generic symbol lookup (or (eval-ram sym) (eval-code sym) (eval-macro sym) )) ;; The meta expression evaluator use 'meta-eval' extended with names ;; from the symbol resolver above. This should use the parameterized ;; fail method: first pass, fail is OK since symbol table isn't ;; complete yet. (define (eval-expression exp) (with-default-find (lambda (name . paths) ;; (printf "META: ~s ~s\n" name paths) (let ((value (or (eval-generic-symbol name) (undefined name)))) ;; (eval-generic-symbol/fail name) (lambda s (cons value s)))) (meta-eval exp))) ;; Evaluation of symbols in particular contexts. Note that ;; constants can contain expressions. (define (eval-ram arg) (dict-find-data (dict) arg)) (define (eval-code arg) (dict-find-code (dict) arg)) ;; For immediate values, the 2 above can be combined into an ;; evaluator for all possible types. (define (eval-immediate arg) (let ((value (cond ((number? arg) arg) ;; idempotent ((symbol? arg) (eval-generic-symbol arg)) ((list? arg) (eval-expression arg)) ;; delayed immediate (else (error 'immediate-type-error "~a" arg))))) (if (number? value) (int value) #f))) ;; For code jump targets, symbols are only from the target code ;; label dictionary, but expressions can be anything. (define (eval-label thing) (cond ((number? thing) thing) ;; idempotent ((symbol? thing) (eval-code thing)) ((list? thing) (eval-expression thing)) ;; delayed address (else (error 'label-type-error "~a" thing)))) ;; Required evaluation: throw an exception on failure. (define (eval-label/fail thing) (or (eval-label thing) (error 'cannot-eval-label "~a" thing))) (define (eval-immediate/fail thing) (or (eval-immediate thing) (error 'cannot-eval-immediate "~a" thing))) (define (eval-generic-symbol/fail sym) (or (eval-generic-symbol sym) (error 'undefined "~a" sym))) (define (eval-macro name) (macro-eval/fail name)) ;; ASSEMBLER OPERATORS ;; All operations have this proto: ;; (dictionary instruction) -> (dictionary+ instructions) (define assemble-result values) ;; Dictionary operations driver. (define (register dict-operation . args) (assemble-result (apply dict-operation args) '())) ;; Operations that set the compilation point. (define (org updated-dict) (let ((here (ptr@ 'code))) (assemble-result updated-dict `((org ,here))))) ;; FIXME: the point where code is #f and we have to GUESS how many ;; instructions it takes is where it goes wrong. assume 1 for now. ;; ... do update 'code pointer (define (comma code) (allot! 'code (length code)) (assemble-result (dict) code)) ;; Branches: the address is the last operand in instruction. (define (resolve/delay instruction shift) ;; (display instruction) (newline) (match (reverse instruction) ((label . rest) (let ((addr ;; FIXME: 'final' is determined in the driver loop. ;;((if final ;; on last pass need resolution ;; eval-label/fail ;; eval-label) label))) (or (eval-label label) (undefined label)))) (if addr (again (cdr ;; drop r/a prefix (reverse `(,(shift addr) ,@rest)))) (comma `(,instruction))))))) ;; FIXME: should be false later (define (absolute instruction) (resolve/delay instruction (lambda (x) x))) (define (relative instruction) (resolve/delay instruction (lambda (x) ;; FIXME: PIC specific -> peval (- x (+ (ptr@ 'code) 1))))) ;; Generic instruction -> assembled instruction using plug-in assemblers (define (assemble-pure opcode . arguments) (match opcode ;; symbolic opcode ((= symbol? #t) (let ((assembler (asm-find opcode))) (if assembler (apply (asm-find opcode) (map ;; eval-immediate/fail (lambda (name) (or (eval-immediate name) (undefined name))) arguments)) (error 'invalid-opcode "~a" opcode)))) ;; error (other (error 'invalid-instruction "~a" other)))) (define (comma-machine ins) (comma ;; if some symbol lookup fails, just put the instruction ;; back into the stream. it might be available next pass. (with-handlers (((lambda (ex) (and (list? ex) (eq? (car ex) 'undefined))) (lambda (ex) `(,ins)))) (parameterize ((here (ptr@ 'code))) (apply assemble-pure ins))))) ;; call assemble-instruction recursively (not tail recursive due to ;; parameters!) (define (again instruction) (assemble-instruction (dict) instruction)) ;; ASSEMBLER OPERATOR DISPATCH ;; Take a single instruction and try to assemble it. If it doesn't ;; work, just return the input instruction as a singleton. (define (assemble-instruction dictionary instruction) ;; This code uses functions that read the dictionary ;; parameter. However, they do NOT write it: dictionary update ;; happens by returning a new dictionary using 'assemble-result'. (parameterize ((current-dict dictionary)) (match instruction ;; binary -> just quote ((= number? #t) (comma `(,instruction))) ;; state changing directives (('label l) (register label l (ptr@ 'code))) (('variable l) (register variable l (ptr@ 'data))) (('allot realm n) (register allot realm n)) (('org addr) (ptr! 'code (eval-label/fail addr)) ;; imperative (dynamic var) (org (dict))) (('org-push addr) (org (ptr-push 'code (eval-label/fail addr)))) (('org-pop) (org (ptr-pop 'code))) (('here) (comma `(,(ptr@ 'code)))) ;; code address resolution handled separately from other immediates (('a . rest) (absolute instruction)) (('r . rest) (relative instruction)) ;; The smart jump to subroutine. (('jsr exit address) (comma-machine `(jsr ,(eval-immediate/fail exit) ,(or (eval-label address) (undefined address))))) ;; pure: assemble + concatenate (_ (comma-machine instruction))))) ;; ASSEMBLER CONTROL FLOW ;; This uses 'assemble-instruction' to make multiple passes over the ;; assembler data. (define (cleanup-dict dict) (filter (lambda (x) (not (generated-label? (car x)))) ;; remove local labels dict)) ;; removed shadowed words ;; Data type used in driver (assemble-instruction just uses lists) ;; Filter the asm so dictionary operations won't accumulate. (define (asm-filter asm lst) (filter (match-lambda ((opcode . args) (not (any (lambda (o) (eq? opcode o)) lst)))) asm)) ;; CAT assembler word, bound to metaprogramming evaluatior (define-word assemble (dict in . stack) (let-values (((dict-out out) (assemble dict in))) (pack dict-out out stack))) ;; DRIVER (define (assemble input-dict input-instructions) (define pass-nb 0) ;; The symbols we generate are recorded in a hash table to detect ;; phase errors. (define *labels* '()) (define *bindings* (make-hash-table)) ;; Memory allocation pointers (define *pointers* (make-hash-table)) (define (log-label symbol value) (push! *labels* symbol)) ;; Snapshot the value of the labels. (define (clear-log) (set! *labels* '())) ;; The bin output needs to start with an (org xxx) instruction, ;; since it needs to be interpreted independent of the ;; dictionary. The easiest way to fix that is to just make sure ;; the asm starts with one. If there's no 'code variable, the ;; default is 0. (define (fix-asm-org dict asm) (match asm ((('org . a) . r) asm) ((('org-push . a) . r) asm) (else (cons `(org ,(or (dict-find-data dict 'code) 0)) asm)))) ;; Check if the dictionary labels have changed. (define (phase-errors) (define *phase-errors* '()) (define (get label) (hash-table-get *bindings* label (lambda () #f))) (for-each (lambda (label) (let ((recorded (get label)) (current (dict-find-code *dict* label))) ;; check for change (unless (and recorded (equal? current recorded)) (push! *phase-errors* label)) ;; check if label is also a macro (when (and (not recorded) (ns-ref `(macro ,label) #f)) (printf "WARNING: label ~a is shadowed by macro.\n" label)) ;; record current binding (hash-table-put! *bindings* label current))) *labels*) *phase-errors*) ;; Initialize pointers from dictionary. (define (init-pointers) (for-each (lambda (tag) (let ((addr (or (dict-find-data *dict* tag) 0))) ;; default ;; (printf "~s ~s\n" tag addr) (hash-table-put! *pointers* tag addr))) '(code data))) ;; After assembly, store the pointers. (define (exit-pointers) (for-each (lambda (tag) (let ((addr (hash-table-get *pointers* tag))) ;; shadow: leave previous marks in dictionary (set! *dict* (dict-shadow-data *dict* tag addr)))) '(code data))) ;; Run a single assembly pass. If assemble-instruction returns #f ;; it means the instruction might work in a next pass. Otherwise ;; it will throw an exception. ;; FIXME: within a single pass, it is illegal to define a name ;; twice. However, it is legal to redefine a name that was already ;; present. (define (asm-pass) (clear-log) (init-pointers) (parameterize ((current-pointers *pointers*)) (let next ((dict *dict*) (in *ins*) (out '())) (if (null? in) (begin (set! pass-nb (add1 pass-nb)) (set! *dict* (dict-unshadow dict)) ;; accu changes (apply append (reverse! out))) ;; return result (let-values (((dict+ result) (parameterize ((current-log-label log-label)) ;; (printf "ASM: ~s\n" (car in)) (assemble-instruction dict (car in))))) (next dict+ (cdr in) (cons result out))))))) ;; Perform a maximum of n passes, return binary result. (define (asm-with-max max) (let again () (when (>= pass-nb max) (error 'asm-infinite-loop)) ;; (printf "PASS ~s\n" pass-nb) (let ((ins (asm-pass))) (if (not (null? (phase-errors))) (again) ins)))) ;; PREPROC ;; fix format and remove markers left by compiler. (define *dict* input-dict) (define *ins* (fix-asm-org input-dict (asm-filter input-instructions '(comment cont stub)))) ;; PASS 0 (parameterize ;; Ignore undefined labels + ensure small jumps. ((current-undefined (lambda (label) (ptr@ 'code)))) ;; Ignore output and phase errors. Side effect sets up the ;; extended dictionary and bindings table. (asm-pass) (phase-errors)) ;; PASS 1-N (set! *ins* (parameterize ;; At this stage, undefined labels are an error. ((current-undefined (lambda (label) (error 'undefined-procedure-word "~a" label)))) (asm-with-max 100))) ;; POSTPROC (exit-pointers) (values (cleanup-dict *dict*) *ins*) ) ;; FORMATTING ;; it would be nice to format asm in standard pic style, so it can be ;; pumped straight into gpasm. stuff like this is better written in ;; scheme than cat. (define (format-asm line) (define (process-arg a) a) (define (format-args args) (match args (() "") ((a) (format "~s" a)) ((a . r) (string-append (format "~s, " a) (format-args r))))) (match line (((or 'r 'a) . l) (format-asm l)) (('label tag) (format (if (number? tag) " (~s)" "\n~s:\n") tag)) (('cont . rep) "") ;; "; continuation\n") (('comment c) (format "; ~a\n" c)) (('stub . r) "; stub\n") ((opcode . args) (format "\t~s\t~a\n" opcode (format-args (map process-arg args)))) )) (snarf as-push (base) ((instruction) (format-asm)) ;; ((const) (asm-constant-find)) ) )