#lang scheme/base ;; Parser for plaintext body -> xhtml as x-expression ;; The body syntax is defined by this parser. Since it's all quite ;; ad-hoc I'm not going through a grammar definition. (require scheme/promise (planet zwizwa/lib/x/regexp) (planet zwizwa/lib/x/seq-tools)) (provide parse-body-strings body-tx-word body-tx-link) (define (id x) x) (define body-tx-word (make-parameter id)) (define body-tx-link (make-parameter id)) ;(define re-all ; ;; begin + end paren | whitespace boundary | period ; #px"(?=\\(|\\))|(?<=\\(|\\))|(?<=\\s)(?=\\S)|(?<=\\S)(?=\\s)|(?=\\.\\s)") (define re-all (pregexp (re-or ;; (re-tokens "^###(.*)?\n") ;; embedded commands ;; stand-alone tokens ;; these can be part of urls: "\\(" "\\)" (re-tokens "'" "\"" "<" ">" "\\{" "\\}") ;; parenthesis ;; split at whitespace borders "(?<=\\s)(?=\\S)|(?<=\\S)(?=\\s)" ;; split outside of square brackets "(?=\\[)|(?<=\\])" ;; punctuation needs whitspace or end-of-line to parse as a ;; separate token (can be part of url) "(?=[\\,\\.\\:\\;]+?[\\s$])" ))) (define (tokenize str) (regexp-split re-all str)) (define (re-match re token) (and (string? token) (regexp-match re token))) ;; Variable names are enclosed in square brackets. (define ((reference declare) word) (apply (lambda (_ var) (if var (begin ;; (printf "registering: ~a\n" var) (hash-set! declare var #f) (hash-set! declare 'last var) (lambda (ref) ;; (printf "substituting: ~a\n" var) (let ((template (hash-ref ref var))) (if template (template word) word)))) word)) (or (re-match #px"^\\[(.*)\\]" word) (list #f #f)))) ;; Links are bound to the last variable reference if it's not yet ;; bound. This is a compromise between ease of use (how it's already ;; used) and sane behaviour. (define (bind-last! def template) (let ((last-var (hash-ref def 'last (lambda () #f)))) (when (and last-var (not (hash-ref def last-var))) (hash-set! def last-var template)))) (define ((link def) word) (define (tx current template) (let ((processed ((body-tx-link) current))) (bind-last! def template) (template processed))) (define ((ahref link) element) `(a ((href ,link)) ,element)) (define ((img link) element) (define _link (format "http://~a" link)) `(img ((src ,_link)))) ;; Link to md5 library metadata. ;; (define md5-library "file:///home/tom/library/md5") (define (lib link) (ahref (format "../library/~a" link))) (define ((isbn link) element) `(a ((href, (format "http://isbndb.com/search-all.html?kw=~a" link))) ,element)) (let ((url-match (re-match #px"^(\\S+)://(.*)" word))) (if (not url-match) ;; Each non-url word gets passed through a filter for whatever ;; processing or registration. ;; Might need extra filtering ala: ;; (if (re-match #px"^\\w+$" word) ...) ((body-tx-word) word) ;; Similar for links which get filtered after local ramblings ;; syntax transformation to standard urls. (apply (lambda (orig tag location) (tx word (case (string->symbol tag) ((isbn) (isbn location)) ((img) (img location)) ((lib) (lib location)) ((entry) (ahref location)) (else (ahref word))))) url-match)))) ;(define (side-effects tok) ; (when (regexp-match #re"^### ; #t) (define (parse-body-strings string-list) (define vars (make-hash)) ;; Pass1: tokenize + mark links + references (let* ((token-lst (apply append (map tokenize string-list))) (lst (map (compose (link vars) (reference vars)) token-lst))) ;; Pass2: evaluate ;; (printf "vars: ~a\n" (hash-map refs (lambda (k v) k))) (for/list ((e lst)) (if (procedure? e) (e vars) e))))