#lang scheme/base ;; Purely functional ramblings.txt parsing. (provide lines->xhtml xhtml-lines->xexpr file->entries article->dict+lines lines->string node-query->link escaped-lines) ;; This file essentially specifies the ad-hoc meta-syntax used in the ;; ramblings file format. The body parser is in a different file: (require "entry-body.ss" xml xml/xexpr scheme/match scheme/dict) ;; The first parsing step (article segmentation + header parsing) ;; happens immediately (it is necessary for the index table) so it ;; needs to be fast. The body parse can be done lazily per article. ;; Convert bytes to a (list-of (list-of string)) (define (file->entries filename) (define (->lines b) (map (lambda (b) (bytes->string/utf-8 b #\?)) (let ((rx ;; #rx#"\r\n" #rx#"\n" )) (regexp-split rx b)))) ;; Read the file whole into a byte string. (let ((l (file-size filename))) (let ((port (open-input-file filename))) (let ((bytes (read-bytes l port))) (close-input-port port) ;; Segment articles + lines. (map ->lines (regexp-split #px#"(?m:(?=^Entry:))" bytes)))))) ;; Convert header line to dictionary attribute. (define (parse-header-line str) (let ((m (regexp-match #px"^(\\w+): *(.*)" str))) (and m (apply (lambda (key value) (cons (string->symbol key) value)) (cdr m))))) ;; A quick and dirty parser for the standard output of the 'date' ;; command. It's not RFC 2822. The date is going to be used as (part ;; of) a file name, since it's the only thing that's unique about a ;; post, and easy to come by. ;; (define d "Sat Sep 15 20:16:48 CEST 2007") (define (date-string->id date) (define n string->number) (define (pad value) (substring (number->string (+ value 100)) 1 3)) (define (month->n str) (let ((m (string->symbol str))) (let next ((n 1) (ms '(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec))) (if (eq? m (car ms)) n (next (+ n 1) (cdr ms)))))) (let-values (((day-of-week month day time timezone year) (apply values (regexp-split #px"\\s+" date)))) (let-values (((hour minute second) (apply values (regexp-split #px":" time)))) (string->symbol (format "~a~a~a-~a~a~a" year (pad (month->n month)) (pad (n day)) (pad (n hour)) (pad (n minute)) (pad (n second))))))) (define (lines->string string-lst) (apply string-append (apply append (for/list ((l string-lst)) (list l "\n"))))) (define (article->dict+lines lines) (let next ((dict '()) (lines lines)) (let ((done (lambda () ;; Index is a 8-digit alphanumerically sortable tag ;; derived from the date string. (let ((index (let/ec otherwise (date-string->id (dict-ref dict 'Date (lambda () (otherwise 'header))))))) (values `((Index . ,index) . ,dict) lines))))) (if (null? lines) (done) (let ((attribute (parse-header-line (car lines)))) (if (not attribute) (done) (next (cons attribute dict) (cdr lines)))))))) ;; Convert lines to parsed article. (define (id x) x) (define (xhtml-lines->xexpr lines) (string->xexpr (apply string-append (append (map (lambda (l) (string-append l " ")) lines))))) (define (lines->xhtml string-lst #:tx-word [tx-word id] #:tx-link [tx-link id]) ;; Set low-level accounting routines. (parameterize ((body-tx-word tx-word) (body-tx-link tx-link)) `(pre ,@(parse-body-strings (apply append (for/list ((line string-lst)) (list line "\n"))))))) ;; For embedding regular syntax in other files, i.e. in comments. (define (escaped-lines rx lst) (for/list ((line lst) #:when (regexp-match rx line)) (list->string (cdr (string->list line))))) ;; Convert a query to a href link string that performs the query ;; through http. This abstracts http message dispatch. (define (query->strs query) (apply append (for/list ((q query)) (list "&" (format "~s=~s" (car q) (cdr q)))))) (define (node-query->link node@ query) (apply string-append (cons (format "~a?" (node@ 'ID)) (cdr (query->strs query)))))