#lang scheme/base (provide (all-defined-out)) (require ;; scheme/struct-info (for-syntax scheme/base)) (require racket/generator) ;; Share data and keep bidirectional references. ;; This converts a flat table (list of structs) representing a ;; relation to a graph structure where table entries refer to shared ;; objects, and the objects refer back to table entries. ;; This is useful for representing very redundant log data like apache ;; access logs. ;; Struct to maintain instances of references to the same object. (define-struct reflist (object id refs) #:mutable) (define (add-ref! hash object entry gen-id) (let ((er (hash-ref hash object (lambda () (let ((er (make-reflist object (gen-id) '()))) (hash-set! hash object er) er))))) (set-reflist-refs! er (cons entry (reflist-refs er))) er)) ;; Use reflist to build a table (list of vectors) with shared fields. (define (table-share gen [unpack-attribute reflist-object] [pack-result values] [hashes #f] ) ;; Register the object and put a shared instance in the vector. (define (register! vec hash object i gen-id) (vector-set! vec i (unpack-attribute (add-ref! hash object vec gen-id)))) ;; Generate ID. It's more useful to use integer for this. These ;; also don't need to be tagged with a namespace; that's only useful ;; for (define (id-generator counts index) (lambda () (let* ((c (vector-ref counts index))) (vector-set! counts index (add1 c)) c))) ;; Old approach: column-prefixed ID for global unique name. ;; (define (id-generator counts index) ;; (lambda () ;; (let* ((c (vector-ref counts index)) ;; (sym (string->symbol (format "~s:~s" index c)))) ;; (vector-set! counts index (add1 c)) ;; sym))) (let* ((first (gen)) (n (vector-length first)) (hashes (build-vector n (lambda _ (make-hash)))) (counts (make-vector n 0))) (pack-result (for/list ((entry (in-producer gen #f))) (let ((v (make-vector n))) (for ((column entry) (hash hashes) (i (in-naturals))) (register! v hash column i (id-generator counts i))) v)) hashes))) ;; Convert the internal representation into a 'let form that can be ;; evaluated to construct a shared structure. ;; FIXME: this is probably broken. It probably relies on the id field ;; in reflist being a unique name. Probable fix: just prefix with ;; column name/number. (define (table+hash->let table hashes) (define (objs->ids hashes seq) (for/list ((h hashes) (e seq)) (reflist-id (hash-ref h e)))) `(let ,(for*/list ((h hashes) ((key reflist) h)) (list (reflist-id reflist) (list 'quote (reflist-object reflist)))) (list ,@(for/list ((row table)) `(vector ,@(objs->ids hashes (vector->list row))))))) (define (table->let gen) (table-share gen reflist-id table+hash->let)) ;; TEST #; (begin (define-values (table hashes) (table-share (list (vector "foo" "a") (vector "foo" "b") (vector "foo" "c")))) (define expr (table+hash->let table hashes)) ) #; (define gen (generator () (let next ((n 3)) (if (zero? n) #f (begin (yield (vector 1 2 3)) (next (sub1 n)))))))