;; DICTIONARY

;; Hierarchical functional dictionary (a functional filesystem). This
;; seems to be the best 'default' data structure: if you don't know
;; how to organise, put it in a filesystem. It's simplest to keep
;; purely functional, and maps nicely to XML in case this is required.
;;
;; DICT = listof (NAME . VALUE | DICT)

;; The implementation uses association lists instead of hash tables,
;; which enables shadowing. This requires care to be taken about
;; preservation of order.

;; the /false functions will return #f if an operation fails. All the
;; rest throws an exception, or calls a user-defined thunk.


(module dict mzscheme
  (require (lib "match.ss")
           "list-utils.ss")

  (provide 
           
           dict-find
           dict-find/false
           dict-recursive-find
           dict-recursive-find/false

           dict-shadow

           dict-remove

           dict-set
           
           dict-default
           dict-recursive-mute
           dict-recursive-default

           dict-unshadow

           dict?
           )
           

  ;; GET

  (define ((notfound tag))
    (error 'not-found
           "dictionary lookup failed: ~a" tag))

  (define (nf-thunk nf tag)
    (match nf
           (()      (notfound tag))
           ((thunk) thunk)))

  (define (pred tag)
    (lambda (x) (eq? x tag)))
  
  ;; Non--recursive retrieval.

  ;; This is robust against type errors: if the argument is not a
  ;; dictionary, the result is notfound. This is to prevent quadratic
  ;; behaviour.
  
  (define (dict-find  top-dict tag . notfound)
    (let ((nf   (nf-thunk notfound tag))
          (tag? (pred tag)))
           
      (let next ((dict top-dict))
        (match dict
               (((name . value) . dict+)
                (if (tag? name)
                    value
                    (next dict+)))
               (other
                (nf))))))
 

  ;; Recursive retrieval.
  (define (dict-recursive-find top-dict
                               top-url . notfound)
    (let ((nf (nf-thunk notfound top-url)))
      (let down ((dict top-dict)
                 (url  top-url))

        (match url
               (() dict) ;; leaf node
               ((tag . url+)
                (down
                 (dict-find dict tag nf)
                 url+))))))
  

  ;; These return #f if search fails.
  (define (false-thunk) #f)
  
  (define (dict-find/false d t)
    (dict-find d t false-thunk))
  (define (dict-recursive-find/false d t)
    (dict-recursive-find d t false-thunk))


  ;; PUT

  ;; Since we respect order to support shadowing, PUT can mean
  ;; different things. We need to distinguish between:
  ;;
  ;; * SHADOW: to emulate the behaviour of an environment.
  ;;
  ;; * REMOVE then SHADOW: this will put the item in the front,
  ;;   meaning that subsequent operations will be faster, and reuse
  ;;   more tail.
  ;;
  ;; * REPLACE: this preserves order, but is only efficient for items
  ;;   near the front, since it rebuilds list structure.
  ;;
  ;; * SEARCH then REPLACE/SHADOW: will preserve order, and put new
  ;;   items in front.
  ;;
  ;; * REPLACE/QUEUE: will preserve order, and put new items in back.


  ;; If order is not important (shadowing is not used), it's probably
  ;; best to REMOVE+SHADOW, since this moves the often updated values
  ;; to the head of the list, making non-destructive updates more
  ;; efficient.

  ;; Let's start with the trivial one:
  (define (dict-shadow dict tag value)
    (cons (cons tag value) dict))
  
  ;; For the rest we use the most general update function:

  ;; - respects order
  ;; - reuses tails
  ;; - handler for matching cons cell (kons)
  ;; - handler for end of list (not-found)

  (define (dict-update dict
                       tail-thunk
                       top-action
                       tag)
                       

    (let ((tag? (lambda (x) (eq? tag x))))
      (let mute ((d dict))
        (match d
               ;; done
               (() (tail-thunk))

               ;; element is on top -> perform action
               ((((= tag? #t) . v) . r) (top-action d))

               ;; else recurse, preserving order
               ((item . r) (cons item (mute r)))))))


  ;; Remove a tag, assume it exists. The counterpart "remove or leave
  ;; intact" is not defined since it is inefficient: it replaces the whole list.
  
  (define (dict-remove dict tag)
    (dict-update dict
                 (notfound tag)
                 cdr
                 tag))

  ;; Mute: creates if doesn't exist.
  
  (define (dict-set dict tag value)
    (dict-shadow
     (if (dict-find/false dict tag)
         (dict-remove dict tag) dict)
     tag value))

  ;; Default: add an entry if doesn't exist
  (define (dict-default dict tag value)
    (if (dict-find/false dict tag)
        dict
        (dict-shadow dict tag value)))

  (define (dict-recursive-mute  dict url value)
    (dict-set dict
               (car url)
               (if (null? (cdr url))
                   ;; leaf node
                   value
                   
                   ;; recurse down
                   (dict-recursive-mute    ;; recurse down dictionary chain
                    (or (dict-find/false dict (car url)) '())
                    (cdr url) value))))


  ;; implemented as find + mute
  (define (dict-recursive-default dict url value)
    (if (dict-recursive-find/false dict url)
        dict
        (dict-recursive-mute dict url value)))
  
  
  ;; (dict-remove '((a . 1) (b . 2)) 'b)
  ;; (dict-set '((a . 1) (b . 2) (c . 3)) 'c 123)


  ;; remove shadowed references.
  (define (dict-unshadow-reverse dict)
    (fold
     (lambda (record clean)
       (if (assoc (car record) clean)
           clean
           (cons record clean)))
     '() dict))

  ;; preserves order. linear reverse operator is ok, since the list
  ;; returned from dict-clean-reverse is fresh.
  (define (dict-unshadow dict)
    (reverse! (dict-unshadow-reverse dict))) 


  ;; MAP

  ;; Flat map is rather trivial.
  
  (define (dict-map dict fn)
    (map
     (match-lambda
      ((tag . value) (fn tag value)))
     dict))

  ;; The thing which is interesting is combinations. That's a bit less
  ;; trivial. One of the operations i need is to combine a dict with
  ;; defaults. This is really simple with mutation.

  ;; So, how do you combine dictionaries? Let's first generalize map,
  ;; and then generalize fold and write map in terms of fold. Start
  ;; with a combination of 2 dictionaries a and b. There are 3
  ;; possible combinations:  exists in A, B, A+B

  ;; What about reducing this to the cases A and A+B, treating A as a
  ;; selector?


  ;; Looks like the most general operations here is 'collect' or
  ;; 'gather' or something similar: take n dictionaries, and collect
  ;; all the values in lists.

  ;; Let's start with the tensor product. Using the following steps:
  ;; - find the entire domain
  ;; - compute the tensor product, using some value for undefined

  ;; conclusion for now: mutating algorithms with hash tables seem a
  ;; lot easier to implement, and also quite a lot faster (assoc list
  ;; cross-ref is always quadratic)
  
  
  ;; Because order might differ, searching has to be done anyway. The
  ;; non-recursive case then becomes:

  ; (define (dict-map fn A B)
    
  
  ;; checks only the first entry
  (define (dict? dict)
    (or (null? dict)
        (and (pair? dict)
             (pair? (car dict)))))
  
  
)