[quoted string matcher regexp tom@zwizwa.be**20111103222621 Ignore-this: 5810d4640fa3096abe5f22326e59d0f8 ] hunk ./lib/x/apachelog-dump.ss 57 -(define (format-sanitize bytes) bytes) -(define sql-quote "\"") hunk ./lib/x/apachelog-dump.ss 58 -(define (format-sanitize_ bytes) - (let ((bytes (regexp-replace* #"'" bytes "\""))) - bytes)) +;(define (format-sanitize_ bytes) +; (let ((bytes (regexp-replace* #"'" bytes "\""))) +; bytes)) + +;; Should be ok.. +(define (format-sanitize bytes) bytes) hunk ./lib/x/apachelog-dump.ss 70 - (format "~a~a~a" - sql-quote - (format-sanitize el) - sql-quote - ) - ) + (format "\"~a\"" (format-sanitize el))) hunk ./lib/x/apachelog-regexp.ss 10 -;; s - quoted string +;; q - quoted string hunk ./lib/x/apachelog-regexp.ss 14 +;; w -> 1 value +;; d -> 7 values +;; q -> 2 values (2nd one is last matched char in string. FIXME: how?) + +;; Note that the quoted string does not have the outer quotes, but it +;; does keep escapes inside the string in quoted form. FIXME: how? + hunk ./lib/x/apachelog-regexp.ss 23 -(define q "\"(.*?)\"") -(define _ "\\s+?") + +;; whitespace separator; no capture group, so it binds no value +(define _ "\\s+?") + +;; quoted string (quotes not included) +;; The body is a run of escape pairs (backslash + any char) or of chars +;; that are neither backslash nor quote, so only an unescaped quote can +;; end the string. +; (define q "\"((\\.|[^\\\"])*)\"") ;; WRONG +(define q "\"((\\\\.|[^\\\\\"])*)\"") hunk ./lib/x/apachelog-regexp.ss 92 - +;; FIXME: the r are due to the double parens in the quoted string matcher. hunk ./lib/x/apachelog-regexp.ss 106 - request + request r0 hunk ./lib/x/apachelog-regexp.ss 109 - referrer - agent) + referrer r1 + agent r2) hunk ./lib/x/apachelog-regexp.ss 133 - request + request r1 hunk ./lib/x/apachelog-regexp.ss 136 - referrer - agent) + referrer r2 + agent r3) hunk ./lib/x/apachelog-regexp.ss 157 - request + request r1 hunk ./lib/x/apachelog-regexp.ss 194 -;(define log (open-input-file "/tmp/access.log")) -;(define line (read-bytes-line log)) +(define log (open-input-file "/tmp/access.log")) +(define line (read-bytes-line log)) hunk ./plt.txt 1670 - escapes correctly. + escapes correctly. 
see [1] \"(\\.|[^\"])*\" hunk ./plt.txt 1672 +[1] http://stackoverflow.com/questions/249791/regex-for-quoted-string-with-escaping-quotes + +Entry: Parsing strings +Date: Thu Nov 3 17:30:17 EDT 2011 + +All these hacking attempts are really hard to parse! + +This one doesn't work for the string matcher: + + +"giebrok.zwizwa.be:80 83.101.57.157 - - [26/Aug/2011:00:18:08 +0200] \"GET /WebID/IISWebAgentIF.dll?postdata=\\\"> HTTP/1.1\" 302 417 \"-\" \"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)\"" + +Or in unquoted form: +giebrok.zwizwa.be:80 83.101.57.157 - - [26/Aug/2011:00:18:08 +0200] "GET /WebID/IISWebAgentIF.dll?postdata=\"> HTTP/1.1" 302 417 "-" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)" + +(define q "\"((\\.|[^\"])*)\"") ;; quoted string (quotes not included) + +Looks like that regexp is wrong, some quoting is missing: + +(define q "\"((\\.|[^\\\"])*)\"")