Index: match.scm =================================================================== --- match.scm (revision 43132) +++ match.scm (working copy) @@ -86,12 +86,27 @@ ;;> \scheme{___} is provided as an alias for \scheme{...} when it is ;;> inconvenient to use the ellipsis (as in a syntax-rules template). -;;> The \scheme{..1} syntax is exactly like the \scheme{...} except +;;> The \scheme{**1} syntax is exactly like the \scheme{...} except ;;> that it matches one or more repetitions (like a regexp "+"). -;;> \example{(match (list 1 2) ((a b c ..1) c))} -;;> \example{(match (list 1 2 3) ((a b c ..1) c))} +;;> \example{(match (list 1 2) ((a b c **1) c))} +;;> \example{(match (list 1 2 3) ((a b c **1) c))} +;;> The \scheme{*..} syntax is like \scheme{...} except that it takes +;;> two trailing integers \scheme{} and \scheme{}, and requires +;;> the pattern to match from \scheme{} times. + +;;> \example{(match (list 1 2 3) ((a b *.. 2 4) b))} +;;> \example{(match (list 1 2 3 4 5 6) ((a b *.. 2 4) b))} +;;> \example{(match (list 1 2 3 4) ((a b *.. 2 4 c) c))} + +;;> The \scheme{( =.. )} syntax is a shorthand for +;;> \scheme{( *.. )}. + +;;> \example{(match (list 1 2) ((a b =.. 2) b))} +;;> \example{(match (list 1 2 3) ((a b =.. 2) b))} +;;> \example{(match (list 1 2 3 4) ((a b =.. 2) b))} + ;;> The boolean operators \scheme{and}, \scheme{or} and \scheme{not} ;;> can be used to group and negate patterns analogously to their ;;> Scheme counterparts. @@ -227,6 +242,12 @@ ;; performance can be found at ;; http://synthcode.com/scheme/match-cond-expand.scm ;; +;; 2021/06/21 - fix for `(a ...)' patterns where `a' is already bound +;; (thanks to Andy Wingo) +;; 2020/09/04 - perf fix for `not`; rename `..=', `..=', `..1' per SRFI 204 +;; 2020/08/21 - fixing match-letrec with unhygienic insertion +;; 2020/07/06 - adding `..=' and `..=' patterns; fixing ,@ patterns +;; 2016/10/05 - treat keywords as literals, not identifiers, in Chicken ;; 2016/03/06 - fixing named match-let (thanks to Stefan Israelsson Tampe) ;; 2015/05/09 - fixing bug in var extraction of quasiquote patterns ;; 2014/11/24 - adding Gauche's `@' pattern for named record field matching @@ -255,7 +276,7 @@ (define-syntax match-syntax-error (syntax-rules () - ((_) (match-syntax-error "invalid match-syntax-error usage")))) + ((_) (syntax-error "invalid match-syntax-error usage")))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -351,7 +372,7 @@ ;; pattern so far. (define-syntax match-two - (syntax-rules (_ ___ ..1 *** quote quasiquote ? $ struct @ object = and or not set! get!) + (syntax-rules (_ ___ **1 =.. *.. *** quote quasiquote ? $ struct @ object = and or not set! get!) ((match-two v () g+s (sk ...) fk i) (if (null? v) (sk ... i) fk)) ((match-two v (quote p) g+s (sk ...) fk i) @@ -367,7 +388,8 @@ ((match-two v (or p ...) g+s sk fk i) (match-extract-vars (or p ...) (match-gen-or v (p ...) g+s sk fk i) i ())) ((match-two v (not p) g+s (sk ...) fk i) - (match-one v p g+s (match-drop-ids fk) (sk ... i) i)) + (let ((fk2 (lambda () (sk ... i)))) + (match-one v p g+s (match-drop-ids fk) (fk2) i))) ((match-two v (get! getter) (g s) (sk ...) fk i) (let ((getter (lambda () g))) (sk ... i))) ((match-two v (set! setter) (g (s ...)) (sk ...) fk i) @@ -387,10 +409,18 @@ (match-extract-vars p (match-gen-search v p q g+s sk fk i) i ())) ((match-two v (p *** . q) g+s sk fk i) (match-syntax-error "invalid use of ***" (p *** . q))) - ((match-two v (p ..1) g+s sk fk i) + ((match-two v (p **1) g+s sk fk i) (if (pair? v) (match-one v (p ___) g+s sk fk i) fk)) + ((match-two v (p =.. n . r) g+s sk fk i) + (match-extract-vars + p + (match-gen-ellipsis/range n n v p r g+s sk fk i) i ())) + ((match-two v (p *.. n m . r) g+s sk fk i) + (match-extract-vars + p + (match-gen-ellipsis/range n m v p r g+s sk fk i) i ())) ((match-two v ($ rec p ...) g+s sk fk i) (if (is-a? v rec) (match-record-refs v rec 0 (p ...) g+s sk fk i) @@ -423,30 +453,34 @@ ;; already bound symbol or some other literal, in which case we ;; compare it with EQUAL?. ((match-two v x g+s (sk ...) fk (id ...)) - (let-syntax - ((new-sym? - (syntax-rules (id ...) - ((new-sym? x sk2 fk2) sk2) - ((new-sym? y sk2 fk2) fk2)))) - (new-sym? random-sym-to-match - (let ((x v)) (sk ... (id ... x))) - (if (equal? v x) (sk ... (id ...)) fk)))) + ;; This extra match-check-identifier is optional in general, but + ;; can serve as a fast path, and is needed to distinguish + ;; keywords in Chicken. + (match-check-identifier + x + (let-syntax + ((new-sym? + (syntax-rules (id ...) + ((new-sym? x sk2 fk2) sk2) + ((new-sym? y sk2 fk2) fk2)))) + (new-sym? random-sym-to-match + (let ((x v)) (sk ... (id ... x))) + (if (equal? v x) (sk ... (id ...)) fk))) + (if (equal? v x) (sk ... (id ...)) fk))) )) ;; QUASIQUOTE patterns (define-syntax match-quasiquote - (syntax-rules (unquote unquote-splicing quasiquote) + (syntax-rules (unquote unquote-splicing quasiquote or) ((_ v (unquote p) g+s sk fk i) (match-one v p g+s sk fk i)) ((_ v ((unquote-splicing p) . rest) g+s sk fk i) - (if (pair? v) - (match-one v - (p . tmp) - (match-quasiquote tmp rest g+s sk fk) - fk - i) - fk)) + ;; TODO: it is an error to have another unquote-splicing in rest, + ;; check this and signal explicitly + (match-extract-vars + p + (match-gen-ellipsis/qq v p rest g+s sk fk i) i ())) ((_ v (quasiquote p) g+s sk fk i . depth) (match-quasiquote v p g+s sk fk i #f . depth)) ((_ v (unquote p) g+s sk fk i x . depth) @@ -501,7 +535,8 @@ (define-syntax match-gen-or (syntax-rules () ((_ v p g+s (sk ...) fk (i ...) ((id id-ls) ...)) - (let ((sk2 (lambda (id ...) (sk ... (i ... id ...))))) + (let ((sk2 (lambda (id ...) (sk ... (i ... id ...)))) + (id (if #f #f)) ...) (match-gen-or-step v p g+s (match-drop-ids (sk2 id ...)) fk (i ...)))))) (define-syntax match-gen-or-step @@ -531,12 +566,13 @@ (define-syntax match-gen-ellipsis (syntax-rules () + ;; TODO: restore fast path when p is not already bound ((_ v p () g+s (sk ...) fk i ((id id-ls) ...)) (match-check-identifier p - ;; simplest case equivalent to (p ...), just bind the list - (let ((p v)) - (if (list? p) - (sk ... i) + ;; simplest case equivalent to (p ...), just match the list + (let ((w v)) + (if (list? w) + (match-one w p g+s (sk ...) fk i) fk)) ;; simple case, match all elements of the list (let loop ((ls v) (id-ls '()) ...) @@ -550,11 +586,56 @@ fk i))) (else fk))))) - ((_ v p r g+s (sk ...) fk i ((id id-ls) ...)) - ;; general case, trailing patterns to match, keep track of the - ;; remaining list length so we don't need any backtracking + ((_ v p r g+s sk fk (i ...) ((id id-ls) ...)) (match-verify-no-ellipsis r + (match-bound-identifier-memv + p + (i ...) + ;; p is bound, match the list up to the known length, then + ;; match the trailing patterns + (let loop ((ls v) (expect p)) + (cond + ((null? expect) + (match-one ls r (#f #f) sk fk (i ...))) + ((pair? ls) + (let ((w (car ls)) + (e (car expect))) + (if (equal? (car ls) (car expect)) + (match-drop-ids (loop (cdr ls) (cdr expect))) + fk))) + (else + fk))) + ;; general case, trailing patterns to match, keep track of + ;; the remaining list length so we don't need any backtracking + (let* ((tail-len (length 'r)) + (ls v) + (len (and (list? ls) (length ls)))) + (if (or (not len) (< len tail-len)) + fk + (let loop ((ls ls) (n len) (id-ls '()) ...) + (cond + ((= n tail-len) + (let ((id (reverse id-ls)) ...) + (match-one ls r (#f #f) sk fk (i ... id ...)))) + ((pair? ls) + (let ((w (car ls))) + (match-one w p ((car ls) (set-car! ls)) + (match-drop-ids + (loop (cdr ls) (- n 1) (cons id id-ls) ...)) + fk + (i ...)))) + (else + fk))) + ))))))) + +;; Variant of the above where the rest pattern is in a quasiquote. + +(define-syntax match-gen-ellipsis/qq + (syntax-rules () + ((_ v p r g+s (sk ...) fk (i ...) ((id id-ls) ...)) + (match-verify-no-ellipsis + r (let* ((tail-len (length 'r)) (ls v) (len (and (list? ls) (length ls)))) @@ -562,18 +643,51 @@ fk (let loop ((ls ls) (n len) (id-ls '()) ...) (cond - ((= n tail-len) + ((= n tail-len) + (let ((id (reverse id-ls)) ...) + (match-quasiquote ls r g+s (sk ...) fk (i ... id ...)))) + ((pair? ls) + (let ((w (car ls))) + (match-one w p ((car ls) (set-car! ls)) + (match-drop-ids + (loop (cdr ls) (- n 1) (cons id id-ls) ...)) + fk + (i ...)))) + (else + fk))))))))) + +;; Variant of above which takes an n/m range for the number of +;; repetitions. At least n elements much match, and up to m elements +;; are greedily consumed. + +(define-syntax match-gen-ellipsis/range + (syntax-rules () + ((_ %lo %hi v p r g+s (sk ...) fk (i ...) ((id id-ls) ...)) + ;; general case, trailing patterns to match, keep track of the + ;; remaining list length so we don't need any backtracking + (match-verify-no-ellipsis + r + (let* ((lo %lo) + (hi %hi) + (tail-len (length 'r)) + (ls v) + (len (and (list? ls) (- (length ls) tail-len)))) + (if (and len (<= lo len hi)) + (let loop ((ls ls) (j 0) (id-ls '()) ...) + (cond + ((= j len) (let ((id (reverse id-ls)) ...) - (match-one ls r (#f #f) (sk ...) fk i))) + (match-one ls r (#f #f) (sk ...) fk (i ... id ...)))) ((pair? ls) (let ((w (car ls))) (match-one w p ((car ls) (set-car! ls)) (match-drop-ids - (loop (cdr ls) (- n 1) (cons id id-ls) ...)) + (loop (cdr ls) (+ j 1) (cons id id-ls) ...)) fk - i))) + (i ...)))) (else - fk))))))))) + fk))) + fk)))))) ;; This is just a safety check. Although unlike syntax-rules we allow ;; trailing patterns after an ellipsis, we explicitly disable multiple @@ -740,7 +854,7 @@ ;; (match-extract-vars pattern continuation (ids ...) (new-vars ...)) (define-syntax match-extract-vars - (syntax-rules (_ ___ ..1 *** ? $ struct @ object = quote quasiquote and or not get! set!) + (syntax-rules (_ ___ **1 =.. *.. *** ? $ struct @ object = quote quasiquote and or not get! set!) ((match-extract-vars (? pred . p) . x) (match-extract-vars p . x)) ((match-extract-vars ($ rec . p) . x) @@ -777,7 +891,9 @@ ((match-extract-vars _ (k ...) i v) (k ... v)) ((match-extract-vars ___ (k ...) i v) (k ... v)) ((match-extract-vars *** (k ...) i v) (k ... v)) - ((match-extract-vars ..1 (k ...) i v) (k ... v)) + ((match-extract-vars **1 (k ...) i v) (k ... v)) + ((match-extract-vars =.. (k ...) i v) (k ... v)) + ((match-extract-vars *.. (k ...) i v) (k ... v)) ;; This is the main part, the only place where we might add a new ;; var if it's an unbound symbol. ((match-extract-vars p (k ...) (i ...) v) @@ -855,34 +971,24 @@ (define-syntax match-let (syntax-rules () ((_ ((var value) ...) . body) - (match-let/helper let () () ((var value) ...) . body)) + (match-let/aux () () ((var value) ...) . body)) ((_ loop ((var init) ...) . body) (match-named-let loop () ((var init) ...) . body)))) -;;> Similar to \scheme{match-let}, but analogously to \scheme{letrec} -;;> matches and binds the variables with all match variables in scope. - -(define-syntax match-letrec +(define-syntax match-let/aux (syntax-rules () - ((_ ((var value) ...) . body) - (match-let/helper letrec () () ((var value) ...) . body)))) - -(define-syntax match-let/helper - (syntax-rules () - ((_ let ((var expr) ...) () () . body) + ((_ ((var expr) ...) () () . body) (let ((var expr) ...) . body)) - ((_ let ((var expr) ...) ((pat tmp) ...) () . body) + ((_ ((var expr) ...) ((pat tmp) ...) () . body) (let ((var expr) ...) (match-let* ((pat tmp) ...) . body))) - ((_ let (v ...) (p ...) (((a . b) expr) . rest) . body) - (match-let/helper - let (v ... (tmp expr)) (p ... ((a . b) tmp)) rest . body)) - ((_ let (v ...) (p ...) ((#(a ...) expr) . rest) . body) - (match-let/helper - let (v ... (tmp expr)) (p ... (#(a ...) tmp)) rest . body)) - ((_ let (v ...) (p ...) ((a expr) . rest) . body) - (match-let/helper let (v ... (a expr)) (p ...) rest . body)))) + ((_ (v ...) (p ...) (((a . b) expr) . rest) . body) + (match-let/aux (v ... (tmp expr)) (p ... ((a . b) tmp)) rest . body)) + ((_ (v ...) (p ...) ((#(a ...) expr) . rest) . body) + (match-let/aux (v ... (tmp expr)) (p ... (#(a ...) tmp)) rest . body)) + ((_ (v ...) (p ...) ((a expr) . rest) . body) + (match-let/aux (v ... (a expr)) (p ...) rest . body)))) (define-syntax match-named-let (syntax-rules () @@ -906,7 +1012,88 @@ ((_ ((pat expr) . rest) . body) (match expr (pat (match-let* rest . body)))))) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Challenge stage - unhygienic insertion. +;; +;; It's possible to implement match-letrec without unhygienic +;; insertion by building the let+set! logic directly into the match +;; code above (passing a parameter to distinguish let vs letrec). +;; However, it makes the code much more complicated, so we religate +;; the complexity here. +;;> Similar to \scheme{match-let}, but analogously to \scheme{letrec} +;;> matches and binds the variables with all match variables in scope. + +(define-syntax match-letrec + (syntax-rules () + ((_ ((pat val) ...) . body) + (match-letrec-one (pat ...) (((pat val) ...) . body) ())))) + +;; 1: extract all ids in all patterns +(define-syntax match-letrec-one + (syntax-rules () + ((_ (pat . rest) expr ((id tmp) ...)) + (match-extract-vars + pat (match-letrec-one rest expr) (id ...) ((id tmp) ...))) + ((_ () expr ((id tmp) ...)) + (match-letrec-two expr () ((id tmp) ...))))) + +;; 2: rewrite ids +(define-syntax match-letrec-two + (syntax-rules () + ((_ (() . body) ((var2 val2) ...) ((id tmp) ...)) + ;; We know the ids, their tmp names, and the renamed patterns + ;; with the tmp names - expand to the classic letrec pattern of + ;; let+set!. That is, we bind the original identifiers written + ;; in the source with let, run match on their renamed versions, + ;; then set! the originals to the matched values. + (let ((id (if #f #f)) ...) + (match-let ((var2 val2) ...) + (set! id tmp) ... + . body))) + ((_ (((var val) . rest) . body) ((var2 val2) ...) ids) + (match-rewrite + var + ids + (match-letrec-two-step (rest . body) ((var2 val2) ...) ids val))))) + +(define-syntax match-letrec-two-step + (syntax-rules () + ((_ next (rewrites ...) ids val var) + (match-letrec-two next (rewrites ... (var val)) ids)))) + +;; This is where the work is done. To rewrite all occurrences of any +;; id with its tmp, we need to walk the expression, using CPS to +;; restore the original structure. We also need to be careful to pass +;; the tmp directly to the macro doing the insertion so that it +;; doesn't get renamed. This trick was originally found by Al* +;; Petrofsky in a message titled "How to write seemingly unhygienic +;; macros using syntax-rules" sent to comp.lang.scheme in Nov 2001. + +(define-syntax match-rewrite + (syntax-rules (quote) + ((match-rewrite (quote x) ids (k ...)) + (k ... (quote x))) + ((match-rewrite (p . q) ids k) + (match-rewrite p ids (match-rewrite2 q ids (match-cons k)))) + ((match-rewrite () ids (k ...)) + (k ... ())) + ((match-rewrite p () (k ...)) + (k ... p)) + ((match-rewrite p ((id tmp) . rest) (k ...)) + (match-bound-identifier=? p id (k ... tmp) (match-rewrite p rest (k ...)))) + )) + +(define-syntax match-rewrite2 + (syntax-rules () + ((match-rewrite2 q ids (k ...) p) + (match-rewrite q ids (k ... p))))) + +(define-syntax match-cons + (syntax-rules () + ((match-cons (k ...) p q) + (k ... (p . q))))) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Otherwise COND-EXPANDed bits. @@ -923,12 +1110,52 @@ (lambda (expr rename compare) (if (identifier? (cadr expr)) (car (cddr expr)) - (cadr (cddr expr))))))) + (cadr (cddr expr)))))) + (define-syntax match-bound-identifier=? + (er-macro-transformer + (lambda (expr rename compare) + (if (eq? (cadr expr) (car (cddr expr))) + (cadr (cddr expr)) + (car (cddr (cddr expr))))))) + (define-syntax match-bound-identifier-memv + (er-macro-transformer + (lambda (expr rename compare) + (if (memv (cadr expr) (car (cddr expr))) + (cadr (cddr expr)) + (car (cddr (cddr expr)))))))) + (chicken + (define-syntax match-check-ellipsis + (er-macro-transformer + (lambda (expr rename compare) + (if (compare '... (cadr expr)) + (car (cddr expr)) + (cadr (cddr expr)))))) + (define-syntax match-check-identifier + (er-macro-transformer + (lambda (expr rename compare) + (if (and (symbol? (cadr expr)) (not (keyword? (cadr expr)))) + (car (cddr expr)) + (cadr (cddr expr)))))) + (define-syntax match-bound-identifier=? + (er-macro-transformer + (lambda (expr rename compare) + (if (eq? (cadr expr) (car (cddr expr))) + (cadr (cddr expr)) + (car (cddr (cddr expr))))))) + (define-syntax match-bound-identifier-memv + (er-macro-transformer + (lambda (expr rename compare) + (if (memv (cadr expr) (car (cddr expr))) + (cadr (cddr expr)) + (car (cddr (cddr expr)))))))) + (else ;; Portable versions ;; - ;; This *should* work, but doesn't :( + ;; This is the R7RS version. For other standards, and + ;; implementations not yet up-to-spec we have to use some tricks. + ;; ;; (define-syntax match-check-ellipsis ;; (syntax-rules (...) ;; ((_ ... sk fk) sk) @@ -970,4 +1197,30 @@ ((sym? x sk fk) sk) ;; otherwise x is a non-symbol datum ((sym? y sk fk) fk)))) - (sym? abracadabra success-k failure-k))))))) + (sym? abracadabra success-k failure-k))))) + + ;; This check is inlined in some cases above, but included here for + ;; the convenience of match-rewrite. + (define-syntax match-bound-identifier=? + (syntax-rules () + ((match-bound-identifier=? a b sk fk) + (let-syntax ((b (syntax-rules ()))) + (let-syntax ((eq (syntax-rules (b) + ((eq b) sk) + ((eq _) fk)))) + (eq a)))))) + + ;; Variant of above for a list of ids. + (define-syntax match-bound-identifier-memv + (syntax-rules () + ((match-bound-identifier-memv a (id ...) sk fk) + (match-check-identifier + a + (let-syntax + ((memv? + (syntax-rules (id ...) + ((memv? a sk2 fk2) fk2) + ((memv? anything-else sk2 fk2) sk2)))) + (memv? random-sym-to-match sk fk)) + fk)))) + )) Index: matchable.scm =================================================================== --- matchable.scm (revision 43132) +++ matchable.scm (working copy) @@ -6,6 +6,7 @@ match-let* match-letrec) (import scheme (chicken base) (chicken memory representation)) + (import-for-syntax (chicken keyword)) ;; CHICKEN-specific glue