;;; Simple backtracking parser generator ;;; ;;; (C) 2007 Gergő ÉRDI ;;; http://cactus.rulez.org/ (defmacro defineparser (name startrule &body rules) (let ((rulenames (mapcar #'car rules))) `(defun ,name (str lexer &optional (terminal-matcher #'eq)) (labels ((match-terminal (x y) (funcall terminal-matcher x y)) (rulep (x) (and (listp x) (find (car x) ',rulenames))) (backtracking-parser-step (state input history stack) (ecase state ((q) ;; Accept/substitute (cond ;; If we finished in state q -> the input matches ((not input) (values 'match input history stack)) (t (let ((next-element (car stack))) (,@'(case next-element) ;; Substitute a non-terminal using its first rule ,@(loop for (rulename first-substitution) in rules collect `((,rulename) (values 'q input (cons '(,rulename . 1) history) (append ',first-substitution (cdr stack))))) ;; Try matching a single terminal, backtrack if fails (otherwise (if (match-terminal (car input) next-element) (values 'q (cdr input) (cons (car input) history) (cdr stack)) (values 'b input history stack)))))))) ((b) ;; Backtrack (let ((last-element (car history))) (cond ;; Putback ((not (rulep last-element)) (values 'b (cons last-element input) (cdr history) (cons last-element stack))) (t (let ((rulename (car last-element)) (rulenum (cdr last-element))) (,@'(ecase rulename) ;; Try next substitution of non-terminal ,@(loop for rule in rules collecting (let ((rulename (car rule)) (rulebody (cdr rule))) `((,rulename) (,@'(ecase rulenum) ,@(loop for substitutions on rulebody for i from 1 collecting `((,i) (let ((popped-stack (nthcdr ,(list-length (first substitutions)) stack))) ,(if (second substitutions) `(values 'q input (cons '(,rulename . ,(+ i 1)) (cdr history)) (append ',(second substitutions) popped-stack)) ;; If no more substitutions are possible, backtrack or fail (if (eq rulename startrule) `(if (cdr history) (values 'b input (cdr history) (cons ',rulename popped-stack)) (values 'nomatch input history popped-stack)) `(values 'b input (cdr history) (cons ',rulename popped-stack))))))))))))))))))) ;; Create a syntax tree from a history (syntaxtree-from-history (history) (cond ((atom (car history)) (values (car history) (cdr history))) (t (let ((rulename (car (car history))) (rulenum (cdr (car history)))) (,@'(ecase rulename) ,@(loop for rule in rules collecting (let ((rulename (car rule)) (substitutions (cdr rule))) `((,rulename) (,@'(ecase rulenum) ,@(loop for subst in substitutions for i from 1 collecting `((,i) (let* ((rest (cdr history)) (children (loop for i from 1 to ,(list-length subst) collecting (multiple-value-bind (child child-rest) (syntaxtree-from-history rest) (setq rest child-rest) child)))) (values (cons ',rulename children) rest))))))))))))) ;; Parser loop: run state machine iterations until state is 'match or 'nomatch (backtracking-parser-loop (state input history stack) (cond ((eq state 'match) (syntaxtree-from-history (reverse history))) ((eq state 'nomatch) ()) (t (multiple-value-call #'backtracking-parser-loop (backtracking-parser-step state input history stack)))))) (backtracking-parser-loop 'q (funcall lexer str) () '(,startrule)))))) (defineparser expression-parser E (E (T) (T E^)) (E^ (#\+ T) (#\+ T E^)) (T (F) (F T^)) (T^ (#\* F) (#\* F T^)) (F (LIT) (#\( E #\))) (LIT (#\a))) ; (LIT (DIGIT) (DIGIT LIT)) ; (DIGIT (#\a))) ; (DIGIT (#\0) (#\1) (#\2) (#\3) (#\4) (#\5) (#\6) (#\7) (#\8) (#\9))) (defineparser example-parser S (S (#\a A #\d) (#\a B)) (A (#\b) (#\c)) (B (#\c #\c #\d) (#\d #\d #\c))) (labels ((lexer (str) (loop for c across str collect c))) (pprint (example-parser "accd" #'lexer))) (labels ((lexer (str) (loop for c across str collect c))) (pprint (expression-parser "(a*(a+a))" #'lexer)))