;;; shift/reduce LALR(1) parser generator ;;; ;;; (C) 2008 Gergő ÉRDI ;;; http://cactus.rulez.org/ (in-package :common-lisp-user) (defpackage :cactus.parser-shiftreduce (:use :common-lisp :cactus.parser-readahead) (:export :defparser :defparser/actions :*parser-debug-stream* :with-parser-debug-stream :formals-mismatch-error)) (in-package :cactus.parser-shiftreduce) ;;; LR(1) items (defun make-lritem (rulename past future readahead) (list rulename past future readahead)) (defun make-lritem-start (rulename subst readahead) (make-lritem rulename (list) subst readahead)) (defun lritem-rulename (item) (first item)) (defun lritem-past (item) (second item)) (defun lritem-future (item) (third item)) (defun lritem-next (item) (car (lritem-future item))) (defun lritem-readahead (item) (fourth item)) (defun lritem-shift (item readahead) (make-lritem (lritem-rulename item) (append (lritem-past item) (list (lritem-next item))) (cdr (lritem-future item)) readahead)) (defun lritem= (item1 item2) (equal item1 item2)) (defun lritem=/merge-readahead (item1 item2) (and (eq (lritem-rulename item1) (lritem-rulename item2)) (equal (lritem-past item1) (lritem-past item2)) (equal (lritem-future item1) (lritem-future item2)))) ;;; Grammar syntax (defun rule-rulename (rule) (car rule)) (defun rule-substs (rule) (cdr rule)) (defun subst-action-subst (subst) (first subst)) (defun subst-action-formals (subst) (second subst)) (defun subst-action-action (subst) (cddr subst)) ;;; Sets (defun set= (set1 set2 &key (test #'equal)) (not (set-exclusive-or set1 set2 :test test))) (defun lrset= (set1 set2) (set= set1 set2 :test #'lritem=)) (defun lrset=/merge-readahead (set1 set2) (set= set1 set2 :test #'lritem=/merge-readahead)) (define-condition formals-mismatch-error (error) ((rulename :initarg :rulename :reader rulename) (subst-action :initarg :subst-action :reader subst-action)) (:report (lambda (condition stream) (let ((subst (subst-action-subst (subst-action condition))) (formals (subst-action-formals (subst-action condition)))) (format stream "Wrong number of arguments (should be ~A) in argument list~&~A~&in callback for substitution ~&~A -> ~A" (length subst) formals (rulename condition) subst))))) ;;; with-names (defmacro with-names (names &body body) `(let ,(loop for name in names collecting `(,name (gensym ,(symbol-name name)))) ,@body)) ;;; *parser-debug-stream* (defvar *parser-debug-stream* nil) (defmacro with-parser-debug-stream (s &body body) `(let ((*parser-debug-stream* ,s)) ,@body)) ;;; defparser, defparser/actions (defmacro defparser (name startrule &body rules) (labels ((subst-with-action (rulename subst substnum) (let ((formals (loop for sym in subst collecting (gensym "op")))) `(,subst ,formals (list '(,rulename . ,substnum) ,@formals))))) `(defparser/actions ,name ,startrule ,@(loop for rule in rules collecting (cons (car rule) (loop for subst in (cdr rule) for substnum from 0 collecting (subst-with-action (car rule) subst substnum))))))) (defmacro defparser/actions (name startrule &body rules) (with-names (state stack input readahead rulename substnum substlen result shift laststate reduce parse-step terminal-matches-p terminal-matcher shift-accept) (let* ((syntetic-startrule (gensym "S")) (startitem (make-lritem-start syntetic-startrule (list startrule) '())) (rules (cons `(,syntetic-startrule ((,startrule) (s) s)) rules)) (plainrules (loop for rule in rules collecting (list (rule-rulename rule) (mapcar #'subst-action-subst (rule-substs rule)))))) (with-readahead-functions plainrules (labels ((closure (item) (let ((expanded-rules (list))) (labels ((rulename-expanded-p (rulename first) (member (list rulename first) expanded-rules :test #'equal)) (register-expansion (rulename first) (push (list rulename first) expanded-rules)) (closure (item) (adjoin item (and (rulename-p (lritem-next item)) (closure-expand item)))) (closure-expand (item) (let ((rulename (lritem-next item))) (loop for first in (append (if (epsilon-subst-p (cdr (lritem-future item))) (list (lritem-readahead item))) (readahead-terminals (cdr (lritem-future item)))) when (not (rulename-expanded-p rulename first)) appending (progn (register-expansion rulename first) (loop for subst-action in (rule-substs (assoc rulename rules)) appending (closure (make-lritem-start rulename (subst-action-subst subst-action) first)))))))) (closure item)))) (closure-set (items) (let ((result (list))) (mapcar #'(lambda (item) (mapcar #'(lambda (closure-item) (pushnew closure-item result :test #'lritem=)) (closure item))) items) result)) (shifted-items (item) (if (lritem-future item) (lritem-shift item (lritem-readahead item)))) (shift-set (items sym) (labels ((shift-if-read (item) (if (equal (lritem-next item) sym) (list (shifted-items item))))) (closure-set (loop for item in items when (shift-if-read item) appending it)))) (possible-shifts (items) (let ((nexts (list))) (loop for next in (mapcar #'lritem-next items) when next do (pushnew next nexts)) nexts)) (canonical-sets (item) (let ((sets (list (closure item)))) (labels ((new-sets (set) (let ((new-sets (list))) (loop for sym in (possible-shifts set) do (pushnew (shift-set set sym) new-sets :test #'lrset=)) new-sets)) (iter (sets-to-process) (when sets-to-process (loop for new-set in (new-sets (car sets-to-process)) when (not (member new-set sets :test #'lrset=)) do (nconc sets-to-process (list new-set))) (iter (cdr sets-to-process))))) (iter sets)) (format t "Generated LR(1) machine with ~A states~%" (length sets)) (labels ((set-merged-p (set) (find set sets :test #'lrset=/merge-readahead :end (position set sets :test #'lrset=))) (union* (sets) (if sets (union (car sets) (union* (cdr sets)) :test #'lrset=)))) ;; Note that this takes O(n^2) time, with n being the number of LR(1) machine states (loop for set on sets when (not (set-merged-p (car set))) collecting (union* (loop for equivalent-set in set when (lrset=/merge-readahead equivalent-set (car set)) collecting equivalent-set))))))) (let ((canonical-sets (canonical-sets startitem))) (format t "Generated LALR(1) machine with ~A states~%" (length canonical-sets)) (labels ((canonical-set-read-num (canonical-set sym) (position (shift-set canonical-set sym) canonical-sets :test #'lrset=/merge-readahead)) (subst-num (finished-item) (position (lritem-past finished-item) (rule-substs (assoc (lritem-rulename finished-item) rules)) :test #'(lambda (subst subst-action) (equal subst (subst-action-subst subst-action))))) (generate-terminal-matcher (terminal) (if terminal `(,terminal-matches-p ',terminal) `(not ,readahead))) (generate-reducer () `(let ((,laststate (second (car ,stack)))) (cond ,@(loop for canonical-set in canonical-sets for state-num from 0 appending (loop for reduced-rule in (possible-shifts canonical-set) when (rulename-p reduced-rule) collecting `((and (eq ,laststate ,state-num) (eq ,rulename (quote ,reduced-rule))) (,shift ,result ,(canonical-set-read-num canonical-set reduced-rule))))) (t (error "Internal error reducing ~A in ~A" ,rulename ,laststate))))) (generate-reduce-actions () (labels ((genformal () (gensym "ARG")) (generate-callback (rulename subst-action) (let* ((subst-len (length (subst-action-subst subst-action))) (filled-formals (reverse (subst-action-formals subst-action))) (ignored-formals (loop for i on filled-formals when (not (car i)) collecting (setf (car i) (genformal))))) ;; When #formals > #subst, there's nothing we can do... (when (> (length filled-formals) subst-len) (error 'formals-mismatch-error :rulename rulename :subst-action subst-action)) ;; ... however, when #formals < #subst, we can always just add dummy formals (when (< (length filled-formals) subst-len) (restart-case (error 'formals-mismatch-error :rulename rulename :subst-action subst-action) (create-dummy-formals () :report "Fill argument list with dummy arguments" (loop for sym-with-missing-formal in (nthcdr (length filled-formals) (subst-action-subst subst-action)) do (push (genformal) filled-formals) do (push (car filled-formals) ignored-formals))))) ;; Assemble the lambda and call it with the results of previous reductions `((lambda ,filled-formals (declare (ignore ,@ignored-formals)) ,@(subst-action-action subst-action)) ,@(loop for param-num below subst-len collecting `(first (nth ,param-num ,stack))))))) `(ecase ,rulename ,@(loop for rule in rules when (not (eq (rule-rulename rule) syntetic-startrule)) collecting `((,(rule-rulename rule)) (ecase ,substnum ,@(loop for subst in (rule-substs rule) for substnum from 0 collecting (list substnum (generate-callback (rule-rulename rule) subst))))))))) (generate-shifter () `(ecase ,state ,@(loop for canonical-set in canonical-sets for state-num from 0 collecting `(,state-num (cond ,@(loop for finished-item in canonical-set when (not (lritem-next finished-item)) collecting `(,(generate-terminal-matcher (lritem-readahead finished-item)) ,(if (eq (lritem-rulename finished-item) syntetic-startrule) `(first (car ,stack)) `(,reduce (quote ,(lritem-rulename finished-item)) ,(subst-num finished-item) ,(length (lritem-past finished-item)))))) ,@(loop for terminal in (possible-shifts canonical-set) when (not (rulename-p terminal)) collecting `(,(generate-terminal-matcher terminal) (,shift-accept ,(canonical-set-read-num canonical-set terminal)))) (t (error "Parse error near ~A in ~A" ,readahead ,state-num)))))))) `(defun ,name (,input &optional (,terminal-matcher #'equal)) (let ((,stack (list (list nil 0)))) (labels ((,parse-step () (let ((,state (second (car ,stack))) (,readahead (car ,input))) (labels ((,terminal-matches-p (token) (and ,readahead (funcall ,terminal-matcher ,readahead token))) (,shift-accept (newstate) (,shift (pop ,input) newstate)) (,shift (readahead newstate) (format *parser-debug-stream* "~&Shifting to state ~A" newstate) (push (list readahead newstate) ,stack) (,parse-step)) (,reduce (,rulename ,substnum ,substlen) (format *parser-debug-stream* "~&Reducing ~A" ,rulename) (let ((,result ,(generate-reduce-actions))) (setf ,stack (nthcdr ,substlen ,stack)) ,(generate-reducer)))) (declare (ignorable (function ,reduce))) (format *parser-debug-stream* "~&Processing state ~A, readahead ~A" ,state ,readahead) ,(generate-shifter))))) (,parse-step)))))))))))