;;; Utilities for readahead parser generators ;;; ;;; (C) 2008 Gergő ÉRDI ;;; http://cactus.rulez.org/ (in-package :common-lisp-user) (defpackage :cactus.parser-readahead (:use :common-lisp :cactus.aset) (:export :with-readahead-functions :rulename-p :epsilon-subst-p :readahead-terminals :skip-terminals :readahead-ambiguity-error :readahead-ambiguity-error-rulename :unexpected-token-error :unexpected-token-error-expected :unexpected-token-error-found)) (in-package :cactus.parser-readahead) (define-condition readahead-ambiguity-error (error) ((rulename :initarg :rulename :reader readahead-ambiguity-error-rulename)) (:report (lambda (condition stream) (format stream "Read-ahead ambiguity between substitutions of nonterminal ~A" (readahead-ambiguity-error-rulename condition))))) (define-condition unexpected-token-error (error) ((expected :initarg :expected :initform nil :reader unexpected-token-error-expected) (found :initarg :found :reader unexpected-token-error-found)) (:report (lambda (condition stream) (format stream "Unexpected token: ~A" (unexpected-token-error-found condition)) (if (unexpected-token-error-expected condition) (format stream " (expected: ~A)" (unexpected-token-error-expected condition)))))) (defmacro with-readahead-functions (rules-name &body body) "Defines four local functions: rulename-p, epsilon-subst-p, readahead-terminals and skip-terminals" `(let* ((rulenames (mapcar #'car ,rules-name)) (terminals (loop for subst in (loop for (rulename . substs) in ,rules-name appending substs) appending (loop for sym in subst when (not (member sym rulenames)) collecting sym))) (epsilon-rulenames (let ((epsilon-rulenames (list))) (labels ((epsilon-p (rulename) (or (not rulename) (member rulename epsilon-rulenames))) (epsilon-subst-p (subst) (or (not subst) (every #'epsilon-p subst))) (epsilon-rule-p (rule) (some #'epsilon-subst-p (cdr rule)))) ;; epsilon rulenames can be collected in at most (length ,rules-name) iterations ;; (it's the discrete limit of the recursive series) (dolist (rule-unused ,rules-name epsilon-rulenames) (declare (ignore rule-unused)) (setf epsilon-rulenames (loop for rule in ,rules-name when (epsilon-rule-p rule) collecting (car rule)))))))) (labels ((rulename-p (sym) (member sym rulenames)) (epsilon-rulename-p (sym) (member sym epsilon-rulenames)) (epsilon-subst-p (subst) (every #'epsilon-rulename-p subst)) (find-firsts (subst) (and subst (cons (car subst) (and (epsilon-rulename-p (car subst)) (find-firsts (cdr subst)))))) (find-lasts (subst) (find-firsts (reverse subst))) (find-befores (subst) (maplist #'(lambda (head-and-tail) (cons (car head-and-tail) (find-firsts (cdr head-and-tail)))) subst)) (warshall (alist1 alist2) (mapcar #'(lambda (key-values) (cons (car key-values) (set-from-list (loop for value in (cdr key-values) appending (cdr (assoc value alist2)))))) alist1)) (transitive (alist) (let ((alist alist)) (dolist (item-unused alist alist) ; Do (length alist) iterations (declare (ignore item-unused)) (setf alist (mapcar #'(lambda (key-values) (cons (car key-values) (set-from-list (append (cdr key-values) (loop for value in (cdr key-values) appending (loop for new-value in (cdr (assoc value alist)) when (not (member new-value (cdr key-values) :test #'equal)) collecting new-value)))))) alist))))) (reflexive (alist) (aset-from-alist (defrag-alist (append alist (mapcar #'(lambda (sym) (list sym sym)) (append rulenames terminals))) :test #'equal)))) (declare (ignorable (function rulename-p) (function epsilon-subst-p))) (let* ((firsts (mapcar #'(lambda (rule) (cons (car rule) (set-from-list (loop for subst in (cdr rule) appending (find-firsts subst))))) ,rules-name)) (lasts (defrag-alist (mapcan #'(lambda (rule) (mapcar #'(lambda (last-sym) (list last-sym (car rule))) (set-from-list (loop for subst in (cdr rule) appending (find-lasts subst))))) ,rules-name) :test #'equal)) (befores (mapcar #'(lambda (key-value) (cons (car key-value) (set-from-list (cdr key-value)))) (defrag-alist (loop for rule in ,rules-name appending (mapcan #'find-befores (cdr rule))) :test #'equal))) (f+ (transitive firsts)) (first-terminals (mapcar #'(lambda (rule) (cons (car rule) (set-from-list (loop for sym in (cdr (assoc (car rule) f+)) when (not (rulename-p sym)) collecting sym)))) ,rules-name)) (l*bf* (warshall (warshall (reflexive (transitive lasts)) befores) (reflexive (transitive firsts)))) (follow-terminals (mapcar #'(lambda (rule) (cons (car rule) (set-from-list (loop for sym in (cdr (assoc (car rule) l*bf*)) when (not (rulename-p sym)) collecting sym)))) ,rules-name))) (labels ((readahead-terminals (subst) (and subst (if (rulename-p (car subst)) (append (cdr (assoc (car subst) first-terminals)) (and (epsilon-rulename-p (car subst)) (readahead-terminals (cdr subst)))) (list (car subst))))) (skip-terminals (rulename) (cdr (assoc rulename follow-terminals)))) (declare (ignorable (function readahead-terminals) (function skip-terminals))) ,@body)))))