(require 'ebnf-otz)
;; Side channel between the lexer and the parser.  The function
;; `ebnf-bnf-lex' returns only a token symbol; for `integer', `special',
;; `terminal' and `non-terminal' tokens it stores the value it scanned in
;; this variable.  The parser functions (`ebnf-production', `ebnf-repeat'
;; and `ebnf-factor') read the variable right after receiving the token.
(defvar ebnf-bnf-lex nil
"Value returned by `ebnf-bnf-lex' function.")
(defun ebnf-bnf-parser (start)
  "Parse the EBNF text between START and `ebnf-limit'.

Return the list of parsed productions, most recently parsed first.
Productions for which `ebnf-add-empty-rule-list' returns non-nil are
left out of the list.  Progress is reported through
`ebnf-message-float'.

Point is moved back to its initial position only when parsing runs to
completion; when an error is signaled point stays where the error
happened.

Signal an error if the very first token is `end-of-input'."
  (let ((saved-point (point))
        (offset (1- start))
        (span (+ (- ebnf-limit start) 1))
        lookahead
        productions)
    (goto-char start)
    (setq lookahead (ebnf-bnf-lex))
    (when (eq lookahead 'end-of-input)
      (error "Invalid EBNF file format"))
    (while (not (eq lookahead 'end-of-input))
      ;; percentage of the region scanned so far
      (ebnf-message-float
       "Parsing...%s%%"
       (/ (* (- (point) offset) 100.0) span))
      ;; `ebnf-production' returns (NEXT-TOKEN . PRODUCTION)
      (let* ((parsed (ebnf-production lookahead))
             (rule (cdr parsed)))
        (setq lookahead (car parsed))
        (unless (ebnf-add-empty-rule-list rule)
          (push rule productions))))
    (goto-char saved-point)
    productions))
(defun ebnf-production (token)
  "Parse one production whose first token, TOKEN, was already scanned.

TOKEN must be `non-terminal'; the header name is taken from the
variable `ebnf-bnf-lex'.  The header must be followed by an `equal'
token, a body (see `ebnf-body') and a `period' token.

The value of `ebnf-action' pending on entry is attached to the
production and `ebnf-action' is reset to nil.

Return a cons (NEXT-TOKEN . PRODUCTION) where NEXT-TOKEN is the token
scanned after the production."
  (let ((name ebnf-bnf-lex)
        (pending-action ebnf-action))
    (setq ebnf-action nil)
    (unless (eq token 'non-terminal)
      (error "Invalid header production"))
    (unless (eq (ebnf-bnf-lex) 'equal)
      (error "Invalid production: missing `='"))
    ;; `ebnf-body' returns (TERMINATING-TOKEN . BODY-NODE)
    (let ((parsed (ebnf-body)))
      (unless (eq (car parsed) 'period)
        (error "Invalid production: missing `.'"))
      (ebnf-eps-add-production name)
      ;; scan the look-ahead token before building the production node
      (let ((lookahead (ebnf-bnf-lex)))
        (cons lookahead
              (ebnf-make-production name (cdr parsed) pending-action))))))
(defun ebnf-body ()
  "Parse a body: sequences separated by `alternative' tokens.

Each call to `ebnf-sequence' yields (TOKEN . SEQUENCE).  While TOKEN is
`alternative' the sequence is collected (most recent first) and another
one is parsed.  The collected sequences and the last, unstripped
result are handed to `ebnf-token-alternative', whose value is
returned."
  (let ((current (ebnf-sequence))
        alternatives)
    (while (eq (car current) 'alternative)
      (push (cdr current) alternatives)
      (setq current (ebnf-sequence)))
    (ebnf-token-alternative alternatives current)))
(defun ebnf-sequence ()
  "Parse a sequence of consecutive exceptions (see `ebnf-exception').

Scan a token, then keep parsing exceptions for as long as each one
yields a non-nil node.  Return a cons (TOKEN . SEQUENCE) where TOKEN is
the token that ended the sequence and SEQUENCE is the value of
`ebnf-token-sequence' applied to the collected nodes (most recent
first)."
  (let* ((parsed (ebnf-exception (ebnf-bnf-lex)))
         (lookahead (car parsed))
         (node (cdr parsed))
         nodes)
    (while node
      (push node nodes)
      (setq parsed (ebnf-exception lookahead)
            lookahead (car parsed)
            node (cdr parsed)))
    (cons lookahead
          (ebnf-token-sequence nodes))))
(defun ebnf-exception (token)
  "Parse a repeat optionally followed by an `except' token and a repeat.

TOKEN is the current token.  When the repeat parsed from TOKEN is not
followed by `except', return the result of `ebnf-repeat' unchanged.
Otherwise parse a second repeat, check it with `ebnf-no-non-terminal'
and return the value of `ebnf-token-except'."
  (let ((base (ebnf-repeat token)))
    (cond
     ;; repeat - repeat
     ((eq (car base) 'except)
      (let ((excluded (ebnf-repeat (ebnf-bnf-lex))))
        (ebnf-no-non-terminal (cdr excluded))
        (ebnf-token-except (cdr base) excluded)))
     ;; repeat
     (t
      base))))
(defun ebnf-no-non-terminal (node)
  "Signal an error if NODE contains a non-terminal node.

NODE is examined only when it is a vector; anything else is ignored
and nil is returned.  Depending on the node kind the check recurses
into the node list, the node separator, or both."
  (when (vectorp node)
    (let ((generator (ebnf-node-kind node)))
      (cond
       ((eq generator 'ebnf-generate-non-terminal)
        (error "Exception sequence should not contain a non-terminal"))
       ((eq generator 'ebnf-generate-repeat)
        (ebnf-no-non-terminal (ebnf-node-separator node)))
       ((or (eq generator 'ebnf-generate-optional)
            (eq generator 'ebnf-generate-except))
        (ebnf-no-non-terminal (ebnf-node-list node)))
       ((or (eq generator 'ebnf-generate-one-or-more)
            (eq generator 'ebnf-generate-zero-or-more))
        (ebnf-no-non-terminal (ebnf-node-list node))
        (ebnf-no-non-terminal (ebnf-node-separator node)))
       ((or (eq generator 'ebnf-generate-alternative)
            (eq generator 'ebnf-generate-sequence))
        ;; here the node list is a list of nodes: check every element
        (dolist (child (ebnf-node-list node))
          (ebnf-no-non-terminal child)))))))
(defun ebnf-repeat (token)
  "Parse a term optionally preceded by a repetition count.

When TOKEN is not `integer' this is just `ebnf-term'.  Otherwise the
integer held in the variable `ebnf-bnf-lex' is the count, it must be
followed by a `repeat' token, and a second integer may follow as upper
value.  The count, the result of `ebnf-term' and the upper value (or
nil) are passed to `ebnf-token-repeat', whose value is returned."
  (cond
   ((eq token 'integer)
    (let ((lower ebnf-bnf-lex)
          (upper nil)
          next)
      (unless (eq (ebnf-bnf-lex) 'repeat)
        (error "Missing `*'"))
      (setq next (ebnf-bnf-lex))
      ;; optional upper value
      (when (eq next 'integer)
        (setq upper ebnf-bnf-lex
              next (ebnf-bnf-lex)))
      (ebnf-token-repeat lower (ebnf-term next) upper)))
   (t
    (ebnf-term token))))
(defun ebnf-term (token)
  "Parse a term starting at TOKEN and return (NEXT-TOKEN . NODE).

A term is a factor (see `ebnf-factor') that may be followed by a
`one-or-more' token, or by a `list' token and an optional separator
factor.  The leading factor may be missing, in which case NODE can be
nil.

A factor followed by `one-or-more' is returned unchanged when it is
already a zero-or-more or one-or-more node, becomes a zero-or-more node
when it is an optional node, and a one-or-more node otherwise."
  (let* ((factor (ebnf-factor token))
         ;; a new token is scanned only when TOKEN did start a factor
         (next (if factor (ebnf-bnf-lex) token)))
    (cond
     ;; [factor] one-or-more
     ((eq next 'one-or-more)
      ;; scan the look-ahead token before building the node
      (let ((lookahead (ebnf-bnf-lex)))
        (cons lookahead
              (when factor
                (let ((kind (ebnf-node-kind factor)))
                  (cond
                   ((or (eq kind 'ebnf-generate-zero-or-more)
                        (eq kind 'ebnf-generate-one-or-more))
                    factor)
                   ((eq kind 'ebnf-generate-optional)
                    (ebnf-make-zero-or-more (list factor)))
                   (t
                    (ebnf-make-one-or-more (list factor)))))))))
     ;; [factor] list [factor]
     ((eq next 'list)
      (let* ((after-list (ebnf-bnf-lex))
             (separator (ebnf-factor after-list))
             ;; a separator without a leading factor separates empty items
             (item (if separator
                       (or factor (ebnf-make-empty))
                     factor))
             ;; a new token is scanned only when a separator was parsed
             (lookahead (if separator
                            (ebnf-bnf-lex)
                          after-list)))
        (cons lookahead
              (and item
                   (ebnf-make-one-or-more item separator)))))
     ;; factor
     (t
      (cons next factor)))))
(defun ebnf-factor (token)
  "Parse the factor introduced by TOKEN and return its node.

`terminal', `non-terminal' and `special' tokens build a node from the
value in the variable `ebnf-bnf-lex'.  `begin-group',
`begin-optional' and `begin-list' parse a nested body (see
`ebnf-body') and require the matching closing token.  Inside a list a
`list-separator' token introduces a second body used as separator.

Return nil when TOKEN does not start a factor."
  (cond
   ((eq token 'terminal)
    (ebnf-make-terminal ebnf-bnf-lex))
   ((eq token 'non-terminal)
    (ebnf-make-non-terminal ebnf-bnf-lex))
   ((eq token 'special)
    (ebnf-make-special ebnf-bnf-lex))
   ;; group
   ((eq token 'begin-group)
    (let ((group (ebnf-body)))
      (unless (eq (car group) 'end-group)
        (error "Missing `)'"))
      (cdr group)))
   ;; optional
   ((eq token 'begin-optional)
    (let ((inner (ebnf-body)))
      (unless (eq (car inner) 'end-optional)
        (error "Missing `]'"))
      (ebnf-token-optional (cdr inner))))
   ;; list, with an optional separator part
   ((eq token 'begin-list)
    (let* ((parsed (ebnf-body))
           (closer (car parsed))
           (items (cdr parsed))
           (separator nil))
      (when (eq closer 'list-separator)
        (let ((sep-parsed (ebnf-body)))
          (setq closer (car sep-parsed)
                separator (cdr sep-parsed))))
      (cond
       ((eq closer 'end-one-or-more)
        (ebnf-make-one-or-more items separator))
       ((eq closer 'end-zero-or-more)
        (ebnf-make-zero-or-more items separator))
       (t
        (error "Missing `}+', `}*' or `}'")))))
   ;; no factor
   (t
    nil)))
;; One slot per character code 0..255.  Every slot starts out as `error';
;; `ebnf-bnf-initialize' overwrites the slots of the characters the lexer
;; accepts, and `ebnf-bnf-lex' indexes the table with the character
;; following point to find out which kind of token starts there.
(defconst ebnf-bnf-token-table (make-vector 256 'error)
"Vector used to map characters to a lexical token.")
(defun ebnf-bnf-initialize ()
  "Initialize EBNF token table.

Fill `ebnf-bnf-token-table' in three steps: whole character ranges
first, then the individual characters that have a lexical meaning, and
finally the comment and end of production characters taken from
`ebnf-lex-comment-char' and `ebnf-lex-eop-char'.  Slots that are not
mentioned are left untouched."
  (let ((table ebnf-bnf-token-table))
    ;; Character ranges, as (FIRST PAST-LAST TOKEN).
    (dolist (range '((?\040 ?\060 non-terminal)
                     (?\060 ?\072 integer)
                     (?\072 ?\177 non-terminal)
                     (?\240 ?\400 non-terminal)))
      (let ((code (car range))
            (stop (nth 1 range))
            (class (nth 2 range)))
        (while (< code stop)
          (aset table code class)
          (setq code (1+ code)))))
    ;; Individual characters, applied in this order.
    (dolist (entry '((?\013 . space)
                     (?\n . space)
                     (?\r . space)
                     (?\t . space)
                     (?\040 . space)    ; the space character
                     (?\f . form-feed)
                     (?\" . terminal)
                     (?\? . special)
                     (?\( . begin-group)
                     (?\) . end-group)
                     (?* . repeat)
                     (?- . except)
                     (?= . equal)
                     (?\[ . begin-optional)
                     (?\] . end-optional)
                     (?\{ . begin-list)
                     (?| . alternative)
                     (?\} . end-list)
                     (?/ . list)
                     (?+ . one-or-more)
                     (?$ . default)))
      (aset table (car entry) (cdr entry)))
    ;; Override comment character:
    (aset table ebnf-lex-comment-char 'comment)
    ;; Override end of production character:
    (aset table ebnf-lex-eop-char 'period)))
;; Character set handed to `ebnf-buffer-substring' by `ebnf-bnf-lex' when it
;; scans a name that ends up as a `non-terminal' or `terminal' token.
;; The bounds ?\240 and ?\377 are passed separately so that the helper can
;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-bnf-non-terminal-chars
(ebnf-range-regexp "!#%&'*-,0-:<>@-Z\\\\^-z~" ?\240 ?\377))
(defun ebnf-bnf-lex ()
  "Lexical analyzer for EBNF.

Skip spaces, comments and form feeds starting at point, then scan one
token and return its symbol.  Return `end-of-input' when point is at
or beyond `ebnf-limit'.

For `integer', `special', `terminal' and `non-terminal' tokens the
scanned value is stored in the variable `ebnf-bnf-lex'.  A form feed
sets `ebnf-action' to `form-feed'.  `ebnf-default-p' is reset to nil
on each scan and set to t when the token was introduced by the
`default' character.

Signal an error on a character that has no entry in
`ebnf-bnf-token-table' (including any character code above 255) and
on a `default' character that is not followed by a terminal,
non-terminal or special.

See documentation for variable `ebnf-bnf-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces and comments
      (while (if (> (following-char) 255)
                 ;; the token table only covers character codes 0..255
                 (progn
                   (setq token 'error)
                   nil)
               (setq token (aref ebnf-bnf-token-table (following-char)))
               (cond
                ((eq token 'space)
                 (skip-chars-forward " \013\n\r\t" ebnf-limit)
                 (< (point) ebnf-limit))
                ((eq token 'comment)
                 (ebnf-bnf-skip-comment))
                ((eq token 'form-feed)
                 (forward-char)
                 (setq ebnf-action 'form-feed))
                (t nil)
                )))
      (setq ebnf-default-p nil)
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Invalid character"))
       ;; default
       ((eq token 'default)
        (forward-char)
        ;; Guard the table lookup the same way the skip loop does: a
        ;; character code above 255 is outside the table and must be
        ;; reported as an invalid `default' element, not as an
        ;; out-of-range index.
        (if (and (<= (following-char) 255)
                 (memq (aref ebnf-bnf-token-table (following-char))
                       '(terminal non-terminal special)))
            (prog1
                (ebnf-bnf-lex)
              (setq ebnf-default-p t))
          (error "Invalid `default' element")))
       ;; integer
       ((eq token 'integer)
        (setq ebnf-bnf-lex (ebnf-buffer-substring "0-9"))
        'integer)
       ;; special: ?special?
       ((eq token 'special)
        (setq ebnf-bnf-lex (concat (and ebnf-special-show-delimiter "?")
                                   (ebnf-string " ->@-~" ?\? "special")
                                   (and ebnf-special-show-delimiter "?")))
        'special)
       ;; terminal: "string"
       ((eq token 'terminal)
        (setq ebnf-bnf-lex (ebnf-unescape-string (ebnf-get-string)))
        'terminal)
       ;; non-terminal or terminal
       ((eq token 'non-terminal)
        (setq ebnf-bnf-lex (ebnf-buffer-substring ebnf-bnf-non-terminal-chars))
        ;; a name matched in full by `ebnf-terminal-regexp' is a terminal
        (let ((case-fold-search ebnf-case-fold-search)
              match)
          (if (and ebnf-terminal-regexp
                   (setq match (string-match ebnf-terminal-regexp
                                             ebnf-bnf-lex))
                   (zerop match)
                   (= (match-end 0) (length ebnf-bnf-lex)))
              'terminal
            'non-terminal)))
       ;; end of list: }+, }*, }
       ((eq token 'end-list)
        (forward-char)
        (cond
         ((= (following-char) ?+)
          (forward-char)
          'end-one-or-more)
         ((= (following-char) ?*)
          (forward-char)
          'end-zero-or-more)
         (t
          'end-zero-or-more)
         ))
       ;; alternative: |, ||
       ((eq token 'alternative)
        (forward-char)
        (if (/= (following-char) ?|)
            'alternative
          (forward-char)
          'list-separator))
       ;; miscellaneous: {, (, ), [, ], ., =, /, +, -, *
       (t
        (forward-char)
        token)
       ))))
;; Character set used to scan the text of a comment: it is given to
;; `skip-chars-forward' by `ebnf-bnf-skip-comment' and to
;; `ebnf-buffer-substring' by `ebnf-bnf-eps-filename'.  The set starts with
;; `^', so it lists the characters that end the scan (newline and the
;; listed control characters).  The bounds ?\177 and ?\237 are passed
;; separately so that the helper can
;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-bnf-comment-chars
(ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237))
(defun ebnf-bnf-skip-comment ()
  "Skip the comment starting at point.

Point is first moved over the comment character.  When
`ebnf-eps-executing' is non-nil and the next character is `[' or `]',
the rest of the comment is read with `ebnf-bnf-eps-filename' and
passed to `ebnf-eps-add-context' or `ebnf-eps-remove-context'.
Otherwise `ebnf-action' is set from `ebnf-comment-table', indexed by
that character, and the comment text is skipped.

Return nil if `ebnf-limit' was reached, t if the comment ended with a
newline (which is consumed).  Signal an error if the comment stops at
any other character."
  (forward-char)
  (let ((lead (following-char)))
    (cond
     ;; open EPS file
     ((and ebnf-eps-executing (= lead ?\[))
      (ebnf-eps-add-context (ebnf-bnf-eps-filename)))
     ;; close EPS file
     ((and ebnf-eps-executing (= lead ?\]))
      (ebnf-eps-remove-context (ebnf-bnf-eps-filename)))
     ;; any other action in comment
     (t
      (setq ebnf-action (aref ebnf-comment-table lead))
      (skip-chars-forward ebnf-bnf-comment-chars ebnf-limit))))
  ;; check for a valid end of comment
  (when (< (point) ebnf-limit)
    (unless (= (following-char) ?\n)
      (error "Invalid character"))
    (forward-char)
    t))
;; Called by `ebnf-bnf-skip-comment' with point on the `[' or `]' that
;; follows the comment character: step over that character and return what
;; `ebnf-buffer-substring' yields for the comment character set.
;; NOTE(review): presumably this is the rest of the comment line, used as
;; an EPS file name -- confirm against `ebnf-buffer-substring'.
(defun ebnf-bnf-eps-filename ()
(forward-char)
(ebnf-buffer-substring ebnf-bnf-comment-chars))
(defun ebnf-unescape-string (str)
  "Return STR with its backslash escapes removed.

STR is scanned from left to right.  A backslash that is not the last
character of STR is dropped and the character after it is kept
literally, even when that character is itself a backslash.  A
backslash in the last position is kept as is.

When STR contains no escape, STR itself is returned, not a copy."
  (let ((len (length str))
        (pos 0)
        (chunk-start 0)
        chunks)
    ;; The last character can never start an escape, so stop before it.
    (while (< pos (1- len))
      (if (/= (aref str pos) ?\\)
          (setq pos (1+ pos))
        ;; Keep the text before the backslash; the next chunk begins
        ;; at the escaped character, which is skipped by the scan.
        (push (substring str chunk-start pos) chunks)
        (setq chunk-start (1+ pos)
              pos (+ pos 2))))
    (if (null chunks)
        ;; no escapes
        str
      ;; Build the result from substrings so that it keeps the text
      ;; representation of STR.  Writing characters one by one into a
      ;; string created by `make-string' can store the codes 128..255
      ;; as raw bytes and then fail on a later wider character.
      (push (substring str chunk-start) chunks)
      (apply #'concat (nreverse chunks)))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(provide 'ebnf-bnf)
;;; arch-tag: 3b1834d3-8367-475b-80d5-8e0bbd00ce50
;;; ebnf-bnf.el ends here