(require 'ebnf-otz)
(defvar ebnf-ebx-lex nil
"Value returned by `ebnf-ebx-lex' function.")
(defun ebnf-ebx-parser (start)
"EBNFX parser."
(let ((total (+ (- ebnf-limit start) 1))
(bias (1- start))
(origin (point))
rule-list token rule)
(goto-char start)
(setq token (ebnf-ebx-lex))
(and (eq token 'end-of-input)
(error "Invalid EBNFX file format"))
(and (eq token 'end-of-rule)
(setq token (ebnf-ebx-lex)))
(while (not (eq token 'end-of-input))
(ebnf-message-float
"Parsing...%s%%"
(/ (* (- (point) bias) 100.0) total))
(setq token (ebnf-ebx-rule token)
rule (cdr token)
token (car token))
(or (ebnf-add-empty-rule-list rule)
(setq rule-list (cons rule rule-list))))
(goto-char origin)
rule-list))
(defun ebnf-ebx-rule (token)
(let ((name ebnf-ebx-lex)
(action ebnf-action)
elements)
(setq ebnf-action nil)
(or (eq token 'non-terminal)
(error "Invalid rule name"))
(setq token (ebnf-ebx-lex))
(or (eq token 'production)
(error "Invalid rule: missing `::='"))
(setq elements (ebnf-ebx-expression))
(or (memq (car elements) '(end-of-rule end-of-input))
(error "Invalid rule: there is no end of rule"))
(setq elements (cdr elements))
(ebnf-eps-add-production name)
(cons (ebnf-ebx-lex)
(ebnf-make-production name elements action))))
(defun ebnf-ebx-expression ()
(let (body concatenation)
(while (eq (car (setq concatenation
(ebnf-ebx-concatenation (ebnf-ebx-lex))))
'alternative)
(setq body (cons (cdr concatenation) body)))
(ebnf-token-alternative body concatenation)))
(defun ebnf-ebx-concatenation (token)
(let ((term (ebnf-ebx-exception token))
seq)
(or (setq token (car term)
term (cdr term))
(error "Empty element"))
(setq seq (cons term seq))
(while (setq term (ebnf-ebx-exception token)
token (car term)
term (cdr term))
(setq seq (cons term seq)))
(cons token
(ebnf-token-sequence seq))))
(defun ebnf-ebx-exception (token)
(let ((term (ebnf-ebx-term token)))
(if (eq (car term) 'exception)
(let ((except (ebnf-ebx-term (ebnf-ebx-lex))))
(cons (car except)
(ebnf-make-except (cdr term) (cdr except))))
term)))
(defun ebnf-ebx-term (token)
(let ((factor (ebnf-ebx-factor token)))
(when factor
(setq token (ebnf-ebx-lex))
(cond ((eq token 'zero-or-more)
(setq factor (ebnf-make-zero-or-more factor)
token (ebnf-ebx-lex)))
((eq token 'one-or-more)
(setq factor (ebnf-make-one-or-more factor)
token (ebnf-ebx-lex)))
((eq token 'optional)
(setq factor (ebnf-token-optional factor)
token (ebnf-ebx-lex)))))
(cons token factor)))
(defun ebnf-ebx-factor (token)
(cond
((eq token 'terminal)
(ebnf-make-terminal ebnf-ebx-lex))
((eq token 'non-terminal)
(ebnf-make-non-terminal ebnf-ebx-lex))
((eq token 'begin-group)
(let ((body (ebnf-ebx-expression)))
(or (eq (car body) 'end-group)
(error "Missing `)'"))
(cdr body)))
(t
nil)
))
(defconst ebnf-ebx-token-table (make-vector 256 'error)
"Vector used to map characters to a lexical token.")
(defun ebnf-ebx-initialize ()
"Initialize EBNFX token table."
(let ((char ?\101))
(while (< char ?\133)
(aset ebnf-ebx-token-table char 'non-terminal)
(setq char (1+ char)))
(setq char ?\141)
(while (< char ?\173)
(aset ebnf-ebx-token-table char 'non-terminal)
(setq char (1+ char)))
(setq char ?\240)
(while (< char ?\400)
(aset ebnf-ebx-token-table char 'non-terminal)
(setq char (1+ char)))
(aset ebnf-ebx-token-table ?\n 'end-of-rule) (aset ebnf-ebx-token-table ?\r 'end-of-rule) (aset ebnf-ebx-token-table ?\013 'space) (aset ebnf-ebx-token-table ?\t 'space) (aset ebnf-ebx-token-table ?\ 'space) (aset ebnf-ebx-token-table ?\f 'form-feed) (aset ebnf-ebx-token-table ?# 'hash)
(aset ebnf-ebx-token-table ?\" 'double-quote)
(aset ebnf-ebx-token-table ?\' 'single-quote)
(aset ebnf-ebx-token-table ?\( 'begin-group)
(aset ebnf-ebx-token-table ?\) 'end-group)
(aset ebnf-ebx-token-table ?- 'exception)
(aset ebnf-ebx-token-table ?: 'colon)
(aset ebnf-ebx-token-table ?\[ 'begin-square)
(aset ebnf-ebx-token-table ?| 'alternative)
(aset ebnf-ebx-token-table ?* 'zero-or-more)
(aset ebnf-ebx-token-table ?+ 'one-or-more)
(aset ebnf-ebx-token-table ?\? 'optional)
;; Override comment character:
(aset ebnf-ebx-token-table ?/ 'comment)))
;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-ebx-non-terminal-chars
(ebnf-range-regexp "-_A-Za-z" ?\240 ?\377))
(defconst ebnf-ebx-non-terminal-letter-chars
(ebnf-range-regexp "A-Za-z" ?\240 ?\377))
(defun ebnf-ebx-lex ()
"Lexical analyzer for EBNFX.
Return a lexical token.
See documentation for variable `ebnf-ebx-lex'."
(if (>= (point) ebnf-limit)
'end-of-input
(let (token)
;; skip spaces and comments
(while (if (> (following-char) 255)
(progn
(setq token 'error)
nil)
(setq token (aref ebnf-ebx-token-table (following-char)))
(cond
((eq token 'space)
(skip-chars-forward " \013\t" ebnf-limit)
(< (point) ebnf-limit))
((eq token 'comment)
(ebnf-ebx-skip-comment))
((eq token 'form-feed)
(forward-char)
(setq ebnf-action 'form-feed))
((eq token 'end-of-rule)
(ebnf-ebx-skip-end-of-rule))
((and (eq token 'begin-square)
(let ((case-fold-search t))
(looking-at "\\[\\(wfc\\|vc\\):")))
(ebnf-ebx-skip-constraint))
(t nil)
)))
(cond
;; end of input
((>= (point) ebnf-limit)
'end-of-input)
;; error
((eq token 'error)
(error "Invalid character"))
;; end of rule
((eq token 'end-of-rule)
'end-of-rule)
;; terminal: #x [0-9A-Fa-f]+
((eq token 'hash)
(setq ebnf-ebx-lex (ebnf-ebx-character))
'terminal)
;; terminal: "string"
((eq token 'double-quote)
(setq ebnf-ebx-lex (ebnf-ebx-string ?\"))
'terminal)
;; terminal: 'string'
((eq token 'single-quote)
(setq ebnf-ebx-lex (ebnf-ebx-string ?\'))
'terminal)
;; terminal: [ ^? ( char ( - char )? )+ ]
((eq token 'begin-square)
(setq ebnf-ebx-lex (ebnf-ebx-range))
'terminal)
;; non-terminal: NAME
((eq token 'non-terminal)
(setq ebnf-ebx-lex
(ebnf-buffer-substring ebnf-ebx-non-terminal-chars))
'non-terminal)
;; colon: ::=
((eq token 'colon)
(or (looking-at "::=")
(error "Missing `::=' token"))
(forward-char 3)
'production)
;; miscellaneous: (, ), *, +, ?, |, -
(t
(forward-char)
token)
))))
;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-ebx-constraint-chars
(ebnf-range-regexp "^\000-\010\016-\037]" ?\177 ?\237))
(defun ebnf-ebx-skip-constraint ()
(or (> (skip-chars-forward ebnf-ebx-constraint-chars ebnf-limit) 0)
(error "Invalid character"))
(or (= (following-char) ?\])
(error "Missing end of constraint `]'"))
(forward-char)
t)
(defun ebnf-ebx-skip-end-of-rule ()
(let (eor-p)
(while (progn
;; end of rule ==> 2 or more consecutive end of lines
(setq eor-p (or (> (skip-chars-forward "\r\n" ebnf-limit) 1)
eor-p))
;; skip spaces
(skip-chars-forward " \013\t" ebnf-limit)
;; skip comments
(and (= (following-char) ?/)
(ebnf-ebx-skip-comment))))
(not eor-p)))
;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-ebx-comment-chars
(ebnf-range-regexp "^\000-\010\016-\037\\*" ?\177 ?\237))
(defconst ebnf-ebx-filename-chars
(ebnf-range-regexp "^\000-\037\\*" ?\177 ?\237))
(defun ebnf-ebx-skip-comment ()
(forward-char)
(or (= (following-char) ?*)
(error "Invalid beginning of comment"))
(forward-char)
(cond
;; open EPS file
((and ebnf-eps-executing (= (following-char) ?\[))
(ebnf-eps-add-context (ebnf-ebx-eps-filename)))
;; close EPS file
((and ebnf-eps-executing (= (following-char) ?\]))
(ebnf-eps-remove-context (ebnf-ebx-eps-filename)))
;; any other action in comment
(t
(setq ebnf-action (aref ebnf-comment-table (following-char))))
)
(while (progn
(skip-chars-forward ebnf-ebx-comment-chars ebnf-limit)
(or (= (following-char) ?*)
(error "Missing end of comment"))
(forward-char)
(and (/= (following-char) ?/)
(< (point) ebnf-limit))))
;; check for a valid end of comment
(and (>= (point) ebnf-limit)
(error "Missing end of comment"))
(forward-char)
t)
(defun ebnf-ebx-eps-filename ()
(forward-char)
(let (fname nchar)
(while (progn
(setq fname
(concat fname
(ebnf-buffer-substring ebnf-ebx-filename-chars)))
(and (< (point) ebnf-limit)
(> (setq nchar (skip-chars-forward "*" ebnf-limit)) 0)
(< (point) ebnf-limit)
(/= (following-char) ?/)))
(setq fname (concat fname (make-string nchar ?*))
nchar nil))
(if (or (not nchar) (= nchar 0))
fname
(and (< (point) ebnf-limit)
(= (following-char) ?/)
(setq nchar (1- nchar)))
(concat fname (make-string nchar ?*)))))
;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-ebx-double-string-chars
(ebnf-range-regexp "\t -!#-~" ?\240 ?\377))
(defconst ebnf-ebx-single-string-chars
(ebnf-range-regexp "\t -&(-~" ?\240 ?\377))
(defun ebnf-ebx-string (delim)
(buffer-substring-no-properties
(progn
(forward-char)
(point))
(progn
(skip-chars-forward (if (= delim ?\")
ebnf-ebx-double-string-chars
ebnf-ebx-single-string-chars)
ebnf-limit)
(or (= (following-char) delim)
(error "Missing string delimiter `%c'" delim))
(prog1
(point)
(forward-char)))))
(defun ebnf-ebx-character ()
;; #x [0-9A-Fa-f]+
(buffer-substring-no-properties
(point)
(progn
(ebnf-ebx-hex-character)
(point))))
(defun ebnf-ebx-range ()
;; [ ^? ( char ( - char )? )+ ]
(buffer-substring-no-properties
(point)
(progn
(forward-char)
(and (= (following-char) ?^)
(forward-char))
(and (= (following-char) ?-)
(forward-char))
(while (progn
(ebnf-ebx-any-character)
(when (= (following-char) ?-)
(forward-char)
(ebnf-ebx-any-character))
(and (/= (following-char) ?\])
(< (point) ebnf-limit))))
(and (>= (point) ebnf-limit)
(error "Missing end of character range `]'"))
(forward-char)
(point))))
(defun ebnf-ebx-any-character ()
(let ((char (following-char)))
(cond ((= char ?#)
(ebnf-ebx-hex-character t))
((or (and (<= ?\ char) (<= char ?\")) ; #
(and (<= ?$ char) (<= char ?,)) ; -
(and (<= ?. char) (<= char ?\\)) ; ]
(and (<= ?^ char) (<= char ?~))
(and (<= ?\240 char) (<= char ?\377)))
(forward-char))
(t
(error "Invalid character `%c'" char)))))
(defun ebnf-ebx-hex-character (&optional no-error)
;; #x [0-9A-Fa-f]+
(forward-char)
(if (/= (following-char) ?x)
(or no-error
(error "Invalid hexadecimal character"))
(forward-char)
(or (> (skip-chars-forward "0-9A-Fa-f" ebnf-limit) 0)
(error "Invalid hexadecimal character"))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(provide 'ebnf-ebx)
;;; arch-tag: bfe2f95b-66bc-4dc6-8b7e-b7831e68f5fb
;;; ebnf-ebx.el ends here