(defgroup mail-extr nil
"Extract full name and address from RFC 822 mail header."
:prefix "mail-extr-"
:group 'mail)
(defcustom mail-extr-guess-middle-initial nil
"*Whether to try to guess middle initial from mail address.
If true, then when we see an address like \"John Smith <jqs@host.com>\"
we will assume that \"John Q. Smith\" is the fellow's name."
:type 'boolean
:group 'mail-extr)
(defcustom mail-extr-ignore-single-names t
"*Whether to ignore a name that is just a single word.
If true, then when we see an address like \"Idiot <dumb@stupid.com>\"
we will act as though we couldn't find a full name in the address."
:type 'boolean
:group 'mail-extr)
(defcustom mail-extr-full-name-prefixes
(purecopy
"\\(Prof\\|D[Rr]\\|Mrs?\\|Rev\\|Rabbi\\|SysOp\\|LCDR\\)\\.?[ \t\n]")
"*Matches prefixes to the full name that identify a person's position.
These are stripped from the full name because they do not contribute to
uniquely identifying the person."
:type 'regexp
:group 'mail-extr)
(defcustom mail-extr-@-binds-tighter-than-! nil
"*Whether the local mail transport agent looks at ! before @."
:type 'boolean
:group 'mail-extr)
(defcustom mail-extr-mangle-uucp nil
"*Whether to throw away information in UUCP addresses
by translating things like \"foo!bar!baz@host\" into \"baz@bar.UUCP\"."
:type 'boolean
:group 'mail-extr)
(defconst mail-extr-all-letters-but-separators
(purecopy "][A-Za-z{|}'~0-9`\240-\377"))
(defconst mail-extr-all-letters
(purecopy (concat mail-extr-all-letters-but-separators "---")))
(defconst mail-extr-first-letters (purecopy "A-Za-z\240-\377"))
(defconst mail-extr-last-letters (purecopy "A-Za-z\240-\377`'."))
(defconst mail-extr-leading-garbage
(purecopy (format "[^%s]+" mail-extr-first-letters)))
(defconst mail-extr-bad-dot-pattern
(purecopy
(format "\\([%s][%s]\\)\\.+\\([%s]\\)"
mail-extr-all-letters
mail-extr-last-letters
mail-extr-first-letters)))
(defconst mail-extr-full-name-suffix-pattern
(purecopy
(format
"\\(,? ?\\([JjSs][Rr]\\.?\\|V?I+V?\\)\\)\\([^%s]\\([^%s]\\|\\'\\)\\|\\'\\)"
mail-extr-all-letters mail-extr-all-letters)))
(defconst mail-extr-roman-numeral-pattern (purecopy "V?I+V?\\b"))
(defconst mail-extr-weird-acronym-pattern
(purecopy "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)"))
(defconst mail-extr-alternative-address-pattern
(purecopy "\\(aka *\\)?[a-zA-Z.]+[!@][a-zA-Z.]"))
(defconst mail-extr-trailing-comment-start-pattern
(purecopy " [-{]\\|--\\|[+@#></\;]"))
(defconst mail-extr-name-pattern
(purecopy (format "\\b[%s][%s]*[%s]"
mail-extr-first-letters
mail-extr-all-letters
mail-extr-last-letters)))
(defconst mail-extr-initial-pattern
(purecopy (format "\\b[%s]\\([. ]\\|\\b\\)" mail-extr-first-letters)))
(defconst mail-extr-telephone-extension-pattern
(purecopy
"\\(\\([Ee]xt\\|\\|[Tt]ph\\|[Tt]el\\|[Xx]\\).?\\)? *\\+?[0-9][- 0-9]+"))
(defconst mail-extr-ham-call-sign-pattern
(purecopy "\\b\\(DX[0-9]+\\|[AKNW][A-Z]?[0-9][A-Z][A-Z]?[A-Z]?\\)"))
(defconst mail-extr-normal-name-pattern
(purecopy (format "\\b[%s][%s]+[%s]"
mail-extr-first-letters
mail-extr-all-letters-but-separators
mail-extr-last-letters)))
(defconst mail-extr-two-name-pattern
(purecopy
(concat "\\`\\(" mail-extr-normal-name-pattern
"\\|" mail-extr-initial-pattern
"\\) +\\(" mail-extr-name-pattern "\\)\\(,\\|\\'\\)")))
(defconst mail-extr-listserv-list-name-pattern
(purecopy "Multiple recipients of list \\([-A-Z]+\\)"))
(defconst mail-extr-stupid-vms-date-stamp-pattern
(purecopy
"[0-9][0-9]-[JFMASOND][aepuco][nbrylgptvc]-[0-9][0-9][0-9][0-9] [0-9]+ *"))
(defconst mail-extr-hz-embedded-gb-encoded-chinese-pattern
(purecopy "~{\\([^~].\\|~[^\}]\\)+~}"))
(defconst mail-extr-x400-encoded-address-pattern
(purecopy "[a-z]?[a-z]?\\(/[A-Za-z]+\\(\\.[A-Za-z]+\\)?=[^/]+\\)+/?\\'"))
(defconst mail-extr-x400-encoded-address-field-pattern-format
(purecopy "/%s=\\([^/]+\\)\\(/\\|\\'\\)"))
(defconst mail-extr-x400-encoded-address-surname-pattern
(purecopy
(format mail-extr-x400-encoded-address-field-pattern-format "[Ss]")))
(defconst mail-extr-x400-encoded-address-given-name-pattern
(purecopy
(format mail-extr-x400-encoded-address-field-pattern-format "[Gg]")))
(defconst mail-extr-x400-encoded-address-full-name-pattern
(purecopy
(format mail-extr-x400-encoded-address-field-pattern-format "[Pp][Nn]")))
(defconst mail-extr-address-syntax-table (make-syntax-table))
(defconst mail-extr-address-comment-syntax-table (make-syntax-table))
(defconst mail-extr-address-domain-literal-syntax-table (make-syntax-table))
(defconst mail-extr-address-text-comment-syntax-table (make-syntax-table))
(defconst mail-extr-address-text-syntax-table (make-syntax-table))
(mapcar
(function
(lambda (pair)
(let ((syntax-table (symbol-value (car pair))))
(mapcar
(function
(lambda (item)
(if (eq 2 (length item))
(modify-syntax-entry (car item) (car (cdr item)) syntax-table)
(let ((char (nth 0 item))
(bound (nth 1 item))
(syntax (nth 2 item)))
(while (<= char bound)
(modify-syntax-entry char syntax syntax-table)
(setq char (1+ char)))))))
(cdr pair)))))
'((mail-extr-address-syntax-table
(?\000 ?\037 "w") (?\040 " ") (?! ?~ "w") (?\177 "w") (?\200 ?\377 "w") (?\240 " ") (?\t " ")
(?\r " ")
(?\n " ")
(?\( ".")
(?\) ".")
(?< ".")
(?> ".")
(?@ ".")
(?, ".")
(?\ (?: ".")
(?\\ "\\")
(?\" "\"")
(?. ".")
(?\[ ".")
(?\] ".")
(?% ".")
(?! ".") )
(mail-extr-address-comment-syntax-table
(?\000 ?\377 "w")
(?\040 " ")
(?\240 " ")
(?\t " ")
(?\r " ")
(?\n " ")
(?\( "\(\)")
(?\) "\)\(")
(?\\ "\\"))
(mail-extr-address-domain-literal-syntax-table
(?\000 ?\377 "w")
(?\040 " ")
(?\240 " ")
(?\t " ")
(?\r " ")
(?\n " ")
(?\[ "\(\]") (?\] "\)\[") (?\\ "\\"))
(mail-extr-address-text-comment-syntax-table
(?\000 ?\377 "w")
(?\040 " ")
(?\240 " ")
(?\t " ")
(?\r " ")
(?\n " ")
(?\( "\(\)")
(?\) "\)\(")
(?\[ "\(\]")
(?\] "\)\[")
(?\{ "\(\}")
(?\} "\)\{")
(?\\ "\\")
(?\" "\"")
)
(mail-extr-address-text-syntax-table
(?\000 ?\177 ".")
(?\200 ?\377 "w")
(?\040 " ")
(?\t " ")
(?\r " ")
(?\n " ")
(?A ?Z "w")
(?a ?z "w")
(?- "w")
(?\} "w")
(?\{ "w")
(?| "w")
(?\' "w")
(?~ "w")
(?0 ?9 "w"))
))
(defmacro mail-extr-delete-char (n)
(list 'delete-region '(point) (list '+ '(point) n)))
(defmacro mail-extr-skip-whitespace-forward ()
'(skip-chars-forward " \t\n\r\240"))
(defmacro mail-extr-skip-whitespace-backward ()
'(skip-chars-backward " \t\n\r\240"))
(defmacro mail-extr-undo-backslash-quoting (beg end)
(`(save-excursion
(save-restriction
(narrow-to-region (, beg) (, end))
(goto-char (point-min))
(while (search-forward "\\" nil t)
(mail-extr-delete-char -1)
(or (eobp)
(forward-char 1))
)))))
(defmacro mail-extr-nuke-char-at (pos)
(` (save-excursion
(goto-char (, pos))
(mail-extr-delete-char 1)
(insert ?\ ))))
(put 'mail-extr-nuke-outside-range
'edebug-form-spec '(symbolp &optional form form atom))
(defmacro mail-extr-nuke-outside-range (list-symbol
beg-symbol end-symbol
&optional no-replace)
(or (memq no-replace '(t nil))
(error "no-replace must be t or nil, evaluable at macroexpand-time"))
(` (let ((temp (, list-symbol))
ch)
(while temp
(setq ch (car temp))
(cond ((or (> ch (, end-symbol))
(< ch (, beg-symbol)))
(,@ (if no-replace
nil
(` ((mail-extr-nuke-char-at ch)))))
(setcar temp nil)))
(setq temp (cdr temp)))
(setq (, list-symbol) (delq nil (, list-symbol))))))
(defun mail-extr-demarkerize (marker)
(if (markerp marker)
(let ((temp (marker-position marker)))
(set-marker marker nil)
temp)
marker))
(defun mail-extr-markerize (pos)
(if (or (markerp pos) (null pos))
pos
(copy-marker pos)))
(defmacro mail-extr-last (list)
(` (let ((list (, list)))
(while (not (null (cdr list)))
(setq list (cdr list)))
(car list))))
(defmacro mail-extr-safe-move-sexp (arg)
(` (condition-case error
(progn
(goto-char (or (scan-sexps (point) (, arg)) (point)))
t)
(error
(if (string-equal (nth 1 error) "Unbalanced parentheses")
nil
(while t
(signal (car error) (cdr error))))))))
(defvar disable-initial-guessing-flag) (defvar cbeg) (defvar cend)
(defun mail-extract-address-components (address &optional all)
"Given an RFC-822 address ADDRESS, extract full name and canonical address.
Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
If no name can be extracted, FULL-NAME will be nil.
If the optional argument ALL is non-nil, then ADDRESS can contain zero
or more recipients, separated by commas, and we return a list of
the form ((FULL-NAME CANONICAL-ADDRESS) ...) with one element for
each recipient. If ALL is nil, then if ADDRESS contains more than
one recipients, all but the first is ignored.
ADDRESS may be a string or a buffer. If it is a buffer, the visible
(narrowed) portion of the buffer will be interpreted as the address.
(This feature exists so that the clever caller might be able to avoid
consing a string.)"
(let ((canonicalization-buffer (get-buffer-create " *canonical address*"))
(extraction-buffer (get-buffer-create " *extract address components*"))
value-list)
(save-excursion
(set-buffer extraction-buffer)
(fundamental-mode)
(buffer-disable-undo extraction-buffer)
(set-syntax-table mail-extr-address-syntax-table)
(widen)
(erase-buffer)
(setq case-fold-search nil)
(insert ?\ )
(cond ((stringp address)
(insert address))
((bufferp address)
(insert-buffer-substring address))
(t
(error "Invalid address: %s" address)))
(set-text-properties (point-min) (point-max) nil)
(save-excursion
(set-buffer canonicalization-buffer)
(fundamental-mode)
(buffer-disable-undo canonicalization-buffer)
(set-syntax-table mail-extr-address-syntax-table)
(setq case-fold-search nil))
(goto-char (point-min))
(while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
(replace-match "\\1 " t))
(while (and (or all (null value-list))
(progn (goto-char (point-min))
(skip-chars-forward " \t")
(not (eobp))))
(let (char
end-of-address
<-pos >-pos @-pos colon-pos comma-pos !-pos %-pos \ group-:-pos group-\ record-pos-symbol
first-real-pos last-real-pos
phrase-beg phrase-end
cbeg cend quote-beg quote-end
atom-beg atom-end
mbox-beg mbox-end
\.-ends-name
temp
fi mi li saved-%-pos saved-!-pos saved-@-pos
domain-pos \.-pos insert-point
disable-initial-guessing-flag)
(goto-char (point-min))
(or (eq (following-char) ?\ )
(insert ?\ ))
(while (progn
(mail-extr-skip-whitespace-forward)
(not (eobp)))
(setq char (char-after (point)))
(or first-real-pos
(if (not (eq char ?\())
(setq first-real-pos (point))))
(cond
((eq char ?\()
(set-syntax-table mail-extr-address-comment-syntax-table)
(if (and (not cbeg)
(save-excursion
(forward-char 1)
(mail-extr-skip-whitespace-forward)
(not (eq ?\) (char-after (point))))))
(setq cbeg (point)))
(or (mail-extr-safe-move-sexp 1)
(forward-char 1))
(set-syntax-table mail-extr-address-syntax-table)
(if (and cbeg
(not cend))
(setq cend (point))))
((eq char ?\")
;; only record the first non-empty quote's position
(if (and (not quote-beg)
(save-excursion
(forward-char 1)
(mail-extr-skip-whitespace-forward)
(not (eq ?\" (char-after (point))))))
(setq quote-beg (point)))
;; TODO: don't record if unbalanced
(or (mail-extr-safe-move-sexp 1)
(forward-char 1))
(if (and quote-beg
(not quote-end))
(setq quote-end (point))))
;; domain literals
((eq char ?\[)
(set-syntax-table mail-extr-address-domain-literal-syntax-table)
(or (mail-extr-safe-move-sexp 1)
(forward-char 1))
(set-syntax-table mail-extr-address-syntax-table))
;; commas delimit addresses when outside < > pairs.
((and (eq char ?,)
(or (and (null <-pos)
;; Handle ROUTE-ADDR address that is missing its <.
(not (eq ?@ (char-after (1+ (point))))))
(and >-pos
;; handle weird munged addresses
;; BUG FIX: This test was reversed. Thanks to the
;; brilliant Rod Whitby <rwhitby@research.canon.oz.au>
;; for discovering this!
(< (mail-extr-last <-pos) (car >-pos)))))
;; The argument contains more than one address.
;; Temporarily hide everything after this one.
(setq end-of-address (copy-marker (1+ (point))))
(narrow-to-region (point-min) (1+ (point)))
(mail-extr-delete-char 1)
(setq char ?\() ; HAVE I NO SHAME??
)
;; record the position of various interesting chars, determine
;; legality later.
((setq record-pos-symbol
(cdr (assq char
'((?< . <-pos) (?> . >-pos) (?@ . @-pos)
(?: . colon-pos) (?, . comma-pos) (?! . !-pos)
(?% . %-pos) (?\; . \;-pos)))))
(set record-pos-symbol
(cons (point) (symbol-value record-pos-symbol)))
(forward-char 1))
((eq char ?.)
(forward-char 1))
((memq char '(
;; comment terminator illegal
?\)
;; domain literal terminator illegal
?\]
;; \ allowed only within quoted strings,
;; domain literals, and comments
?\\
))
(mail-extr-nuke-char-at (point))
(forward-char 1))
(t
(forward-word 1)))
(or (eq char ?\()
;; At the end of first address of a multiple address header.
(and (eq char ?,)
(eobp))
(setq last-real-pos (point))))
;; Use only the leftmost <, if any. Replace all others with spaces.
(while (cdr <-pos)
(mail-extr-nuke-char-at (car <-pos))
(setq <-pos (cdr <-pos)))
;; Use only the rightmost >, if any. Replace all others with spaces.
(while (cdr >-pos)
(mail-extr-nuke-char-at (nth 1 >-pos))
(setcdr >-pos (nthcdr 2 >-pos)))
;; If multiple @s and a :, but no < and >, insert around buffer.
;; Example: @foo.bar.dom,@xxx.yyy.zzz:mailbox@aaa.bbb.ccc
;; This commonly happens on the UUCP "From " line. Ugh.
(cond ((and (> (length @-pos) 1)
(eq 1 (length colon-pos)) ;TODO: check if between last two @s
(not \;-pos)
(not <-pos))
(goto-char (point-min))
(mail-extr-delete-char 1)
(setq <-pos (list (point)))
(insert ?<)))
;; If < but no >, insert > in rightmost possible position
(cond ((and <-pos
(null >-pos))
(goto-char (point-max))
(setq >-pos (list (point)))
(insert ?>)))
;; If > but no <, replace > with space.
(cond ((and >-pos
(null <-pos))
(mail-extr-nuke-char-at (car >-pos))
(setq >-pos nil)))
;; Turn >-pos and <-pos into non-lists
(setq >-pos (car >-pos)
<-pos (car <-pos))
;; Trim other punctuation lists of items outside < > pair to handle
;; stupid MTAs.
(cond (<-pos ; don't need to check >-pos also
;; handle bozo software that violates RFC 822 by sticking
;; punctuation marks outside of a < > pair
(mail-extr-nuke-outside-range @-pos <-pos >-pos t)
;; RFC 822 says nothing about these two outside < >, but
;; remove those positions from the lists to make things
;; easier.
(mail-extr-nuke-outside-range !-pos <-pos >-pos t)
(mail-extr-nuke-outside-range %-pos <-pos >-pos t)))
;; Check for : that indicates GROUP list and for : part of
;; ROUTE-ADDR spec.
;; Can't possibly be more than two :. Nuke any extra.
(while colon-pos
(setq temp (car colon-pos)
colon-pos (cdr colon-pos))
(cond ((and <-pos >-pos
(> temp <-pos)
(< temp >-pos))
(if (or route-addr-:-pos
(< (length @-pos) 2)
(> temp (car @-pos))
(< temp (nth 1 @-pos)))
(mail-extr-nuke-char-at temp)
(setq route-addr-:-pos temp)))
((or (not <-pos)
(and <-pos
(< temp <-pos)))
(setq group-:-pos temp))))
;; Nuke any ; that is in or to the left of a < > pair or to the left
;; of a GROUP starting :. Also, there may only be one ;.
(while \;-pos
(setq temp (car \;-pos)
\;-pos (cdr \;-pos))
(cond ((and <-pos >-pos
(> temp <-pos)
(< temp >-pos))
(mail-extr-nuke-char-at temp))
((and (or (not group-:-pos)
(> temp group-:-pos))
(not group-\;-pos))
(setq group-\;-pos temp))))
;; Nuke unmatched GROUP syntax characters.
(cond ((and group-:-pos (not group-\;-pos))
;; *** Do I really need to erase it?
(mail-extr-nuke-char-at group-:-pos)
(setq group-:-pos nil)))
(cond ((and group-\;-pos (not group-:-pos))
;; *** Do I really need to erase it?
(mail-extr-nuke-char-at group-\;-pos)
(setq group-\;-pos nil)))
;; Handle junk like " (cond
(group-\ (mail-extr-nuke-outside-range !-pos group-:-pos group-\ (mail-extr-nuke-outside-range @-pos group-:-pos group-\ (mail-extr-nuke-outside-range %-pos group-:-pos group-\ (mail-extr-nuke-outside-range comma-pos group-:-pos group-\ (and last-real-pos
(> last-real-pos (1+ group-\ (setq last-real-pos (1+ group-\ (and cend
(> cend group-\ (setq cend nil
cbeg nil))
(and quote-end
(> quote-end group-\ (setq quote-end nil
quote-beg nil))
))
(mail-extr-nuke-outside-range comma-pos 1 1)
(cond (<-pos
(goto-char <-pos)
(mail-extr-skip-whitespace-backward)
(setq phrase-end (point))
(goto-char (or (point-min)))
(mail-extr-skip-whitespace-forward)
(if (< (point) phrase-end)
(setq phrase-beg (point))
(setq phrase-end nil))))
(cond (route-addr-:-pos
(setq !-pos nil
%-pos nil
>-pos (copy-marker >-pos)
route-addr-:-pos (copy-marker route-addr-:-pos))
(goto-char >-pos)
(insert-before-markers ?X)
(goto-char (car @-pos))
(while (setq @-pos (cdr @-pos))
(mail-extr-delete-char 1)
(setq %-pos (cons (point-marker) %-pos))
(insert "%")
(goto-char (1- >-pos))
(save-excursion
(insert-buffer-substring extraction-buffer
(car @-pos) route-addr-:-pos)
(delete-region (car @-pos) route-addr-:-pos))
(or (cdr @-pos)
(setq saved-@-pos (list (point)))))
(setq @-pos saved-@-pos)
(goto-char >-pos)
(mail-extr-delete-char -1)
(mail-extr-nuke-char-at route-addr-:-pos)
(mail-extr-demarkerize route-addr-:-pos)
(setq route-addr-:-pos nil
>-pos (mail-extr-demarkerize >-pos)
%-pos (mapcar 'mail-extr-demarkerize %-pos))))
(setq @-pos (car @-pos))
(save-excursion
(set-buffer canonicalization-buffer)
(widen)
(erase-buffer)
(insert-buffer-substring extraction-buffer)
(if <-pos
(narrow-to-region (progn
(goto-char (1+ <-pos))
(mail-extr-skip-whitespace-forward)
(point))
>-pos)
(if (and first-real-pos last-real-pos)
(narrow-to-region first-real-pos last-real-pos)
(narrow-to-region (point-max) (point-max))
))
(and @-pos %-pos
(mail-extr-nuke-outside-range %-pos (point-min) @-pos))
(and %-pos !-pos
(mail-extr-nuke-outside-range !-pos (point-min) (car %-pos)))
(and @-pos !-pos (not %-pos)
(mail-extr-nuke-outside-range !-pos (point-min) @-pos))
(cond ((and %-pos
(not @-pos))
(goto-char (car %-pos))
(mail-extr-delete-char 1)
(setq @-pos (point))
(insert "@")
(setq %-pos (cdr %-pos))))
(if mail-extr-mangle-uucp
(cond (!-pos
(save-restriction
(cond ((and @-pos
mail-extr-@-binds-tighter-than-!)
(goto-char @-pos)
(setq %-pos (cons (point) %-pos)
@-pos nil)
(mail-extr-delete-char 1)
(insert "%")
(setq insert-point (point-max)))
(mail-extr-@-binds-tighter-than-!
(setq insert-point (point-max)))
(%-pos
(setq insert-point (mail-extr-last %-pos)
saved-%-pos (mapcar 'mail-extr-markerize %-pos)
%-pos nil
@-pos (mail-extr-markerize @-pos)))
(@-pos
(setq insert-point @-pos)
(setq @-pos (mail-extr-markerize @-pos)))
(t
(setq insert-point (point-max))))
(narrow-to-region (point-min) insert-point)
(setq saved-!-pos (car !-pos))
(while !-pos
(goto-char (point-max))
(cond ((and (not @-pos)
(not (cdr !-pos)))
(setq @-pos (point))
(insert-before-markers "@ "))
(t
(setq %-pos (cons (point) %-pos))
(insert-before-markers "% ")))
(backward-char 1)
(insert-buffer-substring
(current-buffer)
(if (nth 1 !-pos)
(1+ (nth 1 !-pos))
(point-min))
(car !-pos))
(mail-extr-delete-char 1)
(or (save-excursion
(mail-extr-safe-move-sexp -1)
(mail-extr-skip-whitespace-backward)
(eq ?. (preceding-char)))
(insert-before-markers
(if (save-excursion
(mail-extr-skip-whitespace-backward)
(eq ?. (preceding-char)))
""
".")
"uucp"))
(setq !-pos (cdr !-pos))))
(and saved-%-pos
(setq %-pos (append (mapcar 'mail-extr-demarkerize
saved-%-pos)
%-pos)))
(setq @-pos (mail-extr-demarkerize @-pos))
(narrow-to-region (1+ saved-!-pos) (point-max)))))
(cond ((and %-pos
(not @-pos))
(goto-char (car %-pos))
(mail-extr-delete-char 1)
(setq @-pos (point))
(insert "@")
(setq %-pos (cdr %-pos))))
(setq %-pos (nreverse %-pos))
(cond (%-pos (setq temp %-pos)
(catch 'truncated
(while temp
(goto-char (or (nth 1 temp)
@-pos))
(mail-extr-skip-whitespace-backward)
(save-excursion
(mail-extr-safe-move-sexp -1)
(setq domain-pos (point))
(mail-extr-skip-whitespace-backward)
(setq \.-pos (eq ?. (preceding-char))))
(cond ((and \.-pos
(let ((s (intern-soft
(buffer-substring domain-pos (point))
mail-extr-all-top-level-domains)))
(and s (get s 'domain-name))))
(narrow-to-region (point-min) (point))
(goto-char (car temp))
(mail-extr-delete-char 1)
(setq @-pos (point))
(setcdr temp nil)
(setq %-pos (delq @-pos %-pos))
(insert "@")
(throw 'truncated t)))
(setq temp (cdr temp))))))
(setq mbox-beg (point-min)
mbox-end (if %-pos (car %-pos)
(or @-pos
(point-max)))))
(cond (
(and phrase-beg
(eq quote-beg phrase-beg)
(<= quote-end phrase-end))
(narrow-to-region (1+ quote-beg) (1- quote-end))
(mail-extr-undo-backslash-quoting (point-min) (point-max)))
(phrase-beg
(narrow-to-region phrase-beg phrase-end))
(cbeg
(narrow-to-region (1+ cbeg) (1- cend))
(mail-extr-undo-backslash-quoting (point-min) (point-max))
(goto-char (point-min))
)
(t
(goto-char (point-max))
(narrow-to-region (point) (point))
(insert-buffer-substring canonicalization-buffer
mbox-beg mbox-end)
(goto-char (point-min))
(setq \.-ends-name (re-search-forward "[_0-9]" nil t))
(goto-char (point-min))
(if (not mail-extr-mangle-uucp)
(modify-syntax-entry ?! "w" (syntax-table)))
(while (progn
(mail-extr-skip-whitespace-forward)
(not (eobp)))
(setq char (char-after (point)))
(cond
((eq char ?\")
(setq quote-beg (point))
(or (mail-extr-safe-move-sexp 1)
;; TODO: handle this error condition!!!!!
(forward-char 1))
;; take into account deletions
(setq quote-end (- (point) 2))
(save-excursion
(backward-char 1)
(mail-extr-delete-char 1)
(goto-char quote-beg)
(or (eobp)
(mail-extr-delete-char 1)))
(mail-extr-undo-backslash-quoting quote-beg quote-end)
(or (eq ?\ (char-after (point)))
(insert " "))
;; (setq mailbox-name-processed-flag t)
(setq \.-ends-name t))
((eq char ?.)
(if (memq (char-after (1+ (point))) '(?_ ?=))
(progn
(forward-char 1)
(mail-extr-delete-char 1)
(insert ?\ ))
(if \.-ends-name
(narrow-to-region (point-min) (point))
(mail-extr-delete-char 1)
(insert " ")))
;; (setq mailbox-name-processed-flag t)
)
((memq (char-syntax char) '(?. ?\\))
(mail-extr-delete-char 1)
(insert " ")
;; (setq mailbox-name-processed-flag t)
)
(t
(setq atom-beg (point))
(forward-word 1)
(setq atom-end (point))
(goto-char atom-beg)
(save-restriction
(narrow-to-region atom-beg atom-end)
(cond
;; Handle X.400 addresses encoded in RFC-822.
;; *** Shit! This has to handle the case where it is
;; *** embedded in a quote too!
;; *** Shit! The input is being broken up into atoms
;; *** by periods!
((looking-at mail-extr-x400-encoded-address-pattern)
;; Copy the contents of the individual fields that
;; might hold name data to the beginning.
(mapcar
(function
(lambda (field-pattern)
(cond
((save-excursion
(re-search-forward field-pattern nil t))
(insert-buffer-substring (current-buffer)
(match-beginning 1)
(match-end 1))
(insert " ")))))
(list mail-extr-x400-encoded-address-given-name-pattern
mail-extr-x400-encoded-address-surname-pattern
mail-extr-x400-encoded-address-full-name-pattern))
;; Discard the rest, since it contains stuff like
;; routing information, not part of a name.
(mail-extr-skip-whitespace-backward)
(delete-region (point) (point-max))
;; Handle periods used for spacing.
(while (re-search-forward mail-extr-bad-dot-pattern nil t)
(replace-match "\\1 \\2" t))
;; (setq mailbox-name-processed-flag t)
)
;; Handle normal addresses.
(t
(goto-char (point-min))
;; Handle _ and = used for spacing.
(while (re-search-forward "\\([^_=]+\\)[_=]" nil t)
(replace-match "\\1 " t)
;; (setq mailbox-name-processed-flag t)
)
(goto-char (point-max))))))))
;; undo the dirty deed
(if (not mail-extr-mangle-uucp)
(modify-syntax-entry ?! "." (syntax-table)))
;;
;; If we derived the name from the mailbox part of the address,
;; and we only got one word out of it, don't treat that as a
;; name. "foo@bar" --> (nil "foo@bar"), not ("foo" "foo@bar")
;; (if (not mailbox-name-processed-flag)
;; (delete-region (point-min) (point-max)))
))
(set-syntax-table mail-extr-address-text-syntax-table)
(mail-extr-voodoo mbox-beg mbox-end canonicalization-buffer)
(goto-char (point-min))
;; If name is "First Last" and userid is "F?L", then assume
;; the middle initial is the second letter in the userid.
;; Initial code by Jamie Zawinski <jwz@lucid.com>
;; *** Make it work when there's a suffix as well.
(goto-char (point-min))
(cond ((and mail-extr-guess-middle-initial
(not disable-initial-guessing-flag)
(eq 3 (- mbox-end mbox-beg))
(progn
(goto-char (point-min))
(looking-at mail-extr-two-name-pattern)))
(setq fi (char-after (match-beginning 0))
li (char-after (match-beginning 3)))
(save-excursion
(set-buffer canonicalization-buffer)
;; char-equal is ignoring case here, so no need to upcase
;; or downcase.
(let ((case-fold-search t))
(and (char-equal fi (char-after mbox-beg))
(char-equal li (char-after (1- mbox-end)))
(setq mi (char-after (1+ mbox-beg))))))
(cond ((and mi
;; TODO: use better table than syntax table
(eq ?w (char-syntax mi)))
(goto-char (match-beginning 3))
(insert (upcase mi) ". ")))))
;; Nuke name if it is the same as mailbox name.
(let ((buffer-length (- (point-max) (point-min)))
(i 0)
(names-match-flag t))
(cond ((and (> buffer-length 0)
(eq buffer-length (- mbox-end mbox-beg)))
(goto-char (point-max))
(insert-buffer-substring canonicalization-buffer
mbox-beg mbox-end)
(while (and names-match-flag
(< i buffer-length))
(or (eq (downcase (char-after (+ i (point-min))))
(downcase
(char-after (+ i buffer-length (point-min)))))
(setq names-match-flag nil))
(setq i (1+ i)))
(delete-region (+ (point-min) buffer-length) (point-max))
(if names-match-flag
(narrow-to-region (point) (point))))))
;; Nuke name if it's just one word.
(goto-char (point-min))
(and mail-extr-ignore-single-names
(not (re-search-forward "[- ]" nil t))
(narrow-to-region (point) (point)))
;; Record the result
(setq value-list
(cons (list (if (not (= (point-min) (point-max)))
(buffer-string))
(save-excursion
(set-buffer canonicalization-buffer)
(if (not (= (point-min) (point-max)))
(buffer-string))))
value-list))
;; Unless one address is all we wanted,
;; delete this one from extraction-buffer
;; and get ready to extract the next address.
(when all
(if end-of-address
(narrow-to-region 1 end-of-address)
(widen))
(delete-region (point-min) (point-max))
(widen))
)))
(if all (nreverse value-list) (car value-list))
))
(defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer)
(let ((word-count 0)
(case-fold-search nil)
mixed-case-flag lower-case-flag ;;upper-case-flag
suffix-flag last-name-comma-flag
;;cbeg cend
initial
begin-again-flag
drop-this-word-if-trailing-flag
drop-last-word-if-trailing-flag
word-found-flag
this-word-beg last-word-beg
name-beg name-end
name-done-flag
)
(save-excursion
(set-syntax-table mail-extr-address-text-syntax-table)
;; Get rid of comments.
(goto-char (point-min))
(while (not (eobp))
;; Initialize for this iteration of the loop.
(skip-chars-forward "^({[\"'`")
(let ((cbeg (point)))
(set-syntax-table mail-extr-address-text-comment-syntax-table)
(cond ((memq (following-char) '(?\' ?\`))
(search-forward "'" nil 'move
(if (eq ?\' (following-char)) 2 1)))
(t
(or (mail-extr-safe-move-sexp 1)
(goto-char (point-max)))))
(set-syntax-table mail-extr-address-text-syntax-table)
(when (eq (char-after cbeg) ?\()
(delete-region cbeg (point))
(skip-chars-backward " \t")
(if (looking-at "\\([ \t]+$\\|[ \t]+,\\)")
(replace-match "")
(setq cbeg (point))
(skip-chars-forward " \t")
(if (bobp)
(delete-region (point) cbeg)
(just-one-space))))))
(cond ((not (search-forward " " nil t))
(goto-char (point-min))
(cond ((search-forward "_" nil t)
(goto-char (point-min))
(while (search-forward "_" nil t)
(replace-match " " t)))
((search-forward "." nil t)
(goto-char (point-min))
(while (re-search-forward mail-extr-bad-dot-pattern nil t)
(replace-match "\\1 \\2" t))))))
(goto-char (point-min))
(while (not name-done-flag)
(cond (word-found-flag
(setq last-word-beg this-word-beg)
(setq drop-last-word-if-trailing-flag
drop-this-word-if-trailing-flag)
(setq word-found-flag nil)))
(cond (begin-again-flag
(setq word-count 0)
(setq last-word-beg nil)
(setq drop-last-word-if-trailing-flag nil)
(setq mixed-case-flag nil)
(setq lower-case-flag nil)
(setq begin-again-flag nil)
))
(mail-extr-skip-whitespace-forward)
(if (eq word-count 0) (narrow-to-region (point) (point-max)))
(setq this-word-beg (point))
(setq drop-this-word-if-trailing-flag nil)
(cond
((and (eq word-count 0)
(looking-at mail-extr-full-name-prefixes))
(goto-char (match-end 0))
(narrow-to-region (point) (point-max)))
((and (>= word-count 2)
(looking-at mail-extr-full-name-suffix-pattern))
(mail-extr-skip-whitespace-backward)
(setq suffix-flag (point))
(if (eq ?, (following-char))
(forward-char 1)
(insert ?,))
(or (eq ?\ (following-char))
(insert ?\ ))
(mail-extr-skip-whitespace-forward)
(cond ((memq (following-char) '(?j ?J ?s ?S))
(capitalize-word 1)
(if (eq (following-char) ?.)
(forward-char 1)
(insert ?.)))
(t
(upcase-word 1)))
(setq word-found-flag t)
(setq name-done-flag t))
((looking-at "MKA \\(.+\\)") (goto-char (match-beginning 1))
(narrow-to-region (point) (point-max))
(setq begin-again-flag t))
((and (eq ?, (following-char))
(eq word-count 1))
(forward-char 1)
(setq last-name-comma-flag t)
(or (eq ?\ (following-char))
(insert ?\ )))
((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
(setq cbeg (point))
(set-syntax-table mail-extr-address-text-comment-syntax-table)
(cond ((memq (following-char) '(?\' ?\`))
(or (search-forward "'" nil t
(if (eq ?\' (following-char)) 2 1))
(mail-extr-delete-char 1)))
(t
(or (mail-extr-safe-move-sexp 1)
(goto-char (point-max)))))
(set-syntax-table mail-extr-address-text-syntax-table)
(setq cend (point))
(cond
;; Handle case of entire name being quoted
((and (eq word-count 0)
(looking-at " *\\'")
(>= (- cend cbeg) 2))
(narrow-to-region (1+ cbeg) (1- cend))
(goto-char (point-min)))
(t
;; Handle case of quoted initial
(if (and (or (= 3 (- cend cbeg))
(and (= 4 (- cend cbeg))
(eq ?. (char-after (+ 2 cbeg)))))
(not (looking-at " *\\'")))
(setq initial (char-after (1+ cbeg)))
(setq initial nil))
(delete-region cbeg cend)
(if initial
(insert initial ". ")))))
;; Handle *Stupid* VMS date stamps
((looking-at mail-extr-stupid-vms-date-stamp-pattern)
(replace-match "" t))
;; Handle Chinese characters.
((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern)
(goto-char (match-end 0))
(setq word-found-flag t))
;; Skip initial garbage characters.
;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
((and (eq word-count 0)
(looking-at mail-extr-leading-garbage))
(goto-char (match-end 0))
;; *** Skip backward over these???
;; (skip-chars-backward "& \"")
(narrow-to-region (point) (point-max)))
((or
(and (>= word-count 2)
mixed-case-flag
(looking-at mail-extr-weird-acronym-pattern)
(not (looking-at mail-extr-roman-numeral-pattern)))
(looking-at mail-extr-alternative-address-pattern)
(looking-at mail-extr-trailing-comment-start-pattern)
(looking-at mail-extr-telephone-extension-pattern))
(setq name-done-flag t))
((looking-at mail-extr-ham-call-sign-pattern)
(delete-region (match-beginning 0) (match-end 0)))
((looking-at mail-extr-initial-pattern)
(or (eq (following-char) (upcase (following-char)))
(setq lower-case-flag t))
(forward-char 1)
(if (eq ?. (following-char))
(forward-char 1)
(insert ?.))
(or (eq ?\ (following-char))
(insert ?\ ))
(setq word-found-flag t))
((and (eq word-count 0)
(looking-at mail-extr-listserv-list-name-pattern))
(narrow-to-region (match-beginning 1) (match-end 1))
(setq word-found-flag t)
(setq name-done-flag t))
((and (> word-count 0)
(eq ?\ (preceding-char))
(eq (following-char) ?&)
(eq (1+ (point)) (point-max)))
(mail-extr-delete-char 1)
(capitalize-region
(point)
(progn
(insert-buffer-substring canonicalization-buffer
mbox-beg mbox-end)
(point)))
(setq disable-initial-guessing-flag t)
(setq word-found-flag t))
((and (> word-count 0) (eq (following-char) ?\&))
(setq name-beg (point))
(setq name-end (1+ name-beg))
(setq word-found-flag t)
(goto-char name-end))
((looking-at mail-extr-name-pattern)
(setq name-beg (point))
(setq name-end (match-end 0))
(and (>= word-count 2)
(not lower-case-flag)
(or
(looking-at "[a-z][a-z][a-z][a-z]+[ \t]*\\'")
(eq ?. (char-after (1- name-end))))
(setq drop-this-word-if-trailing-flag t))
(if (re-search-forward "[a-z]" name-end t)
(if (progn
(goto-char name-beg)
(re-search-forward "[A-Z]" name-end t))
(setq mixed-case-flag t)
(setq lower-case-flag t))
)
(goto-char name-end)
(setq word-found-flag t))
(t
(setq name-done-flag t)
))
(if word-found-flag
(setq word-count (1+ word-count))))
(if (and (not suffix-flag)
(looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'"))
(goto-char (setq suffix-flag (point-max))))
(narrow-to-region (point-min)
(or (and drop-last-word-if-trailing-flag
last-word-beg)
(point)))
(cond ((not suffix-flag)
(goto-char (point-min))
(let ((case-fold-search t))
(if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'")
(erase-buffer)))))
(cond (last-name-comma-flag
(goto-char (point-min))
(search-forward ",")
(setq name-end (1- (point)))
(goto-char (or suffix-flag (point-max)))
(or (eq ?\ (preceding-char))
(insert ?\ ))
(insert-buffer-substring (current-buffer) (point-min) name-end)
(goto-char name-end)
(skip-chars-forward "\t ,")
(narrow-to-region (point) (point-max))))
(goto-char (point-min))
(while (re-search-forward "[ \t\n]+" nil t)
(replace-match (if (eobp) "" " ") t))
)))
(defconst mail-extr-all-top-level-domains
(let ((ob (make-vector 509 0)))
(mapcar
(function
(lambda (x)
(put (intern (downcase (car x)) ob)
'domain-name
(if (nth 2 x)
(format (nth 2 x) (nth 1 x))
(nth 1 x)))))
'(
("ad" "Andorra")
("ae" "United Arab Emirates")
("ag" "Antigua and Barbuda")
("al" "Albania")
("am" "Armenia")
("ao" "Angola")
("aq" "Antarctica") ("ar" "Argentina" "Argentine Republic")
("at" "Austria" "The Republic of %s")
("au" "Australia")
("az" "Azerbaijan")
("ba" "Bosnia-Herzegovina")
("bb" "Barbados")
("bd" "Bangladesh")
("be" "Belgium" "The Kingdom of %s")
("bf" "Burkina Faso")
("bg" "Bulgaria")
("bh" "Bahrain")
("bm" "Bermuda")
("bo" "Bolivia" "Republic of %s")
("br" "Brazil" "The Federative Republic of %s")
("bs" "Bahamas")
("bw" "Botswana")
("by" "Belarus")
("bz" "Belize")
("ca" "Canada")
("cg" "Congo")
("ch" "Switzerland" "The Swiss Confederation")
("ci" "Ivory Coast")
("cl" "Chile" "The Republic of %s")
("cm" "Cameroon") ("cn" "China" "The People's Republic of %s")
("co" "Colombia")
("cr" "Costa Rica" "The Republic of %s")
("cs" "Czechoslovakia")
("cu" "Cuba")
("cy" "Cyprus")
("cz" "Czech Republic")
("de" "Germany")
("dk" "Denmark")
("dm" "Dominica")
("do" "Dominican Republic" "The %s")
("dz" "Algeria")
("ec" "Ecuador" "The Republic of %s")
("ee" "Estonia")
("eg" "Egypt" "The Arab Republic of %s")
("er" "Eritrea")
("es" "Spain" "The Kingdom of %s")
("et" "Ethiopia")
("fi" "Finland" "The Republic of %s")
("fo" "Faroe Islands")
("fr" "France")
("ga" "Gabon")
("gb" "United Kingdom")
("gd" "Grenada")
("ge" "Georgia")
("gf" "Guyana (Fr.)")
("gj" "Fiji")
("gl" "Greenland")
("gm" "Gambia")
("gp" "Guadeloupe (Fr.)")
("gr" "Greece" "The Hellenic Republic (%s)")
("gt" "Guatemala")
("gu" "Guam (U.S.)")
("hk" "Hong Kong")
("hn" "Honduras")
("hr" "Croatia" "Croatia (Hrvatska)")
("ht" "Haiti")
("hu" "Hungary" "The Hungarian Republic")
("id" "Indonesia")
("ie" "Ireland")
("il" "Israel" "The State of %s")
("im" "Isle of Man" "The %s")
("in" "India" "The Republic of %s")
("ir" "Iran")
("is" "Iceland" "The Republic of %s")
("it" "Italy" "The Italian Republic")
("jm" "Jamaica")
("jo" "Jordan")
("jp" "Japan")
("ke" "Kenya")
("kn" "St. Kitts, Nevis, and Anguilla")
("kp" "Korea (North)")
("kr" "Korea (South)")
("kw" "Kuwait")
("kz" "Kazakhstan")
("lb" "Lebanon")
("lc" "St. Lucia")
("li" "Liechtenstein")
("lk" "Sri Lanka" "The Democratic Socialist Republic of %s")
("ls" "Lesotho")
("lt" "Lithuania")
("lu" "Luxembourg")
("lv" "Latvia")
("ma" "Morocco")
("mc" "Monaco")
("md" "Moldova" "The Republic of %s")
("mg" "Madagascar")
("mk" "Macedonia")
("ml" "Mali")
("mo" "Macau")
("mt" "Malta")
("mu" "Mauritius")
("mv" "Maldives")
("mw" "Malawi")
("mx" "Mexico" "The United Mexican States")
("my" "Malaysia" "%s (changed to Myanmar?)") ("mz" "Mozambique")
("na" "Namibia")
("nc" "New Caledonia (Fr.)")
("ne" "Niger") ("ni" "Nicaragua" "The Republic of %s")
("nl" "Netherlands" "The Kingdom of the %s")
("no" "Norway" "The Kingdom of %s")
("np" "Nepal") ("nu" "Niue")
("nz" "New Zealand")
("pa" "Panama")
("pe" "Peru")
("pf" "Polynesia (Fr.)")
("pg" "Papua New Guinea")
("ph" "Philippines" "The Republic of the %s")
("pk" "Pakistan")
("pl" "Poland")
("pr" "Puerto Rico (U.S.)")
("pt" "Portugal" "The Portuguese Republic")
("py" "Paraguay")
("qa" "Qatar")
("re" "Reunion (Fr.)") ("ro" "Romania")
("ru" "Russian Federation")
("sa" "Saudi Arabia")
("sc" "Seychelles")
("sd" "Sudan")
("se" "Sweden" "The Kingdom of %s")
("sg" "Singapore" "The Republic of %s")
("si" "Slovenia")
("sj" "Svalbard and Jan Mayen Is.") ("sk" "Slovakia" "The Slovak Republic")
("sm" "San Marino")
("sn" "Senegal")
("sr" "Suriname")
("su" "U.S.S.R." "The Union of Soviet Socialist Republics")
("sz" "Swaziland")
("tg" "Togo")
("th" "Thailand" "The Kingdom of %s")
("tm" "Turkmenistan") ("tn" "Tunisia")
("to" "Tonga")
("tr" "Turkey" "The Republic of %s")
("tt" "Trinidad and Tobago")
("tw" "Taiwan")
("ua" "Ukraine")
("uk" "United Kingdom" "The %s of Great Britain and Northern Ireland")
("us" "United States" "The %s of America")
("uy" "Uruguay" "The Eastern Republic of %s")
("vc" "St. Vincent and the Grenadines")
("ve" "Venezuela" "The Republic of %s")
("vi" "Virgin Islands (U.S.)")
("vn" "Vietnam")
("vu" "Vanuatu")
("yu" "Yugoslavia" "Yugoslavia, AKA Serbia-Montenegro")
("za" "South Africa" "The Republic of %s")
("zw" "Zimbabwe" "Republic of %s")
("arpa" t "Advanced Research Projects Agency (U.S. DoD)")
("bitnet" t "Because It's Time NET")
("com" t "Commercial")
("edu" t "Educational")
("gov" t "Government (U.S.)")
("int" t "International (NATO)")
("mil" t "Military (U.S.)")
("nato" t "North Atlantic Treaty Organization")
("net" t "Network")
("org" t "Non-profit Organization")
("uucp" t "Unix to Unix CoPy")
))
ob))
(defun what-domain (domain)
"Convert mail domain DOMAIN to the country it corresponds to."
(interactive
(let ((completion-ignore-case t))
(list (completing-read "Domain: "
mail-extr-all-top-level-domains nil t))))
(or (setq domain (intern-soft (downcase domain)
mail-extr-all-top-level-domains))
(error "No such domain"))
(message "%s: %s" (upcase (symbol-name domain)) (get domain 'domain-name)))
(provide 'mail-extr)