;; Marker strings used by the HZ/ZW conversion functions in this file.
;; `encode-hz-region' encodes text with `iso-2022-7bit' and then replaces
;; each three-character ISO 2022 sequence below with the matching
;; two-character HZ marker.
;; NOTE(review): the HZ variable names are spelled `designnation' (double
;; "n"); the spelling is part of their public names, so it is left as is.

;; ISO 2022 sequence that `encode-hz-region' rewrites to `hz-gb-designnation'.
(defvar iso2022-gb-designation "\e$A")
;; HZ marker that opens a GB segment.
(defvar hz-gb-designnation "~{")
;; ISO 2022 sequence that `encode-hz-region' rewrites to `hz-ascii-designnation'.
(defvar iso2022-ascii-designation "\e(B")
;; HZ marker that closes a GB segment.
(defvar hz-ascii-designnation "~}")
;; Regexp matching "zW" at the beginning of a line (start of a ZW line).
(defvar zw-start-gb "^zW")
;; Regexp used by `decode-hz-region' to find text that needs decoding:
;; the HZ opening marker, a ZW line start, or any character outside the
;; range \0-\177.
(defvar hz/zw-start-gb
(concat hz-gb-designnation "\\|" zw-start-gb "\\|[^\0-\177]"))
;; When non-nil, `decode-hz-region' searches for the closing "~}" without
;; limiting the search to the current line.
(defvar decode-hz-line-continuation nil
"Flag to tell if we should care line continuation convention of Hz.")
;; Translation string for `translate-region', indexed by character code.
;; Codes 0..32 map to themselves; codes 33..126 map to CODE + 128, i.e.
;; the same code with the most significant bit of the byte set.
(defconst hz-set-msb-table
  (eval-when-compile
    (let (codes)
      (dotimes (code 127)
        (push (if (< code 33) code (+ code 128)) codes))
      (apply 'string (nreverse codes)))))
(defun decode-hz-region (beg end)
  "Decode HZ/ZW encoded text in the current region.
BEG and END delimit the region.  The text is first rewritten so that
every GB segment has the most significant bit of each byte set, and
the result is then decoded as `euc-china'.
Return the length of resulting text."
  (interactive "r")
  (save-excursion
    (save-restriction
      (let (pos ch)
        (narrow-to-region beg end)

        ;; Pass 1: undo the tilde escapes: "~\n" -> "\n", "~~" -> "~".
        (goto-char (point-min))
        (while (search-forward "~" nil t)
          (setq ch (following-char))
          (cond ((= ch ?\n)
                 ;; Drop the tilde; point stays in front of the newline.
                 (delete-char -1))
                ((= ch ?~)
                 ;; Drop the SECOND tilde so that point ends up after the
                 ;; surviving one.  Deleting the first tilde instead would
                 ;; leave point before the survivor, which the next search
                 ;; would then treat as the start of another escape
                 ;; (turning "~~~~" into "~" rather than "~~").
                 (delete-char 1))))
        ;; NOTE(review): a literal "~{" produced from the input "~~{" is
        ;; still seen by the second pass as a GB opening marker -- confirm
        ;; whether callers can feed such text.

        ;; Pass 2: convert each GB segment to its 8-bit form.
        ;;   "^zW...\n" and "~{...~}" get the MSB set via
        ;;   `hz-set-msb-table'; text that already contains characters
        ;;   >= 128 is left alone.
        (goto-char (point-min))
        ;; BEG is reused to remember where the first segment starts.
        (setq beg nil)
        (while (re-search-forward hz/zw-start-gb nil t)
          (setq pos (match-beginning 0)
                ch (char-after pos))
          (or beg (setq beg pos))
          (end-of-line)
          (setq end (point))
          (if (>= ch 128)
              ;; Already 8-bit: nothing to rewrite, the segment ends at
              ;; the end of this line.
              nil
            ;; Remove the two-character opening marker ("~{" or "zW").
            (goto-char pos)
            (delete-char 2)
            (setq end (- end 2))
            (if (= ch ?z)
                ;; ZW: the rest of the line is GB text.
                (progn
                  (translate-region (point) end hz-set-msb-table)
                  (goto-char end))
              ;; HZ: the segment runs up to the closing "~}".  The search
              ;; is bounded by the end of the line unless
              ;; `decode-hz-line-continuation' is non-nil.
              (if (search-forward hz-ascii-designnation
                                  (if decode-hz-line-continuation nil end)
                                  t)
                  (delete-char -2))
              (setq end (point))
              (translate-region pos (point) hz-set-msb-table))))
        ;; Decode from the first segment found to the end of the last one.
        (if beg
            (decode-coding-region beg end 'euc-china)))
      (- (point-max) (point-min)))))
(defun decode-hz-buffer ()
  "Decode the HZ/ZW encoded text of the current buffer.
This calls `decode-hz-region' on the span from `point-min' to
`point-max' and returns its value."
  (interactive)
  (let ((start (point-min))
        (finish (point-max)))
    (decode-hz-region start finish)))
(defun encode-hz-region (beg end)
  "Convert the text between BEG and END to the HZ encoding.
The value is the length of the text after the conversion."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region beg end)
      ;; Escape every literal tilde: "~" -> "~~".
      (goto-char (point-min))
      (while (search-forward "~" nil t)
        (insert ?~))
      ;; Starting at the first character matched by "\\cc", encode the
      ;; rest of the region as `iso-2022-7bit', then swap each ISO 2022
      ;; designation for the corresponding HZ marker.
      (goto-char (point-min))
      (when (re-search-forward "\\cc" nil t)
        (let ((start (match-beginning 0)))
          (goto-char start)
          (encode-coding-region start (point-max) 'iso-2022-7bit)
          (dolist (swap (list (cons iso2022-gb-designation
                                    hz-gb-designnation)
                              (cons iso2022-ascii-designation
                                    hz-ascii-designnation)))
            (goto-char start)
            (while (search-forward (car swap) nil t)
              ;; Both ISO 2022 designations are three characters long.
              (delete-char -3)
              (insert (cdr swap))))))
      (- (point-max) (point-min)))))
(defun encode-hz-buffer ()
  "Convert the text of the current buffer to the HZ encoding.
This calls `encode-hz-region' on the span from `point-min' to
`point-max' and returns its value."
  (interactive)
  (let ((start (point-min))
        (finish (point-max)))
    (encode-hz-region start finish)))
(eval-when-compile
(defun big5-to-flat-code (num)
  "Return the `flat code' of the Big 5 code NUM.
The flat code numbers the Big 5 code points consecutively: 0xA140
becomes 0, 0xA141 becomes 1, and so on, without gaps."
  (let* ((lead (/ num 256))
         (trail (% num 256))
         ;; Trail bytes below #xa1 are offset by 64, the others by 98,
         ;; which places the two trail-byte ranges back to back.
         (trail-bias (if (< trail #xa1) 64 98)))
    ;; Each lead byte accounts for 157 consecutive flat codes.
    (+ (* (- lead #xa1) 157)
       (- trail trail-bias))))
(defun flat-code-to-big5 (num)
  "Return the Big 5 code that corresponds to the `flat code' NUM.
This function undoes `big5-to-flat-code'."
  (let* ((row (/ num 157))
         (cell (% num 157))
         ;; The first 63 cells of a row get the offset 64, the remaining
         ;; cells get the offset 98.
         (trail (if (>= cell 63)
                    (+ cell 98)
                  (+ cell 64))))
    (+ (* (+ row #xa1) 256) trail)))
(defun euc-to-flat-code (num)
  "Return the `flat code' of the EUC code NUM (given in GL representation).
The flat code numbers the code points consecutively: 0x2121 becomes 0,
0x2122 becomes 1, and so on, without gaps."
  (let ((row (- (/ num 256) #x21))
        (col (- (% num 256) #x21)))
    ;; Each row holds 94 code points.
    (+ (* row 94) col)))
(defun flat-code-to-euc (num)
  "Return the EUC code (in GL representation) for the `flat code' NUM.
This function undoes `euc-to-flat-code'.  The value is a list of two
elements, the high byte followed by the low byte."
  (list (+ #x21 (/ num 94))
        (+ #x21 (% num 94))))
(defun expand-euc-big5-alist (alist)
  "Build a translation table from the mappings listed in ALIST.
Each element of ALIST has one of two forms:
  ((euc-charset . startchar) . (big5-range-begin . big5-range-end))
maps the Big 5 codes from BIG5-RANGE-BEGIN to BIG5-RANGE-END, in order,
to consecutive glyphs of EUC-CHARSET beginning at STARTCHAR (given in
GL representation);
  (euc-character . big5-charcode)
maps the single Big 5 code BIG5-CHARCODE to EUC-CHARACTER.
Every slot of the table is initialized to #xFF, so slots that no
element of ALIST fills keep that value.  An element of any other form
signals an error.
The value is the filled translation table."
  (let ((table (make-char-table 'translation-table #xFF)))
    (dolist (entry alist)
      (let ((source (car entry))
            (target (cdr entry)))
        (cond
         ;; Range entry: walk both code spaces in step, using flat codes
         ;; so that incrementing by one always yields the next code point.
         ((and (consp source) (consp target))
          (let ((flat-big5 (big5-to-flat-code (car target)))
                (flat-last (big5-to-flat-code (cdr target)))
                (flat-euc (euc-to-flat-code (cdr source)))
                (cs (car source)))
            (while (<= flat-big5 flat-last)
              (aset table
                    (decode-big5-char (flat-code-to-big5 flat-big5))
                    (apply (function make-char)
                           cs
                           (flat-code-to-euc flat-euc)))
              (setq flat-big5 (1+ flat-big5)
                    flat-euc (1+ flat-euc)))))
         ;; Single-glyph entry.
         ((and (char-valid-p source) (numberp target))
          (aset table (decode-big5-char target) source))
         (t
          (error "Unknown slot type: %S" entry)))))
    ;; The last parenthesis below closes the enclosing
    ;; `eval-when-compile' form.
    table)))
;; Define the translation table `big5-to-cns'.  The table is produced by
;; `expand-euc-big5-alist', called inside `eval-when-compile', from the
;; quoted alist below.  Each element is either
;;   ((CHARSET . START-CODE) . (BIG5-FIRST . BIG5-LAST))
;; which maps a run of consecutive Big 5 codes to consecutive glyphs of
;; CHARSET starting at START-CODE, or
;;   (CHARACTER . BIG5-CODE)
;; which maps a single Big 5 code.
;; NOTE(review): as displayed here the `?$(G..(B'-style character
;; literals look like ISO 2022 sequences whose ESC bytes are not visible
;; -- confirm the file's coding system before editing these lines.
;; NOTE(review): one line below carries two alist elements (the
;; #xC94A entry followed by the #xC94B range); both are still read as
;; separate elements.
(define-translation-table 'big5-to-cns
(eval-when-compile
(expand-euc-big5-alist
'(
((chinese-cns11643-1 . #x2121) . (#xA140 . #xA1F5))
(?$(G"X(B . #xA1F6)
(?$(G"W(B . #xA1F7)
((chinese-cns11643-1 . #x2259) . (#xA1F8 . #xA2AE))
((chinese-cns11643-1 . #x2421) . (#xA2AF . #xA3BF))
((chinese-cns11643-1 . #x4221) . (#xA3C0 . #xA3E0))
((chinese-cns11643-1 . #x4421) . (#xA440 . #xACFD))
(?$(GWS(B . #xACFE)
((chinese-cns11643-1 . #x5323) . (#xAD40 . #xAFCF))
((chinese-cns11643-1 . #x5754) . (#xAFD0 . #xBBC7))
((chinese-cns11643-1 . #x6B51) . (#xBBC8 . #xBE51))
(?$(GkP(B . #xBE52)
((chinese-cns11643-1 . #x6F5C) . (#xBE53 . #xC1AA))
((chinese-cns11643-1 . #x7536) . (#xC1AB . #xC2CA))
(?$(Gu5(B . #xC2CB)
((chinese-cns11643-1 . #x7737) . (#xC2CC . #xC360))
((chinese-cns11643-1 . #x782E) . (#xC361 . #xC3B8))
(?$(Gxe(B . #xC3B9)
(?$(Gxd(B . #xC3BA)
((chinese-cns11643-1 . #x7866) . (#xC3BB . #xC455))
(?$(Gx-(B . #xC456)
((chinese-cns11643-1 . #x7962) . (#xC457 . #xC67E))
((chinese-cns11643-1 . #x2621) . (#xC6A1 . #xC6BE))
(?$(G'#(B . #xC6BF)
(?$(G'$(B . #xC6C0)
(?$(G'&(B . #xC6C1)
(?$(G'((B . #xC6C2)
(?$(G'-(B . #xC6C3)
(?$(G'.(B . #xC6C4)
(?$(G'/(B . #xC6C5)
(?$(G'4(B . #xC6C6)
(?$(G'7(B . #xC6C7)
(?$(G':(B . #xC6C8)
(?$(G'<(B . #xC6C9)
(?$(G'B(B . #xC6CA)
(?$(G'G(B . #xC6CB)
(?$(G'N(B . #xC6CC)
(?$(G'S(B . #xC6CD)
(?$(G'T(B . #xC6CE)
(?$(G'U(B . #xC6CF)
(?$(G'Y(B . #xC6D0)
(?$(G'Z(B . #xC6D1)
(?$(G'a(B . #xC6D2)
(?$(G'f(B . #xC6D3)
(?$(G()(B . #xC6D4)
(?$(G(*(B . #xC6D5)
(?$(G(c(B . #xC6D6)
(?$(G(l(B . #xC6D7)
(?$(J!#(B . #xC879)
(?$(J!$(B . #xC87B)
(?$(J!*(B . #xC87D)
(?$(J!R(B . #xC8A2)
((chinese-cns11643-2 . #x2121) . (#xC940 . #xC949))
(?$(GDB(B . #xC94A) ((chinese-cns11643-2 . #x212B) . (#xC94B . #xC96B))
((chinese-cns11643-2 . #x214D) . (#xC96C . #xC9BD))
(?$(H!L(B . #xC9BE)
((chinese-cns11643-2 . #x217D) . (#xC9BF . #xC9EC))
((chinese-cns11643-2 . #x224E) . (#xC9ED . #xCAF6))
(?$(H"M(B . #xCAF7)
((chinese-cns11643-2 . #x2439) . (#xCAF8 . #xD6CB))
(?$(H>c(B . #xD6CC)
((chinese-cns11643-2 . #x3770) . (#xD6CD . #xD779))
(?$(H?j(B . #xD77A)
((chinese-cns11643-2 . #x387E) . (#xD77B . #xDADE))
(?$(H7o(B . #xDADF)
((chinese-cns11643-2 . #x3E64) . (#xDAE0 . #xDBA6))
((chinese-cns11643-2 . #x3F6B) . (#xDBA7 . #xDDFB))
(?$(HAv(B . #xDDFC);; a duplicate of #xDCD1
((chinese-cns11643-2 . #x4424) . (#xDDFD . #xE8A2))
((chinese-cns11643-2 . #x554C) . (#xE8A3 . #xE975))
((chinese-cns11643-2 . #x5723) . (#xE976 . #xEB5A))
((chinese-cns11643-2 . #x5A29) . (#xEB5B . #xEBF0))
(?$(HUK(B . #xEBF1)
((chinese-cns11643-2 . #x5B3F) . (#xEBF2 . #xECDD))
(?$(HW"(B . #xECDE)
((chinese-cns11643-2 . #x5C6A) . (#xECDF . #xEDA9))
((chinese-cns11643-2 . #x5D75) . (#xEDAA . #xEEEA))
(?$(Hd/(B . #xEEEB)
((chinese-cns11643-2 . #x6039) . (#xEEEC . #xF055))
(?$(H]t(B . #xF056)
((chinese-cns11643-2 . #x6243) . (#xF057 . #xF0CA))
(?$(HZ((B . #xF0CB)
((chinese-cns11643-2 . #x6337) . (#xF0CC . #xF162))
((chinese-cns11643-2 . #x6430) . (#xF163 . #xF16A))
(?$(Hga(B . #xF16B)
((chinese-cns11643-2 . #x6438) . (#xF16C . #xF267))
(?$(Hi4(B . #xF268)
((chinese-cns11643-2 . #x6573) . (#xF269 . #xF2C2))
((chinese-cns11643-2 . #x664E) . (#xF2C3 . #xF374))
((chinese-cns11643-2 . #x6762) . (#xF375 . #xF465))
((chinese-cns11643-2 . #x6935) . (#xF466 . #xF4B4))
(?$(HfM(B . #xF4B5)
((chinese-cns11643-2 . #x6962) . (#xF4B6 . #xF4FC))
((chinese-cns11643-2 . #x6A4C) . (#xF4FD . #xF662))
(?$(HjK(B . #xF663)
((chinese-cns11643-2 . #x6C52) . (#xF664 . #xF976))
((chinese-cns11643-2 . #x7167) . (#xF977 . #xF9C3))
(?$(Hqf(B . #xF9C4)
(?$(Hr4(B . #xF9C5)
(?$(Hr@(B . #xF9C6)
((chinese-cns11643-2 . #x7235) . (#xF9C7 . #xF9D1))
((chinese-cns11643-2 . #x7241) . (#xF9D2 . #xF9D5))
(?$(IC7(B . #xF9D6)
(?$(IOP(B . #xF9D7)
(?$(IDN(B . #xF9D8)
(?$(IPJ(B . #xF9D9)
(?$(I,](B . #xF9DA)
(?$(I=~(B . #xF9DB)
(?$(IK\(B . #xF9DC)
)
))
)
;; Announce the feature `china-util' so that `require' can find it.
(provide 'china-util)