summary | refs | log | tree | commit | diff
path: root/lisp/international/ja-dic-cnv.el
diff options
context:
space:
mode:
author: Noam Postavsky <npostavs@gmail.com> 2017-07-01 22:39:16 -0400
committer: Noam Postavsky <npostavs@gmail.com> 2017-08-21 20:52:25 -0400
commit: 9d7973530f912c6001445ba9b83b7893b466aee8 (patch)
tree: 4581c401602c8def0331858ff9ef16122b00229d /lisp/international/ja-dic-cnv.el
parent: ba0bb332dd841274208f71e0739e0c5e5d231d7a (diff)
download: emacs-9d7973530f912c6001445ba9b83b7893b466aee8.tar.gz
Optimize skkdic conversion (Bug#28043)
The primary speedup comes from optimizing lookup-nested-alist and
set-nested-alist for the case where the key is a string.  This brings
the time down to less than half the original.

* lisp/international/mule-util.el (lookup-nested-alist)
(set-nested-alist): Use `assq' instead of `assoc' when KEYSEQ is a
string.
* lisp/international/ja-dic-cnv.el (skkdic-collect-okuri-nasi)
(skkdic-convert-okuri-nasi): Use progress-reporter functions instead
of calculating ratio of work done inline.
(skkdic-reduced-candidates): Call `char-category-set' on the first
character of the string directly, instead of using a regexp for the
character category.
(skkdic--japanese-category-set): New constant.
(skkdic-collect-okuri-nasi): Just set
`skkdic-okuri-nasi-entries-count' at once at the end rather than
updating it throughout the loop.
(skkdic-convert-postfix, skkdic-convert-prefix)
(skkdic-get-candidate-list, skkdic-collect-okuri-nasi)
(skkdic-extract-conversion-data): Use `match-string-no-properties'
instead of `match-string' (or `buffer-substring-no-properties'
instead of `buffer-substring').
Diffstat (limited to 'lisp/international/ja-dic-cnv.el')
-rw-r--r-- lisp/international/ja-dic-cnv.el | 61
1 file changed, 29 insertions, 32 deletions
diff --git a/lisp/international/ja-dic-cnv.el b/lisp/international/ja-dic-cnv.el
index e80b1b28810..63eede093d5 100644
--- a/lisp/international/ja-dic-cnv.el
+++ b/lisp/international/ja-dic-cnv.el
@@ -125,10 +125,10 @@
;; Search postfix entries.
(while (re-search-forward "^[#<>?]\\(\\(\\cH\\|$B!<(B\\)+\\) " nil t)
- (let ((kana (match-string 1))
+ (let ((kana (match-string-no-properties 1))
str candidates)
(while (looking-at "/[#0-9 ]*\\([^/\n]*\\)/")
- (setq str (match-string 1))
+ (setq str (match-string-no-properties 1))
(if (not (member str candidates))
(setq candidates (cons str candidates)))
(goto-char (match-end 1)))
@@ -158,10 +158,10 @@
"(skkdic-set-prefix\n"))
(save-excursion
(while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\)[<>?] " nil t)
- (let ((kana (match-string 1))
+ (let ((kana (match-string-no-properties 1))
str candidates)
(while (looking-at "/\\([^/\n]+\\)/")
- (setq str (match-string 1))
+ (setq str (match-string-no-properties 1))
(if (not (member str candidates))
(setq candidates (cons str candidates)))
(goto-char (match-end 1)))
@@ -180,8 +180,8 @@
(let (candidates)
(goto-char from)
(while (re-search-forward "/[^/ \n]+" to t)
- (setq candidates (cons (buffer-substring (1+ (match-beginning 0))
- (match-end 0))
+ (setq candidates (cons (buffer-substring-no-properties
+ (1+ (match-beginning 0)) (match-end 0))
candidates)))
candidates))
@@ -251,12 +251,16 @@
;; Return list of candidates which excludes some from CANDIDATES.
;; Excluded candidates can be derived from another entry.
+(defconst skkdic--japanese-category-set (make-category-set "j"))
+
(defun skkdic-reduced-candidates (skkbuf kana candidates)
(let (elt l)
(while candidates
(setq elt (car candidates))
(if (or (= (length elt) 1)
- (and (string-match "^\\cj" elt)
+ (and (bool-vector-subsetp
+ skkdic--japanese-category-set
+ (char-category-set (aref elt 0)))
(not (skkdic-breakup-string skkbuf kana elt 0 (length elt)
'first))))
(setq l (cons elt l)))
@@ -267,24 +271,18 @@
(defvar skkdic-okuri-nasi-entries-count 0)
(defun skkdic-collect-okuri-nasi ()
- (message "Collecting OKURI-NASI entries ...")
(save-excursion
- (let ((prev-ratio 0)
- ratio)
+ (let ((progress (make-progress-reporter "Collecting OKURI-NASI entries"
+ (point) (point-max)
+ nil 10)))
(while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\) \\(/\\cj.*\\)/$"
nil t)
- (let ((kana (match-string 1))
+ (let ((kana (match-string-no-properties 1))
(candidates (skkdic-get-candidate-list (match-beginning 3)
(match-end 3))))
(setq skkdic-okuri-nasi-entries
- (cons (cons kana candidates) skkdic-okuri-nasi-entries)
- skkdic-okuri-nasi-entries-count
- (1+ skkdic-okuri-nasi-entries-count))
- (setq ratio (floor (* (point) 100.0) (point-max)))
- (if (/= (/ prev-ratio 10) (/ ratio 10))
- (progn
- (message "collected %2d%% ..." ratio)
- (setq prev-ratio ratio)))
+ (cons (cons kana candidates) skkdic-okuri-nasi-entries))
+ (progress-reporter-update progress (point))
(while candidates
(let ((entry (lookup-nested-alist (car candidates)
skkdic-word-list nil nil t)))
@@ -292,26 +290,24 @@
(setcar entry (cons kana (car entry)))
(set-nested-alist (car candidates) (list kana)
skkdic-word-list)))
- (setq candidates (cdr candidates))))))))
+ (setq candidates (cdr candidates)))))
+ (setq skkdic-okuri-nasi-entries-count (length skkdic-okuri-nasi-entries))
+ (progress-reporter-done progress))))
(defun skkdic-convert-okuri-nasi (skkbuf buf)
- (message "Processing OKURI-NASI entries ...")
(with-current-buffer buf
(insert ";; Setting okuri-nasi entries.\n"
"(skkdic-set-okuri-nasi\n")
(let ((l (nreverse skkdic-okuri-nasi-entries))
- (count 0)
- (prev-ratio 0)
- ratio)
+ (progress (make-progress-reporter "Processing OKURI-NASI entries"
+ 0 skkdic-okuri-nasi-entries-count
+ nil 10))
+ (count 0))
(while l
(let ((kana (car (car l)))
(candidates (cdr (car l))))
- (setq ratio (floor (* count 100.0) skkdic-okuri-nasi-entries-count)
- count (1+ count))
- (if (/= (/ prev-ratio 10) (/ ratio 10))
- (progn
- (message "processed %2d%% ..." ratio)
- (setq prev-ratio ratio)))
+ (setq count (1+ count))
+ (progress-reporter-update progress count)
(if (setq candidates
(skkdic-reduced-candidates skkbuf kana candidates))
(progn
@@ -320,7 +316,8 @@
(insert " " (car candidates))
(setq candidates (cdr candidates)))
(insert "\"\n"))))
- (setq l (cdr l))))
+ (setq l (cdr l)))
+ (progress-reporter-done progress))
(insert ")\n\n")))
(defun skkdic-convert (filename &optional dirname)
@@ -467,7 +464,7 @@ To get complete usage, invoke:
(i (match-end 0))
candidates)
(while (string-match "[^ ]+" entry i)
- (setq candidates (cons (match-string 0 entry) candidates))
+ (setq candidates (cons (match-string-no-properties 0 entry) candidates))
(setq i (match-end 0)))
(cons (skkdic-get-kana-compact-codes kana) candidates)))