summaryrefslogtreecommitdiff
path: root/lisp
diff options
context:
space:
mode:
authorLars Magne Ingebrigtsen <larsi@gnus.org>2014-11-03 01:01:20 +0100
committerLars Magne Ingebrigtsen <larsi@gnus.org>2014-11-03 01:01:20 +0100
commit2e8259b044fda2a6424b71eb8368cafa2fa6d86e (patch)
treee24c9c4697446de0489d776de66a660cc01d0680 /lisp
parent816cad6e2414474b06ebb4f691fc1bdb9a8953a4 (diff)
downloademacs-2e8259b044fda2a6424b71eb8368cafa2fa6d86e.tar.gz
Add a new, somewhat experimental "readability" command to eww
* net/eww.el (eww-readable): New command and keystroke. * net/shr.el (shr-retransform-dom): New function.
Diffstat (limited to 'lisp')
-rw-r--r--lisp/ChangeLog4
-rw-r--r--lisp/net/eww.el56
-rw-r--r--lisp/net/shr.el20
3 files changed, 80 insertions, 0 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index eb374375198..b6e32f285ce 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,5 +1,9 @@
2014-11-02 Lars Magne Ingebrigtsen <larsi@gnus.org>
+ * net/eww.el (eww-readable): New command and keystroke.
+
+ * net/shr.el (shr-retransform-dom): New function.
+
* net/eww.el (eww-display-html): Set `eww-current-source' in the
correct buffer.
(eww-view-source): Use it.
diff --git a/lisp/net/eww.el b/lisp/net/eww.el
index e4acd69ef4d..579f0878bbd 100644
--- a/lisp/net/eww.el
+++ b/lisp/net/eww.el
@@ -402,6 +402,7 @@ word(s) will be searched for via `eww-search-prefix'."
(setq-local eww-contents-url nil))
(defun eww-view-source ()
+ "View the HTML source code of the current page."
(interactive)
(let ((buf (get-buffer-create "*eww-source*"))
(source eww-current-source))
@@ -413,6 +414,60 @@ word(s) will be searched for via `eww-search-prefix'."
(html-mode)))
(view-buffer buf)))
+(defun eww-readable ()
+ "View the main \"readable\" parts of the current web page.
+This command uses heuristics to find the parts of the web page that
+contains the main textual portion, leaving out navigation menus and
+the like."
+ (interactive)
+ (let* ((source eww-current-source)
+ (dom (shr-transform-dom
+ (with-temp-buffer
+ (insert source)
+ (libxml-parse-html-region (point-min) (point-max))))))
+ (eww-score-readability dom)
+ (eww-display-html 'utf-8 nil (shr-retransform-dom
+ (eww-highest-readability dom)))
+ (setq eww-current-source source)))
+
+(defun eww-score-readability (node)
+ (let ((score -1))
+ (cond
+ ((memq (car node) '(script head))
+ (setq score -2))
+ ((eq (car node) 'meta)
+ (setq score -1))
+ ((eq (car node) 'a)
+ (setq score (- (length (split-string
+ (or (cdr (assoc 'text (cdr node))) ""))))))
+ (t
+ (dolist (elem (cdr node))
+ (cond
+ ((eq (car elem) 'text)
+ (setq score (+ score (length (split-string (cdr elem))))))
+ ((consp (cdr elem))
+ (setq score (+ score
+ (or (cdr (assoc :eww-readability-score (cdr elem)))
+ (eww-score-readability elem)))))))))
+ ;; Cache the score of the node to avoid recomputing all the time.
+ (setcdr node (cons (cons :eww-readability-score score) (cdr node)))
+ score))
+
+(defun eww-highest-readability (node)
+ (let ((result node)
+ highest)
+ (dolist (elem (cdr node))
+ (when (and (consp (cdr elem))
+ (> (or (cdr (assoc
+ :eww-readability-score
+ (setq highest
+ (eww-highest-readability elem))))
+ most-negative-fixnum)
+ (or (cdr (assoc :eww-readability-score (cdr result)))
+ most-negative-fixnum)))
+ (setq result highest)))
+ result))
+
(defvar eww-mode-map
(let ((map (make-sparse-keymap)))
(suppress-keymap map)
@@ -435,6 +490,7 @@ word(s) will be searched for via `eww-search-prefix'."
(define-key map "w" 'eww-copy-page-url)
(define-key map "C" 'url-cookie-list)
(define-key map "v" 'eww-view-source)
+ (define-key map "R" 'eww-readable)
(define-key map "H" 'eww-list-histories)
(define-key map "b" 'eww-add-bookmark)
diff --git a/lisp/net/shr.el b/lisp/net/shr.el
index 878728c9319..59326de64dd 100644
--- a/lisp/net/shr.el
+++ b/lisp/net/shr.el
@@ -370,6 +370,26 @@ size, and full-buffer size."
(push (shr-transform-dom sub) result)))
(nreverse result)))
+(defun shr-retransform-dom (dom)
+ "Transform the shr DOM back into the libxml DOM."
+ (let ((tag (car dom))
+ (attributes nil)
+ (text nil)
+ (sub-nodes nil))
+ (dolist (elem (cdr dom))
+ (cond
+ ((eq (car elem) 'text)
+ (setq text (cdr elem)))
+ ((not (consp (cdr elem)))
+ (push (cons (intern (substring (symbol-name (car elem)) 1) obarray)
+ (cdr elem))
+ attributes))
+ (t
+ (push (shr-retransform-dom elem) sub-nodes))))
+ (append (list tag (nreverse attributes))
+ (nreverse sub-nodes)
+ (and text (list text)))))
+
(defsubst shr-generic (cont)
(dolist (sub cont)
(cond