diff options
Diffstat (limited to 'lisp/net/newst-backend.el')
-rw-r--r-- | lisp/net/newst-backend.el | 506 |
1 files changed, 337 insertions, 169 deletions
diff --git a/lisp/net/newst-backend.el b/lisp/net/newst-backend.el index f017345e8cb..072fd015b60 100644 --- a/lisp/net/newst-backend.el +++ b/lisp/net/newst-backend.el @@ -1,12 +1,11 @@ ;;; newst-backend.el --- Retrieval backend for newsticker. -;; Copyright (C) 2003-2013 Free Software Foundation, Inc. +;; Copyright (C) 2003-2015 Free Software Foundation, Inc. ;; Author: Ulf Jasper <ulf.jasper@web.de> ;; Filename: newst-backend.el ;; URL: http://www.nongnu.org/newsticker ;; Keywords: News, RSS, Atom -;; Time-stamp: "13. Mai 2011, 20:47:05 (ulf)" ;; Package: newsticker ;; ====================================================================== @@ -37,6 +36,7 @@ (require 'derived) (require 'xml) +(require 'url-parse) ;; Silence warnings (defvar w3-mode-map) @@ -47,9 +47,6 @@ "List of timers for news retrieval. This is an alist, each element consisting of (feed-name . timer).") -(defvar newsticker--download-logos nil - "If non-nil download feed logos if available.") - (defvar newsticker--sentinel-callback nil "Function called at end of `newsticker--sentinel'.") @@ -196,7 +193,7 @@ RSS or Atom file. The file is retrieved by calling wget, or whatever you specify as `newsticker-wget-name'. URL may also be a function which returns news data. In this case -`newsticker-retrieval-method' etc. are ignored for this feed. +`newsticker-retrieval-method' etc. are ignored for this feed. The START-TIME can be either a string, or nil. If it is a string it specifies a fixed time at which this feed shall be retrieved for the @@ -238,7 +235,7 @@ which apply for this feed only, overriding the value of 'intern "Method for retrieving news from the web, either `intern' or `extern'. Default value `intern' uses Emacs' built-in asynchronous download -capabilities ('url-retrieve'). If set to `extern' the external +capabilities (`url-retrieve'). If set to `extern' the external program wget is used, see `newsticker-wget-name'." :type '(choice :tag "Method" (const :tag "Intern" intern) @@ -335,9 +332,9 @@ deleted at the next retrieval." This is an alist of the form (FEED-NAME PATTERN-LIST). I.e. each element consists of a FEED-NAME a PATTERN-LIST. Each element of the pattern-list has the form (AGE TITLE-OR-DESCRIPTION REGEXP). -AGE must be one of the symbols 'old or 'immortal. -TITLE-OR-DESCRIPTION must be on of the symbols 'title, -'description, or 'all. REGEXP is a regular expression, i.e. a +AGE must be one of the symbols `old' or `immortal'. +TITLE-OR-DESCRIPTION must be one of the symbols `title', +`description', or `all'. REGEXP is a regular expression, i.e., a string. This filter is checked after a new headline has been retrieved. @@ -346,8 +343,8 @@ pattern-list is checked: The new headline will be marked as AGE if REGEXP matches the headline's TITLE-OR-DESCRIPTION. If, for example, `newsticker-auto-mark-filter-list' looks like - \((slashdot ('old 'title \"^Forget me!$\") ('immortal 'title \"Read me\") - \('immortal 'all \"important\")))) + ((slashdot (\\='old \\='title \"^Forget me!$\") (\\='immortal \\='title \"Read me\") + (\\='immortal \\='all \"important\")))) then all articles from slashdot are marked as old if they have the title \"Forget me!\". All articles with a title containing @@ -483,14 +480,6 @@ that can be added." ;; ====================================================================== ;;; Internal variables ;; ====================================================================== -(defvar newsticker--item-list nil - "List of newsticker items.") -(defvar newsticker--item-position 0 - "Actual position in list of newsticker items.") -(defvar newsticker--prev-message "There was no previous message yet!" - "Last message that the newsticker displayed.") -(defvar newsticker--scrollable-text "" - "The text which is scrolled smoothly in the echo area.") (defvar newsticker--buffer-uptodate-p nil "Tells whether the newsticker buffer is up to date.") (defvar newsticker--latest-update-time (current-time) @@ -573,7 +562,7 @@ If non-nil only the current headline is visible.") "Return guid of ITEM." (newsticker--guid-to-string (assoc 'guid (newsticker--extra item)))) (defsubst newsticker--enclosure (item) - "Return enclosure element of ITEM in the form \(...FIXME...\) or nil." + "Return enclosure element of ITEM in the form (...FIXME...) or nil." (let ((enclosure (assoc 'enclosure (newsticker--extra item)))) (if enclosure (xml-node-attributes enclosure)))) @@ -756,10 +745,14 @@ from." (insert result) ;; remove MIME header (goto-char (point-min)) - (search-forward "\n\n") + (search-forward "\n\n" nil t) (delete-region (point-min) (point)) ;; read the rss/atom contents - (newsticker--sentinel-work nil t feed-name "url-retrieve" (current-buffer)) + (newsticker--sentinel-work nil + (or (not status) + (not (eq (car status) :error))) + feed-name "url-retrieve" + (current-buffer)) (when status (let ((status-type (car status)) (status-details (cdr status))) @@ -768,7 +761,7 @@ from." ) ((eq status-type :error) (message "%s: Error while retrieving news from %s: %s: \"%s\"" - (format-time-string "%A, %H:%M" (current-time)) + (format-time-string "%A, %H:%M") feed-name (car status-details) (cdr status-details)))))))) @@ -788,6 +781,7 @@ See `newsticker-get-news'." newsticker-wget-name args))) (set-process-coding-system proc 'no-conversion 'no-conversion) (set-process-sentinel proc 'newsticker--sentinel) + (process-put proc 'nt-feed-name feed-name) (setq newsticker--process-ids (cons (process-id proc) newsticker--process-ids)) (force-mode-line-update))))) @@ -797,7 +791,7 @@ See `newsticker-get-news'." FEED-NAME must be a string which occurs as the label (i.e. the first element) in an element of `newsticker-url-list' or `newsticker-url-list-defaults'." (newsticker--debug-msg "%s: Getting news for %s" - (format-time-string "%A, %H:%M" (current-time)) + (format-time-string "%A, %H:%M") feed-name) (let* ((item (or (assoc feed-name newsticker-url-list) (assoc feed-name newsticker-url-list-defaults) @@ -823,25 +817,26 @@ Argument PROCESS is the process which has just changed its state. Argument EVENT tells what has happened to the process." (let ((p-status (process-status process)) (exit-status (process-exit-status process)) - (name (process-name process)) + (feed-name (process-get process 'nt-feed-name)) (command (process-command process)) (buffer (process-buffer process))) (newsticker--sentinel-work event (and (eq p-status 'exit) (= exit-status 0)) - name command buffer))) + feed-name command buffer))) -(defun newsticker--sentinel-work (event status-ok name command buffer) +(defun newsticker--sentinel-work (event status-ok feed-name command buffer) "Actually do the sentinel work. Argument EVENT tells what has happened to the retrieval process. Argument STATUS-OK is the final status of the retrieval process, non-nil meaning retrieval was successful. -Argument NAME is the name of the retrieval process. +Argument FEED-NAME is the name of the retrieved feed. Argument COMMAND is the command of the retrieval process. Argument BUFFER is the buffer of the retrieval process." (let ((time (current-time)) - (name-symbol (intern name)) - (something-was-added nil)) + (name-symbol (intern feed-name)) + (something-was-added nil) + (ct (current-time))) ;; catch known errors (zombie processes, rubbish-xml etc. ;; if an error occurs the news feed is not updated! (catch 'oops @@ -851,77 +846,30 @@ Argument BUFFER is the buffer of the retrieval process." newsticker--cache name-symbol newsticker--error-headline - (format + (format-message (concat "%s: Newsticker could not retrieve news from %s.\n" "Return status: `%s'\n" "Command was `%s'") - (format-time-string "%A, %H:%M" (current-time)) - name event command) + (format-time-string "%A, %H:%M") + feed-name event command) "" - (current-time) + ct 'new - 0 nil)) + 0 '((guid nil "newsticker--download-error")) + ct)) (message "%s: Error while retrieving news from %s" - (format-time-string "%A, %H:%M" (current-time)) - name) + (format-time-string "%A, %H:%M") + feed-name) (throw 'oops nil)) (let* ((coding-system 'utf-8) (node-list (save-current-buffer (set-buffer buffer) - ;; a very very dirty workaround to overcome the - ;; problems with the newest (20030621) xml.el: - ;; remove all unnecessary whitespace - (goto-char (point-min)) - (while (re-search-forward ">[ \t\r\n]+<" nil t) - (replace-match "><" nil t)) - ;; and another brutal workaround (20031105)! For some - ;; reason the xml parser does not like the colon in the - ;; doctype name "rdf:RDF" - (goto-char (point-min)) - (if (re-search-forward "<!DOCTYPE[ \t\n]+rdf:RDF" nil t) - (replace-match "<!DOCTYPE rdfColonRDF" nil t)) - ;; finally.... ~##^°!!!!! - (goto-char (point-min)) - (while (search-forward "\r\n" nil t) - (replace-match "\n" nil t)) - ;; still more brutal workarounds (20040309)! The xml - ;; parser does not like doctype rss - (goto-char (point-min)) - (if (re-search-forward "<!DOCTYPE[ \t\n]+rss[ \t\n]*>" nil t) - (replace-match "" nil t)) - ;; And another one (20050618)! (Fixed in GNU Emacs 22.0.50.18) - ;; Remove comments to avoid this xml-parsing bug: - ;; "XML files can have only one toplevel tag" - (goto-char (point-min)) - (while (search-forward "<!--" nil t) - (let ((start (match-beginning 0))) - (unless (search-forward "-->" nil t) - (error "Can't find end of comment")) - (delete-region start (point)))) - ;; And another one (20050702)! If description is HTML - ;; encoded and starts with a `<', wrap the whole - ;; description in a CDATA expression. This happened for - ;; http://www.thefreedictionary.com/_/WoD/rss.aspx?type=quote - (goto-char (point-min)) - (while (re-search-forward - "<description>\\(<img.*?\\)</description>" nil t) - (replace-match - "<description><![CDATA[ \\1 ]]></description>")) - ;; And another one (20051123)! XML parser does not - ;; like this: <yweather:location city="Frankfurt/Main" - ;; region="" country="GM" /> - ;; try to "fix" empty attributes - ;; This happened for - ;; http://xml.weather.yahoo.com/forecastrss?p=GMXX0040&u=f - (goto-char (point-min)) - (while (re-search-forward "\\(<[^>]*\\)=\"\"" nil t) - (replace-match "\\1=\" \"")) - ;; - (set-buffer-modified-p nil) + (unless (fboundp 'libxml-parse-xml-region) + (newsticker--do-xml-workarounds)) ;; check coding system (goto-char (point-min)) - (if (re-search-forward "encoding=\"\\([^\"]+\\)\"" + (if (re-search-forward "encoding=['\"]\\([^\"]+?\\)['\"]" nil t) (setq coding-system (intern (downcase (match-string 1)))) (setq coding-system @@ -930,22 +878,25 @@ Argument BUFFER is the buffer of the retrieval process." (coding-system-error (message "newsticker.el: ignoring coding system %s for %s" - coding-system name) + coding-system feed-name) nil)))) ;; Decode if possible (when coding-system (decode-coding-region (point-min) (point-max) coding-system)) (condition-case errordata - ;; The xml parser might fail - ;; or the xml might be bugged - (xml-parse-region (point-min) (point-max)) + ;; The xml parser might fail or the xml might be + ;; bugged + (if (fboundp 'libxml-parse-xml-region) + (list (libxml-parse-xml-region (point-min) (point-max) + nil t)) + (xml-parse-region (point-min) (point-max))) (error (message "Could not parse %s: %s" (buffer-name) (cadr errordata)) (throw 'oops nil))))) (topnode (car node-list)) - (channelnode (car (xml-get-children topnode 'channel))) - (imageurl nil)) + (image-url nil) + (icon-url nil)) ;; mark all items as obsolete (newsticker--cache-replace-age newsticker--cache name-symbol @@ -963,41 +914,51 @@ Argument BUFFER is the buffer of the retrieval process." ;; RSS 0.91 ((and (eq 'rss (xml-node-name topnode)) (string= "0.91" (xml-get-attribute topnode 'version))) - (setq imageurl (newsticker--get-logo-url-rss-0.91 topnode)) - (newsticker--parse-rss-0.91 name time topnode)) + (setq image-url (newsticker--get-logo-url-rss-0.91 topnode)) + (newsticker--parse-rss-0.91 feed-name time topnode)) ;; RSS 0.92 ((and (eq 'rss (xml-node-name topnode)) (string= "0.92" (xml-get-attribute topnode 'version))) - (setq imageurl (newsticker--get-logo-url-rss-0.92 topnode)) - (newsticker--parse-rss-0.92 name time topnode)) + (setq image-url (newsticker--get-logo-url-rss-0.92 topnode)) + (newsticker--parse-rss-0.92 feed-name time topnode)) ;; RSS 1.0 - ((eq 'rdf:RDF (xml-node-name topnode)) - (setq imageurl (newsticker--get-logo-url-rss-1.0 topnode)) - (newsticker--parse-rss-1.0 name time topnode)) + ((or (eq 'RDF (xml-node-name topnode)) + (eq 'rdf:RDF (xml-node-name topnode))) + (setq image-url (newsticker--get-logo-url-rss-1.0 topnode)) + (newsticker--parse-rss-1.0 feed-name time topnode)) ;; RSS 2.0 ((and (eq 'rss (xml-node-name topnode)) (string= "2.0" (xml-get-attribute topnode 'version))) - (setq imageurl (newsticker--get-logo-url-rss-2.0 topnode)) - (newsticker--parse-rss-2.0 name time topnode)) + (setq image-url (newsticker--get-logo-url-rss-2.0 topnode)) + (newsticker--parse-rss-2.0 feed-name time topnode)) ;; Atom 0.3 ((and (eq 'feed (xml-node-name topnode)) (string= "http://purl.org/atom/ns#" (xml-get-attribute topnode 'xmlns))) - (setq imageurl (newsticker--get-logo-url-atom-0.3 topnode)) - (newsticker--parse-atom-0.3 name time topnode)) + (setq image-url (newsticker--get-logo-url-atom-0.3 topnode)) + (newsticker--parse-atom-0.3 feed-name time topnode)) ;; Atom 1.0 - ((and (eq 'feed (xml-node-name topnode)) - (string= "http://www.w3.org/2005/Atom" - (xml-get-attribute topnode 'xmlns))) - (setq imageurl (newsticker--get-logo-url-atom-1.0 topnode)) - (newsticker--parse-atom-1.0 name time topnode)) - ;; unknown feed type (t - (newsticker--debug-msg "Feed type unknown: %s: %s" - (xml-node-name topnode) name) - nil)) + ;; The test for Atom 1.0 does not work when using + ;; libxml, as with libxml the namespace attribute is + ;; not in the xml tree. For the time being we skip + ;; the check and assume that we are dealing with an + ;; Atom 1.0 feed. + + ;; (and (eq 'feed (xml-node-name topnode)) + ;; (string= "http://www.w3.org/2005/Atom" + ;; (xml-get-attribute topnode 'xmlns))) + (setq image-url (newsticker--get-logo-url-atom-1.0 topnode)) + (setq icon-url (newsticker--get-icon-url-atom-1.0 topnode)) + (newsticker--parse-atom-1.0 feed-name time topnode)) + ;; unknown feed type + ;; (t + ;; (newsticker--debug-msg "Feed type unknown: %s: %s" + ;; (xml-node-name topnode) feed-name) + ;; nil) + ) (setq something-was-added t)) - (error (message "sentinelerror in %s: %s" name error-data))) + (error (message "sentinelerror in %s: %s" feed-name error-data))) ;; Remove those old items from cache which have been removed from ;; the feed @@ -1038,17 +999,97 @@ Argument BUFFER is the buffer of the retrieval process." ;; kill the process buffer if wanted (unless newsticker-debug (kill-buffer buffer)) - ;; launch retrieval of image - (when (and imageurl newsticker--download-logos) - (newsticker--image-get name imageurl))))) + ;; launch retrieval of images + (when (and (boundp 'newsticker-download-logos) + newsticker-download-logos) + ;; feed logo + (when image-url + (newsticker--image-get feed-name feed-name (newsticker--images-dir) + image-url)) + ;; icon / favicon + (setq icon-url + (or icon-url + (let* ((feed-url (newsticker--link (cadr (newsticker--cache-get-feed + (intern feed-name))))) + (uri (url-generic-parse-url feed-url))) + (when (and feed-url uri) + (setf (url-filename uri) nil) + (setf (url-target uri) nil) + (concat (url-recreate-url uri) "favicon.ico"))))) + (when icon-url + (newsticker--image-get feed-name + (concat feed-name "." + (file-name-extension icon-url)) + (newsticker--icons-dir) + icon-url)))))) (when newsticker--sentinel-callback (funcall newsticker--sentinel-callback))) +(defun newsticker--do-xml-workarounds () + "Fix all issues which `xml-parse-region' could be choking on." + + ;; a very very dirty workaround to overcome the + ;; problems with the newest (20030621) xml.el: + ;; remove all unnecessary whitespace + (goto-char (point-min)) + (while (re-search-forward ">[ \t\r\n]+<" nil t) + (replace-match "><" nil t)) + ;; and another brutal workaround (20031105)! For some + ;; reason the xml parser does not like the colon in the + ;; doctype name "rdf:RDF" + (goto-char (point-min)) + (if (re-search-forward "<!DOCTYPE[ \t\n]+rdf:RDF" nil t) + (replace-match "<!DOCTYPE rdfColonRDF" nil t)) + ;; finally.... ~##^°!!!!! + (goto-char (point-min)) + (while (search-forward "\r\n" nil t) + (replace-match "\n" nil t)) + ;; still more brutal workarounds (20040309)! The xml + ;; parser does not like doctype rss + (goto-char (point-min)) + (if (re-search-forward "<!DOCTYPE[ \t\n]+rss[ \t\n]*>" nil t) + (replace-match "" nil t)) + ;; And another one (20050618)! (Fixed in GNU Emacs 22.0.50.18) + ;; Remove comments to avoid this xml-parsing bug: + ;; "XML files can have only one toplevel tag" + (goto-char (point-min)) + (while (search-forward "<!--" nil t) + (let ((start (match-beginning 0))) + (unless (search-forward "-->" nil t) + (error "Can't find end of comment")) + (delete-region start (point)))) + ;; And another one (20050702)! If description is HTML + ;; encoded and starts with a `<', wrap the whole + ;; description in a CDATA expression. This happened for + ;; http://www.thefreedictionary.com/_/WoD/rss.aspx?type=quote + (goto-char (point-min)) + (while (re-search-forward + "<description>\\(<img.*?\\)</description>" nil t) + (replace-match + "<description><![CDATA[ \\1 ]]></description>")) + ;; And another one (20051123)! XML parser does not + ;; like this: <yweather:location city="Frankfurt/Main" + ;; region="" country="GM" /> + ;; try to "fix" empty attributes + ;; This happened for + ;; http://xml.weather.yahoo.com/forecastrss?p=GMXX0040&u=f + (goto-char (point-min)) + (while (re-search-forward "\\(<[^>]*\\)=\"\"" nil t) + (replace-match "\\1=\" \"")) + ;; + (set-buffer-modified-p nil)) + + (defun newsticker--get-logo-url-atom-1.0 (node) "Return logo URL from atom 1.0 data in NODE." (car (xml-node-children (car (xml-get-children node 'logo))))) +(defun newsticker--get-icon-url-atom-1.0 (node) + "Return icon URL from atom 1.0 data in NODE." + (car (xml-node-children + (car (xml-get-children node 'icon))))) + (defun newsticker--get-logo-url-atom-0.3 (node) "Return logo URL from atom 0.3 data in NODE." (car (xml-node-children @@ -1125,6 +1166,30 @@ same as in `newsticker--parse-atom-1.0'." (xml-node-children node)))) (or new-item new-feed))) +(defun newsticker--unxml (node) + "Reverse parsing of an xml string. +Restore an xml-string from a an xml NODE that was returned by xml-parse..." + (if (or (not node) (stringp node)) + node + (newsticker--unxml-node node))) + +(defun newsticker--unxml-node (node) + "Actually restore xml-string of an xml NODE." + (let ((qname (symbol-name (car node))) + (att-list (cadr node)) + (children (cddr node))) + (concat "<" qname + (when att-list " ") + (mapconcat 'newsticker--unxml-attribute att-list " ") + ">" + (mapconcat 'newsticker--unxml children "") "</" qname ">"))) + +(defun newsticker--unxml-attribute (attribute) + "Actually restore xml-string of an ATTRIBUTE of an xml node." + (let ((name (symbol-name (car attribute))) + (value (cdr attribute))) + (concat name "=\"" value "\""))) + (defun newsticker--parse-atom-1.0 (name time topnode) "Parse Atom 1.0 data. Argument NAME gives the name of a news feed. TIME gives the @@ -1157,8 +1222,17 @@ URL `http://www.atompub.org/2005/08/17/draft-ietf-atompub-format-11.html'" (car (xml-get-children node 'title))))) ;; desc-fn (lambda (node) - (or (car (xml-node-children - (car (xml-get-children node 'content)))) + ;; unxml the content or the summary node. Atom + ;; allows for integrating (x)html into the atom + ;; structure but we need the raw html string. + ;; e.g. http://www.heise.de/open/news/news-atom.xml + ;; http://feeds.feedburner.com/ru_nix_blogs + (or (newsticker--unxml + (car (xml-node-children + (car (xml-get-children node 'content))))) + (newsticker--unxml + (car (xml-node-children + (car (xml-get-children node 'summary))))) (car (xml-node-children (car (xml-get-children node 'summary)))))) ;; link-fn @@ -1303,9 +1377,15 @@ For the RSS 1.0 specification see URL `http://web.resource.org/rss/1.0/spec'." (car (xml-node-children (car (xml-get-children channelnode 'title)))) ;; desc - (car (xml-node-children - (car (xml-get-children channelnode - 'description)))) + (or (car (xml-node-children + (car (xml-get-children channelnode + 'encoded)))) + (car (xml-node-children + (car (xml-get-children channelnode + 'content:encoded)))) + (car (xml-node-children + (car (xml-get-children channelnode + 'description))))) ;; link (car (xml-node-children (car (xml-get-children channelnode 'link)))) @@ -1329,8 +1409,10 @@ For the RSS 1.0 specification see URL `http://web.resource.org/rss/1.0/spec'." ;; time-fn (lambda (node) (newsticker--decode-iso8601-date - (car (xml-node-children - (car (xml-get-children node 'dc:date)))))) + (or (car (xml-node-children + (car (xml-get-children node 'dc:date)))) + (car (xml-node-children + (car (xml-get-children node 'date))))))) ;; guid-fn (lambda (node) nil) @@ -1354,9 +1436,15 @@ For the RSS 2.0 specification see URL `http://blogs.law.harvard.edu/tech/rss'." (car (xml-node-children (car (xml-get-children channelnode 'title)))) ;; desc - (car (xml-node-children - (car (xml-get-children channelnode - 'description)))) + (or (car (xml-node-children + (car (xml-get-children channelnode + 'encoded)))) + (car (xml-node-children + (car (xml-get-children channelnode + 'content:encoded)))) + (car (xml-node-children + (car (xml-get-children channelnode + 'description))))) ;; link (car (xml-node-children (car (xml-get-children channelnode 'link)))) @@ -1372,6 +1460,9 @@ For the RSS 2.0 specification see URL `http://blogs.law.harvard.edu/tech/rss'." (lambda (node) (or (car (xml-node-children (car (xml-get-children node + 'encoded)))) + (car (xml-node-children + (car (xml-get-children node 'content:encoded)))) (car (xml-node-children (car (xml-get-children node @@ -1464,7 +1555,7 @@ argument, which is one of the items in ITEMLIST." ;; decode numeric entities (setq title (xml-substitute-numeric-entities title)) (when desc - (setq desc (xml-substitute-numeric-entities desc))) + (setq desc (xml-substitute-numeric-entities desc))) (setq link (xml-substitute-numeric-entities link)) ;; remove whitespace from title, desc, and link (setq title (newsticker--remove-whitespace title)) @@ -1486,9 +1577,9 @@ argument, which is one of the items in ITEMLIST." (let ((prev-age (newsticker--age old-item))) (unless newsticker-automatically-mark-items-as-old ;; Some feeds deliver items multiply, the - ;; first time we find an 'obsolete-old one the - ;; cache, the following times we find an 'old - ;; one + ;; first time we find an 'obsolete-old one in + ;; the cache, the following times we find an + ;; 'old one (if (memq prev-age '(obsolete-old old)) (setq age2 'old) (setq age2 'new))) @@ -1498,11 +1589,16 @@ argument, which is one of the items in ITEMLIST." ;; item was not there (setq item-new-p t) (setq something-was-added t)) - (setq newsticker--cache - (newsticker--cache-add - newsticker--cache (intern name) title desc link - time age1 position (funcall extra-fn node) - time age2)) + (let ((extra-elements-with-guid (funcall extra-fn node))) + (unless (assoc 'guid extra-elements-with-guid) + (setq extra-elements-with-guid + (cons `(guid nil ,(funcall guid-fn node)) + extra-elements-with-guid))) + (setq newsticker--cache + (newsticker--cache-add + newsticker--cache (intern name) title desc link + time age1 position extra-elements-with-guid + time age2))) (when item-new-p (let ((item (newsticker--cache-contains newsticker--cache (intern name) title @@ -1712,31 +1808,44 @@ Checks list of active processes against list of newsticker processes." ;; ====================================================================== (defun newsticker--images-dir () "Return directory where feed images are saved." - (concat newsticker-dir "/images")) + (concat newsticker-dir "/images/")) -(defun newsticker--image-get (feed-name url) - "Get image of the news site FEED-NAME from URL. -If the image has been downloaded in the last 24h do nothing." - (let ((image-name (concat (newsticker--images-dir) feed-name))) +(defun newsticker--icons-dir () + "Return directory where feed icons are saved." + (concat newsticker-dir "/icons/")) + +(defun newsticker--image-get (feed-name filename directory url) + "Get image for FEED-NAME by returning FILENAME from DIRECTORY. +If the file does no exist or if it is older than 24 hours +download it from URL first." + (let ((image-name (concat directory feed-name))) (if (and (file-exists-p image-name) (time-less-p (current-time) (time-add (nth 5 (file-attributes image-name)) (seconds-to-time 86400)))) (newsticker--debug-msg "%s: Getting image for %s skipped" - (format-time-string "%A, %H:%M" (current-time)) + (format-time-string "%A, %H:%M") feed-name) ;; download (newsticker--debug-msg "%s: Getting image for %s" - (format-time-string "%A, %H:%M" (current-time)) + (format-time-string "%A, %H:%M") feed-name) - (let* ((buffername (concat " *newsticker-wget-image-" feed-name "*")) - (item (or (assoc feed-name newsticker-url-list) + (if (eq newsticker-retrieval-method 'intern) + (newsticker--image-download-by-url feed-name filename directory url) + (newsticker--image-download-by-wget feed-name filename directory url))))) + +(defun newsticker--image-download-by-wget (feed-name filename directory url) + "Download image for FEED-NAME using external program. +Save image as FILENAME in DIRECTORY, download it from URL." + (let* ((proc-name (concat feed-name "-" filename)) + (buffername (concat " *newsticker-wget-image-" proc-name "*")) + (item (or (assoc feed-name newsticker-url-list) (assoc feed-name newsticker-url-list-defaults) (error - "Cannot get news for %s: Check newsticker-url-list" + "Cannot get image for %s: Check newsticker-url-list" feed-name))) - (wget-arguments (or (car (cdr (cdr (cdr (cdr item))))) - newsticker-wget-arguments))) + (wget-arguments (or (car (cdr (cdr (cdr (cdr item))))) + newsticker-wget-arguments))) (with-current-buffer (get-buffer-create buffername) (erase-buffer) ;; throw an error if there is an old wget-process around @@ -1745,39 +1854,96 @@ If the image has been downloaded in the last 24h do nothing." feed-name)) ;; start wget (let* ((args (append wget-arguments (list url))) - (proc (apply 'start-process feed-name buffername + (proc (apply 'start-process proc-name buffername newsticker-wget-name args))) (set-process-coding-system proc 'no-conversion 'no-conversion) - (set-process-sentinel proc 'newsticker--image-sentinel))))))) + (set-process-sentinel proc 'newsticker--image-sentinel) + (process-put proc 'nt-directory directory) + (process-put proc 'nt-feed-name feed-name) + (process-put proc 'nt-filename filename))))) (defun newsticker--image-sentinel (process event) "Sentinel for image-retrieving PROCESS caused by EVENT." (let* ((p-status (process-status process)) (exit-status (process-exit-status process)) - (feed-name (process-name process))) + (feed-name (process-get process 'nt-feed-name)) + (directory (process-get process 'nt-directory)) + (filename (process-get process 'nt-filename))) ;; catch known errors (zombie processes, rubbish-xml, etc.) ;; if an error occurs the news feed is not updated! (catch 'oops (unless (and (eq p-status 'exit) (= exit-status 0)) (message "%s: Error while retrieving image from %s" - (format-time-string "%A, %H:%M" (current-time)) + (format-time-string "%A, %H:%M") feed-name) + (newsticker--image-remove directory feed-name) (throw 'oops nil)) - (let (image-name) - (with-current-buffer (process-buffer process) - (setq image-name (concat (newsticker--images-dir) feed-name)) - (set-buffer-file-coding-system 'no-conversion) - ;; make sure the cache dir exists - (unless (file-directory-p (newsticker--images-dir)) - (make-directory (newsticker--images-dir))) - ;; write and close buffer - (let ((require-final-newline nil) - (backup-inhibited t) - (coding-system-for-write 'no-conversion)) - (write-region nil nil image-name nil 'quiet)) - (set-buffer-modified-p nil) - (kill-buffer (current-buffer))))))) + (newsticker--image-save (process-buffer process) directory filename)))) + +(defun newsticker--image-save (buffer directory file-name) + "Save contents of BUFFER in DIRECTORY as FILE-NAME. +Finally kill buffer." + (with-current-buffer buffer + (let ((image-name (concat directory file-name))) + (set-buffer-file-coding-system 'no-conversion) + ;; make sure the cache dir exists + (unless (file-directory-p directory) + (make-directory directory)) + ;; write and close buffer + (let ((require-final-newline nil) + (backup-inhibited t) + (coding-system-for-write 'no-conversion)) + (write-region nil nil image-name nil 'quiet)) + (set-buffer-modified-p nil) + (kill-buffer buffer)))) + +(defun newsticker--image-remove (directory file-name) + "In DIRECTORY remove FILE-NAME." + (let ((image-name (concat directory file-name))) + (when (file-exists-p file-name) + (delete-file image-name)))) + +(defun newsticker--image-download-by-url (feed-name filename directory url) + "Download image for FEED-NAME using `url-retrieve'. +Save image as FILENAME in DIRECTORY, download it from URL." + (let ((coding-system-for-read 'no-conversion)) + (condition-case error-data + (url-retrieve url 'newsticker--image-download-by-url-callback + (list feed-name directory filename)) + (error (message "Error retrieving image from %s: %s" feed-name + error-data)))) + (force-mode-line-update)) + +(defun newsticker--image-download-by-url-callback (status feed-name directory filename) + "Callback function for `newsticker--image-download-by-url'. +STATUS is the return status as delivered by `url-retrieve'. +FEED-NAME is the name of the feed that the news were retrieved +from. +The image is saved in DIRECTORY as FILENAME." + (let ((do-save + (or (not status) + (let ((status-type (car status)) + (status-details (cdr status))) + (cond ((eq status-type :redirect) + ;; don't care about redirects + t) + ((eq status-type :error) + ;; silently ignore errors + nil)))))) + (when do-save + (let ((buf (get-buffer-create (concat " *newsticker-url-image-" feed-name "-" + directory "*"))) + (result (string-to-multibyte (buffer-string)))) + (set-buffer buf) + (erase-buffer) + (insert result) + ;; remove MIME header + (goto-char (point-min)) + (search-forward "\n\n") + (delete-region (point-min) (point)) + ;; save + (newsticker--image-save buf directory filename))))) (defun newsticker--insert-image (img string) "Insert IMG with STRING at point." @@ -2192,6 +2358,7 @@ If AGE is nil, the total number of items is returned." (defun newsticker-opml-export () "OPML subscription export. Export subscriptions to a buffer in OPML Format." + ;; FIXME: use newsticker-groups (interactive) (with-current-buffer (get-buffer-create "*OPML Export*") (set-buffer-file-coding-system 'utf-8) @@ -2211,7 +2378,8 @@ Export subscriptions to a buffer in OPML Format." (insert " <outline text=\"") (insert (newsticker--title sub)) (insert "\" xmlUrl=\"") - (insert (cadr sub)) + (insert (xml-escape-string (let ((url (cadr sub))) + (if (stringp url) url (prin1-to-string url))))) (insert "\"/>\n")) (append newsticker-url-list newsticker-url-list-defaults)) (insert " </body>\n</opml>\n")) |