1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
|
;;; org-feed.el --- Add RSS feed items to Org files -*- lexical-binding: t; -*-
;;
;; Copyright (C) 2009-2017 Free Software Foundation, Inc.
;;
;; Author: Carsten Dominik <carsten at orgmode dot org>
;; Keywords: outlines, hypermedia, calendar, wp
;; Homepage: http://orgmode.org
;;
;; This file is part of GNU Emacs.
;;
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;; Commentary:
;;
;; This module allows entries to be created and changed in an Org mode
;; file triggered by items in an RSS feed. The basic functionality
;; is geared toward simply adding new items found in a feed as
;; outline nodes to an Org file. Using hooks, arbitrary actions can
;; be triggered for new or changed items.
;;
;; Selecting feeds and target locations
;; ------------------------------------
;;
;; This module is configured through a single variable, `org-feed-alist'.
;; Here is an example, using a notes/tasks feed from reQall.com.
;;
;; (setq org-feed-alist
;; '(("ReQall"
;; "http://www.reqall.com/user/feeds/rss/a1b2c3....."
;; "~/org/feeds.org" "ReQall Entries")))
;;
;; With this setup, the command `M-x org-feed-update-all' will
;; collect new entries in the feed at the given URL and create
;; entries as subheadings under the "ReQall Entries" heading in the
;; file "~/org/feeds.org". Each feed should normally have its own
;; heading - however see the `:drawer' parameter.
;;
;; Besides these standard elements that need to be specified for each
;; feed, keyword-value pairs can set additional options. For example,
;; to de-select transitional entries with a title containing
;;
;; "reQall is typing what you said",
;;
;; you could use the `:filter' argument:
;;
;; (setq org-feed-alist
;; '(("ReQall"
;; "http://www.reqall.com/user/feeds/rss/a1b2c3....."
;; "~/org/feeds.org" "ReQall Entries"
;; :filter my-reqall-filter)))
;;
;; (defun my-reqall-filter (e)
;; (if (string-match "reQall is typing what you said"
;; (plist-get e :title))
;; nil
;; e))
;;
;; See the docstring for `org-feed-alist' for more details.
;;
;;
;; Keeping track of previously added entries
;; -----------------------------------------
;;
;; Since Org allows you to delete, archive, or move outline nodes,
;; org-feed.el needs to keep track of which feed items have been handled
;; before, so that they will not be handled again. For this, org-feed.el
;; stores information in a special drawer, FEEDSTATUS, under the heading
;; that received the input of the feed.
;;
;;
;; Acknowledgments
;; ---------------
;;
;; org-feed.el is based on ideas by Brad Bozarth who implemented a
;; similar mechanism using shell and awk scripts.
;;; Code:
(require 'org)
(require 'sha1)
(declare-function url-retrieve-synchronously "url"
(url &optional silent inhibit-cookies timeout))
(declare-function xml-node-children "xml" (node))
(declare-function xml-get-children "xml" (node child-name))
(declare-function xml-get-attribute "xml" (node attribute))
(declare-function xml-get-attribute-or-nil "xml" (node attribute))
(declare-function xml-substitute-special "xml" (string))
(declare-function org-capture-escaped-% "org-capture" ())
(declare-function org-capture-expand-embedded-elisp "org-capture" (&optional mark))
(declare-function org-capture-inside-embedded-elisp-p "org-capture" ())
(defgroup org-feed nil
  "Options concerning RSS feeds as inputs for Org files."
  :group 'org
  :tag "Org Feed")
(defcustom org-feed-alist nil
  "Alist specifying RSS feeds that should create inputs for Org.

Each entry in this list specifies an RSS feed that should be queried
to create inbox items in Org.  Each entry is a list that starts with
the following four items:

  name      a custom name for this feed
  URL       the feed URL
  file      the target Org file where entries should be listed; when
            nil, the target becomes the current buffer (may be an
            indirect buffer) each time the feed update is invoked
  headline  the headline under which entries should be listed

Additional arguments can be given using keyword-value pairs.  Many of
these specify functions that receive one or a list of \"entries\" as
their single argument.  An entry is a property list that describes a
feed item.  The property list has properties for each field in the
item, for example `:title' for the `<title>' field and `:pubDate' for
the publication date.  In addition, it contains the following
properties:

  `:item-full-text'   the full text in the <item> tag
  `:guid-permalink'   t when the guid property is a permalink

Here are the keyword-value pairs allowed in `org-feed-alist'.

:drawer drawer-name
     The name of the drawer for storing feed information.  The default
     is \"FEEDSTATUS\".  Using different drawers for different feeds
     allows several feeds to target the same inbox heading.

:filter filter-function
     A function to select interesting entries in the feed.  It gets a
     single entry as parameter.  It should return the entry if it is
     relevant, or nil if it is not.

:template template-string
     The default action on new items in the feed is to add them as
     children under the headline for the feed.  The template describes
     how the entry should be formatted.  If not given, it defaults to
     `org-feed-default-template'.

:formatter formatter-function
     Instead of relying on a template, you may specify a function to
     format the outline node to be inserted as a child.  This function
     gets passed a property list describing a single feed item, and it
     should return a string that is a properly formatted Org outline
     node of level 1.

:new-handler function
     If adding new items as children to the outline is not what you
     want to do with new items, define a handler function that is
     called with a list of all new items in the feed, each one
     represented as a property list.  The handler should do what needs
     to be done, and org-feed will mark all items given to this handler
     as \"handled\", i.e. they will not be passed to this handler again
     in future readings of the feed.
     When the handler is called, point will be at the feed headline.

:changed-handler function
     This function gets passed a list of all entries that have been
     handled before, but are now still in the feed and have *changed*
     since last handled (as evidenced by a different sha1 hash).
     When the handler is called, point will be at the feed headline.

:parse-feed function
     This function gets passed a buffer, and should return a list
     of entries, each being a property list containing the
     `:guid' and `:item-full-text' keys.  The default is
     `org-feed-parse-rss-feed'; `org-feed-parse-atom-feed' is an
     alternative.

:parse-entry function
     This function gets passed an entry as returned by the parse-feed
     function, and should return the entry with interesting properties
     added.  The default is `org-feed-parse-rss-entry';
     `org-feed-parse-atom-entry' is an alternative."
  :type '(repeat
          (list :value ("" "http://" "" "")
                (string :tag "Name")
                (string :tag "Feed URL")
                (file :tag "File for inbox")
                (string :tag "Headline for inbox")
                (repeat :inline t
                        (choice
                         (list :inline t :tag "Filter"
                               (const :filter)
                               (symbol :tag "Filter Function"))
                         (list :inline t :tag "Template"
                               (const :template)
                               (string :tag "Template"))
                         (list :inline t :tag "Formatter"
                               (const :formatter)
                               (symbol :tag "Formatter Function"))
                         (list :inline t :tag "New items handler"
                               (const :new-handler)
                               (symbol :tag "Handler Function"))
                         (list :inline t :tag "Changed items"
                               (const :changed-handler)
                               (symbol :tag "Handler Function"))
                         (list :inline t :tag "Parse Feed"
                               (const :parse-feed)
                               (symbol :tag "Parse Feed Function"))
                         (list :inline t :tag "Parse Entry"
                               (const :parse-entry)
                               (symbol :tag "Parse Entry Function"))))))
  :group 'org-feed)
(defcustom org-feed-drawer "FEEDSTATUS"
  "Default name of the drawer that holds feed status information.
An individual feed can use a different drawer by giving the
`:drawer' parameter in its `org-feed-alist' entry."
  :type '(string :tag "Drawer Name")
  :group 'org-feed)
(defcustom org-feed-default-template "\n* %h\n %U\n %description\n %a\n"
  "Default template for the Org node created from an RSS feed item.
A feed can supply its own template with the `:template' parameter in
`org-feed-alist'; this value is used when it does not.

Any field from the feed item can be interpolated into the template
with %name, for example %title, %description, %pubDate etc.  In
addition, the following special escapes are valid as well:

%h      The title, or the first line of the description
%t      The date as a stamp, either from <pubDate> (if present), or
        the current date
%T      Date and time
%u,%U   Like %t,%T, but inactive time stamps
%a      A link, from <guid> if that is a permalink, else from <link>
%(sexp) Evaluate elisp `(sexp)' and replace with the result, the simple
        %-escapes above can be used as arguments, e.g. %(capitalize \\\"%h\\\")"
  :type '(string :tag "Template")
  :group 'org-feed)
(defcustom org-feed-save-after-adding t
  "When non-nil, save the buffer after new feed items have been added."
  :type 'boolean
  :group 'org-feed)
(defcustom org-feed-retrieve-method 'url-retrieve-synchronously
  "How the content behind a feed URL is retrieved.
The symbols `curl' and `wget' select the external programs of the
same name.  Any other value should be an Emacs Lisp function that
takes the URL and returns a buffer containing the content of the
file the URL points to."
  :type '(choice
          (const :tag "Internally with url.el" url-retrieve-synchronously)
          (const :tag "Externally with curl" curl)
          (const :tag "Externally with wget" wget)
          (function :tag "Function"))
  :group 'org-feed)
(defcustom org-feed-before-adding-hook nil
  "Hook intended to run before new feed items are added to a file.
One possible use is committing the file in its current state to
version control."
  :type 'hook
  :group 'org-feed)
(defcustom org-feed-after-adding-hook nil
  "Hook run once new items have been added to a file.
When `org-feed-save-after-adding' is non-nil, the buffer has
already been saved by the time this hook runs."
  :type 'hook
  :group 'org-feed)
(defvar org-feed-buffer "*Org feed*"
  "Name of the buffer that receives a feed fetched by curl or wget.")
;;;###autoload
(defun org-feed-update-all ()
  "Update every feed in `org-feed-alist' and report the outcome.
Each feed is processed with `org-feed-update'; the numbers of new
entries are summed and shown in the echo area together with the
number of feeds."
  (interactive)
  (let* ((feed-count (length org-feed-alist))
         (per-feed (mapcar #'org-feed-update org-feed-alist))
         (total (apply #'+ per-feed))
         (summary (cond ((zerop total) "No new entries")
                        ((= total 1) "1 new entry")
                        (t (format "%d new entries" total))))
         (noun (if (= feed-count 1) "feed" "feeds")))
    (message "%s from %d %s" summary feed-count noun)))
;;;###autoload
(defun org-feed-update (feed &optional retrieve-only)
  "Get inbox items from FEED.
FEED can be a string with an association in `org-feed-alist', or
it can be a list structured like an entry in `org-feed-alist'.

When RETRIEVE-ONLY is non-nil, only fetch the feed and return the
value of `org-feed-get-feed' for its URL, without touching any
Org file.

Otherwise, parse the feed, determine which items are new or
changed relative to the status stored in the feed's drawer, hand
them to the configured handlers (by default new items are added
as children of the inbox headline), store the updated status and
return the number of new items.  Return 0 when the feed contains
nothing new or changed."
  (interactive (list (org-completing-read "Feed name: " org-feed-alist)))
  (if (stringp feed) (setq feed (assoc feed org-feed-alist)))
  (unless feed
    (error "No such feed in `org-feed-alist"))
  (catch 'exit
    (let ((name (car feed))
          (url (nth 1 feed))
          ;; A nil file means "the current buffer"; for an indirect
          ;; buffer use the file of its base buffer.
          (file (or (nth 2 feed) (buffer-file-name (or (buffer-base-buffer)
                                                       (current-buffer)))))
          (headline (nth 3 feed))
          (filter (nth 1 (memq :filter feed)))
          (formatter (nth 1 (memq :formatter feed)))
          (new-handler (nth 1 (memq :new-handler feed)))
          (changed-handler (nth 1 (memq :changed-handler feed)))
          (template (or (nth 1 (memq :template feed))
                        org-feed-default-template))
          (drawer (or (nth 1 (memq :drawer feed))
                      org-feed-drawer))
          (parse-feed (or (nth 1 (memq :parse-feed feed))
                          'org-feed-parse-rss-feed))
          (parse-entry (or (nth 1 (memq :parse-entry feed))
                           'org-feed-parse-rss-entry))
          feed-buffer inbox-pos new-formatted
          entries old-status status new changed guid-alist olds)
      (setq feed-buffer (org-feed-get-feed url))
      (unless (and feed-buffer (bufferp (get-buffer feed-buffer)))
        (error "Cannot get feed %s" name))
      (when retrieve-only
        (throw 'exit feed-buffer))
      (setq entries (funcall parse-feed feed-buffer))
      (ignore-errors (kill-buffer feed-buffer))
      (save-excursion
        (save-window-excursion
          (setq inbox-pos (org-feed-goto-inbox-internal file headline))
          (setq old-status (org-feed-read-previous-status inbox-pos drawer))
          ;; Add the "handled" status to the appropriate entries
          (setq entries (mapcar (lambda (e)
                                  (setq e
                                        (plist-put e :handled
                                                   (nth 1 (assoc
                                                           (plist-get e :guid)
                                                           old-status)))))
                                entries))
          ;; Find out which entries are new and which are changed
          (dolist (e entries)
            (if (not (plist-get e :handled))
                (push e new)
              (setq olds (nth 2 (assoc (plist-get e :guid) old-status)))
              (if (and olds
                       (not (string= (sha1
                                      (plist-get e :item-full-text))
                                     olds)))
                  (push e changed))))
          ;; Parse the relevant entries fully
          (setq new (mapcar parse-entry new)
                changed (mapcar parse-entry changed))
          ;; Run the filter.  Each list is filtered on its own; the
          ;; changed items must not be replaced by the new ones.
          (when filter
            (setq new (delq nil (mapcar filter new))
                  changed (delq nil (mapcar filter changed))))
          (when (not (or new changed))
            (message "No new items in feed %s" name)
            (throw 'exit 0))
          ;; Get alist based on guid, to look up entries
          (setq guid-alist
                (append
                 (mapcar (lambda (e) (list (plist-get e :guid) e)) new)
                 (mapcar (lambda (e) (list (plist-get e :guid) e)) changed)))
          ;; Construct the new status
          (setq status
                (mapcar
                 (lambda (e)
                   (let ((guid (plist-get e :guid)))
                     (list guid
                           ;; things count as handled if we handle them now,
                           ;; or if they were handled previously
                           (if (assoc guid guid-alist) t (plist-get e :handled))
                           ;; A hash, to detect changes
                           (sha1 (plist-get e :item-full-text)))))
                 entries))
          ;; Give the user a chance to act on the file before it is
          ;; modified, as documented for `org-feed-before-adding-hook'.
          (run-hooks 'org-feed-before-adding-hook)
          ;; Handle new items in the feed
          (when new
            (if new-handler
                (progn
                  (goto-char inbox-pos)
                  (funcall new-handler new))
              ;; No custom handler, do the default adding
              ;; Format the new entries for insertion
              (setq new-formatted
                    (mapcar
                     (lambda (e) (org-feed-format-entry e template formatter))
                     new)))
            ;; Insert the new items (nothing is inserted when a custom
            ;; handler took care of them, since `new-formatted' is nil)
            (org-feed-add-items inbox-pos new-formatted))
          ;; Handle changed items in the feed
          (when (and changed-handler changed)
            (goto-char inbox-pos)
            (funcall changed-handler changed))
          ;; Write the new status
          ;; We do this only now, in case something goes wrong above, so
          ;; that we do not end up with a status that does not reflect
          ;; which items truly have been handled
          (org-feed-write-status inbox-pos drawer status)
          ;; Normalize the visibility of the inbox tree
          (goto-char inbox-pos)
          (outline-hide-subtree)
          (org-show-children)
          (org-cycle-hide-drawers 'children)
          ;; Hooks and messages
          (when org-feed-save-after-adding (save-buffer))
          (message "Added %d new item%s from feed %s to file %s, heading %s"
                   (length new) (if (= (length new) 1) "" "s")
                   name
                   ;; FILE is nil when the target buffer visits no file.
                   (if file (file-name-nondirectory file) (buffer-name))
                   headline)
          (run-hooks 'org-feed-after-adding-hook)
          (length new))))))
;;;###autoload
(defun org-feed-goto-inbox (feed)
  "Visit the inbox headline that collects the items of FEED.
FEED is either the name of a feed in `org-feed-alist' or a list
structured like one of its entries.  Interactively, the only
configured feed is used directly; otherwise the name is read with
completion."
  (interactive
   (list (if (= (length org-feed-alist) 1)
             (car org-feed-alist)
           (org-completing-read "Feed name: " org-feed-alist))))
  (let ((spec (if (stringp feed)
                  (assoc feed org-feed-alist)
                feed)))
    (unless spec
      (error "No such feed in `org-feed-alist"))
    (org-feed-goto-inbox-internal (nth 2 spec) (nth 3 spec))))
;;;###autoload
(defun org-feed-show-raw-feed (feed)
  "Display the raw, unparsed content retrieved for FEED.
FEED is either the name of a feed in `org-feed-alist' or a list
structured like one of its entries.  The feed is fetched with
`org-feed-update' in retrieve-only mode and the result is shown
in the selected window, with point at the beginning."
  (interactive
   (list (if (= (length org-feed-alist) 1)
             (car org-feed-alist)
           (org-completing-read "Feed name: " org-feed-alist))))
  (let ((spec (if (stringp feed)
                  (assoc feed org-feed-alist)
                feed)))
    (unless spec
      (error "No such feed in `org-feed-alist"))
    (let ((raw (org-feed-update spec 'retrieve-only)))
      (pop-to-buffer-same-window raw))
    (goto-char (point-min))))
(defun org-feed-goto-inbox-internal (file heading)
  "Find or create HEADING in FILE.
Switch to that buffer, and return the position of that headline.

When FILE is nil, operate on the current buffer instead of
visiting a file.  HEADING is matched literally (it is not treated
as a regular expression) and may be followed by tags.  If no such
headline exists, a new top-level headline is appended at the end
of the buffer."
  ;; `org-feed-alist' allows a nil file, meaning "the current buffer".
  (when file (find-file file))
  (widen)
  (goto-char (point-min))
  (if (re-search-forward
       ;; Quote HEADING so that characters such as "+", "." or "["
       ;; in a headline title do not change the meaning of the regexp.
       (concat "^\\*+[ \t]+" (regexp-quote heading)
               "[ \t]*\\(:.*?:[ \t]*\\)?$")
       nil t)
      (goto-char (match-beginning 0))
    (goto-char (point-max))
    (insert "\n\n* " heading "\n\n")
    (org-back-to-heading t))
  (point))
(defun org-feed-read-previous-status (pos drawer)
  "Return the status alist stored in DRAWER of the entry at POS.
The drawer is searched for within the subtree starting at POS and
its content is parsed with `read'.  Return nil when the subtree
has no such drawer."
  (save-excursion
    (goto-char pos)
    (let ((limit (save-excursion (org-end-of-subtree t t)))
          (drawer-re
           (concat "^[ \t]*:" drawer ":[ \t]*\n\\([^\000]*?\\)\n[ \t]*:END:")))
      (when (re-search-forward drawer-re limit t)
        (read (match-string 1))))))
(defun org-feed-write-status (pos drawer status)
  "Store STATUS, pretty-printed, in DRAWER of the entry at POS.
If the subtree at POS already contains DRAWER, its old content is
replaced.  Otherwise an empty drawer is inserted in front of the
next heading and filled with STATUS."
  (save-excursion
    (goto-char pos)
    (let* ((subtree-end (save-excursion (org-end-of-subtree t t)))
           (opener (concat "^[ \t]*:" drawer ":[ \t]*\n"))
           (found (re-search-forward opener subtree-end t)))
      (cond
       (found
        ;; Drawer exists: empty it, leaving point where the new
        ;; content belongs.
        (goto-char (match-end 0))
        (let* ((content-start (point))
               (content-end
                (save-excursion
                  (and (re-search-forward "^[ \t]*:END:" nil t)
                       (match-beginning 0)))))
          (delete-region content-start content-end)))
       (t
        ;; No drawer yet: create one and move back onto its :END: line.
        (outline-next-heading)
        (insert " :" drawer ":\n :END:\n")
        (beginning-of-line 0)))
      (insert (pp-to-string status)))))
(defun org-feed-add-items (pos entries)
  "Paste the formatted ENTRIES as children of the headline at POS.
ENTRIES is a list of strings, each holding an Org subtree.  They
are pasted after the last non-blank line of the subtree at POS,
one level below that headline.  The insertion position is pushed
onto the Org mark ring.  Signal an error if POS is not at a
headline."
  (save-excursion
    (goto-char pos)
    (unless (looking-at org-complex-heading-regexp)
      (error "Wrong position"))
    (let ((child-level (org-get-valid-level (length (match-string 1)) 1))
          (remaining entries)
          item)
      ;; Move to the line following the last non-blank line of the subtree.
      (org-end-of-subtree t t)
      (skip-chars-backward " \t\n")
      (beginning-of-line 2)
      (setq pos (point))
      ;; Like the classic pop loop, stop at the first nil element.
      (while (setq item (car remaining))
        (org-paste-subtree child-level item 'yank)
        (setq remaining (cdr remaining)))
      (org-mark-ring-push pos))))
(defun org-feed-format-entry (entry template formatter)
  "Format ENTRY so that it can be inserted into an Org file.
ENTRY is a property list describing a single feed item.

If FORMATTER is non-nil, call it with ENTRY and return whatever
it returns.  Otherwise expand TEMPLATE and return the resulting
string: the escapes %h, %t, %T, %u, %U and %a are replaced as
described in `org-feed-default-template', any other %name is
replaced by the value of the `:name' property of ENTRY (or
removed when ENTRY has no such property), and embedded %(sexp)
forms are evaluated.

When a %name placeholder stands alone on its line, the
continuation lines of a multi-line value are indented like the
placeholder."
  (require 'org-capture)
  (if formatter (funcall formatter entry)
    (let* ((dlines
            (org-split-string (or (plist-get entry :description) "???")
                              "\n"))
           (time (or (if (plist-get entry :pubDate)
                         (org-read-date t t (plist-get entry :pubDate)))
                     (current-time)))
           (v-h (or (plist-get entry :title) (car dlines) "???"))
           (v-t (format-time-string (org-time-stamp-format nil nil) time))
           (v-T (format-time-string (org-time-stamp-format t nil) time))
           (v-u (format-time-string (org-time-stamp-format nil t) time))
           (v-U (format-time-string (org-time-stamp-format t t) time))
           (v-a (let ((tmp (or (and (plist-get entry :guid-permalink)
                                    (plist-get entry :guid))
                               (plist-get entry :link))))
                  (if tmp (format "[[%s]]\n" tmp ) ""))))
      (with-temp-buffer
        (insert template)
        (goto-char (point-min))
        ;; Mark %() embedded elisp for later evaluation.
        (org-capture-expand-embedded-elisp 'mark)
        ;; Simple %-escapes.  `org-capture-escaped-%' may modify
        ;; buffer and cripple match-data.  Use markers instead.
        (while (re-search-forward "%\\([a-zA-Z]+\\)" nil t)
          (let ((key (match-string 1))
                (beg (copy-marker (match-beginning 0)))
                (end (copy-marker (match-end 0))))
            (unless (org-capture-escaped-%)
              ;; Compute the replacement while the placeholder is
              ;; still in the buffer: the "alone on its line" test
              ;; below needs to see it.
              (let ((replacement
                     (pcase key
                       ("h" v-h)
                       ("t" v-t)
                       ("T" v-T)
                       ("u" v-u)
                       ("U" v-U)
                       ("a" v-a)
                       (name
                        (let ((v (plist-get entry (intern (concat ":" name)))))
                          (save-excursion
                            (save-match-data
                              (goto-char beg)
                              (beginning-of-line)
                              (if (and v
                                       (looking-at
                                        (concat "^\\([ \t]*\\)%" name "[ \t]*$")))
                                  (org-feed-make-indented-block
                                   v (org-get-indentation))
                                v))))))))
                (delete-region beg end)
                (set-marker beg nil)
                (set-marker end nil)
                (when replacement
                  (insert
                   ;; Escape string delimiters within embedded lisp.
                   (if (org-capture-inside-embedded-elisp-p)
                       (replace-regexp-in-string "\"" "\\\\\"" replacement)
                     replacement)))))))
        ;; %() embedded elisp
        (org-capture-expand-embedded-elisp)
        (decode-coding-string
         (buffer-string) (detect-coding-region (point-min) (point-max) t))))))
(defun org-feed-make-indented-block (s n)
  "Return the string S with continuation lines indented by N spaces.
The first line is left alone; every following line is prefixed
with N spaces.  A string without a newline is returned unchanged."
  (if (string-match "\n" s)
      (let ((line-break (concat "\n" (make-string n ?\s))))
        (mapconcat #'identity (org-split-string s "\n") line-break))
    s))
(defun org-feed-skip-http-headers (buffer)
  "Delete the HTTP headers at the start of BUFFER and return BUFFER.
Everything up to and including the first empty line is removed.
The headers are assumed to be present; an error is signaled when
no empty line is found."
  (with-current-buffer buffer
    (widen)
    (goto-char (point-min))
    (let ((body-start (search-forward "\n\n")))
      (delete-region (point-min) body-start)))
  buffer)
(defun org-feed-get-feed (url)
  "Get the RSS feed file at URL and return the buffer.
The retrieval is done according to `org-feed-retrieve-method':

- `url-retrieve-synchronously': fetch with url.el and strip the
  HTTP headers.  Return nil when url.el delivers no buffer.
- `curl' or `wget': run the external program with its output
  going to the buffer named by `org-feed-buffer', and return that
  buffer name.
- any other function: call it with URL and return its value.

Return nil for any other value of `org-feed-retrieve-method'."
  (cond
   ((eq org-feed-retrieve-method 'url-retrieve-synchronously)
    ;; `url-retrieve-synchronously' returns nil when the retrieval
    ;; fails; hand that nil to the caller instead of crashing in
    ;; `org-feed-skip-http-headers'.
    (let ((response (url-retrieve-synchronously url)))
      (when (buffer-live-p response)
        (org-feed-skip-http-headers response))))
   ((eq org-feed-retrieve-method 'curl)
    ;; Start from a fresh output buffer.
    (ignore-errors (kill-buffer org-feed-buffer))
    (call-process "curl" nil org-feed-buffer nil "--silent" url)
    org-feed-buffer)
   ((eq org-feed-retrieve-method 'wget)
    (ignore-errors (kill-buffer org-feed-buffer))
    (call-process "wget" nil org-feed-buffer nil "-q" "-O" "-" url)
    org-feed-buffer)
   ((functionp org-feed-retrieve-method)
    (funcall org-feed-retrieve-method url))))
(defun org-feed-parse-rss-feed (buffer)
  "Parse BUFFER for RSS feed entries.
Returns a list of entries, with each entry a property list,
containing the properties `:guid' and `:item-full-text'.

`:item-full-text' is the text between the `<item>' tags and
`:guid' is the decoded content of the item's `<guid>' element, or
nil when there is none.  Entries are returned in feed order.  An
`<item>' that lacks its closing tag (a truncated feed) ends the
parse; items before it are still returned."
  (require 'xml)
  (with-current-buffer buffer
    ;; Bind `case-fold-search' only after switching buffers, so that
    ;; the binding applies to the buffer that is actually searched.
    (let ((case-fold-search t)
          entries)
      (widen)
      (goto-char (point-min))
      (catch 'truncated
        (while (re-search-forward "<item\\>.*?>" nil t)
          (let* ((beg (point))
                 (end (if (re-search-forward "</item>" nil t)
                          (match-beginning 0)
                        ;; No closing tag: nothing more to collect.
                        (throw 'truncated nil)))
                 (item (buffer-substring beg end))
                 (guid (and (string-match "<guid\\>.*?>\\(.*?\\)</guid>" item)
                            (xml-substitute-special
                             (match-string-no-properties 1 item)))))
            (push (list :guid guid :item-full-text item) entries)
            (goto-char end))))
      (nreverse entries))))
(defun org-feed-parse-rss-entry (entry)
  "Add one property per XML element found in ENTRY's `:item-full-text'.
For every `<tag>...</tag>' pair in the item text, the property
`:tag' is set to the decoded element content.  Also set
`:guid-permalink' to t unless the text contains
isPermaLink=\"false\".  Return the augmented property list."
  (require 'xml)
  (let ((element-re "<\\([a-zA-Z]+\\>\\).*?>\\([^\000]*?\\)</\\1>"))
    (with-temp-buffer
      (insert (plist-get entry :item-full-text))
      (goto-char (point-min))
      (while (re-search-forward element-re nil t)
        (let* ((prop (intern (concat ":" (match-string 1))))
               (value (xml-substitute-special (match-string 2))))
          (setq entry (plist-put entry prop value))))
      ;; A guid counts as a permalink unless explicitly declared otherwise.
      (goto-char (point-min))
      (when (not (re-search-forward "isPermaLink[ \t]*=[ \t]*\"false\"" nil t))
        (setq entry (plist-put entry :guid-permalink t)))))
  entry)
(defun org-feed-parse-atom-feed (buffer)
  "Collect the Atom entries found in BUFFER.
Return a list of property lists, one per `entry' element, each
with the properties `:guid' (the first child of the entry's `id'
element) and `:item-full-text'.  Note that `:item-full-text' is
the parsed entry printed as a Lisp expression, not the original
XML text."
  (require 'xml)
  (with-current-buffer buffer
    (widen)
    (let* ((root (car (xml-parse-region (point-min) (point-max))))
           (nodes (xml-get-children root 'entry))
           collected)
      (dolist (node nodes)
        (let ((id-node (car (xml-get-children node 'id))))
          (push (list :guid (car (xml-node-children id-node))
                      :item-full-text (prin1-to-string node))
                collected)))
      (nreverse collected))))
(defun org-feed-parse-atom-entry (entry)
  "Parse the `:item-full-text' as a sexp and create new properties.
ENTRY is a property list as returned by `org-feed-parse-atom-feed'.
The following properties are added and the resulting property
list is returned:

`:link'         the `href' attribute of the first `link' element
`:title'        the text of the `title' element, or nil when the
                element is missing or has no text
`:description'  derived from the `content' element, if there is
                one: its text for the types \"text\" and \"html\",
                its children printed as a Lisp expression for
                \"xhtml\", and a notice for any other type

A `content' element without a `type' attribute is treated as
\"text\", which is the default prescribed by the Atom format."
  (require 'xml)
  (let* ((xml (car (read-from-string (plist-get entry :item-full-text))))
         (title (car (xml-node-children
                      (car (xml-get-children xml 'title)))))
         (content (car (xml-get-children xml 'content)))
         ;; Atom defaults a missing type attribute to "text".
         (type (or (xml-get-attribute-or-nil content 'type) "text")))
    ;; Get first <link href='foo'/>.
    (setq entry (plist-put entry :link
                           (xml-get-attribute
                            (car (xml-get-children xml 'link))
                            'href)))
    ;; Add <title/> as :title.  Only decode real text; a missing or
    ;; empty title yields nil instead of an error.
    (setq entry (plist-put entry :title
                           (and (stringp title)
                                (xml-substitute-special title))))
    (when content
      (let ((body (car (xml-node-children content))))
        (setq entry
              (plist-put
               entry :description
               (cond
                ;; We like plain text.
                ;; TODO: convert HTML to Org markup.
                ((member type '("text" "html"))
                 (and (stringp body) (xml-substitute-special body)))
                ;; TODO: convert XHTML to Org markup.
                ((string= type "xhtml")
                 (prin1-to-string (xml-node-children content)))
                (t
                 (format-message "Unknown `%s' content." type)))))))
    entry))
;; Announce the `org-feed' feature.
(provide 'org-feed)
;; Local variables:
;; generated-autoload-file: "org-loaddefs.el"
;; End:
;;; org-feed.el ends here
|