diff options
author | Bill Wohler <wohler@newt.com> | 2004-08-15 22:00:06 +0000 |
---|---|---|
committer | Bill Wohler <wohler@newt.com> | 2004-08-15 22:00:06 +0000 |
commit | 10f865f4de7d78d73ff9a64c12254f5e5fc39041 (patch) | |
tree | 20e6920252cf80aa05fd2c6cdd15c94b41267f84 /lisp/mh-e/mh-junk.el | |
parent | 420d3446dc3cc70ffb48e49d1bf13fdbcb0d7040 (diff) | |
download | emacs-10f865f4de7d78d73ff9a64c12254f5e5fc39041.tar.gz |
Upgraded to MH-E version 7.4.80.
See etc/MH-E-NEWS and lisp/mh-e/ChangeLog for details.
Diffstat (limited to 'lisp/mh-e/mh-junk.el')
-rw-r--r-- | lisp/mh-e/mh-junk.el | 454 |
1 files changed, 234 insertions, 220 deletions
diff --git a/lisp/mh-e/mh-junk.el b/lisp/mh-e/mh-junk.el index 42ec4c444d3..095a8c3c3fd 100644 --- a/lisp/mh-e/mh-junk.el +++ b/lisp/mh-e/mh-junk.el @@ -1,6 +1,6 @@ ;;; mh-junk.el --- Interface to anti-spam measures -;; Copyright (C) 2003 Free Software Foundation, Inc. +;; Copyright (C) 2003, 2004 Free Software Foundation, Inc. ;; Author: Satyaki Das <satyaki@theforce.stanford.edu>, ;; Bill Wohler <wohler@newt.com> @@ -32,6 +32,8 @@ ;;; Code: +(eval-when-compile (require 'mh-acros)) +(mh-require-cl) (require 'mh-e) ;; Interactive functions callable from the folder buffer @@ -39,36 +41,33 @@ (defun mh-junk-blacklist (range) "Blacklist RANGE as spam. -Check the documentation of `mh-interactive-range' to see how RANGE is read in -interactive use. +This command trains the spam program in use (see the `mh-junk-program' option) +with the content of the range (see `mh-interactive-range') and then handles +the message(s) as specified by the `mh-junk-disposition' option. -First the appropriate function is called depending on the value of -`mh-junk-choice'. Then if `mh-junk-mail-folder' is a string then the message is -refiled to that folder. If nil, the message is deleted. - -To change the spam program being used, customize `mh-junk-program'. Directly -setting `mh-junk-choice' is not recommended. - -The documentation for the following functions describes what setup is needed -for the different spam fighting programs: +For more information about using your particular spam fighting program, see: + - `mh-spamassassin-blacklist' - `mh-bogofilter-blacklist' - - `mh-spamprobe-blacklist' - - `mh-spamassassin-blacklist'" + - `mh-spamprobe-blacklist'" (interactive (list (mh-interactive-range "Blacklist"))) (let ((blacklist-func (nth 1 (assoc mh-junk-choice mh-junk-function-alist)))) (unless blacklist-func (error "Customize `mh-junk-program' appropriately")) - (let ((dest (cond ((null mh-junk-mail-folder) nil) - ((equal mh-junk-mail-folder "") "+") - ((eq (aref mh-junk-mail-folder 0) ?+) - mh-junk-mail-folder) - ((eq (aref mh-junk-mail-folder 0) ?@) + (let ((dest (cond ((null mh-junk-disposition) nil) + ((equal mh-junk-disposition "") "+") + ((eq (aref mh-junk-disposition 0) ?+) + mh-junk-disposition) + ((eq (aref mh-junk-disposition 0) ?@) (concat mh-current-folder "/" - (substring mh-junk-mail-folder 1))) - (t (concat "+" mh-junk-mail-folder))))) + (substring mh-junk-disposition 1))) + (t (concat "+" mh-junk-disposition))))) (mh-iterate-on-range msg range + (message (format "Blacklisting message %d..." msg)) (funcall (symbol-function blacklist-func) msg) + (message (format "Blacklisting message %d...done" msg)) + (if (not (memq msg mh-seen-list)) + (setq mh-seen-list (cons msg mh-seen-list))) (if dest (mh-refile-a-msg nil (intern dest)) (mh-delete-a-msg nil))) @@ -76,231 +75,124 @@ for the different spam fighting programs: ;;;###mh-autoload (defun mh-junk-whitelist (range) - "Whitelist RANGE incorrectly classified as spam. - -Check the documentation of `mh-interactive-range' to see how RANGE is read in -interactive use. + "Whitelist RANGE as ham. -First the appropriate function is called depending on the value of -`mh-junk-choice'. Then the message is refiled to `mh-inbox'. +This command reclassifies a range of messages (see `mh-interactive-range') as +ham if it were incorrectly classified as spam. It then refiles the message +into the `+inbox' folder. -To change the spam program being used, customize `mh-junk-program'. Directly -setting `mh-junk-choice' is not recommended." +The `mh-junk-program' option specifies the spam program in use." (interactive (list (mh-interactive-range "Whitelist"))) (let ((whitelist-func (nth 2 (assoc mh-junk-choice mh-junk-function-alist)))) (unless whitelist-func (error "Customize `mh-junk-program' appropriately")) (mh-iterate-on-range msg range + (message (format "Whitelisting message %d..." msg)) (funcall (symbol-function whitelist-func) msg) + (message (format "Whitelisting message %d...done" msg)) (mh-refile-a-msg nil (intern mh-inbox))) (mh-next-msg))) -;; Bogofilter Interface - -(defvar mh-bogofilter-executable (executable-find "bogofilter")) - -(defun mh-bogofilter-blacklist (msg) - "Classify MSG as spam. -Tell bogofilter that the message is spam. +;; Spamassassin Interface -Bogofilter is a Bayesian spam filtering program. Get it from your local -distribution or from: - http://bogofilter.sourceforge.net/ +(defvar mh-spamassassin-executable (executable-find "spamassassin")) +(defvar mh-sa-learn-executable (executable-find "sa-learn")) -You first need to teach bogofilter. This is done by running +(defun mh-spamassassin-blacklist (msg) + "Blacklist MSG with SpamAssassin. - bogofilter -n < good-message +SpamAssassin is one of the more popular spam filtering programs. Get it from +your local distribution or from http://spamassassin.org/. -on every good message, and +To use SpamAssassin, add the following recipes to `.procmailrc': - bogofilter -s < spam-message + MAILDIR=$HOME/`mhparam Path` -on every spam message. Most Bayesian filters need 1000 to 5000 of each to -start doing a good job. + # Fight spam with SpamAssassin. + :0fw + | spamc -To use bogofilter, add the following .procmailrc recipes which you can also -find in the bogofilter man page: + # Anything with a spam level of 10 or more is junked immediately. + :0: + * ^X-Spam-Level: .......... + /dev/null - # Bogofilter - :0fw - | bogofilter -u -e -p + :0: + * ^X-Spam-Status: Yes + spam/. - :0 - * ^X-Bogosity: Yes, tests=bogofilter - $SPAM +If you don't use `spamc', use `spamassassin -P -a'. -Bogofilter continues to feed the messages it classifies back into its -database. Occasionally it misses, and those messages need to be reclassified. -MH-E can do this for you. Use \\[mh-junk-blacklist] to reclassify messges in -your +inbox as spam, and \\[mh-junk-whitelist] to reclassify messages in your -spambox as good messages." - (unless mh-bogofilter-executable - (error "Couldn't find the bogofilter executable")) - (let ((msg-file (mh-msg-filename msg mh-current-folder))) - (call-process mh-bogofilter-executable msg-file 0 nil "-Ns"))) +Note that one of the recipes above throws away messages with a score greater +than or equal to 10. Here's how you can determine a value that works best for +you. -(defun mh-bogofilter-whitelist (msg) - "Reinstate incorrectly filtered MSG. -Train bogofilter to think of the message as non-spam." - (unless mh-bogofilter-executable - (error "Couldn't find the bogofilter executable")) - (let ((msg-file (mh-msg-filename msg mh-current-folder))) - (call-process mh-bogofilter-executable msg-file 0 nil "-Sn"))) +First, run `spamassassin -t' on every mail message in your archive and use +Gnumeric to verify that the average plus the standard deviation of good mail +is under 5, the SpamAssassin default for \"spam\". - +Using Gnumeric, sort the messages by score and view the messages with the +highest score. Determine the score which encompasses all of your interesting +messages and add a couple of points to be conservative. Add that many dots to +the `X-Spam-Level:' header field above to send messages with that score down +the drain. -;; Spamprobe Interface +In the example above, messages with a score of 5-9 are set aside in the +`+spam' folder for later review. The major weakness of rules-based filters is +a plethora of false positives so it is worthwhile to check. -(defvar mh-spamprobe-executable (executable-find "spamprobe")) +If SpamAssassin classifies a message incorrectly, or is unsure, you can use +the MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist]. -(defun mh-spamprobe-blacklist (msg) - "Classify MSG as spam. -Tell spamprobe that the message is spam. - -Spamprobe is a Bayesian spam filtering program. More info about the program can -be found at: - http://spamprobe.sourceforge.net - -Here is a procmail recipe to stores incoming spam mail into the folder +spam -and good mail in /home/user/Mail/mdrop/mbox. This recipe is provided as an -example in the spamprobe man page. - - PATH=/bin:/usr/bin:/usr/local/bin - DEFAULT=/home/user/Mail/mdrop/mbox - SPAM=/home/user/Mail/spam/. - - # Spamprobe filtering - :0 - SCORE=| spamprobe receive - :0 wf - | formail -I \"X-SpamProbe: $SCORE\" - :0 a: - *^X-SpamProbe: SPAM - $SPAM - -Occasionally some good mail gets misclassified as spam. You can use -\\[mh-junk-whitelist] to reclassify that as good mail." - (unless mh-spamprobe-executable - (error "Couldn't find the spamprobe executable")) - (let ((msg-file (mh-msg-filename msg mh-current-folder))) - (call-process mh-spamprobe-executable msg-file 0 nil "spam"))) +The \\[mh-junk-blacklist] command adds a `blacklist_from' entry to +`~/spamassassin/user_prefs', deletes the message, and sends the message to the +Razor, so that others might not see this spam. If the `sa-learn' command is +available, the message is also recategorized as spam. -(defun mh-spamprobe-whitelist (msg) - "Reinstate incorrectly filtered MSG. -Train spamprobe to think of the message as non-spam." - (unless mh-spamprobe-executable - (error "Couldn't find the spamprobe executable")) - (let ((msg-file (mh-msg-filename msg mh-current-folder))) - (call-process mh-spamprobe-executable msg-file 0 nil "good"))) +The \\[mh-junk-whitelist] command adds a `whitelist_from' rule to the +`~/.spamassassin/user_prefs' file. If the `sa-learn' command is available, the +message is also recategorized as ham. - +Over time, you'll observe that the same host or domain occurs repeatedly in +the `blacklist_from' entries, so you might think that you could avoid future +spam by blacklisting all mail from a particular domain. The utility function +`mh-spamassassin-identify-spammers' helps you do precisely that. This function +displays a frequency count of the hosts and domains in the `blacklist_from' +entries from the last blank line in `~/.spamassassin/user_prefs' to the end of +the file. This information can be used so that you can replace multiple +`blacklist_from' entries with a single wildcard entry such as: -;; Spamassassin Interface + blacklist_from *@*amazingoffersdirect2u.com -(defvar mh-spamassassin-executable (executable-find "spamassassin")) -(defvar mh-sa-learn-executable (executable-find "sa-learn")) +In versions of SpamAssassin (2.50 and on) that support a Bayesian classifier, +\\[mh-junk-blacklist] uses the `sa-learn' program to recategorize the message +as spam. Neither MH-E, nor SpamAssassin, rebuilds the database after adding +words, so you will need to run `sa-learn --rebuild' periodically. This can be +done by adding the following to your crontab: -(defun mh-spamassassin-blacklist (msg) - "Blacklist MSG. -This is done by sending the message to Razor and by appending the sender to -~/.spamassassin/user_prefs in a blacklist_from rule. If sa-learn is available, -the message is also recategorized as spam. - -Spamassassin is an excellent spam filter. For more information, see: - http://spamassassin.org/. - -I ran \"spamassassin -t\" on every mail message in my archive and ran an -analysis in Gnumeric to find that the standard deviation of good mail -scored under 5 (coincidentally, the spamassassin default for \"spam\"). - -Furthermore, I observed that there weren't any messages with a score of 8 -or more that were interesting, so I added a couple of points to be -conservative and send any message with a score of 10 or more down the -drain. You might want to use a score of 12 or 13 to be really conservative. -I have found that this really decreases the amount of junk to review. - -Messages with a score of 5-9 are set aside for later review. The major -weakness of rules-based filters is a plethora of false positives\; I catch one -or two legitimate messages in here a week, so it is worthwhile to check. - -You might choose to do this analysis yourself to pick a good score for -deleting spam sight unseen, or you might pick a score out of a hat, or you -might choose to be very conservative and not delete any messages at all. - -Based upon this discussion, here is what the associated ~/.procmailrc -entries look like. These rules appear before my list filters so that spam -sent to mailing lists gets pruned too. - - # - # Spam - # - :0fw - | spamc - - # Anything with a spam level of 10 or more is junked immediately. - :0: - * ^X-Spam-Level: .......... - /dev/null - - :0 - * ^X-Spam-Status: Yes - $SPAM - -If you don't use \"spamc\", use \"spamassassin -P -a\". - -A handful of spam does find its way into +inbox. In this case, use -\\[mh-junk-blacklist] to add a \"blacklist_from\" line to -~/spamassassin/user_prefs, delete the message, and send the message to the -Razor, so that others might not see this spam. - -Over time, you see some patterns in the blacklisted addresses and can -replace several lines with wildcards. For example, it is clear that High -Speed Media is the biggest bunch of jerks on the Net. Here are some of the -entries I have for them, and the list continues to grow. - - blacklist_from *@*-hsm-*.com - blacklist_from *@*182*643*.com - blacklist_from *@*antarhsm*.com - blacklist_from *@*h*speed* - blacklist_from *@*hsm*182*.com - blacklist_from *@*hsm*643*.com - blacklist_from *@*hsmridi2983cslt227.com - blacklist_from *@*list*hsm*.com - blacklist_from *@h*s*media* - blacklist_from *@hsmdrct.com - blacklist_from *@hsmridi2983csltsite.com - -The function `mh-spamassassin-identify-spammers' is provided that shows the -frequency counts of the host and domain names in your blacklist_from -entries. This can be helpful when editing the blacklist_from entries. - -In versions of spamassassin (2.50 and on) that support a Bayesian classifier, -\\[mh-junk-blacklist] uses the sa-learn program to recategorize the message as -spam. Neither MH-E, nor spamassassin, rebuilds the database after adding -words, so you will need to run \"sa-learn --rebuild\" periodically. This can -be done by adding the following to your crontab: - - 0 * * * * sa-learn --rebuild > /dev/null 2>&1" + 0 * * * * sa-learn --rebuild > /dev/null 2>&1" (unless mh-spamassassin-executable - (error "Couldn't find the spamassassin executable")) + (error "Unable to find the spamassassin executable")) (let ((current-folder mh-current-folder) (msg-file (mh-msg-filename msg mh-current-folder)) (sender)) (save-excursion - (message "Giving this message the Razor...") + (message (format "Reporting message %d..." msg)) (mh-truncate-log-buffer) (call-process mh-spamassassin-executable msg-file mh-log-buffer nil - "--report" "--remove-from-whitelist") + ;;"--report" "--remove-from-whitelist" + "-r" "-R") ; spamassassin V2.20 (when mh-sa-learn-executable (message "Recategorizing this message as spam...") (call-process mh-sa-learn-executable msg-file mh-log-buffer nil "--single" "--spam" "--local" "--no-rebuild")) - (message "Blacklisting address...") + (message (format "Blacklisting message %d..." msg)) (set-buffer (get-buffer-create mh-temp-buffer)) (erase-buffer) - (call-process (expand-file-name mh-scan-prog mh-progs) nil t nil + (call-process (expand-file-name mh-scan-prog mh-progs) mh-junk-background + t nil (format "%s" msg) current-folder "-format" "%<(mymbox{from})%|%(addr{from})%>") (goto-char (point-min)) @@ -308,15 +200,19 @@ be done by adding the following to your crontab: (progn (setq sender (match-string 0)) (mh-spamassassin-add-rule "blacklist_from" sender) - (message "Blacklisting address...done")) - (message "Blacklisting address...not done (from my address)"))))) + (message (format "Blacklisting message %d...done" msg))) + (message (format "Blacklisting message %d...not done (from my address)" msg)))))) (defun mh-spamassassin-whitelist (msg) - "Whitelist MSG. -Add a whitelist_from rule to the ~/.spamassassin/user_prefs file. If sa-learn -is available, then the message is recategorized as ham." + "Whitelist MSG with SpamAssassin. + +The \\[mh-junk-whitelist] command adds a `whitelist_from' rule to the +`~/.spamassassin/user_prefs' file. If the `sa-learn' command is available, the +message is also recategorized as ham. + +See `mh-spamassassin-blacklist' for more information." (unless mh-spamassassin-executable - (error "Couldn't find the spamassassin executable")) + (error "Unable to find the spamassassin executable")) (let ((msg-file (mh-msg-filename msg mh-current-folder)) (show-buffer (get-buffer mh-show-buffer)) from) @@ -325,7 +221,8 @@ is available, then the message is recategorized as ham." (erase-buffer) (message "Removing spamassassin markup from message...") (call-process mh-spamassassin-executable msg-file mh-temp-buffer nil - "--remove-markup") + ;; "--remove-markup" + "-d") ; spamassassin V2.20 (if show-buffer (kill-buffer show-buffer)) (write-file msg-file) @@ -333,15 +230,17 @@ is available, then the message is recategorized as ham." (message "Recategorizing this message as ham...") (call-process mh-sa-learn-executable msg-file mh-temp-buffer nil "--single" "--ham" "--local --no-rebuild")) - (message "Whitelisting address...") - (setq from (car (ietf-drums-parse-address (mh-get-header-field "From:")))) + (message (format "Whitelisting message %d..." msg)) + (setq from + (car (mh-funcall-if-exists + ietf-drums-parse-address (mh-get-header-field "From:")))) (kill-buffer nil) - (unless (equal from "") + (unless (or (null from) (equal from "")) (mh-spamassassin-add-rule "whitelist_from" from)) - (message "Whitelisting address...done")))) + (message (format "Whitelisting message %d...done" msg))))) (defun mh-spamassassin-add-rule (rule body) - "Add a new rule to ~/.spamassassin/user_prefs. + "Add a new rule to `~/.spamassassin/user_prefs'. The name of the rule is RULE and its body is BODY." (save-window-excursion (let* ((line (format "%s\t%s\n" rule body)) @@ -358,15 +257,15 @@ The name of the rule is RULE and its body is BODY." (kill-buffer nil))))) (defun mh-spamassassin-identify-spammers () - "Identifies spammers who are repeat offenders. + "Identify spammers who are repeat offenders. -For each blacklist_from entry from the last blank line of -~/.spamassassin/user_prefs to the end of the file, a list of host and domain -names along with their frequency counts is displayed. This information can be -used to replace multiple blacklist_from entries with a single wildcard entry -such as: +This function displays a frequency count of the hosts and domains in the +`blacklist_from' entries from the last blank line in +`~/.spamassassin/user_prefs' to the end of the file. This information can be +used so that you can replace multiple `blacklist_from' entries with a single +wildcard entry such as: - blacklist_from *@*amazingoffersdirect2u.com" + blacklist_from *@*amazingoffersdirect2u.com" (interactive) (let* ((file (expand-file-name "~/.spamassassin/user_prefs")) (domains (make-hash-table :test 'equal))) @@ -385,7 +284,7 @@ such as: ;; Add counts for each host and domain part. (while host (setq value (gethash (car host) domains)) - (puthash (car host) (1+ (if (not value) 0 value)) domains) + (setf (gethash (car host) domains) (1+ (if (not value) 0 value))) (setq host (cdr host)))))) ;; Output @@ -400,6 +299,121 @@ such as: (reverse-region (point-min) (point-max)) (goto-char (point-min)))) + + +;; Bogofilter Interface + +(defvar mh-bogofilter-executable (executable-find "bogofilter")) + +(defun mh-bogofilter-blacklist (msg) + "Blacklist MSG with Bogofilter. + +Bogofilter is a Bayesian spam filtering program. Get it from your local +distribution or from http://bogofilter.sourceforge.net/. + +Bogofilter is taught by running: + + bogofilter -n < good-message + +on every good message, and + + bogofilter -s < spam-message + +on every spam message. This is called a full training; three other +training methods are described in the FAQ that is distributed with bogofilter. +Note that most Bayesian filters need 1000 to 5000 of each type of message to +start doing a good job. + +To use Bogofilter, add the following recipes to `.procmailrc': + + MAILDIR=$HOME/`mhparam Path` + + # Fight spam with Bogofilter. + :0fw + | bogofilter -3 -e -p + + :0: + * ^X-Bogosity: Yes, tests=bogofilter + spam/. + + :0: + * ^X-Bogosity: Unsure, tests=bogofilter + spam/unsure/. + +If Bogofilter classifies a message incorrectly, or is unsure, you can use the +MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist] to update +Bogofilter's training. + +The \"Bogofilter FAQ\" suggests that you run the following +occasionally to shrink the database: + + bogoutil -d wordlist.db | bogoutil -l wordlist.db.new + mv wordlist.db wordlist.db.prv + mv wordlist.db.new wordlist.db + +The \"Bogofilter tuning HOWTO\" describes how you can fine-tune Bogofilter." + (unless mh-bogofilter-executable + (error "Unable to find the bogofilter executable")) + (let ((msg-file (mh-msg-filename msg mh-current-folder))) + (call-process mh-bogofilter-executable msg-file mh-junk-background + nil "-s"))) + +(defun mh-bogofilter-whitelist (msg) + "Whitelist MSG with Bogofilter. + +See `mh-bogofilter-blacklist' for more information." + (unless mh-bogofilter-executable + (error "Unable to find the bogofilter executable")) + (let ((msg-file (mh-msg-filename msg mh-current-folder))) + (call-process mh-bogofilter-executable msg-file mh-junk-background + nil "-n"))) + + + +;; Spamprobe Interface + +(defvar mh-spamprobe-executable (executable-find "spamprobe")) + +(defun mh-spamprobe-blacklist (msg) + "Blacklist MSG with SpamProbe. + +SpamProbe is a Bayesian spam filtering program. Get it from your local +distribution or from http://spamprobe.sourceforge.net. + +To use SpamProbe, add the following recipes to `.procmailrc': + + MAILDIR=$HOME/`mhparam Path` + + # Fight spam with SpamProbe. + :0 + SCORE=| spamprobe receive + + :0 wf + | formail -I \"X-SpamProbe: $SCORE\" + + :0: + *^X-SpamProbe: SPAM + spam/. + +If SpamProbe classifies a message incorrectly, you can use the MH-E commands +\\[mh-junk-blacklist] and \\[mh-junk-whitelist] to update SpamProbe's +training." + (unless mh-spamprobe-executable + (error "Unable to find the spamprobe executable")) + (let ((msg-file (mh-msg-filename msg mh-current-folder))) + (call-process mh-spamprobe-executable msg-file mh-junk-background + nil "spam"))) + +(defun mh-spamprobe-whitelist (msg) + "Whitelist MSG with SpamProbe. + +See `mh-spamprobe-blacklist' for more information." + (unless mh-spamprobe-executable + (error "Unable to find the spamprobe executable")) + (let ((msg-file (mh-msg-filename msg mh-current-folder))) + (call-process mh-spamprobe-executable msg-file mh-junk-background + nil "good"))) + (provide 'mh-junk) ;;; Local Variables: |