diff options
| -rw-r--r-- | lisp/ChangeLog | 31 | ||||
| -rw-r--r-- | lisp/ffap.el | 115 | ||||
| -rw-r--r-- | lisp/thingatpt.el | 295 | ||||
| -rw-r--r-- | test/ChangeLog | 4 | ||||
| -rw-r--r-- | test/automated/thingatpt.el | 88 | 
5 files changed, 362 insertions, 171 deletions
| diff --git a/lisp/ChangeLog b/lisp/ChangeLog index b43b7bc61d1..9a7a48b186a 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,34 @@ +2013-02-04  Chong Yidong  <cyd@gnu.org> + +	* thingatpt.el: Rewrite the URL detection routines, absorbing some +	code from ffap.el. +	(thing-at-point-beginning-of-url-regexp): New var. +	(thing-at-point-uri-schemes): Update list of URI schemes. +	(thing-at-point-url-regexp): Variable deleted. +	(thing-at-point-markedup-url-regexp): Disallow newlines. +	(thing-at-point-newsgroup-regexp) +	(thing-at-point-newsgroup-heads) +	(thing-at-point-default-mail-uri-scheme): New variables. +	(thing-at-point-bounds-of-url-at-point): Rewrite.  Use ffap's +	method to find the possible bounds of the URI at point.  New +	optional argument to find ill-formed URIs. +	(thing-at-point-url-at-point): Rewrite.  New arguments for finding +	ill-formed URIs.  Use thing-at-point-bounds-of-url-at-point, and +	the scheme-adding heuristics from ffap-url-at-point. +	(thing-at-point--bounds-of-well-formed-url): New function.  Do +	parens matching to decide whether to include parens in the URI +	(Bug#9153). + +	* ffap.el: Require thingatpt. +	(ffap-url-at-point): Delegate URI detection to thing-at-point. +	All URI-valid characters are now recognized (Bug#5673). +	(ffap-string-at-point): Use use-region-p. +	(ffap-url-regexp): Extra character is handled by thing-at-point. +	(ffap-string-at-point-mode-alist): Allow parentheses. +	(ffap-newsgroup-regexp, ffap-newsgroup-heads, ffap-newsgroup-p): +	Convert to aliases; code moved to thingatpt.el. +	(ffap-gnus-hook): Use setq-local. 
+  2013-02-04  Glenn Morris  <rgm@gnu.org>  	* emacs-lisp/ert.el (ert--explain-format-atom): diff --git a/lisp/ffap.el b/lisp/ffap.el index c5b0784e5a2..0769469cbf2 100644 --- a/lisp/ffap.el +++ b/lisp/ffap.el @@ -106,6 +106,7 @@  ;;; Code:  (require 'url-parse) +(require 'thingatpt)  (define-obsolete-variable-alias 'ffap-version 'emacs-version "23.2") @@ -178,16 +179,14 @@ Note this name may be omitted if it equals the default    :group 'ffap)  (defvar ffap-url-regexp -  ;; Could just use `url-nonrelative-link' of w3, if loaded. -  ;; This regexp is not exhaustive, it just matches common cases.    (concat     "\\("     "news\\(post\\)?:\\|mailto:\\|file:" ; no host ok     "\\|"     "\\(ftp\\|https?\\|telnet\\|gopher\\|www\\|wais\\)://" ; needs host -   "\\)."				; require one more character -   ) -   "Regexp matching URLs.  Use nil to disable URL features in ffap.") +   "\\)") +  "Regexp matching the beginning of a URI, for FFAP. +If the value is nil, disable URL-matching features in ffap.")  (defcustom ffap-foo-at-bar-prefix "mailto"    "Presumed URL prefix type of strings like \"<foo.9z@bar>\". @@ -571,38 +570,9 @@ Looks at `ffap-ftp-default-user', returns \"\" for \"localhost\"."     (ffap-ftp-regexp (ffap-host-to-filename mach))     )) -(defvar ffap-newsgroup-regexp "^[[:lower:]]+\\.[-+[:lower:]_0-9.]+$" -  "Strings not matching this fail `ffap-newsgroup-p'.") -(defvar ffap-newsgroup-heads		; entirely inadequate -  '("alt" "comp" "gnu" "misc" "news" "sci" "soc" "talk") -  "Used by `ffap-newsgroup-p' if gnus is not running.") - -(defun ffap-newsgroup-p (string) -  "Return STRING if it looks like a newsgroup name, else nil." 
-  (and -   (string-match ffap-newsgroup-regexp string) -   (let ((htbs '(gnus-active-hashtb gnus-newsrc-hashtb gnus-killed-hashtb)) -	 (heads ffap-newsgroup-heads) -	 htb ret) -     (while htbs -       (setq htb (car htbs) htbs (cdr htbs)) -       (condition-case nil -	   (progn -	     ;; errs: htb symbol may be unbound, or not a hash-table. -	     ;; gnus-gethash is just a macro for intern-soft. -	     (and (symbol-value htb) -		  (intern-soft string (symbol-value htb)) -		  (setq ret string htbs nil)) -	     ;; If we made it this far, gnus is running, so ignore "heads": -	     (setq heads nil)) -	 (error nil))) -     (or ret (not heads) -	 (let ((head (string-match "\\`\\([[:lower:]]+\\)\\." string))) -	   (and head (setq head (substring string 0 (match-end 1))) -		(member head heads) -		(setq ret string)))) -     ;; Is there ever a need to modify string as a newsgroup name? -     ret))) +(defvaralias 'ffap-newsgroup-regexp 'thing-at-point-newsgroup-regexp) +(defvaralias 'ffap-newsgroup-heads  'thing-at-point-newsgroup-heads) +(defalias 'ffap-newsgroup-p 'thing-at-point-newsgroup-p)  (defsubst ffap-url-p (string)    "If STRING looks like an URL, return it (maybe improved), else nil." @@ -1017,7 +987,7 @@ If a given RFC isn't in these then `ffap-rfc-path' is offered."      ;; * no commas (good for latex)      (file "--:\\\\$+<>@-Z_[:alpha:]~*?" "<@" "@>;.,!:")      ;; An url, or maybe a email/news message-id: -    (url "--:=&?$+@-Z_[:alpha:]~#,%;*" "^[:alnum:]" ":;.,!?") +    (url "--:=&?$+@-Z_[:alpha:]~#,%;*()!'" "^[0-9a-zA-Z]" ":;.,!?")      ;; Find a string that does *not* contain a colon:      (nocolon "--9$+<>@-Z_[:alpha:]~" "<@" "@>;.,!?")      ;; A machine: @@ -1031,7 +1001,7 @@ possibly a major-mode name, or one of the symbol  Function `ffap-string-at-point' uses the data fields as follows:  1. find a maximal string of CHARS around point,  2. strip BEG chars before point from the beginning, -3. Strip END chars after point from the end.") +3. 
strip END chars after point from the end.")  (defvar ffap-string-at-point nil    ;; Added at suggestion of RHOGEE (for ff-paths), 7/24/95. @@ -1050,22 +1020,22 @@ Sets the variable `ffap-string-at-point' and the variable  	   (or (assq (or mode major-mode) ffap-string-at-point-mode-alist)  	       (assq 'file ffap-string-at-point-mode-alist))))  	 (pt (point)) -	 (str -	  (if (and transient-mark-mode mark-active) -	      (buffer-substring -	       (setcar ffap-string-at-point-region (region-beginning)) -	       (setcar (cdr ffap-string-at-point-region) (region-end))) -	    (buffer-substring -	     (save-excursion -	       (skip-chars-backward (car args)) -	       (skip-chars-forward (nth 1 args) pt) -	       (setcar ffap-string-at-point-region (point))) -	     (save-excursion -	       (skip-chars-forward (car args)) -	       (skip-chars-backward (nth 2 args) pt) -	       (setcar (cdr ffap-string-at-point-region) (point))))))) -    (set-text-properties 0 (length str) nil str) -    (setq ffap-string-at-point str))) +	 (beg (if (use-region-p) +		  (region-beginning) +		(save-excursion +		  (skip-chars-backward (car args)) +		  (skip-chars-forward (nth 1 args) pt) +		  (point)))) +	 (end (if (use-region-p) +		  (region-end) +		(save-excursion +		  (skip-chars-forward (car args)) +		  (skip-chars-backward (nth 2 args) pt) +		  (point))))) +    (setq ffap-string-at-point +	  (buffer-substring-no-properties +	   (setcar ffap-string-at-point-region beg) +	   (setcar (cdr ffap-string-at-point-region) end)))))  (defun ffap-string-around ()    ;; Sometimes useful to decide how to treat a string. @@ -1098,35 +1068,15 @@ Assumes the buffer has not changed."  (defun ffap-url-at-point ()    "Return URL from around point if it exists, or nil." -  ;; Could use w3's url-get-url-at-point instead.  Both handle "URL:", -  ;; ignore non-relative links, trim punctuation.  
The other will -  ;; actually look back if point is in whitespace, but I would rather -  ;; ffap be less aggressive in such situations.    (when ffap-url-regexp      (or (and (eq major-mode 'w3-mode) ; In a w3 buffer button?  	     (w3-view-this-url t)) -	;; Is there a reason not to strip trailing colon? -	(let ((name (ffap-string-at-point 'url))) -	  (cond -	   ((string-match "^url:" name) (setq name (substring name 4))) -	   ((and (string-match "\\`[^:</>@]+@[^:</>@]+[[:alnum:]]\\'" name) -		 ;; "foo@bar": could be "mailto" or "news" (a Message-ID). -		 ;; Without "<>" it must be "mailto".  Otherwise could be -		 ;; either, so consult `ffap-foo-at-bar-prefix'. -		 (let ((prefix (if (and (equal (ffap-string-around) "<>") -					;; Expect some odd characters: -					(string-match "[$.0-9].*[$.0-9].*@" name)) -				   ;; Could be news: -				   ffap-foo-at-bar-prefix -				 "mailto"))) -		   (and prefix (setq name (concat prefix ":" name)))))) -	   ((ffap-newsgroup-p name) (setq name (concat "news:" name))) -	   ((and (string-match "\\`[[:alnum:]]+\\'" name) ; <mic> <root> <nobody> -		 (equal (ffap-string-around) "<>") -		 ;;	(ffap-user-p name): -		 (not (string-match "~" (expand-file-name (concat "~" name))))) -	    (setq name (concat "mailto:" name))) -	   ((ffap-url-p name))))))) +	(let ((thing-at-point-beginning-of-url-regexp ffap-url-regexp) +	      (thing-at-point-default-mail-scheme ffap-foo-at-bar-prefix)) +	  (thing-at-point-url-at-point t +				       (if (use-region-p) +					   (cons (region-beginning) +						 (region-end))))))))  (defvar ffap-gopher-regexp    "^.*\\<\\(Type\\|Name\\|Path\\|Host\\|Port\\) *= *\\(.*\\) *$" @@ -1763,7 +1713,8 @@ Only intended for interactive use."  (defun ffap-gnus-hook ()    "Bind `ffap-gnus-next' and `ffap-gnus-menu' to M-l and M-m, resp." 
-  (set (make-local-variable 'ffap-foo-at-bar-prefix) "news") ; message-id's +  ;; message-id's +  (setq-local thing-at-point-default-mail-uri-scheme "news")    ;; Note "l", "L", "m", "M" are taken:    (local-set-key "\M-l" 'ffap-gnus-next)    (local-set-key "\M-m" 'ffap-gnus-menu)) diff --git a/lisp/thingatpt.el b/lisp/thingatpt.el index e1e3e8e1e46..9526cb76e74 100644 --- a/lisp/thingatpt.el +++ b/lisp/thingatpt.el @@ -232,7 +232,7 @@ The bounds of THING are determined by `bounds-of-thing-at-point'."  (put 'defun 'end-op       'end-of-defun)  (put 'defun 'forward-op   'end-of-defun) -;;  Filenames and URLs  www.com/foo%32bar +;;  Filenames  (defvar thing-at-point-file-name-chars "-~/[:alnum:]_.${}#%,:"    "Characters allowable in filenames.") @@ -248,94 +248,224 @@ The bounds of THING are determined by `bounds-of-thing-at-point'."  	   (forward-char)  	 (goto-char (point-min))))) +;;  URIs + +(defvar thing-at-point-beginning-of-url-regexp nil +  "Regexp matching the beginning of a well-formed URI. +If nil, construct the regexp from `thing-at-point-uri-schemes'.") +  (defvar thing-at-point-url-path-regexp    "[^]\t\n \"'<>[^`{}]*[^]\t\n \"'<>[^`{}.,;]+" -  "A regular expression probably matching the host and filename or e-mail part of a URL.") +  "Regexp matching the host and filename or e-mail part of a URL.")  (defvar thing-at-point-short-url-regexp    (concat "[-A-Za-z0-9]+\\.[-A-Za-z0-9.]+" thing-at-point-url-path-regexp) -  "A regular expression probably matching a URL without an access scheme. 
-Hostname matching is stricter in this case than for -``thing-at-point-url-regexp''.") +  "Regexp matching a URI without a scheme component.")  (defvar thing-at-point-uri-schemes    ;; Officials from http://www.iana.org/assignments/uri-schemes.html -  '("ftp://" "http://" "gopher://" "mailto:" "news:" "nntp:" -    "telnet://" "wais://" "file:/" "prospero:" "z39.50s:" "z39.50r:" -    "cid:" "mid:" "vemmi:" "service:" "imap:" "nfs:" "acap:" "rtsp:" -    "tip:" "pop:" "data:" "dav:" "opaquelocktoken:" "sip:" "tel:" "fax:" -    "modem:" "ldap:" "https://" "soap.beep:" "soap.beeps:" "urn:" "go:" -    "afs:" "tn3270:" "mailserver:" -    "crid:" "dict:" "dns:" "dtn:" "h323:" "im:" "info:" "ipp:" -    "iris.beep:" "mtqp:" "mupdate:" "pres:" "sips:" "snmp:" "tag:" -    "tftp:" "xmlrpc.beep:" "xmlrpc.beeps:" "xmpp:" -  ;; Compatibility -    "snews:" "irc:" "mms://" "mmsh://") -  "Uniform Resource Identifier (URI) Schemes.") - -(defvar thing-at-point-url-regexp -  (concat "\\<\\(" (mapconcat 'identity thing-at-point-uri-schemes "\\|") "\\)" -          thing-at-point-url-path-regexp) -  "A regular expression probably matching a complete URL.") - -(defvar thing-at-point-markedup-url-regexp -  "<URL:[^>]+>" -  "A regular expression matching a URL marked up per RFC1738. 
-This may contain whitespace (including newlines) .") +  '("aaa://" "about:" "acap://" "apt:" "bzr://" "bzr+ssh://" +    "attachment:/" "chrome://" "cid:" "content://" "crid://" "cvs://" +    "data:" "dav:" "dict://" "doi:" "dns:" "dtn:" "feed:" "file:/" +    "finger://" "fish://" "ftp://" "geo:" "git://" "go:" "gopher://" +    "h323:" "http://" "https://" "im:" "imap://" "info:" "ipp:" +    "irc://" "irc6://" "ircs://" "iris.beep:" "jar:" "ldap://" +    "ldaps://" "mailto:" "mid:"  "mtqp://" "mupdate://" "news:" +    "nfs://" "nntp://" "opaquelocktoken:" "pop://" "pres:" +    "resource://" "rmi://" "rsync://" "rtsp://" "rtspu://" "service:" +    "sftp://" "sip:" "sips:" "smb://" "sms:" "snmp://" "soap.beep://" +    "soap.beeps://" "ssh://" "svn://" "svn+ssh://" "tag:" "tel:" +    "telnet://" "tftp://" "tip://" "tn3270://" "udp://" "urn:" +    "uuid:" "vemmi://"  "webcal://" "xri://" "xmlrpc.beep://" +    "xmlrpc.beeps://" "z39.50r://" "z39.50s://" "xmpp:" +    ;; Compatibility +    "fax:" "mms://" "mmsh://" "modem:" "prospero:" "snews:" +    "wais://") +  "List of URI schemes recognized by `thing-at-point-url-at-point'. +Each string in this list should correspond to the start of a +URI's scheme component, up to and including the trailing // if +the scheme calls for that to be present.") + +(defvar thing-at-point-markedup-url-regexp "<URL:\\([^<>\n]+\\)>" +  "Regexp matching a URL marked up per RFC1738. +This kind of markup was formerly recommended as a way to indicate +URIs, but as of RFC 3986 it is no longer recommended. 
+Subexpression 1 should contain the delimited URL.") + +(defvar thing-at-point-newsgroup-regexp +  "\\`[[:lower:]]+\\.[-+[:lower:]_0-9.]+\\'" +  "Regexp matching a newsgroup name.") + +(defvar thing-at-point-newsgroup-heads +  '("alt" "comp" "gnu" "misc" "news" "sci" "soc" "talk") +  "Used by `thing-at-point-newsgroup-p' if gnus is not running.") + +(defvar thing-at-point-default-mail-uri-scheme "mailto" +  "Default scheme for ill-formed URIs that look like <foo@example.com>. +If nil, do not give such URIs a scheme.")  (put 'url 'bounds-of-thing-at-point 'thing-at-point-bounds-of-url-at-point) -(defun thing-at-point-bounds-of-url-at-point () -  (let ((strip (thing-at-point-looking-at -			 thing-at-point-markedup-url-regexp))) ;; (url "") short -    (if (or strip -	    (thing-at-point-looking-at thing-at-point-url-regexp) -	    ;; Access scheme omitted? -	    ;; (setq short (thing-at-point-looking-at -	    ;;     	 thing-at-point-short-url-regexp)) -            ) -	(let ((beginning (match-beginning 0)) -	      (end (match-end 0))) -	  (when strip -            (setq beginning (+ beginning 5)) -            (setq end (- end 1))) -	  (cons beginning end))))) + +(defun thing-at-point-bounds-of-url-at-point (&optional lax) +  "Return a cons cell containing the start and end of the URI at point. +Try to find a URI using `thing-at-point-markedup-url-regexp'. +If that fails, try with `thing-at-point-beginning-of-url-regexp'. +If that also fails, and optional argument LAX is non-nil, return +the bounds of a possible ill-formed URI (one lacking a scheme)." +  ;; Look for the old <URL:foo> markup.  If found, use it. +  (or (thing-at-point--bounds-of-markedup-url) +      ;; Otherwise, find the bounds within which a URI may exist.  The +      ;; method is similar to `ffap-string-at-point'.  Note that URIs +      ;; may contain parentheses but may not contain spaces (RFC3986). 
+      (let* ((allowed-chars "--:=&?$+@-Z_[:alpha:]~#,%;*()!'") +	     (skip-before "^[0-9a-zA-Z]") +	     (skip-after  ":;.,!?") +	     (pt (point)) +	     (beg (save-excursion +		    (skip-chars-backward allowed-chars) +		    (skip-chars-forward skip-before pt) +		    (point))) +	     (end (save-excursion +		    (skip-chars-forward allowed-chars) +		    (skip-chars-backward skip-after pt) +		    (point)))) +	(or (thing-at-point--bounds-of-well-formed-url beg end pt) +	    (if lax (cons beg end)))))) + +(defun thing-at-point--bounds-of-markedup-url () +  (when thing-at-point-markedup-url-regexp +    (let ((case-fold-search t) +	  (pt (point)) +	  (beg (line-beginning-position)) +	  (end (line-end-position)) +	  found) +      (save-excursion +	(goto-char beg) +	(while (and (not found) +		    (<= (point) pt) +		    (< (point) end)) +	  (and (re-search-forward thing-at-point-markedup-url-regexp +				  end 1) +	       (> (point) pt) +	       (setq found t)))) +      (if found +	  (cons (match-beginning 1) (match-end 1)))))) + +(defun thing-at-point--bounds-of-well-formed-url (beg end pt) +  (save-excursion +    (goto-char beg) +    (let (url-beg paren-end regexp) +      (save-restriction +	(narrow-to-region beg end) +	;; The scheme component must either match at BEG, or have no +	;; other alphanumerical ASCII characters before it. +	(setq regexp (concat "\\(?:\\`\\|[^a-zA-Z0-9]\\)\\(" +			     (or thing-at-point-beginning-of-url-regexp +				 (regexp-opt thing-at-point-uri-schemes)) +			     "\\)")) +	(and (re-search-forward regexp end t) +	     ;; URI must have non-empty contents. +	     (< (point) end) +	     (setq url-beg (match-beginning 1)))) +      (when url-beg +	;; If there is an open paren before the URI, truncate to the +	;; matching close paren. 
+	(and (> url-beg (point-min)) +	     (eq (car-safe (syntax-after (1- url-beg))) 4) +	     (save-restriction +	       (narrow-to-region (1- url-beg) (min end (point-max))) +	       (setq paren-end (ignore-errors +				 (scan-lists (1- url-beg) 1 0)))) +	     (not (blink-matching-check-mismatch (1- url-beg) paren-end)) +	     (setq end (1- paren-end))) +	(cons url-beg end)))))  (put 'url 'thing-at-point 'thing-at-point-url-at-point) -(defun thing-at-point-url-at-point () -  "Return the URL around or before point. -Search backwards for the start of a URL ending at or after point.  If -no URL found, return nil.  The access scheme will be prepended if -absent: \"mailto:\" if the string contains \"@\", \"ftp://\" if it -starts with \"ftp\" and not \"ftp:/\", or \"http://\" by default." - -  (let ((url "") short strip) -    (if (or (setq strip (thing-at-point-looking-at -			 thing-at-point-markedup-url-regexp)) -	    (thing-at-point-looking-at thing-at-point-url-regexp) -	    ;; Access scheme omitted? -	    (setq short (thing-at-point-looking-at -			 thing-at-point-short-url-regexp))) -	(progn -	  (setq url (buffer-substring-no-properties (match-beginning 0) -						    (match-end 0))) -	  (and strip (setq url (substring url 5 -1))) ; Drop "<URL:" & ">" -	  ;; strip whitespace -	  (while (string-match "[ \t\n\r]+" url) -	    (setq url (replace-match "" t t url))) -	  (and short (setq url (concat (cond ((string-match "^[a-zA-Z]+:" url) -					       ;; already has a URL scheme. -					       "") -					     ((string-match "@" url) -                                              "mailto:") -					     ;; e.g. ftp.swiss... or ftp-swiss... 
-                                             ((string-match "^ftp" url) -                                              "ftp://") -                                             (t "http://")) -                                       url))) -	  (if (string-equal "" url) -	      nil -	    url))))) +(defun thing-at-point-url-at-point (&optional lax bounds) +  "Return the URL around or before point. +If no URL is found, return nil. + +If optional argument LAX is non-nil, look for URLs that are not +well-formed, such as foo@bar or <nobody>. + +If optional arguments BOUNDS are non-nil, it should be a cons +cell of the form (START . END), containing the beginning and end +positions of the URI.  Otherwise, these positions are detected +automatically from the text around point. + +If the scheme component is absent, either because a URI delimited +with <url:...> lacks one, or because an ill-formed URI was found +with LAX or BEG and END, try to add a scheme in the returned URI. +The scheme is chosen heuristically: \"mailto:\" if the address +looks like an email address, \"ftp://\" if it starts with +\"ftp\", etc." +  (unless bounds +    (setq bounds (thing-at-point-bounds-of-url-at-point lax))) +  (when (and bounds (< (car bounds) (cdr bounds))) +    (let ((str (buffer-substring-no-properties (car bounds) (cdr bounds)))) +      ;; If there is no scheme component, try to add one. +      (unless (string-match "\\`[a-zA-Z][-a-zA-Z0-9+.]*:" str) +	(or +	 ;; If the URI has the form <foo@bar>, treat it according to +	 ;; `thing-at-point-default-mail-uri-scheme'.  If there are +	 ;; no angle brackets, it must be mailto. 
+	 (when (string-match "\\`[^:</>@]+@[-.0-9=&?$+A-Z_a-z~#,%;*]" str) +	   (let ((scheme (if (and (eq (char-before (car bounds)) ?<) +				  (eq (char-after  (cdr bounds)) ?>)) +			     thing-at-point-default-mail-uri-scheme +			   "mailto"))) +	     (if scheme +		 (setq str (concat scheme ":" str))))) +	 ;; If the string is like <FOO>, where FOO is an existing user +	 ;; name on the system, treat that as an email address. +	 (and (string-match "\\`[[:alnum:]]+\\'" str) +	      (eq (char-before (car bounds)) ?<) +	      (eq (char-after  (cdr bounds)) ?>) +	      (not (string-match "~" (expand-file-name (concat "~" str)))) +	      (setq str (concat "mailto:" str))) +	 ;; If it looks like news.example.com, treat it as news. +	 (if (thing-at-point-newsgroup-p str) +	     (setq str (concat "news:" str))) +	 ;; If it looks like ftp.example.com. treat it as ftp. +	 (if (string-match "\\`ftp\\." str) +	     (setq str (concat "ftp://" str))) +	 ;; If it looks like www.example.com. treat it as http. +	 (if (string-match "\\`www\\." str) +	     (setq str (concat "http://" str))) +	 ;; Otherwise, it just isn't a URI. +	 (setq str nil))) +      str))) + +(defun thing-at-point-newsgroup-p (string) +  "Return STRING if it looks like a newsgroup name, else nil." +  (and +   (string-match thing-at-point-newsgroup-regexp string) +   (let ((htbs '(gnus-active-hashtb gnus-newsrc-hashtb gnus-killed-hashtb)) +	 (heads thing-at-point-newsgroup-heads) +	 htb ret) +     (while htbs +       (setq htb (car htbs) htbs (cdr htbs)) +       (condition-case nil +	   (progn +	     ;; errs: htb symbol may be unbound, or not a hash-table. +	     ;; gnus-gethash is just a macro for intern-soft. +	     (and (symbol-value htb) +		  (intern-soft string (symbol-value htb)) +		  (setq ret string htbs nil)) +	     ;; If we made it this far, gnus is running, so ignore "heads": +	     (setq heads nil)) +	 (error nil))) +     (or ret (not heads) +	 (let ((head (string-match "\\`\\([[:lower:]]+\\)\\." 
string))) +	   (and head (setq head (substring string 0 (match-end 1))) +		(member head heads) +		(setq ret string)))) +     ret))) + +(put 'url 'end-op (lambda () (end-of-thing 'url))) + +(put 'url 'beginning-op (lambda () (beginning-of-thing 'url)))  ;; The normal thingatpt mechanism doesn't work for complex regexps.  ;; This should work for almost any regexp wherever we are in the @@ -372,19 +502,6 @@ point."  	(goto-char match)  	(looking-at regexp))))) -(put 'url 'end-op -     (lambda () -       (let ((bounds (thing-at-point-bounds-of-url-at-point))) -         (if bounds -             (goto-char (cdr bounds)) -           (error "No URL here"))))) -(put 'url 'beginning-op -     (lambda () -       (let ((bounds (thing-at-point-bounds-of-url-at-point))) -         (if bounds -             (goto-char (car bounds)) -           (error "No URL here"))))) -  ;;   Email addresses  (defvar thing-at-point-email-regexp    "<?[-+_.~a-zA-Z][-+_.~:a-zA-Z0-9]*@[-.a-zA-Z0-9]+>?" diff --git a/test/ChangeLog b/test/ChangeLog index 651453566f2..41bb1be190e 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,7 @@ +2013-02-04  Chong Yidong  <cyd@gnu.org> + +	* automated/thingatpt.el: New file. +  2013-02-03  Chong Yidong  <cyd@gnu.org>  	* automated/files.el (file-test--do-local-variables-test): Avoid diff --git a/test/automated/thingatpt.el b/test/automated/thingatpt.el new file mode 100644 index 00000000000..f33a8f4b0e6 --- /dev/null +++ b/test/automated/thingatpt.el @@ -0,0 +1,88 @@ +;;; thingatpt.el --- tests for thing-at-point. + +;; Copyright (C) 2013 Free Software Foundation, Inc. + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. 
+ +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>. + +;;; Code: + +(require 'ert) + +(defvar thing-at-point-test-data +  '(("http://1.gnu.org" 1  url "http://1.gnu.org") +    ("http://2.gnu.org" 6 url "http://2.gnu.org") +    ("http://3.gnu.org" 19 url "http://3.gnu.org") +    ("https://4.gnu.org" 1  url "https://4.gnu.org") +    ("bzr://savannah.gnu.org" 1 url "bzr://savannah.gnu.org") +    ("A geo URI (geo:3.14159,-2.71828)." 12 url "geo:3.14159,-2.71828") +    ("Visit http://5.gnu.org now." 5 url nil) +    ("Visit http://6.gnu.org now." 7 url "http://6.gnu.org") +    ("Visit http://7.gnu.org now." 22 url "http://7.gnu.org") +    ("Visit http://8.gnu.org now." 22 url "http://8.gnu.org") +    ("Visit http://9.gnu.org now." 24 url nil) +    ;; Invalid URIs +    ("<<<<" 2 url nil) +    ("<>" 1 url nil) +    ("<url:>" 1 url nil) +    ("http://" 1 url nil) +    ;; Invalid schema +    ("foo://www.gnu.org" 1 url nil) +    ("foohttp://www.gnu.org" 1 url nil) +    ;; Non alphanumeric characters can be found in URIs +    ("ftp://example.net/~foo!;#bar=baz&goo=bob" 3 url "ftp://example.net/~foo!;#bar=baz&goo=bob") +    ("bzr+ssh://user@example.net:5/a%20d,5" 34 url "bzr+ssh://user@example.net:5/a%20d,5") +    ;; <url:...> markup +    ("Url: <url:foo://1.example.com>..." 8 url "foo://1.example.com") +    ("Url: <url:foo://2.example.com>..." 30 url "foo://2.example.com") +    ("Url: <url:foo://www.gnu.org/a bc>..." 20 url "foo://www.gnu.org/a bc") +    ;; Hack used by thing-at-point: drop punctuation at end of URI. +    ("Go to http://www.gnu.org, for details" 7 url "http://www.gnu.org") +    ("Go to http://www.gnu.org." 
24 url "http://www.gnu.org") +    ;; Standard URI delimiters +    ("Go to \"http://10.gnu.org\"." 8 url "http://10.gnu.org") +    ("Go to \"http://11.gnu.org/\"." 26 url "http://11.gnu.org/") +    ("Go to <http://12.gnu.org> now." 8 url "http://12.gnu.org") +    ("Go to <http://13.gnu.org> now." 24 url "http://13.gnu.org") +    ;; Parenthesis handling (non-standard) +    ("http://example.com/a(b)c" 21 url "http://example.com/a(b)c") +    ("http://example.com/a(b)" 21 url "http://example.com/a(b)") +    ("(http://example.com/abc)" 2 url "http://example.com/abc") +    ("This (http://example.com/a(b))" 7 url "http://example.com/a(b)") +    ("This (http://example.com/a(b))" 30 url "http://example.com/a(b)") +    ("This (http://example.com/a(b))" 5 url nil) +    ("http://example.com/ab)c" 4 url "http://example.com/ab)c") +    ;; URL markup, lacking schema +    ("<url:foo@example.com>" 1 url "mailto:foo@example.com") +    ("<url:ftp.example.net/abc/>" 1 url "ftp://ftp.example.net/abc/")) +  "List of thing-at-point tests. +Each list element should have the form + +  (STRING POS THING RESULT) + +where STRING is a string of buffer contents, POS is the value of +point, THING is a symbol argument for `thing-at-point', and +RESULT should be the result of calling `thing-at-point' from that +position to retrieve THING.") + +(ert-deftest thing-at-point-tests () +  "Check `thing-at-point' against each entry in `thing-at-point-test-data'." +  (dolist (test thing-at-point-test-data) +    (with-temp-buffer +      (insert (nth 0 test)) +      (goto-char (nth 1 test)) +      (should (equal (thing-at-point (nth 2 test)) (nth 3 test)))))) + +;;; thingatpt.el ends here | 
