summary | refs | log | tree | commit | diff
path: root/trunk/src/examples/urlExtractorNew.py
diff options
context:
space:
mode:
author: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-08-09 21:50:19 +0000
committer: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-08-09 21:50:19 +0000
commit: 0b19bb71ba5a4afa84e673a8239935426fa0db23 (patch)
tree: e9abae9c616fdfdfebd9a8a0931d8f21824f30d2 /trunk/src/examples/urlExtractorNew.py
parent: b2c3ade75384efe76b8774b607e17fe98fab92ef (diff)
download: pyparsing_2.1.6.tar.gz
Remove incorrect tag directory pyparsing_2.1.6
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/tags/pyparsing_2.1.6@405 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
Diffstat (limited to 'trunk/src/examples/urlExtractorNew.py')
-rw-r--r-- trunk/src/examples/urlExtractorNew.py 35
1 files changed, 0 insertions, 35 deletions
diff --git a/trunk/src/examples/urlExtractorNew.py b/trunk/src/examples/urlExtractorNew.py
deleted file mode 100644
index 0aac875..0000000
--- a/trunk/src/examples/urlExtractorNew.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# URL extractor
-# Copyright 2004, Paul McGuire
-from pyparsing import Literal,Suppress,CharsNotIn,CaselessLiteral,\
- Word,dblQuotedString,alphanums,SkipTo,makeHTMLTags
-import urllib.request, urllib.parse, urllib.error
-import pprint
-
-# Define the pyparsing grammar for a URL, that is:
-# URLlink ::= <a href= URL>linkText</a>
-# URL ::= doubleQuotedString | alphanumericWordPath
-# Note that whitespace may appear just about anywhere in the link. Note also
-# that it is not necessary to explicitly show this in the pyparsing grammar; by default,
-# pyparsing skips over whitespace between tokens.
-linkOpenTag,linkCloseTag = makeHTMLTags("a")
-link = linkOpenTag + SkipTo(linkCloseTag)("body") + linkCloseTag.suppress()
-
-# Go get some HTML with some links in it.
-serverListPage = urllib.request.urlopen( "http://www.google.com" )
-htmlText = serverListPage.read()
-serverListPage.close()
-
-# scanString is a generator that loops through the input htmlText, and for each
-# match yields the tokens and start and end locations (for this application, we are
-# not interested in the start and end values).
-for toks,strt,end in link.scanString(htmlText):
- print(toks.startA.href,"->",toks.body)
-
-# Create dictionary from list comprehension, assembled from each pair of tokens returned
-# from a matched URL.
-pprint.pprint(
- dict( [ (toks.body,toks.startA.href) for toks,strt,end in link.scanString(htmlText) ] )
- )
-
-
-