# URL extractor
# Copyright 2004, Paul McGuire
#
# Provenance: this script is the content of trunk/src/examples/urlExtractorNew.py
# as removed by pyparsing commit 0b19bb71ba5a4afa84e673a8239935426fa0db23
# ("Remove incorrect tag directory", ptmcg, 2016-08-09, tag pyparsing_2.1.6).
#
# Fetches a web page, extracts every <a ...>...</a> link from it with a
# pyparsing grammar, prints each "href -> link text" pair, and finally
# pretty-prints a dictionary mapping link text to href.
import pprint
import urllib.error
import urllib.parse
import urllib.request

# Only SkipTo and makeHTMLTags are used below; the remaining names are kept
# because they were imported by the original example.
from pyparsing import (
    CaselessLiteral,
    CharsNotIn,
    Literal,
    SkipTo,
    Suppress,
    Word,
    alphanums,
    dblQuotedString,
    makeHTMLTags,
)

# Define the pyparsing grammar for a URL, that is:
#    URLlink ::= <a href= URL>linkText</a>
#    URL ::= doubleQuotedString | alphanumericWordPath
# Note that whitespace may appear just about anywhere in the link.  Note also
# that it is not necessary to explicitly show this in the pyparsing grammar;
# by default, pyparsing skips over whitespace between tokens.
#
# makeHTMLTags("a") supplies the opening- and closing-tag expressions.  The
# text between them is captured under the results name "body", and the closing
# tag is suppressed so it does not appear in the parsed tokens.
linkOpenTag, linkCloseTag = makeHTMLTags("a")
link = linkOpenTag + SkipTo(linkCloseTag)("body") + linkCloseTag.suppress()

# Go get some HTML with some links in it.  The `with` block guarantees the
# connection is closed even if reading fails.  read() returns bytes, so decode
# to text before parsing, using the charset declared by the server when there
# is one and UTF-8 otherwise; undecodable bytes are replaced rather than
# aborting the whole run.
with urllib.request.urlopen("http://www.google.com") as serverListPage:
    charset = serverListPage.headers.get_content_charset() or "utf-8"
    htmlText = serverListPage.read().decode(charset, errors="replace")

# scanString is a generator that loops through the input htmlText, and for each
# match yields the tokens and start and end locations (for this application, we
# are not interested in the start and end values).  Scan once and keep the
# (href, link text) pairs so the page is not parsed a second time below.
links = [
    (toks.startA.href, toks.body)
    for toks, _start, _end in link.scanString(htmlText)
]

for href, body in links:
    print(href, "->", body)

# Create a dictionary keyed by link text; when the same text occurs more than
# once, the last href seen wins.
pprint.pprint({body: href for href, body in links})