From 0b19bb71ba5a4afa84e673a8239935426fa0db23 Mon Sep 17 00:00:00 2001
From: ptmcg
Date: Tue, 9 Aug 2016 21:50:19 +0000
Subject: Remove incorrect tag directory

git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/tags/pyparsing_2.1.6@405 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
---
 trunk/src/examples/htmlStripper.py | 39 --------------------------------------
 1 file changed, 39 deletions(-)
 delete mode 100644 trunk/src/examples/htmlStripper.py

(limited to 'trunk/src/examples/htmlStripper.py')

diff --git a/trunk/src/examples/htmlStripper.py b/trunk/src/examples/htmlStripper.py
deleted file mode 100644
index 0b0f459..0000000
--- a/trunk/src/examples/htmlStripper.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# htmlStripper.py
-#
-# Sample code for stripping HTML markup tags and scripts from
-# HTML source files.
-#
-# Copyright (c) 2006, Paul McGuire
-#
-from pyparsing import *
-import urllib.request, urllib.parse, urllib.error
-
-removeText = replaceWith("")
-scriptOpen,scriptClose = makeHTMLTags("script")
-scriptBody = scriptOpen + SkipTo(scriptClose) + scriptClose
-scriptBody.setParseAction(removeText)
-
-anyTag,anyClose = makeHTMLTags(Word(alphas,alphanums+":_"))
-anyTag.setParseAction(removeText)
-anyClose.setParseAction(removeText)
-htmlComment.setParseAction(removeText)
-
-commonHTMLEntity.setParseAction(replaceHTMLEntity)
-
-# get some HTML
-targetURL = "http://wiki.python.org/moin/PythonDecoratorLibrary"
-targetPage = urllib.request.urlopen( targetURL )
-targetHTML = targetPage.read()
-targetPage.close()
-
-# first pass, strip out tags and translate entities
-firstPass = (htmlComment | scriptBody | commonHTMLEntity |
- anyTag | anyClose ).transformString(targetHTML)
-
-# first pass leaves many blank lines, collapse these down
-repeatedNewlines = LineEnd() + OneOrMore(LineEnd())
-repeatedNewlines.setParseAction(replaceWith("\n\n"))
-secondPass = repeatedNewlines.transformString(firstPass)
-
-print(secondPass)
\ No newline at end of file
-- 
cgit v1.2.1