summary | refs | log | tree | commit | diff
path: root/trunk/src/examples/htmlStripper.py
diff options
context:
space:
mode:
author ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-08-09 21:50:19 +0000
committer ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-08-09 21:50:19 +0000
commit 0b19bb71ba5a4afa84e673a8239935426fa0db23 (patch)
tree e9abae9c616fdfdfebd9a8a0931d8f21824f30d2 /trunk/src/examples/htmlStripper.py
parent b2c3ade75384efe76b8774b607e17fe98fab92ef (diff)
download pyparsing_2.1.6.tar.gz
Remove incorrect tag directory (pyparsing_2.1.6)
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/tags/pyparsing_2.1.6@405 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
Diffstat (limited to 'trunk/src/examples/htmlStripper.py')
-rw-r--r-- trunk/src/examples/htmlStripper.py 39
1 files changed, 0 insertions, 39 deletions
diff --git a/trunk/src/examples/htmlStripper.py b/trunk/src/examples/htmlStripper.py
deleted file mode 100644
index 0b0f459..0000000
--- a/trunk/src/examples/htmlStripper.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# htmlStripper.py
-#
-# Sample code for stripping HTML markup tags and scripts from
-# HTML source files.
-#
-# Copyright (c) 2006, Paul McGuire
-#
-from pyparsing import *
-import urllib.request, urllib.parse, urllib.error
-
-removeText = replaceWith("")
-scriptOpen,scriptClose = makeHTMLTags("script")
-scriptBody = scriptOpen + SkipTo(scriptClose) + scriptClose
-scriptBody.setParseAction(removeText)
-
-anyTag,anyClose = makeHTMLTags(Word(alphas,alphanums+":_"))
-anyTag.setParseAction(removeText)
-anyClose.setParseAction(removeText)
-htmlComment.setParseAction(removeText)
-
-commonHTMLEntity.setParseAction(replaceHTMLEntity)
-
-# get some HTML
-targetURL = "http://wiki.python.org/moin/PythonDecoratorLibrary"
-targetPage = urllib.request.urlopen( targetURL )
-targetHTML = targetPage.read()
-targetPage.close()
-
-# first pass, strip out tags and translate entities
-firstPass = (htmlComment | scriptBody | commonHTMLEntity |
- anyTag | anyClose ).transformString(targetHTML)
-
-# first pass leaves many blank lines, collapse these down
-repeatedNewlines = LineEnd() + OneOrMore(LineEnd())
-repeatedNewlines.setParseAction(replaceWith("\n\n"))
-secondPass = repeatedNewlines.transformString(firstPass)
-
-print(secondPass)
\ No newline at end of file