diff options
| author | Guido van Rossum <guido@python.org> | 1995-02-27 13:16:55 +0000 | 
|---|---|---|
| committer | Guido van Rossum <guido@python.org> | 1995-02-27 13:16:55 +0000 | 
| commit | 7c750e1e099128157430d26ffa7e2a44d87daf3c (patch) | |
| tree | 7c74472b5402733b5d52519799fbc9415fc4cb6c | |
| parent | eb9e9d2b2a61629e7562587a679367c3bb52c92b (diff) | |
| download | cpython-git-7c750e1e099128157430d26ffa7e2a44d87daf3c.tar.gz | |
added html parser and supporting cast
| -rw-r--r-- | Lib/Para.py | 408 | ||||
| -rw-r--r-- | Lib/fmt.py | 621 | ||||
| -rw-r--r-- | Lib/htmllib.py | 635 | ||||
| -rw-r--r-- | Lib/lib-old/Para.py | 408 | ||||
| -rw-r--r-- | Lib/lib-old/fmt.py | 621 | ||||
| -rw-r--r-- | Lib/sgmllib.py | 321 | 
6 files changed, 3014 insertions, 0 deletions
| diff --git a/Lib/Para.py b/Lib/Para.py new file mode 100644 index 0000000000..6a7057ddbf --- /dev/null +++ b/Lib/Para.py @@ -0,0 +1,408 @@ +# Text formatting abstractions + + +# Oft-used type object +Int = type(0) + + +# Represent a paragraph.  This is a list of words with associated +# font and size information, plus indents and justification for the +# entire paragraph. +# Once the words have been added to a paragraph, it can be laid out +# for different line widths.  Once laid out, it can be rendered at +# different screen locations.  Once rendered, it can be queried +# for mouse hits, and parts of the text can be highlighted +class Para: +	# +	def __init__(self): +		self.words = [] # The words +		self.just = 'l' # Justification: 'l', 'r', 'lr' or 'c' +		self.indent_left = self.indent_right = self.indent_hang = 0 +		# Final lay-out parameters, may change +		self.left = self.top = self.right = self.bottom = \ +			self.width = self.height = self.lines = None +	# +	# Add a word, computing size information for it. +	# Words may also be added manually by appending to self.words +	# Each word should be a 7-tuple: +	# (font, text, width, space, stretch, ascent, descent) +	def addword(self, d, font, text, space, stretch): +		if font <> None: +			d.setfont(font) +		width = d.textwidth(text) +		ascent = d.baseline() +		descent = d.lineheight() - ascent +		spw = d.textwidth(' ') +		space = space * spw +		stretch = stretch * spw +		tuple = (font, text, width, space, stretch, ascent, descent) +		self.words.append(tuple) +	# +	# Hooks to begin and end anchors -- insert numbers in the word list! 
+	def bgn_anchor(self, id): +		self.words.append(id) +	# +	def end_anchor(self, id): +		self.words.append(0) +	# +	# Return the total length (width) of the text added so far, in pixels +	def getlength(self): +		total = 0 +		for word in self.words: +			if type(word) <> Int: +				total = total + word[2] + word[3] +		return total +	# +	# Tab to a given position (relative to the current left indent): +	# remove all stretch, add fixed space up to the new indent. +	# If the current position is already beying the tab stop, +	# don't add any new space (but still remove the stretch) +	def tabto(self, tab): +		total = 0 +		as, de = 1, 0 +		for i in range(len(self.words)): +			word = self.words[i] +			if type(word) == Int: continue +			fo, te, wi, sp, st, as, de = word +			self.words[i] = fo, te, wi, sp, 0, as, de +			total = total + wi + sp +		if total < tab: +			self.words.append(None, '', 0, tab-total, 0, as, de) +	# +	# Make a hanging tag: tab to hang, increment indent_left by hang, +	# and reset indent_hang to -hang +	def makehangingtag(self, hang): +		self.tabto(hang) +		self.indent_left = self.indent_left + hang +		self.indent_hang = -hang +	# +	# Decide where the line breaks will be given some screen width +	def layout(self, linewidth): +		self.width = linewidth +		height = 0 +		self.lines = lines = [] +		avail1 = self.width - self.indent_left - self.indent_right +		avail = avail1 - self.indent_hang +		words = self.words +		i = 0 +		n = len(words) +		lastfont = None +		while i < n: +			firstfont = lastfont +			charcount = 0 +			width = 0 +			stretch = 0 +			ascent = 0 +			descent = 0 +			lsp = 0 +			j = i +			while i < n: +				word = words[i] +				if type(word) == Int: +					if word > 0 and width >= avail: +						break +					i = i+1 +					continue +				fo, te, wi, sp, st, as, de = word +				if width + wi > avail and width > 0 and wi > 0: +					break +				if fo <> None: +					lastfont = fo +					if width == 0: +						firstfont = fo +				charcount = charcount + len(te) 
+ (sp > 0) +				width = width + wi + sp +				lsp = sp +				stretch = stretch + st +				lst = st +				ascent = max(ascent, as) +				descent = max(descent, de) +				i = i+1 +			while i > j and type(words[i-1]) == Int and \ +				words[i-1] > 0: i = i-1 +			width = width - lsp +			if i < n: +				stretch = stretch - lst +			else: +				stretch = 0 +			tuple = i-j, firstfont, charcount, width, stretch, \ +				ascent, descent +			lines.append(tuple) +			height = height + ascent + descent +			avail = avail1 +		self.height = height +	# +	# Call a function for all words in a line +	def visit(self, wordfunc, anchorfunc): +		avail1 = self.width - self.indent_left - self.indent_right +		avail = avail1 - self.indent_hang +		v = self.top +		i = 0 +		for tuple in self.lines: +			wordcount, firstfont, charcount, width, stretch, \ +				ascent, descent = tuple +			h = self.left + self.indent_left +			if i == 0: h = h + self.indent_hang +			extra = 0 +			if self.just == 'r': h = h + avail - width +			elif self.just == 'c': h = h + (avail - width) / 2 +			elif self.just == 'lr' and stretch > 0: +				extra = avail - width +			v2 = v + ascent + descent +			for j in range(i, i+wordcount): +				word = self.words[j] +				if type(word) == Int: +					ok = anchorfunc(self, tuple, word, \ +							h, v) +					if ok <> None: return ok +					continue +				fo, te, wi, sp, st, as, de = word +				if extra > 0 and stretch > 0: +					ex = extra * st / stretch +					extra = extra - ex +					stretch = stretch - st +				else: +					ex = 0 +				h2 = h + wi + sp + ex +				ok = wordfunc(self, tuple, word, h, v, \ +					h2, v2, (j==i), (j==i+wordcount-1)) +				if ok <> None: return ok +				h = h2 +			v = v2 +			i = i + wordcount +			avail = avail1 +	# +	# Render a paragraph in "drawing object" d, using the rectangle +	# given by (left, top, right) with an unspecified bottom. +	# Return the computed bottom of the text. 
+	def render(self, d, left, top, right): +		if self.width <> right-left: +			self.layout(right-left) +		self.left = left +		self.top = top +		self.right = right +		self.bottom = self.top + self.height +		self.anchorid = 0 +		try: +			self.d = d +			self.visit(self.__class__._renderword, \ +				   self.__class__._renderanchor) +		finally: +			self.d = None +		return self.bottom +	# +	def _renderword(self, tuple, word, h, v, h2, v2, isfirst, islast): +		if word[0] <> None: self.d.setfont(word[0]) +		baseline = v + tuple[5] +		self.d.text((h, baseline - word[5]), word[1]) +		if self.anchorid > 0: +			self.d.line((h, baseline+2), (h2, baseline+2)) +	# +	def _renderanchor(self, tuple, word, h, v): +		self.anchorid = word +	# +	# Return which anchor(s) was hit by the mouse +	def hitcheck(self, mouseh, mousev): +		self.mouseh = mouseh +		self.mousev = mousev +		self.anchorid = 0 +		self.hits = [] +		self.visit(self.__class__._hitcheckword, \ +			   self.__class__._hitcheckanchor) +		return self.hits +	# +	def _hitcheckword(self, tuple, word, h, v, h2, v2, isfirst, islast): +		if self.anchorid > 0 and h <= self.mouseh <= h2 and \ +			v <= self.mousev <= v2: +			self.hits.append(self.anchorid) +	# +	def _hitcheckanchor(self, tuple, word, h, v): +		self.anchorid = word +	# +	# Return whether the given anchor id is present +	def hasanchor(self, id): +		return id in self.words or -id in self.words +	# +	# Extract the raw text from the word list, substituting one space +	# for non-empty inter-word space, and terminating with '\n' +	def extract(self): +		text = '' +		for w in self.words: +			if type(w) <> Int: +				word = w[1] +				if w[3]: word = word + ' ' +				text = text + word +		return text + '\n' +	# +	# Return which character position was hit by the mouse, as +	# an offset in the entire text as returned by extract(). 
+	# Return None if the mouse was not in this paragraph +	def whereis(self, d, mouseh, mousev): +		if mousev < self.top or mousev > self.bottom: +			return None +		self.mouseh = mouseh +		self.mousev = mousev +		self.lastfont = None +		self.charcount = 0 +		try: +			self.d = d +			return self.visit(self.__class__._whereisword, \ +					  self.__class__._whereisanchor) +		finally: +			self.d = None +	# +	def _whereisword(self, tuple, word, h1, v1, h2, v2, isfirst, islast): +		fo, te, wi, sp, st, as, de = word +		if fo <> None: self.lastfont = fo +		h = h1 +		if isfirst: h1 = 0 +		if islast: h2 = 999999 +		if not (v1 <= self.mousev <= v2 and h1 <= self.mouseh <= h2): +			self.charcount = self.charcount + len(te) + (sp > 0) +			return +		if self.lastfont <> None: +			self.d.setfont(self.lastfont) +		cc = 0 +		for c in te: +			cw = self.d.textwidth(c) +			if self.mouseh <= h + cw/2: +				return self.charcount + cc +			cc = cc+1 +			h = h+cw +		self.charcount = self.charcount + cc +		if self.mouseh <= (h+h2) / 2: +			return self.charcount +		else: +			return self.charcount + 1 +	# +	def _whereisanchor(self, tuple, word, h, v): +		pass +	# +	# Return screen position corresponding to position in paragraph. +	# Return tuple (h, vtop, vbaseline, vbottom). 
+	# This is more or less the inverse of whereis() +	def screenpos(self, d, pos): +		if pos < 0: +			ascent, descent = self.lines[0][5:7] +			return self.left, self.top, self.top + ascent, \ +				self.top + ascent + descent +		self.pos = pos +		self.lastfont = None +		try: +			self.d = d +			ok = self.visit(self.__class__._screenposword, \ +					self.__class__._screenposanchor) +		finally: +			self.d = None +		if ok == None: +			ascent, descent = self.lines[-1][5:7] +			ok = self.right, self.bottom - ascent - descent, \ +				self.bottom - descent, self.bottom +		return ok +	# +	def _screenposword(self, tuple, word, h1, v1, h2, v2, isfirst, islast): +		fo, te, wi, sp, st, as, de = word +		if fo <> None: self.lastfont = fo +		cc = len(te) + (sp > 0) +		if self.pos > cc: +			self.pos = self.pos - cc +			return +		if self.pos < cc: +			self.d.setfont(self.lastfont) +			h = h1 + self.d.textwidth(te[:self.pos]) +		else: +			h = h2 +		ascent, descent = tuple[5:7] +		return h, v1, v1+ascent, v2 +	# +	def _screenposanchor(self, tuple, word, h, v): +		pass +	# +	# Invert the stretch of text between pos1 and pos2. +	# If pos1 is None, the beginning is implied; +	# if pos2 is None, the end is implied. +	# Undoes its own effect when called again with the same arguments +	def invert(self, d, pos1, pos2): +		if pos1 == None: +			pos1 = self.left, self.top, self.top, self.top +		else: +			pos1 = self.screenpos(d, pos1) +		if pos2 == None: +			pos2 = self.right, self.bottom,self.bottom,self.bottom +		else: +			pos2 = self.screenpos(d, pos2) +		h1, top1, baseline1, bottom1 = pos1 +		h2, top2, baseline2, bottom2 = pos2 +		if bottom1 <= top2: +			d.invert((h1, top1), (self.right, bottom1)) +			h1 = self.left +			if bottom1 < top2: +				d.invert((h1, bottom1), (self.right, top2)) +			top1, bottom1 = top2, bottom2 +		d.invert((h1, top1), (h2, bottom2)) + + +# Test class Para +# XXX This was last used on the Mac, hence the weird fonts... 
+def test(): +	import stdwin +	from stdwinevents import * +	words = 'The', 'quick', 'brown', 'fox', 'jumps', 'over', \ +		'the', 'lazy', 'dog.' +	paralist = [] +	for just in 'l', 'r', 'lr', 'c': +		p = Para() +		p.just = just +		p.addword(stdwin, ('New York', 'p', 12), words[0], 1, 1) +		for word in words[1:-1]: +			p.addword(stdwin, None, word, 1, 1) +		p.addword(stdwin, None, words[-1], 2, 4) +		p.addword(stdwin, ('New York', 'b', 18), 'Bye!', 0, 0) +		p.addword(stdwin, ('New York', 'p', 10), 'Bye!', 0, 0) +		paralist.append(p) +	window = stdwin.open('Para.test()') +	start = stop = selpara = None +	while 1: +		etype, win, detail = stdwin.getevent() +		if etype == WE_CLOSE: +			break +		if etype == WE_SIZE: +			window.change((0, 0), (1000, 1000)) +		if etype == WE_DRAW: +			width, height = window.getwinsize() +			d = None +			try: +				d = window.begindrawing() +				d.cliprect(detail) +				d.erase(detail) +				v = 0 +				for p in paralist: +					v = p.render(d, 0, v, width) +					if p == selpara and \ +					   start <> None and stop <> None: +						p.invert(d, start, stop) +			finally: +				if d: d.close() +		if etype == WE_MOUSE_DOWN: +			if selpara and start <> None and stop <> None: +				d = window.begindrawing() +				selpara.invert(d, start, stop) +				d.close() +			start = stop = selpara = None +			mouseh, mousev = detail[0] +			for p in paralist: +				start = p.whereis(stdwin, mouseh, mousev) +				if start <> None: +					selpara = p +					break +		if etype == WE_MOUSE_UP and start <> None and selpara: +			mouseh, mousev = detail[0] +			stop = selpara.whereis(stdwin, mouseh, mousev) +			if stop == None: start = selpara = None +			else: +				if start > stop: +					start, stop = stop, start +				d = window.begindrawing() +				selpara.invert(d, start, stop) +				d.close() +	window.close() diff --git a/Lib/fmt.py b/Lib/fmt.py new file mode 100644 index 0000000000..c0963069e0 --- /dev/null +++ b/Lib/fmt.py @@ -0,0 +1,621 @@ +# Text formatting abstractions + + 
+import string +import Para + + +# A formatter back-end object has one method that is called by the formatter: +# addpara(p), where p is a paragraph object.  For example: + + +# Formatter back-end to do nothing at all with the paragraphs +class NullBackEnd: +	# +	def __init__(self): +		pass +	# +	def addpara(self, p): +		pass +	# +	def bgn_anchor(self, id): +		pass +	# +	def end_anchor(self, id): +		pass + + +# Formatter back-end to collect the paragraphs in a list +class SavingBackEnd(NullBackEnd): +	# +	def __init__(self): +		self.paralist = [] +	# +	def addpara(self, p): +		self.paralist.append(p) +	# +	def hitcheck(self, h, v): +		hits = [] +		for p in self.paralist: +			if p.top <= v <= p.bottom: +				for id in p.hitcheck(h, v): +					if id not in hits: +						hits.append(id) +		return hits +	# +	def extract(self): +		text = '' +		for p in self.paralist: +			text = text + (p.extract()) +		return text +	# +	def extractpart(self, long1, long2): +		if long1 > long2: long1, long2 = long2, long1 +		para1, pos1 = long1 +		para2, pos2 = long2 +		text = '' +		while para1 < para2: +			ptext = self.paralist[para1].extract() +			text = text + ptext[pos1:] +			pos1 = 0 +			para1 = para1 + 1 +		ptext = self.paralist[para2].extract() +		return text + ptext[pos1:pos2] +	# +	def whereis(self, d, h, v): +		total = 0 +		for i in range(len(self.paralist)): +			p = self.paralist[i] +			result = p.whereis(d, h, v) +			if result <> None: +				return i, result +		return None +	# +	def roundtowords(self, long1, long2): +		i, offset = long1 +		text = self.paralist[i].extract() +		while offset > 0 and text[offset-1] <> ' ': offset = offset-1 +		long1 = i, offset +		# +		i, offset = long2 +		text = self.paralist[i].extract() +		n = len(text) +		while offset < n-1 and text[offset] <> ' ': offset = offset+1 +		long2 = i, offset +		# +		return long1, long2 +	# +	def roundtoparagraphs(self, long1, long2): +		long1 = long1[0], 0 +		long2 = long2[0], len(self.paralist[long2[0]].extract()) +		
return long1, long2 + + +# Formatter back-end to send the text directly to the drawing object +class WritingBackEnd(NullBackEnd): +	# +	def __init__(self, d, width): +		self.d = d +		self.width = width +		self.lineno = 0 +	# +	def addpara(self, p): +		self.lineno = p.render(self.d, 0, self.lineno, self.width) + + +# A formatter receives a stream of formatting instructions and assembles +# these into a stream of paragraphs on to a back-end.  The assembly is +# parametrized by a text measurement object, which must match the output +# operations of the back-end.  The back-end is responsible for splitting +# paragraphs up in lines of a given maximum width.  (This is done because +# in a windowing environment, when the window size changes, there is no +# need to redo the assembly into paragraphs, but the splitting into lines +# must be done taking the new window size into account.) + + +# Formatter base class.  Initialize it with a text measurement object, +# which is used for text measurements, and a back-end object, +# which receives the completed paragraphs.  
The formatting methods are: +# setfont(font) +# setleftindent(nspaces) +# setjust(type) where type is 'l', 'c', 'r', or 'lr' +# flush() +# vspace(nlines) +# needvspace(nlines) +# addword(word, nspaces) +class BaseFormatter: +	# +	def __init__(self, d, b): +		# Drawing object used for text measurements +		self.d = d +		# +		# BackEnd object receiving completed paragraphs +		self.b = b +		# +		# Parameters of the formatting model +		self.leftindent = 0 +		self.just = 'l' +		self.font = None +		self.blanklines = 0 +		# +		# Parameters derived from the current font +		self.space = d.textwidth(' ') +		self.line = d.lineheight() +		self.ascent = d.baseline() +		self.descent = self.line - self.ascent +		# +		# Parameter derived from the default font +		self.n_space = self.space +		# +		# Current paragraph being built +		self.para = None +		self.nospace = 1 +		# +		# Font to set on the next word +		self.nextfont = None +	# +	def newpara(self): +		return Para.Para() +	# +	def setfont(self, font): +		if font == None: return +		self.font = self.nextfont = font +		d = self.d +		d.setfont(font) +		self.space = d.textwidth(' ') +		self.line = d.lineheight() +		self.ascent = d.baseline() +		self.descent = self.line - self.ascent +	# +	def setleftindent(self, nspaces): +		self.leftindent = int(self.n_space * nspaces) +		if self.para: +			hang = self.leftindent - self.para.indent_left +			if hang > 0 and self.para.getlength() <= hang: +				self.para.makehangingtag(hang) +				self.nospace = 1 +			else: +				self.flush() +	# +	def setrightindent(self, nspaces): +		self.rightindent = int(self.n_space * nspaces) +		if self.para: +			self.para.indent_right = self.rightindent +			self.flush() +	# +	def setjust(self, just): +		self.just = just +		if self.para: +			self.para.just = self.just +	# +	def flush(self): +		if self.para: +			self.b.addpara(self.para) +			self.para = None +			if self.font <> None: +				self.d.setfont(self.font) +		self.nospace = 1 +	# +	def vspace(self, nlines): +	
	self.flush() +		if nlines > 0: +			self.para = self.newpara() +			tuple = None, '', 0, 0, 0, int(nlines*self.line), 0 +			self.para.words.append(tuple) +			self.flush() +			self.blanklines = self.blanklines + nlines +	# +	def needvspace(self, nlines): +		self.flush() # Just to be sure +		if nlines > self.blanklines: +			self.vspace(nlines - self.blanklines) +	# +	def addword(self, text, space): +		if self.nospace and not text: +			return +		self.nospace = 0 +		self.blanklines = 0 +		if not self.para: +			self.para = self.newpara() +			self.para.indent_left = self.leftindent +			self.para.just = self.just +			self.nextfont = self.font +		space = int(space * self.space) +		self.para.words.append(self.nextfont, text, \ +			self.d.textwidth(text), space, space, \ +			self.ascent, self.descent) +		self.nextfont = None +	# +	def bgn_anchor(self, id): +		if not self.para: +			self.nospace = 0 +			self.addword('', 0) +		self.para.bgn_anchor(id) +	# +	def end_anchor(self, id): +		if not self.para: +			self.nospace = 0 +			self.addword('', 0) +		self.para.end_anchor(id) + + +# Measuring object for measuring text as viewed on a tty +class NullMeasurer: +	# +	def __init__(self): +		pass +	# +	def setfont(self, font): +		pass +	# +	def textwidth(self, text): +		return len(text) +	# +	def lineheight(self): +		return 1 +	# +	def baseline(self): +		return 0 + + +# Drawing object for writing plain ASCII text to a file +class FileWriter: +	# +	def __init__(self, fp): +		self.fp = fp +		self.lineno, self.colno = 0, 0 +	# +	def setfont(self, font): +		pass +	# +	def text(self, (h, v), str): +		if not str: return +		if '\n' in str: +			raise ValueError, 'can\'t write \\n' +		while self.lineno < v: +			self.fp.write('\n') +			self.colno, self.lineno = 0, self.lineno + 1 +		while self.lineno > v: +			# XXX This should never happen... 
+			self.fp.write('\033[A') # ANSI up arrow +			self.lineno = self.lineno - 1 +		if self.colno < h: +			self.fp.write(' ' * (h - self.colno)) +		elif self.colno > h: +			self.fp.write('\b' * (self.colno - h)) +		self.colno = h +		self.fp.write(str) +		self.colno = h + len(str) + + +# Formatting class to do nothing at all with the data +class NullFormatter(BaseFormatter): +	# +	def __init__(self): +		d = NullMeasurer() +		b = NullBackEnd() +		BaseFormatter.__init__(self, d, b) + + +# Formatting class to write directly to a file +class WritingFormatter(BaseFormatter): +	# +	def __init__(self, fp, width): +		dm = NullMeasurer() +		dw = FileWriter(fp) +		b = WritingBackEnd(dw, width) +		BaseFormatter.__init__(self, dm, b) +		self.blanklines = 1 +	# +	# Suppress multiple blank lines +	def needvspace(self, nlines): +		BaseFormatter.needvspace(self, min(1, nlines)) + + +# A "FunnyFormatter" writes ASCII text with a twist: *bold words*, +# _italic text_ and _underlined words_, and `quoted text'. +# It assumes that the fonts are 'r', 'i', 'b', 'u', 'q': (roman, +# italic, bold, underline, quote). +# Moreover, if the font is in upper case, the text is converted to +# UPPER CASE. +class FunnyFormatter(WritingFormatter): +	# +	def flush(self): +		if self.para: finalize(self.para) +		WritingFormatter.flush(self) + + +# Surrounds *bold words* and _italic text_ in a paragraph with +# appropriate markers, fixing the size (assuming these characters' +# width is 1). 
+openchar = \ +    {'b':'*', 'i':'_', 'u':'_', 'q':'`', 'B':'*', 'I':'_', 'U':'_', 'Q':'`'} +closechar = \ +    {'b':'*', 'i':'_', 'u':'_', 'q':'\'', 'B':'*', 'I':'_', 'U':'_', 'Q':'\''} +def finalize(para): +	oldfont = curfont = 'r' +	para.words.append('r', '', 0, 0, 0, 0) # temporary, deleted at end +	for i in range(len(para.words)): +		fo, te, wi = para.words[i][:3] +		if fo <> None: curfont = fo +		if curfont <> oldfont: +			if closechar.has_key(oldfont): +				c = closechar[oldfont] +				j = i-1 +				while j > 0 and para.words[j][1] == '': j = j-1 +				fo1, te1, wi1 = para.words[j][:3] +				te1 = te1 + c +				wi1 = wi1 + len(c) +				para.words[j] = (fo1, te1, wi1) + \ +					para.words[j][3:] +			if openchar.has_key(curfont) and te: +				c = openchar[curfont] +				te = c + te +				wi = len(c) + wi +				para.words[i] = (fo, te, wi) + \ +					para.words[i][3:] +			if te: oldfont = curfont +			else: oldfont = 'r' +		if curfont in string.uppercase: +			te = string.upper(te) +			para.words[i] = (fo, te, wi) + para.words[i][3:] +	del para.words[-1] + + +# Formatter back-end to draw the text in a window. +# This has an option to draw while the paragraphs are being added, +# to minimize the delay before the user sees anything. +# This manages the entire "document" of the window. 
+class StdwinBackEnd(SavingBackEnd): +	# +	def __init__(self, window, drawnow): +		self.window = window +		self.drawnow = drawnow +		self.width = window.getwinsize()[0] +		self.selection = None +		self.height = 0 +		window.setorigin(0, 0) +		window.setdocsize(0, 0) +		self.d = window.begindrawing() +		SavingBackEnd.__init__(self) +	# +	def finish(self): +		self.d.close() +		self.d = None +		self.window.setdocsize(0, self.height) +	# +	def addpara(self, p): +		self.paralist.append(p) +		if self.drawnow: +			self.height = \ +				p.render(self.d, 0, self.height, self.width) +		else: +			p.layout(self.width) +			p.left = 0 +			p.top = self.height +			p.right = self.width +			p.bottom = self.height + p.height +			self.height = p.bottom +	# +	def resize(self): +		self.window.change((0, 0), (self.width, self.height)) +		self.width = self.window.getwinsize()[0] +		self.height = 0 +		for p in self.paralist: +			p.layout(self.width) +			p.left = 0 +			p.top = self.height +			p.right = self.width +			p.bottom = self.height + p.height +			self.height = p.bottom +		self.window.change((0, 0), (self.width, self.height)) +		self.window.setdocsize(0, self.height) +	# +	def redraw(self, area): +		d = self.window.begindrawing() +		(left, top), (right, bottom) = area +		d.erase(area) +		d.cliprect(area) +		for p in self.paralist: +			if top < p.bottom and p.top < bottom: +				v = p.render(d, p.left, p.top, p.right) +		if self.selection: +			self.invert(d, self.selection) +		d.close() +	# +	def setselection(self, new): +		if new: +			long1, long2 = new +			pos1 = long1[:3] +			pos2 = long2[:3] +			new = pos1, pos2 +		if new <> self.selection: +			d = self.window.begindrawing() +			if self.selection: +				self.invert(d, self.selection) +			if new: +				self.invert(d, new) +			d.close() +			self.selection = new +	# +	def getselection(self): +		return self.selection +	# +	def extractselection(self): +		if self.selection: +			a, b = self.selection +			return self.extractpart(a, b) +		else: 
+			return None +	# +	def invert(self, d, region): +		long1, long2 = region +		if long1 > long2: long1, long2 = long2, long1 +		para1, pos1 = long1 +		para2, pos2 = long2 +		while para1 < para2: +			self.paralist[para1].invert(d, pos1, None) +			pos1 = None +			para1 = para1 + 1 +		self.paralist[para2].invert(d, pos1, pos2) +	# +	def search(self, prog): +		import regex, string +		if type(prog) == type(''): +			prog = regex.compile(string.lower(prog)) +		if self.selection: +			iold = self.selection[0][0] +		else: +			iold = -1 +		hit = None +		for i in range(len(self.paralist)): +			if i == iold or i < iold and hit: +				continue +			p = self.paralist[i] +			text = string.lower(p.extract()) +			if prog.search(text) >= 0: +				a, b = prog.regs[0] +				long1 = i, a +				long2 = i, b +				hit = long1, long2 +				if i > iold: +					break +		if hit: +			self.setselection(hit) +			i = hit[0][0] +			p = self.paralist[i] +			self.window.show((p.left, p.top), (p.right, p.bottom)) +			return 1 +		else: +			return 0 +	# +	def showanchor(self, id): +		for i in range(len(self.paralist)): +			p = self.paralist[i] +			if p.hasanchor(id): +				long1 = i, 0 +				long2 = i, len(p.extract()) +				hit = long1, long2 +				self.setselection(hit) +				self.window.show( \ +					(p.left, p.top), (p.right, p.bottom)) +				break + + +# GL extensions + +class GLFontCache: +	# +	def __init__(self): +		self.reset() +		self.setfont('') +	# +	def reset(self): +		self.fontkey = None +		self.fonthandle = None +		self.fontinfo = None +		self.fontcache = {} +	# +	def close(self): +		self.reset() +	# +	def setfont(self, fontkey): +		if fontkey == '': +			fontkey = 'Times-Roman 12' +		elif ' ' not in fontkey: +			fontkey = fontkey + ' 12' +		if fontkey == self.fontkey: +			return +		if self.fontcache.has_key(fontkey): +			handle = self.fontcache[fontkey] +		else: +			import string +			i = string.index(fontkey, ' ') +			name, sizestr = fontkey[:i], fontkey[i:] +			size = eval(sizestr) +			key1 = name + ' 
1' +			key = name + ' ' + `size` +			# NB key may differ from fontkey! +			if self.fontcache.has_key(key): +				handle = self.fontcache[key] +			else: +				if self.fontcache.has_key(key1): +					handle = self.fontcache[key1] +				else: +					import fm +					handle = fm.findfont(name) +					self.fontcache[key1] = handle +				handle = handle.scalefont(size) +				self.fontcache[fontkey] = \ +					self.fontcache[key] = handle +		self.fontkey = fontkey +		if self.fonthandle <> handle: +			self.fonthandle = handle +			self.fontinfo = handle.getfontinfo() +			handle.setfont() + + +class GLMeasurer(GLFontCache): +	# +	def textwidth(self, text): +		return self.fonthandle.getstrwidth(text) +	# +	def baseline(self): +		return self.fontinfo[6] - self.fontinfo[3] +	# +	def lineheight(self): +		return self.fontinfo[6] + + +class GLWriter(GLFontCache): +	# +	# NOTES: +	# (1) Use gl.ortho2 to use X pixel coordinates! +	# +	def text(self, (h, v), text): +		import gl, fm +		gl.cmov2i(h, v + self.fontinfo[6] - self.fontinfo[3]) +		fm.prstr(text) +	# +	def setfont(self, fontkey): +		oldhandle = self.fonthandle +		GLFontCache.setfont(fontkey) +		if self.fonthandle <> oldhandle: +			handle.setfont() + + +class GLMeasurerWriter(GLMeasurer, GLWriter): +	pass + + +class GLBackEnd(SavingBackEnd): +	# +	def __init__(self, wid): +		import gl +		gl.winset(wid) +		self.wid = wid +		self.width = gl.getsize()[1] +		self.height = 0 +		self.d = GLMeasurerWriter() +		SavingBackEnd.__init__(self) +	# +	def finish(self): +		pass +	# +	def addpara(self, p): +		self.paralist.append(p) +		self.height = p.render(self.d, 0, self.height, self.width) +	# +	def redraw(self): +		import gl +		gl.winset(self.wid) +		width = gl.getsize()[1] +		if width <> self.width: +			setdocsize = 1 +			self.width = width +			for p in self.paralist: +				p.top = p.bottom = None +		d = self.d +		v = 0 +		for p in self.paralist: +			v = p.render(d, 0, v, width) diff --git a/Lib/htmllib.py b/Lib/htmllib.py new file mode 100644 
index 0000000000..8b3e62b708 --- /dev/null +++ b/Lib/htmllib.py @@ -0,0 +1,635 @@ +# A parser for HTML documents + + +# HTML: HyperText Markup Language; an SGML-like syntax used by WWW to +# describe hypertext documents +# +# SGML: Standard Generalized Markup Language +# +# WWW: World-Wide Web; a distributed hypertext system develped at CERN +# +# CERN: European Particle Physics Laboratory in Geneva, Switzerland + + +# This file is only concerned with parsing and formatting HTML +# documents, not with the other (hypertext and networking) aspects of +# the WWW project.  (It does support highlighting of anchors.) + + +import os +import sys +import regex +import string +import sgmllib + + +class HTMLParser(sgmllib.SGMLParser): + +	# Copy base class entities and add some +	entitydefs = {} +	for key in sgmllib.SGMLParser.entitydefs.keys(): +		entitydefs[key] = sgmllib.SGMLParser.entitydefs[key] +	entitydefs['bullet'] = '*' + +	# Provided -- handlers for tags introducing literal text +	 +	def start_listing(self, attrs): +		self.setliteral('listing') +		self.literal_bgn('listing', attrs) + +	def end_listing(self): +		self.literal_end('listing') + +	def start_xmp(self, attrs): +		self.setliteral('xmp') +		self.literal_bgn('xmp', attrs) + +	def end_xmp(self): +		self.literal_end('xmp') + +	def do_plaintext(self, attrs): +		self.setnomoretags() +		self.literal_bgn('plaintext', attrs) + +	# To be overridden -- begin/end literal mode +	def literal_bgn(self, tag, attrs): pass +	def literal_end(self, tag): pass + + +# Next level of sophistication -- collect anchors, title, nextid and isindex +class CollectingParser(HTMLParser): +	# +	def __init__(self): +		HTMLParser.__init__(self) +		self.savetext = None +		self.nextid = '' +		self.isindex = 0 +		self.title = '' +		self.inanchor = 0 +		self.anchors = [] +		self.anchornames = [] +		self.anchortypes = [] +	# +	def start_a(self, attrs): +		self.inanchor = 0 +		href = '' +		name = '' +		type = '' +		for attrname, value in attrs: +		
	if attrname == 'href': +				href = value +			if attrname == 'name=': +				name = value +			if attrname == 'type=': +				type = string.lower(value) +		if not (href or name): +			return +		self.anchors.append(href) +		self.anchornames.append(name) +		self.anchortypes.append(type) +		self.inanchor = len(self.anchors) +		if not href: +			self.inanchor = -self.inanchor +	# +	def end_a(self): +		if self.inanchor > 0: +			# Don't show anchors pointing into the current document +			if self.anchors[self.inanchor-1][:1] <> '#': +				self.handle_data('[' + `self.inanchor` + ']') +		self.inanchor = 0 +	# +	def start_header(self, attrs): pass +	def end_header(self): pass +	# +	# (head is the same as header) +	def start_head(self, attrs): pass +	def end_head(self): pass +	# +	def start_body(self, attrs): pass +	def end_body(self): pass +	# +	def do_nextid(self, attrs): +		self.nextid = attrs +	# +	def do_isindex(self, attrs): +		self.isindex = 1 +	# +	def start_title(self, attrs): +		self.savetext = '' +	# +	def end_title(self): +		if self.savetext <> None: +			self.title = self.savetext +			self.savetext = None +	# +	def handle_data(self, text): +		if self.savetext is not None: +			self.savetext = self.savetext + text + + +# Formatting parser -- takes a formatter and a style sheet as arguments + +# XXX The use of style sheets should change: for each tag and end tag +# there should be a style definition, and a style definition should +# encompass many more parameters: font, justification, indentation, +# vspace before, vspace after, hanging tag... 

# Patterns used by FormattingParser.handle_data() to split running text:
# wordprog matches a (possibly empty) run of non-whitespace characters,
# spaceprog a (possibly empty) run of blanks, tabs and newlines.
wordprog = regex.compile('[^ \t\n]*')
spaceprog = regex.compile('[ \t\n]*')

# Parser that drives a formatter: character data is passed on word by
# word through formatter.addword(), and tags are translated into font,
# indentation, justification and vertical space requests.
# The formatter members used by this class are setfont(), setleftindent(),
# setjust(), flush(), needvspace(), addword() and the 'nospace' attribute.
# The stylesheet attributes used are the *fontset and *indent values.
class FormattingParser(CollectingParser):

	def __init__(self, formatter, stylesheet):
		CollectingParser.__init__(self)
		self.fmt = formatter
		self.stl = stylesheet
		self.savetext = None
		self.compact = 0	# set inside compact lists and <address>
		self.nofill = 0		# nesting depth of <pre>
		self.resetfont()
		self.setindent(self.stl.stdindent)

	# Empty the font set and style stacks and select the roman font
	# of the standard font set
	def resetfont(self):
		self.fontstack = []
		self.stylestack = []
		self.fontset = self.stl.stdfontset
		self.style = ROMAN
		self.passfont()

	# Tell the formatter which font the current (fontset, style)
	# combination stands for
	def passfont(self):
		font = self.fontset[self.style]
		self.fmt.setfont(font)

	# Select a new style, remembering the old one.  The style is
	# clamped to the last entry of the current font set, so short
	# font sets fall back to a lower style.
	def pushstyle(self, style):
		self.stylestack.append(self.style)
		self.style = min(style, len(self.fontset)-1)
		self.passfont()

	# Return to the style saved by the matching pushstyle().
	# Raises IndexError if the style stack is empty.
	def popstyle(self):
		self.style = self.stylestack[-1]
		del self.stylestack[-1]
		self.passfont()

	# Select a new font set and style, remembering the old ones
	def pushfontset(self, fontset, style):
		self.fontstack.append(self.fontset)
		self.fontset = fontset
		self.pushstyle(style)

	# Undo the matching pushfontset()
	def popfontset(self):
		self.fontset = self.fontstack[-1]
		del self.fontstack[-1]
		self.popstyle()

	# Thin wrappers around the formatter

	def flush(self):
		self.fmt.flush()

	def setindent(self, n):
		self.fmt.setleftindent(n)

	def needvspace(self, n):
		self.fmt.needvspace(n)

	# Finish parsing and flush the last paragraph
	def close(self):
		HTMLParser.close(self)
		self.fmt.flush()

	# Pass literal text to the formatter one line at a time, flushing
	# after every line that was ended by a newline.  Tabs are expanded
	# in all lines except the first one.
	def handle_literal(self, text):
		lines = string.splitfields(text, '\n')
		for i in range(1, len(lines)):
			lines[i] = string.expandtabs(lines[i], 8)
		for line in lines[:-1]:
			self.fmt.addword(line, 0)
			self.fmt.flush()
			# flush() is followed by resetting nospace so that
			# an empty next line is not dropped by addword()
			# NOTE(review): relies on the formatter's addword()
			# skipping empty words while nospace is set -- confirm
			self.fmt.nospace = 0
		for line in lines[-1:]:
			self.fmt.addword(line, 0)

	# Handle character data: collect it if a title is being saved,
	# treat it as literal text in literal mode, and otherwise split it
	# into words followed by white space.
	def handle_data(self, text):
		if self.savetext is not None:
			self.savetext = self.savetext + text
			return
		# self.literal is not set in this file; presumably it is
		# maintained by the SGMLParser base class
		if self.literal:
			self.handle_literal(text)
			return
		i = 0
		n = len(text)
		while i < n:
			# The match() results are added to an index, i.e.
			# they are used as the length of the match
			j = i + wordprog.match(text, i)
			word = text[i:j]
			i = j + spaceprog.match(text, j)
			# i-j is the number of white space characters
			# following the word
			self.fmt.addword(word, i-j)
			if self.nofill and '\n' in text[j:i]:
				# Inside <pre>: break the line at the first
				# newline and continue just after it, so the
				# rest of the white space is seen again
				self.fmt.flush()
				self.fmt.nospace = 0
				i = j+1
				while text[i-1] <> '\n': i = i+1

	# Literal text is set in the fixed font of the standard font set,
	# at the literal indent
	def literal_bgn(self, tag, attrs):
		if tag == 'plaintext':
			self.flush()
		else:
			self.needvspace(1)
		self.pushfontset(self.stl.stdfontset, FIXED)
		self.setindent(self.stl.literalindent)

	def literal_end(self, tag):
		self.needvspace(1)
		self.popfontset()
		self.setindent(self.stl.stdindent)

	def start_title(self, attrs):
		self.flush()
		self.savetext = ''
	# NB end_title is unchanged

	# Paragraph break: a line break in compact mode, else a blank line
	def do_p(self, attrs):
		if self.compact:
			self.flush()
		else:
			self.needvspace(1)

	# Headers: each start handler requests vertical space, sets the
	# indent and pushes a bold font; the end handler undoes this and
	# returns to the standard indent.  Only h1 is centered.

	def start_h1(self, attrs):
		self.needvspace(2)
		self.setindent(self.stl.h1indent)
		self.pushfontset(self.stl.h1fontset, BOLD)
		self.fmt.setjust('c')

	def end_h1(self):
		self.popfontset()
		self.needvspace(2)
		self.setindent(self.stl.stdindent)
		self.fmt.setjust('l')

	def start_h2(self, attrs):
		self.needvspace(1)
		self.setindent(self.stl.h2indent)
		self.pushfontset(self.stl.h2fontset, BOLD)

	def end_h2(self):
		self.popfontset()
		self.needvspace(1)
		self.setindent(self.stl.stdindent)

	def start_h3(self, attrs):
		self.needvspace(1)
		self.setindent(self.stl.stdindent)
		self.pushfontset(self.stl.h3fontset, BOLD)

	def end_h3(self):
		self.popfontset()
		self.needvspace(1)
		self.setindent(self.stl.stdindent)

	def start_h4(self, attrs):
		self.needvspace(1)
		self.setindent(self.stl.stdindent)
		self.pushfontset(self.stl.stdfontset, BOLD)

	def end_h4(self):
		self.popfontset()
		self.needvspace(1)
		self.setindent(self.stl.stdindent)

	# h5, h6 and h7 are formatted like h4

	start_h5 = start_h4
	end_h5 = end_h4

	start_h6 = start_h5
	end_h6 = end_h5

	start_h7 = start_h6
	end_h7 = end_h6

	# Lists: a 'compact' attribute selects compact mode with indent 0,
	# otherwise (the for loop's else clause) the list indent is used
	def start_ul(self, attrs):
		self.needvspace(1)
		for attrname, value in attrs:
			if attrname == 'compact':
				self.compact = 1
				self.setindent(0)
				break
		else:
			self.setindent(self.stl.ulindent)

	start_dir = start_menu = start_ol = start_ul

	do_li = do_p

	def end_ul(self):
		self.compact = 0
		self.needvspace(1)
		self.setindent(self.stl.stdindent)

	end_dir = end_menu = end_ol = end_ul

	# Definition lists: <dt> goes at the standard indent, <dd> at the
	# dd indent

	def start_dl(self, attrs):
		for attrname, value in attrs:
			if attrname == 'compact':
				self.compact = 1
		self.needvspace(1)

	def end_dl(self):
		self.compact = 0
		self.needvspace(1)
		self.setindent(self.stl.stdindent)

	def do_dt(self, attrs):
		if self.compact:
			self.flush()
		else:
			self.needvspace(1)
		self.setindent(self.stl.stdindent)

	def do_dd(self, attrs):
		# Add an empty word with one space before changing the indent
		self.fmt.addword('', 1)
		self.setindent(self.stl.ddindent)

	# Addresses are right-justified and compact
	def start_address(self, attrs):
		self.compact = 1
		self.needvspace(1)
		self.fmt.setjust('r')

	def end_address(self):
		self.compact = 0
		self.needvspace(1)
		self.setindent(self.stl.stdindent)
		self.fmt.setjust('l')

	# Preformatted text: fixed font; nofill counts the nesting so that
	# handle_data() breaks lines at newlines
	def start_pre(self, attrs):
		self.needvspace(1)
		self.nofill = self.nofill + 1
		self.pushstyle(FIXED)

	def end_pre(self):
		self.popstyle()
		self.nofill = self.nofill - 1
		self.needvspace(1)

	start_typewriter = start_pre
	end_typewriter = end_pre

	# Images are replaced by a fixed placeholder word
	def do_img(self, attrs):
		self.fmt.addword('(image)', 0)

	# Physical styles

	def start_tt(self, attrs): self.pushstyle(FIXED)
	def end_tt(self): self.popstyle()

	def start_b(self, attrs): self.pushstyle(BOLD)
	def end_b(self): self.popstyle()

	def start_i(self, attrs): self.pushstyle(ITALIC)
	def end_i(self): self.popstyle()

	def start_u(self, attrs): self.pushstyle(ITALIC) # Underline???
+	def end_u(self): self.popstyle() + +	def start_r(self, attrs): self.pushstyle(ROMAN) # Not official +	def end_r(self): self.popstyle() + +	# Logical styles + +	start_em = start_i +	end_em = end_i + +	start_strong = start_b +	end_strong = end_b + +	start_code = start_tt +	end_code = end_tt + +	start_samp = start_tt +	end_samp = end_tt + +	start_kbd = start_tt +	end_kbd = end_tt + +	start_file = start_tt # unofficial +	end_file = end_tt + +	start_var = start_i +	end_var = end_i + +	start_dfn = start_i +	end_dfn = end_i + +	start_cite = start_i +	end_cite = end_i + +	start_hp1 = start_i +	end_hp1 = start_i + +	start_hp2 = start_b +	end_hp2 = end_b + +	def unknown_starttag(self, tag, attrs): +		print '*** unknown <' + tag + '>' + +	def unknown_endtag(self, tag): +		print '*** unknown </' + tag + '>' + + +# An extension of the formatting parser which formats anchors differently. +class AnchoringParser(FormattingParser): + +	def start_a(self, attrs): +		FormattingParser.start_a(self, attrs) +		if self.inanchor: +			self.fmt.bgn_anchor(self.inanchor) + +	def end_a(self): +		if self.inanchor: +			self.fmt.end_anchor(self.inanchor) +			self.inanchor = 0 + + +# Style sheet -- this is never instantiated, but the attributes +# of the class object itself are used to specify fonts to be used +# for various paragraph styles. +# A font set is a non-empty list of fonts, in the order: +# [roman, italic, bold, fixed]. 
+# When a style is not available the nearest lower style is used + +ROMAN = 0 +ITALIC = 1 +BOLD = 2 +FIXED = 3 + +class NullStylesheet: +	# Fonts -- none +	stdfontset = [None] +	h1fontset = [None] +	h2fontset = [None] +	h3fontset = [None] +	# Indents +	stdindent = 2 +	ddindent = 25 +	ulindent = 4 +	h1indent = 0 +	h2indent = 0 +	literalindent = 0 + + +class X11Stylesheet(NullStylesheet): +	stdfontset = [ \ +		'-*-helvetica-medium-r-normal-*-*-100-100-*-*-*-*-*', \ +		'-*-helvetica-medium-o-normal-*-*-100-100-*-*-*-*-*', \ +		'-*-helvetica-bold-r-normal-*-*-100-100-*-*-*-*-*', \ +		'-*-courier-medium-r-normal-*-*-100-100-*-*-*-*-*', \ +		] +	h1fontset = [ \ +		'-*-helvetica-medium-r-normal-*-*-180-100-*-*-*-*-*', \ +		'-*-helvetica-medium-o-normal-*-*-180-100-*-*-*-*-*', \ +		'-*-helvetica-bold-r-normal-*-*-180-100-*-*-*-*-*', \ +		] +	h2fontset = [ \ +		'-*-helvetica-medium-r-normal-*-*-140-100-*-*-*-*-*', \ +		'-*-helvetica-medium-o-normal-*-*-140-100-*-*-*-*-*', \ +		'-*-helvetica-bold-r-normal-*-*-140-100-*-*-*-*-*', \ +		] +	h3fontset = [ \ +		'-*-helvetica-medium-r-normal-*-*-120-100-*-*-*-*-*', \ +		'-*-helvetica-medium-o-normal-*-*-120-100-*-*-*-*-*', \ +		'-*-helvetica-bold-r-normal-*-*-120-100-*-*-*-*-*', \ +		] +	ddindent = 40 + + +class MacStylesheet(NullStylesheet): +	stdfontset = [ \ +		('Geneva', 'p', 10), \ +		('Geneva', 'i', 10), \ +		('Geneva', 'b', 10), \ +		('Monaco', 'p', 10), \ +		] +	h1fontset = [ \ +		('Geneva', 'p', 18), \ +		('Geneva', 'i', 18), \ +		('Geneva', 'b', 18), \ +		('Monaco', 'p', 18), \ +		] +	h3fontset = [ \ +		('Geneva', 'p', 14), \ +		('Geneva', 'i', 14), \ +		('Geneva', 'b', 14), \ +		('Monaco', 'p', 14), \ +		] +	h3fontset = [ \ +		('Geneva', 'p', 12), \ +		('Geneva', 'i', 12), \ +		('Geneva', 'b', 12), \ +		('Monaco', 'p', 12), \ +		] + + +if os.name == 'mac': +	StdwinStylesheet = MacStylesheet +else: +	StdwinStylesheet = X11Stylesheet + + +class GLStylesheet(NullStylesheet): +	stdfontset = [ \ +		'Helvetica 10', \ +		
'Helvetica-Italic 10', \ +		'Helvetica-Bold 10', \ +		'Courier 10', \ +		] +	h1fontset = [ \ +		'Helvetica 18', \ +		'Helvetica-Italic 18', \ +		'Helvetica-Bold 18', \ +		'Courier 18', \ +		] +	h2fontset = [ \ +		'Helvetica 14', \ +		'Helvetica-Italic 14', \ +		'Helvetica-Bold 14', \ +		'Courier 14', \ +		] +	h3fontset = [ \ +		'Helvetica 12', \ +		'Helvetica-Italic 12', \ +		'Helvetica-Bold 12', \ +		'Courier 12', \ +		] + + +# Test program -- produces no output but times how long it takes +# to send a document to a null formatter, exclusive of I/O + +def test(): +	import fmt +	import time +	if sys.argv[1:]: file = sys.argv[1] +	else: file = 'test.html' +	data = open(file, 'r').read() +	t0 = time.time() +	fmtr = fmt.WritingFormatter(sys.stdout, 79) +	p = FormattingParser(fmtr, NullStylesheet) +	p.feed(data) +	p.close() +	t1 = time.time() +	print +	print '*** Formatting time:', round(t1-t0, 3), 'seconds.' + + +# Test program using stdwin + +def testStdwin(): +	import stdwin, fmt +	from stdwinevents import * +	if sys.argv[1:]: file = sys.argv[1] +	else: file = 'test.html' +	data = open(file, 'r').read() +	window = stdwin.open('testStdwin') +	b = None +	while 1: +		etype, ewin, edetail = stdwin.getevent() +		if etype == WE_CLOSE: +			break +		if etype == WE_SIZE: +			window.setdocsize(0, 0) +			window.setorigin(0, 0) +			window.change((0, 0), (10000, 30000)) # XXX +		if etype == WE_DRAW: +			if not b: +				b = fmt.StdwinBackEnd(window, 1) +				f = fmt.BaseFormatter(b.d, b) +				p = FormattingParser(f, \ +							    MacStylesheet) +				p.feed(data) +				p.close() +				b.finish() +			else: +				b.redraw(edetail) +	window.close() + + +# Test program using GL + +def testGL(): +	import gl, GL, fmt +	if sys.argv[1:]: file = sys.argv[1] +	else: file = 'test.html' +	data = open(file, 'r').read() +	W, H = 600, 600 +	gl.foreground() +	gl.prefsize(W, H) +	wid = gl.winopen('testGL') +	gl.ortho2(0, W, H, 0) +	gl.color(GL.WHITE) +	gl.clear() +	gl.color(GL.BLACK) +	b = 
fmt.GLBackEnd(wid) +	f = fmt.BaseFormatter(b.d, b) +	p = FormattingParser(f, GLStylesheet) +	p.feed(data) +	p.close() +	b.finish() +	# +	import time +	time.sleep(5) + + +if __name__ == '__main__': +	test() diff --git a/Lib/lib-old/Para.py b/Lib/lib-old/Para.py new file mode 100644 index 0000000000..6a7057ddbf --- /dev/null +++ b/Lib/lib-old/Para.py @@ -0,0 +1,408 @@ +# Text formatting abstractions + + +# Oft-used type object +Int = type(0) + + +# Represent a paragraph.  This is a list of words with associated +# font and size information, plus indents and justification for the +# entire paragraph. +# Once the words have been added to a paragraph, it can be laid out +# for different line widths.  Once laid out, it can be rendered at +# different screen locations.  Once rendered, it can be queried +# for mouse hits, and parts of the text can be highlighted +class Para: +	# +	def __init__(self): +		self.words = [] # The words +		self.just = 'l' # Justification: 'l', 'r', 'lr' or 'c' +		self.indent_left = self.indent_right = self.indent_hang = 0 +		# Final lay-out parameters, may change +		self.left = self.top = self.right = self.bottom = \ +			self.width = self.height = self.lines = None +	# +	# Add a word, computing size information for it. +	# Words may also be added manually by appending to self.words +	# Each word should be a 7-tuple: +	# (font, text, width, space, stretch, ascent, descent) +	def addword(self, d, font, text, space, stretch): +		if font <> None: +			d.setfont(font) +		width = d.textwidth(text) +		ascent = d.baseline() +		descent = d.lineheight() - ascent +		spw = d.textwidth(' ') +		space = space * spw +		stretch = stretch * spw +		tuple = (font, text, width, space, stretch, ascent, descent) +		self.words.append(tuple) +	# +	# Hooks to begin and end anchors -- insert numbers in the word list! 
	# The beginning of an anchor is marked by its id, the end by 0
	def bgn_anchor(self, id):
		self.words.append(id)
	#
	def end_anchor(self, id):
		self.words.append(0)
	#
	# Return the total length (width) of the text added so far, in pixels
	# (anchor markers are skipped; width and space of each word count)
	def getlength(self):
		total = 0
		for word in self.words:
			if type(word) <> Int:
				total = total + word[2] + word[3]
		return total
	#
	# Tab to a given position (relative to the current left indent):
	# remove all stretch, add fixed space up to the new indent.
	# If the current position is already beyond the tab stop,
	# don't add any new space (but still remove the stretch)
	def tabto(self, tab):
		total = 0
		# Ascent and descent for the filler word if there are no words
		as, de = 1, 0
		for i in range(len(self.words)):
			word = self.words[i]
			if type(word) == Int: continue
			fo, te, wi, sp, st, as, de = word
			self.words[i] = fo, te, wi, sp, 0, as, de
			total = total + wi + sp
		if total < tab:
			# Append an empty word whose space fills the gap; it
			# gets the ascent and descent of the last word seen
			self.words.append(None, '', 0, tab-total, 0, as, de)
	#
	# Make a hanging tag: tab to hang, increment indent_left by hang,
	# and reset indent_hang to -hang
	def makehangingtag(self, hang):
		self.tabto(hang)
		self.indent_left = self.indent_left + hang
		self.indent_hang = -hang
	#
	# Decide where the line breaks will be given some screen width
	# Sets self.width, self.height and self.lines; each entry of
	# self.lines is a tuple
	# (count, firstfont, charcount, width, stretch, ascent, descent)
	# where count is the number of entries of self.words (words and
	# anchor markers) on the line.
	def layout(self, linewidth):
		self.width = linewidth
		height = 0
		self.lines = lines = []
		# avail1 is the width available on all lines but the first;
		# the first line is additionally offset by indent_hang
		avail1 = self.width - self.indent_left - self.indent_right
		avail = avail1 - self.indent_hang
		words = self.words
		i = 0
		n = len(words)
		lastfont = None
		while i < n:
			# Start a new line at words[i]
			firstfont = lastfont
			charcount = 0
			width = 0
			stretch = 0
			ascent = 0
			descent = 0
			lsp = 0
			j = i
			while i < n:
				word = words[i]
				if type(word) == Int:
					# Anchor marker: break before the
					# start of an anchor if the line
					# is already full
					if word > 0 and width >= avail:
						break
					i = i+1
					continue
				fo, te, wi, sp, st, as, de = word
				# Break before a word that does not fit, but
				# always place at least one word on a line
				if width + wi > avail and width > 0 and wi > 0:
					break
				if fo <> None:
					lastfont = fo
					if width == 0:
						firstfont = fo
				# A nonzero space counts as one character
				charcount = charcount + len(te) + (sp > 0)
				width = width + wi + sp
				lsp = sp
				stretch = stretch + st
				lst = st
				ascent = max(ascent, as)
				descent = max(descent, de)
				i = i+1
			# Move anchor starts at the end of the line to the
			# next line
			while i > j and type(words[i-1]) == Int and \
				words[i-1] > 0: i = i-1
			# The space after the last word does not count
			width = width - lsp
			if i < n:
				stretch = stretch - lst
			else:
				# The last line is never stretched
				stretch = 0
			tuple = i-j, firstfont, charcount, width, stretch, \
				ascent, descent
			lines.append(tuple)
			height = height + ascent + descent
			avail = avail1
		self.height = height
	#
	# Call a function for all words in a line
	# More precisely, for each laid-out line in turn:
	# wordfunc(self, linetuple, word, h, v, h2, v2, isfirst, islast)
	# is called for each word, where (h, v) and (h2, v2) are the top
	# left and bottom right corners of the word including its space, and
	# anchorfunc(self, linetuple, marker, h, v)
	# is called for each anchor marker.  The first result that is not
	# None ends the visit and is returned.
	def visit(self, wordfunc, anchorfunc):
		avail1 = self.width - self.indent_left - self.indent_right
		avail = avail1 - self.indent_hang
		v = self.top
		i = 0
		for tuple in self.lines:
			wordcount, firstfont, charcount, width, stretch, \
				ascent, descent = tuple
			h = self.left + self.indent_left
			if i == 0: h = h + self.indent_hang
			extra = 0
			if self.just == 'r': h = h + avail - width
			elif self.just == 'c': h = h + (avail - width) / 2
			elif self.just == 'lr' and stretch > 0:
				# Extra space to divide among the words
				extra = avail - width
			v2 = v + ascent + descent
			for j in range(i, i+wordcount):
				word = self.words[j]
				if type(word) == Int:
					ok = anchorfunc(self, tuple, word, \
							h, v)
					if ok <> None: return ok
					continue
				fo, te, wi, sp, st, as, de = word
				if extra > 0 and stretch > 0:
					# Give this word its share of the
					# remaining extra space, in proportion
					# to its stretch
					ex = extra * st / stretch
					extra = extra - ex
					stretch = stretch - st
				else:
					ex = 0
				h2 = h + wi + sp + ex
				ok = wordfunc(self, tuple, word, h, v, \
					h2, v2, (j==i), (j==i+wordcount-1))
				if ok <> None: return ok
				h = h2
			v = v2
			i = i + wordcount
			avail = avail1
	#
	# Render a paragraph in "drawing object" d, using the rectangle
	# given by (left, top, right) with an unspecified bottom.
	# Return the computed bottom of the text.
	# The paragraph is laid out again first if the width has changed.
	def render(self, d, left, top, right):
		if self.width <> right-left:
			self.layout(right-left)
		self.left = left
		self.top = top
		self.right = right
		self.bottom = self.top + self.height
		self.anchorid = 0
		try:
			# self.d is only set for the duration of the visit
			self.d = d
			self.visit(self.__class__._renderword, \
				   self.__class__._renderanchor)
		finally:
			self.d = None
		return self.bottom
	#
	# Draw one word; tuple is the line tuple, whose item 5 is the
	# ascent of the line, and item 5 of the word is its own ascent
	def _renderword(self, tuple, word, h, v, h2, v2, isfirst, islast):
		if word[0] <> None: self.d.setfont(word[0])
		baseline = v + tuple[5]
		self.d.text((h, baseline - word[5]), word[1])
		if self.anchorid > 0:
			# Underline words inside an anchor
			self.d.line((h, baseline+2), (h2, baseline+2))
	#
	# Remember the current anchor marker (0 at the end of an anchor)
	def _renderanchor(self, tuple, word, h, v):
		self.anchorid = word
	#
	# Return which anchor(s) was hit by the mouse
	# The result is a list of anchor ids, with one entry for each
	# word inside an anchor that contains the mouse position
	def hitcheck(self, mouseh, mousev):
		self.mouseh = mouseh
		self.mousev = mousev
		self.anchorid = 0
		self.hits = []
		self.visit(self.__class__._hitcheckword, \
			   self.__class__._hitcheckanchor)
		return self.hits
	#
	def _hitcheckword(self, tuple, word, h, v, h2, v2, isfirst, islast):
		if self.anchorid > 0 and h <= self.mouseh <= h2 and \
			v <= self.mousev <= v2:
			self.hits.append(self.anchorid)
	#
	def _hitcheckanchor(self, tuple, word, h, v):
		self.anchorid = word
	#
	# Return whether the given anchor id is present
	# (either as id or as -id)
	def hasanchor(self, id):
		return id in self.words or -id in self.words
	#
	# Extract the raw text from the word list, substituting one space
	# for non-empty inter-word space, and terminating with '\n'
	def extract(self):
		text = ''
		for w in self.words:
			if type(w) <> Int:
				word = w[1]
				if w[3]: word = word + ' '
				text = text + word
		return text + '\n'
	#
	# Return which character position was hit by the mouse, as
	# an offset in the entire text as returned by extract().
	# Return None if the mouse was not in this paragraph
	# d is the object used to measure the width of characters
	def whereis(self, d, mouseh, mousev):
		if mousev < self.top or mousev > self.bottom:
			return None
		self.mouseh = mouseh
		self.mousev = mousev
		self.lastfont = None
		self.charcount = 0
		try:
			# self.d is only set for the duration of the visit
			self.d = d
			return self.visit(self.__class__._whereisword, \
					  self.__class__._whereisanchor)
		finally:
			self.d = None
	#
	# Word visitor for whereis(): return the character offset if the
	# mouse is in this word, else count its characters and return None
	def _whereisword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
		fo, te, wi, sp, st, as, de = word
		if fo <> None: self.lastfont = fo
		h = h1
		# The first and last word of a line extend to the margins
		if isfirst: h1 = 0
		if islast: h2 = 999999
		if not (v1 <= self.mousev <= v2 and h1 <= self.mouseh <= h2):
			# Not in this word; a nonzero space counts as one
			# character, as in extract()
			self.charcount = self.charcount + len(te) + (sp > 0)
			return
		if self.lastfont <> None:
			self.d.setfont(self.lastfont)
		cc = 0
		for c in te:
			cw = self.d.textwidth(c)
			# A hit in the left half of a character counts as
			# the position before it
			if self.mouseh <= h + cw/2:
				return self.charcount + cc
			cc = cc+1
			h = h+cw
		# The mouse is in the space following the word
		self.charcount = self.charcount + cc
		if self.mouseh <= (h+h2) / 2:
			return self.charcount
		else:
			return self.charcount + 1
	#
	# Anchor markers are ignored by whereis()
	def _whereisanchor(self, tuple, word, h, v):
		pass
	#
	# Return screen position corresponding to position in paragraph.
	# Return tuple (h, vtop, vbaseline, vbottom).
	# This is more or less the inverse of whereis()
	# A negative pos gives the top left corner of the paragraph; a pos
	# beyond the last character gives the end of the last line.
	def screenpos(self, d, pos):
		if pos < 0:
			ascent, descent = self.lines[0][5:7]
			return self.left, self.top, self.top + ascent, \
				self.top + ascent + descent
		# self.pos counts down while the words are visited
		self.pos = pos
		self.lastfont = None
		try:
			self.d = d
			ok = self.visit(self.__class__._screenposword, \
					self.__class__._screenposanchor)
		finally:
			self.d = None
		if ok == None:
			ascent, descent = self.lines[-1][5:7]
			ok = self.right, self.bottom - ascent - descent, \
				self.bottom - descent, self.bottom
		return ok
	#
	# Word visitor for screenpos(): skip words lying entirely before
	# the position, and return the result for the word containing it
	def _screenposword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
		fo, te, wi, sp, st, as, de = word
		if fo <> None: self.lastfont = fo
		# A nonzero space counts as one character, as in extract()
		cc = len(te) + (sp > 0)
		if self.pos > cc:
			self.pos = self.pos - cc
			return
		if self.pos < cc:
			self.d.setfont(self.lastfont)
			h = h1 + self.d.textwidth(te[:self.pos])
		else:
			h = h2
		# Items 5 and 6 of the line tuple are its ascent and descent
		ascent, descent = tuple[5:7]
		return h, v1, v1+ascent, v2
	#
	# Anchor markers are ignored by screenpos()
	def _screenposanchor(self, tuple, word, h, v):
		pass
	#
	# Invert the stretch of text between pos1 and pos2.
	# If pos1 is None, the beginning is implied;
	# if pos2 is None, the end is implied.
	# Undoes its own effect when called again with the same arguments
	def invert(self, d, pos1, pos2):
		if pos1 == None:
			pos1 = self.left, self.top, self.top, self.top
		else:
			pos1 = self.screenpos(d, pos1)
		if pos2 == None:
			pos2 = self.right, self.bottom,self.bottom,self.bottom
		else:
			pos2 = self.screenpos(d, pos2)
		h1, top1, baseline1, bottom1 = pos1
		h2, top2, baseline2, bottom2 = pos2
		if bottom1 <= top2:
			# The positions are on different lines: invert the
			# rest of the first line, then any full lines between
			d.invert((h1, top1), (self.right, bottom1))
			h1 = self.left
			if bottom1 < top2:
				d.invert((h1, bottom1), (self.right, top2))
			top1, bottom1 = top2, bottom2
		# Invert the part on the last (or only) line
		d.invert((h1, top1), (h2, bottom2))


# Test class Para
# XXX This was last used on the Mac, hence the weird fonts...
# Shows four paragraphs, one for each kind of justification, in a stdwin
# window.  A stretch of text in one paragraph can be selected with the
# mouse; the selection is shown inverted.
def test():
	import stdwin
	from stdwinevents import *
	words = 'The', 'quick', 'brown', 'fox', 'jumps', 'over', \
		'the', 'lazy', 'dog.'
	paralist = []
	for just in 'l', 'r', 'lr', 'c':
		p = Para()
		p.just = just
		p.addword(stdwin, ('New York', 'p', 12), words[0], 1, 1)
		for word in words[1:-1]:
			p.addword(stdwin, None, word, 1, 1)
		p.addword(stdwin, None, words[-1], 2, 4)
		p.addword(stdwin, ('New York', 'b', 18), 'Bye!', 0, 0)
		p.addword(stdwin, ('New York', 'p', 10), 'Bye!', 0, 0)
		paralist.append(p)
	window = stdwin.open('Para.test()')
	# Current selection: character offsets start and stop in selpara
	start = stop = selpara = None
	while 1:
		etype, win, detail = stdwin.getevent()
		if etype == WE_CLOSE:
			break
		if etype == WE_SIZE:
			window.change((0, 0), (1000, 1000))
		if etype == WE_DRAW:
			width, height = window.getwinsize()
			d = None
			try:
				d = window.begindrawing()
				d.cliprect(detail)
				d.erase(detail)
				v = 0
				for p in paralist:
					v = p.render(d, 0, v, width)
					if p == selpara and \
					   start <> None and stop <> None:
						p.invert(d, start, stop)
			finally:
				if d: d.close()
		if etype == WE_MOUSE_DOWN:
			# Undo the old selection (invert() is its own inverse)
			if selpara and start <> None and stop <> None:
				d = window.begindrawing()
				selpara.invert(d, start, stop)
				d.close()
			start = stop = selpara = None
			mouseh, mousev = detail[0]
			for p in paralist:
				start = p.whereis(stdwin, mouseh, mousev)
				if start <> None:
					selpara = p
					break
		if etype == WE_MOUSE_UP and start <> None and selpara:
			mouseh, mousev = detail[0]
			stop = selpara.whereis(stdwin, mouseh, mousev)
			if stop == None: start = selpara = None
			else:
				if start > stop:
					start, stop = stop, start
				d = window.begindrawing()
				selpara.invert(d, start, stop)
				d.close()
	window.close()
diff --git a/Lib/lib-old/fmt.py b/Lib/lib-old/fmt.py new file mode 100644 index 0000000000..c0963069e0 --- /dev/null +++ b/Lib/lib-old/fmt.py @@ -0,0 +1,621 @@
# Text formatting abstractions


import string
import Para


# A formatter back-end object has one method that is called by the formatter:
# addpara(p), where p is a paragraph object.  For example:


# Formatter back-end to do nothing at all with the paragraphs
class NullBackEnd:
	#
	def __init__(self):
		pass
	#
	def addpara(self, p):
		pass
	#
	def bgn_anchor(self, id):
		pass
	#
	def end_anchor(self, id):
		pass


# Formatter back-end to collect the paragraphs in a list
# Positions in the collected text ("long" positions) are tuples
# (paragraph index, character offset in that paragraph's extract()).
class SavingBackEnd(NullBackEnd):
	#
	def __init__(self):
		self.paralist = []
	#
	def addpara(self, p):
		self.paralist.append(p)
	#
	# Return the list of anchor ids hit at (h, v), without duplicates
	def hitcheck(self, h, v):
		hits = []
		for p in self.paralist:
			if p.top <= v <= p.bottom:
				for id in p.hitcheck(h, v):
					if id not in hits:
						hits.append(id)
		return hits
	#
	# Return the text of all paragraphs
	def extract(self):
		text = ''
		for p in self.paralist:
			text = text + (p.extract())
		return text
	#
	# Return the text between two long positions, given in any order
	def extractpart(self, long1, long2):
		if long1 > long2: long1, long2 = long2, long1
		para1, pos1 = long1
		para2, pos2 = long2
		text = ''
		while para1 < para2:
			ptext = self.paralist[para1].extract()
			text = text + ptext[pos1:]
			pos1 = 0
			para1 = para1 + 1
		ptext = self.paralist[para2].extract()
		return text + ptext[pos1:pos2]
	#
	# Return the long position hit at (h, v), or None
	def whereis(self, d, h, v):
		total = 0
		for i in range(len(self.paralist)):
			p = self.paralist[i]
			result = p.whereis(d, h, v)
			if result <> None:
				return i, result
		return None
	#
	# Extend a pair of long positions to the surrounding spaces
	def roundtowords(self, long1, long2):
		i, offset = long1
		text = self.paralist[i].extract()
		while offset > 0 and text[offset-1] <> ' ': offset = offset-1
		long1 = i, offset
		#
		i, offset = long2
		text = self.paralist[i].extract()
		n = len(text)
		while offset < n-1 and text[offset] <> ' ': offset = offset+1
		long2 = i, offset
		#
		return long1, long2
	#
	# Extend a pair of long positions to whole paragraphs
	def roundtoparagraphs(self, long1, long2):
		long1 = long1[0], 0
		long2 = long2[0], len(self.paralist[long2[0]].extract())
		return long1, long2


# Formatter back-end to send the text directly to the drawing object
class WritingBackEnd(NullBackEnd):
	#
	def __init__(self, d, width):
		self.d = d
		self.width = width
		self.lineno = 0
	#
	# Render the paragraph below the previous one; render() returns
	# the new bottom position
	def addpara(self, p):
		self.lineno = p.render(self.d, 0, self.lineno, self.width)


# A formatter receives a stream of formatting instructions and assembles
# these into a stream of paragraphs on to a back-end.  The assembly is
# parametrized by a text measurement object, which must match the output
# operations of the back-end.  The back-end is responsible for splitting
# paragraphs up in lines of a given maximum width.  (This is done because
# in a windowing environment, when the window size changes, there is no
# need to redo the assembly into paragraphs, but the splitting into lines
# must be done taking the new window size into account.)


# Formatter base class.  Initialize it with a text measurement object,
# which is used for text measurements, and a back-end object,
# which receives the completed paragraphs.  The formatting methods are:
# setfont(font)
# setleftindent(nspaces)
# setjust(type) where type is 'l', 'c', 'r', or 'lr'
# flush()
# vspace(nlines)
# needvspace(nlines)
# addword(word, nspaces)
class BaseFormatter:
	#
	def __init__(self, d, b):
		# Drawing object used for text measurements
		self.d = d
		#
		# BackEnd object receiving completed paragraphs
		self.b = b
		#
		# Parameters of the formatting model
		self.leftindent = 0
		self.just = 'l'
		self.font = None
		self.blanklines = 0
		#
		# Parameters derived from the current font
		self.space = d.textwidth(' ')
		self.line = d.lineheight()
		self.ascent = d.baseline()
		self.descent = self.line - self.ascent
		#
		# Parameter derived from the default font
		self.n_space = self.space
		#
		# Current paragraph being built
		self.para = None
		self.nospace = 1
		#
		# Font to set on the next word
		self.nextfont = None
	#
	# Create a new, empty paragraph object
	def newpara(self):
		return Para.Para()
	#
	# Change the current font (None is ignored) and update the
	# parameters derived from it
	def setfont(self, font):
		if font == None: return
		self.font = self.nextfont = font
		d = self.d
		d.setfont(font)
		self.space = d.textwidth(' ')
		self.line = d.lineheight()
		self.ascent = d.baseline()
		self.descent = self.line - self.ascent
	#
	# Set the left indent, in spaces of the default font.  If the text
	# of the current paragraph fits in the extra indent it becomes a
	# hanging tag; otherwise the current paragraph is flushed.
	def setleftindent(self, nspaces):
		self.leftindent = int(self.n_space * nspaces)
		if self.para:
			hang = self.leftindent - self.para.indent_left
			if hang > 0 and self.para.getlength() <= hang:
				self.para.makehangingtag(hang)
				self.nospace = 1
			else:
				self.flush()
	#
	# Set the right indent of the current paragraph and flush it
	# NOTE(review): self.rightindent is not set in __init__ and is not
	# applied to paragraphs created later by addword() -- confirm
	def setrightindent(self, nspaces):
		self.rightindent = int(self.n_space * nspaces)
		if self.para:
			self.para.indent_right = self.rightindent
			self.flush()
	#
	def setjust(self, just):
		self.just = just
		if self.para:
			self.para.just = self.just
	#
	# Pass the current paragraph, if any, to the back-end
	def flush(self):
		if self.para:
			self.b.addpara(self.para)
			self.para = None
			if self.font <> None:
				self.d.setfont(self.font)
		self.nospace = 1
	#
	# Add nlines of vertical space, as a paragraph containing a single
	# empty word of that height
	def vspace(self, nlines):
		self.flush()
		if nlines > 0:
			self.para = self.newpara()
			tuple = None, '', 0, 0, 0, int(nlines*self.line), 0
			self.para.words.append(tuple)
			self.flush()
			self.blanklines = self.blanklines + nlines
	#
	# Make sure there are at least nlines blank lines
	def needvspace(self, nlines):
		self.flush() # Just to be sure
		if nlines > self.blanklines:
			self.vspace(nlines - self.blanklines)
	#
	# Add a word followed by the given number of spaces to the current
	# paragraph, starting a new paragraph if necessary.  Empty words
	# are dropped while nospace is set.
	def addword(self, text, space):
		if self.nospace and not text:
			return
		self.nospace = 0
		self.blanklines = 0
		if not self.para:
			self.para = self.newpara()
			self.para.indent_left = self.leftindent
			self.para.just = self.just
			self.nextfont = self.font
		space = int(space * self.space)
		# The word tuple uses the space for the stretch as well
		self.para.words.append(self.nextfont, text, \
			self.d.textwidth(text), space, space, \
			self.ascent, self.descent)
		self.nextfont = None
	#
	# Anchor hooks; an empty word is added first if needed so that
	# there is a paragraph to hold the marker
	def bgn_anchor(self, id):
		if not self.para:
			self.nospace = 0
			self.addword('', 0)
		self.para.bgn_anchor(id)
	#
	def end_anchor(self, id):
		if not self.para:
			self.nospace = 0
			self.addword('', 0)
		self.para.end_anchor(id)


# Measuring object for measuring text as viewed on a tty
# (each character is one unit wide, each line one unit high)
class NullMeasurer:
	#
	def __init__(self):
		pass
	#
	def setfont(self, font):
		pass
	#
	def textwidth(self, text):
		return len(text)
	#
	def lineheight(self):
		return 1
	#
	def baseline(self):
		return 0


# Drawing object for writing plain ASCII text to a file
class FileWriter:
	#
	def __init__(self, fp):
		self.fp = fp
		# Current output position
		self.lineno, self.colno = 0, 0
	#
	def setfont(self, font):
		pass
	#
	# Write str at column h of line v, moving there from the current
	# position first.  Raises ValueError if str contains a newline.
	def text(self, (h, v), str):
		if not str: return
		if '\n' in str:
			raise ValueError, 'can\'t write \\n'
		while self.lineno < v:
			self.fp.write('\n')
			self.colno, self.lineno = 0, self.lineno + 1
		while self.lineno > v:
			# XXX This should never happen...
			self.fp.write('\033[A') # ANSI up arrow
			self.lineno = self.lineno - 1
		if self.colno < h:
			self.fp.write(' ' * (h - self.colno))
		elif self.colno > h:
			self.fp.write('\b' * (self.colno - h))
		self.colno = h
		self.fp.write(str)
		self.colno = h + len(str)


# Formatting class to do nothing at all with the data
class NullFormatter(BaseFormatter):
	#
	def __init__(self):
		d = NullMeasurer()
		b = NullBackEnd()
		BaseFormatter.__init__(self, d, b)


# Formatting class to write directly to a file
class WritingFormatter(BaseFormatter):
	#
	def __init__(self, fp, width):
		dm = NullMeasurer()
		dw = FileWriter(fp)
		b = WritingBackEnd(dw, width)
		BaseFormatter.__init__(self, dm, b)
		# Start out as if a blank line has already been written
		self.blanklines = 1
	#
	# Suppress multiple blank lines
	def needvspace(self, nlines):
		BaseFormatter.needvspace(self, min(1, nlines))


# A "FunnyFormatter" writes ASCII text with a twist: *bold words*,
# _italic text_ and _underlined words_, and `quoted text'.
# It assumes that the fonts are 'r', 'i', 'b', 'u', 'q': (roman,
# italic, bold, underline, quote).
# Moreover, if the font is in upper case, the text is converted to
# UPPER CASE.
class FunnyFormatter(WritingFormatter):
	#
	# Add the markers to the paragraph just before it is flushed
	def flush(self):
		if self.para: finalize(self.para)
		WritingFormatter.flush(self)


# Surrounds *bold words* and _italic text_ in a paragraph with
# appropriate markers, fixing the size (assuming these characters'
# width is 1).
# Marker inserted before the first word of a run in the given font.
openchar = {
    'b': '*', 'i': '_', 'u': '_', 'q': '`',
    'B': '*', 'I': '_', 'U': '_', 'Q': '`',
}
# Marker appended to the last word of a run in the given font.
closechar = {
    'b': '*', 'i': '_', 'u': '_', 'q': '\'',
    'B': '*', 'I': '_', 'U': '_', 'Q': '\'',
}


# Return the index of the word that should receive a closing marker
# for a font run ending just before index i: the nearest preceding
# word tuple with non-empty text, or else the earliest preceding word
# tuple.  Entries that are not tuples (anchor markers are plain
# numbers in the word list) are skipped.  Returns None if there is no
# preceding word tuple at all.
def _last_word_before(words, i):
    found = None
    for j in range(i - 1, -1, -1):
        if isinstance(words[j], tuple):
            found = j
            if words[j][1] != '':
                break
    return found


# Rewrite para.words in place: put openchar/closechar markers around
# runs of words in a marked font and adjust the width of each changed
# word by the marker length.  Words in an upper case font have their
# text converted to upper case.  A font of None in a word means "same
# font as the previous word"; 'r' is the initial font.
def finalize(para):
    words = para.words
    oldfont = curfont = 'r'
    # Temporary sentinel in font 'r' so that a run that is still open
    # at the end of the paragraph gets closed; removed again below.
    # It is a full 7-tuple like every other word.
    words.append(('r', '', 0, 0, 0, 0, 0))
    try:
        for i in range(len(words)):
            if not isinstance(words[i], tuple):
                # Anchor marker, not a word -- nothing to decorate
                continue
            fo, te, wi = words[i][:3]
            if fo is not None:
                curfont = fo
            if curfont != oldfont:
                if oldfont in closechar:
                    j = _last_word_before(words, i)
                    if j is not None:
                        c = closechar[oldfont]
                        fo1, te1, wi1 = words[j][:3]
                        words[j] = (fo1, te1 + c, wi1 + len(c)) + \
                            words[j][3:]
                if curfont in openchar and te:
                    c = openchar[curfont]
                    te = c + te
                    wi = len(c) + wi
                    words[i] = (fo, te, wi) + words[i][3:]
                # An empty word cannot carry the opening marker;
                # resetting oldfont makes the next word retry it.
                if te:
                    oldfont = curfont
                else:
                    oldfont = 'r'
            if curfont.isupper():
                te = te.upper()
                words[i] = (fo, te, wi) + words[i][3:]
    finally:
        # Always remove the sentinel, even if a word was malformed
        del words[-1]


# Formatter back-end to draw the text in a window.
# This has an option to draw while the paragraphs are being added,
# to minimize the delay before the user sees anything.
# This manages the entire "document" of the window.
class StdwinBackEnd(SavingBackEnd):
    #
    # window is the window to draw in; if drawnow is true, each
    # paragraph is rendered as soon as it is added.
    def __init__(self, window, drawnow):
        self.window = window
        self.drawnow = drawnow
        self.width = window.getwinsize()[0]
        self.selection = None
        self.height = 0
        window.setorigin(0, 0)
        window.setdocsize(0, 0)
        self.d = window.begindrawing()
        SavingBackEnd.__init__(self)
    #
    # Close the drawing object opened by __init__ and publish the
    # accumulated document height to the window.
    def finish(self):
        self.d.close()
        self.d = None
        self.window.setdocsize(0, self.height)
    #
    # Append a paragraph below the ones already present.  With
    # drawnow, render() both draws it and yields the new height;
    # otherwise it is only laid out and positioned.
    def addpara(self, p):
        self.paralist.append(p)
        if self.drawnow:
            self.height = p.render(self.d, 0, self.height, self.width)
        else:
            p.layout(self.width)
            p.left = 0
            p.top = self.height
            p.right = self.width
            p.bottom = self.height + p.height
            self.height = p.bottom
    #
    # Re-layout every paragraph for the current window width.
    # window.change() is called for the old and for the new extent.
    def resize(self):
        self.window.change((0, 0), (self.width, self.height))
        self.width = self.window.getwinsize()[0]
        self.height = 0
        for p in self.paralist:
            p.layout(self.width)
            p.left = 0
            p.top = self.height
            p.right = self.width
            p.bottom = self.height + p.height
            self.height = p.bottom
        self.window.change((0, 0), (self.width, self.height))
        self.window.setdocsize(0, self.height)
    #
    # Redraw the paragraphs that vertically overlap area, which is
    # ((left, top), (right, bottom)), then re-invert the selection.
    def redraw(self, area):
        d = self.window.begindrawing()
        try:
            (left, top), (right, bottom) = area
            d.erase(area)
            d.cliprect(area)
            for p in self.paralist:
                if top < p.bottom and p.top < bottom:
                    p.render(d, p.left, p.top, p.right)
            if self.selection:
                self.invert(d, self.selection)
        finally:
            # Release the drawing object even if rendering failed
            d.close()
    #
    # Change the selection to new (a pair of positions, or a false
    # value for "no selection"), un-inverting the old selection and
    # inverting the new one.  Each position is truncated to at most
    # its first three items before it is stored.
    def setselection(self, new):
        if new:
            long1, long2 = new
            new = long1[:3], long2[:3]
        if new != self.selection:
            d = self.window.begindrawing()
            try:
                if self.selection:
                    self.invert(d, self.selection)
                if new:
                    self.invert(d, new)
            finally:
                d.close()
            self.selection = new
    #
    def getselection(self):
        return self.selection
    #
    # Return the selected part via extractpart(), or None if there
    # is no selection.
    def extractselection(self):
        if self.selection:
            a, b = self.selection
            return self.extractpart(a, b)
        else:
            return None
    #
    # Invert region, a pair of (paragraph index, position) pairs in
    # either order, on drawing object d.  Paragraphs before the last
    # one are inverted through to their end (position None).
    def invert(self, d, region):
        long1, long2 = region
        if long1 > long2:
            long1, long2 = long2, long1
        para1, pos1 = long1
        para2, pos2 = long2
        while para1 < para2:
            self.paralist[para1].invert(d, pos1, None)
            pos1 = None
            para1 = para1 + 1
        self.paralist[para2].invert(d, pos1, pos2)
    #
    # Search the lower-cased paragraph texts for prog, which is a
    # string (lower-cased and compiled here) or a compiled regex
    # object.  The paragraph holding the current selection is
    # skipped; the first hit after it wins, else the first hit
    # before it.  Selects and shows the hit; returns 1 if found,
    # else 0.
    def search(self, prog):
        # Kept on the old regex API: callers may pass in an object
        # compiled with that module.
        import regex
        if isinstance(prog, str):
            prog = regex.compile(prog.lower())
        if self.selection:
            iold = self.selection[0][0]
        else:
            iold = -1
        hit = None
        for i in range(len(self.paralist)):
            if i == iold or i < iold and hit:
                continue
            p = self.paralist[i]
            text = p.extract().lower()
            if prog.search(text) >= 0:
                a, b = prog.regs[0]
                hit = (i, a), (i, b)
                if i > iold:
                    break
        if hit:
            self.setselection(hit)
            p = self.paralist[hit[0][0]]
            self.window.show((p.left, p.top), (p.right, p.bottom))
            return 1
        else:
            return 0
    #
    # Select and show the whole first paragraph that has anchor id.
    def showanchor(self, id):
        for i, p in enumerate(self.paralist):
            if p.hasanchor(id):
                hit = (i, 0), (i, len(p.extract()))
                self.setselection(hit)
                self.window.show((p.left, p.top), (p.right, p.bottom))
                break


# GL extensions

# Convert the size part of a font key (e.g. ' 12') to a number
# without evaluating it as code.  Raises ValueError if it is not
# a number.
def _parse_font_size(sizestr):
    sizestr = sizestr.strip()
    try:
        return int(sizestr)
    except ValueError:
        return float(sizestr)


class GLFontCache:
    #
    def __init__(self):
        self.reset()
        self.setfont('')
    #
    # Forget the current font and every cached font handle.
    def reset(self):
        self.fontkey = None
        self.fonthandle = None
        self.fontinfo = None
        self.fontcache = {}
    #
    def close(self):
        self.reset()
    #
    # Make fontkey ('name size') the current font.  An empty key
    # means 'Times-Roman 12'; a key without a space gets size 12.
    # Handles are cached under the key as given, under the
    # normalized key 'name size', and (unscaled) under 'name 1'.
    def setfont(self, fontkey):
        if fontkey == '':
            fontkey = 'Times-Roman 12'
        elif ' ' not in fontkey:
            fontkey = fontkey + ' 12'
        if fontkey == self.fontkey:
            return
        if fontkey in self.fontcache:
            handle = self.fontcache[fontkey]
        else:
            i = fontkey.index(' ')
            name, sizestr = fontkey[:i], fontkey[i:]
            size = _parse_font_size(sizestr)
            key1 = name + ' 1'
            key = name + ' ' + repr(size)
            # NB key may differ from fontkey!
            if key in self.fontcache:
                handle = self.fontcache[key]
            else:
                if key1 in self.fontcache:
                    handle = self.fontcache[key1]
                else:
                    import fm
                    handle = fm.findfont(name)
                    self.fontcache[key1] = handle
                handle = handle.scalefont(size)
                self.fontcache[key] = handle
            # Remember the spelling we were given as well, so the
            # next request for it is a direct cache hit.
            self.fontcache[fontkey] = handle
        self.fontkey = fontkey
        if self.fonthandle != handle:
            self.fonthandle = handle
            self.fontinfo = handle.getfontinfo()
            handle.setfont()


class GLMeasurer(GLFontCache):
    #
    def textwidth(self, text):
        return self.fonthandle.getstrwidth(text)
    #
    def baseline(self):
        return self.fontinfo[6] - self.fontinfo[3]
    #
    def lineheight(self):
        return self.fontinfo[6]


class GLWriter(GLFontCache):
    #
    # NOTES:
    # (1) Use gl.ortho2 to use X pixel coordinates!
    #
    # Draw text at pos, an (h, v) pair; v is offset by the same
    # amount that GLMeasurer.baseline() computes.
    def text(self, pos, text):
        import gl, fm
        h, v = pos
        gl.cmov2i(h, v + self.fontinfo[6] - self.fontinfo[3])
        fm.prstr(text)
    #
    # Select the font via the cache and (re)activate its handle
    # if that changed the current handle.
    def setfont(self, fontkey):
        oldhandle = self.fonthandle
        GLFontCache.setfont(self, fontkey)
        if self.fonthandle != oldhandle:
            self.fonthandle.setfont()


class GLMeasurerWriter(GLMeasurer, GLWriter):
    pass


class GLBackEnd(SavingBackEnd):
    #
    def __init__(self, wid):
        import gl
        gl.winset(wid)
        self.wid = wid
        # NOTE(review): item [1] of gl.getsize() is used as the
        # width here and in redraw() -- confirm that this is the
        # horizontal size.
        self.width = gl.getsize()[1]
        self.height = 0
        self.d = GLMeasurerWriter()
        SavingBackEnd.__init__(self)
    #
    def finish(self):
        pass
    #
    # Append a paragraph and render it at once below the others.
    def addpara(self, p):
        self.paralist.append(p)
        self.height = p.render(self.d, 0, self.height, self.width)
    #
    # Render all paragraphs again from the top.  If the window
    # width changed, the paragraphs' top and bottom are reset to
    # None first.
    def redraw(self):
        import gl
        gl.winset(self.wid)
        width = gl.getsize()[1]
        if width != self.width:
            self.width = width
            for p in self.paralist:
                p.top = p.bottom = None
        d = self.d
        v = 0
        for p in self.paralist:
            v = p.render(d, 0, v, width)

# diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py new file mode 100644
index 0000000000..af75e0df26 --- /dev/null +++ b/Lib/sgmllib.py @@ -0,0 +1,321 @@ +# A parser for SGML, using the derived class as static DTD. + +# XXX This only supports those SGML features used by HTML. + +# XXX There should be a way to distinguish between PCDATA (parsed +# character data -- the normal case), RCDATA (replaceable character +# data -- only char and entity references and end tags are special) +# and CDATA (character data -- only end tags are special). + + +import regex +import string + + +# Regular expressions used for parsing + +incomplete = regex.compile( \ +	  '<!-?\|</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*\|</?\|' + \ +	  '&#[a-zA-Z0-9]*\|&[a-zA-Z][a-zA-Z0-9]*\|&') +entityref = regex.compile('&[a-zA-Z][a-zA-Z0-9]*[;.]') +charref = regex.compile('&#[a-zA-Z0-9]+;') +starttagopen = regex.compile('<[a-zA-Z]') +endtag = regex.compile('</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*>') +commentopen = regex.compile('<!--') + + +# SGML parser base class -- find tags and call handler functions. +# Usage: p = SGMLParser(); p.feed(data); ...; p.close(). +# The dtd is defined by deriving a class which defines methods +# with special names to handle tags: start_foo and end_foo to handle +# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself. +# (Tags are converted to lower case for this purpose.)  The data +# between tags is passed to the parser by calling self.handle_data() +# with some data as argument (the data may be split up in arbutrary +# chunks).  Entity references are passed by calling +# self.handle_entityref() with the entity reference as argument. + +class SGMLParser: + +	# Interface -- initialize and reset this instance +	def __init__(self): +		self.reset() + +	# Interface -- reset this instance.  
Loses all unprocessed data +	def reset(self): +		self.rawdata = '' +		self.stack = [] +		self.nomoretags = 0 +		self.literal = 0 + +	# For derived classes only -- enter literal mode (CDATA) till EOF +	def setnomoretags(self): +		self.nomoretags = self.literal = 1 + +	# For derived classes only -- enter literal mode (CDATA) +	def setliteral(self, *args): +		self.literal = 1 + +	# Interface -- feed some data to the parser.  Call this as +	# often as you want, with as little or as much text as you +	# want (may include '\n').  (This just saves the text, all the +	# processing is done by process() or close().) +	def feed(self, data): +		self.rawdata = self.rawdata + data +		self.goahead(0) + +	# Interface -- handle the remaining data +	def close(self): +		self.goahead(1) + +	# Internal -- handle data as far as reasonable.  May leave state +	# and data to be processed by a subsequent call.  If 'end' is +	# true, force handling all data as if followed by EOF marker. +	def goahead(self, end): +		rawdata = self.rawdata +		i = 0 +		n = len(rawdata) +		while i < n: +			if self.nomoretags: +				self.handle_data(rawdata[i:n]) +				i = n +				break +			j = incomplete.search(rawdata, i) +			if j < 0: j = n +			if i < j: self.handle_data(rawdata[i:j]) +			i = j +			if i == n: break +			if rawdata[i] == '<': +				if starttagopen.match(rawdata, i) >= 0: +					if self.literal: +						self.handle_data(rawdata[i]) +						i = i+1 +						continue +					k = self.parse_starttag(i) +					if k < 0: break +					i = i + k +					continue +				k = endtag.match(rawdata, i) +				if k >= 0: +					j = i+k +					self.parse_endtag(rawdata[i:j]) +					i = j +					self.literal = 0 +					continue +				if commentopen.match(rawdata, i) >= 0: +					if self.literal: +						self.handle_data(rawdata[i]) +						i = i+1 +						continue +					k = self.parse_comment(i) +					if k < 0: break +					i = i+k +					continue +			elif rawdata[i] == '&': +				k = charref.match(rawdata, i) +				if k >= 0: +					j = i+k +		
			self.handle_charref(rawdata[i+2:j-1]) +					i = j +					continue +				k = entityref.match(rawdata, i) +				if k >= 0: +					j = i+k +					self.handle_entityref(rawdata[i+1:j-1]) +					i = j +					continue +			else: +				raise RuntimeError, 'neither < nor & ??' +			# We get here only if incomplete matches but +			# nothing else +			k = incomplete.match(rawdata, i) +			if k < 0: raise RuntimeError, 'no incomplete match ??' +			j = i+k +			if j == n: break # Really incomplete +			self.handle_data(rawdata[i:j]) +			i = j +		# end while +		if end and i < n: +			self.handle_data(rawdata[i:n]) +			i = n +		self.rawdata = rawdata[i:] +		# XXX if end: check for empty stack + +	# Internal -- parse comment, return length or -1 if not ternimated +	def parse_comment(self, i): +		rawdata = self.rawdata +		if rawdata[i:i+4] <> '<!--': +			raise RuntimeError, 'unexpected call to handle_comment' +		try: +			j = string.index(rawdata, '--', i+4) +		except string.index_error: +			return -1 +		self.handle_comment(rawdata[i+4: j]) +		j = j+2 +		n = len(rawdata) +		while j < n and rawdata[j] in ' \t\n': j = j+1 +		if j == n: return -1 # Wait for final '>' +		if rawdata[j] == '>': +			j = j+1 +		else: +			print '*** comment not terminated with >' +			print repr(rawdata[j-5:j]), '*!*', repr(rawdata[j:j+5]) +		return j-i + +	# Internal -- handle starttag, return length or -1 if not terminated +	def parse_starttag(self, i): +		rawdata = self.rawdata +		try: +			j = string.index(rawdata, '>', i) +		except string.index_error: +			return -1 +		# Now parse the data between i+1 and j into a tag and attrs +		attrs = [] +		tagfind = regex.compile('[a-zA-Z][a-zA-Z0-9]*') +		attrfind = regex.compile( \ +		  '[ \t\n]+\([a-zA-Z][a-zA-Z0-9]*\)' + \ +		  '\([ \t\n]*=[ \t\n]*' + \ +		     '\(\'[^\']*\';\|"[^"]*"\|[-a-zA-Z0-9./:+*%?!()_#]+\)\)?') +		k = tagfind.match(rawdata, i+1) +		if k < 0: +			raise RuntimeError, 'unexpected call to parse_starttag' +		k = i+1+k +		tag = string.lower(rawdata[i+1:k]) 
+		while k < j: +			l = attrfind.match(rawdata, k) +			if l < 0: break +			regs = attrfind.regs +			a1, b1 = regs[1] +			a2, b2 = regs[2] +			a3, b3 = regs[3] +			attrname = rawdata[a1:b1] +			if '=' in rawdata[k:k+l]: +				attrvalue = rawdata[a3:b3] +				if attrvalue[:1] == '\'' == attrvalue[-1:] or \ +				   attrvalue[:1] == '"' == attrvalue[-1:]: +					attrvalue = attrvalue[1:-1] +			else: +				attrvalue = '' +			attrs.append(string.lower(attrname), attrvalue) +			k = k + l +		j = j+1 +		try: +			method = getattr(self, 'start_' + tag) +		except AttributeError: +			try: +				method = getattr(self, 'do_' + tag) +			except AttributeError: +				self.unknown_starttag(tag, attrs) +				return j-i +			method(attrs) +			return j-i +		self.stack.append(tag) +		method(attrs) +		return j-i + +	# Internal -- parse endtag +	def parse_endtag(self, data): +		if data[:2] <> '</' or data[-1:] <> '>': +			raise RuntimeError, 'unexpected call to parse_endtag' +		tag = string.lower(string.strip(data[2:-1])) +		try: +			method = getattr(self, 'end_' + tag) +		except AttributeError: +			self.unknown_endtag(tag) +			return +		if self.stack and self.stack[-1] == tag: +			del self.stack[-1] +		else: +			print '*** Unbalanced </' + tag + '>' +			print '*** Stack:', self.stack +			found = None +			for i in range(len(self.stack)): +				if self.stack[i] == tag: found = i +			if found <> None: +				del self.stack[found:] +		method() + +	# Example -- handle character reference, no need to override +	def handle_charref(self, name): +		try: +			n = string.atoi(name) +		except string.atoi_error: +			self.unknown_charref(name) +			return +		if not 0 <= n <= 255: +			self.unknown_charref(name) +			return +		self.handle_data(chr(n)) + +	# Definition of entities -- derived classes may override +	entitydefs = \ +		{'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''} + +	# Example -- handle entity reference, no need to override +	def handle_entityref(self, name): +		table = 
self.__class__.entitydefs +		name = string.lower(name) +		if table.has_key(name): +			self.handle_data(table[name]) +		else: +			self.unknown_entityref(name) +			return + +	# Example -- handle data, should be overridden +	def handle_data(self, data): +		pass + +	# Example -- handle comment, could be overridden +	def handle_comment(self, data): +		pass + +	# To be overridden -- handlers for unknown objects +	def unknown_starttag(self, tag, attrs): pass +	def unknown_endtag(self, tag): pass +	def unknown_charref(self, ref): pass +	def unknown_entityref(self, ref): pass + + +class TestSGML(SGMLParser): + +	def handle_data(self, data): +		r = repr(data) +		if len(r) > 72: +			r = r[:35] + '...' + r[-35:] +		print 'data:', r + +	def handle_comment(self, data): +		r = repr(data) +		if len(r) > 68: +			r = r[:32] + '...' + r[-32:] +		print 'comment:', r + +	def unknown_starttag(self, tag, attrs): +		print 'start tag: <' + tag, +		for name, value in attrs: +			print name + '=' + '"' + value + '"', +		print '>' + +	def unknown_endtag(self, tag): +		print 'end tag: </' + tag + '>' + +	def unknown_entityref(self, ref): +		print '*** unknown entity ref: &' + ref + ';' + +	def unknown_charref(self, ref): +		print '*** unknown char ref: &#' + ref + ';' + + +def test(): +	file = 'test.html' +	f = open(file, 'r') +	x = TestSGML() +	while 1: +		line = f.readline() +		if not line: +			x.close() +			break +		x.feed(line) + + +#test() | 
