diff options
Diffstat (limited to 'Demo/scripts/markov.py')
| -rwxr-xr-x | Demo/scripts/markov.py | 116 | 
1 files changed, 116 insertions, 0 deletions
#! /usr/local/bin/python
# Source path: Demo/scripts/markov.py (mode 100755)

"""Generate random text that statistically resembles the input text.

The input is split into paragraphs; each paragraph is fed to a Markov
chain either character by character or word by word.  The script then
prints randomly generated paragraphs until it is interrupted.

print is always called with a single argument so the script behaves the
same under the Python 2 print statement and the Python 3 print function.
"""

import getopt
import random
import sys

# Only the chain itself is meant for reuse; test() is the command-line driver.
__all__ = ['Markov']

USAGE = """\
Usage: markov [-#] [-cddqw] [file] ...
Options:
-#: 1-digit history size (default 2)
-c: characters (default)
-w: words
-d: more debugging output
-q: no debugging output
Input files (default stdin) are split in paragraphs
separated blank lines and each paragraph is split
in words by whitespace, then reconcatenated with
exactly one space separating words.
Output consists of paragraphs separated by blank
lines, where lines are no longer than 72 characters."""


class Markov:
    """Markov chain over sliceable, hashable sequences (str or tuple).

    histsize -- maximum number of preceding items used as the state
    choice   -- callable that picks one element from a non-empty list
                (for example random.choice)

    self.trans maps a state to the list of observed successors:
      * key None holds the empty sequences that start each sample;
      * any other key is a history slice of at most histsize items;
      * a successor is a one-item slice, or None to mark end of sample.
    Repeated successors are stored repeatedly, so a uniform choice
    function reproduces the observed frequencies.
    """

    def __init__(self, histsize, choice):
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that `next` was observed after `state`."""
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one complete sample sequence into the chain."""
        n = self.histsize
        add = self.add
        # seq[:0] is an empty sequence of the same type as seq.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp the start at 0: for a sample shorter than histsize a
        # negative start would wrap around and store the end marker
        # under a key that get() never looks up.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Return one randomly generated sequence.

        Raises KeyError if nothing has been fed with put() yet.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            history = seq[max(0, len(seq) - n):]
            successor = choice(trans[history])
            if successor is None:
                break
            seq = seq + successor
        return seq


def _feed(m, text, do_words, debug):
    """Split text into paragraphs and put each non-empty one into m."""
    for para in text.split('\n\n'):
        if debug > 1:
            print('feeding ...')
        words = para.split()
        if words:
            if do_words:
                data = tuple(words)
            else:
                data = ' '.join(words)
            m.put(data)


def _fill(words, limit=72):
    """Join words with single spaces into lines; return the list of lines.

    A new line is started when the running width plus the next word
    would exceed limit.
    """
    lines = []
    current = []
    width = 0
    for w in words:
        if width + len(w) > limit:
            lines.append(' '.join(current))
            current = []
            width = 0
        current.append(w)
        width = width + len(w) + 1
    lines.append(' '.join(current))
    return lines


def test():
    """Command-line driver: read the input files, then print random text.

    Output continues until the process is interrupted.
    """
    args = sys.argv[1:]
    try:
        # 'q' must be listed here, otherwise the documented -q option
        # is rejected by getopt.
        opts, args = getopt.getopt(args, '0123456789cdqw')
    except getopt.error:
        print(USAGE)
        # Stop here: opts is unbound after a parse failure.
        sys.exit(2)
    histsize = 2
    do_words = 0
    debug = 1
    for o, a in opts:
        if o[1:].isdigit():
            histsize = int(o[1:])
        elif o == '-c':
            do_words = 0
        elif o == '-d':
            debug = debug + 1
        elif o == '-q':
            debug = 0
        elif o == '-w':
            do_words = 1
    if not args:
        args = ['-']
    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                f = sys.stdin
                if f.isatty():
                    print('Sorry, need stdin from file')
                    continue
            else:
                f = open(filename, 'r')
            if debug:
                print('processing %s ...' % filename)
            try:
                text = f.read()
            finally:
                # Only close what was opened here; leave stdin alone.
                if f is not sys.stdin:
                    f.close()
            _feed(m, text, do_words, debug)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')
    if debug > 1:
        # Show the start state and every "short" history.  With
        # histsize 0 the only history is the empty sequence ('' or (),
        # depending on the mode), hence the max(..., 1).
        for key in m.trans.keys():
            if key is None or len(key) < max(histsize, 1):
                print('%s %s' % (repr(key), m.trans[key]))
        print('')
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        print('\n'.join(_fill(words, 72)))
        print('')


if __name__ == '__main__':
    test()
