From 0b19bb71ba5a4afa84e673a8239935426fa0db23 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Tue, 9 Aug 2016 21:50:19 +0000 Subject: Remove incorrect tag directory git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/tags/pyparsing_2.1.6@405 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b --- trunk/src/examples/partial_gene_match.py | 88 -------------------------------- 1 file changed, 88 deletions(-) delete mode 100644 trunk/src/examples/partial_gene_match.py (limited to 'trunk/src/examples/partial_gene_match.py') diff --git a/trunk/src/examples/partial_gene_match.py b/trunk/src/examples/partial_gene_match.py deleted file mode 100644 index 8bf5f7c..0000000 --- a/trunk/src/examples/partial_gene_match.py +++ /dev/null @@ -1,88 +0,0 @@ -# parital_gene_match.py -# -# Example showing how to create a customized pyparsing Token, in this case, -# one that is very much like Literal, but which tolerates up to 'n' character -# mismatches -from pyparsing import * - -import urllib.request, urllib.parse, urllib.error - -# read in a bunch of genomic data -datafile = urllib.request.urlopen("http://toxodb.org/common/downloads/release-6.0/Tgondii/TgondiiApicoplastORFsNAs_ToxoDB-6.0.fasta") -fastasrc = datafile.read() -datafile.close() - -""" -Sample header: ->NC_001799-6-2978-2778 | organism=Toxoplasma_gondii_RH | location=NC_001799:2778-2978(-) | length=201 -""" -integer = Word(nums).setParseAction(lambda t:int(t[0])) -genebit = Group(">" + Word(alphanums.upper()+"-_") + "|" + - Word(printables)("id") + SkipTo("length=", include=True) + - integer("genelen") + LineEnd() + - Combine(OneOrMore(Word("ACGTN")),adjacent=False)("gene")) - -# read gene data from .fasta file - takes just a few seconds -genedata = OneOrMore(genebit).parseString(fastasrc) - - -class CloseMatch(Token): - """A special subclass of Token that does *close* matches. For each - close match of the given string, a tuple is returned giving the - found close match, and a list of mismatch positions.""" - def __init__(self, seq, maxMismatches=1): - super(CloseMatch,self).__init__() - self.name = seq - self.sequence = seq - self.maxMismatches = maxMismatches - self.errmsg = "Expected " + self.sequence - self.mayIndexError = False - self.mayReturnEmpty = False - - def parseImpl( self, instring, loc, doActions=True ): - start = loc - instrlen = len(instring) - maxloc = start + len(self.sequence) - - if maxloc <= instrlen: - seq = self.sequence - seqloc = 0 - mismatches = [] - throwException = False - done = False - while loc < maxloc and not done: - if instring[loc] != seq[seqloc]: - mismatches.append(seqloc) - if len(mismatches) > self.maxMismatches: - throwException = True - done = True - loc += 1 - seqloc += 1 - else: - throwException = True - - if throwException: - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - return loc, (instring[start:loc],mismatches) - -# using the genedata extracted above, look for close matches of a gene sequence -searchseq = CloseMatch("TTAAATCTAGAAGAT", 3) -for g in genedata: - print("%s (%d)" % (g.id, g.genelen)) - print("-"*24) - for t,startLoc,endLoc in searchseq.scanString(g.gene, overlap=True): - matched, mismatches = t[0] - print("MATCH:", searchseq.sequence) - print("FOUND:", matched) - if mismatches: - print(" ", ''.join(' ' if i not in mismatches else '*' - for i,c in enumerate(searchseq.sequence))) - else: - print("") - print("at location", startLoc) - print() - print() \ No newline at end of file -- cgit v1.2.1