1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 from __future__ import generators
28
29 __doc__ = \
30 """
31 pyparsing module - Classes and methods to define and execute parsing grammars
32
33 The pyparsing module is an alternative approach to creating and executing simple grammars,
34 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
35 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
36 provides a library of classes that you use to construct the grammar directly in Python.
37
38 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
39
40 from pyparsing import Word, alphas
41
42 # define grammar of a greeting
43 greet = Word( alphas ) + "," + Word( alphas ) + "!"
44
45 hello = "Hello, World!"
46 print hello, "->", greet.parseString( hello )
47
48 The program outputs the following::
49
50 Hello, World! -> ['Hello', ',', 'World', '!']
51
52 The Python representation of the grammar is quite readable, owing to the self-explanatory
53 class names, and the use of '+', '|' and '^' operators.
54
55 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
56 object with named attributes.
57
58 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
59 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
60 - quoted strings
61 - embedded comments
62 """
63 __version__ = "1.3.3"
64 __versionTime__ = "12 September 2005 22:50"
65 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
66
67 import string
68 import copy,sys
69 import warnings
70
71
73 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
74 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
75 then < returns the unicode object | encodes it with the default encoding | ... >.
76 """
77 try:
78
79
80 return str(obj)
81
82 except UnicodeEncodeError, e:
83
84
85
86
87
88 return unicode(obj)
89
90
91
92
93
94
95
97 return dict( [(c,0) for c in strg] )
98
99 alphas = string.lowercase + string.uppercase
100 nums = string.digits
101 hexnums = nums + "ABCDEFabcdef"
102 alphanums = alphas + nums
103
105 """base exception class for all parsing runtime exceptions"""
106 __slots__ = ( "loc","msg","pstr","parserElement" )
107
108
109 - def __init__( self, pstr, loc, msg, elem=None ):
114
116 """supported attributes by name are:
117 - lineno - returns the line number of the exception text
118 - col - returns the column number of the exception text
119 - line - returns the line containing the exception text
120 """
121 if( aname == "lineno" ):
122 return lineno( self.loc, self.pstr )
123 elif( aname in ("col", "column") ):
124 return col( self.loc, self.pstr )
125 elif( aname == "line" ):
126 return line( self.loc, self.pstr )
127 else:
128 raise AttributeError, aname
129
131 return "%s (at char %d), (line:%d, col:%d)" % ( self.msg, self.loc, self.lineno, self.column )
143
145 """exception thrown when parse expressions don't match class"""
146 """supported attributes by name are:
147 - lineno - returns the line number of the exception text
148 - col - returns the column number of the exception text
149 - line - returns the line containing the exception text
150 """
151 pass
152
154 """user-throwable exception thrown when inconsistent parse content
155 is found; stops all parsing immediately"""
156 pass
157
159 """exception thrown by validate() if the grammar could be improperly recursive"""
160 - def __init__( self, parseElementList ):
161 self.parseElementTrace = parseElementList
162
164 return "RecursiveGrammarException: %s" % self.parseElementTrace
165
167 """Structured parse results, to provide multiple means of access to the parsed data:
168 - as a list (len(results))
169 - by list index (results[0], results[1], etc.)
170 - by attribute (results.<resultsName>)
171 """
172 __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__modal" )
173 - def __new__(cls, toklist, name=None, asList=True, modal=True ):
174 if isinstance(toklist, cls):
175 return toklist
176 retobj = object.__new__(cls)
177 retobj.__doinit = True
178 return retobj
179
180
181
182 - def __init__( self, toklist, name=None, asList=True, modal=True ):
214
223
233
236
239
243 """Returns all named result keys."""
244 return self.__tokdict.keys()
245
247 """Returns all named result keys and values as a list of tuples."""
248 return [(k,v[-1][0]) for k,v in self.__tokdict.items()]
249
251 """Returns all named result values."""
252 return [ v[-1][0] for v in self.__tokdict.values() ]
253
264
266 if other.__tokdict:
267 offset = len(self.__toklist)
268 addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
269 otherdictitems = [(k,(v[0],addoffset(v[1])) ) for (k,vlist) in other.__tokdict.items() for v in vlist]
270 for k,v in otherdictitems:
271 self[k] = v
272 if isinstance(v[0],ParseResults):
273 v[0].__parent = self
274 self.__toklist += other.__toklist
275 del other
276 return self
277
280
282 out = "["
283 sep = ""
284 for i in self.__toklist:
285 if isinstance(i, ParseResults):
286 out += sep + _ustr(i)
287 else:
288 out += sep + repr(i)
289 sep = ", "
290 out += "]"
291 return out
292
303
305 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
306 out = []
307 for res in self.__toklist:
308 if isinstance(res,ParseResults):
309 out.append( res.asList() )
310 else:
311 out.append( res )
312 return out
313
315 """Returns the named parse results as dictionary."""
316 return dict( self.items() )
317
326
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
    """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

    Arguments:
     - doctag - tag for the outermost element; if None, this object's own
       results name is used, and "ITEM" if it has none
     - namedItemsOnly - if True, tokens without a results name are omitted
       (and "" is returned if this object itself ends up without a tag)
     - indent - text emitted in front of the outermost element
     - formatted - if False, no newlines or indentation are emitted

    The text of plain tokens is escaped ("&", "<", ">") so that the returned
    string stays well-formed XML.
    """
    def escapeText( text ):
        # "&" goes first so the entities inserted for "<" and ">" are not escaped again
        text = text.replace( "&", "&amp;" )
        text = text.replace( "<", "&lt;" )
        return text.replace( ">", "&gt;" )

    # map token position -> results name
    namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() for v in vlist ] )

    if formatted:
        nl = "\n"
        nextLevelIndent = indent + " "
    else:
        nl = indent = nextLevelIndent = ""

    if doctag is not None:
        selfTag = doctag
    elif self.__name:
        selfTag = self.__name
    else:
        selfTag = None

    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    out = [ nl, indent, "<", selfTag, ">" ]

    # nested results only inherit the "named only" filter when no explicit doctag was given
    childNamedOnly = namedItemsOnly and doctag is None
    for i,res in enumerate(self.__toklist):
        resTag = namedItems.get(i)
        if isinstance(res,ParseResults):
            out.append( res.asXML(resTag, childNamedOnly, nextLevelIndent, formatted) )
            continue
        if not resTag:
            if namedItemsOnly:
                continue
            resTag = "ITEM"
        out += [ nl, nextLevelIndent, "<", resTag, ">", escapeText(_ustr(res)), "</", resTag, ">" ]

    out += [ nl, indent, "</", selfTag, ">" ]
    return "".join(out)
376
377
379 for k,vlist in self.__tokdict.items():
380 for v,loc in vlist:
381 if sub is v:
382 return k
383 return None
384
386 """Returns the results name for this token expression."""
387 if self.__name:
388 return self.__name
389 elif self.__parent:
390 par = self.__parent
391 if par:
392 return par.__lookup(self)
393 else:
394 return None
395 elif (len(self) == 1 and
396 len(self.__tokdict) == 1 and
397 self.__tokdict.values()[0][0][1] in (0,-1)):
398 return self.__tokdict.keys()[0]
399 else:
400 return None
401
403 """Returns current column within a string, counting newlines as line separators
404 The first column is number 1.
405 """
406 return loc - strg.rfind("\n", 0, loc)
407
409 """Returns current line number within a string, counting newlines as line separators
410 The first line is number 1.
411 """
412 return strg.count("\n",0,loc) + 1
413
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
    The returned text does not include the terminating newline.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() reports "no newline" as -1; index 0 is a real match (loc is on an
    # empty first line), so the test has to be >= 0 rather than > 0
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    return strg[lastCR+1:]
424
427
430
432 print "Exception raised:", exc
433
435 """'Do-nothing' debug action, to suppress debugging output during parsing."""
436 pass
437
439 """Abstract base level parser element class."""
440 DEFAULT_WHITE_CHARS = " \n\t\r"
441
446 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
447
449 self.parseAction = None
450
451 self.strRepr = None
452 self.resultsName = None
453 self.saveAsList = savelist
454 self.skipWhitespace = True
455 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
456 self.mayReturnEmpty = False
457 self.keepTabs = False
458 self.ignoreExprs = []
459 self.debug = False
460 self.streamlined = False
461 self.mayIndexError = True
462 self.errmsg = ""
463 self.modalResults = True
464 self.debugActions = ( None, None, None )
465
467 """Make a copy of this ParseElement. Useful for defining different parse actions
468 for the same parsing pattern, using copies of the original parse element."""
469 cpy = copy.copy( self )
470 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
471 return cpy
472
474 """Define name for this expression, for use in debugging."""
475 self.name = name
476 self.errmsg = "Expected " + self.name
477 return self
478
480 """Define name for referencing matching tokens as a nested attribute
481 of the returned parse results.
482 NOTE: this returns a *copy* of the original ParseElement object;
483 this is so that the client can define a basic element, such as an
484 integer, and reference it in multiple places with different names.
485 """
486 newself = self.copy()
487 newself.resultsName = name
488 newself.modalResults = not listAllMatches
489 return newself
490
492 """Define action to perform when successfully matching parse element definition.
493 Parse action fn is a callable method with the arguments (s, loc, toks) where:
494 - s = the original string being parsed
495 - loc = the location of the matching substring
496 - toks = a list of the matched tokens, packaged as a ParseResults object
497 If the function fn modifies the tokens, it can return them as the return
498 value from fn, and the modified list of tokens will replace the original.
499 Otherwise, fn does not need to return any value.
500 """
501 self.parseAction = fn
502 return self
503
505 exprsFound = True
506 while exprsFound:
507 exprsFound = False
508 for e in self.ignoreExprs:
509 try:
510 while 1:
511 loc,dummy = e.parse( instring, loc )
512 exprsFound = True
513 except ParseException:
514 pass
515 return loc
516
518 if self.ignoreExprs:
519 loc = self.skipIgnorables( instring, loc )
520
521 if self.skipWhitespace:
522 wt = self.whiteChars
523 instrlen = len(instring)
524 while loc < instrlen and instring[loc] in wt:
525 loc += 1
526
527 return loc
528
529 - def parseImpl( self, instring, loc, doActions=True ):
531
532 - def postParse( self, instring, loc, tokenlist ):
534
535
def parse( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element at loc and return ( new location, ParseResults ).

    Steps, in order: optional "start" debug action, optional preParse,
    parseImpl, postParse, wrapping of the tokens in a ParseResults, optional
    parse action, optional "success" debug action.

     - doActions - if False, the parse action is not run
     - callPreParse - if False, preParse is skipped

    An IndexError raised by parseImpl is reported as a ParseException located
    at the end of the input, whenever the element is being debugged, is flagged
    mayIndexError, or loc is at/after the end of the input.
    """
    debugging = ( self.debug )

    if debugging and self.debugActions[0]:
        self.debugActions[0]( instring, loc, self )

    if callPreParse:
        loc = self.preParse( instring, loc )
    tokensStart = loc

    try:
        if debugging or self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, loc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            # element guarantees it will not index past the end: skip the try overhead
            loc,tokens = self.parseImpl( instring, loc, doActions )
    except ParseException:
        if debugging and self.debugActions[2]:
            self.debugActions[2]( instring, tokensStart, self, sys.exc_info()[1] )
        raise

    tokens = self.postParse( instring, loc, tokens )
    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )

    if self.parseAction and doActions:
        try:
            tokens = self.parseAction( instring, tokensStart, retTokens )
            if tokens is not None:
                # a parse action may return a tuple; the tokens are its second item
                if isinstance(tokens,tuple):
                    tokens = tokens[1]
                retTokens = ParseResults( tokens,
                                          self.resultsName,
                                          asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                          modal=self.modalResults )
        except ParseException:
            if debugging and self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, sys.exc_info()[1] )
            raise

    if debugging and self.debugActions[1]:
        self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
603
605 return self.parse( instring, loc, doActions=False )[0]
606
608 """Execute the parse expression with the given string.
609 This is the main interface to the client code, once the complete
610 expression has been built.
611 """
612 if not self.streamlined:
613 self.streamline()
614 self.saveAsList = True
615 for e in self.ignoreExprs:
616 e.streamline()
617 if self.keepTabs:
618 loc, tokens = self.parse( instring, 0 )
619 else:
620 loc, tokens = self.parse( instring.expandtabs(), 0 )
621 return tokens
622
624 """Scan the input string for expression matches. Each match will return the matching tokens, start location, and end location."""
625 if not self.streamlined:
626 self.streamline()
627 for e in self.ignoreExprs:
628 e.streamline()
629
630 if not self.keepTabs:
631 instring = instring.expandtabs()
632 instrlen = len(instring)
633 loc = 0
634 preparseFn = self.preParse
635 parseFn = self.parse
636 while loc < instrlen:
637 try:
638 loc = preparseFn( instring, loc )
639 nextLoc,tokens = parseFn( instring, loc, callPreParse=False )
640 except ParseException:
641 loc += 1
642 else:
643 yield tokens, loc, nextLoc
644 loc = nextLoc
645
670
672 """Implementation of + operator - returns And"""
673 if isinstance( other, basestring ):
674 other = Literal( other )
675 return And( [ self, other ] )
676
678 """Implementation of += operator"""
679 if isinstance( other, basestring ):
680 other = Literal( other )
681 return other + self
682
684 """Implementation of | operator - returns MatchFirst"""
685 if isinstance( other, basestring ):
686 other = Literal( other )
687 return MatchFirst( [ self, other ] )
688
690 """Implementation of |= operator"""
691 if isinstance( other, basestring ):
692 other = Literal( other )
693 return other | self
694
696 """Implementation of ^ operator - returns Or"""
697 if isinstance( other, basestring ):
698 other = Literal( other )
699 return Or( [ self, other ] )
700
702 """Implementation of ^= operator"""
703 if isinstance( other, basestring ):
704 other = Literal( other )
705 return other ^ self
706
708 """Implementation of & operator - returns Each"""
709 if isinstance( other, basestring ):
710 other = Literal( other )
711 return Each( [ self, other ] )
712
714 """Implementation of right-& operator"""
715 if isinstance( other, basestring ):
716 other = Literal( other )
717 return other & self
718
720 """Implementation of ~ operator - returns NotAny"""
721 return NotAny( self )
722
724 """Suppresses the output of this ParseElement; useful to keep punctuation from
725 cluttering up returned output.
726 """
727 return Suppress( self )
728
730 """Disables the skipping of whitespace before matching the characters in the
731 ParserElement's defined pattern. This is normally only used internally by
732 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
733 """
734 self.skipWhitespace = False
735 return self
736
738 """Overrides the default whitespace chars
739 """
740 self.skipWhitespace = True
741 self.whiteChars = chars
742
744 """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
745 Must be called before parseString when the input grammar contains elements that
746 match <TAB> characters."""
747 self.keepTabs = True
748 return self
749
751 """Define expression to be ignored (e.g., comments) while doing pattern
752 matching; may be called repeatedly, to define multiple comment or other
753 ignorable patterns.
754 """
755 if isinstance( other, Suppress ):
756 if other not in self.ignoreExprs:
757 self.ignoreExprs.append( other )
758 else:
759 self.ignoreExprs.append( Suppress( other ) )
760 return self
761
769
777
780
783
785 self.streamlined = True
786 self.strRepr = None
787 return self
788
791
def validate( self, validateTrace=None ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    validateTrace is accepted for signature compatibility and is not used
    here; the recursion check always starts from an empty trace.
    """
    self.checkRecursion( [] )
795
797 """Execute the parse expression on the given file or filename.
798 If a filename is specified (instead of a file object),
799 the entire file is opened, read, and closed before parsing.
800 """
801 try:
802 file_contents = file_or_filename.read()
803 except AttributeError:
804 f = open(file_or_filename, "rb")
805 file_contents = f.read()
806 f.close()
807 return self.parseString(file_contents)
808
809
810 -class Token(ParserElement):
811 """Abstract ParserElement subclass, for defining atomic matching patterns."""
815
817 s = super(Token,self).setName(name)
818 self.errmsg = "Expected " + self.name
819 s.myException.msg = self.errmsg
820 return s
821
822
824 """An empty token, will always match."""
826 super(Empty,self).__init__()
827 self.name = "Empty"
828 self.mayReturnEmpty = True
829 self.mayIndexError = False
830
831
833 """A token that will never match."""
super(NoMatch,self).__init__()
self.name = "NoMatch"
self.mayReturnEmpty = True
self.mayIndexError = False
self.errmsg = "Unmatchable token"
# keep the pre-built exception's message in step with errmsg; the instance is
# reached through self (there is no local named "s" in this constructor)
self.myException.msg = self.errmsg
841
def parseImpl( self, instring, loc, doActions=True ):
    """Never matches: stamps this element's exception object with the
    current location and input string, then raises it."""
    failure = self.myException
    failure.loc, failure.pstr = loc, instring
    raise failure
847
848
850 """Token to exactly match a specified string."""
852 super(Literal,self).__init__()
853 self.match = matchString
854 self.matchLen = len(matchString)
855 try:
856 self.firstMatchChar = matchString[0]
857 except IndexError:
858 warnings.warn("null string passed to Literal; use Empty() instead",
859 SyntaxWarning, stacklevel=2)
860 self.name = '"%s"' % self.match
861 self.errmsg = "Expected " + self.name
862 self.mayReturnEmpty = False
863 self.myException.msg = self.errmsg
864 self.mayIndexError = False
865
866
867
868
869
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal string at loc.

    Returns ( loc just past the literal, the literal string ); raises this
    element's exception object (with loc/pstr updated) when the text differs.
    """
    # cheap first-character test before the full comparison
    matched = ( instring[loc] == self.firstMatchChar )
    if matched and self.matchLen != 1:
        matched = instring.startswith( self.match, loc )

    if not matched:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    return loc+self.matchLen, self.match
879
881 """Token to exactly match a specified string as a keyword, that is, it must be
882 immediately followed by a non-keyword character. Compare with Literal::
883 Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
884 Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)'
885 Accepts two optional constructor arguments in addition to the keyword string:
886 identChars is a string of characters that would be valid identifier characters,
887 defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
888 matching, default is False.
889 """
890 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
891
893 super(Keyword,self).__init__()
894 self.match = matchString
895 self.matchLen = len(matchString)
896 try:
897 self.firstMatchChar = matchString[0]
898 except IndexError:
899 warnings.warn("null string passed to Keyword; use Empty() instead",
900 SyntaxWarning, stacklevel=2)
901 self.name = '"%s"' % self.match
902 self.errmsg = "Expected " + self.name
903 self.mayReturnEmpty = False
904 self.myException.msg = self.errmsg
905 self.mayIndexError = False
906 self.caseless = caseless
907 if caseless:
908 self.caselessmatch = matchString.upper()
909 identChars = identChars.upper()
910 self.identChars = _str2dict(identChars)
911
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at loc, provided it is not directly followed by an
    identifier character.

    Returns ( loc just past the keyword, the keyword as originally given );
    raises this element's exception object (with loc/pstr updated) otherwise.
    """
    end = loc + self.matchLen

    if self.caseless:
        textMatches = ( instring[loc:end].upper() == self.caselessmatch )
    else:
        textMatches = ( instring[loc] == self.firstMatchChar and
                        (self.matchLen==1 or instring.startswith(self.match,loc)) )

    if textMatches:
        # keyword runs up to the end of the input: nothing can follow it
        if loc >= len(instring)-self.matchLen:
            return end, self.match
        follower = instring[end]
        if self.caseless:
            follower = follower.upper()
        if follower not in self.identChars:
            return end, self.match

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
927
932
937 setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
938
939
941 """Token to match a specified string, ignoring case of letters.
942 Note: the matched results will always be in the case of the given
943 match string, NOT the case of the input text.
944 """
946 super(CaselessLiteral,self).__init__( matchString.upper() )
947
948 self.returnString = matchString
949 self.name = "'%s'" % self.returnString
950 self.errmsg = "Expected " + self.name
951 self.myException.msg = self.errmsg
952
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal at loc without regard to case.

    Returns ( loc just past the literal, the string in its originally
    specified case ); raises this element's exception object (with loc/pstr
    updated) if the upper-cased input text differs from the stored match.
    """
    end = loc + self.matchLen
    if instring[loc:end].upper() != self.match:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
    return end, self.returnString
961
962
964 """Token for matching words composed of allowed character sets.
965 Defined with string containing all allowed initial characters,
966 an optional string containing allowed body characters (if omitted,
967 defaults to the initial character set), and an optional minimum,
968 maximum, and/or exact length.
969 """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0 ):
    """Set up a Word token.

     - initChars - characters allowed at the start of the word
     - bodyChars - characters allowed after the first one; if empty or
       omitted, initChars is used
     - min - minimum word length
     - max - maximum word length; values <= 0 mean "no limit"
     - exact - if > 0, overrides both min and max
    """
    super(Word,self).__init__()

    bodySource = bodyChars or initChars
    self.initCharsOrig = initChars
    self.initChars = _str2dict(initChars)
    self.bodyCharsOrig = bodySource
    self.bodyChars = _str2dict(bodySource)

    # remembered separately because maxLen is also set when no max was given
    self.maxSpecified = max > 0

    self.minLen = min
    if not max > 0:
        self.maxLen = sys.maxint
    else:
        self.maxLen = max

    if exact > 0:
        self.minLen = self.maxLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.myException.msg = self.errmsg
    self.mayIndexError = False
998
def parseImpl( self, instring, loc, doActions=True ):
    """Match a word at loc: one initial character followed by body
    characters, up to maxLen characters in total.

    Returns ( loc just past the word, the matched text ). Raises this
    element's exception object when the first character is not an initial
    character (reported at loc), or when the word is shorter than minLen or -
    with an explicit maximum - is directly followed by another body character
    (both reported at the position where scanning stopped).
    """
    if instring[loc] not in self.initChars:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    begin = loc
    allowed = self.bodyChars
    textLen = len(instring)
    limit = min( begin + self.maxLen, textLen )

    loc = begin + 1
    while loc < limit and instring[loc] in allowed:
        loc += 1

    tooShort = ( loc - begin < self.minLen )
    # with an explicit max, a longer run of body characters is a mismatch, not a prefix match
    overrun = ( self.maxSpecified and loc < textLen and instring[loc] in allowed )

    if tooShort or overrun:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    return loc, instring[begin:loc]
1028
1030 try:
1031 return super(Word,self).__str__()
1032 except:
1033 pass
1034
1035
1036 if self.strRepr is None:
1037
1038 def charsAsStr(s):
1039 if len(s)>4:
1040 return s[:4]+"..."
1041 else:
1042 return s
1043
1044 if ( self.initCharsOrig != self.bodyCharsOrig ):
1045 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1046 else:
1047 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1048
1049 return self.strRepr
1050
1051
1053 """Token for matching words composed of characters *not* in a given set.
1054 Defined with string containing all disallowed characters, and an optional
1055 minimum, maximum, and/or exact length.
1056 """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """Set up a CharsNotIn token.

     - notChars - characters that end the match
     - min - minimum match length
     - max - maximum match length; values <= 0 mean "no limit"
     - exact - if > 0, overrides both min and max

    Leading whitespace is not skipped by this token.
    """
    super(CharsNotIn,self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    self.minLen = min
    if not max > 0:
        self.maxLen = sys.maxint
    else:
        self.maxLen = max

    if exact > 0:
        self.minLen = self.maxLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )
    self.myException.msg = self.errmsg
    self.mayIndexError = False
1078
def parseImpl( self, instring, loc, doActions=True ):
    """Match a run of characters that are not in notChars, starting at loc.

    Returns ( loc just past the run, the matched text ). Raises this
    element's exception object when the character at loc is excluded
    (reported at loc) or the run is shorter than minLen (reported at the
    position where scanning stopped).
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    begin = loc
    limit = min( begin + self.maxLen, len(instring) )
    loc = begin + 1
    while loc < limit and instring[loc] not in excluded:
        loc += 1

    if loc - begin < self.minLen:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    return loc, instring[begin:loc]
1103
1105 try:
1106 return super(CharsNotIn, self).__str__()
1107 except:
1108 pass
1109
1110 if self.strRepr is None:
1111 if len(self.notChars) > 4:
1112 self.strRepr = "!W:(%s...)" % self.notChars[:4]
1113 else:
1114 self.strRepr = "!W:(%s)" % self.notChars
1115
1116 return self.strRepr
1117
1119 """Special matching class for matching whitespace. Normally, whitespace is ignored
1120 by pyparsing grammars. This class is included when some whitespace structures
1121 are significant. Define with a string containing the whitespace characters to be
1122 matched; default is " \\t\\n". Also takes optional min, max, and exact arguments,
1123 as defined for the Word class."""
1124 whiteStrs = {
1125 " " : "<SPC>",
1126 "\t": "<TAB>",
1127 "\n": "<LF>",
1128 "\r": "<CR>",
1129 "\f": "<FF>",
1130 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """Set up a White token.

     - ws - the whitespace characters to match
     - min - minimum match length
     - max - maximum match length; values <= 0 mean "no limit"
     - exact - if > 0, overrides both min and max

    The characters being matched are removed from this element's set of
    skippable whitespace, so they are not consumed before matching.
    """
    super(White,self).__init__()
    self.matchWhite = ws

    skippable = [ c for c in self.whiteChars if c not in self.matchWhite ]
    self.whiteChars = "".join(skippable)

    # readable name built from the symbolic names of the matched characters
    symbols = [ White.whiteStrs[c] for c in self.matchWhite ]
    self.name = "".join(symbols)
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name
    self.myException.msg = self.errmsg

    self.minLen = min
    if not max > 0:
        self.maxLen = sys.maxint
    else:
        self.maxLen = max

    if exact > 0:
        self.minLen = self.maxLen = exact
1151
def parseImpl( self, instring, loc, doActions=True ):
    """Match a run of the configured whitespace characters starting at loc.

    Returns ( loc just past the run, the matched whitespace ). Raises this
    element's exception object when the character at loc is not one of the
    matched characters (reported at loc) or the run is shorter than minLen
    (reported at the position where scanning stopped).
    """
    if instring[loc] not in self.matchWhite:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    begin = loc
    limit = min( begin + self.maxLen, len(instring) )
    loc = begin + 1
    while loc < limit and instring[loc] in self.matchWhite:
        loc += 1

    if loc - begin < self.minLen:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    return loc, instring[begin:loc]
1174
1175
1181
1183 """Token to advance to a specific column of input text; useful for tabular report scraping."""
1187
1189 if col(loc,instring) != self.col:
1190 instrlen = len(instring)
1191 if self.ignoreExprs:
1192 loc = self.skipIgnorables( instring, loc )
1193 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
1194 loc += 1
1195 return loc
1196
def parseImpl( self, instring, loc, doActions=True ):
    """Advance from loc to the target column.

    Returns ( location of the target column, the text skipped to get there ).
    Raises ParseException if loc is already past the target column.
    """
    here = col( loc, instring )
    if here > self.col:
        raise ParseException( instring, loc, "Text not in expected column", self )
    target = loc + self.col - here
    return target, instring[ loc: target ]
1204
1206 """Matches if current position is at the beginning of a line within the parse string"""
1208 super(LineStart,self).__init__()
1209 self.whiteChars = " \t"
1210 self.errmsg = "Expected start of line"
1211 self.myException.msg = self.errmsg
1212
1218
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, when loc is 0 or when loc lies inside the
    string directly after a newline.

    Returns ( loc, [] ); otherwise raises this element's exception object
    with loc/pstr updated.
    """
    if loc != 0 and ( loc >= len(instring) or instring[loc-1] != "\n" ):
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
    return loc, []
1227
1229 """Matches if current position is at the end of a line within the parse string"""
1231 super(LineEnd,self).__init__()
1232 self.whiteChars = " \t"
1233 self.errmsg = "Expected end of line"
1234 self.myException.msg = self.errmsg
1235
def parseImpl( self, instring, loc, doActions=True ):
    """Match the end of a line at loc.

    At or past the end of the input, returns ( loc, [] ). On a newline,
    returns ( loc+1, "\\n" ). Anything else raises this element's exception
    object with loc/pstr updated.
    """
    if loc >= len(instring):
        return loc, []

    if instring[loc] != "\n":
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    return loc+1, "\n"
1248
1250 """Matches if current position is at the beginning of the parse string"""
1252 super(StringStart,self).__init__()
1253 self.errmsg = "Expected start of text"
1254 self.myException.msg = self.errmsg
1255
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, when loc is 0 or is the position that
    preParse reaches when started from 0.

    Returns ( loc, [] ); otherwise raises this element's exception object
    with loc/pstr updated.
    """
    # preParse is only consulted for non-zero locations
    if loc != 0 and loc != self.preParse( instring, 0 ):
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
    return loc, []
1266
1268 """Matches if current position is at the end of the parse string"""
1270 super(StringEnd,self).__init__()
1271 self.errmsg = "Expected end of text"
1272 self.myException.msg = self.errmsg
1273
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, when loc is at or past the end of the input.

    Returns ( loc, [] ); otherwise raises this element's exception object
    with loc/pstr updated.
    """
    if not loc < len(instring):
        return loc, []

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
1282
1283
1285 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, exprs, savelist = False ):
    """Store the contained expressions in self.exprs.

     - exprs - a list of expressions (kept as given, not copied), a string
       (wrapped in a Literal), or any other single object (wrapped in a
       one-element list)
     - savelist - passed through to the base class constructor
    """
    super(ParseExpression,self).__init__(savelist)
    if isinstance( exprs, basestring ):
        contained = [ Literal( exprs ) ]
    elif isinstance( exprs, list ):
        contained = exprs
    else:
        contained = [ exprs ]
    self.exprs = contained
1294
1296 return self.exprs[i]
1297
1299 self.exprs.append( other )
1300 self.strRepr = None
1301 return self
1302
1304 """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
1305 all contained expressions."""
1306 self.skipWhitespace = False
1307 self.exprs = [ copy.copy(e) for e in self.exprs ]
1308 for e in self.exprs:
1309 e.leaveWhitespace()
1310 return self
1311
1313 if isinstance( other, Suppress ):
1314 if other not in self.ignoreExprs:
1315 super( ParseExpression, self).ignore( other )
1316 for e in self.exprs:
1317 e.ignore( self.ignoreExprs[-1] )
1318 else:
1319 super( ParseExpression, self).ignore( other )
1320 for e in self.exprs:
1321 e.ignore( self.ignoreExprs[-1] )
1322 return self
1323
1325 try:
1326 return super(ParseExpression,self).__str__()
1327 except:
1328 pass
1329
1330 if self.strRepr is None:
1331 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
1332 return self.strRepr
1333
1335 super(ParseExpression,self).streamline()
1336
1337 for e in self.exprs:
1338 e.streamline()
1339
1340
1341
1342
1343 if ( len(self.exprs) == 2 ):
1344 other = self.exprs[0]
1345 if ( isinstance( other, self.__class__ ) and
1346 other.parseAction is None and
1347 other.resultsName is None and
1348 not other.debug ):
1349 self.exprs = other.exprs[:] + [ self.exprs[1] ]
1350 self.strRepr = None
1351
1352 other = self.exprs[-1]
1353 if ( isinstance( other, self.__class__ ) and
1354 other.parseAction is None and
1355 other.resultsName is None and
1356 not other.debug ):
1357 self.exprs = self.exprs[:-1] + other.exprs[:]
1358 self.strRepr = None
1359
1360 return self
1361
1366
1367 - def validate( self, validateTrace=[] ):
1372
1373
1374 -class And(ParseExpression):
1375 """Requires all given ParseExpressions to be found in the given order.
1376 Expressions may be separated by whitespace.
1377 May be constructed using the '+' operator.
1378 """
def __init__( self, exprs, savelist = True ):
    """Build a sequence of expressions that must all match, in order.

    exprs is handed to the base class, which normalizes it into the list
    self.exprs.  The And may match empty only if every contained
    expression may; whitespace handling is copied from the first expression.
    """
    super(And,self).__init__(exprs, savelist)
    # Work from self.exprs rather than the raw argument: the base class
    # accepts a string or a single expression and wraps it in a list, and
    # the raw argument would not be iterable/indexable as expressions then.
    self.mayReturnEmpty = True
    for e in self.exprs:
        if not e.mayReturnEmpty:
            self.mayReturnEmpty = False
            break
    self.skipWhitespace = self.exprs[0].skipWhitespace
    self.whiteChars = self.exprs[0].whiteChars
1388
def parseImpl( self, instring, loc, doActions=True ):
    """Parse every contained expression in order, starting at loc.

    The tokens of the first expression seed the result; tokens of each
    later expression are appended when they are non-empty or carry keys.
    Returns (new location, accumulated tokens).  Any exception raised by a
    contained expression propagates unchanged.
    """
    leading = self.exprs[0]
    loc, resultlist = leading.parse( instring, loc, doActions )
    for follower in self.exprs[1:]:
        loc, exprtokens = follower.parse( instring, loc, doActions )
        if not exprtokens and not exprtokens.keys():
            continue
        resultlist += exprtokens
    return loc, resultlist
1396
1398 if isinstance( other, basestring ):
1399 other = Literal( other )
1400 return self.append( other )
1401
1403 subRecCheckList = parseElementList[:] + [ self ]
1404 for e in self.exprs:
1405 e.checkRecursion( subRecCheckList )
1406 if not e.mayReturnEmpty:
1407 break
1408
1410 if hasattr(self,"name"):
1411 return self.name
1412
1413 if self.strRepr is None:
1414 self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
1415
1416 return self.strRepr
1417
1418
1419 -class Or(ParseExpression):
1420 """Requires that at least one ParseExpression is found.
1421 If two expressions match, the expression that matches the longest string will be used.
1422 May be constructed using the '^' operator.
1423 """
def __init__( self, exprs, savelist = False ):
    """Build a set of alternatives of which the longest match wins.

    exprs is handed to the base class, which normalizes it into the list
    self.exprs.  The Or may match empty as soon as any alternative may.
    """
    super(Or,self).__init__(exprs, savelist)
    # Iterate the normalized self.exprs, not the raw argument, so that a
    # string or single-expression argument (accepted by the base class)
    # is handled as well.
    self.mayReturnEmpty = False
    for e in self.exprs:
        if e.mayReturnEmpty:
            self.mayReturnEmpty = True
            break
1431
def parseImpl( self, instring, loc, doActions=True ):
    """Try every alternative at loc and parse with the one that gets furthest.

    Each alternative is first probed with tryParse.  The alternative whose
    probe returns the greatest location is then parsed for real (with
    doActions) and its result returned.  If no alternative matches, the
    failure that occurred furthest into the input is raised; an IndexError
    from an alternative is treated as a failure at the end of instring.
    Raises ParseException when nothing matches, including when there are
    no alternatives at all.
    """
    maxExcLoc = -1
    maxMatchLoc = -1
    # pre-bind so an empty alternative list cannot hit an unbound local
    maxException = None
    maxMatchExp = None
    for e in self.exprs:
        try:
            loc2 = e.tryParse( instring, loc )
        except ParseException:
            # sys.exc_info() retrieves the active exception without
            # depending on a particular except-clause binding syntax
            err = sys.exc_info()[1]
            if err.loc > maxExcLoc:
                maxException = err
                maxExcLoc = err.loc
        except IndexError:
            if len(instring) > maxExcLoc:
                maxException = ParseException(instring,len(instring),e.errmsg,self)
                maxExcLoc = len(instring)
        else:
            if loc2 > maxMatchLoc:
                maxMatchLoc = loc2
                maxMatchExp = e

    if maxMatchLoc < 0:
        if maxException is None:
            raise ParseException(instring,loc,"no defined alternatives to match",self)
        raise maxException

    return maxMatchExp.parse( instring, loc, doActions )
1455
1457 if isinstance( other, basestring ):
1458 other = Literal( other )
1459 return self.append( other )
1460
1462 if hasattr(self,"name"):
1463 return self.name
1464
1465 if self.strRepr is None:
1466 self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
1467
1468 return self.strRepr
1469
1471 subRecCheckList = parseElementList[:] + [ self ]
1472 for e in self.exprs:
1473 e.checkRecursion( subRecCheckList )
1474
1475
1477 """Requires that at least one ParseExpression is found.
1478 If two expressions match, the first one listed is the one that will match.
1479 May be constructed using the '|' operator.
1480 """
def __init__( self, exprs, savelist = False ):
    """Build a set of alternatives of which the first match wins.

    exprs is handed to the base class, which normalizes it into the list
    self.exprs.  The MatchFirst may match empty as soon as any alternative may.
    """
    super(MatchFirst,self).__init__(exprs, savelist)
    # Iterate the normalized self.exprs, not the raw argument, so that a
    # string or single-expression argument (accepted by the base class)
    # is handled as well.
    self.mayReturnEmpty = False
    for e in self.exprs:
        if e.mayReturnEmpty:
            self.mayReturnEmpty = True
            break
1488
def parseImpl( self, instring, loc, doActions=True ):
    """Return the result of the first alternative that parses at loc.

    Alternatives are tried in order.  If none matches, the failure that
    occurred furthest into the input is raised; an IndexError from an
    alternative is treated as a failure at the end of instring.
    Raises ParseException when nothing matches, including when there are
    no alternatives at all.
    """
    maxExcLoc = -1
    # pre-bind so an empty alternative list cannot hit an unbound local
    maxException = None
    for e in self.exprs:
        try:
            return e.parse( instring, loc, doActions )
        except ParseException:
            # sys.exc_info() retrieves the active exception without
            # depending on a particular except-clause binding syntax
            err = sys.exc_info()[1]
            if err.loc > maxExcLoc:
                maxException = err
                maxExcLoc = err.loc
        except IndexError:
            if len(instring) > maxExcLoc:
                maxException = ParseException(instring,len(instring),e.errmsg,self)
                maxExcLoc = len(instring)

    # only reached when every alternative failed
    if maxException is None:
        raise ParseException(instring,loc,"no defined alternatives to match",self)
    raise maxException
1507
1509 if isinstance( other, basestring ):
1510 other = Literal( other )
1511 return self.append( other )
1512
1514 if hasattr(self,"name"):
1515 return self.name
1516
1517 if self.strRepr is None:
1518 self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
1519
1520 return self.strRepr
1521
1523 subRecCheckList = parseElementList[:] + [ self ]
1524 for e in self.exprs:
1525 e.checkRecursion( subRecCheckList )
1526
1527
1528 -class Each(ParseExpression):
1529 """Requires all given ParseExpressions to be found, but in any order.
1530 Expressions may be separated by whitespace.
1531 May be constructed using the '&' operator.
1532 """
def __init__( self, exprs, savelist = True ):
    """Build a set of expressions that must all match, in any order.

    exprs is handed to the base class, which normalizes it into the list
    self.exprs.  The contained expressions are then sorted into buckets:
      optionals      - inner expressions of Optional members
      multioptionals - inner expressions of ZeroOrMore members
      multirequired  - inner expressions of OneOrMore members
      required       - every other member, plus the multirequired ones
    """
    super(Each,self).__init__(exprs, savelist)
    # Work from the normalized self.exprs rather than the raw argument, so
    # that a string or single-expression argument (accepted by the base
    # class) is handled as well.
    self.mayReturnEmpty = True
    for e in self.exprs:
        if not e.mayReturnEmpty:
            self.mayReturnEmpty = False
            break
    self.skipWhitespace = True
    self.optionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
    self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
    self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
    self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
    # a OneOrMore member must be seen at least once, so it is also required
    self.required += self.multirequired
1546
def parseImpl( self, instring, loc, doActions=True ):
    """Match all required expressions, in whatever order they occur in the input.

    Works in three phases:
      1. Probe with tryParse, repeatedly sweeping over the still-unmatched
         required/optional expressions and the repeatable ones, recording
         the order in which expressions match, until a full sweep matches
         nothing.
      2. Raise ParseException if any required expression never matched.
      3. Re-parse for real (with doActions) in the recorded order and merge
         the per-expression results into a single ParseResults.

    Returns (new location, merged ParseResults).
    """
    tmpLoc = loc
    tmpReqd = self.required[:]
    tmpOpt = self.optionals[:]
    matchOrder = []

    keepMatching = True
    while keepMatching:
        tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
        failed = []
        for e in tmpExprs:
            try:
                tmpLoc = e.tryParse( instring, tmpLoc )
            except ParseException:
                failed.append(e)
            else:
                matchOrder.append(e)
                # one-shot expressions are retired once matched
                if e in tmpReqd:
                    tmpReqd.remove(e)
                elif e in tmpOpt:
                    tmpOpt.remove(e)
        # stop once a whole sweep made no progress
        if len(failed) == len(tmpExprs):
            keepMatching = False

    if tmpReqd:
        # _ustr, like the __str__ methods in this file, keeps the message
        # construction safe for expressions with non-ASCII representations
        missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    resultlist = []
    for e in matchOrder:
        loc,results = e.parse(instring,loc,doActions)
        resultlist.append(results)

    finalResults = ParseResults([])
    for r in resultlist:
        # keys already present are combined instead of being overwritten
        dups = {}
        for k in r.keys():
            if k in finalResults.keys():
                tmp = ParseResults(finalResults[k])
                tmp += ParseResults(r[k])
                dups[k] = tmp
        finalResults += ParseResults(r)
        for k,v in dups.items():
            finalResults[k] = v
    return loc, finalResults
1592
1594 if hasattr(self,"name"):
1595 return self.name
1596
1597 if self.strRepr is None:
1598 self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
1599
1600 return self.strRepr
1601
1603 subRecCheckList = parseElementList[:] + [ self ]
1604 for e in self.exprs:
1605 e.checkRecursion( subRecCheckList )
1606
1607
1609 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
1610 - def __init__( self, expr, savelist=False ):
1620
1621 - def parseImpl( self, instring, loc, doActions=True ):
1626
1633
1645
1651
1658
1659 - def validate( self, validateTrace=[] ):
1664
1666 try:
1667 return super(ParseElementEnhance,self).__str__()
1668 except:
1669 pass
1670
1671 if self.strRepr is None and self.expr is not None:
1672 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
1673 return self.strRepr
1674
1675
1677 """Lookahead matching of the given parse expression. FollowedBy
1678 does *not* advance the parsing position within the input string, it only
1679 verifies that the specified parse expression matches at the current
1680 position. FollowedBy always returns a null token list."""
1684
1685 - def parseImpl( self, instring, loc, doActions=True ):
1688
1689
1690 -class NotAny(ParseElementEnhance):
1691 """Lookahead to disallow matching with the given parse expression. NotAny
1692 does *not* advance the parsing position within the input string, it only
1693 verifies that the specified parse expression does *not* match at the current
1694 position. Also, NotAny does *not* skip over leading whitespace. NotAny
1695 always returns a null token list. May be constructed using the '~' operator."""
1697 super(NotAny,self).__init__(expr)
1698
1699 self.skipWhitespace = False
1700 self.mayReturnEmpty = True
1701 self.errmsg = "Found unexpected token, "+_ustr(self.expr)
1702 self.myException = ParseException("",0,self.errmsg,self)
1703
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, only when self.expr does NOT match at loc.

    Returns (loc, []) if probing self.expr raises ParseException or
    IndexError.  If the probe succeeds, the exception object stored on the
    instance is stamped with loc and instring and raised.
    """
    try:
        self.expr.tryParse( instring, loc )
    except (ParseException,IndexError):
        return loc, []

    # the forbidden expression matched: report failure at this position
    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
1716
1718 if hasattr(self,"name"):
1719 return self.name
1720
1721 if self.strRepr is None:
1722 self.strRepr = "~{" + _ustr(self.expr) + "}"
1723
1724 return self.strRepr
1725
1726
1728 """Optional repetition of zero or more of the given expression."""
1732
def parseImpl( self, instring, loc, doActions=True ):
    """Match self.expr as many times as possible, including not at all.

    Repetition stops at the first ParseException or IndexError, which is
    swallowed.  Between repetitions, ignorable expressions are skipped
    when any are registered.  Returns (location reached, collected tokens);
    the token list is empty when the first attempt already fails.
    """
    tokens = []
    try:
        loc, tokens = self.expr.parse( instring, loc, doActions )
        skipping = ( len(self.ignoreExprs) > 0 )
        while True:
            if skipping:
                loc = self.skipIgnorables( instring, loc )
            loc, more = self.expr.parse( instring, loc, doActions )
            if not more and not more.keys():
                continue
            tokens += more
    except (ParseException,IndexError):
        # running out of matches is the normal way to leave the loop
        pass

    return loc, tokens
1748
1750 if hasattr(self,"name"):
1751 return self.name
1752
1753 if self.strRepr is None:
1754 self.strRepr = "[" + _ustr(self.expr) + "]..."
1755
1756 return self.strRepr
1757
1762
1763
1765 """Repetition of one or more of the given expression."""
def parseImpl( self, instring, loc, doActions=True ):
    """Match self.expr once, then as many further times as possible.

    The first match is mandatory: an exception from it propagates to the
    caller.  Later repetitions stop at the first ParseException or
    IndexError, which is swallowed.  Between repetitions, ignorable
    expressions are skipped when any are registered.
    Returns (location reached, collected tokens).
    """
    # deliberately outside the try block so that a failure here is reported
    loc, tokens = self.expr.parse( instring, loc, doActions )
    try:
        skipping = ( len(self.ignoreExprs) > 0 )
        while True:
            if skipping:
                loc = self.skipIgnorables( instring, loc )
            loc, more = self.expr.parse( instring, loc, doActions )
            if not more and not more.keys():
                continue
            tokens += more
    except (ParseException,IndexError):
        # running out of matches is the normal way to leave the loop
        pass

    return loc, tokens
1781
1783 if hasattr(self,"name"):
1784 return self.name
1785
1786 if self.strRepr is None:
1787 self.strRepr = "{" + _ustr(self.expr) + "}..."
1788
1789 return self.strRepr
1790
1795
1796
1798 """Optional matching of the given expression.
1799 A default return string can also be specified, if the optional expression
1800 is not found.
1801 """
def __init__( self, exprs, default=None ):
    """Wrap exprs so that failing to match it is not an error.

    default, when not None, is the value stored as self.defaultValue.
    """
    super(Optional,self).__init__( exprs, savelist=False )
    self.mayReturnEmpty = True
    self.defaultValue = default
1806
def parseImpl( self, instring, loc, doActions=True ):
    """Parse self.expr at loc if it matches; otherwise succeed without consuming.

    On ParseException or IndexError the location is left unchanged and the
    tokens are [ self.defaultValue ] when a default was given, else [].
    Returns (location, tokens).
    """
    try:
        loc, tokens = self.expr.parse( instring, loc, doActions )
    except (ParseException,IndexError):
        tokens = []
        if self.defaultValue is not None:
            tokens = [ self.defaultValue ]

    return loc, tokens
1817
1819 if hasattr(self,"name"):
1820 return self.name
1821
1822 if self.strRepr is None:
1823 self.strRepr = "[" + _ustr(self.expr) + "]"
1824
1825 return self.strRepr
1826
1827
1828 -class SkipTo(ParseElementEnhance):
1829 """Token for skipping over all undefined text until the matched expression is found.
1830 If include is set to true, the matched expression is also consumed. The ignore
1831 argument is used to define grammars (typically quoted strings and comments) that
1832 might contain false matches.
1833 """
1834 - def __init__( self, other, include=False, ignore=None ):
1844
def parseImpl( self, instring, loc, doActions=True ):
    """Advance one character at a time from loc until self.expr matches.

    When a match is found:
      - with self.includeMatch set, the target is parsed as well and the
        result is [ skipped text, target tokens ] (or just [ skipped text ]
        if the target produced no tokens), positioned after the target;
      - otherwise the result is [ skipped text ], positioned at the start
        of the target.
    If the end of instring is reached without a match, the exception
    object stored on the instance is stamped and raised.
    """
    startLoc = loc
    instrlen = len(instring)
    expr = self.expr
    while loc < instrlen:
        try:
            expr.tryParse(instring, loc)
            if self.includeMatch:
                skipText = instring[startLoc:loc]
                # forward doActions, as the other combinators in this file
                # do, so the caller's flag also governs the included target
                loc,mat = expr.parse(instring,loc,doActions)
                if mat:
                    return loc, [ skipText, mat ]
                else:
                    return loc, [ skipText ]
            else:
                return loc, [ instring[startLoc:loc] ]
        except (ParseException,IndexError):
            # no match here; try again one character further on
            loc += 1
    exc = self.myException
    exc.loc = loc
    exc.pstr = instring
    raise exc
1867
1868 -class Forward(ParseElementEnhance):
1869 """Forward declaration of an expression to be defined later -
1870 used for recursive grammars, such as algebraic infix notation.
1871 When the expression is known, it is assigned to the Forward variable using the '<<' operator.
1872 """
1875
1877 self.expr = other
1878 self.mayReturnEmpty = other.mayReturnEmpty
1879 self.strRepr = None
1880 return self
1881
1883 self.skipWhitespace = False
1884 return self
1885
1887 if not self.streamlined:
1888 self.streamlined = True
1889 if self.expr is not None:
1890 self.expr.streamline()
1891 return self
1892
def validate( self, validateTrace=[] ):
    """Validate the wrapped expression, then check this element for recursion.

    validateTrace lists the elements already being validated; it is only
    read, never modified.  The wrapped expression is validated (with this
    element appended to a copy of the trace) unless this element is already
    in the trace or no expression has been assigned yet.
    """
    alreadyVisited = self in validateTrace
    if not alreadyVisited:
        extendedTrace = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(extendedTrace)
    self.checkRecursion([])
1899
1901 if hasattr(self,"name"):
1902 return self.name
1903
1904 strmethod = self.__str__
1905 self.__class__ = _ForwardNoRecurse
1906 if self.expr is not None:
1907 retString = _ustr(self.expr)
1908 else:
1909 retString = "None"
1910 self.__class__ = Forward
1911 return "Forward: "+retString
1912
1916
1918 """Abstract subclass of ParseExpression, for converting parsed results."""
1919 - def __init__( self, expr, savelist=False ):
1921
1922
1923 -class Upcase(TokenConverter):
1924 """Converter to upper case all matching tokens."""
1926 super(Upcase,self).__init__(*args)
1927 warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
1928 DeprecationWarning,stacklevel=2)
1929
def postParse( self, instring, loc, tokenlist ):
    """Return a list holding the upper-cased form of every token in tokenlist.

    Each token's own upper() method is called instead of the deprecated
    string.upper function.
    """
    return [ tok.upper() for tok in tokenlist ]
1932
1933
1935 """Converter to concatenate all matching tokens to a single string.
1936 By default, the matching patterns must also be contiguous in the input string;
1937 this can be disabled by specifying 'adjacent=False' in the constructor.
1938 """
def __init__( self, expr, joinString="", adjacent=True ):
    """Wrap expr so that its matched tokens are concatenated into one string.

    joinString is stored for use when the tokens are joined.  With adjacent
    set (the default), leaveWhitespace() is invoked first; whitespace
    skipping is then switched back on for the Combine itself.
    """
    super(Combine,self).__init__( expr )

    if adjacent:
        self.leaveWhitespace()
    # re-enabled after leaveWhitespace() so it applies to this element only
    self.skipWhitespace = True
    self.joinString = joinString
    self.adjacent = adjacent
1947
1954
def postParse( self, instring, loc, tokenlist ):
    """Replace the matched tokens by a single concatenated string token.

    A copy of tokenlist is emptied of its list items and given the one
    joined string instead.  The copy is returned wrapped in a list when
    this element has a results name and the copy still has keys;
    otherwise the copy itself is returned.
    """
    retToks = tokenlist.copy()
    del retToks[:]
    joined = "".join(tokenlist._asStringList(self.joinString))
    retToks += ParseResults([ joined ], modal=self.modalResults)

    if not ( self.resultsName and len(retToks.keys())>0 ):
        return retToks
    return [ retToks ]
1964
1965
1966 -class Group(TokenConverter):
1967 """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
1971
def postParse( self, instring, loc, tokenlist ):
    """Return the matched tokens nested inside a one-element list."""
    grouped = [ tokenlist ]
    return grouped
1974
1975 -class Dict(TokenConverter):
1976 """Converter to return a repetitive expression as a list, but also as a dictionary.
1977 Each element can also be referenced using the first token in the expression as its key.
1978 Useful for tabular report scraping when the first column can be used as a item key.
1979 """
1981 super(Dict,self).__init__( exprs )
1982 self.saveAsList = True
1983
def postParse( self, instring, loc, tokenlist ):
    """Register every entry of tokenlist under a key taken from its first token.

    The key is the stripped string form of the entry's first token; the
    stored value is a (value, position) pair where value is:
      - ""              for an entry holding only the key
      - the 2nd token   for a two-token entry whose 2nd token is not a ParseResults
      - otherwise a copy of the entry without its first token, or that
        copy's sole item when it has exactly one item and no keys
    Returns tokenlist, wrapped in a list when this element has a results name.
    """
    for position,entry in enumerate(tokenlist):
        key = _ustr(entry[0]).strip()
        if len(entry)==1:
            tokenlist[key] = ("",position)
            continue
        if len(entry)==2 and not isinstance(entry[1],ParseResults):
            tokenlist[key] = (entry[1],position)
            continue

        remainder = entry.copy()
        del remainder[0]
        keepWhole = len(remainder)!= 1 or (isinstance(remainder,ParseResults) and remainder.keys())
        if keepWhole:
            tokenlist[key] = (remainder,position)
        else:
            tokenlist[key] = (remainder[0],position)

    if self.resultsName:
        return [ tokenlist ]
    return tokenlist
2003
2004
2006 """Converter for ignoring the results of a parsed expression."""
2007 - def postParse( self, instring, loc, tokenlist ):
2009
2012
2013
2014
2015
2017 """Helper to define a delimited list of expressions - the delimiter defaults to ','.
2018 By default, the list elements and delimiters can have intervening whitespace, and
2019 comments, but this can be overridden by passing 'combine=True' in the constructor.
2020 If combine is set to True, the matching tokens are returned as a single token
2021 string, with the delimiters included; otherwise, the matching tokens are returned
2022 as a list of tokens, with the delimiters suppressed.
2023 """
2024 if combine:
2025 return Combine( expr + ZeroOrMore( delim + expr ) ).setName(_ustr(expr)+_ustr(delim)+"...")
2026 else:
2027 return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(_ustr(expr)+_ustr(delim)+"...")
2028
def oneOf( strs, caseless=False ):
    """Helper to build a MatchFirst of literals from a whitespace-separated string.

    Duplicates are dropped, and any symbol that is a prefix of a later
    symbol is moved behind it, so that the longer symbol is always tested
    first regardless of the input order.  With caseless set, comparisons
    ignore case and CaselessLiteral is used instead of Literal.
    """
    if caseless:
        parseElementClass = CaselessLiteral
        def fold( s ):
            return s.upper()
    else:
        parseElementClass = Literal
        def fold( s ):
            return s

    symbols = strs.split()
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for offset,other in enumerate(symbols[i+1:]):
            otherIndex = i+offset+1
            if fold(other) == fold(cur):
                # duplicate: drop it and re-examine position i
                del symbols[otherIndex]
                break
            if fold(other).startswith(fold(cur)):
                # cur would hide the longer symbol: move that one in front
                del symbols[otherIndex]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing after position i conflicts with it any more
            i += 1

    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
2060
2062 """Helper to easily and clearly define a dictionary by specifying the respective patterns
2063 for the key and value. Takes care of defining the Dict, ZeroOrMore, and Group tokens
2064 in the proper order. The key pattern can include delimiting markers or punctuation,
2065 as long as they are suppressed, thereby leaving the significant key text. The value
2066 pattern can include named results, so that the Dict results can include named token
2067 fields.
2068 """
2069 return Dict( ZeroOrMore( Group ( key + value ) ) )
2070
# a single backslash character, used to build the escape-sequence expressions below
_bslash = "\\"
# every character of string.printable that is not in string.whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
empty = Empty().setName("empty")

# --- expressions describing the "[...]" character-set notation ---
# backslash followed by one of the listed characters; the parse action keeps only that second character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# printable characters other than backslash and ']'
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# backslash + "0x" (suppressed) followed by hex digits; converted to the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# backslash (suppressed) followed by "0" and octal digits; converted to the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
# any one character, written literally or in one of the escaped forms above
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# two single characters separated by a (suppressed) '-', grouped together
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# the whole bracket expression; the optional '^' is named "negate", the contents "body"
_reBracketExpr = "[" + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# a ParseResults part (a grouped range) becomes every character from its first to its second item, inclusive;
# any other part is returned unchanged
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
2084
2086 r"""Helper to easily define string ranges for use in Word construction. Borrows
2087 syntax from regexp '[]' string range definitions::
2088 srange("[0-9]") -> "0123456789"
2089 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
2090 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
2091 The input string must be enclosed in []'s, and the returned string is the expanded
2092 character set joined into a single string.
2093 The values enclosed in the []'s may be::
2094 a single character
2095 an escaped character with a leading backslash (such as \- or \])
2096 an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
2097 an escaped octal character with a leading '\0' (\041, which is a '!' character)
2098 a range of any of the above, separated by a dash ('a-z', etc.)
2099 any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
2100 """
2101 try:
2102 return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
2103 except:
2104 return ""
2105
2107 """Helper method for common parse actions that simply return a literal value. Especially
2108 useful when used with transformString().
2109 """
2110 def _replFunc(*args):
2111 return replStr
2112 return _replFunc
2113
2115 """Helper parse action for removing quotation marks from parsed quoted strings.
2116 To use, add this parse action to quoted string using::
2117 quotedString.setParseAction( removeQuotes )
2118 """
2119 return t[0][1:-1]
2120
2122 """Helper parse action to convert tokens to upper case."""
2123 return map( str.upper, t )
2124
2126 """Helper parse action to convert tokens to lower case."""
2127 return map( str.lower, t )
2128
2145
2149
2153
# characters with codes 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xfe
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xfe]")

# characters allowed after a backslash inside a quoted string
_escapables = "tnrfbacdeghijklmopqsuvwxyz " + _bslash + "'" + '"'
_octDigits = "01234567"
# backslash + one escapable character, or backslash + one to three octal digits
_escapedChar = ( Word( _bslash, _escapables, exact=2 ) |
                 Word( _bslash, _octDigits, min=2, max=4 ) )
_sglQuote = Literal("'")
_dblQuote = Literal('"')
# quoted strings: the body may not contain a bare backslash, the quote character or a line break,
# but may contain escaped characters and a doubled quote character
dblQuotedString = Combine( _dblQuote + ZeroOrMore( CharsNotIn('\\"\n\r') | _escapedChar | '""' ) + _dblQuote ).streamline().setName("string enclosed in double quotes")
sglQuotedString = Combine( _sglQuote + ZeroOrMore( CharsNotIn("\\'\n\r") | _escapedChar | "''" ) + _sglQuote ).streamline().setName("string enclosed in single quotes")
quotedString = ( dblQuotedString | sglQuotedString ).setName("quotedString using single or double quotes")


# "/*", then any run of non-'*' characters or a '*' not followed by '/', then "*/"
cStyleComment = Combine( Literal("/*") +
                         ZeroOrMore( CharsNotIn("*") | ( "*" + ~Literal("/") ) ) +
                         Literal("*/") ).streamline().setName("cStyleComment enclosed in /* ... */")
# "<!--", then any run of non-'-' characters or a '-' that does not start "-->", then "-->"
htmlComment = Combine( Literal("<!--") + ZeroOrMore( CharsNotIn("-") |
                       (~Literal("-->") + Literal("-").leaveWhitespace() ) ) +
                       Literal("-->") ).streamline().setName("htmlComment enclosed in <!-- ... -->")
# everything up to the next line break; yields "" when there is nothing before it
restOfLine = Optional( CharsNotIn( "\n\r" ), default="" ).setName("rest of line up to \\n").leaveWhitespace()
dblSlashComment = "//" + restOfLine
# the FollowedBy("/") lookahead is checked before either comment form is tried
cppStyleComment = FollowedBy("/") + ( dblSlashComment | cStyleComment )
javaStyleComment = cppStyleComment
pythonStyleComment = "#" + restOfLine
# printable characters other than the comma
_noncomma = "".join( [ c for c in printables if c != "," ] )
# one or more comma-free words; blanks/tabs between words are kept unless a comma or the line end follows
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# list items are quoted strings or comma-free items; a missing item yields ""
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
2183
2184
# Self-test, run only when the module is executed as a script: defines a small
# SELECT-statement grammar and prints the parse result for a series of inputs.
if __name__ == "__main__":

    def test( teststring ):
        """Parse teststring with simpleSQL and print the tokens, or the error location on failure."""
        print teststring,"->",
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print tokenlist
            print "tokens = ", tokens
            print "tokens.columns =", tokens.columns
            print "tokens.tables =", tokens.tables
            print tokens.asXML("SQL",True)
        except ParseException, err:
            # show the offending line with a caret under the failing column
            print err.line
            print " "*(err.column-1) + "^"
            print err
        print

    # keywords are matched without regard to case
    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    # dotted names are combined into one token and upper-cased
    columnName = Upcase( delimitedList( ident, ".", combine=True ) )
    columnNameList = Group( delimitedList( columnName ) )
    tableName = Upcase( delimitedList( ident, ".", combine=True ) )
    tableNameList = Group( delimitedList( tableName ) )
    # the column and table lists are exposed as the named results "columns" and "tables"
    simpleSQL = ( selectToken + \
                  ( '*' | columnNameList ).setResultsName( "columns" ) + \
                  fromToken + \
                  tableNameList.setResultsName( "tables" ) )

    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select" )
    test( "Select ^^^ frox Sys.dual" )
    test( "Select A, B, C from Sys.dual, Table2 " )
2227