1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 __doc__ = \
26 """
27 pyparsing module - Classes and methods to define and execute parsing grammars
28
29 The pyparsing module is an alternative approach to creating and executing simple grammars,
30 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
31 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
32 provides a library of classes that you use to construct the grammar directly in Python.
33
34 Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
35
36 from pyparsing import Word, alphas
37
38 # define grammar of a greeting
39 greet = Word( alphas ) + "," + Word( alphas ) + "!"
40
41 hello = "Hello, World!"
42 print (hello, "->", greet.parseString( hello ))
43
44 The program outputs the following::
45
46 Hello, World! -> ['Hello', ',', 'World', '!']
47
48 The Python representation of the grammar is quite readable, owing to the self-explanatory
49 class names, and the use of '+', '|' and '^' operators.
50
51 The parsed results returned from L{I{ParserElement.parseString}<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
52 object with named attributes.
53
54 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
55 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
56 - quoted strings
57 - embedded comments
58 """
59
60 __version__ = "2.1.6"
61 __versionTime__ = "07 Aug 2016 04:42 UTC"
62 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
63
64 import string
65 from weakref import ref as wkref
66 import copy
67 import sys
68 import warnings
69 import re
70 import sre_constants
71 import collections
72 import pprint
73 import traceback
74 import types
75 from datetime import datetime
76
77 try:
78 from _thread import RLock
79 except ImportError:
80 from threading import RLock
81
82 try:
83 from collections import OrderedDict as _OrderedDict
84 except ImportError:
85 try:
86 from ordereddict import OrderedDict as _OrderedDict
87 except ImportError:
88 _OrderedDict = None
89
90
91
92 __all__ = [
93 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
94 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
95 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
96 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
97 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
98 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
99 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
100 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
101 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
102 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
103 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
104 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
105 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
106 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
107 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
108 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
109 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
110 'tokenMap', 'pyparsing_common',
111 ]
112
113 system_version = tuple(sys.version_info)[:3]
114 PY_3 = system_version[0] == 3
115 if PY_3:
116 _MAX_INT = sys.maxsize
117 basestring = str
118 unichr = chr
119 _ustr = str
120
121
122 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
123
124 else:
125 _MAX_INT = sys.maxint
126 range = xrange
129 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
130 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
131 then < returns the unicode object | encodes it with the default encoding | ... >.
132 """
133 if isinstance(obj,unicode):
134 return obj
135
136 try:
137
138
139 return str(obj)
140
141 except UnicodeEncodeError:
142
143 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
144 xmlcharref = Regex('&#\d+;')
145 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
146 return xmlcharref.transformString(ret)
147
148
149 singleArgBuiltins = []
150 import __builtin__
151 for fname in "sum len sorted reversed list tuple set any all min max".split():
152 try:
153 singleArgBuiltins.append(getattr(__builtin__,fname))
154 except AttributeError:
155 continue
156
157 _generatorType = type((y for y in range(1)))
160 """Escape &, <, >, ", ', etc. in a string of data."""
161
162
163 from_symbols = '&><"\''
164 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
165 for from_,to_ in zip(from_symbols, to_symbols):
166 data = data.replace(from_, to_)
167 return data
168
171
# Basic character-set constants, exported for use when building expressions.
# alphas lists the upper-case ASCII letters first, then the lower-case ones.
alphas = "".join((string.ascii_uppercase, string.ascii_lowercase))
nums = string.digits
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
_bslash = "\\"
# every character of string.printable that is not a whitespace character
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
180 """base exception class for all parsing runtime exceptions"""
181
182
def __init__(self, pstr, loc=0, msg=None, elem=None):
    """
    Store the details of a parsing failure on the exception instance.

    @param pstr: the string being parsed; when C{msg} is omitted this
        single string is used as the message instead and the stored
        parse string is left empty
    @param loc: location in C{pstr} at which the failure occurred
    @param msg: the error message, or C{None}
    @param elem: the parser element associated with the failure
    """
    self.loc = loc
    # a lone string argument is treated as the message, not as the input text
    self.msg, self.pstr = (pstr, "") if msg is None else (msg, pstr)
    self.parserElement = elem
    self.args = (pstr, loc, msg)
193
195 """supported attributes by name are:
196 - lineno - returns the line number of the exception text
197 - col - returns the column number of the exception text
198 - line - returns the line containing the exception text
199 """
200 if( aname == "lineno" ):
201 return lineno( self.loc, self.pstr )
202 elif( aname in ("col", "column") ):
203 return col( self.loc, self.pstr )
204 elif( aname == "line" ):
205 return line( self.loc, self.pstr )
206 else:
207 raise AttributeError(aname)
208
210 return "%s (at char %d), (line:%d, col:%d)" % \
211 ( self.msg, self.loc, self.lineno, self.column )
225 return "lineno col line".split() + dir(type(self))
226
228 """
229 Exception thrown when parse expressions don't match class;
230 supported attributes by name are:
231 - lineno - returns the line number of the exception text
232 - col - returns the column number of the exception text
233 - line - returns the line containing the exception text
234
235 Example::
236 try:
237 Word(nums).setName("integer").parseString("ABC")
238 except ParseException as pe:
239 print(pe)
240 print("column: {}".format(pe.col))
241
242 prints::
243 Expected integer (at char 0), (line:1, col:1)
244 column: 1
245 """
246 pass
247
249 """user-throwable exception thrown when inconsistent parse content
250 is found; stops all parsing immediately"""
251 pass
252
254 """just like C{L{ParseFatalException}}, but thrown internally when an
255 C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
256 an unbacktrackable syntax error has been found"""
260
275 """exception thrown by C{validate()} if the grammar could be improperly recursive"""
def __init__(self, parseElementList):
    """
    Keep the given list of parse elements on the exception as
    C{parseElementTrace}.
    """
    trace = parseElementList
    self.parseElementTrace = trace
278
280 return "RecursiveGrammarException: %s" % self.parseElementTrace
281
288 return repr(self.tup)
290 self.tup = (self.tup[0],i)
291
293 """
294 Structured parse results, to provide multiple means of access to the parsed data:
295 - as a list (C{len(results)})
296 - by list index (C{results[0], results[1]}, etc.)
297 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
298
299 Example::
300 integer = Word(nums)
301 date_str = (integer.setResultsName("year") + '/'
302 + integer.setResultsName("month") + '/'
303 + integer.setResultsName("day"))
304 # equivalent form:
305 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
306
307 result = date_str.parseString("1999/12/31")
308 print(list(result))
309 print(result[0])
310 print(result['month'])
311 print(result.day)
312 print('month' in result)
313 print('minutes' in result)
314 print(result.dump())
315 prints::
316 ['1999', '/', '12', '/', '31']
317 1999
318 12
319 31
320 True
321 False
322 ['1999', '/', '12', '/', '31']
323 - day: 31
324 - month: 12
325 - year: 1999
326 """
def __new__(cls, toklist=None, name=None, asList=True, modal=True):
    """
    Create the instance, or reuse C{toklist} when it already is one.

    If C{toklist} is an instance of C{cls} it is returned as-is; otherwise
    a new object is allocated and flagged so that C{__init__} performs its
    one-time attribute set-up.
    """
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        # flag read (and cleared) by __init__
        fresh.__doinit = True
        return fresh
    return toklist
333
334
335
def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
    """
    Initialise the token list and, when C{name} is given, the named result.

    @param toklist: the token(s) to hold; a list is copied, a generator is
        expanded into a list, any other value becomes a one-element list
    @param name: optional results name under which the tokens are stored
    @param asList: when true the named value is stored as a C{ParseResults};
        otherwise the first token (or the whole C{toklist}) is stored
    @param modal: when false, C{name} is recorded in the accumulating-names
        table
    @param isinstance: the C{isinstance} builtin, bound as a default argument
    """
    # one-time set-up, requested by __new__ only for freshly created objects
    if self.__doinit:
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            tokens = toklist[:]
        elif isinstance(toklist, _generatorType):
            tokens = list(toklist)
        else:
            tokens = [toklist]
        self.__toklist = tokens
        self.__tokdict = {}

    # nothing more to do without a usable results name
    if name is None or not name:
        return

    if not modal:
        self.__accumNames[name] = 0
    if isinstance(name, int):
        name = _ustr(name)
    self.__name = name

    # an empty token value (None, '' or []) is not recorded under the name
    if isinstance(toklist, (type(None), basestring, list)) and toklist in (None, '', []):
        return

    if isinstance(toklist, basestring):
        toklist = [toklist]

    if not asList:
        try:
            self[name] = toklist[0]
        except (KeyError, TypeError, IndexError):
            self[name] = toklist
        return

    if isinstance(toklist, ParseResults):
        named = _ParseResultsWithOffset(toklist.copy(), 0)
    else:
        named = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
    self[name] = named
    self[name].__name = name
374
376 if isinstance( i, (int,slice) ):
377 return self.__toklist[i]
378 else:
379 if i not in self.__accumNames:
380 return self.__tokdict[i][-1][0]
381 else:
382 return ParseResults([ v[0] for v in self.__tokdict[i] ])
383
385 if isinstance(v,_ParseResultsWithOffset):
386 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
387 sub = v[0]
388 elif isinstance(k,(int,slice)):
389 self.__toklist[k] = v
390 sub = v
391 else:
392 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
393 sub = v
394 if isinstance(sub,ParseResults):
395 sub.__parent = wkref(self)
396
398 if isinstance(i,(int,slice)):
399 mylen = len( self.__toklist )
400 del self.__toklist[i]
401
402
403 if isinstance(i, int):
404 if i < 0:
405 i += mylen
406 i = slice(i, i+1)
407
408 removed = list(range(*i.indices(mylen)))
409 removed.reverse()
410
411 for name,occurrences in self.__tokdict.items():
412 for j in removed:
413 for k, (value, position) in enumerate(occurrences):
414 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
415 else:
416 del self.__tokdict[i]
417
419 return k in self.__tokdict
420
def __len__(self):
    """Return the number of items in the token list."""
    return len(self.__toklist)
def __bool__(self):
    """Return True when the token list is non-empty, False otherwise."""
    return bool(self.__toklist)
# Python 2 spelling of the same truth-test hook
__nonzero__ = __bool__
def __iter__(self):
    """Return an iterator over the token list."""
    tokens = self.__toklist
    return iter(tokens)
def __reversed__(self):
    """Return an iterator over a reversed copy of the token list."""
    backwards = self.__toklist[::-1]
    return iter(backwards)
427 if hasattr(self.__tokdict, "iterkeys"):
428 return self.__tokdict.iterkeys()
429 else:
430 return iter(self.__tokdict)
431
433 return (self[k] for k in self._iterkeys())
434
436 return ((k, self[k]) for k in self._iterkeys())
437
438 if PY_3:
439 keys = _iterkeys
440 """Returns an iterator of all named result keys (Python 3.x only)."""
441
442 values = _itervalues
443 """Returns an iterator of all named result values (Python 3.x only)."""
444
445 items = _iteritems
446 """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
447
448 else:
449 iterkeys = _iterkeys
450 """Returns an iterator of all named result keys (Python 2.x only)."""
451
452 itervalues = _itervalues
453 """Returns an iterator of all named result values (Python 2.x only)."""
454
455 iteritems = _iteritems
456 """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
457
459 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
460 return list(self.iterkeys())
461
463 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
464 return list(self.itervalues())
465
467 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
468 return list(self.iteritems())
469
471 """Since keys() returns an iterator, this method is helpful in bypassing
472 code that looks for the existence of any defined results names."""
473 return bool(self.__tokdict)
474
def pop(self, *args, **kwargs):
    """
    Remove an item and return it (by default the last token).

    Called with no argument or with an integer index this follows
    C{list.pop()} and removes from the list of parsed tokens.  Called
    with a non-integer key (most likely a results name string) it follows
    C{dict.pop()} and removes the named result.  As with C{dict.pop()}, a
    second argument (or the keyword C{default}) supplies the value to
    return when the key is not present.

    Example::
        def remove_first(tokens):
            tokens.pop(0)
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
        print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

        label = Word(alphas)
        patt = label("LABEL") + OneOrMore(Word(nums))
        print(patt.parseString("AAB 123 321").dump())

        # Popping a results name inside a parse action removes the name only;
        # the matching value stays in the list form of the results
        def remove_LABEL(tokens):
            tokens.pop("LABEL")
            return tokens
        patt.addParseAction(remove_LABEL)
        print(patt.parseString("AAB 123 321").dump())
    prints::
        ['AAB', '123', '321']
        - LABEL: AAB

        ['AAB', '123', '321']
    """
    positional = args if args else [-1]
    for kw, kwval in kwargs.items():
        if kw != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % kw)
        positional = (positional[0], kwval)

    key = positional[0]
    # only a missing non-integer key accompanied by a default skips the removal
    if not (isinstance(key, int) or len(positional) == 1 or key in self):
        return positional[1]

    popped = self[key]
    del self[key]
    return popped
526
def get(self, key, defaultValue=None):
    """
    Look up a named result, falling back to C{defaultValue} (C{None}
    unless specified) when C{key} is not a defined results name.

    Works like C{dict.get()}.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString("1999/12/31")
        print(result.get("year")) # -> '1999'
        print(result.get("hour", "not specified")) # -> 'not specified'
        print(result.get("hour")) # -> None
    """
    return self[key] if key in self else defaultValue
548
def insert(self, index, insStr):
    """
    Insert a new element into the list of parsed tokens at position
    C{index}, in the manner of C{list.insert()}.

    Example::
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to insert the parse location in the front of the parsed results
        def insert_locn(locn, tokens):
            tokens.insert(0, locn)
        print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
    """
    self.__toklist.insert(index, insStr)
    # bump by one every recorded results-name position greater than index
    for occurrences in self.__tokdict.values():
        occurrences[:] = [
            _ParseResultsWithOffset(value, position + (position > index))
            for (value, position) in occurrences
        ]
568
570 """
571 Add single element to end of ParseResults list of elements.
572
573 Example::
574 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
575
576 # use a parse action to compute the sum of the parsed integers, and add it to the end
577 def append_sum(tokens):
578 tokens.append(sum(map(int, tokens)))
579 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
580 """
581 self.__toklist.append(item)
582
584 """
585 Add sequence of elements to end of ParseResults list of elements.
586
587 Example::
588 patt = OneOrMore(Word(alphas))
589
590 # use a parse action to append the reverse of the matched strings, to make a palindrome
591 def make_palindrome(tokens):
592 tokens.extend(reversed([t[::-1] for t in tokens]))
593 return ''.join(tokens)
594 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
595 """
596 if isinstance(itemseq, ParseResults):
597 self += itemseq
598 else:
599 self.__toklist.extend(itemseq)
600
602 """
603 Clear all elements and results names.
604 """
605 del self.__toklist[:]
606 self.__tokdict.clear()
607
609 try:
610 return self[name]
611 except KeyError:
612 return ""
613
614 if name in self.__tokdict:
615 if name not in self.__accumNames:
616 return self.__tokdict[name][-1][0]
617 else:
618 return ParseResults([ v[0] for v in self.__tokdict[name] ])
619 else:
620 return ""
621
623 ret = self.copy()
624 ret += other
625 return ret
626
628 if other.__tokdict:
629 offset = len(self.__toklist)
630 addoffset = lambda a: offset if a<0 else a+offset
631 otheritems = other.__tokdict.items()
632 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
633 for (k,vlist) in otheritems for v in vlist]
634 for k,v in otherdictitems:
635 self[k] = v
636 if isinstance(v[0],ParseResults):
637 v[0].__parent = wkref(self)
638
639 self.__toklist += other.__toklist
640 self.__accumNames.update( other.__accumNames )
641 return self
642
644 if isinstance(other,int) and other == 0:
645
646 return self.copy()
647 else:
648
649 return other + self
650
652 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
653
655 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
656
658 out = []
659 for item in self.__toklist:
660 if out and sep:
661 out.append(sep)
662 if isinstance( item, ParseResults ):
663 out += item._asStringList()
664 else:
665 out.append( _ustr(item) )
666 return out
667
669 """
670 Returns the parse results as a nested list of matching tokens, all converted to strings.
671
672 Example::
673 patt = OneOrMore(Word(alphas))
674 result = patt.parseString("sldkj lsdkj sldkj")
675 # even though the result prints in string-like form, it is actually a pyparsing ParseResults
676 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
677
678 # Use asList() to create an actual list
679 result_list = result.asList()
680 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
681 """
682 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
683
685 """
686 Returns the named parse results as a nested dictionary.
687
688 Example::
689 integer = Word(nums)
690 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
691
692 result = date_str.parseString('12/31/1999')
693 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
694
695 result_dict = result.asDict()
696 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
697
698 # even though a ParseResults supports dict-like access, sometime you just need to have a dict
699 import json
700 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
701 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
702 """
703 if PY_3:
704 item_fn = self.items
705 else:
706 item_fn = self.iteritems
707
708 def toItem(obj):
709 if isinstance(obj, ParseResults):
710 if obj.haskeys():
711 return obj.asDict()
712 else:
713 return [toItem(v) for v in obj]
714 else:
715 return obj
716
717 return dict((k,toItem(v)) for k,v in item_fn())
718
720 """
721 Returns a new copy of a C{ParseResults} object.
722 """
723 ret = ParseResults( self.__toklist )
724 ret.__tokdict = self.__tokdict.copy()
725 ret.__parent = self.__parent
726 ret.__accumNames.update( self.__accumNames )
727 ret.__name = self.__name
728 return ret
729
def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
    """
    (Deprecated) Render the parse results as an XML string.  Tokens and
    nested lists that have a results name are tagged with that name; the
    rest are tagged C{ITEM}, or left out when C{namedItemsOnly} is true.
    """
    # map each recorded token position to its results name
    names_by_pos = {}
    for tag, occurrences in self.__tokdict.items():
        for occurrence in occurrences:
            names_by_pos[occurrence[1]] = tag

    if formatted:
        newline = "\n"
        child_indent = indent + " "
    else:
        # unformatted output has no line breaks and no indentation
        newline = indent = child_indent = ""

    if doctag is not None:
        selfTag = doctag
    elif self.__name:
        selfTag = self.__name
    else:
        selfTag = None

    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    pieces = [newline, indent, "<", selfTag, ">"]
    nested_named_only = namedItemsOnly and doctag is None

    for pos, tok in enumerate(self.__toklist):
        tag = names_by_pos.get(pos)
        if isinstance(tok, ParseResults):
            pieces.append(tok.asXML(tag, nested_named_only, child_indent, formatted))
            continue
        # individual token: see if there is a name for it
        if not tag:
            if namedItemsOnly:
                continue
            tag = "ITEM"
        pieces.extend([newline, child_indent, "<", tag, ">",
                       _xml_escape(_ustr(tok)),
                       "</", tag, ">"])

    pieces.extend([newline, indent, "</", selfTag, ">"])
    return "".join(pieces)
790
792 for k,vlist in self.__tokdict.items():
793 for v,loc in vlist:
794 if sub is v:
795 return k
796 return None
797
799 """
800 Returns the results name for this token expression. Useful when several
801 different expressions might match at a particular location.
802
803 Example::
804 integer = Word(nums)
805 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
806 house_number_expr = Suppress('#') + Word(nums, alphanums)
807 user_data = (Group(house_number_expr)("house_number")
808 | Group(ssn_expr)("ssn")
809 | Group(integer)("age"))
810 user_info = OneOrMore(user_data)
811
812 result = user_info.parseString("22 111-22-3333 #221B")
813 for item in result:
814 print(item.getName(), ':', item[0])
815 prints::
816 age : 22
817 ssn : 111-22-3333
818 house_number : 221B
819 """
820 if self.__name:
821 return self.__name
822 elif self.__parent:
823 par = self.__parent()
824 if par:
825 return par.__lookup(self)
826 else:
827 return None
828 elif (len(self) == 1 and
829 len(self.__tokdict) == 1 and
830 self.__tokdict.values()[0][0][1] in (0,-1)):
831 return self.__tokdict.keys()[0]
832 else:
833 return None
834
def dump(self, indent='', depth=0, full=True):
    """
    Diagnostic method for listing out the contents of a C{ParseResults}.
    Accepts an optional C{indent} argument so that this string can be embedded
    in a nested display of other data.

    @param indent: text placed at the start of each output line
    @param depth: nesting level, used to pad nested entries
    @param full: when false, only the list form of the results is returned
    @return: the listing as a single string

    Named results are listed in sorted key order.  If the keys cannot be
    ordered against each other (Python 3 raises C{TypeError} for mixed
    key types), they are ordered by their text form instead.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(result.dump())
    prints::
        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    out = [indent + _ustr(self.asList())]
    if not full:
        return "".join(out)

    if self.haskeys():
        try:
            items = sorted(self.items())
        except TypeError:
            # keys of different types are not orderable on Python 3;
            # fall back to ordering by each key's text form
            items = sorted(self.items(), key=lambda kv: _ustr(kv[0]))
        for k, v in items:
            out.append('\n')
            out.append("%s%s- %s: " % (indent, (' ' * depth), k))
            # only non-empty nested results are expanded recursively
            if isinstance(v, ParseResults) and v:
                out.append(v.dump(indent, depth + 1))
            else:
                out.append(_ustr(v))
    elif any(isinstance(vv, ParseResults) for vv in self):
        # no names at this level: list each element under its index
        for i, vv in enumerate(self):
            if isinstance(vv, ParseResults):
                text = vv.dump(indent, depth + 1)
            else:
                text = _ustr(vv)
            out.append("\n%s%s[%d]:\n%s%s%s" % (indent, (' ' * depth), i,
                                               indent, (' ' * (depth + 1)), text))

    return "".join(out)
879
def pprint(self, *args, **kwargs):
    """
    Pretty-print the parsed results in their list form via the C{pprint}
    module.  Any extra positional or keyword arguments are passed straight
    through to C{pprint.pprint}.
    (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

    Example::
        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)
    prints::
        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    nested_list = self.asList()
    pprint.pprint(nested_list, *args, **kwargs)
902
903
905 return ( self.__toklist,
906 ( self.__tokdict.copy(),
907 self.__parent is not None and self.__parent() or None,
908 self.__accumNames,
909 self.__name ) )
910
912 self.__toklist = state[0]
913 (self.__tokdict,
914 par,
915 inAccumNames,
916 self.__name) = state[1]
917 self.__accumNames = {}
918 self.__accumNames.update(inAccumNames)
919 if par is not None:
920 self.__parent = wkref(par)
921 else:
922 self.__parent = None
923
925 return self.__toklist, self.__name, self.__asList, self.__modal
926
928 return (dir(type(self)) + list(self.keys()))
929
# Register ParseResults as a virtual MutableMapping subclass.  The ABC lives in
# collections.abc on Python 3 (the collections.MutableMapping alias was removed
# in Python 3.10); fall back to the old location for Python 2.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
931
def col(loc, strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    A location that points at a newline character is reported as the
    column just past the text of its line (e.g. column 4 for the newline
    in C{"abc\\ndef"}), not as column 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    @param loc: zero-based index into C{strg}
    @param strg: the string being parsed
    @return: one-based column number of C{loc}
    """
    # rfind yields -1 when no newline precedes loc, so the first line also
    # counts from 1
    return loc - strg.rfind("\n", 0, loc)
944
946 """Returns current line number within a string, counting newlines as line separators.
947 The first line is number 1.
948
949 Note: the default parsing behavior is to expand tabs in the input string
950 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
951 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
952 consistent view of the parsed string, the parse location, and line and column
953 positions within the parsed string.
954 """
955 return strg.count("\n",0,loc) + 1
956
def line(loc, strg):
    """Returns the line of text within C{strg} that contains position C{loc},
       treating newlines as line separators (the newline itself is not included).
       """
    start = strg.rfind("\n", 0, loc) + 1
    stop = strg.find("\n", loc)
    # no following newline: the line runs to the end of the string
    return strg[start:stop] if stop >= 0 else strg[start:]
966
968 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
969
971 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
972
974 print ("Exception raised:" + _ustr(exc))
975
977 """'Do-nothing' debug action, to suppress debugging output during parsing."""
978 pass
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002 'decorator to trim function calls to match the arity of the target'
1004 if func in singleArgBuiltins:
1005 return lambda s,l,t: func(t)
1006 limit = [0]
1007 foundArity = [False]
1008
1009
1010 if system_version[:2] >= (3,5):
1011 def extract_stack():
1012
1013 offset = -3 if system_version == (3,5,0) else -2
1014 frame_summary = traceback.extract_stack()[offset]
1015 return [(frame_summary.filename, frame_summary.lineno)]
1016 def extract_tb(tb):
1017 frames = traceback.extract_tb(tb)
1018 frame_summary = frames[-1]
1019 return [(frame_summary.filename, frame_summary.lineno)]
1020 else:
1021 extract_stack = traceback.extract_stack
1022 extract_tb = traceback.extract_tb
1023
1024
1025
1026
1027 LINE_DIFF = 6
1028
1029
1030 this_line = extract_stack()[-1]
1031 pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)
1032
1033 def wrapper(*args):
1034 while 1:
1035 try:
1036 ret = func(*args[limit[0]:])
1037 foundArity[0] = True
1038 return ret
1039 except TypeError:
1040
1041 if foundArity[0]:
1042 raise
1043 else:
1044 try:
1045 tb = sys.exc_info()[-1]
1046 if not extract_tb(tb)[-1][:2] == pa_call_line_synth:
1047 raise
1048 finally:
1049 del tb
1050
1051 if limit[0] <= maxargs:
1052 limit[0] += 1
1053 continue
1054 raise
1055
1056
1057 func_name = "<parse action>"
1058 try:
1059 func_name = getattr(func, '__name__',
1060 getattr(func, '__class__').__name__)
1061 except Exception:
1062 func_name = str(func)
1063 wrapper.__name__ = func_name
1064
1065 return wrapper
1066
1068 """Abstract base level parser element class."""
1069 DEFAULT_WHITE_CHARS = " \n\t\r"
1070 verbose_stacktrace = False
1071
1072 @staticmethod
1074 r"""
1075 Overrides the default whitespace chars
1076
1077 Example::
1078 # default whitespace chars are space, <TAB> and newline
1079 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
1080
1081 # change to just treat newline as significant
1082 ParserElement.setDefaultWhitespaceChars(" \t")
1083 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
1084 """
1085 ParserElement.DEFAULT_WHITE_CHARS = chars
1086
1087 @staticmethod
1089 """
1090 Set class to be used for inclusion of string literals into a parser.
1091
1092 Example::
1093 # default literal class used is Literal
1094 integer = Word(nums)
1095 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1096
1097 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
1098
1099
1100 # change to Suppress
1101 ParserElement.inlineLiteralsUsing(Suppress)
1102 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1103
1104 date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
1105 """
1106 ParserElement._literalStringClass = cls
1107
1109 self.parseAction = list()
1110 self.failAction = None
1111
1112 self.strRepr = None
1113 self.resultsName = None
1114 self.saveAsList = savelist
1115 self.skipWhitespace = True
1116 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1117 self.copyDefaultWhiteChars = True
1118 self.mayReturnEmpty = False
1119 self.keepTabs = False
1120 self.ignoreExprs = list()
1121 self.debug = False
1122 self.streamlined = False
1123 self.mayIndexError = True
1124 self.errmsg = ""
1125 self.modalResults = True
1126 self.debugActions = ( None, None, None )
1127 self.re = None
1128 self.callPreparse = True
1129 self.callDuringTry = False
1130
1132 """
1133 Make a copy of this C{ParserElement}. Useful for defining different parse actions
1134 for the same parsing pattern, using copies of the original parse element.
1135
1136 Example::
1137 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1138 integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
1139 integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1140
1141 print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
1142 prints::
1143 [5120, 100, 655360, 268435456]
1144 Equivalent form of C{expr.copy()} is just C{expr()}::
1145 integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1146 """
1147 cpy = copy.copy( self )
1148 cpy.parseAction = self.parseAction[:]
1149 cpy.ignoreExprs = self.ignoreExprs[:]
1150 if self.copyDefaultWhiteChars:
1151 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1152 return cpy
1153
1155 """
1156 Define name for this expression, makes exception messages clearer.
1157
1158 Example::
1159 Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
1160 Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1)
1161 """
1162 self.name = name
1163 self.errmsg = "Expected " + self.name
1164 if hasattr(self,"exception"):
1165 self.exception.msg = self.errmsg
1166 return self
1167
1169 """
1170 Define name for referencing matching tokens as a nested attribute
1171 of the returned parse results.
1172 NOTE: this returns a *copy* of the original C{ParserElement} object;
1173 this is so that the client can define a basic element, such as an
1174 integer, and reference it in multiple places with different names.
1175
1176 You can also set results names using the abbreviated syntax,
1177 C{expr("name")} in place of C{expr.setResultsName("name")} -
1178 see L{I{__call__}<__call__>}.
1179
1180 Example::
1181 date_str = (integer.setResultsName("year") + '/'
1182 + integer.setResultsName("month") + '/'
1183 + integer.setResultsName("day"))
1184
1185 # equivalent form:
1186 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1187 """
1188 newself = self.copy()
1189 if name.endswith("*"):
1190 name = name[:-1]
1191 listAllMatches=True
1192 newself.resultsName = name
1193 newself.modalResults = not listAllMatches
1194 return newself
1195
1197 """Method to invoke the Python pdb debugger when this element is
1198 about to be parsed. Set C{breakFlag} to True to enable, False to
1199 disable.
1200 """
1201 if breakFlag:
1202 _parseMethod = self._parse
1203 def breaker(instring, loc, doActions=True, callPreParse=True):
1204 import pdb
1205 pdb.set_trace()
1206 return _parseMethod( instring, loc, doActions, callPreParse )
1207 breaker._originalParseMethod = _parseMethod
1208 self._parse = breaker
1209 else:
1210 if hasattr(self._parse,"_originalParseMethod"):
1211 self._parse = self._parse._originalParseMethod
1212 return self
1213
1215 """
1216 Define action to perform when successfully matching parse element definition.
1217 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
1218 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
1219 - s = the original string being parsed (see note below)
1220 - loc = the location of the matching substring
1221 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
1222 If the functions in fns modify the tokens, they can return them as the return
1223 value from fn, and the modified list of tokens will replace the original.
1224 Otherwise, fn does not need to return any value.
1225
1226 Optional keyword arguments:
1227 - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
1228
1229 Note: the default parsing behavior is to expand tabs in the input string
1230 before starting the parsing process. See L{I{parseString}<parseString>} for more information
1231 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
1232 consistent view of the parsed string, the parse location, and line and column
1233 positions within the parsed string.
1234
1235 Example::
1236 integer = Word(nums)
1237 date_str = integer + '/' + integer + '/' + integer
1238
1239 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
1240
1241 # use parse action to convert to ints at parse time
1242 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1243 date_str = integer + '/' + integer + '/' + integer
1244
1245 # note that integer fields are now ints, not strings
1246 date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
1247 """
1248 self.parseAction = list(map(_trim_arity, list(fns)))
1249 self.callDuringTry = kwargs.get("callDuringTry", False)
1250 return self
1251
1253 """
1254 Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
1255
1256 See examples in L{I{copy}<copy>}.
1257 """
1258 self.parseAction += list(map(_trim_arity, list(fns)))
1259 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1260 return self
1261
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = (default=C{False}) OR-ed into this expression's C{callDuringTry} flag

    Returns C{self}, so calls can be chained.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition_action(condition):
        # A factory gives every parse action its own `condition` binding.
        # Defining `pa` directly in the loop would late-bind the loop variable,
        # so all actions would end up testing only the last predicate.
        def pa(s,l,t):
            if not bool(condition(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        # wrap once here instead of on every invocation of the parse action
        self.parseAction.append(make_condition_action(_trim_arity(fn)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1288
1290 """Define action to perform if parsing fails at this expression.
1291 Fail action fn is a callable function that takes the arguments
1292 C{fn(s,loc,expr,err)} where:
1293 - s = string being parsed
1294 - loc = location where expression match was attempted and failed
1295 - expr = the parse expression that failed
1296 - err = the exception thrown
1297 The function returns no value. It may throw C{L{ParseFatalException}}
1298 if it is desired to stop parsing immediately."""
1299 self.failAction = fn
1300 return self
1301
1303 exprsFound = True
1304 while exprsFound:
1305 exprsFound = False
1306 for e in self.ignoreExprs:
1307 try:
1308 while 1:
1309 loc,dummy = e._parse( instring, loc )
1310 exprsFound = True
1311 except ParseException:
1312 pass
1313 return loc
1314
1316 if self.ignoreExprs:
1317 loc = self._skipIgnorables( instring, loc )
1318
1319 if self.skipWhitespace:
1320 wt = self.whiteChars
1321 instrlen = len(instring)
1322 while loc < instrlen and instring[loc] in wt:
1323 loc += 1
1324
1325 return loc
1326
1327 - def parseImpl( self, instring, loc, doActions=True ):
1329
1330 - def postParse( self, instring, loc, tokenlist ):
1332
1333
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Attempt to match this expression against C{instring} at C{loc}, always doing
    the work (no cache lookup is made here).

    Steps, in order: optional C{preParse}, C{parseImpl}, C{postParse}, wrapping the
    tokens in a C{ParseResults}, then running the parse actions.

    Parameters:
     - instring - the string being parsed
     - loc - location at which to attempt the match
     - doActions - (default=C{True}) forwarded to C{parseImpl}; parse actions are run
       only when this is true or C{self.callDuringTry} is set
     - callPreParse - (default=C{True}) C{preParse} is called only when both this
       flag and C{self.callPreparse} are true

    Returns: a C{(loc, tokens)} tuple - the location returned by C{parseImpl} and the
    resulting C{ParseResults}.

    Raises: C{ParseException} in place of an C{IndexError} raised by C{parseImpl}
    (see the note in the non-debug branch for the one unguarded call); any
    C{ParseBaseException} from C{parseImpl} or a parse action propagates.
    """
    debugging = ( self.debug )

    if debugging or self.failAction:
        # Slow path: debug hooks and/or a fail action have to see failures.
        # debugActions is indexed (start, success, exception).
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        # location handed to parse actions and to the debug/fail hooks
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                # running off the end of the input is reported as a parse failure at end of string
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            # notify the exception debug hook and the fail action, then let the error continue
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # Fast path: no hooks to notify.
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # NOTE: the bounds test uses the incoming loc, not preloc
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            # mayIndexError is false and loc is inside the string: call without the
            # IndexError guard (an IndexError here would propagate unchanged)
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    # each action receives the results produced so far; a non-None
                    # return value replaces them
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                # a parse action failed: report it to the exception debug hook and re-raise
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            # same loop as above, without the debug reporting
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        # success debug hook gets both the start and the end location of the match
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1405
1411
1413 try:
1414 self.tryParse(instring, loc)
1415 except (ParseException, IndexError):
1416 return False
1417 else:
1418 return True
1419
1422 cache = {}
1423 self.not_in_cache = not_in_cache = object()
1424
1425 def get(self, key):
1426 return cache.get(key, not_in_cache)
1427
1428 def set(self, key, value):
1429 cache[key] = value
1430
1431 def clear(self):
1432 cache.clear()
1433
1434 self.get = types.MethodType(get, self)
1435 self.set = types.MethodType(set, self)
1436 self.clear = types.MethodType(clear, self)
1437
1438 if _OrderedDict is not None:
1441 self.not_in_cache = not_in_cache = object()
1442
1443 cache = _OrderedDict()
1444
1445 def get(self, key):
1446 return cache.get(key, not_in_cache)
1447
1448 def set(self, key, value):
1449 cache[key] = value
1450 if len(cache) > size:
1451 cache.popitem(False)
1452
1453 def clear(self):
1454 cache.clear()
1455
1456 self.get = types.MethodType(get, self)
1457 self.set = types.MethodType(set, self)
1458 self.clear = types.MethodType(clear, self)
1459
1460 else:
1463 self.not_in_cache = not_in_cache = object()
1464
1465 cache = {}
1466 key_fifo = collections.deque([], size)
1467
1468 def get(self, key):
1469 return cache.get(key, not_in_cache)
1470
1471 def set(self, key, value):
1472 cache[key] = value
1473 if len(cache) > size:
1474 cache.pop(key_fifo.popleft(), None)
1475 key_fifo.append(key)
1476
1477 def clear(self):
1478 cache.clear()
1479 key_fifo.clear()
1480
1481 self.get = types.MethodType(get, self)
1482 self.set = types.MethodType(set, self)
1483 self.clear = types.MethodType(clear, self)
1484
1485
1486 packrat_cache = {}
1487 packrat_cache_lock = RLock()
1488 packrat_cache_stats = [0, 0]
1489
1490
1491
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Memoizing front end for C{_parseNoCache}.

    Results and C{ParseBaseException}s are stored in C{ParserElement.packrat_cache}
    under the key C{(self, instring, loc, callPreParse, doActions)}. The whole
    lookup/parse/store sequence runs while holding C{ParserElement.packrat_cache_lock},
    and C{ParserElement.packrat_cache_stats} is updated with a hit or a miss.

    Returns the same C{(loc, tokens)} tuple as C{_parseNoCache}; on a cache hit the
    tokens are a copy of the stored results. A cached exception is raised again.
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(key)

        if cached is not cache.not_in_cache:
            # cache hit: replay the stored outcome
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            # hand out a copy so the caller cannot alter the cached results
            return (cached[0], cached[1].copy())

        # cache miss: do the real parse and remember how it ended
        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # store a fresh exception built from the same args, not the raised instance
            cache.set(key, pe.__class__(*pe.args))
            raise
        cache.set(key, (result[0], result[1].copy()))
        return result
1514
1515 _parse = _parseNoCache
1516
1517 @staticmethod
1521
1522 _packratEnabled = False
1523 @staticmethod
1525 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1526 Repeated parse attempts at the same string location (which happens
1527 often in many complex grammars) can immediately return a cached value,
1528 instead of re-executing parsing/validating code. Memoizing is done of
1529 both valid results and parsing exceptions.
1530
1531 Parameters:
1532 - cache_size_limit - (default=C{128}) - if an integer value is provided
1533 will limit the size of the packrat cache; if None is passed, then
1534 the cache size will be unbounded; if 0 is passed, the cache will
1535 be effectively disabled.
1536
1537 This speedup may break existing programs that use parse actions that
1538 have side-effects. For this reason, packrat parsing is disabled when
1539 you first import pyparsing. To activate the packrat feature, your
1540 program must call the class method C{ParserElement.enablePackrat()}. If
1541 your program uses C{psyco} to "compile as you go", you must call
1542 C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
1543 Python will crash. For best results, call C{enablePackrat()} immediately
1544 after importing pyparsing.
1545
1546 Example::
1547 import pyparsing
1548 pyparsing.ParserElement.enablePackrat()
1549 """
1550 if not ParserElement._packratEnabled:
1551 ParserElement._packratEnabled = True
1552 if cache_size_limit is None:
1553 ParserElement.packrat_cache = ParserElement._UnboundedCache()
1554 else:
1555 ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
1556 ParserElement._parse = ParserElement._parseCache
1557
1559 """
1560 Execute the parse expression with the given string.
1561 This is the main interface to the client code, once the complete
1562 expression has been built.
1563
1564 If you want the grammar to require that the entire input string be
1565 successfully parsed, then set C{parseAll} to True (equivalent to ending
1566 the grammar with C{L{StringEnd()}}).
1567
1568 Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1569 in order to report proper column numbers in parse actions.
1570 If the input string contains tabs and
1571 the grammar uses parse actions that use the C{loc} argument to index into the
1572 string being parsed, you can ensure you have a consistent view of the input
1573 string by:
1574 - calling C{parseWithTabs} on your grammar before calling C{parseString}
1575 (see L{I{parseWithTabs}<parseWithTabs>})
1576 - define your parse action using the full C{(s,loc,toks)} signature, and
1577 reference the input string using the parse action's C{s} argument
1578 - explicitly expand the tabs in your input string before calling
1579 C{parseString}
1580
1581 Example::
1582 Word('a').parseString('aaaaabaaa') # -> ['aaaaa']
1583 Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text
1584 """
1585 ParserElement.resetCache()
1586 if not self.streamlined:
1587 self.streamline()
1588
1589 for e in self.ignoreExprs:
1590 e.streamline()
1591 if not self.keepTabs:
1592 instring = instring.expandtabs()
1593 try:
1594 loc, tokens = self._parse( instring, 0 )
1595 if parseAll:
1596 loc = self.preParse( instring, loc )
1597 se = Empty() + StringEnd()
1598 se._parse( instring, loc )
1599 except ParseBaseException as exc:
1600 if ParserElement.verbose_stacktrace:
1601 raise
1602 else:
1603
1604 raise exc
1605 else:
1606 return tokens
1607
1609 """
1610 Scan the input string for expression matches. Each match will return the
1611 matching tokens, start location, and end location. May be called with optional
1612 C{maxMatches} argument, to clip scanning after 'n' matches are found. If
1613 C{overlap} is specified, then overlapping matches will be reported.
1614
1615 Note that the start and end locations are reported relative to the string
1616 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1617 strings with embedded tabs.
1618
1619 Example::
1620 source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
1621 print(source)
1622 for tokens,start,end in Word(alphas).scanString(source):
1623 print(' '*start + '^'*(end-start))
1624 print(' '*start + tokens[0])
1625
1626 prints::
1627
1628 sldjf123lsdjjkf345sldkjf879lkjsfd987
1629 ^^^^^
1630 sldjf
1631 ^^^^^^^
1632 lsdjjkf
1633 ^^^^^^
1634 sldkjf
1635 ^^^^^^
1636 lkjsfd
1637 """
1638 if not self.streamlined:
1639 self.streamline()
1640 for e in self.ignoreExprs:
1641 e.streamline()
1642
1643 if not self.keepTabs:
1644 instring = _ustr(instring).expandtabs()
1645 instrlen = len(instring)
1646 loc = 0
1647 preparseFn = self.preParse
1648 parseFn = self._parse
1649 ParserElement.resetCache()
1650 matches = 0
1651 try:
1652 while loc <= instrlen and matches < maxMatches:
1653 try:
1654 preloc = preparseFn( instring, loc )
1655 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1656 except ParseException:
1657 loc = preloc+1
1658 else:
1659 if nextLoc > loc:
1660 matches += 1
1661 yield tokens, preloc, nextLoc
1662 if overlap:
1663 nextloc = preparseFn( instring, loc )
1664 if nextloc > loc:
1665 loc = nextLoc
1666 else:
1667 loc += 1
1668 else:
1669 loc = nextLoc
1670 else:
1671 loc = preloc+1
1672 except ParseBaseException as exc:
1673 if ParserElement.verbose_stacktrace:
1674 raise
1675 else:
1676
1677 raise exc
1678
1721
1723 """
1724 Another extension to C{L{scanString}}, simplifying the access to the tokens found
1725 to match the given parse expression. May be called with optional
1726 C{maxMatches} argument, to clip searching after 'n' matches are found.
1727
1728 Example::
1729 # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
1730 cap_word = Word(alphas.upper(), alphas.lower())
1731
1732 print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
1733 prints::
1734 ['More', 'Iron', 'Lead', 'Gold', 'I']
1735 """
1736 try:
1737 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1738 except ParseBaseException as exc:
1739 if ParserElement.verbose_stacktrace:
1740 raise
1741 else:
1742
1743 raise exc
1744
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    Yields the text between successive matches and, last, the text after the final
    match. When C{includeSeparators} is true, the first token of each match is
    yielded after the text that precedes it.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    # NOTE(review): scanString reports start/end relative to the text it scanned,
    # which has tabs expanded unless keepTabs is set, while the slices below are
    # taken from the caller's original string - confirm behaviour for input
    # containing <TAB>s.
    last = 0
    for t,s,e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    yield instring[last:]
1766
1768 """
1769 Implementation of + operator - returns C{L{And}}
1770 """
1771 if isinstance( other, basestring ):
1772 other = ParserElement._literalStringClass( other )
1773 if not isinstance( other, ParserElement ):
1774 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1775 SyntaxWarning, stacklevel=2)
1776 return None
1777 return And( [ self, other ] )
1778
1780 """
1781 Implementation of + operator when left operand is not a C{L{ParserElement}}
1782 """
1783 if isinstance( other, basestring ):
1784 other = ParserElement._literalStringClass( other )
1785 if not isinstance( other, ParserElement ):
1786 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1787 SyntaxWarning, stacklevel=2)
1788 return None
1789 return other + self
1790
1792 """
1793 Implementation of - operator, returns C{L{And}} with error stop
1794 """
1795 if isinstance( other, basestring ):
1796 other = ParserElement._literalStringClass( other )
1797 if not isinstance( other, ParserElement ):
1798 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1799 SyntaxWarning, stacklevel=2)
1800 return None
1801 return And( [ self, And._ErrorStop(), other ] )
1802
1804 """
1805 Implementation of - operator when left operand is not a C{L{ParserElement}}
1806 """
1807 if isinstance( other, basestring ):
1808 other = ParserElement._literalStringClass( other )
1809 if not isinstance( other, ParserElement ):
1810 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1811 SyntaxWarning, stacklevel=2)
1812 return None
1813 return other - self
1814
def __mul__(self,other):
    """
    Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises:
     - C{TypeError} if C{other} is neither an int nor a tuple of ints/C{None}
     - C{ValueError} for a negative count, a max smaller than the min, or a
       multiplier of 0 / (0,0)
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None so that (n,) behaves like (n,None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            # open-ended repetition
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts the optional repetitions beyond the minimum
            optElements -= minElements
        else:
            # interpolate the type names into the message (they used to be passed
            # as extra exception args and never formatted)
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects" % (type(other[0]),type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: each further repetition is only tried after the previous one matched
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
1882
1885
1887 """
1888 Implementation of | operator - returns C{L{MatchFirst}}
1889 """
1890 if isinstance( other, basestring ):
1891 other = ParserElement._literalStringClass( other )
1892 if not isinstance( other, ParserElement ):
1893 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1894 SyntaxWarning, stacklevel=2)
1895 return None
1896 return MatchFirst( [ self, other ] )
1897
1899 """
1900 Implementation of | operator when left operand is not a C{L{ParserElement}}
1901 """
1902 if isinstance( other, basestring ):
1903 other = ParserElement._literalStringClass( other )
1904 if not isinstance( other, ParserElement ):
1905 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1906 SyntaxWarning, stacklevel=2)
1907 return None
1908 return other | self
1909
1911 """
1912 Implementation of ^ operator - returns C{L{Or}}
1913 """
1914 if isinstance( other, basestring ):
1915 other = ParserElement._literalStringClass( other )
1916 if not isinstance( other, ParserElement ):
1917 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1918 SyntaxWarning, stacklevel=2)
1919 return None
1920 return Or( [ self, other ] )
1921
1923 """
1924 Implementation of ^ operator when left operand is not a C{L{ParserElement}}
1925 """
1926 if isinstance( other, basestring ):
1927 other = ParserElement._literalStringClass( other )
1928 if not isinstance( other, ParserElement ):
1929 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1930 SyntaxWarning, stacklevel=2)
1931 return None
1932 return other ^ self
1933
1935 """
1936 Implementation of & operator - returns C{L{Each}}
1937 """
1938 if isinstance( other, basestring ):
1939 other = ParserElement._literalStringClass( other )
1940 if not isinstance( other, ParserElement ):
1941 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1942 SyntaxWarning, stacklevel=2)
1943 return None
1944 return Each( [ self, other ] )
1945
1947 """
1948 Implementation of & operator when left operand is not a C{L{ParserElement}}
1949 """
1950 if isinstance( other, basestring ):
1951 other = ParserElement._literalStringClass( other )
1952 if not isinstance( other, ParserElement ):
1953 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1954 SyntaxWarning, stacklevel=2)
1955 return None
1956 return other & self
1957
1959 """
1960 Implementation of ~ operator - returns C{L{NotAny}}
1961 """
1962 return NotAny( self )
1963
1965 """
1966 Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}.
1967
1968 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
1969 passed as C{True}.
1970
1971 If C{name} is omitted, same as calling C{L{copy}}.
1972
1973 Example::
1974 # these are equivalent
1975 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1976 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1977 """
1978 if name is not None:
1979 return self.setResultsName(name)
1980 else:
1981 return self.copy()
1982
1984 """
1985 Suppresses the output of this C{ParserElement}; useful to keep punctuation from
1986 cluttering up returned output.
1987 """
1988 return Suppress( self )
1989
1991 """
1992 Disables the skipping of whitespace before matching the characters in the
1993 C{ParserElement}'s defined pattern. This is normally only used internally by
1994 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1995 """
1996 self.skipWhitespace = False
1997 return self
1998
2000 """
2001 Overrides the default whitespace chars
2002 """
2003 self.skipWhitespace = True
2004 self.whiteChars = chars
2005 self.copyDefaultWhiteChars = False
2006 return self
2007
2009 """
2010 Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
2011 Must be called before C{parseString} when the input grammar contains elements that
2012 match C{<TAB>} characters.
2013 """
2014 self.keepTabs = True
2015 return self
2016
2018 """
2019 Define expression to be ignored (e.g., comments) while doing pattern
2020 matching; may be called repeatedly, to define multiple comment or other
2021 ignorable patterns.
2022
2023 Example::
2024 patt = OneOrMore(Word(alphas))
2025 patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
2026
2027 patt.ignore(cStyleComment)
2028 patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
2029 """
2030 if isinstance(other, basestring):
2031 other = Suppress(other)
2032
2033 if isinstance( other, Suppress ):
2034 if other not in self.ignoreExprs:
2035 self.ignoreExprs.append(other)
2036 else:
2037 self.ignoreExprs.append( Suppress( other.copy() ) )
2038 return self
2039
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """
    Turn on debugging for this expression and install the three debug callbacks.

    Parameters:
     - startAction - called when a match attempt starts
     - successAction - called after a successful match
     - exceptionAction - called when the match attempt raises

    Any callback given as a false value (e.g. C{None}) is replaced by the
    module's corresponding default debug action. Returns C{self}.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    # stored in the order (start, success, exception)
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self
2049
2051 """
2052 Enable display of debugging messages while doing pattern matching.
2053 Set C{flag} to True to enable, False to disable.
2054 """
2055 if flag:
2056 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
2057 else:
2058 self.debug = False
2059 return self
2060
2063
2066
2068 self.streamlined = True
2069 self.strRepr = None
2070 return self
2071
2074
def validate( self, validateTrace=None ):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Parameters:
     - validateTrace - (default=C{None}) accepted for signature compatibility; this
       implementation does not read it, so the default is C{None} rather than a
       shared mutable list

    The check itself is delegated to C{checkRecursion}, started with an empty trace.
    """
    self.checkRecursion( [] )
2080
def parseFile( self, file_or_filename, parseAll=False ):
    """
    Read the complete contents of a file and run C{L{parseString}} on them.

    Parameters:
     - file_or_filename - an object with a C{read()} method, or a filename; a
       filename is opened in text mode, read in full and closed before parsing starts
     - parseAll - (default=C{False}) passed through to C{parseString}

    Returns: whatever C{parseString} returns for the file contents.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # no usable read() - treat the argument as a filename
        with open(file_or_filename, "r") as source:
            text = source.read()

    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # re-raise via the bound name rather than a bare raise
            # (presumably to shorten the reported stack trace - confirm)
            raise exc
        raise
2100
2102 if isinstance(other, ParserElement):
2103 return self is other or vars(self) == vars(other)
2104 elif isinstance(other, basestring):
2105 return self.matches(other)
2106 else:
2107 return super(ParserElement,self)==other
2108
2110 return not (self == other)
2111
2113 return hash(id(self))
2114
2116 return self == other
2117
2119 return not (self == other)
2120
def matches(self, testString, parseAll=True):
    """
    Report whether this expression parses C{testString}. Handy for quick inline
    microtests of sub-expressions while building up a larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Returns: C{True} if C{parseString} succeeds, C{False} if it raises a
    C{ParseBaseException}.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2139
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and results is a list of
    C{(test string, result)} tuples; each result is the value returned by
    C{parseString}, or the C{ParseBaseException} that was raised for that test

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success
    """
    if isinstance(tests, basestring):
        # Strip each line through its own strip() method instead of the unbound
        # str.strip: basestring also admits unicode on Python 2, for which
        # str.strip raises TypeError.  (The loop variable is deliberately not
        # called "line", which is a module-level helper used further down.)
        tests = [test_line.strip() for test_line in tests.rstrip().splitlines()]
    if isinstance(comment, basestring):
        comment = Literal(comment)
    allResults = []
    comments = []
    success = True
    for t in tests:
        # a comment line, or a blank line that follows pending comment lines,
        # is collected and shown as a header above the next real test
        if comment is not None and comment.matches(t, False) or comments and not t:
            comments.append(t)
            continue
        if not t:
            continue
        out = ['\n'.join(comments), t]
        comments = []
        try:
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            # a successful parse counts against us when failures are expected
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in t:
                # multi-line test: show only the offending line, caret under its column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
            else:
                out.append(' '*pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe

        if printResults:
            if fullDump:
                out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
2258
2259
2260 -class Token(ParserElement):
2261 """
2262 Abstract C{ParserElement} subclass, for defining atomic matching patterns.
2263 """
2266
2267
2268 -class Empty(Token):
2269 """
2270 An empty token, will always match.
2271 """
2273 super(Empty,self).__init__()
2274 self.name = "Empty"
2275 self.mayReturnEmpty = True
2276 self.mayIndexError = False
2277
2280 """
2281 A token that will never match.
2282 """
2284 super(NoMatch,self).__init__()
2285 self.name = "NoMatch"
2286 self.mayReturnEmpty = True
2287 self.mayIndexError = False
2288 self.errmsg = "Unmatchable token"
2289
2290 - def parseImpl( self, instring, loc, doActions=True ):
2292
2295 """
2296 Token to exactly match a specified string.
2297
2298 Example::
2299 Literal('blah').parseString('blah') # -> ['blah']
2300 Literal('blah').parseString('blahfooblah') # -> ['blah']
2301 Literal('blah').parseString('bla') # -> Exception: Expected "blah"
2302
2303 For case-insensitive matching, use L{CaselessLiteral}.
2304
2305 For keyword matching (force word break before and after the matched string),
2306 use L{Keyword} or L{CaselessKeyword}.
2307 """
2309 super(Literal,self).__init__()
2310 self.match = matchString
2311 self.matchLen = len(matchString)
2312 try:
2313 self.firstMatchChar = matchString[0]
2314 except IndexError:
2315 warnings.warn("null string passed to Literal; use Empty() instead",
2316 SyntaxWarning, stacklevel=2)
2317 self.__class__ = Empty
2318 self.name = '"%s"' % _ustr(self.match)
2319 self.errmsg = "Expected " + self.name
2320 self.mayReturnEmpty = False
2321 self.mayIndexError = False
2322
2323
2324
2325
2326
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the literal text at position C{loc}.

    Returns C{(loc + matchLen, match)} when C{instring} contains the literal
    at C{loc}; raises C{ParseException} otherwise.
    """
    # cheap single-character comparison first; startswith() is only needed
    # for literals longer than one character
    if instring[loc] == self.firstMatchChar:
        if self.matchLen == 1 or instring.startswith(self.match, loc):
            return loc + self.matchLen, self.match
    raise ParseException(instring, loc, self.errmsg, self)
2332 _L = Literal
2333 ParserElement._literalStringClass = Literal
2336 """
2337 Token to exactly match a specified string as a keyword, that is, it must be
2338 immediately followed by a non-keyword character. Compare with C{L{Literal}}:
2339 - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
2340 - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
2341 Accepts two optional constructor arguments in addition to the keyword string:
2342 - C{identChars} is a string of characters that would be valid identifier characters,
2343 defaulting to all alphanumerics + "_" and "$"
2344 - C{caseless} allows case-insensitive matching, default is C{False}.
2345
2346 Example::
2347 Keyword("start").parseString("start") # -> ['start']
2348 Keyword("start").parseString("starting") # -> Exception
2349
2350 For case-insensitive matching, use L{CaselessKeyword}.
2351 """
2352 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
2353
2355 super(Keyword,self).__init__()
2356 self.match = matchString
2357 self.matchLen = len(matchString)
2358 try:
2359 self.firstMatchChar = matchString[0]
2360 except IndexError:
2361 warnings.warn("null string passed to Keyword; use Empty() instead",
2362 SyntaxWarning, stacklevel=2)
2363 self.name = '"%s"' % self.match
2364 self.errmsg = "Expected " + self.name
2365 self.mayReturnEmpty = False
2366 self.mayIndexError = False
2367 self.caseless = caseless
2368 if caseless:
2369 self.caselessmatch = matchString.upper()
2370 identChars = identChars.upper()
2371 self.identChars = set(identChars)
2372
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the keyword at C{loc}, requiring that it is neither immediately
    preceded nor immediately followed by a character in C{identChars}.

    When C{caseless} is set, the input text and its neighbouring characters
    are upper-cased before being compared.

    Returns C{(loc + matchLen, match)}; raises C{ParseException} when the
    keyword is not found as a separate word at C{loc}.
    """
    end = loc + self.matchLen
    ident_chars = self.identChars
    if self.caseless:
        text_ok = instring[loc:end].upper() == self.caselessmatch
        if (text_ok
                and (end >= len(instring) or instring[end].upper() not in ident_chars)
                and (loc == 0 or instring[loc - 1].upper() not in ident_chars)):
            return end, self.match
    elif (instring[loc] == self.firstMatchChar
            and (self.matchLen == 1 or instring.startswith(self.match, loc))
            and (end >= len(instring) or instring[end] not in ident_chars)
            and (loc == 0 or instring[loc - 1] not in ident_chars)):
        return end, self.match
    raise ParseException(instring, loc, self.errmsg, self)
2386
2391
2392 @staticmethod
2397
2399 """
2400 Token to match a specified string, ignoring case of letters.
2401 Note: the matched results will always be in the case of the given
2402 match string, NOT the case of the input text.
2403
2404 Example::
2405 OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
2406
2407 (Contrast with example for L{CaselessKeyword}.)
2408 """
2410 super(CaselessLiteral,self).__init__( matchString.upper() )
2411
2412 self.returnString = matchString
2413 self.name = "'%s'" % self.returnString
2414 self.errmsg = "Expected " + self.name
2415
def parseImpl(self, instring, loc, doActions=True):
    """
    Compare the upper-cased input slice at C{loc} with the stored match string.

    Returns C{(loc + matchLen, returnString)}, i.e. the text as it was given
    to the constructor rather than as it appears in the input; raises
    C{ParseException} when the slice does not match.
    """
    candidate = instring[loc:loc + self.matchLen]
    if candidate.upper() != self.match:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc + self.matchLen, self.returnString
2420
2422 """
2423 Caseless version of L{Keyword}.
2424
2425 Example::
2426 OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
2427
2428 (Contrast with example for L{CaselessLiteral}.)
2429 """
2432
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the keyword at C{loc} without regard to case.

    The keyword must not be immediately followed, nor immediately preceded,
    by a character in C{identChars}. The check on the preceding character
    mirrors the caseless branch of C{Keyword.parseImpl}, so that a caseless
    keyword is not recognised in the middle of a longer identifier.

    Returns C{(loc + matchLen, match)}; raises C{ParseException} otherwise.
    """
    end = loc + self.matchLen
    ident_chars = self.identChars
    if (instring[loc:end].upper() == self.caselessmatch
            and (end >= len(instring) or instring[end].upper() not in ident_chars)
            and (loc == 0 or instring[loc - 1].upper() not in ident_chars)):
        return end, self.match
    raise ParseException(instring, loc, self.errmsg, self)
2438
2440 """
2441 Token for matching words composed of allowed character sets.
2442 Defined with string containing all allowed initial characters,
2443 an optional string containing allowed body characters (if omitted,
2444 defaults to the initial character set), and an optional minimum,
2445 maximum, and/or exact length. The default value for C{min} is 1 (a
2446 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2447 are 0, meaning no maximum or exact length restriction. An optional
2448 C{excludeChars} parameter can list characters that might be found in
2449 the input C{bodyChars} string; useful to define a word of all printables
2450 except for one or two characters, for instance.
2451
2452 L{srange} is useful for defining custom character set strings for defining
2453 C{Word} expressions, using range notation from regular expression character sets.
2454
2455 A common mistake is to use C{Word} to match a specific literal string, as in
2456 C{Word("Address")}. Remember that C{Word} uses the string argument to define
2457 I{sets} of matchable characters. This expression would match "Add", "AAA",
2458 "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
2459 To match an exact literal string, use L{Literal} or L{Keyword}.
2460
2461 pyparsing includes helper strings for building Words:
2462 - L{alphas}
2463 - L{nums}
2464 - L{alphanums}
2465 - L{hexnums}
2466 - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
2467 - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
2468 - L{printables} (any non-whitespace character)
2469
2470 Example::
2471 # a word composed of digits
2472 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
2473
2474 # a word with a leading capital, and zero or more lowercase
2475 capital_word = Word(alphas.upper(), alphas.lower())
2476
2477 # hostnames are alphanumeric, with leading alpha, and '-'
2478 hostname = Word(alphas, alphanums+'-')
2479
2480 # roman numeral (not a strict parser, accepts invalid mix of characters)
2481 roman = Word("IVXLCDM")
2482
2483 # any string of non-whitespace characters, except for ','
2484 csv_value = Word(printables, excludeChars=",")
2485 """
def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
    """
    Parameters:
     - initChars - string of characters allowed as the first character of the word
     - bodyChars - (default=C{None}) string of characters allowed after the first;
          when omitted or empty, C{initChars} is used
     - min - (default=1) minimum word length; a value < 1 raises C{ValueError}
     - max - (default=0) maximum word length, 0 meaning no limit
     - exact - (default=0) when > 0, overrides both C{min} and C{max}
     - asKeyword - (default=C{False}) require that the word is not adjacent to
          further body characters
     - excludeChars - (default=C{None}) characters to remove from C{initChars}
          and C{bodyChars}
    """
    super(Word, self).__init__()
    if excludeChars:
        initChars = ''.join(c for c in initChars if c not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    if bodyChars:
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = set(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    # Fast path: with no space in the character sets and default length limits,
    # the word can be expressed as a regular expression; parseImpl uses self.re
    # when it is set to a truthy value.
    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # best effort only: if the generated pattern cannot be compiled,
            # fall back to character-by-character matching.  Catch Exception
            # rather than using a bare except so that KeyboardInterrupt and
            # SystemExit are not swallowed.
            self.re = None
2539
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a word starting at C{loc}.

    When a compiled regular expression is available in C{self.re} it is used
    directly. Otherwise the first character must be in C{initChars} and the
    following characters are consumed while they are in C{bodyChars}, up to
    C{maxLen} characters in total.

    Returns C{(end location, matched text)}; raises C{ParseException} when no
    word matches or a length/keyword constraint is violated.
    """
    if self.re:
        found = self.re.match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)
        return found.end(), found.group()

    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    body = self.bodyChars
    total = len(instring)
    limit = min(start + self.maxLen, total)
    loc = start + 1
    while loc < limit and instring[loc] in body:
        loc += 1

    too_short = loc - start < self.minLen
    # an explicit maximum means the word must really end here, not merely be truncated
    overrun = self.maxSpecified and loc < total and instring[loc] in body
    # keyword mode: no body character may touch the word on either side
    not_isolated = self.asKeyword and (
        (start > 0 and instring[start - 1] in body)
        or (loc < total and instring[loc] in body))

    if too_short or overrun or not_isolated:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
2574
2576 try:
2577 return super(Word,self).__str__()
2578 except:
2579 pass
2580
2581
2582 if self.strRepr is None:
2583
2584 def charsAsStr(s):
2585 if len(s)>4:
2586 return s[:4]+"..."
2587 else:
2588 return s
2589
2590 if ( self.initCharsOrig != self.bodyCharsOrig ):
2591 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
2592 else:
2593 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
2594
2595 return self.strRepr
2596
2597
2598 -class Regex(Token):
2599 """
2600 Token for matching strings that match a given regular expression.
2601 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
2602
2603 Example::
2604 realnum = Regex(r"[+-]?\d+\.\d*")
2605 ssn = Regex(r"\d\d\d-\d\d-\d\d\d\d")
2606 # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
2607 roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
2608 """
2609 compiledREtype = type(re.compile("[A-Z]"))
def __init__(self, pattern, flags=0):
    """
    The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is.
    See the Python C{re} module for an explanation of the acceptable patterns and flags.

    C{pattern} may also be an already compiled regular expression object, in
    which case it is used directly and C{flags} is only recorded, not applied.

    Raises C{ValueError} if C{pattern} is neither a string nor a compiled RE
    object; an invalid pattern string triggers a C{SyntaxWarning} and the
    compile error is re-raised.
    """
    super(Regex, self).__init__()

    if isinstance(pattern, basestring):
        if not pattern:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # record the regular expression source text; str() of a compiled
        # pattern yields the object's repr, not the expression itself
        self.pattern = \
        self.reString = pattern.pattern
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2643
def parseImpl(self, instring, loc, doActions=True):
    """
    Apply the compiled regular expression at C{loc}.

    Returns C{(end of match, ParseResults)} where the results hold the whole
    matched text and every named group of the expression is stored under its
    group name; raises C{ParseException} when the expression does not match.
    """
    found = self.re.match(instring, loc)
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    end = found.end()
    named_groups = found.groupdict()
    tokens = ParseResults(found.group())
    for group_name in named_groups:
        tokens[group_name] = named_groups[group_name]
    return end, tokens
2656
2658 try:
2659 return super(Regex,self).__str__()
2660 except:
2661 pass
2662
2663 if self.strRepr is None:
2664 self.strRepr = "Re:(%s)" % repr(self.pattern)
2665
2666 return self.strRepr
2667
2670 r"""
2671 Token for matching strings that are delimited by quoting characters.
2672
2673 Defined with the following parameters:
2674 - quoteChar - string of one or more characters defining the quote delimiting string
2675 - escChar - character to escape quotes, typically backslash (default=C{None})
2676 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
2677 - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
2678 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
2679 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
2680 - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
2681
2682 Example::
2683 qs = QuotedString('"')
2684 print(qs.searchString('lsjdf "This is the quote" sldjf'))
2685 complex_qs = QuotedString('{{', endQuoteChar='}}')
2686 print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
2687 sql_qs = QuotedString('"', escQuote='""')
2688 print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
2689 prints::
2690 [['This is the quote']]
2691 [['This is the "quote"']]
2692 [['This is the quote with "embedded" quotes']]
2693 """
def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
    """
    Build the regular expression that recognises the quoted string.

    Surrounding whitespace is stripped from C{quoteChar} and C{endQuoteChar};
    if either is empty afterwards a C{SyntaxWarning} is issued and
    C{SyntaxError} is raised. When C{endQuoteChar} is C{None} the opening
    quote is used as the closing quote as well.
    """
    super(QuotedString, self).__init__()

    # whitespace around the quote strings is dropped before they are used
    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is None:
        endQuoteChar = quoteChar
    else:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults
    self.convertWhitespaceEscapes = convertWhitespaceEscapes

    # pieces of the leading "any ordinary character" class
    open_quote_re = re.escape(quoteChar)
    close_first_char = _escapeRegexRangeChars(endQuoteChar[0])
    if escChar is not None:
        esc_in_class = _escapeRegexRangeChars(escChar) or ''
    else:
        esc_in_class = ''

    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        newline_exclusion = ''
    else:
        # single-line strings additionally stop at line breaks
        self.flags = 0
        newline_exclusion = r'\n\r'
    pattern = r'%s(?:[^%s%s%s]' % (open_quote_re, close_first_char, newline_exclusion, esc_in_class)

    if len(endQuoteChar) > 1:
        # allow proper prefixes of a multi-character end quote inside the
        # string, as long as the next character breaks the end quote
        partial_ends = ["%s[^%s]" % (re.escape(endQuoteChar[:i]),
                                     _escapeRegexRangeChars(endQuoteChar[i]))
                        for i in range(len(endQuoteChar) - 1, 0, -1)]
        pattern += '|(?:' + ')|(?:'.join(partial_ends) + ')'
    if escQuote:
        pattern += r'|(?:%s)' % re.escape(escQuote)
    if escChar:
        pattern += r'|(?:%s.)' % re.escape(escChar)
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
    pattern += r')*%s' % re.escape(endQuoteChar)
    self.pattern = pattern

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2758
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a quoted string starting at C{loc}.

    Returns C{(end location, text)}. When C{unquoteResults} is set, the quote
    strings are stripped and, in this order, escaped whitespace is converted
    (if C{convertWhitespaceEscapes} is set), escape characters are removed
    and escaped quotes are replaced by the end quote string. Raises
    C{ParseException} when no quoted string starts at C{loc}.
    """
    # comparing the first character is a cheap pre-filter before running the regex
    result = self.re.match(instring, loc) if instring[loc] == self.firstQuoteChar else None
    if not result:
        raise ParseException(instring, loc, self.errmsg, self)

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off the opening and closing quote strings
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret,basestring):
            # replace escaped whitespace
            if '\\' in ret and self.convertWhitespaceEscapes:
                ws_map = {
                    r'\t' : '\t',
                    r'\n' : '\n',
                    r'\f' : '\f',
                    r'\r' : '\r',
                }
                for wslit,wschar in ws_map.items():
                    ret = ret.replace(wslit, wschar)

            # replace escaped characters; the replacement template is a raw
            # string because "\g" is not a valid string escape sequence
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
2793
2795 try:
2796 return super(QuotedString,self).__str__()
2797 except:
2798 pass
2799
2800 if self.strRepr is None:
2801 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
2802
2803 return self.strRepr
2804
2807 """
2808 Token for matching words composed of characters *not* in a given set (will
2809 include whitespace in matched characters if not listed in the provided exclusion set - see example).
2810 Defined with string containing all disallowed characters, and an optional
2811 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
2812 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2813 are 0, meaning no maximum or exact length restriction.
2814
2815 Example::
2816 # define a comma-separated-value as anything that is not a ','
2817 csv_value = CharsNotIn(',')
2818 print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
2819 prints::
2820 ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
2821 """
def __init__(self, notChars, min=1, max=0, exact=0):
    """
    Parameters:
     - notChars - string of the characters that end the match
     - min - (default=1) minimum length; a value < 1 raises C{ValueError}
     - max - (default=0) maximum length, 0 meaning no limit
     - exact - (default=0) when > 0, overrides both C{min} and C{max}
    """
    super(CharsNotIn, self).__init__()
    # whitespace is not skipped automatically before matching
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    self.minLen = min
    self.maxLen = max if max > 0 else _MAX_INT
    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = (self.minLen == 0)
    self.mayIndexError = False
2845
def parseImpl(self, instring, loc, doActions=True):
    """
    Consume characters from C{loc} until one of C{notChars} is found, the
    input ends, or C{maxLen} characters have been taken.

    Returns C{(end location, matched text)}; raises C{ParseException} if the
    first character is excluded or fewer than C{minLen} characters match.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    stop = min(start + self.maxLen, len(instring))
    loc = start + 1
    while loc < stop and instring[loc] not in excluded:
        loc += 1

    if loc - start < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
2862
2864 try:
2865 return super(CharsNotIn, self).__str__()
2866 except:
2867 pass
2868
2869 if self.strRepr is None:
2870 if len(self.notChars) > 4:
2871 self.strRepr = "!W:(%s...)" % self.notChars[:4]
2872 else:
2873 self.strRepr = "!W:(%s)" % self.notChars
2874
2875 return self.strRepr
2876
2878 """
2879 Special matching class for matching whitespace. Normally, whitespace is ignored
2880 by pyparsing grammars. This class is included when some whitespace structures
2881 are significant. Define with a string containing the whitespace characters to be
2882 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
2883 as defined for the C{L{Word}} class.
2884 """
2885 whiteStrs = {
2886 " " : "<SPC>",
2887 "\t": "<TAB>",
2888 "\n": "<LF>",
2889 "\r": "<CR>",
2890 "\f": "<FF>",
2891 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """
    Parameters:
     - ws - (default=C{" \\t\\r\\n"}) string of the whitespace characters to match
     - min - (default=1) minimum number of characters to match
     - max - (default=0) maximum number of characters, 0 meaning no limit
     - exact - (default=0) when > 0, overrides both C{min} and C{max}
    """
    super(White, self).__init__()
    self.matchWhite = ws
    # the characters being matched must not also be skipped as leading whitespace
    skippable = "".join(c for c in self.whiteChars if c not in self.matchWhite)
    self.setWhitespaceChars(skippable)

    self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    self.minLen = min
    self.maxLen = max if max > 0 else _MAX_INT
    if exact > 0:
        self.maxLen = exact
        self.minLen = exact
2911
def parseImpl(self, instring, loc, doActions=True):
    """
    Consume a run of characters from C{matchWhite} starting at C{loc}, taking
    at most C{maxLen} characters.

    Returns C{(end location, matched whitespace)}; raises C{ParseException}
    if the character at C{loc} is not in C{matchWhite} or the run is shorter
    than C{minLen}.
    """
    white = self.matchWhite
    if instring[loc] not in white:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    stop = min(start + self.maxLen, len(instring))
    loc = start + 1
    while loc < stop and instring[loc] in white:
        loc += 1

    if loc - start < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
2926
2930 super(_PositionToken,self).__init__()
2931 self.name=self.__class__.__name__
2932 self.mayReturnEmpty = True
2933 self.mayIndexError = False
2934
2936 """
2937 Token to advance to a specific column of input text; useful for tabular report scraping.
2938 """
2942
2944 if col(loc,instring) != self.col:
2945 instrlen = len(instring)
2946 if self.ignoreExprs:
2947 loc = self._skipIgnorables( instring, loc )
2948 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2949 loc += 1
2950 return loc
2951
def parseImpl(self, instring, loc, doActions=True):
    """
    Advance from C{loc} to the target column stored in C{self.col}.

    Returns C{(new location, skipped text)}; raises C{ParseException} when
    C{loc} is already past the target column.
    """
    current_col = col(loc, instring)
    if current_col > self.col:
        raise ParseException( instring, loc, "Text not in expected column", self )
    target = loc + self.col - current_col
    return target, instring[loc:target]
2959
2961 """
2962 Matches if current position is at the beginning of a line within the parse string
2963 """
2968
2970 preloc = super(LineStart,self).preParse(instring,loc)
2971 if instring[preloc] == "\n":
2972 loc += 1
2973 return loc
2974
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed, consuming nothing, when C{loc} is at the start of a line.

    C{loc} qualifies if it is 0, if it equals the location C{preParse}
    returns for position 0, or if the preceding character is a newline.
    Returns C{(loc, [])}; raises C{ParseException} otherwise.
    """
    if loc == 0:
        return loc, []
    if loc == self.preParse(instring, 0):
        return loc, []
    if instring[loc - 1] == "\n":
        return loc, []
    raise ParseException(instring, loc, self.errmsg, self)
2981
2983 """
2984 Matches if current position is at the end of a line within the parse string
2985 """
2990
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed at a newline character or at the end of the input.

    Returns C{(loc + 1, "\\n")} when the character at C{loc} is a newline and
    C{(loc + 1, [])} when C{loc} equals the input length; raises
    C{ParseException} in every other case.
    """
    end = len(instring)
    if loc < end and instring[loc] == "\n":
        return loc + 1, "\n"
    if loc == end:
        return loc + 1, []
    raise ParseException(instring, loc, self.errmsg, self)
3001
3003 """
3004 Matches if current position is at the beginning of the parse string
3005 """
3009
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed, consuming nothing, when C{loc} is at the start of the input.

    Besides position 0, the location C{preParse} returns for position 0 is
    accepted as well. Returns C{(loc, [])}; raises C{ParseException} otherwise.
    """
    if loc != 0 and loc != self.preParse(instring, 0):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3016
3018 """
3019 Matches if current position is at the end of the parse string
3020 """
3024
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed only when no input remains at C{loc}.

    Returns C{(loc + 1, [])} when C{loc} is exactly at the end of the input,
    and C{(loc, [])} when C{loc} is already beyond it; raises
    C{ParseException} when input remains.
    """
    end = len(instring)
    if loc < end:
        raise ParseException(instring, loc, self.errmsg, self)
    if loc == end:
        return loc + 1, []
    # loc > end is the only remaining possibility, so the former trailing
    # "else: raise" branch could never run and has been dropped
    return loc, []
3034
3036 """
3037 Matches if the current position is at the beginning of a Word, and
3038 is not preceded by any character in a given set of C{wordChars}
3039 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
3040 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
3041 the string being parsed, or at the beginning of a line.
3042 """
3044 super(WordStart,self).__init__()
3045 self.wordChars = set(wordChars)
3046 self.errmsg = "Not at the start of a word"
3047
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed, consuming nothing, when C{loc} is at the start of a word.

    Position 0 always qualifies. Elsewhere the preceding character must not
    be in C{wordChars} and the character at C{loc} must be. Returns
    C{(loc, [])}; raises C{ParseException} otherwise.
    """
    if loc == 0:
        return loc, []
    word_chars = self.wordChars
    if instring[loc - 1] in word_chars or instring[loc] not in word_chars:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3054
3056 """
3057 Matches if the current position is at the end of a Word, and
3058 is not followed by any character in a given set of C{wordChars}
3059 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
3060 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
3061 the string being parsed, or at the end of a line.
3062 """
3064 super(WordEnd,self).__init__()
3065 self.wordChars = set(wordChars)
3066 self.skipWhitespace = False
3067 self.errmsg = "Not at the end of a word"
3068
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed, consuming nothing, when C{loc} is at the end of a word.

    A location at or beyond the end of the input always qualifies. Elsewhere
    the character at C{loc} must not be in C{wordChars} and the preceding
    character must be. Returns C{(loc, [])}; raises C{ParseException} otherwise.
    """
    total = len(instring)
    inside_input = total > 0 and loc < total
    if inside_input:
        word_chars = self.wordChars
        if instring[loc] in word_chars or instring[loc - 1] not in word_chars:
            raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3076
3079 """
3080 Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
3081 """
def __init__(self, exprs, savelist=False):
    """
    Store the contained expressions in C{self.exprs}.

    Parameters:
     - exprs - a single string, a sequence or other iterable (including a
          generator) of expressions, or a single expression. A string, or a
          sequence consisting only of strings, is converted using
          C{ParserElement._literalStringClass}; a non-iterable value is
          wrapped in a one-element list.
     - savelist - (default=C{False}) passed through to the base class constructor
    """
    super(ParseExpression, self).__init__(savelist)

    # The Sequence ABC lives in collections.abc; the alias collections.Sequence
    # was removed in Python 3.10.  Fall back to the old location on Python 2.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    if isinstance(exprs, _generatorType):
        exprs = list(exprs)

    if isinstance(exprs, basestring):
        self.exprs = [ParserElement._literalStringClass(exprs)]
    elif isinstance(exprs, Sequence):
        # a sequence made up only of strings is converted element by element
        if all(isinstance(expr, basestring) for expr in exprs):
            exprs = map(ParserElement._literalStringClass, exprs)
        self.exprs = list(exprs)
    else:
        try:
            self.exprs = list(exprs)
        except TypeError:
            # not iterable: treat it as a single expression
            self.exprs = [exprs]
    self.callPreparse = False
3100
3102 return self.exprs[i]
3103
3105 self.exprs.append( other )
3106 self.strRepr = None
3107 return self
3108
3110 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
3111 all contained expressions."""
3112 self.skipWhitespace = False
3113 self.exprs = [ e.copy() for e in self.exprs ]
3114 for e in self.exprs:
3115 e.leaveWhitespace()
3116 return self
3117
3119 if isinstance( other, Suppress ):
3120 if other not in self.ignoreExprs:
3121 super( ParseExpression, self).ignore( other )
3122 for e in self.exprs:
3123 e.ignore( self.ignoreExprs[-1] )
3124 else:
3125 super( ParseExpression, self).ignore( other )
3126 for e in self.exprs:
3127 e.ignore( self.ignoreExprs[-1] )
3128 return self
3129
3131 try:
3132 return super(ParseExpression,self).__str__()
3133 except:
3134 pass
3135
3136 if self.strRepr is None:
3137 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
3138 return self.strRepr
3139
3141 super(ParseExpression,self).streamline()
3142
3143 for e in self.exprs:
3144 e.streamline()
3145
3146
3147
3148
3149 if ( len(self.exprs) == 2 ):
3150 other = self.exprs[0]
3151 if ( isinstance( other, self.__class__ ) and
3152 not(other.parseAction) and
3153 other.resultsName is None and
3154 not other.debug ):
3155 self.exprs = other.exprs[:] + [ self.exprs[1] ]
3156 self.strRepr = None
3157 self.mayReturnEmpty |= other.mayReturnEmpty
3158 self.mayIndexError |= other.mayIndexError
3159
3160 other = self.exprs[-1]
3161 if ( isinstance( other, self.__class__ ) and
3162 not(other.parseAction) and
3163 other.resultsName is None and
3164 not other.debug ):
3165 self.exprs = self.exprs[:-1] + other.exprs[:]
3166 self.strRepr = None
3167 self.mayReturnEmpty |= other.mayReturnEmpty
3168 self.mayIndexError |= other.mayIndexError
3169
3170 self.errmsg = "Expected " + _ustr(self)
3171
3172 return self
3173
3177
def validate( self, validateTrace=[] ):
    """
    Validate every contained expression, then run C{checkRecursion} on this
    expression with an empty list.

    C{validateTrace} is never modified: each contained expression receives a
    new list consisting of the trace so far plus this expression.
    """
    trace = validateTrace + [self]
    for sub_expr in self.exprs:
        sub_expr.validate(trace)
    self.checkRecursion([])
3183
3188
3189 -class And(ParseExpression):
3190 """
3191 Requires all given C{ParseExpression}s to be found in the given order.
3192 Expressions may be separated by whitespace.
3193 May be constructed using the C{'+'} operator.
3194 May also be constructed using the C{'-'} operator, which will suppress backtracking.
3195
3196 Example::
3197 integer = Word(nums)
3198 name_expr = OneOrMore(Word(alphas))
3199
3200 expr = And([integer("id"),name_expr("name"),integer("age")])
3201 # more easily written as:
3202 expr = integer("id") + name_expr("name") + integer("age")
3203 """
3204
3210
def __init__(self, exprs, savelist=True):
    """
    Build an C{And} from a sequence of expressions.

    Parameters:
     - exprs - the expressions to be matched, in order
     - savelist - (default=C{True}) forwarded unchanged to the base class
       constructor
    """
    super(And, self).__init__(exprs, savelist)

    # the sequence can only be empty if each of its members can be empty
    can_be_empty = True
    for member in self.exprs:
        if not member.mayReturnEmpty:
            can_be_empty = False
            break
    self.mayReturnEmpty = can_be_empty

    # whitespace handling is inherited from the leading expression
    leader = self.exprs[0]
    self.setWhitespaceChars(leader.whiteChars)
    self.skipWhitespace = leader.skipWhitespace
    self.callPreparse = True
3217
def parseImpl(self, instring, loc, doActions=True):
    """
    Match each contained expression in turn, accumulating their tokens.

    The first expression is parsed with C{callPreParse=False}. Once an
    C{And._ErrorStop} marker has been passed, a failure of any later
    expression is re-raised as a C{ParseSyntaxException}.

    Returns a C{(loc, tokens)} pair.
    """
    loc, collected = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
    past_error_stop = False
    for member in self.exprs[1:]:
        if isinstance(member, And._ErrorStop):
            # marker only - it contributes no tokens of its own
            past_error_stop = True
            continue

        if not past_error_stop:
            loc, piece = member._parse(instring, loc, doActions)
        else:
            try:
                loc, piece = member._parse(instring, loc, doActions)
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException(pe)
            except IndexError:
                raise ParseSyntaxException(
                    ParseException(instring, len(instring), self.errmsg, self))

        # keep results that hold tokens or named values
        if piece or piece.haskeys():
            collected += piece
    return loc, collected
3242
3244 if isinstance( other, basestring ):
3245 other = ParserElement._literalStringClass( other )
3246 return self.append( other )
3247
3249 subRecCheckList = parseElementList[:] + [ self ]
3250 for e in self.exprs:
3251 e.checkRecursion( subRecCheckList )
3252 if not e.mayReturnEmpty:
3253 break
3254
3256 if hasattr(self,"name"):
3257 return self.name
3258
3259 if self.strRepr is None:
3260 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
3261
3262 return self.strRepr
3263
3264
3265 -class Or(ParseExpression):
3266 """
3267 Requires that at least one C{ParseExpression} is found.
3268 If two expressions match, the expression that matches the longest string will be used.
3269 May be constructed using the C{'^'} operator.
3270
3271 Example::
3272 # construct Or using '^' operator
3273
3274 number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
3275 print(number.searchString("123 3.1416 789"))
3276 prints::
3277 [['123'], ['3.1416'], ['789']]
3278 """
def __init__(self, exprs, savelist=False):
    """
    Build an C{Or} from a sequence of alternative expressions.

    Parameters:
     - exprs - the alternative expressions
     - savelist - (default=C{False}) forwarded unchanged to the base class
       constructor
    """
    super(Or, self).__init__(exprs, savelist)
    # with no alternatives at all the expression is flagged as possibly
    # empty; otherwise one possibly-empty alternative is enough
    self.mayReturnEmpty = (not self.exprs) or any(alt.mayReturnEmpty for alt in self.exprs)
3285
def parseImpl(self, instring, loc, doActions=True):
    """
    Try every alternative with C{tryParse}, then really parse the ones that
    matched, longest match first, returning the first that succeeds.

    If nothing succeeds, the failure that got furthest into the input is
    re-raised with this expression's C{errmsg}; with no alternatives at all
    a C{ParseException} ("no defined alternatives to match") is raised.
    """
    furthest_loc = -1
    furthest_exc = None
    candidates = []
    for alt in self.exprs:
        try:
            end_loc = alt.tryParse(instring, loc)
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthest_loc:
                furthest_exc, furthest_loc = err, err.loc
        except IndexError:
            input_end = len(instring)
            if input_end > furthest_loc:
                furthest_exc = ParseException(instring, input_end, alt.errmsg, self)
                furthest_loc = input_end
        else:
            # remember how far this alternative got, for the retry pass
            candidates.append((end_loc, alt))

    # longest match first; the sort is stable, so equal-length matches keep
    # the order in which the alternatives were listed
    for _, alt in sorted(candidates, key=lambda pair: pair[0], reverse=True):
        try:
            return alt._parse(instring, loc, doActions)
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthest_loc:
                furthest_exc, furthest_loc = err, err.loc

    if furthest_exc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_exc.msg = self.errmsg
    raise furthest_exc
3322
3323
3325 if isinstance( other, basestring ):
3326 other = ParserElement._literalStringClass( other )
3327 return self.append( other )
3328
3330 if hasattr(self,"name"):
3331 return self.name
3332
3333 if self.strRepr is None:
3334 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
3335
3336 return self.strRepr
3337
3339 subRecCheckList = parseElementList[:] + [ self ]
3340 for e in self.exprs:
3341 e.checkRecursion( subRecCheckList )
3342
3345 """
3346 Requires that at least one C{ParseExpression} is found.
3347 If two expressions match, the first one listed is the one that will match.
3348 May be constructed using the C{'|'} operator.
3349
3350 Example::
3351 # construct MatchFirst using '|' operator
3352
3353 # watch the order of expressions to match
3354 number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
3355 print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]
3356
3357 # put more selective expression first
3358 number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
3359 print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]
3360 """
def __init__(self, exprs, savelist=False):
    """
    Build a C{MatchFirst} from a sequence of alternative expressions.

    Parameters:
     - exprs - the alternative expressions, in the order they are to be tried
     - savelist - (default=C{False}) forwarded unchanged to the base class
       constructor
    """
    super(MatchFirst, self).__init__(exprs, savelist)
    if not self.exprs:
        # no alternatives: flagged as possibly empty
        self.mayReturnEmpty = True
    else:
        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)
3367
def parseImpl(self, instring, loc, doActions=True):
    """
    Return the result of the first alternative that parses successfully.

    If every alternative fails, the failure that got furthest into the input
    is re-raised with this expression's C{errmsg}; with no alternatives at
    all a C{ParseException} ("no defined alternatives to match") is raised.
    """
    furthest_loc = -1
    furthest_exc = None
    for alt in self.exprs:
        try:
            return alt._parse(instring, loc, doActions)
        except ParseException as err:
            if err.loc > furthest_loc:
                furthest_exc, furthest_loc = err, err.loc
        except IndexError:
            input_end = len(instring)
            if input_end > furthest_loc:
                furthest_exc = ParseException(instring, input_end, alt.errmsg, self)
                furthest_loc = input_end

    # reaching this point means that no alternative matched
    if furthest_exc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_exc.msg = self.errmsg
    raise furthest_exc
3391
3393 if isinstance( other, basestring ):
3394 other = ParserElement._literalStringClass( other )
3395 return self.append( other )
3396
3398 if hasattr(self,"name"):
3399 return self.name
3400
3401 if self.strRepr is None:
3402 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
3403
3404 return self.strRepr
3405
3407 subRecCheckList = parseElementList[:] + [ self ]
3408 for e in self.exprs:
3409 e.checkRecursion( subRecCheckList )
3410
3411
3412 -class Each(ParseExpression):
3413 """
3414 Requires all given C{ParseExpression}s to be found, but in any order.
3415 Expressions may be separated by whitespace.
3416 May be constructed using the C{'&'} operator.
3417
3418 Example::
3419 color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
3420 shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
3421 integer = Word(nums)
3422 shape_attr = "shape:" + shape_type("shape")
3423 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
3424 color_attr = "color:" + color("color")
3425 size_attr = "size:" + integer("size")
3426
3427 # use Each (using operator '&') to accept attributes in any order
3428 # (shape and posn are required, color and size are optional)
3429 shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)
3430
3431 shape_spec.runTests('''
3432 shape: SQUARE color: BLACK posn: 100, 120
3433 shape: CIRCLE size: 50 color: BLUE posn: 50,80
3434 color:GREEN size:20 shape:TRIANGLE posn:20,40
3435 '''
3436 )
3437 prints::
3438 shape: SQUARE color: BLACK posn: 100, 120
3439 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
3440 - color: BLACK
3441 - posn: ['100', ',', '120']
3442 - x: 100
3443 - y: 120
3444 - shape: SQUARE
3445
3446
3447 shape: CIRCLE size: 50 color: BLUE posn: 50,80
3448 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
3449 - color: BLUE
3450 - posn: ['50', ',', '80']
3451 - x: 50
3452 - y: 80
3453 - shape: CIRCLE
3454 - size: 50
3455
3456
3457 color: GREEN size: 20 shape: TRIANGLE posn: 20,40
3458 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
3459 - color: GREEN
3460 - posn: ['20', ',', '40']
3461 - x: 20
3462 - y: 40
3463 - shape: TRIANGLE
3464 - size: 20
3465 """
def __init__(self, exprs, savelist=True):
    """
    Build an C{Each} from a sequence of expressions.

    Parameters:
     - exprs - the expressions that must all be found
     - savelist - (default=C{True}) forwarded unchanged to the base class
       constructor
    """
    super(Each, self).__init__(exprs, savelist)

    # possibly empty only when every member is possibly empty
    can_be_empty = True
    for member in self.exprs:
        if not member.mayReturnEmpty:
            can_be_empty = False
            break
    self.mayReturnEmpty = can_be_empty

    self.skipWhitespace = True
    # the expression groups are computed on the first call to parseImpl
    self.initExprGroups = True
3471
def parseImpl(self, instring, loc, doActions=True):
    """
    Match all contained expressions, in whatever order they occur.

    The work is done in three stages: the contained expressions are sorted
    into groups (first call only), C{tryParse} is used repeatedly to discover
    the order in which they match, and finally they are parsed for real in
    that order and their results merged.

    Raises C{ParseException} if a required expression is never matched.
    """
    if self.initExprGroups:
        # one-time grouping of the contained expressions
        wrappers = [e for e in self.exprs if isinstance(e, Optional)]
        # maps id() of an Optional's inner expression back to the Optional
        self.opt1map = dict((id(w.expr), w) for w in wrappers)
        unwrapped = [w.expr for w in wrappers]
        implicit = [e for e in self.exprs
                    if e.mayReturnEmpty and not isinstance(e, Optional)]
        self.optionals = unwrapped + implicit
        self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
        self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
        self.required = [e for e in self.exprs
                         if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
        self.required += self.multirequired
        self.initExprGroups = False

    scan_loc = loc
    pending_required = self.required[:]
    pending_optional = self.optionals[:]
    match_order = []

    # keep sweeping over the candidates until a whole sweep matches nothing
    while True:
        candidates = (pending_required + pending_optional
                      + self.multioptionals + self.multirequired)
        misses = 0
        for cand in candidates:
            try:
                scan_loc = cand.tryParse(instring, scan_loc)
            except ParseException:
                misses += 1
            else:
                match_order.append(self.opt1map.get(id(cand), cand))
                if cand in pending_required:
                    pending_required.remove(cand)
                elif cand in pending_optional:
                    pending_optional.remove(cand)
        if misses == len(candidates):
            break

    if pending_required:
        missing = ", ".join(_ustr(e) for e in pending_required)
        raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

    # append the Optionals that never matched, in case they define defaults
    match_order += [e for e in self.exprs
                    if isinstance(e, Optional) and e.expr in pending_optional]

    # second stage: parse for real, in the discovered order
    pieces = []
    for e in match_order:
        loc, parsed = e._parse(instring, loc, doActions)
        pieces.append(parsed)

    # third stage: merge, combining values of names that occur more than once
    merged = ParseResults()
    for piece in pieces:
        repeated = {}
        for key in piece.keys():
            if key in merged:
                combined = ParseResults(merged[key])
                combined += ParseResults(piece[key])
                repeated[key] = combined
        merged += ParseResults(piece)
        for key, value in repeated.items():
            merged[key] = value
    return loc, merged
3530
3532 if hasattr(self,"name"):
3533 return self.name
3534
3535 if self.strRepr is None:
3536 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
3537
3538 return self.strRepr
3539
3541 subRecCheckList = parseElementList[:] + [ self ]
3542 for e in self.exprs:
3543 e.checkRecursion( subRecCheckList )
3544
3547 """
3548 Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
3549 """
def __init__(self, expr, savelist=False):
    """
    Wrap a single expression.

    Parameters:
     - expr - the expression to wrap; a string is converted with
       C{ParserElement._literalStringClass}, and C{None} is accepted
     - savelist - (default=C{False}) forwarded unchanged to the base class
       constructor
    """
    super(ParseElementEnhance, self).__init__(savelist)
    if isinstance(expr, basestring):
        expr = ParserElement._literalStringClass(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        # nothing to inherit settings from
        return

    # mirror the parsing attributes of the wrapped expression
    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars(expr.whiteChars)
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
3564
def parseImpl(self, instring, loc, doActions=True):
    """
    Delegate parsing to the wrapped expression.

    Returns whatever the wrapped expression's C{_parse} returns (called with
    C{callPreParse=False}).

    Raises C{ParseException} if no wrapped expression has been set.
    """
    if self.expr is None:
        # Pass the real input string (previously "") so that the exception
        # carries the text that was being parsed.
        raise ParseException(instring, loc, self.errmsg, self)
    return self.expr._parse(instring, loc, doActions, callPreParse=False)
3570
3572 self.skipWhitespace = False
3573 self.expr = self.expr.copy()
3574 if self.expr is not None:
3575 self.expr.leaveWhitespace()
3576 return self
3577
3579 if isinstance( other, Suppress ):
3580 if other not in self.ignoreExprs:
3581 super( ParseElementEnhance, self).ignore( other )
3582 if self.expr is not None:
3583 self.expr.ignore( self.ignoreExprs[-1] )
3584 else:
3585 super( ParseElementEnhance, self).ignore( other )
3586 if self.expr is not None:
3587 self.expr.ignore( self.ignoreExprs[-1] )
3588 return self
3589
3595
3597 if self in parseElementList:
3598 raise RecursiveGrammarException( parseElementList+[self] )
3599 subRecCheckList = parseElementList[:] + [ self ]
3600 if self.expr is not None:
3601 self.expr.checkRecursion( subRecCheckList )
3602
def validate(self, validateTrace=None):
    """
    Validate the wrapped expression (if any), then run C{checkRecursion} on
    this expression starting from an empty list.

    Parameters:
     - validateTrace - (default=C{None}) sequence of the expressions that are
       already being validated further up the call chain. It is copied, never
       modified; C{None} is treated as an empty trace.
    """
    # A None sentinel replaces the former mutable ``[]`` default so that no
    # list object is shared between calls.
    trace = list(validateTrace) if validateTrace is not None else []
    trace.append(self)
    if self.expr is not None:
        self.expr.validate(trace)
    self.checkRecursion([])
3608
3610 try:
3611 return super(ParseElementEnhance,self).__str__()
3612 except:
3613 pass
3614
3615 if self.strRepr is None and self.expr is not None:
3616 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
3617 return self.strRepr
3618
3621 """
3622 Lookahead matching of the given parse expression. C{FollowedBy}
3623 does *not* advance the parsing position within the input string, it only
3624 verifies that the specified parse expression matches at the current
3625 position. C{FollowedBy} always returns a null token list.
3626
3627 Example::
3628 # use FollowedBy to match a label only if it is followed by a ':'
3629 data_word = Word(alphas)
3630 label = data_word + FollowedBy(':')
3631 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
3632
3633 OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
3634 prints::
3635 [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
3636 """
3640
def parseImpl(self, instring, loc, doActions=True):
    """
    Check that the wrapped expression matches at C{loc} without consuming
    any input.

    Returns C{(loc, [])} with C{loc} unchanged; any exception raised by the
    wrapped expression's C{tryParse} propagates to the caller.
    """
    lookahead = self.expr
    # result discarded - only success or failure matters
    lookahead.tryParse(instring, loc)
    no_tokens = []
    return loc, no_tokens
3644
3645
3646 -class NotAny(ParseElementEnhance):
3647 """
3648 Lookahead to disallow matching with the given parse expression. C{NotAny}
3649 does *not* advance the parsing position within the input string, it only
3650 verifies that the specified parse expression does *not* match at the current
3651 position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
3652 always returns a null token list. May be constructed using the '~' operator.
3653
3654 Example::
3655
3656 """
3658 super(NotAny,self).__init__(expr)
3659
3660 self.skipWhitespace = False
3661 self.mayReturnEmpty = True
3662 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
3663
3664 - def parseImpl( self, instring, loc, doActions=True ):
3668
3670 if hasattr(self,"name"):
3671 return self.name
3672
3673 if self.strRepr is None:
3674 self.strRepr = "~{" + _ustr(self.expr) + "}"
3675
3676 return self.strRepr
3677
3680 """
3681 Repetition of one or more of the given expression.
3682
3683 Parameters:
3684 - expr - expression that must match one or more times
3685 - stopOn - (default=C{None}) - expression for a terminating sentinel
3686 (only required if the sentinel would ordinarily match the repetition
3687 expression)
3688
3689 Example::
3690 data_word = Word(alphas)
3691 label = data_word + FollowedBy(':')
3692 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
3693
3694 text = "shape: SQUARE posn: upper left color: BLACK"
3695 OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]
3696
3697 # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
3698 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
3699 OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
3700
3701 # could also be written as
3702 (attr_expr * (1,)).parseString(text).pprint()
3703 """
def __init__(self, expr, stopOn=None):
    """
    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) expression for a terminating sentinel; a
       string is converted with C{ParserElement._literalStringClass}
    """
    super(OneOrMore, self).__init__(expr)
    sentinel = stopOn
    if isinstance(sentinel, basestring):
        sentinel = ParserElement._literalStringClass(sentinel)
    # store the negated sentinel, or None when no sentinel was given
    if sentinel is None:
        self.not_ender = None
    else:
        self.not_ender = ~sentinel
3710
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the wrapped expression once, then as many more times as possible.

    When a stop sentinel is configured, C{not_ender} is tried before every
    match attempt, including the first. The first match is mandatory, so its
    failure propagates; a C{ParseException} or C{IndexError} during a later
    attempt simply ends the repetition.

    Returns a C{(loc, tokens)} pair.
    """
    parse_once = self.expr._parse
    skip_ignored = self._skipIgnorables
    if self.not_ender is not None:
        reject_sentinel = self.not_ender.tryParse
    else:
        reject_sentinel = None

    # mandatory first match (failing first if we are at the sentinel)
    if reject_sentinel is not None:
        reject_sentinel(instring, loc)
    loc, tokens = parse_once(instring, loc, doActions, callPreParse=False)

    try:
        must_skip = bool(self.ignoreExprs)
        while True:
            if reject_sentinel is not None:
                reject_sentinel(instring, loc)
            start = skip_ignored(instring, loc) if must_skip else loc
            loc, extra = parse_once(instring, start, doActions)
            if extra or extra.haskeys():
                tokens += extra
    except (ParseException, IndexError):
        # no further repetition - keep what has been collected so far
        pass

    return loc, tokens
3739
3741 if hasattr(self,"name"):
3742 return self.name
3743
3744 if self.strRepr is None:
3745 self.strRepr = "{" + _ustr(self.expr) + "}..."
3746
3747 return self.strRepr
3748
3753
3755 """
3756 Optional repetition of zero or more of the given expression.
3757
3758 Parameters:
3759 - expr - expression that must match zero or more times
3760 - stopOn - (default=C{None}) - expression for a terminating sentinel
3761 (only required if the sentinel would ordinarily match the repetition
3762 expression)
3763
3764 Example: similar to L{OneOrMore}
3765 """
3766 - def __init__( self, expr, stopOn=None):
3769
3770 - def parseImpl( self, instring, loc, doActions=True ):
3775
3777 if hasattr(self,"name"):
3778 return self.name
3779
3780 if self.strRepr is None:
3781 self.strRepr = "[" + _ustr(self.expr) + "]..."
3782
3783 return self.strRepr
3784
3791
3792 _optionalNotMatched = _NullToken()
3794 """
3795 Optional matching of the given expression.
3796
3797 Parameters:
3798 - expr - expression that may match zero or one time
3799 - default (optional) - value to be returned if the optional expression is not found.
3800
3801 Example::
3802 # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
3803 zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
3804 zip.runTests('''
3805 # traditional ZIP code
3806 12345
3807
3808 # ZIP+4 form
3809 12101-0001
3810
3811 # invalid ZIP
3812 98765-
3813 ''')
3814 prints::
3815 # traditional ZIP code
3816 12345
3817 ['12345']
3818
3819 # ZIP+4 form
3820 12101-0001
3821 ['12101-0001']
3822
3823 # invalid ZIP
3824 98765-
3825 ^
3826 FAIL: Expected end of text (at char 5), (line:1, col:6)
3827 """
3829 super(Optional,self).__init__( expr, savelist=False )
3830 self.defaultValue = default
3831 self.mayReturnEmpty = True
3832
def parseImpl(self, instring, loc, doActions=True):
    """
    Try to match the wrapped expression; never fail if it does not match.

    On a C{ParseException} or C{IndexError} from the wrapped expression,
    C{loc} is returned unchanged together with:
     - an empty list, when no default value was configured
     - the default value, otherwise (also stored under the wrapped
       expression's results name, if it has one)
    """
    try:
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
    except (ParseException, IndexError):
        fallback = self.defaultValue
        if fallback is _optionalNotMatched:
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([fallback])
            tokens[self.expr.resultsName] = fallback
        else:
            tokens = [fallback]
    return loc, tokens
3846
3848 if hasattr(self,"name"):
3849 return self.name
3850
3851 if self.strRepr is None:
3852 self.strRepr = "[" + _ustr(self.expr) + "]"
3853
3854 return self.strRepr
3855
3856 -class SkipTo(ParseElementEnhance):
3857 """
3858 Token for skipping over all undefined text until the matched expression is found.
3859
3860 Parameters:
3861 - expr - target expression marking the end of the data to be skipped
3862 - include - (default=C{False}) if True, the target expression is also parsed
3863 (the skipped text and target expression are returned as a 2-element list).
3864 - ignore - (default=C{None}) used to define grammars (typically quoted strings and
3865 comments) that might contain false matches to the target expression
3866 - failOn - (default=C{None}) define expressions that are not allowed to be
3867 included in the skipped text; if found before the target expression is found,
3868 the SkipTo is not a match
3869
3870 Example::
3871 report = '''
3872 Outstanding Issues Report - 1 Jan 2000
3873
3874 # | Severity | Description | Days Open
3875 -----+----------+-------------------------------------------+-----------
3876 101 | Critical | Intermittent system crash | 6
3877 94 | Cosmetic | Spelling error on Login ('log|n') | 14
3878 79 | Minor | System slow when running too many reports | 47
3879 '''
3880 integer = Word(nums)
3881 SEP = Suppress('|')
3882 # use SkipTo to simply match everything up until the next SEP
3883 # - ignore quoted strings, so that a '|' character inside a quoted string does not match
3884 # - parse action will call token.strip() for each matched token, i.e., the description body
3885 string_data = SkipTo(SEP, ignore=quotedString)
3886 string_data.setParseAction(tokenMap(str.strip))
3887 ticket_expr = (integer("issue_num") + SEP
3888 + string_data("sev") + SEP
3889 + string_data("desc") + SEP
3890 + integer("days_open"))
3891
3892 for tkt in ticket_expr.searchString(report):
3893 print(tkt.dump())
3894 prints::
3895 ['101', 'Critical', 'Intermittent system crash', '6']
3896 - days_open: 6
3897 - desc: Intermittent system crash
3898 - issue_num: 101
3899 - sev: Critical
3900 ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
3901 - days_open: 14
3902 - desc: Spelling error on Login ('log|n')
3903 - issue_num: 94
3904 - sev: Cosmetic
3905 ['79', 'Minor', 'System slow when running too many reports', '47']
3906 - days_open: 47
3907 - desc: System slow when running too many reports
3908 - issue_num: 79
3909 - sev: Minor
3910 """
def __init__(self, other, include=False, ignore=None, failOn=None):
    """
    Parameters:
     - other - target expression marking the end of the text to be skipped
     - include - (default=C{False}) stored as C{includeMatch}
     - ignore - (default=C{None}) stored as C{ignoreExpr}
     - failOn - (default=C{None}) stored as C{failOn}; a string is converted
       with C{ParserElement._literalStringClass}
    """
    super(SkipTo, self).__init__(other)

    fail_expr = failOn
    if isinstance(fail_expr, basestring):
        fail_expr = ParserElement._literalStringClass(fail_expr)

    self.ignoreExpr = ignore
    self.failOn = fail_expr
    self.includeMatch = include
    self.asList = False
    self.mayReturnEmpty = True
    self.mayIndexError = False
    self.errmsg = "No match found for " + _ustr(self.expr)
3923
def parseImpl(self, instring, loc, doActions=True):
    """
    Advance through C{instring} one character at a time until the target
    expression matches, and return the text that was skipped.

    Returns C{(loc, results)} where C{results} holds the skipped text and,
    when C{includeMatch} is set, the tokens of the target expression as well.

    Raises C{ParseException} if the end of the input is passed without the
    target expression matching.
    """
    origin = loc
    input_len = len(instring)
    target_parse = self.expr._parse
    fail_probe = self.failOn.canParseNext if self.failOn is not None else None
    ignore_probe = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

    cursor = loc
    while True:
        if cursor > input_len:
            # ran off the end of the input without finding the target
            raise ParseException(instring, loc, self.errmsg, self)

        # NOTE(review): a failOn match only stops the scan; control then
        # falls through to the result-building code below rather than
        # raising - confirm that this is the intended behaviour.
        if fail_probe is not None and fail_probe(instring, cursor):
            break

        if ignore_probe is not None:
            # step over as many consecutive ignorable matches as possible
            while True:
                try:
                    cursor = ignore_probe(instring, cursor)
                except ParseBaseException:
                    break

        try:
            target_parse(instring, cursor, doActions=False, callPreParse=False)
        except (ParseException, IndexError):
            # target not here - move one character along
            cursor += 1
        else:
            # target found
            break

    loc = cursor
    skipped = ParseResults(instring[origin:loc])

    if self.includeMatch:
        loc, matched = target_parse(instring, loc, doActions, callPreParse=False)
        skipped += matched

    return loc, skipped
3970
3971 -class Forward(ParseElementEnhance):
3972 """
3973 Forward declaration of an expression to be defined later -
3974 used for recursive grammars, such as algebraic infix notation.
3975 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
3976
3977 Note: take care when assigning to C{Forward} not to overlook precedence of operators.
3978 Specifically, '|' has a lower precedence than '<<', so that::
3979 fwdExpr << a | b | c
3980 will actually be evaluated as::
3981 (fwdExpr << a) | b | c
3982 thereby leaving b and c out as parseable alternatives. It is recommended that you
3983 explicitly group the values inserted into the C{Forward}::
3984 fwdExpr << (a | b | c)
3985 Converting to use the '<<=' operator instead will avoid this problem.
3986
3987 See L{ParseResults.pprint} for an example of a recursive parser created using
3988 C{Forward}.
3989 """
3992
3994 if isinstance( other, basestring ):
3995 other = ParserElement._literalStringClass(other)
3996 self.expr = other
3997 self.strRepr = None
3998 self.mayIndexError = self.expr.mayIndexError
3999 self.mayReturnEmpty = self.expr.mayReturnEmpty
4000 self.setWhitespaceChars( self.expr.whiteChars )
4001 self.skipWhitespace = self.expr.skipWhitespace
4002 self.saveAsList = self.expr.saveAsList
4003 self.ignoreExprs.extend(self.expr.ignoreExprs)
4004 return self
4005
4007 return self << other
4008
4010 self.skipWhitespace = False
4011 return self
4012
4014 if not self.streamlined:
4015 self.streamlined = True
4016 if self.expr is not None:
4017 self.expr.streamline()
4018 return self
4019
def validate(self, validateTrace=None):
    """
    Validate the assigned expression (if any), unless this C{Forward} is
    already present in C{validateTrace}; then run C{checkRecursion} starting
    from an empty list.

    Parameters:
     - validateTrace - (default=C{None}) list of the expressions that are
       already being validated further up the call chain. It is never
       modified; C{None} is treated as an empty trace.
    """
    # A None sentinel replaces the former mutable ``[]`` default so that no
    # list object is shared between calls.
    trace = [] if validateTrace is None else validateTrace
    # skip the descent when this Forward is already in the trace
    if self not in trace:
        nested = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(nested)
    self.checkRecursion([])
4026
4028 if hasattr(self,"name"):
4029 return self.name
4030 return self.__class__.__name__ + ": ..."
4031
4032
4033 self._revertClass = self.__class__
4034 self.__class__ = _ForwardNoRecurse
4035 try:
4036 if self.expr is not None:
4037 retString = _ustr(self.expr)
4038 else:
4039 retString = "None"
4040 finally:
4041 self.__class__ = self._revertClass
4042 return self.__class__.__name__ + ": " + retString
4043
4045 if self.expr is not None:
4046 return super(Forward,self).copy()
4047 else:
4048 ret = Forward()
4049 ret <<= self
4050 return ret
4051
4055
4057 """
4058 Abstract subclass of C{ParseExpression}, for converting parsed results.
4059 """
4060 - def __init__( self, expr, savelist=False ):
4063
4065 """
4066 Converter to concatenate all matching tokens to a single string.
4067 By default, the matching patterns must also be contiguous in the input string;
4068 this can be disabled by specifying C{'adjacent=False'} in the constructor.
4069
4070 Example::
4071 real = Word(nums) + '.' + Word(nums)
4072 print(real.parseString('3.1416')) # -> ['3', '.', '1416']
4073 # will also erroneously match the following
4074 print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
4075
4076 real = Combine(Word(nums) + '.' + Word(nums))
4077 print(real.parseString('3.1416')) # -> ['3.1416']
4078 # no match when there are internal spaces
4079 print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
4080 """
def __init__(self, expr, joinString="", adjacent=True):
    """
    Parameters:
     - expr - expression whose matched tokens are to be joined
     - joinString - (default=C{""}) stored as C{joinString}
     - adjacent - (default=C{True}) when true, C{leaveWhitespace} is invoked
       on this expression
    """
    super(Combine, self).__init__(expr)

    if adjacent:
        # must come first: skipWhitespace is switched back on just below
        self.leaveWhitespace()
    self.skipWhitespace = True

    self.adjacent = adjacent
    self.joinString = joinString
    self.callPreparse = True
4090
4097
def postParse(self, instring, loc, tokenlist):
    """
    Replace the matched tokens with one string made by joining them.

    The result starts as a copy of C{tokenlist} with its tokens removed, to
    which the joined string is added. It is wrapped in a list when this
    expression has a results name and the result holds named values.
    """
    combined = tokenlist.copy()
    del combined[:]
    joined = "".join(tokenlist._asStringList(self.joinString))
    combined += ParseResults([joined], modal=self.modalResults)

    if self.resultsName and combined.haskeys():
        return [combined]
    return combined
4107
4108 -class Group(TokenConverter):
4109 """
4110 Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.
4111
4112 Example::
4113 ident = Word(alphas)
4114 num = Word(nums)
4115 term = ident | num
4116 func = ident + Optional(delimitedList(term))
4117 print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']
4118
4119 func = ident + Group(Optional(delimitedList(term)))
4120 print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]
4121 """
4123 super(Group,self).__init__( expr )
4124 self.saveAsList = True
4125
def postParse(self, instring, loc, tokenlist):
    """
    Nest the matched tokens one level deeper.

    Returns a new one-element list whose only item is C{tokenlist} itself.
    """
    nested = [tokenlist]
    return nested
4128
4129 -class Dict(TokenConverter):
4130 """
4131 Converter to return a repetitive expression as a list, but also as a dictionary.
4132 Each element can also be referenced using the first token in the expression as its key.
4133 Useful for tabular report scraping when the first column can be used as a item key.
4134
4135 Example::
4136 data_word = Word(alphas)
4137 label = data_word + FollowedBy(':')
4138 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
4139
4140 text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
4141 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
4142
4143 # print attributes as plain groups
4144 print(OneOrMore(attr_expr).parseString(text).dump())
4145
4146 # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
4147 result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
4148 print(result.dump())
4149
4150 # access named fields as dict entries, or output as dict
4151 print(result['shape'])
4152 print(result.asDict())
4153 prints::
4154 ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
4155
4156 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
4157 - color: light blue
4158 - posn: upper left
4159 - shape: SQUARE
4160 - texture: burlap
4161 SQUARE
4162 {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
4163 See more examples at L{ParseResults} of accessing fields by results name.
4164 """
4166 super(Dict,self).__init__( expr )
4167 self.saveAsList = True
4168
def postParse(self, instring, loc, tokenlist):
    """
    Register each non-empty item of C{tokenlist} under a key taken from the
    item's first element.

    The value stored for a key depends on the size of the item:
     - one element: the empty string
     - two elements, the second not a C{ParseResults}: the second element
     - otherwise: a copy of the item without its first element, unwrapped to
       its sole element when exactly one remains and it holds no named values

    Returns C{tokenlist}, wrapped in a list when this expression has a
    results name.
    """
    for position, entry in enumerate(tokenlist):
        size = len(entry)
        if size == 0:
            continue

        key = entry[0]
        if isinstance(key, int):
            # integer keys are stored in their stripped string form
            key = _ustr(entry[0]).strip()

        if size == 1:
            payload = ""
        elif size == 2 and not isinstance(entry[1], ParseResults):
            payload = entry[1]
        else:
            remainder = entry.copy()
            del remainder[0]
            if len(remainder) != 1 or (isinstance(remainder, ParseResults) and remainder.haskeys()):
                payload = remainder
            else:
                payload = remainder[0]
        tokenlist[key] = _ParseResultsWithOffset(payload, position)

    return [tokenlist] if self.resultsName else tokenlist
4192
4195 """
4196 Converter for ignoring the results of a parsed expression.
4197
4198 Example::
4199 source = "a, b, c,d"
4200 wd = Word(alphas)
4201 wd_list1 = wd + ZeroOrMore(',' + wd)
4202 print(wd_list1.parseString(source))
4203
4204 # often, delimiters that are useful during parsing are just in the
4205 # way afterward - use Suppress to keep them out of the parsed output
4206 wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
4207 print(wd_list2.parseString(source))
4208 prints::
4209 ['a', ',', 'b', ',', 'c', ',', 'd']
4210 ['a', 'b', 'c', 'd']
4211 (See also L{delimitedList}.)
4212 """
4213 - def postParse( self, instring, loc, tokenlist ):
4215
4218
4221 """
4222 Wrapper for parse actions, to ensure they are only called once.
4223 """
4225 self.callable = _trim_arity(methodCall)
4226 self.called = False
4228 if not self.called:
4229 results = self.callable(s,l,t)
4230 self.called = True
4231 return results
4232 raise ParseException(s,l,"")
4235
4237 """
4238 Decorator for debugging parse actions.
4239
4240 Example::
4241 wd = Word(alphas)
4242
4243 @traceParseAction
4244 def remove_duplicate_chars(tokens):
4245 return ''.join(sorted(set(''.join(tokens))))
4246
4247 wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
4248 print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
4249 prints::
4250 >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
4251 <<leaving remove_duplicate_chars (ret: 'dfjkls')
4252 ['dfjkls']
4253 """
4254 f = _trim_arity(f)
4255 def z(*paArgs):
4256 thisFunc = f.__name__
4257 s,l,t = paArgs[-3:]
4258 if len(paArgs)>3:
4259 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
4260 sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
4261 try:
4262 ret = f(*paArgs)
4263 except Exception as exc:
4264 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
4265 raise
4266 sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
4267 return ret
4268 try:
4269 z.__name__ = f.__name__
4270 except AttributeError:
4271 pass
4272 return z
4273
4274
4275
4276
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper that builds an expression for a list of C{expr} items separated by
    C{delim} (a comma unless another delimiter is given).

    By default whitespace and comments may appear between the items and the
    delimiters, the delimiters are suppressed, and the items come back as a
    list of tokens.  Passing C{combine=True} overrides this: the whole match,
    delimiters included, is returned as one single token string.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    # descriptive name of the form "expr [delim expr]..."
    exprText = _ustr(expr)
    listName = exprText + " [" + _ustr(delim) + " " + exprText + "]..."
    if not combine:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(listName)
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(listName)
4295
4297 """
4298 Helper to define a counted list of expressions.
4299 This helper defines a pattern of the form::
4300 integer expr expr expr...
4301 where the leading integer tells how many expr expressions follow.
4302 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
4303
4304 Example::
4305 countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']
4306 """
4307 arrayExpr = Forward()
4308 def countFieldParseAction(s,l,t):
4309 n = t[0]
4310 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
4311 return []
4312 if intExpr is None:
4313 intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
4314 else:
4315 intExpr = intExpr.copy()
4316 intExpr.setName("arrayLen")
4317 intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
4318 return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
4319
4321 ret = []
4322 for i in L:
4323 if isinstance(i,list):
4324 ret.extend(_flatten(i))
4325 else:
4326 ret.append(i)
4327 return ret
4328
4330 """
4331 Helper to define an expression that is indirectly defined from
4332 the tokens matched in a previous expression, that is, it looks
4333 for a 'repeat' of a previous expression. For example::
4334 first = Word(nums)
4335 second = matchPreviousLiteral(first)
4336 matchExpr = first + ":" + second
4337 will match C{"1:1"}, but not C{"1:2"}. Because this matches a
4338 previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
4339 If this is not desired, use C{matchPreviousExpr}.
4340 Do *not* use with packrat parsing enabled.
4341 """
4342 rep = Forward()
4343 def copyTokenToRepeater(s,l,t):
4344 if t:
4345 if len(t) == 1:
4346 rep << t[0]
4347 else:
4348
4349 tflat = _flatten(t.asList())
4350 rep << And(Literal(tt) for tt in tflat)
4351 else:
4352 rep << Empty()
4353 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
4354 rep.setName('(prev) ' + _ustr(expr))
4355 return rep
4356
4358 """
4359 Helper to define an expression that is indirectly defined from
4360 the tokens matched in a previous expression, that is, it looks
4361 for a 'repeat' of a previous expression. For example::
4362 first = Word(nums)
4363 second = matchPreviousExpr(first)
4364 matchExpr = first + ":" + second
4365 will match C{"1:1"}, but not C{"1:2"}. Because this matches by
4366 expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
4367 the expressions are evaluated first, and then compared, so
4368 C{"1"} is compared with C{"10"}.
4369 Do *not* use with packrat parsing enabled.
4370 """
4371 rep = Forward()
4372 e2 = expr.copy()
4373 rep <<= e2
4374 def copyTokenToRepeater(s,l,t):
4375 matchTokens = _flatten(t.asList())
4376 def mustMatchTheseTokens(s,l,t):
4377 theseTokens = _flatten(t.asList())
4378 if theseTokens != matchTokens:
4379 raise ParseException("",0,"")
4380 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
4381 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
4382 rep.setName('(prev) ' + _ustr(expr))
4383 return rep
4384
4386
4387 for c in r"\^-]":
4388 s = s.replace(c,_bslash+c)
4389 s = s.replace("\n",r"\n")
4390 s = s.replace("\t",r"\t")
4391 return _ustr(s)
4392
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Returns a C{L{NoMatch}} expression if no literals are given.  If C{strs} is
    not a string, sequence or generator, a C{SyntaxWarning} is issued and
    C{NoMatch} is returned as well.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12   AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # The ABCs live in collections.abc on Python 3 (the collections.Sequence
    # alias was removed in 3.10); Python 2 only has the old location.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Sequence):
        symbols = list(strs)
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Drop duplicates, and move any symbol that is masked by an earlier, shorter
    # prefix (e.g. "<" before "<=") in front of that prefix, so that the longer
    # alternative is always tested first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if isequal(other, cur):
                del symbols[i+j+1]
                break
            elif masks(cur, other):
                del symbols[i+j+1]
                symbols.insert(i,other)
                break
        else:
            # nothing after position i conflicts with it - move on
            i += 1

    if not caseless and useRegex:
        try:
            # A character class is only valid when *every* symbol is exactly one
            # character; comparing total lengths is fooled by a mix of empty
            # and multi-character strings.
            if all(len(sym) == 1 for sym in symbols):
                pattern = "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)
            else:
                pattern = "|".join(re.escape(sym) for sym in symbols)
            return Regex( pattern ).setName(' | '.join(symbols))
        except Exception:
            # best effort: fall through to the MatchFirst form below, but do not
            # swallow KeyboardInterrupt/SystemExit as a bare except would
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4467
4469 """
4470 Helper to easily and clearly define a dictionary by specifying the respective patterns
4471 for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
4472 in the proper order. The key pattern can include delimiting markers or punctuation,
4473 as long as they are suppressed, thereby leaving the significant key text. The value
4474 pattern can include named results, so that the C{Dict} results can include named token
4475 fields.
4476
4477 Example::
4478 text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
4479 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
4480 print(OneOrMore(attr_expr).parseString(text).dump())
4481
4482 attr_label = label
4483 attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
4484
4485 # similar to Dict, but simpler call format
4486 result = dictOf(attr_label, attr_value).parseString(text)
4487 print(result.dump())
4488 print(result['shape'])
4489 print(result.shape) # object attribute access works too
4490 print(result.asDict())
4491 prints::
4492 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
4493 - color: light blue
4494 - posn: upper left
4495 - shape: SQUARE
4496 - texture: burlap
4497 SQUARE
4498 SQUARE
4499 {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
4500 """
4501 return Dict( ZeroOrMore( Group ( key + value ) ) )
4502
def originalTextFor(expr, asString=True):
    """
    Helper that wraps C{expr} so that a match yields the original, untokenized
    slice of the input text that C{expr} covered.  Useful to restore the parsed
    fields of an HTML start tag into the raw tag text itself, or to turn separate
    tokens with intervening whitespace back into the original matching input
    text.  By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers that record the parse location before and after expr
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    # the end marker must report the location right where expr stopped
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    if not asString:
        def extractText(s,l,t):
            # replace the tokens in place, dropping the two marker names but
            # leaving any other results names on t untouched
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
    else:
        extractText = lambda s,l,t: s[t._original_start:t._original_end]

    wrapped.setParseAction(extractText)
    wrapped.ignoreExprs = expr.ignoreExprs
    return wrapped
4539
4541 """
4542 Helper to undo pyparsing's default grouping of And expressions, even
4543 if all but one are non-empty.
4544 """
4545 return TokenConverter(expr).setParseAction(lambda t:t[0])
4546
4548 """
4549 Helper to decorate a returned token with its starting and ending locations in the input string.
4550 This helper adds the following results names:
4551 - locn_start = location where matched expression begins
4552 - locn_end = location where matched expression ends
4553 - value = the actual parsed results
4554
4555 Be careful if the input text contains C{<TAB>} characters, you may want to call
4556 C{L{ParserElement.parseWithTabs}}
4557
4558 Example::
4559 wd = Word(alphas)
4560 for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
4561 print(match)
4562 prints::
4563 [[0, 'ljsdf', 5]]
4564 [[8, 'lksdjjf', 15]]
4565 [[18, 'lkkjj', 23]]
4566 """
4567 locator = Empty().setParseAction(lambda s,l,t: l)
4568 return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
4569
4570
4571
4572 empty = Empty().setName("empty")
4573 lineStart = LineStart().setName("lineStart")
4574 lineEnd = LineEnd().setName("lineEnd")
4575 stringStart = StringStart().setName("stringStart")
4576 stringEnd = StringEnd().setName("stringEnd")
4577
# Pieces of the small grammar that srange() uses to parse regex-style '[...]'
# character-set expressions.

# backslash + one punctuation character -> the bare punctuation character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# hex escape, \xHH or legacy \0xHH -> the character with that code point.
# The digits are everything after the first x/X.  (str.lstrip(r'\0x') strips a
# *set* of characters, so it also removed leading '0' digits of the value -
# "\x00" became "" - and it left an uppercase 'X' in front of the digits.)
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split('x',1)[1],16)))
# octal escape, \0NN -> the character with that code point
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
# any one character that may appear inside the brackets
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
# "a-z" style range, grouped as [first, last]
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# the complete bracket expression: optional '^' negation plus the body items
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
4586 r"""
4587 Helper to easily define string ranges for use in Word construction. Borrows
4588 syntax from regexp '[]' string range definitions::
4589 srange("[0-9]") -> "0123456789"
4590 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
4591 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
4592 The input string must be enclosed in []'s, and the returned string is the expanded
4593 character set joined into a single string.
4594 The values enclosed in the []'s may be:
4595 - a single character
4596 - an escaped character with a leading backslash (such as C{\-} or C{\]})
4597 - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
4598 (C{\0x##} is also supported for backwards compatibility)
4599 - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
4600 - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
4601 - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
4602 """
4603 _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
4604 try:
4605 return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
4606 except:
4607 return ""
4608
4610 """
4611 Helper method for defining parse actions that require matching at a specific
4612 column in the input text.
4613 """
4614 def verifyCol(strg,locn,toks):
4615 if col(locn,strg) != n:
4616 raise ParseException(strg,locn,"matched token not at column %d" % n)
4617 return verifyCol
4618
4620 """
4621 Helper method for common parse actions that simply return a literal value. Especially
4622 useful when used with C{L{transformString<ParserElement.transformString>}()}.
4623
4624 Example::
4625 num = Word(nums).setParseAction(lambda toks: int(toks[0]))
4626 na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
4627 term = na | num
4628
4629 OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
4630 """
4631 return lambda s,l,t: [replStr]
4632
4634 """
4635 Helper parse action for removing quotation marks from parsed quoted strings.
4636
4637 Example::
4638 # by default, quotation marks are included in parsed results
4639 quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
4640
4641 # use removeQuotes to strip quotation marks from parsed results
4642 quotedString.setParseAction(removeQuotes)
4643 quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
4644 """
4645 return t[0][1:-1]
4646
4648 """
4649 Helper to define a parse action by mapping a function to all elements of a ParseResults list. If any additional
4650 args are passed, they are forwarded to the given function as additional arguments after
4651 the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
4652 parsed data to an integer using base 16.
4653
4654 Example (compare the last to the example in L{ParserElement.transformString})::
4655 hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
4656 hex_ints.runTests('''
4657 00 11 22 aa FF 0a 0d 1a
4658 ''')
4659
4660 upperword = Word(alphas).setParseAction(tokenMap(str.upper))
4661 OneOrMore(upperword).runTests('''
4662 my kingdom for a horse
4663 ''')
4664
4665 wd = Word(alphas).setParseAction(tokenMap(str.title))
4666 OneOrMore(wd).setParseAction(' '.join).runTests('''
4667 now is the winter of our discontent made glorious summer by this sun of york
4668 ''')
4669 prints::
4670 00 11 22 aa FF 0a 0d 1a
4671 [0, 17, 34, 170, 255, 10, 13, 26]
4672
4673 my kingdom for a horse
4674 ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
4675
4676 now is the winter of our discontent made glorious summer by this sun of york
4677 ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
4678 """
4679 def pa(s,l,t):
4680 return [func(tokn, *args) for tokn in t]
4681
4682 try:
4683 func_name = getattr(func, '__name__',
4684 getattr(func, '__class__').__name__)
4685 except Exception:
4686 func_name = str(func)
4687 pa.__name__ = func_name
4688
4689 return pa
4690
4691 upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
4692 """Helper parse action to convert tokens to upper case."""
4693
4694 downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
4695 """Helper parse action to convert tokens to lower case."""
4725
4744
4753
4755 """
4756 Helper to create a validating parse action to be used with start tags created
4757 with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
4758 with a required attribute value, to avoid false matches on common tags such as
4759 C{<TD>} or C{<DIV>}.
4760
4761 Call C{withAttribute} with a series of attribute names and values. Specify the list
4762 of filter attributes names and values as:
4763 - keyword arguments, as in C{(align="right")}, or
4764 - as an explicit dict with C{**} operator, when an attribute name is also a Python
4765 reserved word, as in C{**{"class":"Customer", "align":"right"}}
4766 - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
4767 For attribute names with a namespace prefix, you must use the second form. Attribute
4768 names are matched insensitive to upper/lower case.
4769
4770 If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
4771
4772 To verify that the attribute exists, but without specifying a value, pass
4773 C{withAttribute.ANY_VALUE} as the value.
4774
4775 Example::
4776 html = '''
4777 <div>
4778 Some text
4779 <div type="grid">1 4 0 1 0</div>
4780 <div type="graph">1,3 2,3 1,1</div>
4781 <div>this has no type</div>
4782 </div>
4783
4784 '''
4785 div,div_end = makeHTMLTags("div")
4786
4787 # only match div tag having a type attribute with value "grid"
4788 div_grid = div().setParseAction(withAttribute(type="grid"))
4789 grid_expr = div_grid + SkipTo(div | div_end)("body")
4790 for grid_header in grid_expr.searchString(html):
4791 print(grid_header.body)
4792
4793 # construct a match with any div tag having a type attribute, regardless of the value
4794 div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
4795 div_expr = div_any_type + SkipTo(div | div_end)("body")
4796 for div_header in div_expr.searchString(html):
4797 print(div_header.body)
4798 prints::
4799 1 4 0 1 0
4800
4801 1 4 0 1 0
4802 1,3 2,3 1,1
4803 """
4804 if args:
4805 attrs = args[:]
4806 else:
4807 attrs = attrDict.items()
4808 attrs = [(k,v) for k,v in attrs]
4809 def pa(s,l,tokens):
4810 for attrName,attrValue in attrs:
4811 if attrName not in tokens:
4812 raise ParseException(s,l,"no matching attribute " + attrName)
4813 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
4814 raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
4815 (attrName, tokens[attrName], attrValue))
4816 return pa
4817 withAttribute.ANY_VALUE = object()
4818
def withClass(classname, namespace=''):
    """
    Shortcut for C{L{withAttribute}} when the attribute to test is C{class} -
    which cannot be passed as a plain keyword argument because C{class} is a
    reserved word in Python.  If C{namespace} is given, the attribute tested
    is C{"<namespace>:class"}.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        attrName = "%s:class" % namespace
    else:
        attrName = "class"
    # pass through ** because the attribute name is not a valid keyword
    return withAttribute(**{attrName : classname})
4853
4854 opAssoc = _Constants()
4855 opAssoc.LEFT = object()
4856 opAssoc.RIGHT = object()
4859 """
4860 Helper method for constructing grammars of expressions made up of
4861 operators working in a precedence hierarchy. Operators may be unary or
4862 binary, left- or right-associative. Parse actions can also be attached
4863 to operator expressions.
4864
4865 Parameters:
4866 - baseExpr - expression representing the most basic element for the nested
4867 - opList - list of tuples, one for each operator precedence level in the
4868 expression grammar; each tuple is of the form
4869 (opExpr, numTerms, rightLeftAssoc, parseAction), where:
4870 - opExpr is the pyparsing expression for the operator;
4871 may also be a string, which will be converted to a Literal;
4872 if numTerms is 3, opExpr is a tuple of two expressions, for the
4873 two operators separating the 3 terms
4874 - numTerms is the number of terms for this operator (must
4875 be 1, 2, or 3)
4876 - rightLeftAssoc is the indicator whether the operator is
4877 right or left associative, using the pyparsing-defined
4878 constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
4879 - parseAction is the parse action to be associated with
4880 expressions matching this operator expression (the
4881 parse action tuple member may be omitted)
4882 - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
4883 - rpar - expression for matching right-parentheses (default=C{Suppress(')')})
4884
4885 Example::
4886 # simple example of four-function arithmetic with ints and variable names
4887 integer = pyparsing_common.signedInteger
4888 varname = pyparsing_common.identifier
4889
4890 arith_expr = infixNotation(integer | varname,
4891 [
4892 ('-', 1, opAssoc.RIGHT),
4893 (oneOf('* /'), 2, opAssoc.LEFT),
4894 (oneOf('+ -'), 2, opAssoc.LEFT),
4895 ])
4896
4897 arith_expr.runTests('''
4898 5+3*6
4899 (5+3)*6
4900 -2--11
4901 ''', fullDump=False)
4902 prints::
4903 5+3*6
4904 [[5, '+', [3, '*', 6]]]
4905
4906 (5+3)*6
4907 [[[5, '+', 3], '*', 6]]
4908
4909 -2--11
4910 [[['-', 2], '-', ['-', 11]]]
4911 """
4912 ret = Forward()
4913 lastExpr = baseExpr | ( lpar + ret + rpar )
4914 for i,operDef in enumerate(opList):
4915 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
4916 termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
4917 if arity == 3:
4918 if opExpr is None or len(opExpr) != 2:
4919 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
4920 opExpr1, opExpr2 = opExpr
4921 thisExpr = Forward().setName(termName)
4922 if rightLeftAssoc == opAssoc.LEFT:
4923 if arity == 1:
4924 matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
4925 elif arity == 2:
4926 if opExpr is not None:
4927 matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
4928 else:
4929 matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
4930 elif arity == 3:
4931 matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
4932 Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
4933 else:
4934 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
4935 elif rightLeftAssoc == opAssoc.RIGHT:
4936 if arity == 1:
4937
4938 if not isinstance(opExpr, Optional):
4939 opExpr = Optional(opExpr)
4940 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
4941 elif arity == 2:
4942 if opExpr is not None:
4943 matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
4944 else:
4945 matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
4946 elif arity == 3:
4947 matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
4948 Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
4949 else:
4950 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
4951 else:
4952 raise ValueError("operator must indicate right or left associativity")
4953 if pa:
4954 matchExpr.setParseAction( pa )
4955 thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
4956 lastExpr = thisExpr
4957 ret <<= lastExpr
4958 return ret
4959
4960 operatorPrecedence = infixNotation
4961 """(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""
4962
4963 dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
4964 sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
4965 quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
4966 Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
4967 unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
4970 """
4971 Helper method for defining nested lists enclosed in opening and closing
4972 delimiters ("(" and ")" are the default).
4973
4974 Parameters:
4975 - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
4976 - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
4977 - content - expression for items within the nested lists (default=C{None})
4978 - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})
4979
4980 If an expression is not provided for the content argument, the nested
4981 expression will capture all whitespace-delimited content between delimiters
4982 as a list of separate values.
4983
4984 Use the C{ignoreExpr} argument to define expressions that may contain
4985 opening or closing characters that should not be treated as opening
4986 or closing characters for nesting, such as quotedString or a comment
4987 expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
4988 The default is L{quotedString}, but if no expressions are to be ignored,
4989 then pass C{None} for this argument.
4990
4991 Example::
4992 data_type = oneOf("void int short long char float double")
4993 decl_data_type = Combine(data_type + Optional(Word('*')))
4994 ident = Word(alphas+'_', alphanums+'_')
4995 number = pyparsing_common.number
4996 arg = Group(decl_data_type + ident)
4997 LPAR,RPAR = map(Suppress, "()")
4998
4999 code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))
5000
5001 c_function = (decl_data_type("type")
5002 + ident("name")
5003 + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
5004 + code_body("body"))
5005 c_function.ignore(cStyleComment)
5006
5007 source_code = '''
5008 int is_odd(int x) {
5009 return (x%2);
5010 }
5011
5012 int dec_to_hex(char hchar) {
5013 if (hchar >= '0' && hchar <= '9') {
5014 return (ord(hchar)-ord('0'));
5015 } else {
5016 return (10+ord(hchar)-ord('A'));
5017 }
5018 }
5019 '''
5020 for func in c_function.searchString(source_code):
5021 print("%(name)s (%(type)s) args: %(args)s" % func)
5022
5023 prints::
5024 is_odd (int) args: [['int', 'x']]
5025 dec_to_hex (int) args: [['char', 'hchar']]
5026 """
5027 if opener == closer:
5028 raise ValueError("opening and closing strings cannot be the same")
5029 if content is None:
5030 if isinstance(opener,basestring) and isinstance(closer,basestring):
5031 if len(opener) == 1 and len(closer)==1:
5032 if ignoreExpr is not None:
5033 content = (Combine(OneOrMore(~ignoreExpr +
5034 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
5035 ).setParseAction(lambda t:t[0].strip()))
5036 else:
5037 content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
5038 ).setParseAction(lambda t:t[0].strip()))
5039 else:
5040 if ignoreExpr is not None:
5041 content = (Combine(OneOrMore(~ignoreExpr +
5042 ~Literal(opener) + ~Literal(closer) +
5043 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
5044 ).setParseAction(lambda t:t[0].strip()))
5045 else:
5046 content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
5047 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
5048 ).setParseAction(lambda t:t[0].strip()))
5049 else:
5050 raise ValueError("opening and closing arguments must be strings if no content expression is given")
5051 ret = Forward()
5052 if ignoreExpr is not None:
5053 ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
5054 else:
5055 ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
5056 ret.setName('nested %s%s expression' % (opener,closer))
5057 return ret
5058
5059 -def indentedBlock(blockStatementExpr, indentStack, indent=True):
5060 """
5061 Helper method for defining space-delimited indentation blocks, such as
5062 those used to define block statements in Python source code.
5063
5064 Parameters:
5065 - blockStatementExpr - expression defining syntax of statement that
5066 is repeated within the indented block
5067 - indentStack - list created by caller to manage indentation stack
5068 (multiple statementWithIndentedBlock expressions within a single grammar
5069 should share a common indentStack)
5070 - indent - boolean indicating whether block must be indented beyond the
5071 the current level; set to False for block of left-most statements
5072 (default=C{True})
5073
5074 A valid block must contain at least one C{blockStatement}.
5075
5076 Example::
5077 data = '''
5078 def A(z):
5079 A1
5080 B = 100
5081 G = A2
5082 A2
5083 A3
5084 B
5085 def BB(a,b,c):
5086 BB1
5087 def BBA():
5088 bba1
5089 bba2
5090 bba3
5091 C
5092 D
5093 def spam(x,y):
5094 def eggs(z):
5095 pass
5096 '''
5097
5098
5099 indentStack = [1]
5100 stmt = Forward()
5101
5102 identifier = Word(alphas, alphanums)
5103 funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
5104 func_body = indentedBlock(stmt, indentStack)
5105 funcDef = Group( funcDecl + func_body )
5106
5107 rvalue = Forward()
5108 funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
5109 rvalue << (funcCall | identifier | Word(nums))
5110 assignment = Group(identifier + "=" + rvalue)
5111 stmt << ( funcDef | assignment | identifier )
5112
5113 module_body = OneOrMore(stmt)
5114
5115 parseTree = module_body.parseString(data)
5116 parseTree.pprint()
5117 prints::
5118 [['def',
5119 'A',
5120 ['(', 'z', ')'],
5121 ':',
5122 [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
5123 'B',
5124 ['def',
5125 'BB',
5126 ['(', 'a', 'b', 'c', ')'],
5127 ':',
5128 [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
5129 'C',
5130 'D',
5131 ['def',
5132 'spam',
5133 ['(', 'x', 'y', ')'],
5134 ':',
5135 [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
5136 """
def checkPeerIndent(s,l,t):
    """Parse action: accept location C{l} only if it sits at the current indent level.

    Does nothing at or past the end of the input. A column deeper than the
    top of C{indentStack} raises C{ParseFatalException}; any other
    mismatch raises C{ParseException}.
    """
    if l >= len(s):
        return
    column = col(l, s)
    expected = indentStack[-1]
    if column == expected:
        return
    if column > expected:
        # deeper than the current block level
        raise ParseFatalException(s, l, "illegal nesting")
    raise ParseException(s, l, "not a peer entry")
5144
def checkSubIndent(s,l,t):
    """Parse action: open a new indent level at location C{l}.

    The column at C{l} must be strictly greater than the top of
    C{indentStack}; it is then pushed as the new current level.
    Otherwise C{ParseException} is raised.
    """
    column = col(l, s)
    if column <= indentStack[-1]:
        raise ParseException(s, l, "not a subentry")
    indentStack.append(column)
5151
def checkUnindent(s,l,t):
    """Parse action: close the current indent level at location C{l}.

    Does nothing at or past the end of the input. Otherwise the column at
    C{l} must be less than the current level (top of C{indentStack}) and
    no greater than the level beneath it; the current level is then popped.
    Raises C{ParseException} when that is not the case.
    """
    if l >= len(s):
        return
    curCol = col(l, s)
    # Both the current level and its parent must be on the stack. Without
    # the length check, a one-entry stack would make indentStack[-2] raise
    # IndexError instead of reporting an ordinary parse failure.
    if len(indentStack) < 2 or not (curCol < indentStack[-1] and curCol <= indentStack[-2]):
        raise ParseException(s, l, "not an unindent")
    indentStack.pop()
5158
5159 NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
5160 INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
5161 PEER = Empty().setParseAction(checkPeerIndent).setName('')
5162 UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
5163 if indent:
5164 smExpr = Group( Optional(NL) +
5165
5166 INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
5167 else:
5168 smExpr = Group( Optional(NL) +
5169 (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
5170 blockStatementExpr.ignore(_bslash + LineEnd())
5171 return smExpr.setName('indented block')
5172
# character sets for the upper 8-bit range, expanded from srange specifications
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# open/close tag expressions built for any tag name
anyOpenTag, anyCloseTag = makeHTMLTags(
    Word(alphas, alphanums + "_:").setName('any tag')
)

# entity name -> replacement character (gt, lt, amp, nbsp, quot, apos)
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), '><& "\''))

# "&name;" for any name in the map; the name is captured in the group "entity"
commonHTMLEntity = Regex(
    '&(?P<entity>' + '|'.join(_htmlEntityMap) + ");"
).setName("common HTML entity")
5180 """Helper parser action to replace common HTML entities with their special characters"""
5181 return _htmlEntityMap.get(t.entity)
5182
5183
# "/*", then any run of characters in which a "*" is never directly followed
# by "/", then the closing "*/"; Combine is applied to the whole sequence
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form C{/* ... */}"

# non-greedy body, so the match stops at the first "-->"
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

# ".*" with leaveWhitespace() applied
# NOTE(review): presumably matches up to (not including) the newline without
# skipping leading whitespace - confirm against Regex/leaveWhitespace
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# "//" followed by any non-newline characters; a backslash-newline pair is
# also consumed, so such a comment continues onto the following line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

# '+' binds tighter than '|', so this is (block comment body + '*/') | dblSlashComment
cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

# alias: the very same expression object as cppStyleComment
javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# one list item: words of printable characters other than ',', each optionally
# followed by a run of spaces/tabs that is not itself followed by ',' or a line end
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# each item is a quoted string or a _commasepitem; Optional(..., default="")
# NOTE(review): presumably yields "" for an empty field - confirm against Optional
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
5211 """
5212 Here are some common low-level expressions that may be useful in jump-starting parser development:
5213 - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sciReal>})
5214 - common L{programming identifiers<identifier>}
5215 - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
5216 - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
5217 - L{UUID<uuid>}
5218 Parse actions:
5219 - C{L{convertToInteger}}
5220 - C{L{convertToFloat}}
5221 - C{L{convertToDate}}
5222 - C{L{convertToDatetime}}
5223 - C{L{stripHTMLTags}}
5224
5225 Example::
5226 pyparsing_common.number.runTests('''
5227 # any int or real number, returned as the appropriate type
5228 100
5229 -100
5230 +100
5231 3.14159
5232 6.02e23
5233 1e-12
5234 ''')
5235
5236 pyparsing_common.fnumber.runTests('''
5237 # any int or real number, returned as float
5238 100
5239 -100
5240 +100
5241 3.14159
5242 6.02e23
5243 1e-12
5244 ''')
5245
5246 pyparsing_common.hex_integer.runTests('''
5247 # hex numbers
5248 100
5249 FF
5250 ''')
5251
5252 pyparsing_common.fraction.runTests('''
5253 # fractions
5254 1/2
5255 -3/4
5256 ''')
5257
5258 pyparsing_common.mixed_integer.runTests('''
5259 # mixed fractions
5260 1
5261 1/2
5262 -3/4
5263 1-3/4
5264 ''')
5265
5266 import uuid
5267 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
5268 pyparsing_common.uuid.runTests('''
5269 # uuid
5270 12345678-1234-5678-1234-567812345678
5271 ''')
5272 prints::
5273 # any int or real number, returned as the appropriate type
5274 100
5275 [100]
5276
5277 -100
5278 [-100]
5279
5280 +100
5281 [100]
5282
5283 3.14159
5284 [3.14159]
5285
5286 6.02e23
5287 [6.02e+23]
5288
5289 1e-12
5290 [1e-12]
5291
5292 # any int or real number, returned as float
5293 100
5294 [100.0]
5295
5296 -100
5297 [-100.0]
5298
5299 +100
5300 [100.0]
5301
5302 3.14159
5303 [3.14159]
5304
5305 6.02e23
5306 [6.02e+23]
5307
5308 1e-12
5309 [1e-12]
5310
5311 # hex numbers
5312 100
5313 [256]
5314
5315 FF
5316 [255]
5317
5318 # fractions
5319 1/2
5320 [0.5]
5321
5322 -3/4
5323 [-0.75]
5324
5325 # mixed fractions
5326 1
5327 [1]
5328
5329 1/2
5330 [0.5]
5331
5332 -3/4
5333 [-0.75]
5334
5335 1-3/4
5336 [1.75]
5337
5338 # uuid
5339 12345678-1234-5678-1234-567812345678
5340 [UUID('12345678-1234-5678-1234-567812345678')]
5341 """
5342
# parse actions produced by tokenMap around the int / float builtins
convertToInteger = tokenMap(int)
"""
Parse action for converting parsed integers to Python int
"""

convertToFloat = tokenMap(float)
"""
Parse action for converting parsed numbers to Python float
"""

integer = Word(nums).setName("integer").setParseAction(convertToInteger)
"""expression that parses an unsigned integer, returns an int"""

# tokenMap(int,16) - NOTE(review): presumably 16 is forwarded to int() as the base; confirm against tokenMap
hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
"""expression that parses a hexadecimal integer, returns an int"""

signedInteger = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
"""expression that parses an integer with optional leading sign, returns an int"""

# numerator '/' denominator, each a signedInteger() given the float conversion
# NOTE(review): signedInteger() presumably returns a copy, leaving signedInteger's own parse action untouched - confirm
fraction = (signedInteger().setParseAction(convertToFloat) + '/' + signedInteger().setParseAction(convertToFloat)).setName("fraction")
"""fractional expression of an integer divided by an integer, returns a float"""
# divide the first token (numerator) by the last token (denominator)
fraction.addParseAction(lambda t: t[0]/t[-1])

# either a bare fraction, or an integer optionally followed by a suppressed '-' and a fraction
mixed_integer = (fraction | signedInteger + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
"""mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
# the remaining tokens (whole part and/or fraction value) are added together
mixed_integer.addParseAction(sum)

# digits, a mandatory decimal point, then optional digits
real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
"""expression that parses a floating point number and returns a float"""

# digits followed by either an exponent, or a decimal point with optional digits and optional exponent
sciReal = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
"""expression that parses a floating point number with optional scientific notation and returns a float"""


# alternatives are listed as sciReal, real, signedInteger
# NOTE(review): assumes '|' tries them in that order, first match wins - confirm
number = (sciReal | real | signedInteger).streamline()
"""any numeric expression, returns the corresponding Python type"""

# single pattern covering ints, reals and exponent forms; always converted with float
fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
"""any int or real number, returned as float"""

identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
"""typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

# one octet pattern, then three more octets each preceded by '.'
ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
"IPv4 address (C{0.0.0.0 - 255.255.255.255})"

# one group of 1 to 4 hex digits
_ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
# full form: a first group plus seven ':'-prefixed groups
_full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
# short form: optional groups on either side of "::"
# NOTE(review): *(0,6) presumably means 0 to 6 repetitions - confirm
_short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
# condition: fewer than 8 of the matched tokens may be hex groups
_short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
# literal "::ffff:" prefix followed by an IPv4 address
_mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
"IPv6 address (long, short, or mixed form)"
5396
# two hex digits, a delimiter (captured), two hex digits, then four more
# delimiter+pair groups: six pairs in total. The \1 back-reference requires
# every later delimiter to be the same character as the first.
mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
"MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
5399
@staticmethod
def convertToDate(fmt="%Y-%m-%d"):
    """
    Helper to create a parse action for converting parsed date string to Python datetime.date

    Params -
     - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

    Returns a parse action taking C{(s, l, t)} that converts the first
    token C{t[0]}. C{datetime.strptime} raises C{ValueError} when the
    token does not match C{fmt}.

    Example::
        date_expr = pyparsing_common.iso8601_date.copy()
        date_expr.setParseAction(pyparsing_common.convertToDate())
        print(date_expr.parseString("1999-12-31"))
    prints::
        [datetime.date(1999, 12, 31)]
    """
    # parse to a datetime, then keep only the date portion
    return lambda s,l,t: datetime.strptime(t[0], fmt).date()
5416
@staticmethod
def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
    """
    Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

    Params -
     - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

    Returns a parse action taking C{(s, l, t)} that converts the first
    token C{t[0]}. C{datetime.strptime} raises C{ValueError} when the
    token does not match C{fmt}.

    Example::
        dt_expr = pyparsing_common.iso8601_datetime.copy()
        dt_expr.setParseAction(pyparsing_common.convertToDatetime())
        print(dt_expr.parseString("1999-12-31T23:59:59.999"))
    prints::
        [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
    """
    return lambda s,l,t: datetime.strptime(t[0], fmt)
5433
# named groups year / month / day; the month and day parts are optional in
# the pattern, so "yyyy" and "yyyy-mm" are matched as well as "yyyy-mm-dd"
iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
"ISO8601 date (C{yyyy-mm-dd})"

# full date, a 'T' or space separator, hour:minute, then optional seconds
# (with optional fractional digits) and an optional zone ("Z" or +/-hh[:]mm)
iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
"ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

# hex digit groups of length 8-4-4-4-12 separated by '-'
uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
"UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

# matches any opening or closing tag, with the matched tokens suppressed
# NOTE(review): presumably consumed by the stripHTMLTags parse action, which is not visible here
_html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
5444 @staticmethod
5458
if __name__ == "__main__":
    # Demo / smoke test, executed only when this module is run as a script:
    # builds a tiny "select ... from ..." grammar and feeds sample inputs
    # to runTests, then exercises a few pyparsing_common expressions.

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # '.'-delimited identifiers with combine=True, passed through upcaseTokens
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    # either the literal '*' or a list of column names
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    # NOTE(review): calling an expression with a string presumably attaches a results name - confirm
    simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # int / real samples, parsed with the type-preserving "number" expression
    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    # same samples, parsed with "fnumber"
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # replaces the parse action on the shared pyparsing_common.uuid expression
    # so that matched text is passed through uuid.UUID
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
5529