Module pyparsing
[frames | no frames]

Source Code for Module pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2015  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  35   
  36      from pyparsing import Word, alphas 
  37   
  38      # define grammar of a greeting 
  39      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  40   
  41      hello = "Hello, World!" 
  42      print (hello, "->", greet.parseString( hello )) 
  43   
  44  The program outputs the following:: 
  45   
  46      Hello, World! -> ['Hello', ',', 'World', '!'] 
  47   
  48  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  49  class names, and the use of '+', '|' and '^' operators. 
  50   
  51  The parsed results returned from L{I{ParserElement.parseString}<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an 
  52  object with named attributes. 
  53   
  54  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  55   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  56   - quoted strings 
  57   - embedded comments 
  58  """ 
  59   
  60  __version__ = "2.1.8" 
  61  __versionTime__ = "14 Aug 2016 08:43 UTC" 
  62  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  63   
  64  import string 
  65  from weakref import ref as wkref 
  66  import copy 
  67  import sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import collections 
  72  import pprint 
  73  import traceback 
  74  import types 
  75  from datetime import datetime 
  76   
  77  try: 
  78      from _thread import RLock 
  79  except ImportError: 
  80      from threading import RLock 
  81   
  82  try: 
  83      from collections import OrderedDict as _OrderedDict 
  84  except ImportError: 
  85      try: 
  86          from ordereddict import OrderedDict as _OrderedDict 
  87      except ImportError: 
  88          _OrderedDict = None 
  89   
  90  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  91   
  92  __all__ = [ 
  93  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  94  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  95  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  96  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  97  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  98  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',  
  99  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
 100  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
 101  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
 102  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
 103  'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 
 104  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
 105  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
 106  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
 107  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
 108  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
 109  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 
 110  'tokenMap', 'pyparsing_common', 
 111  ] 
 112   
 113  system_version = tuple(sys.version_info)[:3] 
 114  PY_3 = system_version[0] == 3 
 115  if PY_3: 
 116      _MAX_INT = sys.maxsize 
 117      basestring = str 
 118      unichr = chr 
 119      _ustr = str 
 120   
 121      # build list of single arg builtins, that can be used as parse actions 
 122      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 123   
 124  else: 
 125      _MAX_INT = sys.maxint 
 126      range = xrange 
127 128 - def _ustr(obj):
129 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 130 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 131 then < returns the unicode object | encodes it with the default encoding | ... >. 132 """ 133 if isinstance(obj,unicode): 134 return obj 135 136 try: 137 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 138 # it won't break any existing code. 139 return str(obj) 140 141 except UnicodeEncodeError: 142 # Else encode it 143 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 144 xmlcharref = Regex('&#\d+;') 145 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 146 return xmlcharref.transformString(ret)
147 148 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 149 singleArgBuiltins = [] 150 import __builtin__ 151 for fname in "sum len sorted reversed list tuple set any all min max".split(): 152 try: 153 singleArgBuiltins.append(getattr(__builtin__,fname)) 154 except AttributeError: 155 continue 156 157 _generatorType = type((y for y in range(1)))
158 159 -def _xml_escape(data):
160 """Escape &, <, >, ", ', etc. in a string of data.""" 161 162 # ampersand must be replaced first 163 from_symbols = '&><"\'' 164 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split()) 165 for from_,to_ in zip(from_symbols, to_symbols): 166 data = data.replace(from_, to_) 167 return data
168
169 -class _Constants(object):
170 pass
# Character-set string constants
nums = "0123456789"
alphas = string.ascii_uppercase + string.ascii_lowercase
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
_bslash = chr(92)   # chr(92) is the backslash character
# every character of string.printable that is not whitespace
printables = "".join(
    ch for ch in string.printable
    if ch not in string.whitespace
)
class ParseBaseException(Exception):
    """Common base class of the exceptions raised while parsing."""

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        if msg is not None:
            self.msg, self.pstr = msg, pstr
        else:
            # single-argument form: the lone string is the message itself
            self.msg, self.pstr = pstr, ""
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        Internal factory: build an exception of this class from another parse
        exception C{pe}, copying its pstr, loc, msg and parserElement - avoids
        having __init__ signature conflicts among subclasses.
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__( self, aname ):
        """Attributes computed on demand from C{loc} and C{pstr}:
            - lineno - returns the line number of the exception text
            - col (or column) - returns the column number of the exception text
            - line - returns the line containing the exception text
           Any other missing attribute raises AttributeError.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname == "col" or aname == "column":
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Return the (stripped) input line of the exception, with
           C{markerString} inserted at the exception column; an empty
           C{markerString} leaves the line unmarked.
        """
        marked = self.line
        split_at = self.column - 1
        if markerString:
            marked = marked[:split_at] + markerString + marked[split_at:]
        return marked.strip()

    def __dir__(self):
        # advertise the computed attributes alongside the regular class members
        return ["lineno", "col", "line"] + dir(type(self))
234
class ParseException(ParseBaseException):
    """
    Exception raised when a parse expression does not match the input.
    Adds nothing to L{ParseBaseException}; supported attributes by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
       Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
255
class ParseFatalException(ParseBaseException):
    """Exception that user code may raise when inconsistent parse content
       is found; stops all parsing immediately."""
260
class ParseSyntaxException(ParseFatalException):
    """Same as L{ParseFatalException}, but raised internally when an
       L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is
       to stop immediately because an unbacktrackable syntax error has been
       found."""
266
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """Exception raised by L{ParserElement.validate} when the grammar could be improperly recursive."""

    def __init__( self, parseElementList ):
        # the list handed in by the caller, kept for reporting in __str__
        self.parseElementTrace = parseElementList

    def __str__( self ):
        trace = self.parseElementTrace
        return "RecursiveGrammarException: %s" % trace
287
288 -class _ParseResultsWithOffset(object):
289 - def __init__(self,p1,p2):
290 self.tup = (p1,p2)
291 - def __getitem__(self,i):
292 return self.tup[i]
293 - def __repr__(self):
294 return repr(self.tup)
295 - def setOffset(self,i):
296 self.tup = (self.tup[0],i)
297
class ParseResults(object):
    """
    Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})

    Example::
        integer = Word(nums)
        date_str = (integer.setResultsName("year") + '/'
                        + integer.setResultsName("month") + '/'
                        + integer.setResultsName("day"))
        # equivalent form:
        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString("1999/12/31")

        def test(s, fn=repr):
            print("%s -> %s" % (s, fn(eval(s))))
        test("list(result)")
        test("result[0]")
        test("result['month']")
        test("result.day")
        test("'month' in result")
        test("'minutes' in result")
        test("result.dump()", str)
    prints::
        list(result) -> ['1999', '/', '12', '/', '31']
        result[0] -> '1999'
        result['month'] -> '12'
        result.day -> '31'
        'month' in result -> True
        'minutes' in result -> False
        result.dump() -> ['1999', '/', '12', '/', '31']
        - day: 31
        - month: 12
        - year: 1999
    """
    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
        # wrapping an existing ParseResults returns that same object; the
        # __doinit flag tells __init__ whether the object is freshly created
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        if self.__doinit:
            # first initialisation of a newly created object
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                # non-modal names accumulate all of their values (see __getitem__)
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            # only register the name when there is a non-empty value to attach
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # int/slice -> positional access into the token list;
        # anything else -> lookup by results name
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                # value of the most recently stored entry for this name
                return self.__tokdict[i][-1][0]
            else:
                # accumulating name: every stored value, wrapped in a ParseResults
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,(int,slice)):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # nested results keep a weak reference back to their container
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name,occurrences in self.__tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        # membership tests look at results names only, not at the token list
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return ( not not self.__toklist )
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )
    def _iterkeys( self ):
        if hasattr(self.__tokdict, "iterkeys"):
            return self.__tokdict.iterkeys()
        else:
            return iter(self.__tokdict)

    def _itervalues( self ):
        return (self[k] for k in self._iterkeys())

    def _iteritems( self ):
        return ((k, self[k]) for k in self._iterkeys())

    if PY_3:
        # Returns an iterator of all named result keys (Python 3.x only).
        keys = _iterkeys

        # Returns an iterator of all named result values (Python 3.x only).
        values = _itervalues

        # Returns an iterator of all named result key-value tuples (Python 3.x only).
        items = _iteritems

    else:
        # Returns an iterator of all named result keys (Python 2.x only).
        iterkeys = _iterkeys

        # Returns an iterator of all named result values (Python 2.x only).
        itervalues = _itervalues

        # Returns an iterator of all named result key-value tuples (Python 2.x only).
        iteritems = _iteritems

        def keys( self ):
            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iteritems())

    def haskeys( self ):
        """Since keys() returns an iterator, this method is helpful in bypassing
           code that looks for the existence of any defined results names."""
        return bool(self.__tokdict)

    def pop( self, *args, **kwargs):
        """
        Removes and returns item at specified index (default=C{last}).
        Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
        argument or an integer argument, it will use C{list} semantics
        and pop tokens from the list of parsed tokens. If passed a
        non-integer argument (most likely a string), it will use C{dict}
        semantics and pop the corresponding value from any defined
        results names. A second default return value argument is
        supported, just as in C{dict.pop()}.

        Raises C{TypeError} for any keyword argument other than C{default}.

        Example::
            def remove_first(tokens):
                tokens.pop(0)
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

            label = Word(alphas)
            patt = label("LABEL") + OneOrMore(Word(nums))
            print(patt.parseString("AAB 123 321").dump())

            # Use pop() in a parse action to remove named result (note that corresponding value is not
            # removed from list form of results)
            def remove_LABEL(tokens):
                tokens.pop("LABEL")
                return tokens
            patt.addParseAction(remove_LABEL)
            print(patt.parseString("AAB 123 321").dump())
        prints::
            ['AAB', '123', '321']
            - LABEL: AAB

            ['AAB', '123', '321']
        """
        if not args:
            args = [-1]
        for k,v in kwargs.items():
            if k == 'default':
                args = (args[0], v)
            else:
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
        if (isinstance(args[0], int) or
                        len(args) == 1 or
                        args[0] in self):
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            # unknown name and a default was supplied
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, defaultValue=None):
        """
        Returns named result matching the given key, or if there is no
        such name, then returns the given C{defaultValue} or C{None} if no
        C{defaultValue} is specified.

        Similar to C{dict.get()}.

        Example::
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString("1999/12/31")
            print(result.get("year")) # -> '1999'
            print(result.get("hour", "not specified")) # -> 'not specified'
            print(result.get("hour")) # -> None
        """
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """
        Inserts new element at location index in the list of parsed tokens.

        Similar to C{list.insert()}.

        Example::
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to insert the parse location in the front of the parsed results
            def insert_locn(locn, tokens):
                tokens.insert(0, locn)
            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
        """
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name,occurrences in self.__tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def append( self, item ):
        """
        Add single element to end of ParseResults list of elements.

        Example::
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to compute the sum of the parsed integers, and add it to the end
            def append_sum(tokens):
                tokens.append(sum(map(int, tokens)))
            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
        """
        self.__toklist.append(item)

    def extend( self, itemseq ):
        """
        Add sequence of elements to end of ParseResults list of elements.

        Example::
            patt = OneOrMore(Word(alphas))

            # use a parse action to append the reverse of the matched strings, to make a palindrome
            def make_palindrome(tokens):
                tokens.extend(reversed([t[::-1] for t in tokens]))
                return ''.join(tokens)
            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
        """
        if isinstance(itemseq, ParseResults):
            # merging another ParseResults also carries over its results names
            self += itemseq
        else:
            self.__toklist.extend(itemseq)

    def clear( self ):
        """
        Clear all elements and results names.
        """
        del self.__toklist[:]
        self.__tokdict.clear()

    def __getattr__( self, name ):
        # attribute access is results-name access; an undefined name yields ""
        # instead of raising
        try:
            return self[name]
        except KeyError:
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            # named entries of other are re-registered here with their offsets
            # shifted past the tokens already held by self
            offset = len(self.__toklist)
            addoffset = lambda a: offset if a<0 else a+offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        if isinstance(other,int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        else:
            # this may raise a TypeError - so be it
            return other + self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

    def _asStringList( self, sep='' ):
        # flattens nested results into one list of strings; sep (when given) is
        # inserted between the items of this level
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """
        Returns the parse results as a nested list of matching tokens. Nested
        C{ParseResults} are converted to lists recursively; all other tokens are
        returned unchanged.

        Example::
            patt = OneOrMore(Word(alphas))
            result = patt.parseString("sldkj lsdkj sldkj")
            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

            # Use asList() to create an actual list
            result_list = result.asList()
            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
        """
        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]

    def asDict( self ):
        """
        Returns the named parse results as a nested dictionary.

        Example::
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString('12/31/1999')
            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

            result_dict = result.asDict()
            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
            import json
            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
        """
        if PY_3:
            item_fn = self.items
        else:
            item_fn = self.iteritems

        def toItem(obj):
            # nested results become dicts when they define names, lists otherwise
            if isinstance(obj, ParseResults):
                if obj.haskeys():
                    return obj.asDict()
                else:
                    return [toItem(v) for v in obj]
            else:
                return obj

        return dict((k,toItem(v)) for k,v in item_fn())

    def copy( self ):
        """
        Returns a new copy of a C{ParseResults} object.
        """
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """
        (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
        """
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist)
        # NOTE(review): whitespace inside this literal may have been collapsed
        # by the listing this code was recovered from - confirm against the
        # original source
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object sub is stored (identity test)
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """
        Returns the results name for this token expression. Useful when several
        different expressions might match at a particular location.

        Example::
            integer = Word(nums)
            ssn_expr = Regex(r"\\d\\d\\d-\\d\\d-\\d\\d\\d\\d")
            house_number_expr = Suppress('#') + Word(nums, alphanums)
            user_data = (Group(house_number_expr)("house_number")
                        | Group(ssn_expr)("ssn")
                        | Group(integer)("age"))
            user_info = OneOrMore(user_data)

            result = user_info.parseString("22 111-22-3333 #221B")
            for item in result:
                print(item.getName(), ':', item[0])
        prints::
            age : 22
            ssn : 111-22-3333
            house_number : 221B
        """
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
                   len(self.__tokdict) == 1 and
                   # dict views cannot be indexed on Python 3, so take the
                   # single entry through an iterator instead of [0]
                   next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self, indent='', depth=0, full=True):
        """
        Diagnostic method for listing out the contents of a C{ParseResults}.
        Accepts an optional C{indent} argument so that this string can be embedded
        in a nested display of other data.

        Example::
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString('12/31/1999')
            print(result.dump())
        prints::
            ['12', '/', '31', '/', '1999']
            - day: 1999
            - month: 31
            - year: 12
        """
        # NOTE(review): the ' ' literals used for per-depth indentation below
        # may have had whitespace collapsed by the listing this code was
        # recovered from - confirm against the original source
        out = []
        NL = '\n'
        out.append( indent+_ustr(self.asList()) )
        if full:
            if self.haskeys():
                items = sorted(self.items())
                for k,v in items:
                    if out:
                        out.append(NL)
                    out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
                    if isinstance(v,ParseResults):
                        if v:
                            out.append( v.dump(indent,depth+1) )
                        else:
                            out.append(_ustr(v))
                    else:
                        out.append(_ustr(v))
            elif any(isinstance(vv,ParseResults) for vv in self):
                v = self
                for i,vv in enumerate(v):
                    if isinstance(vv,ParseResults):
                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
                    else:
                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """
        Pretty-printer for parsed results as a list, using the C{pprint} module.
        Accepts additional positional or keyword args as defined for the
        C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

        Example::
            ident = Word(alphas, alphanums)
            num = Word(nums)
            func = Forward()
            term = ident | num | Group('(' + func + ')')
            func <<= ident + Group(Optional(delimitedList(term)))
            result = func.parseString("fna a,b,(fnb c,d,200),100")
            result.pprint(width=40)
        prints::
            ['fna',
             ['a',
              'b',
              ['(', 'fnb', ['c', 'd', '200'], ')'],
              '100']]
        """
        pprint.pprint(self.asList(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        # the weak parent reference is resolved to the parent object (or None)
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __getnewargs__(self):
        return self.__toklist, self.__name, self.__asList, self.__modal

    def __dir__(self):
        # results names are offered next to the regular class members
        return (dir(type(self)) + list(self.keys()))
# Register ParseResults as a virtual subclass of the MutableMapping ABC.
# The ABCs live in collections.abc since Python 3.3, and the aliases in the
# top-level collections module were removed in Python 3.10, so look there
# first and fall back to the old location for older interpreters.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping

_MutableMapping.register(ParseResults)
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   A location that sits exactly on a newline character is reported as column 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind gives -1 when there is no earlier newline, which makes the
    # first line 1-based as well
    last_newline = strg.rfind("\n", 0, loc)
    return loc - last_newline
953
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
965
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
       """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:end]
975
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action run before a match attempt: prints the expression, the location and its (line,col)."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc) + position
    print (message)
978
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action run after a successful match: prints the expression and the matched tokens as a list."""
    matched = str(toks.asList())
    message = "Matched " + _ustr(expr) + " -> " + matched
    print (message)
981
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action run when a match attempt raises: prints the exception."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
984
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them and returns C{None}.
    """
    return None
988 989 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 990 #~ 'decorator to trim function calls to match the arity of the target' 991 #~ def _trim_arity(func, maxargs=3): 992 #~ if func in singleArgBuiltins: 993 #~ return lambda s,l,t: func(t) 994 #~ limit = 0 995 #~ foundArity = False 996 #~ def wrapper(*args): 997 #~ nonlocal limit,foundArity 998 #~ while 1: 999 #~ try: 1000 #~ ret = func(*args[limit:]) 1001 #~ foundArity = True 1002 #~ return ret 1003 #~ except TypeError: 1004 #~ if limit == maxargs or foundArity: 1005 #~ raise 1006 #~ limit += 1 1007 #~ continue 1008 #~ return wrapper 1009 1010 # this version is Python 2.x-3.x cross-compatible 1011 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """
    Wrap C{func} so it can be called with the full argument list even if it
    accepts fewer arguments.

    The returned wrapper calls C{func(*args[limit:])}, starting with all the
    arguments.  Each time that call itself raises C{TypeError} (detected by
    comparing the traceback's file/line with the synthesized location of the
    call inside C{wrapper}), one more leading argument is dropped and the call
    is retried, until more than C{maxargs} leading arguments have been dropped.
    Once a call has succeeded, later C{TypeError}s are re-raised unchanged.

    Functions listed in C{singleArgBuiltins} are simply called with the third
    argument only.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists act as mutable cells shared with wrapper() (no 'nonlocal' in Python 2)
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [(frame_summary.filename, frame_summary.lineno)]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [(frame_summary.filename, frame_summary.lineno)]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the local traceback reference once it has been inspected
                        del tb

                # the TypeError came from the call line itself: drop one more leading argument and retry
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1076 -class ParserElement(object):
1077 """Abstract base level parser element class.""" 1078 DEFAULT_WHITE_CHARS = " \n\t\r" 1079 verbose_stacktrace = False 1080 1081 @staticmethod
1082 - def setDefaultWhitespaceChars( chars ):
1083 r""" 1084 Overrides the default whitespace chars 1085 1086 Example:: 1087 # default whitespace chars are space, <TAB> and newline 1088 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] 1089 1090 # change to just treat newline as significant 1091 ParserElement.setDefaultWhitespaceChars(" \t") 1092 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def'] 1093 """ 1094 ParserElement.DEFAULT_WHITE_CHARS = chars
1095 1096 @staticmethod
1097 - def inlineLiteralsUsing(cls):
1098 """ 1099 Set class to be used for inclusion of string literals into a parser. 1100 1101 Example:: 1102 # default literal class used is Literal 1103 integer = Word(nums) 1104 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 1105 1106 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31'] 1107 1108 1109 # change to Suppress 1110 ParserElement.inlineLiteralsUsing(Suppress) 1111 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 1112 1113 date_str.parseString("1999/12/31") # -> ['1999', '12', '31'] 1114 """ 1115 ParserElement._literalStringClass = cls
1116
1117 - def __init__( self, savelist=False ):
1118 self.parseAction = list() 1119 self.failAction = None 1120 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 1121 self.strRepr = None 1122 self.resultsName = None 1123 self.saveAsList = savelist 1124 self.skipWhitespace = True 1125 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 1126 self.copyDefaultWhiteChars = True 1127 self.mayReturnEmpty = False # used when checking for left-recursion 1128 self.keepTabs = False 1129 self.ignoreExprs = list() 1130 self.debug = False 1131 self.streamlined = False 1132 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 1133 self.errmsg = "" 1134 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 1135 self.debugActions = ( None, None, None ) #custom debug actions 1136 self.re = None 1137 self.callPreparse = True # used to avoid redundant calls to preParse 1138 self.callDuringTry = False
1139
1140 - def copy( self ):
1141 """ 1142 Make a copy of this C{ParserElement}. Useful for defining different parse actions 1143 for the same parsing pattern, using copies of the original parse element. 1144 1145 Example:: 1146 integer = Word(nums).setParseAction(lambda toks: int(toks[0])) 1147 integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K") 1148 integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") 1149 1150 print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M")) 1151 prints:: 1152 [5120, 100, 655360, 268435456] 1153 Equivalent form of C{expr.copy()} is just C{expr()}:: 1154 integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") 1155 """ 1156 cpy = copy.copy( self ) 1157 cpy.parseAction = self.parseAction[:] 1158 cpy.ignoreExprs = self.ignoreExprs[:] 1159 if self.copyDefaultWhiteChars: 1160 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 1161 return cpy
1162
1163 - def setName( self, name ):
1164 """ 1165 Define name for this expression, makes debugging and exception messages clearer. 1166 1167 Example:: 1168 Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1) 1169 Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) 1170 """ 1171 self.name = name 1172 self.errmsg = "Expected " + self.name 1173 if hasattr(self,"exception"): 1174 self.exception.msg = self.errmsg 1175 return self
1176
1177 - def setResultsName( self, name, listAllMatches=False ):
1178 """ 1179 Define name for referencing matching tokens as a nested attribute 1180 of the returned parse results. 1181 NOTE: this returns a *copy* of the original C{ParserElement} object; 1182 this is so that the client can define a basic element, such as an 1183 integer, and reference it in multiple places with different names. 1184 1185 You can also set results names using the abbreviated syntax, 1186 C{expr("name")} in place of C{expr.setResultsName("name")} - 1187 see L{I{__call__}<__call__>}. 1188 1189 Example:: 1190 date_str = (integer.setResultsName("year") + '/' 1191 + integer.setResultsName("month") + '/' 1192 + integer.setResultsName("day")) 1193 1194 # equivalent form: 1195 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 1196 """ 1197 newself = self.copy() 1198 if name.endswith("*"): 1199 name = name[:-1] 1200 listAllMatches=True 1201 newself.resultsName = name 1202 newself.modalResults = not listAllMatches 1203 return newself
1204
1205 - def setBreak(self,breakFlag = True):
1206 """Method to invoke the Python pdb debugger when this element is 1207 about to be parsed. Set C{breakFlag} to True to enable, False to 1208 disable. 1209 """ 1210 if breakFlag: 1211 _parseMethod = self._parse 1212 def breaker(instring, loc, doActions=True, callPreParse=True): 1213 import pdb 1214 pdb.set_trace() 1215 return _parseMethod( instring, loc, doActions, callPreParse )
1216 breaker._originalParseMethod = _parseMethod 1217 self._parse = breaker 1218 else: 1219 if hasattr(self._parse,"_originalParseMethod"): 1220 self._parse = self._parse._originalParseMethod 1221 return self
1222
def setParseAction( self, *fns, **kwargs ):
    """
    Define action to perform when successfully matching parse element definition.
    This replaces any parse actions already set on the element.
    Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
    C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    # every action is wrapped so it can always be invoked as fn(s, loc, toks)
    self.parseAction = [_trim_arity(action) for action in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1260
def addParseAction( self, *fns, **kwargs ):
    """
    Append parse actions to the expression's existing list of parse actions.
    See L{I{setParseAction}<setParseAction>} for the supported call signatures.

    C{callDuringTry} stays enabled once it has been set; passing
    C{callDuringTry=True} here turns it on.

    See examples in L{I{copy}<copy>}.
    """
    self.parseAction += [_trim_arity(action) for action in fns]
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1270
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.

    Each function in C{fns} becomes its own parse action and is checked
    independently.  Returns C{self}.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = if True, also run the conditions during lookaheads and alternate testing

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition_action(condition):
        # A factory gives every parse action its own binding of 'condition'.
        # Closing over the loop variable directly would make all the actions
        # evaluate only the last function passed in.
        def pa(s,l,t):
            if not bool(condition(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        # adapt the arity once, when the condition is added, not on every call
        self.parseAction.append(make_condition_action(_trim_arity(fn)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1298 - def setFailAction( self, fn ):
1299 """Define action to perform if parsing fails at this expression. 1300 Fail acton fn is a callable function that takes the arguments 1301 C{fn(s,loc,expr,err)} where: 1302 - s = string being parsed 1303 - loc = location where expression match was attempted and failed 1304 - expr = the parse expression that failed 1305 - err = the exception thrown 1306 The function returns no value. It may throw C{L{ParseFatalException}} 1307 if it is desired to stop parsing immediately.""" 1308 self.failAction = fn 1309 return self
1310
1311 - def _skipIgnorables( self, instring, loc ):
1312 exprsFound = True 1313 while exprsFound: 1314 exprsFound = False 1315 for e in self.ignoreExprs: 1316 try: 1317 while 1: 1318 loc,dummy = e._parse( instring, loc ) 1319 exprsFound = True 1320 except ParseException: 1321 pass 1322 return loc
1323
1324 - def preParse( self, instring, loc ):
1325 if self.ignoreExprs: 1326 loc = self._skipIgnorables( instring, loc ) 1327 1328 if self.skipWhitespace: 1329 wt = self.whiteChars 1330 instrlen = len(instring) 1331 while loc < instrlen and instring[loc] in wt: 1332 loc += 1 1333 1334 return loc
1335
1336 - def parseImpl( self, instring, loc, doActions=True ):
1337 return loc, []
1338
1339 - def postParse( self, instring, loc, tokenlist ):
1340 return tokenlist
1341 1342 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Core (uncached) parse step for this element.

    Runs C{preParse} (when both C{callPreParse} and C{self.callPreparse} are
    true), then C{parseImpl}, then C{postParse}, wraps the tokens in a
    C{ParseResults}, and applies the parse actions when C{doActions} or
    C{self.callDuringTry} is set.  Returns C{(loc, retTokens)}.

    An C{IndexError} raised by C{parseImpl} is converted into a
    C{ParseException} located at the end of the input.  When debugging or a
    fail action is configured, the debug and fail callbacks are invoked around
    the match attempt.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # slow path: callbacks must be notified on start and on failure
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no callbacks to notify
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # only pay for the IndexError guard when the element may index past the end
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a non-None return value replaces the current results
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # a non-None return value replaces the current results
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1414
1415 - def tryParse( self, instring, loc ):
1416 try: 1417 return self._parse( instring, loc, doActions=False )[0] 1418 except ParseFatalException: 1419 raise ParseException( instring, loc, self.errmsg, self)
1420
1421 - def canParseNext(self, instring, loc):
1422 try: 1423 self.tryParse(instring, loc) 1424 except (ParseException, IndexError): 1425 return False 1426 else: 1427 return True
1428
class _UnboundedCache(object):
    """Cache with no size limit; C{get} returns C{not_in_cache} for unknown keys."""
    def __init__(self):
        cache = {}
        # unique sentinel, so that any cached value (including None) can be told apart from a miss
        self.not_in_cache = not_in_cache = object()

        def get(self, key):
            return cache.get(key, not_in_cache)

        def set(self, key, value):
            cache[key] = value

        def clear(self):
            cache.clear()

        # bind the closures as methods of this instance
        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)

if _OrderedDict is not None:
    class _FifoCache(object):
        """First-in/first-out cache holding at most C{size} entries (ordered-dict based)."""
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                if len(cache) > size:
                    # drop the oldest entry
                    cache.popitem(False)

            def clear(self):
                cache.clear()

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)

else:
    class _FifoCache(object):
        """First-in/first-out cache holding at most C{size} entries (dict + key queue fallback)."""
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            # insertion-ordered queue holding exactly the keys currently in 'cache'
            key_fifo = collections.deque()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                # Queue the key before checking for overflow, and only when it
                # is new: the queue then always mirrors the dict, so popleft()
                # cannot hit an empty queue (size 0 included) and re-setting a
                # key keeps its original position, as in the ordered-dict variant.
                if key not in cache:
                    key_fifo.append(key)
                cache[key] = value
                if len(cache) > size:
                    cache.pop(key_fifo.popleft(), None)

            def clear(self):
                cache.clear()
                key_fifo.clear()

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
packrat_cache_lock = RLock()
packrat_cache_stats = [0, 0]

# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Memoizing front end for C{_parseNoCache}.

    Results are cached under the key C{(self, instring, loc, callPreParse, doActions)}.
    Both successful results and C{ParseBaseException}s are cached; a cached
    exception is re-raised on a hit.  Token results are copied when stored and
    when returned from the cache.  Hit/miss counts are kept in
    C{ParserElement.packrat_cache_stats}.
    """
    # indexes into ParserElement.packrat_cache_stats
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, doActions)
    # the lock is held for the whole lookup, including the nested parse on a miss
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, doActions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # store a copy of the tokens; the caller receives the original
                cache.set(lookup, (value[0], value[1].copy()))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(value, Exception):
                raise value
            # hand out a copy of the cached tokens, not the cached object itself
            return (value[0], value[1].copy())
1523 1524 _parse = _parseNoCache 1525 1526 @staticmethod
1527 - def resetCache():
1528 ParserElement.packrat_cache.clear() 1529 ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
1530 1531 _packratEnabled = False 1532 @staticmethod
def enablePackrat(cache_size_limit=128):
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Memoizing is done of
       both valid results and parsing exceptions.

       Parameters:
        - cache_size_limit - (default=C{128}) - if an integer value is provided
          will limit the size of the packrat cache; if None is passed, then
          the cache size will be unbounded; if 0 is passed, the cache will
          be effectively disabled.

       Only the first call has any effect; once packrat parsing is enabled,
       later calls return without changing the cache.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call the class method C{ParserElement.enablePackrat()}.  If
       your program uses C{psyco} to "compile as you go", you must call
       C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
       Python will crash.  For best results, call C{enablePackrat()} immediately
       after importing pyparsing.

       Example::
           import pyparsing
           pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    if cache_size_limit is not None:
        ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
    else:
        ParserElement.packrat_cache = ParserElement._UnboundedCache()
    # from now on every element parses through the memoizing entry point
    ParserElement._parse = ParserElement._parseCache
1566
def parseString( self, instring, parseAll=False ):
    """
    Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        end_loc, tokens = self._parse( instring, 0 )
        if parseAll:
            # only ignorables/whitespace may remain after the match
            end_loc = self.preParse( instring, end_loc )
            end_check = Empty() + StringEnd()
            end_check._parse( instring, end_loc )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return tokens
1616
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    This is a generator; each item is a C{(tokens, start, end)} tuple.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: retry one character past the pre-parsed location
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # the match did not advance past loc: step forward so the scan cannot stall
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1687
def transformString( self, instring ):
    """
    Extension to C{L{scanString}}, to modify matching text with modified tokens that may
    be returned from a parse action.  To use C{transformString}, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking C{transformString()} on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  C{transformString()} returns the resulting transformed string.

    Note that calling this method sets C{keepTabs} to True on the expression.

    Example::
        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
    Prints::
        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    previous_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for tokens, start, end in self.scanString( instring ):
            # unmatched text between the previous match and this one is kept verbatim
            pieces.append( instring[previous_end:start] )
            if tokens:
                if isinstance(tokens,ParseResults):
                    pieces.extend(tokens.asList())
                elif isinstance(tokens,list):
                    pieces.extend(tokens)
                else:
                    pieces.append(tokens)
            previous_end = end
        pieces.append(instring[previous_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(map(_ustr,_flatten(non_empty)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1730
def searchString( self, instring, maxMatches=_MAX_INT ):
    """
    Another extension to C{L{scanString}}, simplifying the access to the tokens found
    to match the given parse expression.  May be called with optional
    C{maxMatches} argument, to clip searching after 'n' matches are found.

    Returns a C{ParseResults} built from the tokens of every match, with the
    match locations discarded.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
    prints::
        ['More', 'Iron', 'Lead', 'Gold', 'I']
    """
    try:
        found = [ match[0] for match in self.scanString( instring, maxMatches ) ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1753
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    When separators are included, the first token of each match is yielded.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    previous_end = 0
    for tokens, start, end in self.scanString(instring, maxMatches=maxsplit):
        # text between the previous separator and this one
        yield instring[previous_end:start]
        if includeSeparators:
            yield tokens[0]
        previous_end = end
    # whatever follows the last separator (possibly an empty string)
    yield instring[previous_end:]
1775
def __add__(self, other ):
    """
    Implementation of + operator - returns C{L{And}}

    A plain string operand is first wrapped in the configured literal class.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1787
def __radd__(self, other ):
    """
    Implementation of + operator when left operand is not a C{L{ParserElement}}

    A plain string operand is first wrapped in the configured literal class.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1799
def __sub__(self, other):
    """
    Implementation of - operator, returns C{L{And}} with error stop

    The resulting C{And} contains C{self}, an C{And._ErrorStop()} marker, and the
    right-hand operand. A string operand is first wrapped using
    C{ParserElement._literalStringClass}; an operand that is still not a
    C{ParserElement} produces a C{SyntaxWarning} and a C{None} result.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return And([self, And._ErrorStop(), rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1811
def __rsub__(self, other):
    """
    Implementation of - operator when left operand is not a C{L{ParserElement}}

    A string operand is first wrapped using C{ParserElement._literalStringClass};
    the result is then C{other - self}. If the operand cannot be treated as a
    C{ParserElement}, a C{SyntaxWarning} is issued and C{None} is returned.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        return lhs - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1823
def __mul__(self, other):
    """
    Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises:
     - C{TypeError} if C{other} is neither an int nor a tuple of ints/C{None}
     - C{ValueError} if the minimum is negative, the maximum is less than the
       minimum, or both are zero
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # pad short tuples with None so that (n,) reads as (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # from here on optElements is the count of *optional* repetitions
            optElements -= minElements
        else:
            # interpolate the types into the message (previously they were passed
            # as extra exception args and the '%s' placeholders were left as-is)
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]), type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # builds Optional(self + Optional(self + ...)) nested n levels deep
            if n > 1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """
    Implementation of * operator when the multiplier is the left operand;
    delegates to C{__mul__} with the same argument.
    """
    product = self.__mul__(other)
    return product
1894
def __or__(self, other):
    """
    Implementation of | operator - returns C{L{MatchFirst}}

    A string operand is first wrapped using C{ParserElement._literalStringClass}.
    If the operand is still not a C{ParserElement}, a C{SyntaxWarning} is issued
    and C{None} is returned.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return MatchFirst([self, rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1906
def __ror__(self, other):
    """
    Implementation of | operator when left operand is not a C{L{ParserElement}}

    A string operand is first wrapped using C{ParserElement._literalStringClass};
    the result is then C{other | self}. If the operand cannot be treated as a
    C{ParserElement}, a C{SyntaxWarning} is issued and C{None} is returned.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        return lhs | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1918
def __xor__(self, other):
    """
    Implementation of ^ operator - returns C{L{Or}}

    A string operand is first wrapped using C{ParserElement._literalStringClass}.
    If the operand is still not a C{ParserElement}, a C{SyntaxWarning} is issued
    and C{None} is returned.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1930
def __rxor__(self, other):
    """
    Implementation of ^ operator when left operand is not a C{L{ParserElement}}

    A string operand is first wrapped using C{ParserElement._literalStringClass};
    the result is then C{other ^ self}. If the operand cannot be treated as a
    C{ParserElement}, a C{SyntaxWarning} is issued and C{None} is returned.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1942
def __and__(self, other):
    """
    Implementation of & operator - returns C{L{Each}}

    A string operand is first wrapped using C{ParserElement._literalStringClass}.
    If the operand is still not a C{ParserElement}, a C{SyntaxWarning} is issued
    and C{None} is returned.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1954
def __rand__(self, other):
    """
    Implementation of & operator when left operand is not a C{L{ParserElement}}

    A string operand is first wrapped using C{ParserElement._literalStringClass};
    the result is then C{other & self}. If the operand cannot be treated as a
    C{ParserElement}, a C{SyntaxWarning} is issued and C{None} is returned.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        return lhs & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1966
def __invert__(self):
    """
    Implementation of ~ operator - wraps this expression in a C{L{NotAny}}
    and returns the wrapper.
    """
    negated = NotAny(self)
    return negated
1972
def __call__(self, name=None):
    """
    Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}.

    When C{name} is C{None} (the default) this is the same as calling C{L{copy}};
    any other value, including the empty string, is handed to C{setResultsName}.

    If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
    passed as C{True}.

    Example::
        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)
1991
def suppress(self):
    """
    Wraps this C{ParserElement} in a C{L{Suppress}} and returns the wrapper;
    useful to keep punctuation from cluttering up returned output.
    """
    wrapped = Suppress(self)
    return wrapped
1998
def leaveWhitespace(self):
    """
    Turns off whitespace skipping for this C{ParserElement} by clearing its
    C{skipWhitespace} flag.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    Returns C{self}, so calls can be chained.
    """
    setattr(self, "skipWhitespace", False)
    return self
2007
def setWhitespaceChars(self, chars):
    """
    Overrides the default whitespace chars.

    Stores C{chars} as this element's C{whiteChars}, turns whitespace skipping
    on, and clears C{copyDefaultWhiteChars}.  Returns C{self}, so calls can be
    chained.
    """
    self.skipWhitespace, self.whiteChars, self.copyDefaultWhiteChars = True, chars, False
    return self
2016
def parseWithTabs(self):
    """
    Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string,
    by setting this element's C{keepTabs} flag.
    Must be called before C{parseString} when the input grammar contains elements that
    match C{<TAB>} characters.

    Returns C{self}, so calls can be chained.
    """
    setattr(self, "keepTabs", True)
    return self
2025
def ignore(self, other):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string argument is wrapped in C{Suppress}.  A C{Suppress} expression is
    appended to C{ignoreExprs} only if it is not already present; any other
    expression is copied, wrapped in C{Suppress}, and appended.  Returns C{self}.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    expr = other
    if isinstance(expr, basestring):
        expr = Suppress(expr)

    if not isinstance(expr, Suppress):
        self.ignoreExprs.append(Suppress(expr.copy()))
    elif expr not in self.ignoreExprs:
        self.ignoreExprs.append(expr)
    return self
2048
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching.

    Each of the three actions may be given as a false value (such as C{None}),
    in which case the corresponding module default debug action is used.
    Stores the actions as the C{debugActions} tuple, sets C{debug} to C{True},
    and returns C{self}.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
2058
def setDebug(self, flag=True):
    """
    Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable, False to disable.

    Enabling installs the three default debug actions through
    C{setDebugActions}; disabling only clears the C{debug} attribute.
    Returns C{self}.

    Example::
        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::
        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions.  Prior to attempting
    to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
    is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
    message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
    return self
2098
def __str__(self):
    """Return this element's C{name} attribute as its string form."""
    label = self.name
    return label
2101
def __repr__(self):
    """Return the result of C{_ustr(self)}, so C{repr} and the string form agree."""
    text = _ustr(self)
    return text
2104
def streamline(self):
    """
    Mark this element as streamlined and reset its C{strRepr} attribute
    to C{None}.  Returns C{self}.
    """
    self.streamlined, self.strRepr = True, None
    return self
2109
def checkRecursion(self, parseElementList):
    """
    Recursion-check hook; this implementation does nothing and returns C{None}.
    """
    return None
2112
def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Parameters:
     - validateTrace - not used by this implementation; accepted so that the
       signature stays call-compatible.  The default is C{None} rather than a
       shared mutable list.

    Delegates to C{checkRecursion}, always starting from a fresh empty list.
    """
    self.checkRecursion([])
2118
def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    Parameters:
     - file_or_filename - an object with a C{read()} method, or a path that is
       opened in text mode when reading it as an object raises C{AttributeError}
     - parseAll - passed through to C{parseString}

    Returns whatever C{parseString} returns for the file's contents.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        with open(file_or_filename, "r") as handle:
            text = handle.read()
    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
2138
def __eq__(self, other):
    """
    Equality comparison.

     - against another C{ParserElement}: true if it is the same object or both
       have equal C{vars()}
     - against a string: true if this expression C{matches} the string
     - against anything else: the result of comparing the C{super} proxy with C{other}
    """
    if isinstance(other, ParserElement):
        if self is other:
            return True
        return vars(self) == vars(other)
    if isinstance(other, basestring):
        return self.matches(other)
    return super(ParserElement,self)==other
2146
def __ne__(self, other):
    """Inequality comparison, defined as the negation of C{self == other}."""
    same = (self == other)
    return not same
2149
def __hash__(self):
    """Hash by object identity: the hash of C{id(self)}."""
    identity = id(self)
    return hash(identity)
2152
def __req__(self, other):
    """Returns the result of C{self == other}."""
    outcome = (self == other)
    return outcome
2155
def __rne__(self, other):
    """Returns the negation of C{self == other}."""
    same = (self == other)
    return not same
2158
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Returns C{True} if C{parseString} completes, C{False} if it raises a
    C{ParseBaseException}.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2177
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and results is a list of C{(test, result)}
    tuples, one per test string run; C{result} is either the value returned by
    C{parseString} or the exception that was raised

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success
    """
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    if isinstance(comment, basestring):
        comment = Literal(comment)

    success = True
    allResults = []
    pendingComments = []
    for test in tests:
        # comment lines, and blank lines directly following comments, are held
        # back and emitted above the next real test
        isComment = comment is not None and comment.matches(test, False)
        if isComment or pendingComments and not test:
            pendingComments.append(test)
            continue
        if not test:
            continue

        report = ['\n'.join(pendingComments), test]
        pendingComments = []
        try:
            result = self.parseString(test, parseAll=parseAll)
            report.append(result.dump(full=fullDump))
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' not in test:
                report.append(' '*pe.loc + '^' + fatal)
            else:
                # multi-line test: echo the offending line, then the marker under its column
                report.append(line(pe.loc, test))
                report.append(' '*(col(pe.loc,test)-1) + '^' + fatal)
            report.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            report.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                report.append('')
            print('\n'.join(report))

        allResults.append((test, result))

    return success, allResults
2300
class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass, for defining atomic matching patterns.
    """
    def __init__(self):
        # tokens are initialised with savelist turned off
        super(Token, self).__init__(savelist=False)
2308
class Empty(Token):
    """
    An empty token, will always match.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        self.mayReturnEmpty, self.mayIndexError = True, False
2319
class NoMatch(Token):
    """
    A token that will never match.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty, self.mayIndexError = True, False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, doActions=True):
        # unconditional failure at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
2334
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """
    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string: warn, then turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
_L = Literal
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False):
        super(Keyword, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done on upper-cased text and ident chars
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        matchLen = self.matchLen
        if self.caseless:
            if instring[loc:loc+matchLen].upper() == self.caselessmatch:
                # keyword must not run into an identifier character on either side
                if loc >= len(instring)-matchLen or instring[loc+matchLen].upper() not in self.identChars:
                    if loc == 0 or instring[loc-1].upper() not in self.identChars:
                        return loc+matchLen, self.match
        else:
            if instring[loc] == self.firstMatchChar:
                if matchLen == 1 or instring.startswith(self.match, loc):
                    if loc >= len(instring)-matchLen or instring[loc+matchLen] not in self.identChars:
                        if loc == 0 or instring[loc-1] not in self.identChars:
                            return loc+matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        # the copy's identChars is reset to the current class-level default
        duplicate = super(Keyword, self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2439
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__(self, matchString):
        # the base Literal holds the upper-cased form used for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc+self.matchLen, self.returnString
2462
class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Constructs the base L{Keyword} with C{caseless=True}.  Matching is done by
    the inherited C{Keyword.parseImpl}, whose caseless branch requires that the
    keyword is neither preceded nor followed by one of C{identChars}.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    # NOTE: no parseImpl override here.  A local override used to repeat the
    # caseless comparison but checked only the character *after* the keyword,
    # so it also matched when the keyword was the tail of a longer identifier.
    # Inheriting Keyword.parseImpl keeps both boundary checks in one place.
2480
class Word(Token):
    """
    Token for matching words composed of allowed character sets.
    Defined with string containing all allowed initial characters,
    an optional string containing allowed body characters (if omitted,
    defaults to the initial character set), and an optional minimum,
    maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction. An optional
    C{excludeChars} parameter can list characters that might be found in
    the input C{bodyChars} string; useful to define a word of all printables
    except for one or two characters, for instance.

    L{srange} is useful for defining custom character set strings for defining
    C{Word} expressions, using range notation from regular expression character sets.

    A common mistake is to use C{Word} to match a specific literal string, as in
    C{Word("Address")}. Remember that C{Word} uses the string argument to define
    I{sets} of matchable characters. This expression would match "Add", "AAA",
    "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
    To match an exact literal string, use L{Literal} or L{Keyword}.

    pyparsing includes helper strings for building Words:
     - L{alphas}
     - L{nums}
     - L{alphanums}
     - L{hexnums}
     - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
     - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - L{printables} (any non-whitespace character)

    Example::
        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums+'-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # with default length limits and no space in the character sets, the
        # word can be matched by an equivalent compiled regular expression
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # best-effort: if the generated pattern cannot be compiled,
                # parseImpl falls back to character-by-character matching
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                raise ParseException(instring, loc, self.errmsg, self)

            loc = result.end()
            return loc, result.group()

        if not(instring[ loc ] in self.initChars):
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that keeps going past the limit is a failure
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        # prefer the base-class string form; fall through to the generated
        # "W:(...)" representation if that upcall raises
        try:
            return super(Word,self).__str__()
        except Exception:
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first 4 characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
2638
class Regex(Token):
    """
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\\d+\\.\\d*")
        date = Regex(r'(?P<year>\\d{4})-(?P<month>\\d\\d)-(?P<day>\\d\\d)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already-compiled RE object, which is used directly.
        Raises C{ValueError} for any other type; an invalid pattern string
        issues a C{SyntaxWarning} and re-raises the C{re} compile error."""
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # record the pattern's own source text (str(pattern) would give
            # the "re.compile(...)" repr instead)
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        # expose each named group as a named result
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        # prefer the base-class string form; fall through to the generated
        # "Re:(...)" representation if that upcall raises
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
2711
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:
     - quoteChar - string of one or more characters defining the quote delimiting string
     - escChar - character to escape quotes, typically backslash (default=C{None})
     - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
     - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
     - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
     - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
     - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        """Build the regular expression that recognizes the quoted string.

        Raises C{SyntaxError} (after a C{SyntaxWarning}) if C{quoteChar} or
        C{endQuoteChar} is empty once surrounding whitespace is stripped, and
        re-raises the C{re} error if the generated pattern does not compile.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError("quoteChar cannot be the empty string")

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError("endQuoteChar cannot be the empty string")

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # characters that may never appear unescaped inside the quotes: the
        # first end-quote character, the escape character and, unless the
        # string may span lines, CR and LF
        if escChar is not None:
            escapedEscChar = _escapeRegexRangeChars(escChar)
        else:
            escapedEscChar = ''
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            lineBreaks = ''
        else:
            self.flags = 0
            lineBreaks = r'\n\r'
        self.pattern = r'%s(?:[^%s%s%s]' % \
            ( re.escape(self.quoteChar),
              _escapeRegexRangeChars(self.endQuoteChar[0]),
              lineBreaks,
              escapedEscChar )

        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the string
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
        except re.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise
        self.reString = self.pattern

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at C{loc}.

        Returns the end position and the matched text.  When C{unquoteResults}
        is set, the quotes are stripped and whitespace escapes, escaped
        characters and escaped quotes are converted as configured.
        Raises C{ParseException} if no quoted string starts at C{loc}.
        """
        # cheap first-character test before running the full expression
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring, loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    ws_map = {
                        r'\t' : '\t',
                        r'\n' : '\n',
                        r'\f' : '\f',
                        r'\r' : '\r',
                    }
                    for wslit,wschar in ws_map.items():
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape in a plain literal
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the base class string if available, else a cached description of the quotes."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            # presumably the base implementation fails while no name has been
            # assigned yet; fall back to a description of the quote characters
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2848
class CharsNotIn(Token):
    """
    Token for matching words composed of characters I{not} in a given set (will
    include whitespace in matched characters if not listed in the provided exclusion set - see example).
    Defined with string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the excluded characters and the length limits.

        Raises C{ValueError} if C{min} is less than 1.  A positive C{exact}
        overrides both C{min} and C{max}.
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from C{loc} until an excluded character, the
        maximum length or the end of C{instring} is reached.

        Returns the end position and the matched text; raises C{ParseException}
        if the first character is excluded or fewer than C{minLen} characters match.
        """
        if instring[loc] in self.notChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        notchars = self.notChars
        maxlen = min( start+self.maxLen, len(instring) )
        while loc < maxlen and \
              (instring[loc] not in notchars):
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base class string if available, else a cached C{!W:(...)}
        description showing at most the first four excluded characters."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            # presumably the base implementation fails while no name has been
            # assigned yet; fall back to a description of the excluded characters
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2920
class White(Token):
    """
    Special matching class for matching whitespace.  Normally, whitespace is ignored
    by pyparsing grammars.  This class is included when some whitespace structures
    are significant.  Define with a string containing the whitespace characters to be
    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
    as defined for the C{L{Word}} class.
    """
    # display names used to build the element name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Store the characters to match and the length limits.

        The characters in C{ws} are removed from this element's own set of
        skippable whitespace.  A positive C{exact} overrides both C{min} and C{max}.
        """
        super(White,self).__init__()
        self.matchWhite = ws
        self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )
        #~ self.leaveWhitespace()
        # characters without a display name fall back to their repr rather
        # than failing with a KeyError
        self.name = "".join(White.whiteStrs.get(c, repr(c)) for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace characters starting at C{loc}.

        Returns the end position and the matched text; raises C{ParseException}
        if the character at C{loc} is not one of them or the run is shorter
        than C{minLen}.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        maxloc = min( start + self.maxLen, len(instring) )
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
2970
class _PositionToken(Token):
    """
    Common initialization shared by the tokens derived from this class: the
    element is named after its concrete class, C{mayReturnEmpty} is set and
    C{mayIndexError} is cleared.
    """
    def __init__( self ):
        super(_PositionToken, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = type(self).__name__
2978
class GoToColumn(_PositionToken):
    """
    Token to advance to a specific column of input text; useful for tabular report scraping.
    """
    def __init__( self, colno ):
        """Remember the target column C{colno}."""
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and then whitespace, stopping early once
        the target column is reached.  Returns the new position."""
        if col(loc, instring) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        end = len(instring)
        while loc < end and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the position of the target column together with the text
        between C{loc} and that position.  Raises C{ParseException} if C{loc}
        is already past the target column."""
        current = col(loc, instring)
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + (self.col - current)
        return target, instring[loc:target]
3003
class LineStart(_PositionToken):
    """
    Matches if current position is at the beginning of a line within the parse string
    """
    def __init__( self ):
        """Exclude the newline from the skippable whitespace and set the error message."""
        super(LineStart, self).__init__()
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")
        self.setWhitespaceChars(nonNewlineWhite)
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Return C{loc + 1} if the inherited pre-parse stops on a newline,
        otherwise C{loc} unchanged."""
        skipped = super(LineStart, self).preParse(instring, loc)
        # NOTE(review): instring[skipped] is not bounds-checked here
        if instring[skipped] == "\n":
            return loc + 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens at position 0, at the position C{preParse}
        reports for the start of the string, or right after a newline;
        otherwise raise C{ParseException}."""
        if loc == 0:
            return loc, []
        if loc == self.preParse(instring, 0):
            return loc, []
        if instring[loc-1] == "\n":
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)
3025
class LineEnd(_PositionToken):
    """
    Matches if current position is at the end of a line within the parse string
    """
    def __init__( self ):
        """Exclude the newline from the skippable whitespace and set the error message."""
        super(LineEnd, self).__init__()
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")
        self.setWhitespaceChars(nonNewlineWhite)
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a newline at C{loc} and return it, or succeed with no
        tokens exactly at the end of C{instring}; otherwise raise
        C{ParseException}.  The returned position is C{loc + 1} in both cases."""
        end = len(instring)
        if loc == end:
            return loc+1, []
        if loc < end and instring[loc] == "\n":
            return loc+1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
3045
class StringStart(_PositionToken):
    """
    Matches if current position is at the beginning of the parse string
    """
    def __init__( self ):
        """Set the error message reported when the match fails."""
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens at position 0, or at the position that
        pre-parsing from 0 leads to (i.e. only whitespace and ignorables
        precede C{loc}); otherwise raise C{ParseException}."""
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3060
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        """Set the error message reported when the match fails."""
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when C{loc} is at or beyond the end of C{instring}.

        Exactly at the end the returned position is C{loc + 1}; beyond the end
        C{loc} is returned unchanged.  Raises C{ParseException} if input remains.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc+1, []
        # loc > end: the three comparisons are exhaustive, no further case exists
        return loc, []
3078
class WordStart(_PositionToken):
    r"""
    Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        """Store C{wordChars} as a set and set the error message."""
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with no tokens at position 0, or where the previous
        character is not a word character and the current one is; otherwise
        raise C{ParseException}."""
        if loc != 0:
            if (instring[loc-1] in self.wordChars or
                instring[loc] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3098
class WordEnd(_PositionToken):
    r"""
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        """Store C{wordChars} as a set, disable whitespace skipping and set the error message."""
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with no tokens at or beyond the end of C{instring}, or where
        the previous character is a word character and the current one is
        not; otherwise raise C{ParseException}."""
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # at loc 0 no word can have ended; without this test
            # instring[loc-1] would wrap around to the last character
            if (loc == 0 or
                instring[loc] in self.wordChars or
                instring[loc-1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3120
class ParseExpression(ParserElement):
    """
    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
    """
    def __init__( self, exprs, savelist = False ):
        """Store the contained expressions in C{self.exprs}.

        C{exprs} may be a single string, a generator, any iterable, or a
        single object.  A lone string, or an iterable consisting only of
        strings, is wrapped with C{ParserElement._literalStringClass}.
        """
        super(ParseExpression,self).__init__(savelist)
        # the ABC aliases were removed from the collections namespace in
        # Python 3.10; fall back to the old location on older interpreters
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
        elif isinstance( exprs, Iterable ):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(ParserElement._literalStringClass, exprs)
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the contained expression(s) selected by C{i}."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions, reset the cached string
        representation, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # copy the contained expressions before changing them
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as an ignorable expression on this element and on
        every contained expression; a C{Suppress} that is already registered
        is not added again.  Returns C{self}."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base class string if available, else a cached
        C{ClassName:(exprs)} description."""
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            # presumably the base implementation fails while no name has been
            # assigned yet; fall back to a description of the contained expressions
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline the contained expressions and flatten directly nested
        expressions of the same class.  Returns C{self}."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base class implementation and return its result."""
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=None ):
        """Validate every contained expression, then check this expression for recursion.

        C{validateTrace} is the list of enclosing expressions visited so far
        (default: empty); it is never modified.
        """
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are copies too."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
3233
class And(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element named C{'-'}; failures of the expressions after it
        are raised as C{ParseSyntaxException} by C{And.parseImpl}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Take whitespace handling from the first expression; the sequence
        may match empty only if every expression may."""
        super(And, self).__init__(exprs, savelist)
        first = self.exprs[0]
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.setWhitespaceChars(first.whiteChars)
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse every contained expression in order and return the end
        position with the accumulated tokens.

        After an C{_ErrorStop} marker, a C{ParseBaseException} or
        C{IndexError} from a later expression is raised as
        C{ParseSyntaxException}.
        """
        # the first element is parsed with callPreParse=False, since the
        # string was already pre-parsed as part of this And's own pre-parsing
        head = self.exprs[0]
        loc, resultlist = head._parse(instring, loc, doActions, callPreParse=False)
        pastErrorStop = False
        for expr in self.exprs[1:]:
            if isinstance(expr, And._ErrorStop):
                pastErrorStop = True
                continue
            if not pastErrorStop:
                loc, tokens = expr._parse(instring, loc, doActions)
            else:
                try:
                    loc, tokens = expr._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
            if tokens or tokens.haskeys():
                resultlist += tokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append C{other} (a string is wrapped as a literal first) and return C{self}."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)  # And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Check the contained expressions for recursion, stopping after the
        first one that cannot match empty."""
        trail = parseElementList[:] + [self]
        for expr in self.exprs:
            expr.checkRecursion(trail)
            if not expr.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the assigned name, else a cached C{{a b c}} description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = " ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + parts + "}"

        return self.strRepr
3308
class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        """The alternation may match empty if any alternative may, or if there
        are no alternatives."""
        super(Or, self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative, then parse with the matching alternatives
        from longest trial match to shortest and return the first success.

        If nothing succeeds, the exception that got furthest into the input is
        raised with this element's error message; with no alternatives at all
        a C{ParseException} is raised.
        """
        furthestLoc = -1
        furthestExc = None
        candidates = []
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring, len(instring), alt.errmsg, self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, alt))

        if candidates:
            candidates.sort(key=lambda pair: -pair[0])
            for _, alt in candidates:
                try:
                    return alt._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestLoc:
                        furthestExc = err
                        furthestLoc = err.loc

        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ixor__(self, other ):
        """Append C{other} (a string is wrapped as a literal first) and return C{self}."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)  # Or( [ self, other ] )

    def __str__( self ):
        """Return the assigned name, else a cached C{{a ^ b}} description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = " ^ ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + parts + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)
3387
class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        """The alternation may match empty if any alternative may, or if there
        are no alternatives."""
        super(MatchFirst, self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses.

        If none does, the exception that got furthest into the input is raised
        with this element's error message; with no alternatives at all a
        C{ParseException} is raised.
        """
        furthestLoc = -1
        furthestExc = None
        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring, len(instring), alt.errmsg, self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ior__(self, other ):
        """Append C{other} (a string is wrapped as a literal first) and return C{self}."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)  # MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the assigned name, else a cached C{{a | b}} description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = " | ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + parts + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)
3455
class Each(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__( self, exprs, savelist = True ):
        """The group may match empty only if every expression may; the
        required/optional grouping is built lazily on the first parse."""
        super(Each, self).__init__(exprs, savelist)
        self.initExprGroups = True
        self.skipWhitespace = True
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Find an order in which the contained expressions match, then parse
        them in that order and return the end position with the combined results.

        Raises C{ParseException} if a required expression never matched.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            self.opt1map = dict((id(e.expr), e) for e in self.exprs if isinstance(e, Optional))
            wrappedOptionals = [e.expr for e in self.exprs if isinstance(e, Optional)]
            emptyMatchers = [e for e in self.exprs if e.mayReturnEmpty and not isinstance(e, Optional)]
            self.optionals = wrappedOptionals + emptyMatchers
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [e for e in self.exprs if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        trialLoc = loc
        pendingRequired = self.required[:]
        pendingOptional = self.optionals[:]
        matchOrder = []

        # keep sweeping over the remaining expressions until a whole sweep fails
        while True:
            attempts = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
            failures = 0
            for expr in attempts:
                try:
                    trialLoc = expr.tryParse(instring, trialLoc)
                except ParseException:
                    failures += 1
                else:
                    matchOrder.append(self.opt1map.get(id(expr), expr))
                    if expr in pendingRequired:
                        pendingRequired.remove(expr)
                    elif expr in pendingOptional:
                        pendingOptional.remove(expr)
            if failures == len(attempts):
                break

        if pendingRequired:
            missing = ", ".join(_ustr(e) for e in pendingRequired)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in pendingOptional]

        collected = []
        for expr in matchOrder:
            loc, tokens = expr._parse(instring, loc, doActions)
            collected.append(tokens)

        return loc, sum(collected, ParseResults([]))

    def __str__( self ):
        """Return the assigned name, else a cached C{{a & b}} description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = " & ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + parts + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Check every contained expression for recursion."""
        trail = parseElementList[:] + [self]
        for expr in self.exprs:
            expr.checkRecursion(trail)
3579
3580 3581 -class ParseElementEnhance(ParserElement):
3582 """ 3583 Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens. 3584 """
3585 - def __init__( self, expr, savelist=False ):
3586 super(ParseElementEnhance,self).__init__(savelist) 3587 if isinstance( expr, basestring ): 3588 if issubclass(ParserElement._literalStringClass, Token): 3589 expr = ParserElement._literalStringClass(expr) 3590 else: 3591 expr = ParserElement._literalStringClass(Literal(expr)) 3592 self.expr = expr 3593 self.strRepr = None 3594 if expr is not None: 3595 self.mayIndexError = expr.mayIndexError 3596 self.mayReturnEmpty = expr.mayReturnEmpty 3597 self.setWhitespaceChars( expr.whiteChars ) 3598 self.skipWhitespace = expr.skipWhitespace 3599 self.saveAsList = expr.saveAsList 3600 self.callPreparse = expr.callPreparse 3601 self.ignoreExprs.extend(expr.ignoreExprs)
3602
3603 - def parseImpl( self, instring, loc, doActions=True ):
3604 if self.expr is not None: 3605 return self.expr._parse( instring, loc, doActions, callPreParse=False ) 3606 else: 3607 raise ParseException("",loc,self.errmsg,self)
3608
3609 - def leaveWhitespace( self ):
3610 self.skipWhitespace = False 3611 self.expr = self.expr.copy() 3612 if self.expr is not None: 3613 self.expr.leaveWhitespace() 3614 return self
3615
3616 - def ignore( self, other ):
3617 if isinstance( other, Suppress ): 3618 if other not in self.ignoreExprs: 3619 super( ParseElementEnhance, self).ignore( other ) 3620 if self.expr is not None: 3621 self.expr.ignore( self.ignoreExprs[-1] ) 3622 else: 3623 super( ParseElementEnhance, self).ignore( other ) 3624 if self.expr is not None: 3625 self.expr.ignore( self.ignoreExprs[-1] ) 3626 return self
3627
3628 - def streamline( self ):
3629 super(ParseElementEnhance,self).streamline() 3630 if self.expr is not None: 3631 self.expr.streamline() 3632 return self
3633
3634 - def checkRecursion( self, parseElementList ):
3635 if self in parseElementList: 3636 raise RecursiveGrammarException( parseElementList+[self] ) 3637 subRecCheckList = parseElementList[:] + [ self ] 3638 if self.expr is not None: 3639 self.expr.checkRecursion( subRecCheckList )
3640
def validate( self, validateTrace=[] ):
    """
    Validate the contained expression, then check this element for recursion.

    C{validateTrace} is the list of elements visited so far; it is copied, not
    modified, so the shared default list is never mutated here.
    """
    trace = validateTrace[:]+[self]
    contained = self.expr
    if contained is not None:
        contained.validate(trace)
    self.checkRecursion( [] )
3646
def __str__( self ):
    """
    Return the base class string form when that succeeds; otherwise return a
    cached C{"ClassName:(expr)"} representation built from the contained
    expression.
    """
    try:
        return super(ParseElementEnhance,self).__str__()
    except Exception:
        # Fall back to the generated representation below.  Only ordinary
        # errors are swallowed; a bare "except:" here also hid
        # KeyboardInterrupt and SystemExit.
        pass

    if self.strRepr is None and self.expr is not None:
        self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
    return self.strRepr
3656
class FollowedBy(ParseElementEnhance):
    """
    Positive lookahead for the given parse expression.  C{FollowedBy} only
    verifies that the expression matches at the current position; it does
    I{not} advance the parsing position and always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        # the lookahead itself never consumes input
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # tryParse raises if the expression does not match here; its result is discarded
        self.expr.tryParse(instring, loc)
        return loc, []
3682
class NotAny(ParseElementEnhance):
    """
    Negative lookahead for the given parse expression.  C{NotAny} only verifies
    that the expression does I{not} match at the current position; it does
    I{not} advance the parsing position and always returns a null token list.
    C{NotAny} also does I{not} skip over leading whitespace.  May be constructed
    using the '~' operator.

    Example::
        # two spellings of the same lookahead
        not_end = NotAny(Literal("end"))
        not_end = ~Literal("end")
    """
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # do NOT use self.leaveWhitespace() here - that would propagate to the
        # contained expression; only this element's own flag is cleared
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        # a successful match of the contained expression is the failure case
        if self.expr.canParseNext(instring, loc):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{%s}" % _ustr(self.expr)
        return self.strRepr
3715
class _MultipleMatch(ParseElementEnhance):
    """
    Shared implementation of repeated matching of one expression, with an
    optional C{stopOn} sentinel that ends the repetition.
    """
    def __init__( self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # stored negated: "parsing" not_ender succeeds only where the sentinel does not match
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the contained expression once (failure propagates to the caller),
        then keep matching until it fails or the sentinel is found.  Returns
        the final location and the accumulated tokens.
        """
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_ender = self.not_ender.tryParse if self.not_ender is not None else None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_ender is not None:
            reject_ender(instring, loc)
        loc, tokens = parse_expr(instring, loc, doActions, callPreParse=False)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_ender is not None:
                    reject_ender(instring, loc)
                preloc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_expr(instring, preloc, doActions)
                if more_tokens or more_tokens.haskeys():
                    tokens += more_tokens
        except (ParseException,IndexError):
            # first failure after the initial match simply ends the repetition
            pass

        return loc, tokens
3752
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
       (only required if the sentinel would ordinarily match the repetition
       expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{%s}..." % _ustr(self.expr)
        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        # the named element returned by the base class always saves its results as a list
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
3792
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
       (only required if the sentinel would ordinarily match the repetition
       expression)

    Example: similar to L{OneOrMore}
    """
    def __init__( self, expr, stopOn=None):
        super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            result = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException,IndexError):
            # not even one match: succeed at the starting location with no tokens
            return loc, []
        return result

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[%s]..." % _ustr(self.expr)
        return self.strRepr
3823
class _NullToken(object):
    """Placeholder object that is always false and whose string form is empty."""

    def __bool__(self):
        return False

    # Python 2 spelling of the truth-value hook
    __nonzero__ = __bool__

    def __str__(self):
        return ""


# unique sentinel used as the "no default supplied" marker for Optional
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that may match at most once (zero or one times)
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__( self, expr, default=_optionalNotMatched ):
        super(Optional,self).__init__( expr, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            # no match: stay at loc and substitute the default, if one was given
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                # keep the default reachable under the expression's results name
                tokens = ParseResults([ fallback ])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[%s]" % _ustr(self.expr)
        return self.strRepr
3894
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
       (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
       comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
       included in the skipped text; if found before the target expression is found,
       the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain-string failOn is promoted to a literal expression
        if isinstance(failOn, basestring):
            failOn = ParserElement._literalStringClass(failOn)
        self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Scan forward from C{loc} one character at a time until the target
        expression matches, and return the text that was skipped (plus the
        target's tokens when C{include} was set).  Raises C{ParseException}
        if the end of the input is passed without a match.
        """
        startloc = loc
        instrlen = len(instring)
        parse_target = self.expr._parse
        fail_on_matches = self.failOn.canParseNext if self.failOn is not None else None
        try_ignore = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            # a failOn match stops the scan at the current position
            if fail_on_matches is not None and fail_on_matches(instring, tmploc):
                break

            if try_ignore is not None:
                # advance past any run of ignore expressions
                while True:
                    try:
                        tmploc = try_ignore(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                parse_target(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match here, try one character further on
                tmploc += 1
            else:
                # target expression matches at tmploc, done scanning
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skipresult = ParseResults(instring[startloc:loc])

        if self.includeMatch:
            # parse the target for real this time, honoring doActions
            loc, matched = parse_target(instring,loc,doActions,callPreParse=False)
            skipresult += matched

        return loc, skipresult
4009
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """
        Assign the contained expression (a plain string is converted using
        C{ParserElement._literalStringClass}) and copy its parsing attributes
        onto this element.  Returns C{self}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        # '<<=' is the same assignment as '<<'
        return self << other

    def leaveWhitespace( self ):
        # only this element's flag is changed; the contained expression is not touched
        self.skipWhitespace = False
        return self

    def streamline( self ):
        # the flag is set before descending, so a grammar that refers back to
        # this element does not streamline it a second time
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        # validateTrace is copied, never mutated, so the shared default is safe
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """
        Return the assigned name if there is one, otherwise C{"<ClassName>: ..."}.

        The contained expression is deliberately not rendered.  An earlier
        implementation that did so sat unreachable after the return below
        (stubbed out because it created memory and performance issues) and
        has been removed as dead code.
        """
        if hasattr(self,"name"):
            return self.name
        return self.__class__.__name__ + ": ..."

    def copy(self):
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            # nothing assigned yet: return a new Forward that refers to this one
            ret = Forward()
            ret <<= self
            return ret
4090
class _ForwardNoRecurse(Forward):
    """Variant of C{Forward} whose string form is always C{"..."}."""

    def __str__( self ):
        return "..."
4094
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but not forwarded; saveAsList is always reset to False
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
4102
class Combine(TokenConverter):
    """
    Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        if adjacent:
            # suppress whitespace-stripping in contained parse expressions ...
            self.leaveWhitespace()
        self.adjacent = adjacent
        # ... but re-enable it on the Combine itself
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore( self, other ):
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            # adjacent mode: register on this element only, via the
            # ParserElement implementation
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        # start from an emptied copy of the incoming results
        retToks = tokenlist.copy()
        del retToks[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        retToks += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and retToks.haskeys():
            return [ retToks ]
        return retToks
4146
class Group(TokenConverter):
    """
    Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # nest the matched tokens one level deeper
        grouped = [ tokenlist ]
        return grouped
4167
class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__( self, expr ):
        super(Dict,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        for idx, tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue

            # the first token of each entry is its key; int keys are stored as stripped strings
            key = tok[0]
            if isinstance(key, int):
                key = _ustr(tok[0]).strip()

            if len(tok) == 1:
                # key with no value
                value = ""
            elif len(tok) == 2 and not isinstance(tok[1], ParseResults):
                # key with a single plain value
                value = tok[1]
            else:
                # everything after the key; a lone remaining item without
                # named results is stored as that item itself
                rest = tok.copy()
                del rest[0]
                if len(rest) != 1 or (isinstance(rest, ParseResults) and rest.haskeys()):
                    value = rest
                else:
                    value = rest[0]
            tokenlist[key] = _ParseResultsWithOffset(value, idx)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
4231
class Suppress(TokenConverter):
    """
    Converter for ignoring the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def postParse( self, instring, loc, tokenlist ):
        # discard whatever the contained expression matched
        return []

    def suppress( self ):
        # already suppressing, so no additional wrapper is created
        return self
4257
class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.

    The wrapped action runs on the first call; every later call raises
    C{ParseException} until C{reset()} is called.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        if self.called:
            raise ParseException(s,l,"")
        # flag is set only after the action returns; if the action raises,
        # the wrapper stays armed
        results = self.callable(s,l,t)
        self.called = True
        return results

    def reset(self):
        # re-arm the wrapper so the action may run once more
        self.called = False
4274
def traceParseAction(f):
    """
    Decorator for debugging parse actions.  Writes a line to C{sys.stderr} when
    the action is entered and another when it returns or raises.

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)
    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is the bound instance; prefix its class name
            label = paArgs[0].__class__.__name__ + '.' + label
        log = sys.stderr.write
        log( ">>entering %s(line: '%s', %d, %r)\n" % (label,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            log( "<<leaving %s (exception: %s)\n" % (label,exc) )
            raise
        log( "<<leaving %s (ret: %r)\n" % (label,ret) )
        return ret

    # report the wrapped action's name when it has one
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
    if combine:
        # delimiters stay in the match and everything is joined into one token
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        # delimiters are matched but dropped from the results
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    return listExpr.setName(dlName)
4334
def countedArray( expr, intExpr=None ):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

    If C{intExpr} is given, a copy of it is used to parse the leading count
    instead of the default C{Word(nums)} converted to C{int}.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s,l,t):
        # once the count is known, define the array part as exactly that many exprs
        count = t[0]
        if count:
            counted = Group(And([expr]*count))
        else:
            counted = Group(empty)
        arrayExpr << counted
        # the count itself contributes no tokens
        return []

    if intExpr is not None:
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
def _flatten(L):
    """
    Return a flat list of the items of C{L}, recursively expanding every
    element that is itself a C{list}.  Other containers are kept as single items.
    """
    flat = []
    for item in L:
        flat += _flatten(item) if isinstance(item, list) else [item]
    return flat
4367
def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several tokens: require each flattened token, in order
            literals = _flatten(t.asList())
            rep << And(Literal(tt) for tt in literals)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with its own copy of expr
    rep <<= expr.copy()

    def copyTokenToRepeater(s,l,t):
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s,l,t):
            # the repeat must yield exactly the tokens of the earlier match
            if _flatten(t.asList()) != expected:
                raise ParseException("",0,"")

        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def _escapeRegexRangeChars(s):
    """
    Return C{s} with the characters that are special inside a regex character
    class (backslash, '^', '-' and ']') backslash-escaped, and with newline and
    tab characters spelled as their two-character escapes.
    """
    # the backslash must be handled first so that the backslashes added for
    # the other characters are not themselves doubled
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special, _bslash + special)
    s = s.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(s)
4431
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # The ABC aliases in the top-level collections module were removed in
    # Python 3.10; fall back to the old location only where collections.abc
    # does not exist (Python 2).
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix ahead of that prefix so the longer alternative is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for this position, move on to the next one
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character class is enough
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # best effort only: any ordinary failure falls back to MatchFirst,
            # but KeyboardInterrupt/SystemExit are no longer swallowed
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4504
def dictOf( key, value ):
    """
    Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
    in the proper order.  The key pattern can include delimiting markers or punctuation,
    as long as they are suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the C{Dict} results can include named token
    fields.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one group per key/value pair, repeated zero or more times, wrapped in a Dict
    entry = Group( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
4539
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    def reportLocation(s, loc, t):
        # the marker's only "token" is the location at which it matched
        return loc

    locMarker = Empty().setParseAction(reportLocation)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")

    if asString:
        def extractText(s,l,t):
            # replace the results with the slice of input between the two markers
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # remove the two marker entries and replace the token list in
            # place with the slice of input between them
            start = t.pop('_original_start')
            end = t.pop('_original_end')
            t[:] = [s[start:end]]

    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr
def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty.

    Wraps C{expr} in a C{TokenConverter} whose parse action returns only the
    first token of the match.
    """
    ungrouped = TokenConverter(expr)
    ungrouped.setParseAction(lambda t:t[0])
    return ungrouped
def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the input string.
    This helper adds the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expression that yields the current parse location
    startLocator = Empty().setParseAction(lambda s,l,t: l)
    # the end locator must not skip whitespace, so it reports where the
    # wrapped expression actually stopped
    endLocator = startLocator.copy().leaveWhitespace()
    return Group(startLocator("locn_start") + expr("value") + endLocator("locn_end"))


# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# mini-grammar used by srange() to parse regex-style '[...]' character sets

# a backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# '\x##', '\X##' or legacy '\0x##' -> the character with that hex code point.
# Only the escape prefix is removed (not every leading '\', '0' or 'x'
# character), so all-zero values such as '\x00' and the upper-case 'X' form
# accepted by the regex convert correctly.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(re.sub(r"^\\0?[xX]", "", t[0]),16)))
# '\0##' -> the character with that octal code point
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
# 'a-z' style range; grouped so srange() can tell ranges from single chars
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)

    If the input cannot be parsed or expanded, an empty string is returned.
    """
    # a ParseResults part is a [first, last] range to expand; anything else
    # is already a single character
    _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # best-effort helper: malformed input yields "" - but do not swallow
        # KeyboardInterrupt/SystemExit as a bare except would
        return ""
4645
def matchOnlyAtCol(n):
    """
    Helper method for defining parse actions that require matching at a specific
    column in the input text.

    Returns a parse action that raises C{ParseException} unless the match
    location is at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        actualCol = col(locn,strg)
        if actualCol == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
def replaceWith(replStr):
    """
    Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    # the parse action ignores its arguments and returns a fresh one-item
    # list holding the replacement value on every call
    constantAction = lambda s,l,t: [replStr]
    return constantAction
4669
def removeQuotes(s,l,t):
    """
    Helper parse action for removing quotation marks from parsed quoted strings.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    # drop the first and last character of the first token
    quoted = t[0]
    return quoted[1:-1]
4683
def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a ParseResults list. If any additional
    args are passed, they are forwarded to the given function as additional arguments after
    the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
    parsed data to an integer using base 16.

    Example (compare the last example to the one in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        converted = []
        for tokn in t:
            converted.append(func(tokn, *args))
        return converted

    # give the parse action the mapped function's name, falling back to its
    # class name and finally to its string form
    try:
        fallback_name = getattr(func, '__class__').__name__
        func_name = getattr(func, '__name__', fallback_name)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa

upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""Helper parse action to convert tokens to upper case."""

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""Helper parse action to convert tokens to lower case."""

def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    # a plain tag name becomes a Keyword (caseless for HTML); an expression
    # is used as given and supplies the results name through its .name
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        # XML: every attribute has a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        tagAttr = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # HTML: values may be quoted or bare, or missing altogether, and
        # attribute names are folded to lower case
        printablesLessRAbrack = printables.replace(">", "")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        tagAttr = Group( tagAttrName.setParseAction(downcaseTokens) + \
                         Optional( Suppress("=") + tagAttrValue ) )

    # trailing '/' of an empty element, reported as a bool under "empty"
    emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + \
            Dict(ZeroOrMore(tagAttr)) + \
            emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the tag name in title case, with any
    # namespace colons removed
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
4762
def makeHTMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
    tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    return _makeTags( tagStr, xml=False )
4781
def makeXMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
    tags only in the given upper/lower case.

    Example: similar to L{makeHTMLTags}
    """
    return _makeTags( tagStr, xml=True )
4790
def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values. Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - positional name-value tuples, as in C{(("ns1:class", "Customer"), ("ns2:align","right"))}
    For attribute names with a namespace prefix, you must use the second or third form.
    If any positional tuples are given, the keyword arguments are ignored.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # positional tuples take precedence over keyword arguments
    requested = args[:] if args else attrDict.items()
    # snapshot as a list of pairs (also checks each entry is a 2-item pair)
    attrs = [(name, value) for name, value in requested]

    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            # ANY_VALUE only requires the attribute to be present
            if attrValue != withAttribute.ANY_VALUE:
                actualValue = tokens[attrName]
                if actualValue != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actualValue, attrValue))
    return pa
withAttribute.ANY_VALUE = object()

def withClass(classname, namespace=''):
    """
    Simplified version of C{L{withAttribute}} when matching on a div class - made
    difficult because C{class} is a reserved word in Python.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # the attribute is "class", or "<namespace>:class" when a namespace is given
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})

# unique marker objects used to state operator associativity in infixNotation
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()

def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions.

    Parameters:
     - baseExpr - expression representing the most basic element for the nested
       expression grammar
     - opList - list of tuples, one for each operator precedence level in the
      expression grammar; each tuple is of the form
      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple (or list) of two expressions, for the
          two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
       - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
       - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted)
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if a ternary operator is not given as two expressions,
    if numTerms is not 1, 2 or 3, or if the associativity is neither
    C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signedInteger
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so the parse action member may be omitted; tuple()
        # lets a level be given as a list as well as a tuple
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # validate the operator pair BEFORE formatting the term name, so a
            # malformed opExpr gives the intended ValueError instead of a
            # TypeError from the '%' operator
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no explicit operator: adjacent terms
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # each precedence level matches its own operator form or falls back
        # to the previous (tighter-binding) level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret

operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# quoted strings: the body may contain any character other than the quote,
# a newline or a backslash, a doubled quote, or a backslash escape
dblQuotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
    ).setName("string enclosed in double quotes")
sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
    ).setName("string enclosed in single quotes")
quotedString = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"') |
    (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'")
    ).setName("quotedString using single or double quotes")
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper method for defining nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    If an expression is not provided for the content argument, the nested
    expression will capture all whitespace-delimited content between delimiters
    as a list of separate values.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Raises C{ValueError} if opener and closer are equal, or if no content
    expression is given and opener/closer are not both strings.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t:t[0].strip()
        singleCharDelims = len(opener) == 1 and len(closer)==1

        if singleCharDelims and ignoreExpr is not None:
            # one char at a time, so an ignorable expression can interrupt a run
            content = Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+whiteChars,exact=1))
                        ).setParseAction(stripToken)
        elif singleCharDelims:
            content = (empty.copy()+CharsNotIn(opener+closer+whiteChars
                        ).setParseAction(stripToken))
        elif ignoreExpr is not None:
            # multi-character delimiters need explicit negative lookaheads
            content = Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken)
        else:
            content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken)

    ret = Forward()
    # inside the delimiters: ignorable text (if any), a nested list, or content
    if ignoreExpr is None:
        innerItem = ret | content
    else:
        innerItem = ignoreExpr | ret | content
    ret <<= Group( Suppress(opener) + ZeroOrMore( innerItem ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
5095
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond the
            current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that C{blockStatementExpr} is modified to ignore backslash line
    continuations, and C{indentStack} is updated by the returned expression
    as it parses.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # a statement must start at the column on top of the stack
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: must be deeper than the current level, which is
        # then pushed as the new level
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # leaving a block: must be left of the current level and at or left
        # of the enclosing one, which requires at least two stack entries
        # (guarded so a one-entry stack is a parse failure, not an IndexError)
        if l >= len(s): return
        curCol = col(l,s)
        if not(len(indentStack) > 1 and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # allow backslash-newline continuations inside block statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # look up the named group captured by commonHTMLEntity; an unknown name
    # gives None
    entityName = t.entity
    return _htmlEntityMap.get(entityName)

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
    ).setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(
    (Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/') | dblSlashComment
    ).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# one list item: printable words (no commas), optionally joined by runs of
# spaces/tabs that are not followed by a comma or the end of the line
_commasepitem = Combine(
    OneOrMore(
        Word(printables, excludeChars=',') +
        Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
        )
    ).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional( quotedString.copy() | _commasepitem, default="")
    ).setName("commaSeparatedList")
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
5245 5246 # some other useful expressions - using lower-case class name since we are really using this as a namespace 5247 -class pyparsing_common:
5248 """ 5249 Here are some common low-level expressions that may be useful in jump-starting parser development: 5250 - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sciReal>}) 5251 - common L{programming identifiers<identifier>} 5252 - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>}) 5253 - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>} 5254 - L{UUID<uuid>} 5255 Parse actions: 5256 - C{L{convertToInteger}} 5257 - C{L{convertToFloat}} 5258 - C{L{convertToDate}} 5259 - C{L{convertToDatetime}} 5260 - C{L{stripHTMLTags}} 5261 5262 Example:: 5263 pyparsing_common.number.runTests(''' 5264 # any int or real number, returned as the appropriate type 5265 100 5266 -100 5267 +100 5268 3.14159 5269 6.02e23 5270 1e-12 5271 ''') 5272 5273 pyparsing_common.fnumber.runTests(''' 5274 # any int or real number, returned as float 5275 100 5276 -100 5277 +100 5278 3.14159 5279 6.02e23 5280 1e-12 5281 ''') 5282 5283 pyparsing_common.hex_integer.runTests(''' 5284 # hex numbers 5285 100 5286 FF 5287 ''') 5288 5289 pyparsing_common.fraction.runTests(''' 5290 # fractions 5291 1/2 5292 -3/4 5293 ''') 5294 5295 pyparsing_common.mixed_integer.runTests(''' 5296 # mixed fractions 5297 1 5298 1/2 5299 -3/4 5300 1-3/4 5301 ''') 5302 5303 import uuid 5304 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) 5305 pyparsing_common.uuid.runTests(''' 5306 # uuid 5307 12345678-1234-5678-1234-567812345678 5308 ''') 5309 prints:: 5310 # any int or real number, returned as the appropriate type 5311 100 5312 [100] 5313 5314 -100 5315 [-100] 5316 5317 +100 5318 [100] 5319 5320 3.14159 5321 [3.14159] 5322 5323 6.02e23 5324 [6.02e+23] 5325 5326 1e-12 5327 [1e-12] 5328 5329 # any int or real number, returned as float 5330 100 5331 [100.0] 5332 5333 -100 5334 [-100.0] 5335 5336 +100 5337 [100.0] 5338 5339 3.14159 5340 [3.14159] 5341 5342 6.02e23 5343 [6.02e+23] 5344 5345 1e-12 5346 [1e-12] 5347 5348 # hex numbers 5349 100 
5350 [256] 5351 5352 FF 5353 [255] 5354 5355 # fractions 5356 1/2 5357 [0.5] 5358 5359 -3/4 5360 [-0.75] 5361 5362 # mixed fractions 5363 1 5364 [1] 5365 5366 1/2 5367 [0.5] 5368 5369 -3/4 5370 [-0.75] 5371 5372 1-3/4 5373 [1.75] 5374 5375 # uuid 5376 12345678-1234-5678-1234-567812345678 5377 [UUID('12345678-1234-5678-1234-567812345678')] 5378 """ 5379 5380 convertToInteger = tokenMap(int) 5381 """ 5382 Parse action for converting parsed integers to Python int 5383 """ 5384 5385 convertToFloat = tokenMap(float) 5386 """ 5387 Parse action for converting parsed numbers to Python float 5388 """ 5389 5390 integer = Word(nums).setName("integer").setParseAction(convertToInteger) 5391 """expression that parses an unsigned integer, returns an int""" 5392 5393 hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16)) 5394 """expression that parses a hexadecimal integer, returns an int""" 5395 5396 signedInteger = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) 5397 """expression that parses an integer with optional leading sign, returns an int""" 5398 5399 fraction = (signedInteger().setParseAction(convertToFloat) + '/' + signedInteger().setParseAction(convertToFloat)).setName("fraction") 5400 """fractional expression of an integer divided by an integer, returns a float""" 5401 fraction.addParseAction(lambda t: t[0]/t[-1]) 5402 5403 mixed_integer = (fraction | signedInteger + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") 5404 """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" 5405 mixed_integer.addParseAction(sum) 5406 5407 real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat) 5408 """expression that parses a floating point number and returns a float""" 5409 5410 sciReal = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific 
notation").setParseAction(convertToFloat) 5411 """expression that parses a floating point number with optional scientific notation and returns a float""" 5412 5413 # streamlining this expression makes the docs nicer-looking 5414 number = (sciReal | real | signedInteger).streamline() 5415 """any numeric expression, returns the corresponding Python type""" 5416 5417 fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) 5418 """any int or real number, returned as float""" 5419 5420 identifier = Word(alphas+'_', alphanums+'_').setName("identifier") 5421 """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" 5422 5423 ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") 5424 "IPv4 address (C{0.0.0.0 - 255.255.255.255})" 5425 5426 _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") 5427 _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address") 5428 _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address") 5429 _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) 5430 _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") 5431 ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") 5432 "IPv6 address (long, short, or mixed form)" 5433 5434 mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") 5435 "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" 5436 5437 @staticmethod
def convertToDate(fmt="%Y-%m-%d"):
    """
    Helper to create a parse action for converting a parsed date string to a Python datetime.date

    Params -
     - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

    Returns a parse action taking C{(s, loc, toks)}. It converts C{toks[0]} using
    C{fmt}; if strptime rejects the string, the C{ValueError} is re-raised as a
    L{ParseException} carrying the input string, the location, and strptime's message.

    Example::
        date_expr = pyparsing_common.iso8601_date.copy()
        date_expr.setParseAction(pyparsing_common.convertToDate())
        print(date_expr.parseString("1999-12-31"))
    prints::
        [datetime.date(1999, 12, 31)]
    """
    def cvt_fn(s, loc, toks):
        try:
            # strptime yields a datetime; keep only the calendar-date part
            return datetime.strptime(toks[0], fmt).date()
        except ValueError as ve:
            # report a badly formatted date as a parsing error rather than a ValueError
            raise ParseException(s, loc, str(ve))
    return cvt_fn
@staticmethod
def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
    """
    Helper to create a parse action for converting a parsed datetime string to a Python datetime.datetime

    Params -
     - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

    Returns a parse action taking C{(s, loc, toks)}. It converts C{toks[0]} using
    C{fmt}; if strptime rejects the string, the C{ValueError} is re-raised as a
    L{ParseException} carrying the input string, the location, and strptime's message.

    Example::
        dt_expr = pyparsing_common.iso8601_datetime.copy()
        dt_expr.setParseAction(pyparsing_common.convertToDatetime())
        print(dt_expr.parseString("1999-12-31T23:59:59.999"))
    prints::
        [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
    """
    def cvt_fn(s, loc, toks):
        try:
            return datetime.strptime(toks[0], fmt)
        except ValueError as ve:
            # report a badly formatted datetime as a parsing error rather than a ValueError
            raise ParseException(s, loc, str(ve))
    return cvt_fn

# the year is required; the month and day groups are optional in this pattern
iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
"ISO8601 date (C{yyyy-mm-dd})"

iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
"ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
"UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

# matches any opening or closing HTML tag and suppresses it from the output
_html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
@staticmethod
def stripHTMLTags(s, l, tokens):
    """
    Parse action to remove HTML tags from web page HTML source

    Only C{tokens[0]} is processed; it is run through the tag-suppressing
    expression's C{transformString}, and the resulting string is returned.

    Example::
        # strip HTML links from normal text
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        td,td_end = makeHTMLTags("TD")
        table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

        print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
    """
    return pyparsing_common._html_stripper.transformString(tokens[0])
if __name__ == "__main__":
    # Self-test / demo: build a tiny SELECT-statement grammar and exercise
    # runTests on it and on several pyparsing_common expressions.

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # dotted names are combined into a single token and upper-cased
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # the stdlib uuid module is needed only by this demo, so it is imported here
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)