Module pyparsing
[frames | no frames]

Source Code for Module pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2016  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form  
  35  C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements  
  36  (L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to 
  37  L{Literal} expressions):: 
  38   
  39      from pyparsing import Word, alphas 
  40   
  41      # define grammar of a greeting 
  42      greet = Word(alphas) + "," + Word(alphas) + "!" 
  43   
  44      hello = "Hello, World!" 
  45      print (hello, "->", greet.parseString(hello)) 
  46   
  47  The program outputs the following:: 
  48   
  49      Hello, World! -> ['Hello', ',', 'World', '!'] 
  50   
  51  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  52  class names, and the use of '+', '|' and '^' operators. 
  53   
  54  The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an 
  55  object with named attributes. 
  56   
  57  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  58   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  59   - quoted strings 
  60   - embedded comments 
  61  """ 
  62   
  63  __version__ = "2.1.9" 
  64  __versionTime__ = "10 Sep 2016 15:10 UTC" 
  65  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  66   
  67  import string 
  68  from weakref import ref as wkref 
  69  import copy 
  70  import sys 
  71  import warnings 
  72  import re 
  73  import sre_constants 
  74  import collections 
  75  import pprint 
  76  import traceback 
  77  import types 
  78  from datetime import datetime 
  79   
  80  try: 
  81      from _thread import RLock 
  82  except ImportError: 
  83      from threading import RLock 
  84   
  85  try: 
  86      from collections import OrderedDict as _OrderedDict 
  87  except ImportError: 
  88      try: 
  89          from ordereddict import OrderedDict as _OrderedDict 
  90      except ImportError: 
  91          _OrderedDict = None 
  92   
  93  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  94   
  95  __all__ = [ 
  96  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  97  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  98  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  99  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
 100  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
 101  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',  
 102  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
 103  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
 104  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
 105  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
 106  'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 
 107  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
 108  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
 109  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
 110  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
 111  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
 112  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 
 113  'CloseMatch', 'tokenMap', 'pyparsing_common', 
 114  ] 
 115   
 116  system_version = tuple(sys.version_info)[:3] 
 117  PY_3 = system_version[0] == 3 
 118  if PY_3: 
 119      _MAX_INT = sys.maxsize 
 120      basestring = str 
 121      unichr = chr 
 122      _ustr = str 
 123   
 124      # build list of single arg builtins, that can be used as parse actions 
 125      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 126   
 127  else: 
 128      _MAX_INT = sys.maxint 
 129      range = xrange 
130 131 - def _ustr(obj):
132 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 133 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 134 then < returns the unicode object | encodes it with the default encoding | ... >. 135 """ 136 if isinstance(obj,unicode): 137 return obj 138 139 try: 140 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 141 # it won't break any existing code. 142 return str(obj) 143 144 except UnicodeEncodeError: 145 # Else encode it 146 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 147 xmlcharref = Regex('&#\d+;') 148 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 149 return xmlcharref.transformString(ret)
150 151 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 152 singleArgBuiltins = [] 153 import __builtin__ 154 for fname in "sum len sorted reversed list tuple set any all min max".split(): 155 try: 156 singleArgBuiltins.append(getattr(__builtin__,fname)) 157 except AttributeError: 158 continue 159 160 _generatorType = type((y for y in range(1)))
def _xml_escape(data):
    """Return C{data} with the characters &, >, <, " and ' replaced by XML entities."""
    # The ampersand has to be handled first: every later replacement inserts
    # new '&' characters that must not be escaped a second time.
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    escaped = data
    for raw_char, entity in replacements:
        escaped = escaped.replace(raw_char, entity)
    return escaped
171
class _Constants(object):
    """Empty class whose instances serve as plain attribute containers."""
# Character-set constants, exported for use when building grammar expressions.
alphas = "".join((string.ascii_uppercase, string.ascii_lowercase))
nums = string.digits
hexnums = nums + "ABCDEF" + "abcdef"
alphanums = alphas + nums
_bslash = "\\"
# every printable ASCII character except whitespace
printables = "".join(filter(lambda ch: ch not in string.whitespace, string.printable))
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """
        Store the failure location and message.

        When C{msg} is omitted, C{pstr} is taken to be the message and the
        stored input string is empty.
        """
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            self.msg = pstr
            self.pstr = ""
        self.loc = loc
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method to simplify creating one type of ParseException
        from another - avoids having __init__ signature conflicts among subclasses
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text (also available as C{column})
            - line - returns the line containing the exception text
        """
        # Only reached when normal attribute lookup fails; the values are
        # computed on demand from the stored location and input string.
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname == "col" or aname == "column":
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()

    def __dir__(self):
        # advertise the computed attributes alongside the regular class members
        return ["lineno", "col", "line"] + dir(type(self))
237
class ParseException(ParseBaseException):
    """
    Exception thrown when parse expressions don't match the input;
    supported attributes by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
       Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
258
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately"""
263
class ParseSyntaxException(ParseFatalException):
    """just like L{ParseFatalException}, but thrown internally when an
       L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing
       is to stop immediately because an unbacktrackable syntax error has
       been found"""
269
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # the sequence of parser elements that was handed to the constructor
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: a bare tuple on the right of '%' would be
        # unpacked as the format arguments instead of being formatted as one value.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
290
class _ParseResultsWithOffset(object):
    """Pairs a value with an offset; the pair is stored in C{self.tup}."""

    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        # index 0 is the value, index 1 the offset; this also makes
        # ``value, offset = obj`` unpacking work
        return self.tup[i]

    def __repr__(self):
        value, _ = self.tup
        return repr(value)

    def setOffset(self,i):
        """Replace the stored offset, keeping the value."""
        value = self.tup[0]
        self.tup = (value, i)
300
301 -class ParseResults(object):
302 """ 303 Structured parse results, to provide multiple means of access to the parsed data: 304 - as a list (C{len(results)}) 305 - by list index (C{results[0], results[1]}, etc.) 306 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName}) 307 308 Example:: 309 integer = Word(nums) 310 date_str = (integer.setResultsName("year") + '/' 311 + integer.setResultsName("month") + '/' 312 + integer.setResultsName("day")) 313 # equivalent form: 314 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 315 316 # parseString returns a ParseResults object 317 result = date_str.parseString("1999/12/31") 318 319 def test(s, fn=repr): 320 print("%s -> %s" % (s, fn(eval(s)))) 321 test("list(result)") 322 test("result[0]") 323 test("result['month']") 324 test("result.day") 325 test("'month' in result") 326 test("'minutes' in result") 327 test("result.dump()", str) 328 prints:: 329 list(result) -> ['1999', '/', '12', '/', '31'] 330 result[0] -> '1999' 331 result['month'] -> '12' 332 result.day -> '31' 333 'month' in result -> True 334 'minutes' in result -> False 335 result.dump() -> ['1999', '/', '12', '/', '31'] 336 - day: 31 337 - month: 12 338 - year: 1999 339 """
340 - def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
341 if isinstance(toklist, cls): 342 return toklist 343 retobj = object.__new__(cls) 344 retobj.__doinit = True 345 return retobj
346 347 # Performance tuning: we construct a *lot* of these, so keep this 348 # constructor as small and fast as possible
349 - def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
350 if self.__doinit: 351 self.__doinit = False 352 self.__name = None 353 self.__parent = None 354 self.__accumNames = {} 355 self.__asList = asList 356 self.__modal = modal 357 if toklist is None: 358 toklist = [] 359 if isinstance(toklist, list): 360 self.__toklist = toklist[:] 361 elif isinstance(toklist, _generatorType): 362 self.__toklist = list(toklist) 363 else: 364 self.__toklist = [toklist] 365 self.__tokdict = dict() 366 367 if name is not None and name: 368 if not modal: 369 self.__accumNames[name] = 0 370 if isinstance(name,int): 371 name = _ustr(name) # will always return a str, but use _ustr for consistency 372 self.__name = name 373 if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])): 374 if isinstance(toklist,basestring): 375 toklist = [ toklist ] 376 if asList: 377 if isinstance(toklist,ParseResults): 378 self[name] = _ParseResultsWithOffset(toklist.copy(),0) 379 else: 380 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) 381 self[name].__name = name 382 else: 383 try: 384 self[name] = toklist[0] 385 except (KeyError,TypeError,IndexError): 386 self[name] = toklist
387
388 - def __getitem__( self, i ):
389 if isinstance( i, (int,slice) ): 390 return self.__toklist[i] 391 else: 392 if i not in self.__accumNames: 393 return self.__tokdict[i][-1][0] 394 else: 395 return ParseResults([ v[0] for v in self.__tokdict[i] ])
396
397 - def __setitem__( self, k, v, isinstance=isinstance ):
398 if isinstance(v,_ParseResultsWithOffset): 399 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 400 sub = v[0] 401 elif isinstance(k,(int,slice)): 402 self.__toklist[k] = v 403 sub = v 404 else: 405 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 406 sub = v 407 if isinstance(sub,ParseResults): 408 sub.__parent = wkref(self)
409
410 - def __delitem__( self, i ):
411 if isinstance(i,(int,slice)): 412 mylen = len( self.__toklist ) 413 del self.__toklist[i] 414 415 # convert int to slice 416 if isinstance(i, int): 417 if i < 0: 418 i += mylen 419 i = slice(i, i+1) 420 # get removed indices 421 removed = list(range(*i.indices(mylen))) 422 removed.reverse() 423 # fixup indices in token dictionary 424 for name,occurrences in self.__tokdict.items(): 425 for j in removed: 426 for k, (value, position) in enumerate(occurrences): 427 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 428 else: 429 del self.__tokdict[i]
430
431 - def __contains__( self, k ):
432 return k in self.__tokdict
433
434 - def __len__( self ): return len( self.__toklist )
435 - def __bool__(self): return ( not not self.__toklist )
436 __nonzero__ = __bool__
437 - def __iter__( self ): return iter( self.__toklist )
438 - def __reversed__( self ): return iter( self.__toklist[::-1] )
439 - def _iterkeys( self ):
440 if hasattr(self.__tokdict, "iterkeys"): 441 return self.__tokdict.iterkeys() 442 else: 443 return iter(self.__tokdict)
444
445 - def _itervalues( self ):
446 return (self[k] for k in self._iterkeys())
447
448 - def _iteritems( self ):
449 return ((k, self[k]) for k in self._iterkeys())
450 451 if PY_3: 452 keys = _iterkeys 453 """Returns an iterator of all named result keys (Python 3.x only).""" 454 455 values = _itervalues 456 """Returns an iterator of all named result values (Python 3.x only).""" 457 458 items = _iteritems 459 """Returns an iterator of all named result key-value tuples (Python 3.x only).""" 460 461 else: 462 iterkeys = _iterkeys 463 """Returns an iterator of all named result keys (Python 2.x only).""" 464 465 itervalues = _itervalues 466 """Returns an iterator of all named result values (Python 2.x only).""" 467 468 iteritems = _iteritems 469 """Returns an iterator of all named result key-value tuples (Python 2.x only).""" 470
471 - def keys( self ):
472 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x).""" 473 return list(self.iterkeys())
474
475 - def values( self ):
476 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x).""" 477 return list(self.itervalues())
478
479 - def items( self ):
480 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x).""" 481 return list(self.iteritems())
482
483 - def haskeys( self ):
484 """Since keys() returns an iterator, this method is helpful in bypassing 485 code that looks for the existence of any defined results names.""" 486 return bool(self.__tokdict)
487
488 - def pop( self, *args, **kwargs):
489 """ 490 Removes and returns item at specified index (default=C{last}). 491 Supports both C{list} and C{dict} semantics for C{pop()}. If passed no 492 argument or an integer argument, it will use C{list} semantics 493 and pop tokens from the list of parsed tokens. If passed a 494 non-integer argument (most likely a string), it will use C{dict} 495 semantics and pop the corresponding value from any defined 496 results names. A second default return value argument is 497 supported, just as in C{dict.pop()}. 498 499 Example:: 500 def remove_first(tokens): 501 tokens.pop(0) 502 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 503 print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] 504 505 label = Word(alphas) 506 patt = label("LABEL") + OneOrMore(Word(nums)) 507 print(patt.parseString("AAB 123 321").dump()) 508 509 # Use pop() in a parse action to remove named result (note that corresponding value is not 510 # removed from list form of results) 511 def remove_LABEL(tokens): 512 tokens.pop("LABEL") 513 return tokens 514 patt.addParseAction(remove_LABEL) 515 print(patt.parseString("AAB 123 321").dump()) 516 prints:: 517 ['AAB', '123', '321'] 518 - LABEL: AAB 519 520 ['AAB', '123', '321'] 521 """ 522 if not args: 523 args = [-1] 524 for k,v in kwargs.items(): 525 if k == 'default': 526 args = (args[0], v) 527 else: 528 raise TypeError("pop() got an unexpected keyword argument '%s'" % k) 529 if (isinstance(args[0], int) or 530 len(args) == 1 or 531 args[0] in self): 532 index = args[0] 533 ret = self[index] 534 del self[index] 535 return ret 536 else: 537 defaultvalue = args[1] 538 return defaultvalue
539
540 - def get(self, key, defaultValue=None):
541 """ 542 Returns named result matching the given key, or if there is no 543 such name, then returns the given C{defaultValue} or C{None} if no 544 C{defaultValue} is specified. 545 546 Similar to C{dict.get()}. 547 548 Example:: 549 integer = Word(nums) 550 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 551 552 result = date_str.parseString("1999/12/31") 553 print(result.get("year")) # -> '1999' 554 print(result.get("hour", "not specified")) # -> 'not specified' 555 print(result.get("hour")) # -> None 556 """ 557 if key in self: 558 return self[key] 559 else: 560 return defaultValue
561
562 - def insert( self, index, insStr ):
563 """ 564 Inserts new element at location index in the list of parsed tokens. 565 566 Similar to C{list.insert()}. 567 568 Example:: 569 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 570 571 # use a parse action to insert the parse location in the front of the parsed results 572 def insert_locn(locn, tokens): 573 tokens.insert(0, locn) 574 print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] 575 """ 576 self.__toklist.insert(index, insStr) 577 # fixup indices in token dictionary 578 for name,occurrences in self.__tokdict.items(): 579 for k, (value, position) in enumerate(occurrences): 580 occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
581
582 - def append( self, item ):
583 """ 584 Add single element to end of ParseResults list of elements. 585 586 Example:: 587 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 588 589 # use a parse action to compute the sum of the parsed integers, and add it to the end 590 def append_sum(tokens): 591 tokens.append(sum(map(int, tokens))) 592 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] 593 """ 594 self.__toklist.append(item)
595
596 - def extend( self, itemseq ):
597 """ 598 Add sequence of elements to end of ParseResults list of elements. 599 600 Example:: 601 patt = OneOrMore(Word(alphas)) 602 603 # use a parse action to append the reverse of the matched strings, to make a palindrome 604 def make_palindrome(tokens): 605 tokens.extend(reversed([t[::-1] for t in tokens])) 606 return ''.join(tokens) 607 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' 608 """ 609 if isinstance(itemseq, ParseResults): 610 self += itemseq 611 else: 612 self.__toklist.extend(itemseq)
613
614 - def clear( self ):
615 """ 616 Clear all elements and results names. 617 """ 618 del self.__toklist[:] 619 self.__tokdict.clear()
620
621 - def __getattr__( self, name ):
622 try: 623 return self[name] 624 except KeyError: 625 return "" 626 627 if name in self.__tokdict: 628 if name not in self.__accumNames: 629 return self.__tokdict[name][-1][0] 630 else: 631 return ParseResults([ v[0] for v in self.__tokdict[name] ]) 632 else: 633 return ""
634
635 - def __add__( self, other ):
636 ret = self.copy() 637 ret += other 638 return ret
639
640 - def __iadd__( self, other ):
641 if other.__tokdict: 642 offset = len(self.__toklist) 643 addoffset = lambda a: offset if a<0 else a+offset 644 otheritems = other.__tokdict.items() 645 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 646 for (k,vlist) in otheritems for v in vlist] 647 for k,v in otherdictitems: 648 self[k] = v 649 if isinstance(v[0],ParseResults): 650 v[0].__parent = wkref(self) 651 652 self.__toklist += other.__toklist 653 self.__accumNames.update( other.__accumNames ) 654 return self
655
656 - def __radd__(self, other):
657 if isinstance(other,int) and other == 0: 658 # useful for merging many ParseResults using sum() builtin 659 return self.copy() 660 else: 661 # this may raise a TypeError - so be it 662 return other + self
663
664 - def __repr__( self ):
665 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
666
667 - def __str__( self ):
668 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
669
670 - def _asStringList( self, sep='' ):
671 out = [] 672 for item in self.__toklist: 673 if out and sep: 674 out.append(sep) 675 if isinstance( item, ParseResults ): 676 out += item._asStringList() 677 else: 678 out.append( _ustr(item) ) 679 return out
680
681 - def asList( self ):
682 """ 683 Returns the parse results as a nested list of matching tokens, all converted to strings. 684 685 Example:: 686 patt = OneOrMore(Word(alphas)) 687 result = patt.parseString("sldkj lsdkj sldkj") 688 # even though the result prints in string-like form, it is actually a pyparsing ParseResults 689 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj'] 690 691 # Use asList() to create an actual list 692 result_list = result.asList() 693 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj'] 694 """ 695 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
696
697 - def asDict( self ):
698 """ 699 Returns the named parse results as a nested dictionary. 700 701 Example:: 702 integer = Word(nums) 703 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 704 705 result = date_str.parseString('12/31/1999') 706 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) 707 708 result_dict = result.asDict() 709 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'} 710 711 # even though a ParseResults supports dict-like access, sometime you just need to have a dict 712 import json 713 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable 714 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"} 715 """ 716 if PY_3: 717 item_fn = self.items 718 else: 719 item_fn = self.iteritems 720 721 def toItem(obj): 722 if isinstance(obj, ParseResults): 723 if obj.haskeys(): 724 return obj.asDict() 725 else: 726 return [toItem(v) for v in obj] 727 else: 728 return obj
729 730 return dict((k,toItem(v)) for k,v in item_fn())
731
732 - def copy( self ):
733 """ 734 Returns a new copy of a C{ParseResults} object. 735 """ 736 ret = ParseResults( self.__toklist ) 737 ret.__tokdict = self.__tokdict.copy() 738 ret.__parent = self.__parent 739 ret.__accumNames.update( self.__accumNames ) 740 ret.__name = self.__name 741 return ret
742
743 - def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
744 """ 745 (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names. 746 """ 747 nl = "\n" 748 out = [] 749 namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() 750 for v in vlist) 751 nextLevelIndent = indent + " " 752 753 # collapse out indents if formatting is not desired 754 if not formatted: 755 indent = "" 756 nextLevelIndent = "" 757 nl = "" 758 759 selfTag = None 760 if doctag is not None: 761 selfTag = doctag 762 else: 763 if self.__name: 764 selfTag = self.__name 765 766 if not selfTag: 767 if namedItemsOnly: 768 return "" 769 else: 770 selfTag = "ITEM" 771 772 out += [ nl, indent, "<", selfTag, ">" ] 773 774 for i,res in enumerate(self.__toklist): 775 if isinstance(res,ParseResults): 776 if i in namedItems: 777 out += [ res.asXML(namedItems[i], 778 namedItemsOnly and doctag is None, 779 nextLevelIndent, 780 formatted)] 781 else: 782 out += [ res.asXML(None, 783 namedItemsOnly and doctag is None, 784 nextLevelIndent, 785 formatted)] 786 else: 787 # individual token, see if there is a name for it 788 resTag = None 789 if i in namedItems: 790 resTag = namedItems[i] 791 if not resTag: 792 if namedItemsOnly: 793 continue 794 else: 795 resTag = "ITEM" 796 xmlBodyText = _xml_escape(_ustr(res)) 797 out += [ nl, nextLevelIndent, "<", resTag, ">", 798 xmlBodyText, 799 "</", resTag, ">" ] 800 801 out += [ nl, indent, "</", selfTag, ">" ] 802 return "".join(out)
803
804 - def __lookup(self,sub):
805 for k,vlist in self.__tokdict.items(): 806 for v,loc in vlist: 807 if sub is v: 808 return k 809 return None
810
811 - def getName(self):
812 """ 813 Returns the results name for this token expression. Useful when several 814 different expressions might match at a particular location. 815 816 Example:: 817 integer = Word(nums) 818 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") 819 house_number_expr = Suppress('#') + Word(nums, alphanums) 820 user_data = (Group(house_number_expr)("house_number") 821 | Group(ssn_expr)("ssn") 822 | Group(integer)("age")) 823 user_info = OneOrMore(user_data) 824 825 result = user_info.parseString("22 111-22-3333 #221B") 826 for item in result: 827 print(item.getName(), ':', item[0]) 828 prints:: 829 age : 22 830 ssn : 111-22-3333 831 house_number : 221B 832 """ 833 if self.__name: 834 return self.__name 835 elif self.__parent: 836 par = self.__parent() 837 if par: 838 return par.__lookup(self) 839 else: 840 return None 841 elif (len(self) == 1 and 842 len(self.__tokdict) == 1 and 843 next(iter(self.__tokdict.values()))[0][1] in (0,-1)): 844 return next(iter(self.__tokdict.keys())) 845 else: 846 return None
847
848 - def dump(self, indent='', depth=0, full=True):
849 """ 850 Diagnostic method for listing out the contents of a C{ParseResults}. 851 Accepts an optional C{indent} argument so that this string can be embedded 852 in a nested display of other data. 853 854 Example:: 855 integer = Word(nums) 856 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 857 858 result = date_str.parseString('12/31/1999') 859 print(result.dump()) 860 prints:: 861 ['12', '/', '31', '/', '1999'] 862 - day: 1999 863 - month: 31 864 - year: 12 865 """ 866 out = [] 867 NL = '\n' 868 out.append( indent+_ustr(self.asList()) ) 869 if full: 870 if self.haskeys(): 871 items = sorted(self.items()) 872 for k,v in items: 873 if out: 874 out.append(NL) 875 out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) 876 if isinstance(v,ParseResults): 877 if v: 878 out.append( v.dump(indent,depth+1) ) 879 else: 880 out.append(_ustr(v)) 881 else: 882 out.append(_ustr(v)) 883 elif any(isinstance(vv,ParseResults) for vv in self): 884 v = self 885 for i,vv in enumerate(v): 886 if isinstance(vv,ParseResults): 887 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) )) 888 else: 889 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv))) 890 891 return "".join(out)
892
893 - def pprint(self, *args, **kwargs):
894 """ 895 Pretty-printer for parsed results as a list, using the C{pprint} module. 896 Accepts additional positional or keyword args as defined for the 897 C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint}) 898 899 Example:: 900 ident = Word(alphas, alphanums) 901 num = Word(nums) 902 func = Forward() 903 term = ident | num | Group('(' + func + ')') 904 func <<= ident + Group(Optional(delimitedList(term))) 905 result = func.parseString("fna a,b,(fnb c,d,200),100") 906 result.pprint(width=40) 907 prints:: 908 ['fna', 909 ['a', 910 'b', 911 ['(', 'fnb', ['c', 'd', '200'], ')'], 912 '100']] 913 """ 914 pprint.pprint(self.asList(), *args, **kwargs)
915 916 # add support for pickle protocol
def __getstate__(self):
    """
    Return the state used to pickle this object.

    The state is a 2-tuple: the token list, and a tuple of (copy of the
    token dict, parent object or C{None}, accumulated names, results name).
    """
    # The parent is stored as a callable reference (or None); call it to get
    # the actual object.  A conditional expression is used instead of the
    # "x and y or z" idiom, which would silently turn a live parent that
    # evaluates as false into None.
    # NOTE(review): presumably an empty ParseResults is falsy - confirm.
    parent_ref = self.__parent
    parent = parent_ref() if parent_ref is not None else None
    return ( self.__toklist,
             ( self.__tokdict.copy(),
               parent,
               self.__accumNames,
               self.__name ) )
923
def __setstate__(self,state):
    """
    Restore this object from the state tuple built by C{__getstate__}.

    C{state[0]} is the token list; C{state[1]} is the tuple
    (token dict, parent or C{None}, accumulated names, results name).
    """
    self.__toklist = state[0]
    tokdict, par, inAccumNames, name = state[1]
    self.__tokdict = tokdict
    self.__name = name
    # rebuild the accumulated-names mapping as a fresh dict
    self.__accumNames = {}
    self.__accumNames.update(inAccumNames)
    # the parent is only ever held through wkref, never directly
    self.__parent = wkref(par) if par is not None else None
936
def __getnewargs__(self):
    """
    Return the argument tuple (token list, name, asList flag, modal flag)
    for the pickle protocol.

    NOTE(review): C{__asList} and C{__modal} are not assigned in the visible
    part of this class - confirm they are set elsewhere.
    """
    new_args = (self.__toklist, self.__name, self.__asList, self.__modal)
    return new_args
939
940 - def __dir__(self):
941 return (dir(type(self)) + list(self.keys()))
942 943 collections.MutableMapping.register(ParseResults)
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   A newline character belongs to the line it terminates, so when C{loc}
   addresses a newline its column is one past the last character of that
   line; the character right after a newline is in column 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # rfind returns -1 when there is no earlier newline, which makes the first
    # character of the string column 1; directly after a newline the
    # difference is also 1, so no special case is required.
    return loc - strg.rfind("\n", 0, loc)
957
def lineno(loc,strg):
    """Return the 1-based line number of position C{loc} in C{strg}.

   Lines are separated by newline characters; the result is one more than the
   number of newlines found before C{loc}.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
969
def line( loc, strg ):
    """Return the text of the line that contains position C{loc} in C{strg}.

    Lines are delimited by newline characters; the returned text does not
    include the terminating newline.
    """
    # one past the previous newline (0 when there is none, since rfind gives -1)
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    return strg[start:end] if end >= 0 else strg[start:]
979
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action for the start of a match attempt.

    Prints the expression, the location, and the (line, column) pair for
    that location in C{instring}.
    """
    expr_text = _ustr(expr)
    loc_text = _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print ("Match " + expr_text + " at loc " + loc_text + position)
982
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action for a successful match: print the expression and its tokens as a list."""
    matched_tokens = str(toks.asList())
    print ("Matched " + _ustr(expr) + " -> " + matched_tokens)
985
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action for a failed match: print the exception that was raised."""
    exc_text = _ustr(exc)
    print ("Exception raised:" + exc_text)
988
def nullDebugAction(*args):
    """No-op debug action; accepts any arguments and does nothing, to silence debugging output during parsing."""
    return None
992 993 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 994 #~ 'decorator to trim function calls to match the arity of the target' 995 #~ def _trim_arity(func, maxargs=3): 996 #~ if func in singleArgBuiltins: 997 #~ return lambda s,l,t: func(t) 998 #~ limit = 0 999 #~ foundArity = False 1000 #~ def wrapper(*args): 1001 #~ nonlocal limit,foundArity 1002 #~ while 1: 1003 #~ try: 1004 #~ ret = func(*args[limit:]) 1005 #~ foundArity = True 1006 #~ return ret 1007 #~ except TypeError: 1008 #~ if limit == maxargs or foundArity: 1009 #~ raise 1010 #~ limit += 1 1011 #~ continue 1012 #~ return wrapper 1013 1014 # this version is Python 2.x-3.x cross-compatible 1015 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """
    Wrap parse action C{func} so that it can be invoked with the full argument
    list even when it accepts fewer arguments.

    The returned wrapper calls C{func(*args[limit:])}, starting with all of the
    arguments and dropping one more leading argument each time the call itself
    raises C{TypeError}, until a call succeeds.  At most C{maxargs+1} leading
    arguments are dropped.  After the first successful call the discovered
    offset is kept and any further C{TypeError} is re-raised unchanged.

    A C{TypeError} is only treated as an arity mismatch when the last traceback
    entry examined is the line of the C{func} call inside the wrapper; that
    line is located via C{LINE_DIFF}, so the physical layout of this function
    matters (see the warning below).

    Functions listed in C{singleArgBuiltins} are wrapped so that they are
    called with the tokens only.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # single-element lists are used as mutable cells the nested wrapper can update
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [(frame_summary.filename, frame_summary.lineno)]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [(frame_summary.filename, frame_summary.lineno)]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the local reference to the traceback object
                        del tb

                # arity mismatch: drop one more leading argument and retry
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1080 -class ParserElement(object):
1081 """Abstract base level parser element class.""" 1082 DEFAULT_WHITE_CHARS = " \n\t\r" 1083 verbose_stacktrace = False 1084 1085 @staticmethod
def setDefaultWhitespaceChars( chars ):
    r"""
    Replace the class-wide default set of whitespace characters
    (C{ParserElement.DEFAULT_WHITE_CHARS}) with C{chars}.

    Example::
        # with the default whitespace chars, newlines are skipped
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.setDefaultWhitespaceChars(" \t")
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    new_default = chars
    ParserElement.DEFAULT_WHITE_CHARS = new_default
1099 1100 @staticmethod
def inlineLiteralsUsing(cls):
    """
    Choose the class used to wrap plain strings that are combined with parser
    elements (stored as C{ParserElement._literalStringClass}).

    Example::
        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inlineLiteralsUsing(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
    """
    literal_class = cls
    ParserElement._literalStringClass = literal_class
1120
def __init__( self, savelist=False ):
    """Set every per-element parsing attribute to its default; C{savelist} becomes C{saveAsList}."""
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall

    # actions and callbacks
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False
    self.debug = False
    self.debugActions = ( None, None, None ) #custom debug actions

    # naming and results handling
    self.strRepr = None
    self.errmsg = ""
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)

    # whitespace and ignorable handling
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True # used to avoid redundant calls to preParse

    # parsing hints and internal state
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
    self.streamlined = False
    self.re = None
1143
def copy( self ):
    """
    Return a shallow copy of this C{ParserElement} with its own parse-action and
    ignore-expression lists, so the copy can be customized without touching the
    original.  If C{copyDefaultWhiteChars} is set, the copy picks up the current
    C{ParserElement.DEFAULT_WHITE_CHARS}.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
    prints::
        [5120, 100, 655360, 268435456]
    Equivalent form of C{expr.copy()} is just C{expr()}::
        integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
    """
    duplicate = copy.copy( self )
    # give the duplicate independent lists
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
1166
def setName( self, name ):
    """
    Give this expression a name, which is then used in its "Expected ..." error
    message.  Returns C{self}.

    Example::
        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    expected = "Expected " + self.name
    self.errmsg = expected
    if not hasattr(self,"exception"):
        return self
    # keep an already-created exception object in step with the new message
    self.exception.msg = self.errmsg
    return self
1180
def setResultsName( self, name, listAllMatches=False ):
    """
    Attach a results name to a *copy* of this element and return the copy.

    Working on a copy lets one basic element, such as an integer, be reused
    in several places under different names.  A trailing C{"*"} on C{name}
    is stripped and has the same effect as C{listAllMatches=True}.

    You can also set results names using the abbreviated syntax,
    C{expr("name")} in place of C{expr.setResultsName("name")} -
    see L{I{__call__}<__call__>}.

    Example::
        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    clone = self.copy()
    accumulate = listAllMatches
    if name.endswith("*"):
        name, accumulate = name[:-1], True
    clone.resultsName = name
    clone.modalResults = not accumulate
    return clone
1208
def setBreak(self,breakFlag = True):
    """Enable or disable a C{pdb} breakpoint that fires just before this element is parsed.

       With C{breakFlag} true, C{self._parse} is replaced by a wrapper that calls
       C{pdb.set_trace()} and then the original parse method.  With C{breakFlag}
       false, the original parse method is restored if such a wrapper is
       installed.  Returns C{self}.
    """
    if not breakFlag:
        current = self._parse
        if hasattr(current,"_originalParseMethod"):
            self._parse = current._originalParseMethod
        return self

    _parseMethod = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return _parseMethod( instring, loc, doActions, callPreParse )
    # remember the wrapped method so that the breakpoint can be removed later
    breaker._originalParseMethod = _parseMethod
    self._parse = breaker
    return self
1226
def setParseAction( self, *fns, **kwargs ):
    """
    Replace this element's parse actions with C{fns}; returns C{self}.

    A parse action is a callable taking 0-3 arguments, called as C{fn(s,loc,toks)},
    C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If a parse action modifies the tokens, it can return them as its return
    value, and the modified list of tokens will replace the original.
    Otherwise, the function does not need to return any value.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    # wrap every action so it can always be invoked as fn(s, loc, toks)
    wrapped_actions = [_trim_arity(action) for action in fns]
    self.parseAction = wrapped_actions
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1264
def addParseAction( self, *fns, **kwargs ):
    """
    Append C{fns} to this element's existing parse actions; returns C{self}.
    See L{I{setParseAction}<setParseAction>} for the accepted call signatures.

    The C{callDuringTry} keyword argument can switch the flag on, but never
    switches it off once set.

    See examples in L{I{copy}<copy>}.
    """
    self.parseAction += [_trim_arity(action) for action in fns]
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1274
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.

    Each predicate in C{fns} becomes its own parse action, so when several
    predicates are given every one of them is evaluated.  Returns C{self}.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = if true, turns on this element's C{callDuringTry} flag (it is never turned off here)

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition_action(predicate):
        # A factory gives each parse action its own 'predicate' binding.  Defining
        # the action directly inside the loop would make every action look up the
        # loop variable at call time, and so test only the last predicate.
        def pa(s,l,t):
            if not bool(predicate(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        # adapt the predicate's arity once, here, rather than on every parse
        self.parseAction.append(make_condition_action(_trim_arity(fn)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
def setFailAction( self, fn ):
    """Register the action to run when parsing fails at this expression; returns C{self}.

       The fail action is called as C{fn(s,loc,expr,err)} where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       Its return value is not used.  It may raise C{L{ParseFatalException}}
       if it is desired to stop parsing immediately."""
    fail_action = fn
    self.failAction = fail_action
    return self
1314
def _skipIgnorables( self, instring, loc ):
    """Return C{loc} advanced past every match of the expressions in C{self.ignoreExprs}.

    The whole list is retried until a complete pass finds no further match.
    """
    progressed = True
    while progressed:
        progressed = False
        for ignorable in self.ignoreExprs:
            try:
                # consume consecutive matches of this expression
                while True:
                    loc, _ = ignorable._parse( instring, loc )
                    progressed = True
            except ParseException:
                pass
    return loc
1327
def preParse( self, instring, loc ):
    """Return C{loc} advanced past ignorable expressions and then, if
    C{skipWhitespace} is set, past any characters found in C{whiteChars}."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    white = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in white:
        loc += 1
    return loc
1339
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation: match nothing, returning C{loc} unchanged together with an empty token list."""
    no_tokens = []
    return loc, no_tokens
1342
def postParse( self, instring, loc, tokenlist ):
    """Base implementation: return C{tokenlist} unchanged."""
    result = tokenlist
    return result
1345 1346 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Match this element against C{instring} at C{loc}, without any result caching.

    Steps: optionally run C{preParse}, call C{parseImpl}, pass the tokens through
    C{postParse}, wrap them in a C{ParseResults}, then run the parse actions
    (when C{doActions} or C{self.callDuringTry} is true).

    Returns a tuple C{(loc, retTokens)}.  An C{IndexError} raised by
    C{parseImpl} is converted into a C{ParseException} located at the end of
    the input string.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # slower path: debug/fail callbacks need to observe failures
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # only guard against IndexError when this element may raise one
        # or when loc is already at/after the end of the input
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning None leaves the results unchanged
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            # same loop as above, without the debug exception hook
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1418
def tryParse( self, instring, loc ):
    """Attempt a parse at C{loc} with parse actions disabled and return the end location.

    A C{ParseFatalException} is converted into an ordinary C{ParseException}
    located at C{loc}.
    """
    try:
        outcome = self._parse( instring, loc, doActions=False )
        return outcome[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
1424
def canParseNext(self, instring, loc):
    """Return C{True} if C{tryParse} succeeds at C{loc}, C{False} if it raises
    C{ParseException} or C{IndexError}."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True
1432
class _UnboundedCache(object):
    """Cache with no size limit, exposing C{get}, C{set} and C{clear}.

    C{get} returns the C{not_in_cache} sentinel for a missing key.  The
    backing dict is held in a closure and the three functions are bound to
    the instance as methods.
    """
    def __init__(self):
        cache = {}
        self.not_in_cache = not_in_cache = object()

        def get(self, key):
            return cache.get(key, not_in_cache)

        def set(self, key, value):
            cache[key] = value

        def clear(self):
            cache.clear()

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)

if _OrderedDict is not None:
    class _FifoCache(object):
        """Cache limited to C{size} entries; the oldest entry is evicted first.

        C{get} returns the C{not_in_cache} sentinel for a missing key.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                if len(cache) > size:
                    # popitem(False) removes the first (oldest) item
                    cache.popitem(False)

            def clear(self):
                cache.clear()

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)

else:
    class _FifoCache(object):
        """Fallback cache limited to C{size} entries, used when no ordered
        dict is available; insertion order is tracked in a deque.

        C{get} returns the C{not_in_cache} sentinel for a missing key.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            key_fifo = collections.deque([], size)

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                if len(cache) > size:
                    if key_fifo:
                        cache.pop(key_fifo.popleft(), None)
                    else:
                        # size == 0: the deque can never hold a key, so popleft()
                        # would raise IndexError; drop the new entry instead so
                        # that a zero-size cache simply stores nothing
                        cache.pop(key, None)
                key_fifo.append(key)

            def clear(self):
                cache.clear()
                key_fifo.clear()

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
packrat_cache_lock = RLock()
packrat_cache_stats = [0, 0]

# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Memoizing front end for C{_parseNoCache}.

    Results are keyed on (element, string, location, callPreParse, doActions).
    Both successful results and C{ParseBaseException}s are cached; a cached
    exception is re-raised on a hit, and cached token results are handed out
    as copies.  Hit and miss counts are kept in
    C{ParserElement.packrat_cache_stats}.  All work is done while holding
    C{ParserElement.packrat_cache_lock}.
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(key)

        if cached is not cache.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            cache.set(key, pe.__class__(*pe.args))
            raise
        cache.set(key, (result[0], result[1].copy()))
        return result
1527 1528 _parse = _parseNoCache 1529 1530 @staticmethod
def resetCache():
    """Empty the packrat cache and reset its statistics counters to zero."""
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # slice-assign so the existing list object is updated in place
    stats[:] = [0] * len(stats)
1534 1535 _packratEnabled = False 1536 @staticmethod
def enablePackrat(cache_size_limit=128):
    """Turn on "packrat" parsing, which memoizes parse attempts.

       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can then immediately return a cached
       value instead of re-executing parsing/validating code.  Both valid
       results and parsing exceptions are memoized.  Calling this method
       again after packrat parsing has been enabled has no effect.

       Parameters:
        - cache_size_limit - (default=C{128}) - if an integer value is provided
          will limit the size of the packrat cache; if None is passed, then
          the cache size will be unbounded; if 0 is passed, the cache will
          be effectively disabled.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call C{ParserElement.enablePackrat()}.  If
       your program uses C{psyco} to "compile as you go", you must call
       C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
       Python will crash.  For best results, call C{enablePackrat()} immediately
       after importing pyparsing.

       Example::
           import pyparsing
           pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    if cache_size_limit is None:
        ParserElement.packrat_cache = ParserElement._UnboundedCache()
    else:
        ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
    # from now on all parsing goes through the caching implementation
    ParserElement._parse = ParserElement._parseCache
1570
def parseString( self, instring, parseAll=False ):
    """
    Parse C{instring} from its beginning with this expression and return the
    resulting tokens.  This is the main interface to the client code, once the
    complete expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: unless C{keepTabs} is set, C{parseString} calls C{expandtabs()} on
    the input string, in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            # nothing but skippable text may remain after the match
            loc = self.preParse( instring, loc )
            end_check = Empty() + StringEnd()
            end_check._parse( instring, loc )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return tokens
1620
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Scan the input string for expression matches.  This is a generator: each
    match yields a tuple of the matching tokens, start location, and end
    location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the methods to locals once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here - retry one character further on
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): 'nextloc' (lower-case l) is a different
                        # variable from 'nextLoc' - confirm this branch is intended
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # the match did not advance past loc - step forward
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1691
def transformString( self, instring ):
    """
    Extension to C{L{scanString}} that returns a copy of C{instring} in which
    each matched span is replaced by the tokens produced for that match.

    To use it, attach a parse action to the grammar that modifies the returned
    token list; the text between matches is copied through unchanged.
    Note that this method sets C{self.keepTabs} to C{True}.

    Example::
        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
    Prints::
        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString( instring ):
            # untouched text between the previous match and this one
            pieces.append( instring[prev_end:start] )
            if toks:
                if isinstance(toks,ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks,list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            prev_end = end
        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(map(_ustr,_flatten(non_empty)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1734
def searchString( self, instring, maxMatches=_MAX_INT ):
    """
    Another extension to C{L{scanString}}: collect just the tokens of every
    match into a single C{ParseResults}, discarding the match locations.
    May be called with optional
    C{maxMatches} argument, to clip searching after 'n' matches are found.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
    prints::
        ['More', 'Iron', 'Lead', 'Gold', 'I']
    """
    try:
        found = [ tokens for tokens, _start, _end in self.scanString( instring, maxMatches ) ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1757
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    Yields the text before each separator match, then (when
    C{includeSeparators} is true) the first token of that match, and finally
    the text remaining after the last match.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    last = 0
    for t,s,e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    yield instring[last:]
1779
def __add__(self, other ):
    """
    Implementation of + operator - returns C{L{And}}.  A string operand is
    first converted using the current literal string class (L{Literal} by default).
    If C{other} is neither a string nor a C{ParserElement}, a C{SyntaxWarning}
    is issued and C{None} is returned.

    Example::
        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))
    Prints::
        Hello, World! -> ['Hello', ',', 'World', '!']
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1799
def __radd__(self, other):
    """
    Implementation of + operator when left operand is not a C{L{ParserElement}}.
    A string left operand is wrapped first; the sum is then delegated to the
    left operand's own + operator.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return operand + self
    # unsupported operand: warn and return None
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
1811
def __sub__(self, other):
    """
    Implementation of - operator, returns C{L{And}} with error stop
    (an C{And._ErrorStop()} marker is placed between the two operands).
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return And([self, And._ErrorStop(), operand])
    # unsupported operand: warn and return None
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
1823
def __rsub__(self, other):
    """
    Implementation of - operator when left operand is not a C{L{ParserElement}}.
    A string left operand is wrapped first; the difference is then delegated to
    the left operand's own - operator.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return operand - self
    # unsupported operand: warn and return None
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
1835
def __mul__(self,other):
    """
    Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} if C{other} is neither an int nor a tuple of
    ints/C{None}, and C{ValueError} for negative counts, a maximum below
    the minimum, or a multiplier of 0 / (0,0).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None so that (n,) is treated like (n,None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts only the optional repetitions
            optElements -= minElements
        else:
            # apply the format here; passing the values as extra exception
            # arguments leaves the %s placeholders unformatted in the message
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % (type(other).__name__,))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: Optional(self + Optional(self + ...)), n levels deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """
    Implementation of * operator when the multiplier is the left operand;
    simply forwards to C{__mul__}.
    """
    product = self.__mul__(other)
    return product
1906
def __or__(self, other):
    """
    Implementation of | operator - returns C{L{MatchFirst}}
    built from this expression and C{other} (strings are wrapped first).
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return MatchFirst([self, operand])
    # unsupported operand: warn and return None
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
1918
def __ror__(self, other):
    """
    Implementation of | operator when left operand is not a C{L{ParserElement}}.
    A string left operand is wrapped first, then its own | operator is used.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return operand | self
    # unsupported operand: warn and return None
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
1930
def __xor__(self, other):
    """
    Implementation of ^ operator - returns C{L{Or}}
    built from this expression and C{other} (strings are wrapped first).
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return Or([self, operand])
    # unsupported operand: warn and return None
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
1942
def __rxor__(self, other):
    """
    Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
    A string left operand is wrapped first, then its own ^ operator is used.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return operand ^ self
    # unsupported operand: warn and return None
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
1954
def __and__(self, other):
    """
    Implementation of & operator - returns C{L{Each}}
    built from this expression and C{other} (strings are wrapped first).
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return Each([self, operand])
    # unsupported operand: warn and return None
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
1966
def __rand__(self, other):
    """
    Implementation of & operator when left operand is not a C{L{ParserElement}}.
    A string left operand is wrapped first, then its own & operator is used.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return operand & self
    # unsupported operand: warn and return None
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
1978
def __invert__(self):
    """
    Implementation of ~ operator - wraps this expression in a C{L{NotAny}}
    and returns it.
    """
    negated = NotAny(self)
    return negated
1984
def __call__(self, name=None):
    """
    Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

    If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
    passed as C{True}.

    If C{name} is omitted, same as calling C{L{copy}}.

    Example::
        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    # only a missing name (None) means "plain copy"; any other value,
    # including the empty string, is handed to setResultsName
    if name is None:
        return self.copy()
    return self.setResultsName(name)
2003
def suppress(self):
    """
    Returns this C{ParserElement} wrapped in a C{Suppress}, which keeps its
    output out of the results; useful to keep punctuation from cluttering up
    returned output.
    """
    suppressed = Suppress(self)
    return suppressed
2010
def leaveWhitespace(self):
    """
    Turns off the skipping of whitespace before matching the characters in the
    C{ParserElement}'s defined pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    Returns C{self}, so calls can be chained.
    """
    setattr(self, "skipWhitespace", False)
    return self
2019
def setWhitespaceChars(self, chars):
    """
    Overrides the default whitespace chars with C{chars}; also re-enables
    whitespace skipping and clears the C{copyDefaultWhiteChars} flag.

    Returns C{self}, so calls can be chained.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
2028
def parseWithTabs(self):
    """
    Overrides the default behavior of expanding C{<TAB>}s to spaces before parsing
    the input string, by setting the C{keepTabs} flag.
    Must be called before C{parseString} when the input grammar contains elements that
    match C{<TAB>} characters.

    Returns C{self}, so calls can be chained.
    """
    setattr(self, "keepTabs", True)
    return self
2037
def ignore(self, other):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    expr = other
    if isinstance(expr, basestring):
        expr = Suppress(expr)

    if not isinstance(expr, Suppress):
        # anything else is copied and wrapped in a Suppress before being recorded
        self.ignoreExprs.append(Suppress(expr.copy()))
    elif expr not in self.ignoreExprs:
        # a Suppress is recorded as-is, but only once
        self.ignoreExprs.append(expr)
    return self
2060
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching, using the
    given actions.  Any action passed as a false value (e.g. C{None}) is replaced
    by the corresponding module default action.

    Returns C{self}, so calls can be chained.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
2070
def setDebug(self, flag=True):
    """
    Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable, False to disable.

    Example::
        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::
        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using L{setDebugActions}. Prior to attempting
    to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
    is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
    message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
    """
    if not flag:
        self.debug = False
        return self
    # enabling installs the three default debug actions (which also sets self.debug)
    self.setDebugActions(_defaultStartDebugAction,
                         _defaultSuccessDebugAction,
                         _defaultExceptionDebugAction)
    return self
2111
def __str__(self):
    """Return the C{name} attribute of this expression."""
    label = self.name
    return label
2114
def __repr__(self):
    """Return the same text as the string conversion performed by C{_ustr}."""
    text = _ustr(self)
    return text
2117
def streamline(self):
    """
    Mark this expression as streamlined and discard the cached string
    representation (C{strRepr}).  Returns C{self}.
    """
    self.strRepr = None
    self.streamlined = True
    return self
2122
def checkRecursion(self, parseElementList):
    """No-op in this implementation; does nothing and returns C{None}."""
    return None
2125
def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted for interface compatibility but is not used by
    this implementation; it defaults to C{None} rather than a shared mutable list.
    """
    # always start the recursion check from a fresh, empty element list
    self.checkRecursion( [] )
2131
def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # the argument has no usable read(); use it as a path for open()
        with open(file_or_filename, "r") as handle:
            text = handle.read()
    try:
        parsed = self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
    return parsed
2151
def __eq__(self,other):
    """
    Equality test.  Against another C{ParserElement}: true for the same object or
    for equal attribute dictionaries.  Against a string: true if this expression
    matches the string (see C{matches}).  Anything else is compared through the
    C{super} proxy.
    """
    if isinstance(other, ParserElement):
        if self is other:
            return True
        return vars(self) == vars(other)
    if isinstance(other, basestring):
        return self.matches(other)
    return super(ParserElement,self)==other
2159
def __ne__(self,other):
    """Inequality test, defined as the negation of C{self == other}."""
    same = (self == other)
    return not same
2162
def __hash__(self):
    """Hash by object identity (the hash of C{id(self)})."""
    identity = id(self)
    return hash(identity)
2165
def __req__(self,other):
    """Reflected equality helper; returns the result of C{self == other}."""
    outcome = (self == other)
    return outcome
2168
def __rne__(self,other):
    """Reflected inequality helper; returns the negation of C{self == other}."""
    same = (self == other)
    return not same
2171
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Returns C{True} if C{parseString} succeeds, C{False} if it raises a
    C{ParseBaseException}.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2190
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and the results contain a list of
    C{(test string, result)} pairs, where the result is the parse result or the
    exception that was raised

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success
    """
    # a multiline string is split into one stripped test per line
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    # a string comment marker is turned into a Literal so it can be used via matches()
    if isinstance(comment, basestring):
        comment = Literal(comment)
    allResults = []
    comments = []   # comment lines collected since the last executed test
    success = True
    for t in tests:
        # collect comment lines, and blank lines that follow pending comments,
        # so they are printed together with the next test
        if comment is not None and comment.matches(t, False) or comments and not t:
            comments.append(t)
            continue
        # skip blank lines that are not part of a comment group
        if not t:
            continue
        out = ['\n'.join(comments), t]
        comments = []
        try:
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            # a successful parse counts as a failure when failures are expected
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            # draw a caret under the failing position; for multi-line tests the
            # offending line is echoed first and the column is used instead of loc
            if '\n' in t:
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
            else:
                out.append(' '*pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            # any other exception is reported and recorded rather than propagated
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                # blank separator line after each full dump
                out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
2313
class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass that serves as the base class for
    atomic matching patterns.
    """
    def __init__(self):
        # tokens are always initialised with savelist turned off
        base = super(Token, self)
        base.__init__(savelist=False)
2321
class Empty(Token):
    """
    An empty token, will always match.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
2332
class NoMatch(Token):
    """
    A token that will never match.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl(self, instring, loc, doActions=True):
        # unconditionally fail at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
2347
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """
    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty literal: warn, then turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl(self, instring, loc, doActions=True):
        # cheap first-character test first; startswith only for longer literals
        if (instring[loc] != self.firstMatchChar or
                not (self.matchLen == 1 or instring.startswith(self.match, loc))):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
_L = Literal
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done in upper case, for both the keyword
            # text and the identifier characters
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        # in both branches the keyword text must match at loc, and must not be
        # directly followed or preceded by an identifier character
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """
        Return a copy of this keyword that keeps this instance's identifier
        characters (including any custom C{identChars} given to the constructor).
        """
        c = super(Keyword,self).copy()
        # give the copy its own set built from this instance's characters,
        # rather than resetting it to the class-wide default string
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2454
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__(self, matchString):
        # the base Literal stores the upper-cased text used for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
2477
class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__( self, matchString, identChars=None ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        # same three conditions as the caseless branch of Keyword.parseImpl:
        # text matches (ignoring case), no identifier character directly after,
        # and no identifier character directly before
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
2495
class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch,self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl( self, instring, loc, doActions=True ):
        start = loc
        # end of the candidate text: same length as match_string, measured from start
        maxloc = start + len(self.match_string)

        # only attempt a comparison if enough input remains
        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches

            for match_stringloc,s_m in enumerate(zip(instring[start:maxloc], self.match_string)):
                src,mat = s_m
                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # loop finished without exceeding the limit: the match ends at
                # maxloc, which is relative to start (not to the beginning of
                # the input string)
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = self.match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
2555
2556 2557 -class Word(Token):
2558 """ 2559 Token for matching words composed of allowed character sets. 2560 Defined with string containing all allowed initial characters, 2561 an optional string containing allowed body characters (if omitted, 2562 defaults to the initial character set), and an optional minimum, 2563 maximum, and/or exact length. The default value for C{min} is 1 (a 2564 minimum value < 1 is not valid); the default values for C{max} and C{exact} 2565 are 0, meaning no maximum or exact length restriction. An optional 2566 C{excludeChars} parameter can list characters that might be found in 2567 the input C{bodyChars} string; useful to define a word of all printables 2568 except for one or two characters, for instance. 2569 2570 L{srange} is useful for defining custom character set strings for defining 2571 C{Word} expressions, using range notation from regular expression character sets. 2572 2573 A common mistake is to use C{Word} to match a specific literal string, as in 2574 C{Word("Address")}. Remember that C{Word} uses the string argument to define 2575 I{sets} of matchable characters. This expression would match "Add", "AAA", 2576 "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'. 2577 To match an exact literal string, use L{Literal} or L{Keyword}. 2578 2579 pyparsing includes helper strings for building Words: 2580 - L{alphas} 2581 - L{nums} 2582 - L{alphanums} 2583 - L{hexnums} 2584 - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.) 2585 - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.) 
2586 - L{printables} (any non-whitespace character) 2587 2588 Example:: 2589 # a word composed of digits 2590 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) 2591 2592 # a word with a leading capital, and zero or more lowercase 2593 capital_word = Word(alphas.upper(), alphas.lower()) 2594 2595 # hostnames are alphanumeric, with leading alpha, and '-' 2596 hostname = Word(alphas, alphanums+'-') 2597 2598 # roman numeral (not a strict parser, accepts invalid mix of characters) 2599 roman = Word("IVXLCDM") 2600 2601 # any string of non-whitespace characters, except for ',' 2602 csv_value = Word(printables, excludeChars=",") 2603 """
2604 - def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
2605 super(Word,self).__init__() 2606 if excludeChars: 2607 initChars = ''.join(c for c in initChars if c not in excludeChars) 2608 if bodyChars: 2609 bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) 2610 self.initCharsOrig = initChars 2611 self.initChars = set(initChars) 2612 if bodyChars : 2613 self.bodyCharsOrig = bodyChars 2614 self.bodyChars = set(bodyChars) 2615 else: 2616 self.bodyCharsOrig = initChars 2617 self.bodyChars = set(initChars) 2618 2619 self.maxSpecified = max > 0 2620 2621 if min < 1: 2622 raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 2623 2624 self.minLen = min 2625 2626 if max > 0: 2627 self.maxLen = max 2628 else: 2629 self.maxLen = _MAX_INT 2630 2631 if exact > 0: 2632 self.maxLen = exact 2633 self.minLen = exact 2634 2635 self.name = _ustr(self) 2636 self.errmsg = "Expected " + self.name 2637 self.mayIndexError = False 2638 self.asKeyword = asKeyword 2639 2640 if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 2641 if self.bodyCharsOrig == self.initCharsOrig: 2642 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 2643 elif len(self.initCharsOrig) == 1: 2644 self.reString = "%s[%s]*" % \ 2645 (re.escape(self.initCharsOrig), 2646 _escapeRegexRangeChars(self.bodyCharsOrig),) 2647 else: 2648 self.reString = "[%s][%s]*" % \ 2649 (_escapeRegexRangeChars(self.initCharsOrig), 2650 _escapeRegexRangeChars(self.bodyCharsOrig),) 2651 if self.asKeyword: 2652 self.reString = r"\b"+self.reString+r"\b" 2653 try: 2654 self.re = re.compile( self.reString ) 2655 except: 2656 self.re = None
2657
2658 - def parseImpl( self, instring, loc, doActions=True ):
2659 if self.re: 2660 result = self.re.match(instring,loc) 2661 if not result: 2662 raise ParseException(instring, loc, self.errmsg, self) 2663 2664 loc = result.end() 2665 return loc, result.group() 2666 2667 if not(instring[ loc ] in self.initChars): 2668 raise ParseException(instring, loc, self.errmsg, self) 2669 2670 start = loc 2671 loc += 1 2672 instrlen = len(instring) 2673 bodychars = self.bodyChars 2674 maxloc = start + self.maxLen 2675 maxloc = min( maxloc, instrlen ) 2676 while loc < maxloc and instring[loc] in bodychars: 2677 loc += 1 2678 2679 throwException = False 2680 if loc - start < self.minLen: 2681 throwException = True 2682 if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: 2683 throwException = True 2684 if self.asKeyword: 2685 if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars): 2686 throwException = True 2687 2688 if throwException: 2689 raise ParseException(instring, loc, self.errmsg, self) 2690 2691 return loc, instring[start:loc]
2692
def __str__( self ):
    """Return a display string for this Word.

    Defers to the inherited C{__str__}; if that call raises, falls back
    to a compact C{W:(init,body)} summary (or C{W:(chars)} when the
    initial and body character sets are the same).  Each character set
    is abbreviated to its first four characters followed by C{...}.
    The summary is cached in C{self.strRepr}.
    """
    try:
        return super(Word,self).__str__()
    except Exception:
        # Only ordinary errors are ignored here; a bare 'except' would also
        # have swallowed KeyboardInterrupt and SystemExit.
        pass

    if self.strRepr is None:

        def charsAsStr(s):
            # abbreviate long character sets for readability
            if len(s)>4:
                return s[:4]+"..."
            else:
                return s

        if ( self.initCharsOrig != self.bodyCharsOrig ):
            self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
        else:
            self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

    return self.strRepr
2714
class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    # type of a compiled regular expression object, used to recognize precompiled patterns
    compiledREtype = type(re.compile("[A-Z]"))

    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already-compiled regular expression
        object, in which case it is used directly and its source text is
        recorded as C{self.pattern}.

        Raises C{ValueError} if C{pattern} is neither a string nor a
        compiled regular expression; re-raises the C{re} compile error
        (after issuing a C{SyntaxWarning}) for an invalid pattern string.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # record the pattern source text; str(pattern) would yield the
            # object's repr (e.g. "re.compile('...')"), not the expression
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the regular expression at C{loc}.

        Returns C{(newloc, results)} where C{results} is a C{ParseResults}
        built from the matched text, with one named entry per named group
        of the expression.  Raises C{ParseException} if there is no match.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for groupName, groupValue in result.groupdict().items():
            ret[groupName] = groupValue
        return loc,ret

    def __str__( self ):
        """Return the inherited string form, or C{Re:(<pattern repr>)} if that call raises."""
        try:
            return super(Regex,self).__str__()
        except Exception:
            # narrowed from a bare 'except' so KeyboardInterrupt/SystemExit propagate
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
2787
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=C{None})
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        """Build the regular expression that recognizes the quoted string.

        Surrounding whitespace is stripped from C{quoteChar} and
        C{endQuoteChar}.  Raises C{SyntaxError} (after a C{SyntaxWarning})
        if either is empty after stripping, and re-raises the C{re}
        compile error if the generated pattern is invalid.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # pieces shared by the single-line and multiline patterns
        openQuote = re.escape(self.quoteChar)
        endFirstChar = _escapeRegexRangeChars(self.endQuoteChar[0])
        escInRange = _escapeRegexRangeChars(escChar) if escChar is not None else ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % (openQuote, endFirstChar, escInRange)
        else:
            # single-line strings additionally may not contain line breaks
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % (openQuote, endFirstChar, escInRange)

        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the
            # body, as long as the following character breaks the end quote
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at C{loc} and return C{(newloc, text)}.

        When C{self.unquoteResults} is set, the delimiters are removed and
        escaped whitespace, escaped characters and escaped quotes are
        converted, in that order.  Raises C{ParseException} if no quoted
        string starts at C{loc}.
        """
        # cheap first-character test before running the regular expression
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    ws_map = {
                        r'\t' : '\t',
                        r'\n' : '\n',
                        r'\f' : '\f',
                        r'\r' : '\r',
                    }
                    for wslit,wschar in ws_map.items():
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid string escape sequence
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the inherited string form, or a description of the quote delimiters if that call raises."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            # narrowed from a bare 'except' so KeyboardInterrupt/SystemExit propagate
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2924
class CharsNotIn(Token):
    """
    Token for matching words composed of characters I{not} in a given set (will
    include whitespace in matched characters if not listed in the provided exclusion set - see example).
    Defined with string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Record the excluded characters and the length limits.

        Raises C{ValueError} if C{min} is less than 1.  A positive
        C{exact} overrides both C{min} and C{max}.
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters not in C{self.notChars}, returning C{(newloc, text)}.

        Raises C{ParseException} if the character at C{loc} is excluded or
        fewer than C{self.minLen} characters were matched.
        """
        if instring[loc] in self.notChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        notchars = self.notChars
        maxlen = min( start+self.maxLen, len(instring) )
        while loc < maxlen and \
              (instring[loc] not in notchars):
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the inherited string form, or C{!W:(chars)} (abbreviated to four characters) if that call raises."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            # narrowed from a bare 'except' so KeyboardInterrupt/SystemExit propagate
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2996
class White(Token):
    """
    Special matching class for matching whitespace.  Normally, whitespace is ignored
    by pyparsing grammars.  This class is included when some whitespace structures
    are significant.  Define with a string containing the whitespace characters to be
    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
    as defined for the C{L{Word}} class.
    """
    # printable tags used to build the element name from the matched characters
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Configure which whitespace characters are matched and how many.

        The characters in C{ws} are removed from this element's skippable
        whitespace so that they can be matched instead of skipped.  A
        positive C{exact} overrides both C{min} and C{max}.
        """
        super(White,self).__init__()
        self.matchWhite = ws
        skippable = [ch for ch in self.whiteChars if ch not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        #~ self.leaveWhitespace()
        tags = [White.whiteStrs[ch] for ch in self.matchWhite]
        self.name = "".join(tags)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            self.minLen = self.maxLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace, returning C{(newloc, text)}.

        Raises C{ParseException} if the character at C{loc} is not one of
        the matched characters or the run is shorter than C{self.minLen}.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min( begin + self.maxLen, len(instring) )
        loc = begin + 1
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
3046
class _PositionToken(Token):
    """Common base for the position-testing tokens defined after it in this module."""
    def __init__( self ):
        """Name the element after its class and flag it as able to match without returning text."""
        super(_PositionToken,self).__init__()
        self.name = type(self).__name__
        self.mayReturnEmpty = True
        self.mayIndexError = False
3054
class GoToColumn(_PositionToken):
    """
    Token to advance to a specific column of input text; useful for tabular report scraping.
    """
    def __init__( self, colno ):
        """Remember the target column C{colno}."""
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and whitespace until the target column or a non-space character is reached."""
        if col(loc, instring) == self.col:
            return loc

        total = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < total and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance to the target column, returning the text that was stepped over.

        Raises C{ParseException} if the current column is already past the
        target column.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
3079
class LineStart(_PositionToken):
    """
    Matches if current position is at the beginning of a line within the parse string
    """
    def __init__( self ):
        """Exclude the newline character from the whitespace this element skips."""
        super(LineStart,self).__init__()
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Step over a single newline if the inherited C{preParse} stops on one.

        Note that the returned location is derived from the incoming
        C{loc}, not from the location returned by the inherited call.
        """
        preloc = super(LineStart,self).preParse(instring,loc)
        # guard the index: at end of input there is no character to inspect,
        # and an unguarded lookup would raise IndexError from preParse
        if preloc < len(instring) and instring[preloc] == "\n":
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when C{loc} is at the start of a line.

        The position qualifies if it is 0, if it equals the location
        C{preParse} reports for position 0, or if the previous character
        is a newline.  Raises C{ParseException} otherwise.
        """
        if not( loc==0 or
            (loc == self.preParse( instring, 0 )) or
            (instring[loc-1] == "\n") ): #col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3101
class LineEnd(_PositionToken):
    """
    Matches if current position is at the end of a line within the parse string
    """
    def __init__( self ):
        """Exclude the newline character from the whitespace this element skips."""
        super(LineEnd,self).__init__()
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a newline at C{loc}, or the end of the input.

        Returns C{(loc+1, "\\n")} for a newline and C{(loc+1, [])} when
        C{loc} is exactly the end of the string; raises C{ParseException}
        in every other case.
        """
        end = len(instring)
        if loc < end and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == end:
            return loc+1, []
        raise ParseException(instring, loc, self.errmsg, self)
3121
class StringStart(_PositionToken):
    """
    Matches if current position is at the beginning of the parse string
    """
    def __init__( self ):
        """Set the error message reported when the match fails."""
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens at position 0, or at the position C{preParse} reports for position 0.

        Raises C{ParseException} for any other location.
        """
        # a non-zero location still counts when everything before it is
        # skipped by preParse (whitespace and ignorables)
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3136
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        """Set the error message reported when the match fails."""
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when C{loc} is at or beyond the end of the input.

        Returns C{(loc+1, [])} when C{loc} is exactly the end of the
        string and C{(loc, [])} when it is already past the end.  Raises
        C{ParseException} when input remains.
        """
        if loc < len(instring):
            raise ParseException(instring, loc, self.errmsg, self)
        elif loc == len(instring):
            return loc+1, []
        else:
            # loc > len(instring); the three comparisons are exhaustive, so
            # the former trailing 'else: raise' branch could never execute
            return loc, []
3154
class WordStart(_PositionToken):
    r"""
    Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        """Store C{wordChars} as a set for membership tests."""
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with no tokens at position 0, or where a word character follows a non-word character.

        Raises C{ParseException} when the previous character is a word
        character or the current character is not.
        """
        if loc != 0:
            precededByWord = instring[loc-1] in self.wordChars
            if precededByWord or instring[loc] not in self.wordChars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3174
class WordEnd(_PositionToken):
    r"""
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        """Store C{wordChars} as a set and disable whitespace skipping for this element."""
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with no tokens at end of input, or where a non-word character follows a word character.

        Raises C{ParseException} when the current character is a word
        character or the previous character is not.
        """
        total = len(instring)
        if total > 0 and loc < total:
            followedByWord = instring[loc] in self.wordChars
            # NOTE(review): at loc == 0 the index loc-1 wraps around to the last
            # character of the string - confirm whether that is intended
            if followedByWord or instring[loc-1] not in self.wordChars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3196
class ParseExpression(ParserElement):
    """
    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
    """
    def __init__( self, exprs, savelist = False ):
        """Store the contained expressions as the list C{self.exprs}.

        C{exprs} may be a single string, a generator, any iterable of
        expressions, or a single non-iterable object.  A string, or an
        iterable consisting only of strings, is wrapped with
        C{ParserElement._literalStringClass}.
        """
        super(ParseExpression,self).__init__(savelist)

        # The Iterable ABC lives in collections.abc; the collections.Iterable
        # alias was removed in Python 3.10.  Fall back for Python 2.
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
        elif isinstance( exprs, Iterable ):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = [ ParserElement._literalStringClass( expr ) for expr in exprs ]
            self.exprs = list(exprs)
        else:
            # not registered as Iterable: try list() anyway, otherwise treat
            # the object as a single expression
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the contained expression(s) at index or slice C{i}."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions, clear the cached string form, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the originally supplied expressions are not modified
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable on this element and on every contained expression.

        A C{Suppress} that is already present in C{self.ignoreExprs} is
        not registered again.  Returns C{self}.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseExpression, self).ignore( other )
                for e in self.exprs:
                    e.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseExpression, self).ignore( other )
            for e in self.exprs:
                e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the inherited string form, or C{ClassName:(exprs)} if that call raises."""
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            # narrowed from a bare 'except' so KeyboardInterrupt/SystemExit propagate
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline all contained expressions and flatten nested expressions of the same class.

        Returns C{self}.
        """
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the inherited C{setResultsName} and return its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=None ):
        """Validate every contained expression, then run C{checkRecursion} on this element.

        C{validateTrace} is the list of elements visited so far; it is
        copied, never modified.  It defaults to an empty trace.
        """
        tmp = (validateTrace or [])[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this element whose contained expressions are themselves copies."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
3309
class And(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element; once C{And.parseImpl} passes one, later failures are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Take whitespace handling from the first expression; may return empty only if every expression may."""
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        leading = self.exprs[0]
        self.setWhitespaceChars( leading.whiteChars )
        self.skipWhitespace = leading.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse every contained expression in order and return C{(newloc, accumulated_tokens)}.

        After an C{_ErrorStop} marker has been passed, a
        C{ParseBaseException} or C{IndexError} from a later expression is
        re-raised as C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        pastErrorStop = False
        for expr in self.exprs[1:]:
            if isinstance(expr, And._ErrorStop):
                pastErrorStop = True
                continue

            if not pastErrorStop:
                loc, exprtokens = expr._parse( instring, loc, doActions )
            else:
                try:
                    loc, exprtokens = expr._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)

            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append C{other} in place (strings are wrapped with C{ParserElement._literalStringClass})."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Run C{checkRecursion} on contained expressions, stopping after the first that cannot return empty."""
        trail = parseElementList[:] + [ self ]
        for expr in self.exprs:
            expr.checkRecursion( trail )
            if not expr.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the assigned name if there is one, else the expressions joined by spaces inside braces."""
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
            return self.strRepr
        return self.name
3384
class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        """May return empty if any alternative may; an Or with no alternatives is flagged as able to return empty."""
        super(Or,self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative, then parse with the one whose trial match ended furthest along.

        Alternatives that matched are retried from longest to shortest
        trial match until one parses.  If none succeeds, the exception
        recorded at the furthest location is raised with this element's
        error message; with no alternatives at all a C{ParseException}
        saying so is raised.
        """
        furthestLoc = -1
        furthestErr = None
        candidates = []
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, alt))

        # stable sort: alternatives with equal end locations keep their listed order
        for _, alt in sorted(candidates, key=lambda pair: pair[0], reverse=True):
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc

        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ixor__(self, other ):
        """Append C{other} in place (strings are wrapped with C{ParserElement._literalStringClass})."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if there is one, else the alternatives joined by C{^} inside braces."""
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
            return self.strRepr
        return self.name

    def checkRecursion( self, parseElementList ):
        """Run C{checkRecursion} on every alternative with this element added to the trail."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
3463
class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        """May return empty if any alternative may; a MatchFirst with no alternatives is flagged as able to return empty."""
        super(MatchFirst,self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses at C{loc}.

        If none parses, the exception recorded at the furthest location is
        raised with this element's error message; with no alternatives at
        all a C{ParseException} saying so is raised.
        """
        furthestLoc = -1
        furthestErr = None
        for alt in self.exprs:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ior__(self, other ):
        """Append C{other} in place (strings are wrapped with C{ParserElement._literalStringClass})."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if there is one, else the alternatives joined by C{|} inside braces."""
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
            return self.strRepr
        return self.name

    def checkRecursion( self, parseElementList ):
        """Run C{checkRecursion} on every alternative with this element added to the trail."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
3531
3532 3533 -class Each(ParseExpression):
3534 """ 3535 Requires all given C{ParseExpression}s to be found, but in any order. 3536 Expressions may be separated by whitespace. 3537 May be constructed using the C{'&'} operator. 3538 3539 Example:: 3540 color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") 3541 shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") 3542 integer = Word(nums) 3543 shape_attr = "shape:" + shape_type("shape") 3544 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") 3545 color_attr = "color:" + color("color") 3546 size_attr = "size:" + integer("size") 3547 3548 # use Each (using operator '&') to accept attributes in any order 3549 # (shape and posn are required, color and size are optional) 3550 shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr) 3551 3552 shape_spec.runTests(''' 3553 shape: SQUARE color: BLACK posn: 100, 120 3554 shape: CIRCLE size: 50 color: BLUE posn: 50,80 3555 color:GREEN size:20 shape:TRIANGLE posn:20,40 3556 ''' 3557 ) 3558 prints:: 3559 shape: SQUARE color: BLACK posn: 100, 120 3560 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] 3561 - color: BLACK 3562 - posn: ['100', ',', '120'] 3563 - x: 100 3564 - y: 120 3565 - shape: SQUARE 3566 3567 3568 shape: CIRCLE size: 50 color: BLUE posn: 50,80 3569 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] 3570 - color: BLUE 3571 - posn: ['50', ',', '80'] 3572 - x: 50 3573 - y: 80 3574 - shape: CIRCLE 3575 - size: 50 3576 3577 3578 color: GREEN size: 20 shape: TRIANGLE posn: 20,40 3579 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] 3580 - color: GREEN 3581 - posn: ['20', ',', '40'] 3582 - x: 20 3583 - y: 40 3584 - shape: TRIANGLE 3585 - size: 20 3586 """
3587 - def __init__( self, exprs, savelist = True ):
3588 super(Each,self).__init__(exprs, savelist) 3589 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 3590 self.skipWhitespace = True 3591 self.initExprGroups = True
3592
3593 - def parseImpl( self, instring, loc, doActions=True ):
3594 if self.initExprGroups: 3595 self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional)) 3596 opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] 3597 opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)] 3598 self.optionals = opt1 + opt2 3599 self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] 3600 self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] 3601 self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] 3602 self.required += self.multirequired 3603 self.initExprGroups = False 3604 tmpLoc = loc 3605 tmpReqd = self.required[:] 3606 tmpOpt = self.optionals[:] 3607 matchOrder = [] 3608 3609 keepMatching = True 3610 while keepMatching: 3611 tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired 3612 failed = [] 3613 for e in tmpExprs: 3614 try: 3615 tmpLoc = e.tryParse( instring, tmpLoc ) 3616 except ParseException: 3617 failed.append(e) 3618 else: 3619 matchOrder.append(self.opt1map.get(id(e),e)) 3620 if e in tmpReqd: 3621 tmpReqd.remove(e) 3622 elif e in tmpOpt: 3623 tmpOpt.remove(e) 3624 if len(failed) == len(tmpExprs): 3625 keepMatching = False 3626 3627 if tmpReqd: 3628 missing = ", ".join(_ustr(e) for e in tmpReqd) 3629 raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) 3630 3631 # add any unmatched Optionals, in case they have default values defined 3632 matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] 3633 3634 resultlist = [] 3635 for e in matchOrder: 3636 loc,results = e._parse(instring,loc,doActions) 3637 resultlist.append(results) 3638 3639 finalResults = sum(resultlist, ParseResults([])) 3640 return loc, finalResults
3641
def __str__(self):
    """
    Return the assigned name if there is one; otherwise a cached rendering of the
    sub-expressions joined with C{" & "} and wrapped in braces.
    """
    if hasattr(self, "name"):
        return self.name

    if self.strRepr is not None:
        return self.strRepr

    # build the representation once and remember it
    rendered = " & ".join(_ustr(sub) for sub in self.exprs)
    self.strRepr = "{" + rendered + "}"
    return self.strRepr
3650
def checkRecursion(self, parseElementList):
    """
    Run the recursion check on every sub-expression, passing each one a copy of
    C{parseElementList} extended with this element.  The caller's list is not modified.
    """
    trail = parseElementList[:]
    trail.append(self)
    for sub in self.exprs:
        sub.checkRecursion(trail)
3655
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored as C{self.expr}.  C{self.expr} may be
    C{None}, so every method guards against an undefined expression.
    """
    def __init__( self, expr, savelist=False ):
        """
        Parameters:
         - expr - the expression to wrap; a string is converted to a literal using
              C{ParserElement._literalStringClass}; may be C{None}
         - savelist - forwarded to the C{ParserElement} constructor
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # inherit the parsing characteristics of the wrapped expression
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Delegate parsing to the wrapped expression.

        Raises C{ParseException} if no expression has been defined.
        """
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """
        Turn off whitespace skipping on this element and on a private copy of the
        wrapped expression.  Returns C{self}.
        """
        self.skipWhitespace = False
        # the copy must happen inside the None guard; copying first raised
        # AttributeError when no expression had been defined
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression on this element and pass it on
        to the wrapped expression.  A C{Suppress} that is already registered is not
        added again.  Returns C{self}.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            # pass along the entry that the base class just appended
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and then the wrapped expression.  Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """
        Raise C{RecursiveGrammarException} if this element is already in
        C{parseElementList}; otherwise continue the check in the wrapped expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """
        Validate the wrapped expression, then check this element for recursion.
        """
        # the default list is only ever copied, never mutated
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """
        Return the base-class string if it can produce one; otherwise a cached
        C{"ClassName:(expr)"} rendering.
        """
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # narrowed from a bare except so that KeyboardInterrupt and
            # SystemExit are no longer swallowed
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
3732
class FollowedBy(ParseElementEnhance):
    """
    Positive lookahead.  C{FollowedBy} checks that the given expression matches at
    the current position but does I{not} advance the parsing position, and it always
    returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        # a lookahead consumes no input
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the wrapped expression at C{loc}; on success return C{loc} unchanged with no tokens."""
        lookahead = self.expr
        lookahead.tryParse(instring, loc)
        return loc, []
3758
class NotAny(ParseElementEnhance):
    """
    Negative lookahead.  C{NotAny} checks that the given expression does I{not}
    match at the current position; it does I{not} advance the parsing position and
    does I{not} skip over leading whitespace.  It always returns a null token list.
    May be constructed using the '~' operator.

    Example::
        # match a word, but only if it is not the literal 'end'
        not_end = ~Literal('end') + Word(alphas)
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # set the flag directly instead of calling self.leaveWhitespace(),
        # so the setting does not propagate to the contained expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Fail if the wrapped expression can parse at C{loc}; otherwise return C{loc} and no tokens."""
        if not self.expr.canParseNext(instring, loc):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__(self):
        """Return the assigned name, or a cached C{"~{expr}"} rendering."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "~{" + _ustr(self.expr) + "}"
        return self.strRepr
3791
class _MultipleMatch(ParseElementEnhance):
    """
    Shared implementation of repetition for C{OneOrMore} and C{ZeroOrMore}: matches
    the wrapped expression once, then keeps matching until it fails or the optional
    C{stopOn} sentinel is seen.
    """
    def __init__(self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # store the negated sentinel, so "not at the sentinel" is one tryParse call
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the wrapped expression at least once and accumulate all tokens.

        A failure of the first match (or a sentinel at the start) propagates to the
        caller; a failure on any later repetition just ends the loop.
        """
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_sentinel = None
        if self.not_ender is not None:
            reject_sentinel = self.not_ender.tryParse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_expr(instring, loc, doActions, callPreParse=False)
        try:
            have_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                preloc = skip_ignorables(instring, loc) if have_ignorables else loc
                loc, more = parse_expr(instring, preloc, doActions)
                if more or more.haskeys():
                    tokens += more
        except (ParseException, IndexError):
            pass

        return loc, tokens
3828
class OneOrMore(_MultipleMatch):
    """
    Matches the given expression one or more times.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__(self):
        """Return the assigned name, or a cached C{"{expr}..."} rendering."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "{" + _ustr(self.expr) + "}..."
        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Same as the inherited method, but the returned element always saves its results as a list."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
3868
class ZeroOrMore(_MultipleMatch):
    """
    Matches the given expression zero or more times.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        # zero matches is a valid outcome
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse as the base class does; if even the first match fails, succeed with no tokens."""
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = (loc, [])
        return outcome

    def __str__(self):
        """Return the assigned name, or a cached C{"[expr]..."} rendering."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "[" + _ustr(self.expr) + "]..."
        return self.strRepr
3899
class _NullToken(object):
    """Placeholder object that is always falsy and whose string form is empty."""

    def __bool__(self):
        # Python 3 truth test
        return False

    # Python 2 looks up __nonzero__ instead of __bool__
    __nonzero__ = __bool__

    def __str__(self):
        return ""


# module-wide sentinel meaning "no default value was supplied"
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """
    Makes the given expression optional: it is matched if present and skipped if not.

    Parameters:
     - expr - expression that may or may not be present
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__(self, expr, default=_optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Try the wrapped expression.  If it fails, leave C{loc} unchanged and return the
        default value (also stored under the wrapped expression's results name, if it
        has one), or no tokens when no default was given.
        """
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([fallback])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def __str__(self):
        """Return the assigned name, or a cached C{"[expr]"} rendering."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "[" + _ustr(self.expr) + "]"
        return self.strRepr
3970
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a string failOn is converted to a literal expression
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Scan forward from C{loc}, one character at a time, until the target expression
        matches.

        Returns C{(loc, tokens)}, where C{tokens} holds the skipped text, followed by
        the target's own tokens when C{include} was set.

        Raises C{ParseException} if the end of C{instring} is passed without the target
        matching.
        """
        startloc = loc
        instrlen = len(instring)
        expr = self.expr
        # bind the bound methods to locals once, outside the scanning loop
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break bypasses the while's else clause below, so
                # control continues to the result-building code instead of raising -
                # confirm this is the intended handling of failOn
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # trial match only: parse actions are not run here
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real this time, honoring doActions
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
4085
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """
        Define the wrapped expression and adopt its parsing characteristics.
        A string is converted to a literal first.  Returns C{self}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # drop any cached string representation
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form of C{<<}; same effect, and free of the precedence pitfall."""
        return self << other

    def leaveWhitespace( self ):
        """Turn off whitespace skipping on this element only.  Returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """
        Streamline the wrapped expression at most once.  The flag is set before
        descending into the wrapped expression.  Returns C{self}.
        """
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """
        Validate the wrapped expression unless this element is already in
        C{validateTrace}, then check this element for recursion.
        """
        # the default list is only ever copied, never mutated
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """
        Return the assigned name, or C{"<ClassName>: ..."}.

        The wrapped expression is deliberately not rendered.  The original source
        had stubbed that out because of memory and performance problems, leaving
        unreachable code after the return; that dead code has been removed.
        """
        if hasattr(self,"name"):
            return self.name
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Return a copy of this element.  While no expression has been defined yet,
        return a new C{Forward} that wraps this one instead.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
4166
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the fixed placeholder C{"..."}."""

    def __str__(self):
        placeholder = "..."
        return placeholder
4170
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__(self, expr, savelist=False):
        # savelist is accepted but not passed on to the base class
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
4178
class Combine(TokenConverter):
    """
    Converter that joins all matched tokens into a single string.
    By default the matched pieces must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        """
        Register an ignorable expression.  When C{adjacent} is set it is registered
        on this element only, using C{ParserElement.ignore}; otherwise the inherited
        behaviour applies.  Returns C{self}.
        """
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with one string, keeping the named results of the original."""
        combined = tokenlist.copy()
        del combined[:]
        pieces = tokenlist._asStringList(self.joinString)
        combined += ParseResults(["".join(pieces)], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
4222
class Group(TokenConverter):
    """
    Converter that returns the matched tokens as a nested list - useful for returning
    tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the matched tokens in a one-element list."""
        grouped = [tokenlist]
        return grouped
4243
class Dict(TokenConverter):
    """
    Converter that returns a repetitive expression as a list, and also as a dictionary:
    each element can be looked up using the first token of that element as its key.
    Useful for tabular report scraping when the first column can be used as an item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """
        Add one named entry to C{tokenlist} for every non-empty element, keyed by the
        element's first token (an integer key is converted to a stripped string).

        The stored value is C{""} for a one-token element, the second token for a
        two-token element whose second token is not a C{ParseResults}, and otherwise
        the remaining tokens (unwrapped when exactly one plain token remains).
        """
        for position, entry in enumerate(tokenlist):
            if len(entry) == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = _ustr(entry[0]).strip()

            if len(entry) == 1:
                value = ""
            elif len(entry) == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                if len(remainder) != 1 or (isinstance(remainder, ParseResults) and remainder.haskeys()):
                    value = remainder
                else:
                    value = remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
4307
class Suppress(TokenConverter):
    """
    Converter that discards the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def postParse(self, instring, loc, tokenlist):
        """Drop whatever was matched by returning an empty list."""
        nothing = []
        return nothing

    def suppress(self):
        """Already suppressed, so return this same element."""
        return self
4333
class OnlyOnce(object):
    """
    Wrapper for a parse action that lets it run only once; later calls raise
    C{ParseException} until C{reset} is called.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        if self.called:
            raise ParseException(s, l, "")
        # the flag is set only after the wrapped action returns without raising
        results = self.callable(s, l, t)
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped parse action to run again."""
        self.called = False
4350
def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    Before the parse action runs, a line of the form C{">>entering I{method-name}(line: I{current_source_line}, I{parse_location}, I{matched_tokens})"}
    is written to C{sys.stderr}.  Afterwards, C{"<<leaving"} is written together with
    the returned value, or with the exception the parse action raised (which is then re-raised).

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)
    emit = sys.stderr.write

    def z(*paArgs):
        label = f.__name__
        # the last three arguments are always (s, l, t); a leading extra argument
        # is the object the action is bound to
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            label = paArgs[0].__class__.__name__ + '.' + label
        emit(">>entering %s(line: '%s', %d, %r)\n" % (label, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception as exc:
            emit("<<leaving %s (exception: %s)\n" % (label, exc))
            raise
        emit("<<leaving %s (ret: %r)\n" % (label, ret))
        return ret

    # make the wrapper report the wrapped action's name, when it has one
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z

#
# global helpers
#
def delimitedList(expr, delim=",", combine=False):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    # display name of the form: expr [delim expr]...
    dlName = "".join([_ustr(expr), " [", _ustr(delim), " ", _ustr(expr), "]..."])
    if not combine:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    else:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    return listExpr.setName(dlName)
4413
def countedArray(expr, intExpr=None):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # once the count is known, define the array part as exactly that many
        # exprs (or an empty group for a zero count); the count itself is dropped
        n = t[0]
        if n:
            body = Group(And([expr] * n))
        else:
            body = Group(empty)
        arrayExpr << body
        return []

    if intExpr is not None:
        # copy, so the caller's expression does not gain a parse action
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    counted = intExpr + arrayExpr
    return counted.setName('(len) ' + _ustr(expr) + '...')

def _flatten(L):
    """
    Return a new flat list holding the items of C{L}, with nested C{list} items
    expanded recursively.  Only C{list} instances are expanded; other containers,
    such as tuples, are kept as single items.
    """
    flat = []
    for item in L:
        if not isinstance(item, list):
            flat.append(item)
            continue
        flat.extend(_flatten(item))
    return flat
4453
def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
            return
        if len(t) == 1:
            rep << t[0]
            return
        # flatten t tokens
        flattened = _flatten(t.asList())
        rep << And(Literal(tok) for tok in flattened)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep

def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    # copy expr before the parse action below is attached to it
    rep <<= expr.copy()

    def copyTokenToRepeater(s, l, t):
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            # the repeat must yield exactly the tokens matched the first time
            theseTokens = _flatten(t.asList())
            if not (theseTokens == matchTokens):
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep

def _escapeRegexRangeChars(s):
    """
    Prefix each backslash, caret, hyphen and closing bracket in C{s} with the
    module's backslash constant, replace newline and tab characters with their
    two-character escape sequences, and return the result through C{_ustr}.
    """
    # the backslash comes first, so the backslashes added for the
    # other characters are not escaped a second time
    for special in r"\^-]":
        s = s.replace(special, _bslash + special)
    for raw, escaped in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(raw, escaped)
    return _ustr(s)
4517
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Returns C{NoMatch()} when no symbols are given; an argument that is neither a
    string nor an iterable also produces a C{SyntaxWarning}.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12   AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # collections.Iterable was removed in Python 3.10; take the ABC from
    # collections.abc when it exists and fall back for Python 2
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # remove duplicates, and move any symbol that is masked by an earlier, shorter
    # symbol (the earlier one is a prefix of it) in front of that symbol, so that
    # first-match testing still finds the longest alternative
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for position i, move on to the next symbol
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character, so a character class is enough
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # narrowed from a bare except so that KeyboardInterrupt and
            # SystemExit are no longer swallowed
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4590
def dictOf( key, value ):
    """
    Build a dictionary-style expression from a key pattern and a value pattern.
    Wraps the pair in the C{L{Group}}, C{L{ZeroOrMore}}, and C{L{Dict}} classes in the
    required order, so the caller does not have to. The key pattern may contain
    delimiters or punctuation provided they are suppressed, so that only the
    significant key text remains. The value pattern may carry results names, which
    then show up as named fields in the C{Dict} results.

    Example::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one grouped (key, value) entry, repeated zero or more times
    entry = Group(key + value)
    entries = ZeroOrMore(entry)
    return Dict(entries)
4625
def originalTextFor(expr, asString=True):
    """
    Wrap C{expr} so that a match yields the original, untokenized input text it
    covered. Handy for turning the parsed fields of an HTML start tag back into the
    raw tag text, or for rejoining tokens that were separated by whitespace. By
    default the result is a string containing the original parsed text.

    When C{asString} is C{False}, the result is instead a C{L{ParseResults}} that keeps
    any results names matched inside C{expr} and holds one token: the original matched
    text. So if C{expr} defines results names that you need to keep, you must pass
    C{asString=False}.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers whose parse action reports the current location
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    # do not pre-parse (skip whitespace/ignorables) before recording the end location
    endMarker.callPreparse = False
    matchExpr = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        def extractText(s,l,t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # drop the bookkeeping names and replace all tokens with the raw slice
            begin = t.pop('_original_start')
            end = t.pop('_original_end')
            t[:] = [s[begin:end]]

    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr
def ungroup(expr):
    """
    Helper that reverses pyparsing's default grouping of And expressions,
    even if all but one are non-empty. The parse action returns only the
    first token of the wrapped expression's results.
    """
    converter = TokenConverter(expr)
    # setParseAction returns the element it was called on
    converter.setParseAction(lambda t: t[0])
    return converter
def locatedExpr(expr):
    """
    Wrap C{expr} so that its match is returned together with the input locations
    where it starts and ends. The grouped result carries these results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width element whose parse action returns the current location
    startLocator = Empty().setParseAction(lambda s,l,t: l)
    # the end marker must not skip whitespace, or it would report the wrong position
    endLocator = startLocator.copy().leaveWhitespace()
    located = startLocator("locn_start") + expr("value") + endLocator("locn_end")
    return Group(located)
# convenience constants for positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")

# Grammar for the regex-like "[...]" bracket expressions accepted by srange().
# A backslash followed by one punctuation character; yields that character.
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Escaped hex character, converted to the character with that code point.
# NOTE(review): str.lstrip(r'\0x') strips a *set* of characters, so leading
# zero digits of the value are stripped as well and an upper-case 'X' prefix
# is not stripped - confirm whether such inputs need to be supported.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
# Escaped octal character, converted to the character with that code point.
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
# Any single member of a bracket expression; escapes are tried before plain characters.
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
# A "low-high" range, grouped so that srange() can tell it apart from a single character.
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# Full bracket expression; "negate" records an optional leading "^", "body" holds the members.
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)

    If C{s} cannot be parsed or expanded, an empty string is returned rather than
    raising an exception.
    """
    def _expanded(part):
        # plain characters pass through; a grouped (low, high) pair is expanded
        # into every character in that inclusive range
        if not isinstance(part, ParseResults):
            return part
        return ''.join(unichr(c) for c in range(ord(part[0]), ord(part[1])+1))

    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # deliberate best effort, but let KeyboardInterrupt/SystemExit propagate
        return ""
4731
def matchOnlyAtCol(n):
    """
    Build a parse action that only accepts a match located at column C{n} of the
    input text; any other column raises a C{ParseException}.
    """
    def verifyCol(strg,locn,toks):
        actualCol = col(locn, strg)
        if actualCol == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
def replaceWith(replStr):
    """
    Build a parse action that ignores the matched tokens and returns the fixed
    value C{replStr} as the only token.  Especially useful when used with
    C{L{transformString<ParserElement.transformString>}()}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    def replacer(s, l, t):
        # a fresh one-element list on every call
        return [replStr]
    return replacer
4755
def removeQuotes(s,l,t):
    """
    Parse action that strips the first and last character (the quotation marks)
    from the first token of a parsed quoted string.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    unquoted = quoted[1:-1]
    return unquoted
4769
def tokenMap(func, *args):
    """
    Build a parse action that applies C{func} to every element of a ParseResults list.
    Any extra positional arguments are passed on to C{func} after the token, as in
    C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which converts the
    parsed data to an integer using base 16.

    Example (compare the last example to the one in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    # name the action after the mapped callable; callables lacking __name__
    # fall back to their class name
    try:
        classLabel = getattr(func, '__class__').__name__
        func_name = getattr(func, '__name__', classLabel)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa

upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} is either a tag name string (turned into a C{Keyword}, caseless unless
    C{xml} is true) or an existing expression whose C{name} is used as the tag name.
    Returns the pair C{(openTag, closeTag)}.
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        # XML: attribute values must be double-quoted and every attribute has a value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attrEntry = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # HTML: values may be quoted or bare, the value is optional, and
        # attribute names are folded to lower case
        printablesLessRAbrack = printables.replace(">", "")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attrEntry = Group( tagAttrName.setParseAction(downcaseTokens) +
                           Optional( Suppress("=") + tagAttrValue ) )

    # "empty" is True for a self-closing tag ("<tag/>"), False otherwise
    emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attrEntry)) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names look like "startDiv"/"endDiv"; namespace colons are dropped
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
4848
def makeHTMLTags(tagStr):
    """
    Create the opening and closing tag expressions for an HTML tag of the given name.
    The expressions match tags in either upper or lower case, and attributes with
    namespaces and with quoted or unquoted values.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    openTag, closeTag = _makeTags(tagStr, xml=False)
    return openTag, closeTag
4867
def makeXMLTags(tagStr):
    """
    Create the opening and closing tag expressions for an XML tag of the given name.
    The expressions match tags only in the given upper/lower case.

    Example: similar to L{makeHTMLTags}
    """
    openTag, closeTag = _makeTags(tagStr, xml=True)
    return openTag, closeTag
4876
def withAttribute(*args,**attrDict):
    """
    Create a validating parse action for start tags built with C{L{makeXMLTags}} or
    C{L{makeHTMLTags}}. Use C{withAttribute} to require specific attribute values on a
    starting tag, to avoid false matches on common tags such as C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values, given as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    Attribute names with a namespace prefix cannot be written as keyword arguments;
    use the explicit dict or the name-value tuples. If any positional tuples are
    passed, keyword arguments are ignored.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    requested = args[:] if args else attrDict.items()
    # normalize to a list of 2-tuples (also rejects malformed pairs up front)
    attrs = [(name, value) for name, value in requested]

    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))
    return pa
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
    """
    Shortcut for C{L{withAttribute}} when matching on a div class - needed
    because C{class} is a reserved word in Python. When C{namespace} is given,
    the attribute checked is C{"<namespace>:class"} instead of C{"class"}.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    required = {classattr: classname}
    return withAttribute(**required)
# Associativity markers for infixNotation: two unique sentinel objects exposed
# as opAssoc.LEFT and opAssoc.RIGHT.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions.

    Parameters:
     - baseExpr - expression representing the most basic operand of the nested
       expression grammar
     - opList - list of tuples, one for each operator precedence level in the
      expression grammar; each tuple is of the form
      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple of two expressions, for the
          two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
       - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
       - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted)
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if numTerms is not 1, 2, or 3, if a 3-term operator is not
    given as a pair of expressions, or if the associativity constant is not recognized.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so that the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            # Validate before formatting the term name: formatting an opExpr of
            # the wrong shape raises an opaque TypeError that would hide this
            # more helpful message.
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % opExpr
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator token: terms are simply adjacent
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # each level matches its own operator form or falls back to the next-tighter level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# Quoted-string expressions. Each regex matches the opening quote and the body:
# any character other than the quote, a newline/carriage return or a backslash;
# a doubled quote; or a backslash escape (backslash plus any non-'x' character,
# or backslash-x plus hex digits). The closing quote is a separate literal,
# and Combine joins the pieces into one token.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# Either quoting style; the double-quoted alternative is tried first.
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# A quoted string immediately preceded by a literal 'u'.
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Build an expression for nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    When no C{content} expression is given, the nested expression captures all
    whitespace-delimited content between delimiters as a list of separate values;
    in that case C{opener} and C{closer} must both be strings, otherwise a
    C{ValueError} is raised. A C{ValueError} is also raised if C{opener} equals C{closer}.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t:t[0].strip()
        singleCharDelims = len(opener) == 1 and len(closer)==1

        if singleCharDelims and ignoreExpr is None:
            # simplest case: one run of characters that are neither delimiters nor whitespace
            content = (empty.copy()+CharsNotIn(opener+closer+whiteChars
                            ).setParseAction(stripToken))
        else:
            # otherwise consume one character at a time, checking before each
            # character that no ignorable expression / delimiter starts here
            if singleCharDelims:
                oneChar = ~ignoreExpr + CharsNotIn(opener+closer+whiteChars,exact=1)
            elif ignoreExpr is not None:
                oneChar = (~ignoreExpr +
                           ~Literal(opener) + ~Literal(closer) +
                           CharsNotIn(whiteChars,exact=1))
            else:
                oneChar = (~Literal(opener) + ~Literal(closer) +
                           CharsNotIn(whiteChars,exact=1))
            content = Combine(OneOrMore(oneChar)).setParseAction(stripToken)

    ret = Forward()
    if ignoreExpr is not None:
        item = ignoreExpr | ret | content
    else:
        item = ret | content
    ret <<= Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
5181
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Build an expression for space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # at end of input there is nothing left to check
        if l >= len(s):
            return
        curCol = col(l,s)
        expectedCol = indentStack[-1]
        if curCol == expectedCol:
            return
        if curCol > expectedCol:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the current level: open a new indentation level
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        curCol = col(l,s)
        # NOTE(review): indentStack[-2] is read whenever the first two tests
        # pass, which assumes the stack holds at least two entries here.
        if not indentStack or curCol >= indentStack[-1] or curCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    # one or more statements, all at the same indentation level
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    # a backslash at end of line is ignored inside a statement
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters.

    Looks up the C{entity} named result in the entity map; returns C{None}
    when the name is not in the map.
    """
    entityName = t.entity
    return _htmlEntityMap.get(entityName)
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# The regex consumes "/*" and the body (any '*' in the body must not be followed
# by '/'); the closing "*/" is a separate literal joined in by Combine.
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form C{/* ... */}"

# [\s\S] lets the non-greedy body span line breaks.
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

# Everything up to (not including) the next newline; leading whitespace is kept.
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# A backslash immediately before the newline continues the comment onto the next line.
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# One unquoted list item: runs of printable non-comma characters, optionally
# joined by spaces/tabs as long as that whitespace is not followed by a comma
# or the end of the line.
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# Items may be quoted strings or unquoted items; an empty item yields "".
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
5332 5333 # some other useful expressions - using lower-case class name since we are really using this as a namespace 5334 -class pyparsing_common:
5335 """ 5336 Here are some common low-level expressions that may be useful in jump-starting parser development: 5337 - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>}) 5338 - common L{programming identifiers<identifier>} 5339 - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>}) 5340 - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>} 5341 - L{UUID<uuid>} 5342 - L{comma-separated list<comma_separated_list>} 5343 Parse actions: 5344 - C{L{convertToInteger}} 5345 - C{L{convertToFloat}} 5346 - C{L{convertToDate}} 5347 - C{L{convertToDatetime}} 5348 - C{L{stripHTMLTags}} 5349 - C{L{upcaseTokens}} 5350 - C{L{downcaseTokens}} 5351 5352 Example:: 5353 pyparsing_common.number.runTests(''' 5354 # any int or real number, returned as the appropriate type 5355 100 5356 -100 5357 +100 5358 3.14159 5359 6.02e23 5360 1e-12 5361 ''') 5362 5363 pyparsing_common.fnumber.runTests(''' 5364 # any int or real number, returned as float 5365 100 5366 -100 5367 +100 5368 3.14159 5369 6.02e23 5370 1e-12 5371 ''') 5372 5373 pyparsing_common.hex_integer.runTests(''' 5374 # hex numbers 5375 100 5376 FF 5377 ''') 5378 5379 pyparsing_common.fraction.runTests(''' 5380 # fractions 5381 1/2 5382 -3/4 5383 ''') 5384 5385 pyparsing_common.mixed_integer.runTests(''' 5386 # mixed fractions 5387 1 5388 1/2 5389 -3/4 5390 1-3/4 5391 ''') 5392 5393 import uuid 5394 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) 5395 pyparsing_common.uuid.runTests(''' 5396 # uuid 5397 12345678-1234-5678-1234-567812345678 5398 ''') 5399 prints:: 5400 # any int or real number, returned as the appropriate type 5401 100 5402 [100] 5403 5404 -100 5405 [-100] 5406 5407 +100 5408 [100] 5409 5410 3.14159 5411 [3.14159] 5412 5413 6.02e23 5414 [6.02e+23] 5415 5416 1e-12 5417 [1e-12] 5418 5419 # any int or real number, returned as float 5420 100 5421 [100.0] 5422 5423 -100 5424 [-100.0] 5425 5426 +100 5427 [100.0] 5428 5429 3.14159 5430 
[3.14159] 5431 5432 6.02e23 5433 [6.02e+23] 5434 5435 1e-12 5436 [1e-12] 5437 5438 # hex numbers 5439 100 5440 [256] 5441 5442 FF 5443 [255] 5444 5445 # fractions 5446 1/2 5447 [0.5] 5448 5449 -3/4 5450 [-0.75] 5451 5452 # mixed fractions 5453 1 5454 [1] 5455 5456 1/2 5457 [0.5] 5458 5459 -3/4 5460 [-0.75] 5461 5462 1-3/4 5463 [1.75] 5464 5465 # uuid 5466 12345678-1234-5678-1234-567812345678 5467 [UUID('12345678-1234-5678-1234-567812345678')] 5468 """ 5469 5470 convertToInteger = tokenMap(int) 5471 """ 5472 Parse action for converting parsed integers to Python int 5473 """ 5474 5475 convertToFloat = tokenMap(float) 5476 """ 5477 Parse action for converting parsed numbers to Python float 5478 """ 5479 5480 integer = Word(nums).setName("integer").setParseAction(convertToInteger) 5481 """expression that parses an unsigned integer, returns an int""" 5482 5483 hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16)) 5484 """expression that parses a hexadecimal integer, returns an int""" 5485 5486 signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) 5487 """expression that parses an integer with optional leading sign, returns an int""" 5488 5489 fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") 5490 """fractional expression of an integer divided by an integer, returns a float""" 5491 fraction.addParseAction(lambda t: t[0]/t[-1]) 5492 5493 mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") 5494 """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" 5495 mixed_integer.addParseAction(sum) 5496 5497 real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat) 5498 """expression that parses a floating point number and returns a float""" 5499 5500 sci_real = 
Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) 5501 """expression that parses a floating point number with optional scientific notation and returns a float""" 5502 5503 # streamlining this expression makes the docs nicer-looking 5504 number = (sci_real | real | signed_integer).streamline() 5505 """any numeric expression, returns the corresponding Python type""" 5506 5507 fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) 5508 """any int or real number, returned as float""" 5509 5510 identifier = Word(alphas+'_', alphanums+'_').setName("identifier") 5511 """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" 5512 5513 ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") 5514 "IPv4 address (C{0.0.0.0 - 255.255.255.255})" 5515 5516 _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") 5517 _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address") 5518 _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address") 5519 _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) 5520 _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") 5521 ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") 5522 "IPv6 address (long, short, or mixed form)" 5523 5524 mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") 5525 "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" 5526 5527 @staticmethod
def convertToDate(fmt="%Y-%m-%d"):
    """
    Build a parse action that converts a matched date string into a Python C{datetime.date}.

    Params -
     - fmt - format string handed to C{datetime.strptime} (default=C{"%Y-%m-%d"})

    The returned callable takes the usual C{(s, l, t)} parse-action arguments and
    converts C{t[0]}.  If C{strptime} rejects the token with C{ValueError}, a
    L{ParseException} is raised at location C{l}, using the C{ValueError} text
    as its message.

    Example::
        date_expr = pyparsing_common.iso8601_date.copy()
        date_expr.setParseAction(pyparsing_common.convertToDate())
        print(date_expr.parseString("1999-12-31"))
    prints::
        [datetime.date(1999, 12, 31)]
    """
    def cvt_fn(instring, loc, toks):
        try:
            moment = datetime.strptime(toks[0], fmt)
        except ValueError as err:
            # surface an unparseable date as a parse failure, not a bare ValueError
            raise ParseException(instring, loc, str(err))
        else:
            # strptime yields a datetime; only the calendar date is wanted
            return moment.date()
    return cvt_fn
5548 5549 @staticmethod
def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
    """
    Build a parse action that converts a matched datetime string into a Python C{datetime.datetime}.

    Params -
     - fmt - format string handed to C{datetime.strptime} (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

    The returned callable takes the usual C{(s, l, t)} parse-action arguments and
    converts C{t[0]}.  If C{strptime} rejects the token with C{ValueError}, a
    L{ParseException} is raised at location C{l}, using the C{ValueError} text
    as its message.  Note that the default format includes a fractional-seconds
    field (C{.%f}).

    Example::
        dt_expr = pyparsing_common.iso8601_datetime.copy()
        dt_expr.setParseAction(pyparsing_common.convertToDatetime())
        print(dt_expr.parseString("1999-12-31T23:59:59.999"))
    prints::
        [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
    """
    def cvt_fn(instring, loc, toks):
        try:
            moment = datetime.strptime(toks[0], fmt)
        except ValueError as err:
            # surface an unparseable timestamp as a parse failure, not a bare ValueError
            raise ParseException(instring, loc, str(err))
        else:
            return moment
    return cvt_fn

# year is mandatory; "-mm" and then "-dd" are each optional, exposed as named groups
iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
"ISO8601 date (C{yyyy-mm-dd})"

# full date, a 'T' or ' ' separator, hh:mm, then optional seconds/fraction and optional zone
iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
"ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

# 8-4-4-4-12 hexadecimal digit groups joined by '-'
uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
"UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

# matches any opening or closing tag and suppresses it; used by stripHTMLTags below
_html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
@staticmethod
def stripHTMLTags(s, l, tokens):
    """
    Parse action that deletes HTML tags from the first matched token, leaving the text between them

    Example::
        # strip HTML links from normal text
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        td,td_end = makeHTMLTags("TD")
        table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

        print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
    """
    # every tag matched by _html_stripper is suppressed, so transformString drops it
    remove_tags = pyparsing_common._html_stripper.transformString
    return remove_tags(tokens[0])
# --- remaining attributes of pyparsing_common (referenced below as pyparsing_common.<name>) ---

# one list item: a run of printable words (no commas, not at end of line),
# keeping any spaces/tabs that follow each word, merged into a single token
_commasepitem = Combine(
    OneOrMore(
        ~Literal(",")
        + ~LineEnd()
        + Word(printables, excludeChars=',')
        + Optional(White(" \t"))
    )
).streamline().setName("commaItem")
comma_separated_list = delimitedList(
    Optional(quotedString.copy() | _commasepitem, default="")
).setName("comma separated list")
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
"""Parse action to convert tokens to upper case."""

downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
"""Parse action to convert tokens to lower case."""


# --- module-level self-test, run only when the file is executed as a script ---
if __name__ == "__main__":

    # tiny SELECT grammar used to demonstrate runTests
    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # dotted names are combined into one token and upper-cased
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # the same numeric samples are fed to both numeric expressions
    numeric_samples = """
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """

    pyparsing_common.number.runTests(numeric_samples)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests(numeric_samples)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)