You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

5742 lines
221 KiB

  1. # module pyparsing.py
  2. #
  3. # Copyright (c) 2003-2018 Paul T. McGuire
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining
  6. # a copy of this software and associated documentation files (the
  7. # "Software"), to deal in the Software without restriction, including
  8. # without limitation the rights to use, copy, modify, merge, publish,
  9. # distribute, sublicense, and/or sell copies of the Software, and to
  10. # permit persons to whom the Software is furnished to do so, subject to
  11. # the following conditions:
  12. #
  13. # The above copyright notice and this permission notice shall be
  14. # included in all copies or substantial portions of the Software.
  15. #
  16. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  17. # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18. # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  19. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  20. # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21. # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22. # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23. #
  24. __doc__ = \
  25. """
  26. pyparsing module - Classes and methods to define and execute parsing grammars
  27. =============================================================================
  28. The pyparsing module is an alternative approach to creating and executing simple grammars,
  29. vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
  30. don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
  31. provides a library of classes that you use to construct the grammar directly in Python.
  32. Here is a program to parse "Hello, World!" (or any greeting of the form
  33. C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
  34. (L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
  35. L{Literal} expressions)::
  36. from pyparsing import Word, alphas
  37. # define grammar of a greeting
  38. greet = Word(alphas) + "," + Word(alphas) + "!"
  39. hello = "Hello, World!"
  40. print (hello, "->", greet.parseString(hello))
  41. The program outputs the following::
  42. Hello, World! -> ['Hello', ',', 'World', '!']
  43. The Python representation of the grammar is quite readable, owing to the self-explanatory
  44. class names, and the use of '+', '|' and '^' operators.
  45. The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
  46. object with named attributes.
  47. The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
  48. - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
  49. - quoted strings
  50. - embedded comments
  51. Getting Started -
  52. -----------------
  53. Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing
  54. classes inherit from. Use the docstrings for examples of how to:
  55. - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes
  56. - construct character word-group expressions using the L{Word} class
  57. - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes
  58. - use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones
  59. - associate names with your parsed results using L{ParserElement.setResultsName}
  60. - find some helpful expression short-cuts like L{delimitedList} and L{oneOf}
  61. - find more useful common expressions in the L{pyparsing_common} namespace class
  62. """
  63. __version__ = "2.2.1"
  64. __versionTime__ = "18 Sep 2018 00:49 UTC"
  65. __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
  66. import string
  67. from weakref import ref as wkref
  68. import copy
  69. import sys
  70. import warnings
  71. import re
  72. import sre_constants
  73. import collections
  74. import pprint
  75. import traceback
  76. import types
  77. from datetime import datetime
  78. try:
  79. from _thread import RLock
  80. except ImportError:
  81. from threading import RLock
  82. try:
  83. # Python 3
  84. from collections.abc import Iterable
  85. from collections.abc import MutableMapping
  86. except ImportError:
  87. # Python 2.7
  88. from collections import Iterable
  89. from collections import MutableMapping
  90. try:
  91. from collections import OrderedDict as _OrderedDict
  92. except ImportError:
  93. try:
  94. from ordereddict import OrderedDict as _OrderedDict
  95. except ImportError:
  96. _OrderedDict = None
  97. #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
  98. __all__ = [
  99. 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
  100. 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
  101. 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
  102. 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
  103. 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
  104. 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
  105. 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
  106. 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
  107. 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
  108. 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
  109. 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
  110. 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
  111. 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
  112. 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
  113. 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
  114. 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
  115. 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
  116. 'CloseMatch', 'tokenMap', 'pyparsing_common',
  117. ]
  118. system_version = tuple(sys.version_info)[:3]
  119. PY_3 = system_version[0] == 3
  120. if PY_3:
  121. _MAX_INT = sys.maxsize
  122. basestring = str
  123. unichr = chr
  124. _ustr = str
  125. # build list of single arg builtins, that can be used as parse actions
  126. singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
  127. else:
  128. _MAX_INT = sys.maxint
  129. range = xrange
  130. def _ustr(obj):
  131. """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
  132. str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
  133. then < returns the unicode object | encodes it with the default encoding | ... >.
  134. """
  135. if isinstance(obj,unicode):
  136. return obj
  137. try:
  138. # If this works, then _ustr(obj) has the same behaviour as str(obj), so
  139. # it won't break any existing code.
  140. return str(obj)
  141. except UnicodeEncodeError:
  142. # Else encode it
  143. ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
  144. xmlcharref = Regex(r'&#\d+;')
  145. xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
  146. return xmlcharref.transformString(ret)
  147. # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
  148. singleArgBuiltins = []
  149. import __builtin__
  150. for fname in "sum len sorted reversed list tuple set any all min max".split():
  151. try:
  152. singleArgBuiltins.append(getattr(__builtin__,fname))
  153. except AttributeError:
  154. continue
  155. _generatorType = type((y for y in range(1)))
  156. def _xml_escape(data):
  157. """Escape &, <, >, ", ', etc. in a string of data."""
  158. # ampersand must be replaced first
  159. from_symbols = '&><"\''
  160. to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
  161. for from_,to_ in zip(from_symbols, to_symbols):
  162. data = data.replace(from_, to_)
  163. return data
  164. class _Constants(object):
  165. pass
  166. alphas = string.ascii_uppercase + string.ascii_lowercase
  167. nums = "0123456789"
  168. hexnums = nums + "ABCDEFabcdef"
  169. alphanums = alphas + nums
  170. _bslash = chr(92)
  171. printables = "".join(c for c in string.printable if c not in string.whitespace)
  172. class ParseBaseException(Exception):
  173. """base exception class for all parsing runtime exceptions"""
  174. # Performance tuning: we construct a *lot* of these, so keep this
  175. # constructor as small and fast as possible
  176. def __init__( self, pstr, loc=0, msg=None, elem=None ):
  177. self.loc = loc
  178. if msg is None:
  179. self.msg = pstr
  180. self.pstr = ""
  181. else:
  182. self.msg = msg
  183. self.pstr = pstr
  184. self.parserElement = elem
  185. self.args = (pstr, loc, msg)
  186. @classmethod
  187. def _from_exception(cls, pe):
  188. """
  189. internal factory method to simplify creating one type of ParseException
  190. from another - avoids having __init__ signature conflicts among subclasses
  191. """
  192. return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
  193. def __getattr__( self, aname ):
  194. """supported attributes by name are:
  195. - lineno - returns the line number of the exception text
  196. - col - returns the column number of the exception text
  197. - line - returns the line containing the exception text
  198. """
  199. if( aname == "lineno" ):
  200. return lineno( self.loc, self.pstr )
  201. elif( aname in ("col", "column") ):
  202. return col( self.loc, self.pstr )
  203. elif( aname == "line" ):
  204. return line( self.loc, self.pstr )
  205. else:
  206. raise AttributeError(aname)
  207. def __str__( self ):
  208. return "%s (at char %d), (line:%d, col:%d)" % \
  209. ( self.msg, self.loc, self.lineno, self.column )
  210. def __repr__( self ):
  211. return _ustr(self)
  212. def markInputline( self, markerString = ">!<" ):
  213. """Extracts the exception line from the input string, and marks
  214. the location of the exception with a special symbol.
  215. """
  216. line_str = self.line
  217. line_column = self.column - 1
  218. if markerString:
  219. line_str = "".join((line_str[:line_column],
  220. markerString, line_str[line_column:]))
  221. return line_str.strip()
  222. def __dir__(self):
  223. return "lineno col line".split() + dir(type(self))
  224. class ParseException(ParseBaseException):
  225. """
  226. Exception thrown when parse expressions don't match class;
  227. supported attributes by name are:
  228. - lineno - returns the line number of the exception text
  229. - col - returns the column number of the exception text
  230. - line - returns the line containing the exception text
  231. Example::
  232. try:
  233. Word(nums).setName("integer").parseString("ABC")
  234. except ParseException as pe:
  235. print(pe)
  236. print("column: {}".format(pe.col))
  237. prints::
  238. Expected integer (at char 0), (line:1, col:1)
  239. column: 1
  240. """
  241. pass
  242. class ParseFatalException(ParseBaseException):
  243. """user-throwable exception thrown when inconsistent parse content
  244. is found; stops all parsing immediately"""
  245. pass
  246. class ParseSyntaxException(ParseFatalException):
  247. """just like L{ParseFatalException}, but thrown internally when an
  248. L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
  249. immediately because an unbacktrackable syntax error has been found"""
  250. pass
  251. #~ class ReparseException(ParseBaseException):
  252. #~ """Experimental class - parse actions can raise this exception to cause
  253. #~ pyparsing to reparse the input string:
  254. #~ - with a modified input string, and/or
  255. #~ - with a modified start location
  256. #~ Set the values of the ReparseException in the constructor, and raise the
  257. #~ exception in a parse action to cause pyparsing to use the new string/location.
  258. #~ Setting the values as None causes no change to be made.
  259. #~ """
  260. #~ def __init_( self, newstring, restartLoc ):
  261. #~ self.newParseText = newstring
  262. #~ self.reparseLoc = restartLoc
  263. class RecursiveGrammarException(Exception):
  264. """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
  265. def __init__( self, parseElementList ):
  266. self.parseElementTrace = parseElementList
  267. def __str__( self ):
  268. return "RecursiveGrammarException: %s" % self.parseElementTrace
  269. class _ParseResultsWithOffset(object):
  270. def __init__(self,p1,p2):
  271. self.tup = (p1,p2)
  272. def __getitem__(self,i):
  273. return self.tup[i]
  274. def __repr__(self):
  275. return repr(self.tup[0])
  276. def setOffset(self,i):
  277. self.tup = (self.tup[0],i)
  278. class ParseResults(object):
  279. """
  280. Structured parse results, to provide multiple means of access to the parsed data:
  281. - as a list (C{len(results)})
  282. - by list index (C{results[0], results[1]}, etc.)
  283. - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
  284. Example::
  285. integer = Word(nums)
  286. date_str = (integer.setResultsName("year") + '/'
  287. + integer.setResultsName("month") + '/'
  288. + integer.setResultsName("day"))
  289. # equivalent form:
  290. # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  291. # parseString returns a ParseResults object
  292. result = date_str.parseString("1999/12/31")
  293. def test(s, fn=repr):
  294. print("%s -> %s" % (s, fn(eval(s))))
  295. test("list(result)")
  296. test("result[0]")
  297. test("result['month']")
  298. test("result.day")
  299. test("'month' in result")
  300. test("'minutes' in result")
  301. test("result.dump()", str)
  302. prints::
  303. list(result) -> ['1999', '/', '12', '/', '31']
  304. result[0] -> '1999'
  305. result['month'] -> '12'
  306. result.day -> '31'
  307. 'month' in result -> True
  308. 'minutes' in result -> False
  309. result.dump() -> ['1999', '/', '12', '/', '31']
  310. - day: 31
  311. - month: 12
  312. - year: 1999
  313. """
  314. def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
  315. if isinstance(toklist, cls):
  316. return toklist
  317. retobj = object.__new__(cls)
  318. retobj.__doinit = True
  319. return retobj
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        # NOTE(review): `isinstance` is bound as a default argument, presumably
        # so the lookup is a fast local one - confirm before removing.
        #
        # __new__ sets __doinit only on freshly allocated objects, so when an
        # existing ParseResults is passed back through the constructor its
        # token list and dictionary are left untouched.
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            # lists are shallow-copied, generators are consumed, and anything
            # else becomes a single-element token list
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        # Register the results name, if one was given (falsy names are skipped).
        if name is not None and name:
            if not modal:
                # non-modal names are recorded here; __getitem__ returns every
                # occurrence for names found in __accumNames
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            # nothing is stored under the name for None, '' or []
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    # store the first token; fall back to the whole object when
                    # it cannot be indexed that way
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist
  359. def __getitem__( self, i ):
  360. if isinstance( i, (int,slice) ):
  361. return self.__toklist[i]
  362. else:
  363. if i not in self.__accumNames:
  364. return self.__tokdict[i][-1][0]
  365. else:
  366. return ParseResults([ v[0] for v in self.__tokdict[i] ])
  367. def __setitem__( self, k, v, isinstance=isinstance ):
  368. if isinstance(v,_ParseResultsWithOffset):
  369. self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
  370. sub = v[0]
  371. elif isinstance(k,(int,slice)):
  372. self.__toklist[k] = v
  373. sub = v
  374. else:
  375. self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
  376. sub = v
  377. if isinstance(sub,ParseResults):
  378. sub.__parent = wkref(self)
  379. def __delitem__( self, i ):
  380. if isinstance(i,(int,slice)):
  381. mylen = len( self.__toklist )
  382. del self.__toklist[i]
  383. # convert int to slice
  384. if isinstance(i, int):
  385. if i < 0:
  386. i += mylen
  387. i = slice(i, i+1)
  388. # get removed indices
  389. removed = list(range(*i.indices(mylen)))
  390. removed.reverse()
  391. # fixup indices in token dictionary
  392. for name,occurrences in self.__tokdict.items():
  393. for j in removed:
  394. for k, (value, position) in enumerate(occurrences):
  395. occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
  396. else:
  397. del self.__tokdict[i]
  398. def __contains__( self, k ):
  399. return k in self.__tokdict
  400. def __len__( self ): return len( self.__toklist )
  401. def __bool__(self): return ( not not self.__toklist )
  402. __nonzero__ = __bool__
  403. def __iter__( self ): return iter( self.__toklist )
  404. def __reversed__( self ): return iter( self.__toklist[::-1] )
  405. def _iterkeys( self ):
  406. if hasattr(self.__tokdict, "iterkeys"):
  407. return self.__tokdict.iterkeys()
  408. else:
  409. return iter(self.__tokdict)
  410. def _itervalues( self ):
  411. return (self[k] for k in self._iterkeys())
  412. def _iteritems( self ):
  413. return ((k, self[k]) for k in self._iterkeys())
    # Public dict-style accessors are bound at class-definition time according
    # to the running interpreter: on Python 3 keys/values/items are the
    # iterator helpers themselves; otherwise the iterator helpers are exposed
    # as iterkeys/itervalues/iteritems and keys/values/items build lists.
    if PY_3:
        keys = _iterkeys
        """Returns an iterator of all named result keys (Python 3.x only)."""

        values = _itervalues
        """Returns an iterator of all named result values (Python 3.x only)."""

        items = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 3.x only)."""

    else:
        iterkeys = _iterkeys
        """Returns an iterator of all named result keys (Python 2.x only)."""

        itervalues = _itervalues
        """Returns an iterator of all named result values (Python 2.x only)."""

        iteritems = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""

        def keys( self ):
            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iteritems())
  437. def haskeys( self ):
  438. """Since keys() returns an iterator, this method is helpful in bypassing
  439. code that looks for the existence of any defined results names."""
  440. return bool(self.__tokdict)
  441. def pop( self, *args, **kwargs):
  442. """
  443. Removes and returns item at specified index (default=C{last}).
  444. Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
  445. argument or an integer argument, it will use C{list} semantics
  446. and pop tokens from the list of parsed tokens. If passed a
  447. non-integer argument (most likely a string), it will use C{dict}
  448. semantics and pop the corresponding value from any defined
  449. results names. A second default return value argument is
  450. supported, just as in C{dict.pop()}.
  451. Example::
  452. def remove_first(tokens):
  453. tokens.pop(0)
  454. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  455. print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
  456. label = Word(alphas)
  457. patt = label("LABEL") + OneOrMore(Word(nums))
  458. print(patt.parseString("AAB 123 321").dump())
  459. # Use pop() in a parse action to remove named result (note that corresponding value is not
  460. # removed from list form of results)
  461. def remove_LABEL(tokens):
  462. tokens.pop("LABEL")
  463. return tokens
  464. patt.addParseAction(remove_LABEL)
  465. print(patt.parseString("AAB 123 321").dump())
  466. prints::
  467. ['AAB', '123', '321']
  468. - LABEL: AAB
  469. ['AAB', '123', '321']
  470. """
  471. if not args:
  472. args = [-1]
  473. for k,v in kwargs.items():
  474. if k == 'default':
  475. args = (args[0], v)
  476. else:
  477. raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
  478. if (isinstance(args[0], int) or
  479. len(args) == 1 or
  480. args[0] in self):
  481. index = args[0]
  482. ret = self[index]
  483. del self[index]
  484. return ret
  485. else:
  486. defaultvalue = args[1]
  487. return defaultvalue
  488. def get(self, key, defaultValue=None):
  489. """
  490. Returns named result matching the given key, or if there is no
  491. such name, then returns the given C{defaultValue} or C{None} if no
  492. C{defaultValue} is specified.
  493. Similar to C{dict.get()}.
  494. Example::
  495. integer = Word(nums)
  496. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  497. result = date_str.parseString("1999/12/31")
  498. print(result.get("year")) # -> '1999'
  499. print(result.get("hour", "not specified")) # -> 'not specified'
  500. print(result.get("hour")) # -> None
  501. """
  502. if key in self:
  503. return self[key]
  504. else:
  505. return defaultValue
  506. def insert( self, index, insStr ):
  507. """
  508. Inserts new element at location index in the list of parsed tokens.
  509. Similar to C{list.insert()}.
  510. Example::
  511. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  512. # use a parse action to insert the parse location in the front of the parsed results
  513. def insert_locn(locn, tokens):
  514. tokens.insert(0, locn)
  515. print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
  516. """
  517. self.__toklist.insert(index, insStr)
  518. # fixup indices in token dictionary
  519. for name,occurrences in self.__tokdict.items():
  520. for k, (value, position) in enumerate(occurrences):
  521. occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
  522. def append( self, item ):
  523. """
  524. Add single element to end of ParseResults list of elements.
  525. Example::
  526. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  527. # use a parse action to compute the sum of the parsed integers, and add it to the end
  528. def append_sum(tokens):
  529. tokens.append(sum(map(int, tokens)))
  530. print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
  531. """
  532. self.__toklist.append(item)
  533. def extend( self, itemseq ):
  534. """
  535. Add sequence of elements to end of ParseResults list of elements.
  536. Example::
  537. patt = OneOrMore(Word(alphas))
  538. # use a parse action to append the reverse of the matched strings, to make a palindrome
  539. def make_palindrome(tokens):
  540. tokens.extend(reversed([t[::-1] for t in tokens]))
  541. return ''.join(tokens)
  542. print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
  543. """
  544. if isinstance(itemseq, ParseResults):
  545. self += itemseq
  546. else:
  547. self.__toklist.extend(itemseq)
  548. def clear( self ):
  549. """
  550. Clear all elements and results names.
  551. """
  552. del self.__toklist[:]
  553. self.__tokdict.clear()
  554. def __getattr__( self, name ):
  555. try:
  556. return self[name]
  557. except KeyError:
  558. return ""
  559. if name in self.__tokdict:
  560. if name not in self.__accumNames:
  561. return self.__tokdict[name][-1][0]
  562. else:
  563. return ParseResults([ v[0] for v in self.__tokdict[name] ])
  564. else:
  565. return ""
  566. def __add__( self, other ):
  567. ret = self.copy()
  568. ret += other
  569. return ret
  570. def __iadd__( self, other ):
  571. if other.__tokdict:
  572. offset = len(self.__toklist)
  573. addoffset = lambda a: offset if a<0 else a+offset
  574. otheritems = other.__tokdict.items()
  575. otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
  576. for (k,vlist) in otheritems for v in vlist]
  577. for k,v in otherdictitems:
  578. self[k] = v
  579. if isinstance(v[0],ParseResults):
  580. v[0].__parent = wkref(self)
  581. self.__toklist += other.__toklist
  582. self.__accumNames.update( other.__accumNames )
  583. return self
  584. def __radd__(self, other):
  585. if isinstance(other,int) and other == 0:
  586. # useful for merging many ParseResults using sum() builtin
  587. return self.copy()
  588. else:
  589. # this may raise a TypeError - so be it
  590. return other + self
  591. def __repr__( self ):
  592. return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
  593. def __str__( self ):
  594. return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
  595. def _asStringList( self, sep='' ):
  596. out = []
  597. for item in self.__toklist:
  598. if out and sep:
  599. out.append(sep)
  600. if isinstance( item, ParseResults ):
  601. out += item._asStringList()
  602. else:
  603. out.append( _ustr(item) )
  604. return out
  605. def asList( self ):
  606. """
  607. Returns the parse results as a nested list of matching tokens, all converted to strings.
  608. Example::
  609. patt = OneOrMore(Word(alphas))
  610. result = patt.parseString("sldkj lsdkj sldkj")
  611. # even though the result prints in string-like form, it is actually a pyparsing ParseResults
  612. print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
  613. # Use asList() to create an actual list
  614. result_list = result.asList()
  615. print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
  616. """
  617. return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
  618. def asDict( self ):
  619. """
  620. Returns the named parse results as a nested dictionary.
  621. Example::
  622. integer = Word(nums)
  623. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  624. result = date_str.parseString('12/31/1999')
  625. print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
  626. result_dict = result.asDict()
  627. print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
  628. # even though a ParseResults supports dict-like access, sometime you just need to have a dict
  629. import json
  630. print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
  631. print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
  632. """
  633. if PY_3:
  634. item_fn = self.items
  635. else:
  636. item_fn = self.iteritems
  637. def toItem(obj):
  638. if isinstance(obj, ParseResults):
  639. if obj.haskeys():
  640. return obj.asDict()
  641. else:
  642. return [toItem(v) for v in obj]
  643. else:
  644. return obj
  645. return dict((k,toItem(v)) for k,v in item_fn())
  646. def copy( self ):
  647. """
  648. Returns a new copy of a C{ParseResults} object.
  649. """
  650. ret = ParseResults( self.__toklist )
  651. ret.__tokdict = self.__tokdict.copy()
  652. ret.__parent = self.__parent
  653. ret.__accumNames.update( self.__accumNames )
  654. ret.__name = self.__name
  655. return ret
  656. def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
  657. """
  658. (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
  659. """
  660. nl = "\n"
  661. out = []
  662. namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
  663. for v in vlist)
  664. nextLevelIndent = indent + " "
  665. # collapse out indents if formatting is not desired
  666. if not formatted:
  667. indent = ""
  668. nextLevelIndent = ""
  669. nl = ""
  670. selfTag = None
  671. if doctag is not None:
  672. selfTag = doctag
  673. else:
  674. if self.__name:
  675. selfTag = self.__name
  676. if not selfTag:
  677. if namedItemsOnly:
  678. return ""
  679. else:
  680. selfTag = "ITEM"
  681. out += [ nl, indent, "<", selfTag, ">" ]
  682. for i,res in enumerate(self.__toklist):
  683. if isinstance(res,ParseResults):
  684. if i in namedItems:
  685. out += [ res.asXML(namedItems[i],
  686. namedItemsOnly and doctag is None,
  687. nextLevelIndent,
  688. formatted)]
  689. else:
  690. out += [ res.asXML(None,
  691. namedItemsOnly and doctag is None,
  692. nextLevelIndent,
  693. formatted)]
  694. else:
  695. # individual token, see if there is a name for it
  696. resTag = None
  697. if i in namedItems:
  698. resTag = namedItems[i]
  699. if not resTag:
  700. if namedItemsOnly:
  701. continue
  702. else:
  703. resTag = "ITEM"
  704. xmlBodyText = _xml_escape(_ustr(res))
  705. out += [ nl, nextLevelIndent, "<", resTag, ">",
  706. xmlBodyText,
  707. "</", resTag, ">" ]
  708. out += [ nl, indent, "</", selfTag, ">" ]
  709. return "".join(out)
  710. def __lookup(self,sub):
  711. for k,vlist in self.__tokdict.items():
  712. for v,loc in vlist:
  713. if sub is v:
  714. return k
  715. return None
  716. def getName(self):
  717. r"""
  718. Returns the results name for this token expression. Useful when several
  719. different expressions might match at a particular location.
  720. Example::
  721. integer = Word(nums)
  722. ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
  723. house_number_expr = Suppress('#') + Word(nums, alphanums)
  724. user_data = (Group(house_number_expr)("house_number")
  725. | Group(ssn_expr)("ssn")
  726. | Group(integer)("age"))
  727. user_info = OneOrMore(user_data)
  728. result = user_info.parseString("22 111-22-3333 #221B")
  729. for item in result:
  730. print(item.getName(), ':', item[0])
  731. prints::
  732. age : 22
  733. ssn : 111-22-3333
  734. house_number : 221B
  735. """
  736. if self.__name:
  737. return self.__name
  738. elif self.__parent:
  739. par = self.__parent()
  740. if par:
  741. return par.__lookup(self)
  742. else:
  743. return None
  744. elif (len(self) == 1 and
  745. len(self.__tokdict) == 1 and
  746. next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
  747. return next(iter(self.__tokdict.keys()))
  748. else:
  749. return None
  750. def dump(self, indent='', depth=0, full=True):
  751. """
  752. Diagnostic method for listing out the contents of a C{ParseResults}.
  753. Accepts an optional C{indent} argument so that this string can be embedded
  754. in a nested display of other data.
  755. Example::
  756. integer = Word(nums)
  757. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  758. result = date_str.parseString('12/31/1999')
  759. print(result.dump())
  760. prints::
  761. ['12', '/', '31', '/', '1999']
  762. - day: 1999
  763. - month: 31
  764. - year: 12
  765. """
  766. out = []
  767. NL = '\n'
  768. out.append( indent+_ustr(self.asList()) )
  769. if full:
  770. if self.haskeys():
  771. items = sorted((str(k), v) for k,v in self.items())
  772. for k,v in items:
  773. if out:
  774. out.append(NL)
  775. out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
  776. if isinstance(v,ParseResults):
  777. if v:
  778. out.append( v.dump(indent,depth+1) )
  779. else:
  780. out.append(_ustr(v))
  781. else:
  782. out.append(repr(v))
  783. elif any(isinstance(vv,ParseResults) for vv in self):
  784. v = self
  785. for i,vv in enumerate(v):
  786. if isinstance(vv,ParseResults):
  787. out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
  788. else:
  789. out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))
  790. return "".join(out)
  791. def pprint(self, *args, **kwargs):
  792. """
  793. Pretty-printer for parsed results as a list, using the C{pprint} module.
  794. Accepts additional positional or keyword args as defined for the
  795. C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
  796. Example::
  797. ident = Word(alphas, alphanums)
  798. num = Word(nums)
  799. func = Forward()
  800. term = ident | num | Group('(' + func + ')')
  801. func <<= ident + Group(Optional(delimitedList(term)))
  802. result = func.parseString("fna a,b,(fnb c,d,200),100")
  803. result.pprint(width=40)
  804. prints::
  805. ['fna',
  806. ['a',
  807. 'b',
  808. ['(', 'fnb', ['c', 'd', '200'], ')'],
  809. '100']]
  810. """
  811. pprint.pprint(self.asList(), *args, **kwargs)
  812. # add support for pickle protocol
  813. def __getstate__(self):
  814. return ( self.__toklist,
  815. ( self.__tokdict.copy(),
  816. self.__parent is not None and self.__parent() or None,
  817. self.__accumNames,
  818. self.__name ) )
  819. def __setstate__(self,state):
  820. self.__toklist = state[0]
  821. (self.__tokdict,
  822. par,
  823. inAccumNames,
  824. self.__name) = state[1]
  825. self.__accumNames = {}
  826. self.__accumNames.update(inAccumNames)
  827. if par is not None:
  828. self.__parent = wkref(par)
  829. else:
  830. self.__parent = None
  831. def __getnewargs__(self):
  832. return self.__toklist, self.__name, self.__asList, self.__modal
  833. def __dir__(self):
  834. return (dir(type(self)) + list(self.keys()))
# Register ParseResults with MutableMapping (presumably the collections ABC -
# confirm against the file's imports), so that it is treated as a mutable
# mapping without inheriting from it.
MutableMapping.register(ParseResults)
  836. def col (loc,strg):
  837. """Returns current column within a string, counting newlines as line separators.
  838. The first column is number 1.
  839. Note: the default parsing behavior is to expand tabs in the input string
  840. before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
  841. on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
  842. consistent view of the parsed string, the parse location, and line and column
  843. positions within the parsed string.
  844. """
  845. s = strg
  846. return 1 if 0<loc<len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc)
  847. def lineno(loc,strg):
  848. """Returns current line number within a string, counting newlines as line separators.
  849. The first line is number 1.
  850. Note: the default parsing behavior is to expand tabs in the input string
  851. before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
  852. on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
  853. consistent view of the parsed string, the parse location, and line and column
  854. positions within the parsed string.
  855. """
  856. return strg.count("\n",0,loc) + 1
  857. def line( loc, strg ):
  858. """Returns the line of text containing loc within a string, counting newlines as line separators.
  859. """
  860. lastCR = strg.rfind("\n", 0, loc)
  861. nextCR = strg.find("\n", loc)
  862. if nextCR >= 0:
  863. return strg[lastCR+1:nextCR]
  864. else:
  865. return strg[lastCR+1:]
  866. def _defaultStartDebugAction( instring, loc, expr ):
  867. print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
  868. def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
  869. print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
  870. def _defaultExceptionDebugAction( instring, loc, expr, exc ):
  871. print ("Exception raised:" + _ustr(exc))
  872. def nullDebugAction(*args):
  873. """'Do-nothing' debug action, to suppress debugging output during parsing."""
  874. pass
  875. # Only works on Python 3.x - nonlocal is toxic to Python 2 installs
  876. #~ 'decorator to trim function calls to match the arity of the target'
  877. #~ def _trim_arity(func, maxargs=3):
  878. #~ if func in singleArgBuiltins:
  879. #~ return lambda s,l,t: func(t)
  880. #~ limit = 0
  881. #~ foundArity = False
  882. #~ def wrapper(*args):
  883. #~ nonlocal limit,foundArity
  884. #~ while 1:
  885. #~ try:
  886. #~ ret = func(*args[limit:])
  887. #~ foundArity = True
  888. #~ return ret
  889. #~ except TypeError:
  890. #~ if limit == maxargs or foundArity:
  891. #~ raise
  892. #~ limit += 1
  893. #~ continue
  894. #~ return wrapper
  895. # this version is Python 2.x-3.x cross-compatible
  896. 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """
    Wrap parse action C{func} so that it can be invoked with the full argument list.

    The returned wrapper calls C{func(*args[n:])}, starting with C{n = 0}. When the
    call raises a C{TypeError} whose traceback ends at the call line itself
    (i.e. the argument count was wrong, rather than an error inside C{func}),
    C{n} is incremented and the call retried; retries continue while
    C{n <= maxargs}. Once a call has succeeded, C{n} is kept for later calls and
    every subsequent C{TypeError} is re-raised unchanged.

    Callables found in C{singleArgBuiltins} are wrapped so that they receive only
    the last argument (the tokens).

    The wrapper's C{__name__} is set from C{func} for readable debug output.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists serve as mutable cells shared with wrapper
    # (no 'nonlocal', so this works on Python 2 as well)
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    # (the single blank line before 'def wrapper' is part of that count)
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # compare (filename, lineno) of the innermost reported frame with
                        # the synthesized location of the call to func above
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the local reference to the traceback object
                        del tb

                # wrong argument count: drop one more leading argument and retry
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
  953. class ParserElement(object):
  954. """Abstract base level parser element class."""
  955. DEFAULT_WHITE_CHARS = " \n\t\r"
  956. verbose_stacktrace = False
  957. @staticmethod
  958. def setDefaultWhitespaceChars( chars ):
  959. r"""
  960. Overrides the default whitespace chars
  961. Example::
  962. # default whitespace chars are space, <TAB> and newline
  963. OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
  964. # change to just treat newline as significant
  965. ParserElement.setDefaultWhitespaceChars(" \t")
  966. OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
  967. """
  968. ParserElement.DEFAULT_WHITE_CHARS = chars
  969. @staticmethod
  970. def inlineLiteralsUsing(cls):
  971. """
  972. Set class to be used for inclusion of string literals into a parser.
  973. Example::
  974. # default literal class used is Literal
  975. integer = Word(nums)
  976. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  977. date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
  978. # change to Suppress
  979. ParserElement.inlineLiteralsUsing(Suppress)
  980. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  981. date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
  982. """
  983. ParserElement._literalStringClass = cls
  984. def __init__( self, savelist=False ):
  985. self.parseAction = list()
  986. self.failAction = None
  987. #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
  988. self.strRepr = None
  989. self.resultsName = None
  990. self.saveAsList = savelist
  991. self.skipWhitespace = True
  992. self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
  993. self.copyDefaultWhiteChars = True
  994. self.mayReturnEmpty = False # used when checking for left-recursion
  995. self.keepTabs = False
  996. self.ignoreExprs = list()
  997. self.debug = False
  998. self.streamlined = False
  999. self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
  1000. self.errmsg = ""
  1001. self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
  1002. self.debugActions = ( None, None, None ) #custom debug actions
  1003. self.re = None
  1004. self.callPreparse = True # used to avoid redundant calls to preParse
  1005. self.callDuringTry = False
  1006. def copy( self ):
  1007. """
  1008. Make a copy of this C{ParserElement}. Useful for defining different parse actions
  1009. for the same parsing pattern, using copies of the original parse element.
  1010. Example::
  1011. integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
  1012. integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
  1013. integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
  1014. print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
  1015. prints::
  1016. [5120, 100, 655360, 268435456]
  1017. Equivalent form of C{expr.copy()} is just C{expr()}::
  1018. integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
  1019. """
  1020. cpy = copy.copy( self )
  1021. cpy.parseAction = self.parseAction[:]
  1022. cpy.ignoreExprs = self.ignoreExprs[:]
  1023. if self.copyDefaultWhiteChars:
  1024. cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
  1025. return cpy
  1026. def setName( self, name ):
  1027. """
  1028. Define name for this expression, makes debugging and exception messages clearer.
  1029. Example::
  1030. Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
  1031. Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1)
  1032. """
  1033. self.name = name
  1034. self.errmsg = "Expected " + self.name
  1035. if hasattr(self,"exception"):
  1036. self.exception.msg = self.errmsg
  1037. return self
  1038. def setResultsName( self, name, listAllMatches=False ):
  1039. """
  1040. Define name for referencing matching tokens as a nested attribute
  1041. of the returned parse results.
  1042. NOTE: this returns a *copy* of the original C{ParserElement} object;
  1043. this is so that the client can define a basic element, such as an
  1044. integer, and reference it in multiple places with different names.
  1045. You can also set results names using the abbreviated syntax,
  1046. C{expr("name")} in place of C{expr.setResultsName("name")} -
  1047. see L{I{__call__}<__call__>}.
  1048. Example::
  1049. date_str = (integer.setResultsName("year") + '/'
  1050. + integer.setResultsName("month") + '/'
  1051. + integer.setResultsName("day"))
  1052. # equivalent form:
  1053. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  1054. """
  1055. newself = self.copy()
  1056. if name.endswith("*"):
  1057. name = name[:-1]
  1058. listAllMatches=True
  1059. newself.resultsName = name
  1060. newself.modalResults = not listAllMatches
  1061. return newself
  1062. def setBreak(self,breakFlag = True):
  1063. """Method to invoke the Python pdb debugger when this element is
  1064. about to be parsed. Set C{breakFlag} to True to enable, False to
  1065. disable.
  1066. """
  1067. if breakFlag:
  1068. _parseMethod = self._parse
  1069. def breaker(instring, loc, doActions=True, callPreParse=True):
  1070. import pdb
  1071. pdb.set_trace()
  1072. return _parseMethod( instring, loc, doActions, callPreParse )
  1073. breaker._originalParseMethod = _parseMethod
  1074. self._parse = breaker
  1075. else:
  1076. if hasattr(self._parse,"_originalParseMethod"):
  1077. self._parse = self._parse._originalParseMethod
  1078. return self
  1079. def setParseAction( self, *fns, **kwargs ):
  1080. """
  1081. Define one or more actions to perform when successfully matching parse element definition.
  1082. Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
  1083. C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
  1084. - s = the original string being parsed (see note below)
  1085. - loc = the location of the matching substring
  1086. - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
  1087. If the functions in fns modify the tokens, they can return them as the return
  1088. value from fn, and the modified list of tokens will replace the original.
  1089. Otherwise, fn does not need to return any value.
  1090. Optional keyword arguments:
  1091. - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
  1092. Note: the default parsing behavior is to expand tabs in the input string
  1093. before starting the parsing process. See L{I{parseString}<parseString>} for more information
  1094. on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
  1095. consistent view of the parsed string, the parse location, and line and column
  1096. positions within the parsed string.
  1097. Example::
  1098. integer = Word(nums)
  1099. date_str = integer + '/' + integer + '/' + integer
  1100. date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
  1101. # use parse action to convert to ints at parse time
  1102. integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
  1103. date_str = integer + '/' + integer + '/' + integer
  1104. # note that integer fields are now ints, not strings
  1105. date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
  1106. """
  1107. self.parseAction = list(map(_trim_arity, list(fns)))
  1108. self.callDuringTry = kwargs.get("callDuringTry", False)
  1109. return self
  1110. def addParseAction( self, *fns, **kwargs ):
  1111. """
  1112. Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
  1113. See examples in L{I{copy}<copy>}.
  1114. """
  1115. self.parseAction += list(map(_trim_arity, list(fns)))
  1116. self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
  1117. return self
  1118. def addCondition(self, *fns, **kwargs):
  1119. """Add a boolean predicate function to expression's list of parse actions. See
  1120. L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
  1121. functions passed to C{addCondition} need to return boolean success/fail of the condition.
  1122. Optional keyword arguments:
  1123. - message = define a custom message to be used in the raised exception
  1124. - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
  1125. Example::
  1126. integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
  1127. year_int = integer.copy()
  1128. year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
  1129. date_str = year_int + '/' + integer + '/' + integer
  1130. result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
  1131. """
  1132. msg = kwargs.get("message", "failed user-defined condition")
  1133. exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
  1134. for fn in fns:
  1135. def pa(s,l,t):
  1136. if not bool(_trim_arity(fn)(s,l,t)):
  1137. raise exc_type(s,l,msg)
  1138. self.parseAction.append(pa)
  1139. self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
  1140. return self
  1141. def setFailAction( self, fn ):
  1142. """Define action to perform if parsing fails at this expression.
  1143. Fail acton fn is a callable function that takes the arguments
  1144. C{fn(s,loc,expr,err)} where:
  1145. - s = string being parsed
  1146. - loc = location where expression match was attempted and failed
  1147. - expr = the parse expression that failed
  1148. - err = the exception thrown
  1149. The function returns no value. It may throw C{L{ParseFatalException}}
  1150. if it is desired to stop parsing immediately."""
  1151. self.failAction = fn
  1152. return self
  1153. def _skipIgnorables( self, instring, loc ):
  1154. exprsFound = True
  1155. while exprsFound:
  1156. exprsFound = False
  1157. for e in self.ignoreExprs:
  1158. try:
  1159. while 1:
  1160. loc,dummy = e._parse( instring, loc )
  1161. exprsFound = True
  1162. except ParseException:
  1163. pass
  1164. return loc
  1165. def preParse( self, instring, loc ):
  1166. if self.ignoreExprs:
  1167. loc = self._skipIgnorables( instring, loc )
  1168. if self.skipWhitespace:
  1169. wt = self.whiteChars
  1170. instrlen = len(instring)
  1171. while loc < instrlen and instring[loc] in wt:
  1172. loc += 1
  1173. return loc
  1174. def parseImpl( self, instring, loc, doActions=True ):
  1175. return loc, []
  1176. def postParse( self, instring, loc, tokenlist ):
  1177. return tokenlist
  1178. #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Match this element against C{instring} at C{loc}, without using the packrat cache.

    Steps: optionally run C{preParse}, call C{parseImpl}, pass the tokens through
    C{postParse}, wrap them in a C{ParseResults}, then run the parse actions
    (when C{doActions} or C{callDuringTry} is set).

    Parameters:
     - instring - the string being parsed
     - loc - location at which to attempt the match
     - doActions - passed on to C{parseImpl}; also enables running parse actions
     - callPreParse - if true (and C{self.callPreparse} is true), C{preParse} is run first

    Returns C{(loc, retTokens)}: the location after the match and the resulting
    C{ParseResults}.

    An C{IndexError} raised by C{parseImpl} is converted to a C{ParseException}
    located at the end of the string. When debugging or a fail action is active,
    any C{ParseBaseException} from C{parseImpl} is reported to the exception debug
    action and the fail action before being re-raised.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # slow path: debug actions and/or a fail action must be notified
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no debugging and no fail action
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # only set up the IndexError guard when the element may raise one,
        # or when the start location is already at/after the end of the string
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning None leaves the current results in place
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            # same loop as above, without the exception reporting
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )
    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
  1245. def tryParse( self, instring, loc ):
  1246. try:
  1247. return self._parse( instring, loc, doActions=False )[0]
  1248. except ParseFatalException:
  1249. raise ParseException( instring, loc, self.errmsg, self)
  1250. def canParseNext(self, instring, loc):
  1251. try:
  1252. self.tryParse(instring, loc)
  1253. except (ParseException, IndexError):
  1254. return False
  1255. else:
  1256. return True
  1257. class _UnboundedCache(object):
  1258. def __init__(self):
  1259. cache = {}
  1260. self.not_in_cache = not_in_cache = object()
  1261. def get(self, key):
  1262. return cache.get(key, not_in_cache)
  1263. def set(self, key, value):
  1264. cache[key] = value
  1265. def clear(self):
  1266. cache.clear()
  1267. def cache_len(self):
  1268. return len(cache)
  1269. self.get = types.MethodType(get, self)
  1270. self.set = types.MethodType(set, self)
  1271. self.clear = types.MethodType(clear, self)
  1272. self.__len__ = types.MethodType(cache_len, self)
  1273. if _OrderedDict is not None:
  1274. class _FifoCache(object):
  1275. def __init__(self, size):
  1276. self.not_in_cache = not_in_cache = object()
  1277. cache = _OrderedDict()
  1278. def get(self, key):
  1279. return cache.get(key, not_in_cache)
  1280. def set(self, key, value):
  1281. cache[key] = value
  1282. while len(cache) > size:
  1283. try:
  1284. cache.popitem(False)
  1285. except KeyError:
  1286. pass
  1287. def clear(self):
  1288. cache.clear()
  1289. def cache_len(self):
  1290. return len(cache)
  1291. self.get = types.MethodType(get, self)
  1292. self.set = types.MethodType(set, self)
  1293. self.clear = types.MethodType(clear, self)
  1294. self.__len__ = types.MethodType(cache_len, self)
  1295. else:
  1296. class _FifoCache(object):
  1297. def __init__(self, size):
  1298. self.not_in_cache = not_in_cache = object()
  1299. cache = {}
  1300. key_fifo = collections.deque([], size)
  1301. def get(self, key):
  1302. return cache.get(key, not_in_cache)
  1303. def set(self, key, value):
  1304. cache[key] = value
  1305. while len(key_fifo) > size:
  1306. cache.pop(key_fifo.popleft(), None)
  1307. key_fifo.append(key)
  1308. def clear(self):
  1309. cache.clear()
  1310. key_fifo.clear()
  1311. def cache_len(self):
  1312. return len(cache)
  1313. self.get = types.MethodType(get, self)
  1314. self.set = types.MethodType(set, self)
  1315. self.clear = types.MethodType(clear, self)
  1316. self.__len__ = types.MethodType(cache_len, self)
# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {} # placeholder, replaced by enablePackrat(); defined here so that resetCache() doesn't fail
packrat_cache_lock = RLock() # held by _parseCache for the duration of each cached parse
packrat_cache_stats = [0, 0] # [hits, misses], indexed by HIT/MISS in _parseCache
  1321. # this method gets repeatedly called during backtracking with the same arguments -
  1322. # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
  1323. def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
  1324. HIT, MISS = 0, 1
  1325. lookup = (self, instring, loc, callPreParse, doActions)
  1326. with ParserElement.packrat_cache_lock:
  1327. cache = ParserElement.packrat_cache
  1328. value = cache.get(lookup)
  1329. if value is cache.not_in_cache:
  1330. ParserElement.packrat_cache_stats[MISS] += 1
  1331. try:
  1332. value = self._parseNoCache(instring, loc, doActions, callPreParse)
  1333. except ParseBaseException as pe:
  1334. # cache a copy of the exception, without the traceback
  1335. cache.set(lookup, pe.__class__(*pe.args))
  1336. raise
  1337. else:
  1338. cache.set(lookup, (value[0], value[1].copy()))
  1339. return value
  1340. else:
  1341. ParserElement.packrat_cache_stats[HIT] += 1
  1342. if isinstance(value, Exception):
  1343. raise value
  1344. return (value[0], value[1].copy())
# default parse entry point (no caching); enablePackrat() rebinds this to _parseCache
_parse = _parseNoCache
  1346. @staticmethod
  1347. def resetCache():
  1348. ParserElement.packrat_cache.clear()
  1349. ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
# set to True by the first enablePackrat() call; later calls then do nothing
_packratEnabled = False
  1351. @staticmethod
  1352. def enablePackrat(cache_size_limit=128):
  1353. """Enables "packrat" parsing, which adds memoizing to the parsing logic.
  1354. Repeated parse attempts at the same string location (which happens
  1355. often in many complex grammars) can immediately return a cached value,
  1356. instead of re-executing parsing/validating code. Memoizing is done of
  1357. both valid results and parsing exceptions.
  1358. Parameters:
  1359. - cache_size_limit - (default=C{128}) - if an integer value is provided
  1360. will limit the size of the packrat cache; if None is passed, then
  1361. the cache size will be unbounded; if 0 is passed, the cache will
  1362. be effectively disabled.
  1363. This speedup may break existing programs that use parse actions that
  1364. have side-effects. For this reason, packrat parsing is disabled when
  1365. you first import pyparsing. To activate the packrat feature, your
  1366. program must call the class method C{ParserElement.enablePackrat()}. If
  1367. your program uses C{psyco} to "compile as you go", you must call
  1368. C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
  1369. Python will crash. For best results, call C{enablePackrat()} immediately
  1370. after importing pyparsing.
  1371. Example::
  1372. import pyparsing
  1373. pyparsing.ParserElement.enablePackrat()
  1374. """
  1375. if not ParserElement._packratEnabled:
  1376. ParserElement._packratEnabled = True
  1377. if cache_size_limit is None:
  1378. ParserElement.packrat_cache = ParserElement._UnboundedCache()
  1379. else:
  1380. ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
  1381. ParserElement._parse = ParserElement._parseCache
  1382. def parseString( self, instring, parseAll=False ):
  1383. """
  1384. Execute the parse expression with the given string.
  1385. This is the main interface to the client code, once the complete
  1386. expression has been built.
  1387. If you want the grammar to require that the entire input string be
  1388. successfully parsed, then set C{parseAll} to True (equivalent to ending
  1389. the grammar with C{L{StringEnd()}}).
  1390. Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
  1391. in order to report proper column numbers in parse actions.
  1392. If the input string contains tabs and
  1393. the grammar uses parse actions that use the C{loc} argument to index into the
  1394. string being parsed, you can ensure you have a consistent view of the input
  1395. string by:
  1396. - calling C{parseWithTabs} on your grammar before calling C{parseString}
  1397. (see L{I{parseWithTabs}<parseWithTabs>})
  1398. - define your parse action using the full C{(s,loc,toks)} signature, and
  1399. reference the input string using the parse action's C{s} argument
  1400. - explictly expand the tabs in your input string before calling
  1401. C{parseString}
  1402. Example::
  1403. Word('a').parseString('aaaaabaaa') # -> ['aaaaa']
  1404. Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text
  1405. """
  1406. ParserElement.resetCache()
  1407. if not self.streamlined:
  1408. self.streamline()
  1409. #~ self.saveAsList = True
  1410. for e in self.ignoreExprs:
  1411. e.streamline()
  1412. if not self.keepTabs:
  1413. instring = instring.expandtabs()
  1414. try:
  1415. loc, tokens = self._parse( instring, 0 )
  1416. if parseAll:
  1417. loc = self.preParse( instring, loc )
  1418. se = Empty() + StringEnd()
  1419. se._parse( instring, loc )
  1420. except ParseBaseException as exc:
  1421. if ParserElement.verbose_stacktrace:
  1422. raise
  1423. else:
  1424. # catch and re-raise exception from here, clears out pyparsing internal stack trace
  1425. raise exc
  1426. else:
  1427. return tokens
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    This is a generator: it yields one C{(tokens, start, end)} tuple per match.
    Matches that do not advance past the current scan location are not reported.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the methods to locals once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                # preParse was already applied above, so tell _parse not to repeat it
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: resume one character after the attempted start
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): 'nextloc' (lowercase) is a different variable from
                        # 'nextLoc'. When preParse skipped something, scanning resumes at the
                        # END of the match (nextLoc), so no overlapping match is searched for
                        # inside it; otherwise it resumes one character on. Confirm intended.
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc: step forward so the scan makes progress
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
  1493. def transformString( self, instring ):
  1494. """
  1495. Extension to C{L{scanString}}, to modify matching text with modified tokens that may
  1496. be returned from a parse action. To use C{transformString}, define a grammar and
  1497. attach a parse action to it that modifies the returned token list.
  1498. Invoking C{transformString()} on a target string will then scan for matches,
  1499. and replace the matched text patterns according to the logic in the parse
  1500. action. C{transformString()} returns the resulting transformed string.
  1501. Example::
  1502. wd = Word(alphas)
  1503. wd.setParseAction(lambda toks: toks[0].title())
  1504. print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
  1505. Prints::
  1506. Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
  1507. """
  1508. out = []
  1509. lastE = 0
  1510. # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
  1511. # keep string locs straight between transformString and scanString
  1512. self.keepTabs = True
  1513. try:
  1514. for t,s,e in self.scanString( instring ):
  1515. out.append( instring[lastE:s] )
  1516. if t:
  1517. if isinstance(t,ParseResults):
  1518. out += t.asList()
  1519. elif isinstance(t,list):
  1520. out += t
  1521. else:
  1522. out.append(t)
  1523. lastE = e
  1524. out.append(instring[lastE:])
  1525. out = [o for o in out if o]
  1526. return "".join(map(_ustr,_flatten(out)))
  1527. except ParseBaseException as exc:
  1528. if ParserElement.verbose_stacktrace:
  1529. raise
  1530. else:
  1531. # catch and re-raise exception from here, clears out pyparsing internal stack trace
  1532. raise exc
  1533. def searchString( self, instring, maxMatches=_MAX_INT ):
  1534. """
  1535. Another extension to C{L{scanString}}, simplifying the access to the tokens found
  1536. to match the given parse expression. May be called with optional
  1537. C{maxMatches} argument, to clip searching after 'n' matches are found.
  1538. Example::
  1539. # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
  1540. cap_word = Word(alphas.upper(), alphas.lower())
  1541. print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
  1542. # the sum() builtin can be used to merge results into a single ParseResults object
  1543. print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
  1544. prints::
  1545. [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
  1546. ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
  1547. """
  1548. try:
  1549. return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
  1550. except ParseBaseException as exc:
  1551. if ParserElement.verbose_stacktrace:
  1552. raise
  1553. else:
  1554. # catch and re-raise exception from here, clears out pyparsing internal stack trace
  1555. raise exc
  1556. def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
  1557. """
  1558. Generator method to split a string using the given expression as a separator.
  1559. May be called with optional C{maxsplit} argument, to limit the number of splits;
  1560. and the optional C{includeSeparators} argument (default=C{False}), if the separating
  1561. matching text should be included in the split results.
  1562. Example::
  1563. punc = oneOf(list(".,;:/-!?"))
  1564. print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
  1565. prints::
  1566. ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
  1567. """
  1568. splits = 0
  1569. last = 0
  1570. for t,s,e in self.scanString(instring, maxMatches=maxsplit):
  1571. yield instring[last:s]
  1572. if includeSeparators:
  1573. yield t[0]
  1574. last = e
  1575. yield instring[last:]
  1576. def __add__(self, other ):
  1577. """
  1578. Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
  1579. converts them to L{Literal}s by default.
  1580. Example::
  1581. greet = Word(alphas) + "," + Word(alphas) + "!"
  1582. hello = "Hello, World!"
  1583. print (hello, "->", greet.parseString(hello))
  1584. Prints::
  1585. Hello, World! -> ['Hello', ',', 'World', '!']
  1586. """
  1587. if isinstance( other, basestring ):
  1588. other = ParserElement._literalStringClass( other )
  1589. if not isinstance( other, ParserElement ):
  1590. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1591. SyntaxWarning, stacklevel=2)
  1592. return None
  1593. return And( [ self, other ] )
  1594. def __radd__(self, other ):
  1595. """
  1596. Implementation of + operator when left operand is not a C{L{ParserElement}}
  1597. """
  1598. if isinstance( other, basestring ):
  1599. other = ParserElement._literalStringClass( other )
  1600. if not isinstance( other, ParserElement ):
  1601. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1602. SyntaxWarning, stacklevel=2)
  1603. return None
  1604. return other + self
  1605. def __sub__(self, other):
  1606. """
  1607. Implementation of - operator, returns C{L{And}} with error stop
  1608. """
  1609. if isinstance( other, basestring ):
  1610. other = ParserElement._literalStringClass( other )
  1611. if not isinstance( other, ParserElement ):
  1612. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1613. SyntaxWarning, stacklevel=2)
  1614. return None
  1615. return self + And._ErrorStop() + other
  1616. def __rsub__(self, other ):
  1617. """
  1618. Implementation of - operator when left operand is not a C{L{ParserElement}}
  1619. """
  1620. if isinstance( other, basestring ):
  1621. other = ParserElement._literalStringClass( other )
  1622. if not isinstance( other, ParserElement ):
  1623. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1624. SyntaxWarning, stacklevel=2)
  1625. return None
  1626. return other - self
  1627. def __mul__(self,other):
  1628. """
  1629. Implementation of * operator, allows use of C{expr * 3} in place of
  1630. C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer
  1631. tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
  1632. may also include C{None} as in:
  1633. - C{expr*(n,None)} or C{expr*(n,)} is equivalent
  1634. to C{expr*n + L{ZeroOrMore}(expr)}
  1635. (read as "at least n instances of C{expr}")
  1636. - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
  1637. (read as "0 to n instances of C{expr}")
  1638. - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
  1639. - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
  1640. Note that C{expr*(None,n)} does not raise an exception if
  1641. more than n exprs exist in the input stream; that is,
  1642. C{expr*(None,n)} does not enforce a maximum number of expr
  1643. occurrences. If this behavior is desired, then write
  1644. C{expr*(None,n) + ~expr}
  1645. """
  1646. if isinstance(other,int):
  1647. minElements, optElements = other,0
  1648. elif isinstance(other,tuple):
  1649. other = (other + (None, None))[:2]
  1650. if other[0] is None:
  1651. other = (0, other[1])
  1652. if isinstance(other[0],int) and other[1] is None:
  1653. if other[0] == 0:
  1654. return ZeroOrMore(self)
  1655. if other[0] == 1:
  1656. return OneOrMore(self)
  1657. else:
  1658. return self*other[0] + ZeroOrMore(self)
  1659. elif isinstance(other[0],int) and isinstance(other[1],int):
  1660. minElements, optElements = other
  1661. optElements -= minElements
  1662. else:
  1663. raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
  1664. else:
  1665. raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
  1666. if minElements < 0:
  1667. raise ValueError("cannot multiply ParserElement by negative value")
  1668. if optElements < 0:
  1669. raise ValueError("second tuple value must be greater or equal to first tuple value")
  1670. if minElements == optElements == 0:
  1671. raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
  1672. if (optElements):
  1673. def makeOptionalList(n):
  1674. if n>1:
  1675. return Optional(self + makeOptionalList(n-1))
  1676. else:
  1677. return Optional(self)
  1678. if minElements:
  1679. if minElements == 1:
  1680. ret = self + makeOptionalList(optElements)
  1681. else:
  1682. ret = And([self]*minElements) + makeOptionalList(optElements)
  1683. else:
  1684. ret = makeOptionalList(optElements)
  1685. else:
  1686. if minElements == 1:
  1687. ret = self
  1688. else:
  1689. ret = And([self]*minElements)
  1690. return ret
  1691. def __rmul__(self, other):
  1692. return self.__mul__(other)
  1693. def __or__(self, other ):
  1694. """
  1695. Implementation of | operator - returns C{L{MatchFirst}}
  1696. """
  1697. if isinstance( other, basestring ):
  1698. other = ParserElement._literalStringClass( other )
  1699. if not isinstance( other, ParserElement ):
  1700. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1701. SyntaxWarning, stacklevel=2)
  1702. return None
  1703. return MatchFirst( [ self, other ] )
  1704. def __ror__(self, other ):
  1705. """
  1706. Implementation of | operator when left operand is not a C{L{ParserElement}}
  1707. """
  1708. if isinstance( other, basestring ):
  1709. other = ParserElement._literalStringClass( other )
  1710. if not isinstance( other, ParserElement ):
  1711. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1712. SyntaxWarning, stacklevel=2)
  1713. return None
  1714. return other | self
  1715. def __xor__(self, other ):
  1716. """
  1717. Implementation of ^ operator - returns C{L{Or}}
  1718. """
  1719. if isinstance( other, basestring ):
  1720. other = ParserElement._literalStringClass( other )
  1721. if not isinstance( other, ParserElement ):
  1722. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1723. SyntaxWarning, stacklevel=2)
  1724. return None
  1725. return Or( [ self, other ] )
  1726. def __rxor__(self, other ):
  1727. """
  1728. Implementation of ^ operator when left operand is not a C{L{ParserElement}}
  1729. """
  1730. if isinstance( other, basestring ):
  1731. other = ParserElement._literalStringClass( other )
  1732. if not isinstance( other, ParserElement ):
  1733. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1734. SyntaxWarning, stacklevel=2)
  1735. return None
  1736. return other ^ self
  1737. def __and__(self, other ):
  1738. """
  1739. Implementation of & operator - returns C{L{Each}}
  1740. """
  1741. if isinstance( other, basestring ):
  1742. other = ParserElement._literalStringClass( other )
  1743. if not isinstance( other, ParserElement ):
  1744. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1745. SyntaxWarning, stacklevel=2)
  1746. return None
  1747. return Each( [ self, other ] )
  1748. def __rand__(self, other ):
  1749. """
  1750. Implementation of & operator when left operand is not a C{L{ParserElement}}
  1751. """
  1752. if isinstance( other, basestring ):
  1753. other = ParserElement._literalStringClass( other )
  1754. if not isinstance( other, ParserElement ):
  1755. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1756. SyntaxWarning, stacklevel=2)
  1757. return None
  1758. return other & self
  1759. def __invert__( self ):
  1760. """
  1761. Implementation of ~ operator - returns C{L{NotAny}}
  1762. """
  1763. return NotAny( self )
  1764. def __call__(self, name=None):
  1765. """
  1766. Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
  1767. If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
  1768. passed as C{True}.
  1769. If C{name} is omitted, same as calling C{L{copy}}.
  1770. Example::
  1771. # these are equivalent
  1772. userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
  1773. userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
  1774. """
  1775. if name is not None:
  1776. return self.setResultsName(name)
  1777. else:
  1778. return self.copy()
  1779. def suppress( self ):
  1780. """
  1781. Suppresses the output of this C{ParserElement}; useful to keep punctuation from
  1782. cluttering up returned output.
  1783. """
  1784. return Suppress( self )
  1785. def leaveWhitespace( self ):
  1786. """
  1787. Disables the skipping of whitespace before matching the characters in the
  1788. C{ParserElement}'s defined pattern. This is normally only used internally by
  1789. the pyparsing module, but may be needed in some whitespace-sensitive grammars.
  1790. """
  1791. self.skipWhitespace = False
  1792. return self
  1793. def setWhitespaceChars( self, chars ):
  1794. """
  1795. Overrides the default whitespace chars
  1796. """
  1797. self.skipWhitespace = True
  1798. self.whiteChars = chars
  1799. self.copyDefaultWhiteChars = False
  1800. return self
  1801. def parseWithTabs( self ):
  1802. """
  1803. Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
  1804. Must be called before C{parseString} when the input grammar contains elements that
  1805. match C{<TAB>} characters.
  1806. """
  1807. self.keepTabs = True
  1808. return self
  1809. def ignore( self, other ):
  1810. """
  1811. Define expression to be ignored (e.g., comments) while doing pattern
  1812. matching; may be called repeatedly, to define multiple comment or other
  1813. ignorable patterns.
  1814. Example::
  1815. patt = OneOrMore(Word(alphas))
  1816. patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
  1817. patt.ignore(cStyleComment)
  1818. patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
  1819. """
  1820. if isinstance(other, basestring):
  1821. other = Suppress(other)
  1822. if isinstance( other, Suppress ):
  1823. if other not in self.ignoreExprs:
  1824. self.ignoreExprs.append(other)
  1825. else:
  1826. self.ignoreExprs.append( Suppress( other.copy() ) )
  1827. return self
  1828. def setDebugActions( self, startAction, successAction, exceptionAction ):
  1829. """
  1830. Enable display of debugging messages while doing pattern matching.
  1831. """
  1832. self.debugActions = (startAction or _defaultStartDebugAction,
  1833. successAction or _defaultSuccessDebugAction,
  1834. exceptionAction or _defaultExceptionDebugAction)
  1835. self.debug = True
  1836. return self
  1837. def setDebug( self, flag=True ):
  1838. """
  1839. Enable display of debugging messages while doing pattern matching.
  1840. Set C{flag} to True to enable, False to disable.
  1841. Example::
  1842. wd = Word(alphas).setName("alphaword")
  1843. integer = Word(nums).setName("numword")
  1844. term = wd | integer
  1845. # turn on debugging for wd
  1846. wd.setDebug()
  1847. OneOrMore(term).parseString("abc 123 xyz 890")
  1848. prints::
  1849. Match alphaword at loc 0(1,1)
  1850. Matched alphaword -> ['abc']
  1851. Match alphaword at loc 3(1,4)
  1852. Exception raised:Expected alphaword (at char 4), (line:1, col:5)
  1853. Match alphaword at loc 7(1,8)
  1854. Matched alphaword -> ['xyz']
  1855. Match alphaword at loc 11(1,12)
  1856. Exception raised:Expected alphaword (at char 12), (line:1, col:13)
  1857. Match alphaword at loc 15(1,16)
  1858. Exception raised:Expected alphaword (at char 15), (line:1, col:16)
  1859. The output shown is that produced by the default debug actions - custom debug actions can be
  1860. specified using L{setDebugActions}. Prior to attempting
  1861. to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
  1862. is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
  1863. message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
  1864. which makes debugging and exception messages easier to understand - for instance, the default
  1865. name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
  1866. """
  1867. if flag:
  1868. self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
  1869. else:
  1870. self.debug = False
  1871. return self
  1872. def __str__( self ):
  1873. return self.name
  1874. def __repr__( self ):
  1875. return _ustr(self)
  1876. def streamline( self ):
  1877. self.streamlined = True
  1878. self.strRepr = None
  1879. return self
  1880. def checkRecursion( self, parseElementList ):
  1881. pass
  1882. def validate( self, validateTrace=[] ):
  1883. """
  1884. Check defined expressions for valid structure, check for infinite recursive definitions.
  1885. """
  1886. self.checkRecursion( [] )
  1887. def parseFile( self, file_or_filename, parseAll=False ):
  1888. """
  1889. Execute the parse expression on the given file or filename.
  1890. If a filename is specified (instead of a file object),
  1891. the entire file is opened, read, and closed before parsing.
  1892. """
  1893. try:
  1894. file_contents = file_or_filename.read()
  1895. except AttributeError:
  1896. with open(file_or_filename, "r") as f:
  1897. file_contents = f.read()
  1898. try:
  1899. return self.parseString(file_contents, parseAll)
  1900. except ParseBaseException as exc:
  1901. if ParserElement.verbose_stacktrace:
  1902. raise
  1903. else:
  1904. # catch and re-raise exception from here, clears out pyparsing internal stack trace
  1905. raise exc
  1906. def __eq__(self,other):
  1907. if isinstance(other, ParserElement):
  1908. return self is other or vars(self) == vars(other)
  1909. elif isinstance(other, basestring):
  1910. return self.matches(other)
  1911. else:
  1912. return super(ParserElement,self)==other
  1913. def __ne__(self,other):
  1914. return not (self == other)
  1915. def __hash__(self):
  1916. return hash(id(self))
  1917. def __req__(self,other):
  1918. return self == other
  1919. def __rne__(self,other):
  1920. return not (self == other)
  1921. def matches(self, testString, parseAll=True):
  1922. """
  1923. Method for quick testing of a parser against a test string. Good for simple
  1924. inline microtests of sub expressions while building up larger parser.
  1925. Parameters:
  1926. - testString - to test against this expression for a match
  1927. - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
  1928. Example::
  1929. expr = Word(nums)
  1930. assert expr.matches("100")
  1931. """
  1932. try:
  1933. self.parseString(_ustr(testString), parseAll=parseAll)
  1934. return True
  1935. except ParseBaseException:
  1936. return False
  1937. def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
  1938. """
  1939. Execute the parse expression on a series of test strings, showing each
  1940. test, the parsed results or where the parse failed. Quick and easy way to
  1941. run a parse expression against a list of sample strings.
  1942. Parameters:
  1943. - tests - a list of separate test strings, or a multiline string of test strings
  1944. - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
  1945. - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
  1946. string; pass None to disable comment filtering
  1947. - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
  1948. if False, only dump nested list
  1949. - printResults - (default=C{True}) prints test output to stdout
  1950. - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing
  1951. Returns: a (success, results) tuple, where success indicates that all tests succeeded
  1952. (or failed if C{failureTests} is True), and the results contain a list of lines of each
  1953. test's output
  1954. Example::
  1955. number_expr = pyparsing_common.number.copy()
  1956. result = number_expr.runTests('''
  1957. # unsigned integer
  1958. 100
  1959. # negative integer
  1960. -100
  1961. # float with scientific notation
  1962. 6.02e23
  1963. # integer with scientific notation
  1964. 1e-12
  1965. ''')
  1966. print("Success" if result[0] else "Failed!")
  1967. result = number_expr.runTests('''
  1968. # stray character
  1969. 100Z
  1970. # missing leading digit before '.'
  1971. -.100
  1972. # too many '.'
  1973. 3.14.159
  1974. ''', failureTests=True)
  1975. print("Success" if result[0] else "Failed!")
  1976. prints::
  1977. # unsigned integer
  1978. 100
  1979. [100]
  1980. # negative integer
  1981. -100
  1982. [-100]
  1983. # float with scientific notation
  1984. 6.02e23
  1985. [6.02e+23]
  1986. # integer with scientific notation
  1987. 1e-12
  1988. [1e-12]
  1989. Success
  1990. # stray character
  1991. 100Z
  1992. ^
  1993. FAIL: Expected end of text (at char 3), (line:1, col:4)
  1994. # missing leading digit before '.'
  1995. -.100
  1996. ^
  1997. FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)
  1998. # too many '.'
  1999. 3.14.159
  2000. ^
  2001. FAIL: Expected end of text (at char 4), (line:1, col:5)
  2002. Success
  2003. Each test string must be on a single line. If you want to test a string that spans multiple
  2004. lines, create a test like this::
  2005. expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")
  2006. (Note that this is a raw string literal, you must include the leading 'r'.)
  2007. """
  2008. if isinstance(tests, basestring):
  2009. tests = list(map(str.strip, tests.rstrip().splitlines()))
  2010. if isinstance(comment, basestring):
  2011. comment = Literal(comment)
  2012. allResults = []
  2013. comments = []
  2014. success = True
  2015. for t in tests:
  2016. if comment is not None and comment.matches(t, False) or comments and not t:
  2017. comments.append(t)
  2018. continue
  2019. if not t:
  2020. continue
  2021. out = ['\n'.join(comments), t]
  2022. comments = []
  2023. try:
  2024. t = t.replace(r'\n','\n')
  2025. result = self.parseString(t, parseAll=parseAll)
  2026. out.append(result.dump(full=fullDump))
  2027. success = success and not failureTests
  2028. except ParseBaseException as pe:
  2029. fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
  2030. if '\n' in t:
  2031. out.append(line(pe.loc, t))
  2032. out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
  2033. else:
  2034. out.append(' '*pe.loc + '^' + fatal)
  2035. out.append("FAIL: " + str(pe))
  2036. success = success and failureTests
  2037. result = pe
  2038. except Exception as exc:
  2039. out.append("FAIL-EXCEPTION: " + str(exc))
  2040. success = success and failureTests
  2041. result = exc
  2042. if printResults:
  2043. if fullDump:
  2044. out.append('')
  2045. print('\n'.join(out))
  2046. allResults.append((t, result))
  2047. return success, allResults
  2048. class Token(ParserElement):
  2049. """
  2050. Abstract C{ParserElement} subclass, for defining atomic matching patterns.
  2051. """
  2052. def __init__( self ):
  2053. super(Token,self).__init__( savelist=False )
  2054. class Empty(Token):
  2055. """
  2056. An empty token, will always match.
  2057. """
  2058. def __init__( self ):
  2059. super(Empty,self).__init__()
  2060. self.name = "Empty"
  2061. self.mayReturnEmpty = True
  2062. self.mayIndexError = False
  2063. class NoMatch(Token):
  2064. """
  2065. A token that will never match.
  2066. """
  2067. def __init__( self ):
  2068. super(NoMatch,self).__init__()
  2069. self.name = "NoMatch"
  2070. self.mayReturnEmpty = True
  2071. self.mayIndexError = False
  2072. self.errmsg = "Unmatchable token"
  2073. def parseImpl( self, instring, loc, doActions=True ):
  2074. raise ParseException(instring, loc, self.errmsg, self)
  2075. class Literal(Token):
  2076. """
  2077. Token to exactly match a specified string.
  2078. Example::
  2079. Literal('blah').parseString('blah') # -> ['blah']
  2080. Literal('blah').parseString('blahfooblah') # -> ['blah']
  2081. Literal('blah').parseString('bla') # -> Exception: Expected "blah"
  2082. For case-insensitive matching, use L{CaselessLiteral}.
  2083. For keyword matching (force word break before and after the matched string),
  2084. use L{Keyword} or L{CaselessKeyword}.
  2085. """
  2086. def __init__( self, matchString ):
  2087. super(Literal,self).__init__()
  2088. self.match = matchString
  2089. self.matchLen = len(matchString)
  2090. try:
  2091. self.firstMatchChar = matchString[0]
  2092. except IndexError:
  2093. warnings.warn("null string passed to Literal; use Empty() instead",
  2094. SyntaxWarning, stacklevel=2)
  2095. self.__class__ = Empty
  2096. self.name = '"%s"' % _ustr(self.match)
  2097. self.errmsg = "Expected " + self.name
  2098. self.mayReturnEmpty = False
  2099. self.mayIndexError = False
  2100. # Performance tuning: this routine gets called a *lot*
  2101. # if this is a single character match string and the first character matches,
  2102. # short-circuit as quickly as possible, and avoid calling startswith
  2103. #~ @profile
  2104. def parseImpl( self, instring, loc, doActions=True ):
  2105. if (instring[loc] == self.firstMatchChar and
  2106. (self.matchLen==1 or instring.startswith(self.match,loc)) ):
  2107. return loc+self.matchLen, self.match
  2108. raise ParseException(instring, loc, self.errmsg, self)
  2109. _L = Literal
  2110. ParserElement._literalStringClass = Literal
  2111. class Keyword(Token):
  2112. """
  2113. Token to exactly match a specified string as a keyword, that is, it must be
  2114. immediately followed by a non-keyword character. Compare with C{L{Literal}}:
  2115. - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
  2116. - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
  2117. Accepts two optional constructor arguments in addition to the keyword string:
  2118. - C{identChars} is a string of characters that would be valid identifier characters,
  2119. defaulting to all alphanumerics + "_" and "$"
  2120. - C{caseless} allows case-insensitive matching, default is C{False}.
  2121. Example::
  2122. Keyword("start").parseString("start") # -> ['start']
  2123. Keyword("start").parseString("starting") # -> Exception
  2124. For case-insensitive matching, use L{CaselessKeyword}.
  2125. """
  2126. DEFAULT_KEYWORD_CHARS = alphanums+"_$"
  2127. def __init__( self, matchString, identChars=None, caseless=False ):
  2128. super(Keyword,self).__init__()
  2129. if identChars is None:
  2130. identChars = Keyword.DEFAULT_KEYWORD_CHARS
  2131. self.match = matchString
  2132. self.matchLen = len(matchString)
  2133. try:
  2134. self.firstMatchChar = matchString[0]
  2135. except IndexError:
  2136. warnings.warn("null string passed to Keyword; use Empty() instead",
  2137. SyntaxWarning, stacklevel=2)
  2138. self.name = '"%s"' % self.match
  2139. self.errmsg = "Expected " + self.name
  2140. self.mayReturnEmpty = False
  2141. self.mayIndexError = False
  2142. self.caseless = caseless
  2143. if caseless:
  2144. self.caselessmatch = matchString.upper()
  2145. identChars = identChars.upper()
  2146. self.identChars = set(identChars)
  2147. def parseImpl( self, instring, loc, doActions=True ):
  2148. if self.caseless:
  2149. if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
  2150. (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
  2151. (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
  2152. return loc+self.matchLen, self.match
  2153. else:
  2154. if (instring[loc] == self.firstMatchChar and
  2155. (self.matchLen==1 or instring.startswith(self.match,loc)) and
  2156. (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
  2157. (loc == 0 or instring[loc-1] not in self.identChars) ):
  2158. return loc+self.matchLen, self.match
  2159. raise ParseException(instring, loc, self.errmsg, self)
  2160. def copy(self):
  2161. c = super(Keyword,self).copy()
  2162. c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
  2163. return c
  2164. @staticmethod
  2165. def setDefaultKeywordChars( chars ):
  2166. """Overrides the default Keyword chars
  2167. """
  2168. Keyword.DEFAULT_KEYWORD_CHARS = chars
  2169. class CaselessLiteral(Literal):
  2170. """
  2171. Token to match a specified string, ignoring case of letters.
  2172. Note: the matched results will always be in the case of the given
  2173. match string, NOT the case of the input text.
  2174. Example::
  2175. OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
  2176. (Contrast with example for L{CaselessKeyword}.)
  2177. """
  2178. def __init__( self, matchString ):
  2179. super(CaselessLiteral,self).__init__( matchString.upper() )
  2180. # Preserve the defining literal.
  2181. self.returnString = matchString
  2182. self.name = "'%s'" % self.returnString
  2183. self.errmsg = "Expected " + self.name
  2184. def parseImpl( self, instring, loc, doActions=True ):
  2185. if instring[ loc:loc+self.matchLen ].upper() == self.match:
  2186. return loc+self.matchLen, self.returnString
  2187. raise ParseException(instring, loc, self.errmsg, self)
  2188. class CaselessKeyword(Keyword):
  2189. """
  2190. Caseless version of L{Keyword}.
  2191. Example::
  2192. OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
  2193. (Contrast with example for L{CaselessLiteral}.)
  2194. """
  2195. def __init__( self, matchString, identChars=None ):
  2196. super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
  2197. def parseImpl( self, instring, loc, doActions=True ):
  2198. if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
  2199. (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
  2200. return loc+self.matchLen, self.match
  2201. raise ParseException(instring, loc, self.errmsg, self)
  2202. class CloseMatch(Token):
  2203. """
  2204. A variation on L{Literal} which matches "close" matches, that is,
  2205. strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
  2206. - C{match_string} - string to be matched
  2207. - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match
  2208. The results from a successful parse will contain the matched text from the input string and the following named results:
  2209. - C{mismatches} - a list of the positions within the match_string where mismatches were found
  2210. - C{original} - the original match_string used to compare against the input string
  2211. If C{mismatches} is an empty list, then the match was an exact match.
  2212. Example::
  2213. patt = CloseMatch("ATCATCGAATGGA")
  2214. patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
  2215. patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
  2216. # exact match
  2217. patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
  2218. # close match allowing up to 2 mismatches
  2219. patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
  2220. patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
  2221. """
  2222. def __init__(self, match_string, maxMismatches=1):
  2223. super(CloseMatch,self).__init__()
  2224. self.name = match_string
  2225. self.match_string = match_string
  2226. self.maxMismatches = maxMismatches
  2227. self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
  2228. self.mayIndexError = False
  2229. self.mayReturnEmpty = False
  2230. def parseImpl( self, instring, loc, doActions=True ):
  2231. start = loc
  2232. instrlen = len(instring)
  2233. maxloc = start + len(self.match_string)
  2234. if maxloc <= instrlen:
  2235. match_string = self.match_string
  2236. match_stringloc = 0
  2237. mismatches = []
  2238. maxMismatches = self.maxMismatches
  2239. for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)):
  2240. src,mat = s_m
  2241. if src != mat:
  2242. mismatches.append(match_stringloc)
  2243. if len(mismatches) > maxMismatches:
  2244. break
  2245. else:
  2246. loc = match_stringloc + 1
  2247. results = ParseResults([instring[start:loc]])
  2248. results['original'] = self.match_string
  2249. results['mismatches'] = mismatches
  2250. return loc, results
  2251. raise ParseException(instring, loc, self.errmsg, self)
  2252. class Word(Token):
  2253. """
  2254. Token for matching words composed of allowed character sets.
  2255. Defined with string containing all allowed initial characters,
  2256. an optional string containing allowed body characters (if omitted,
  2257. defaults to the initial character set), and an optional minimum,
  2258. maximum, and/or exact length. The default value for C{min} is 1 (a
  2259. minimum value < 1 is not valid); the default values for C{max} and C{exact}
  2260. are 0, meaning no maximum or exact length restriction. An optional
  2261. C{excludeChars} parameter can list characters that might be found in
  2262. the input C{bodyChars} string; useful to define a word of all printables
  2263. except for one or two characters, for instance.
  2264. L{srange} is useful for defining custom character set strings for defining
  2265. C{Word} expressions, using range notation from regular expression character sets.
  2266. A common mistake is to use C{Word} to match a specific literal string, as in
  2267. C{Word("Address")}. Remember that C{Word} uses the string argument to define
  2268. I{sets} of matchable characters. This expression would match "Add", "AAA",
  2269. "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
  2270. To match an exact literal string, use L{Literal} or L{Keyword}.
  2271. pyparsing includes helper strings for building Words:
  2272. - L{alphas}
  2273. - L{nums}
  2274. - L{alphanums}
  2275. - L{hexnums}
  2276. - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
  2277. - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
  2278. - L{printables} (any non-whitespace character)
  2279. Example::
  2280. # a word composed of digits
  2281. integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
  2282. # a word with a leading capital, and zero or more lowercase
  2283. capital_word = Word(alphas.upper(), alphas.lower())
  2284. # hostnames are alphanumeric, with leading alpha, and '-'
  2285. hostname = Word(alphas, alphanums+'-')
  2286. # roman numeral (not a strict parser, accepts invalid mix of characters)
  2287. roman = Word("IVXLCDM")
  2288. # any string of non-whitespace characters, except for ','
  2289. csv_value = Word(printables, excludeChars=",")
  2290. """
  2291. def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
  2292. super(Word,self).__init__()
  2293. if excludeChars:
  2294. initChars = ''.join(c for c in initChars if c not in excludeChars)
  2295. if bodyChars:
  2296. bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
  2297. self.initCharsOrig = initChars
  2298. self.initChars = set(initChars)
  2299. if bodyChars :
  2300. self.bodyCharsOrig = bodyChars
  2301. self.bodyChars = set(bodyChars)
  2302. else:
  2303. self.bodyCharsOrig = initChars
  2304. self.bodyChars = set(initChars)
  2305. self.maxSpecified = max > 0
  2306. if min < 1:
  2307. raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
  2308. self.minLen = min
  2309. if max > 0:
  2310. self.maxLen = max
  2311. else:
  2312. self.maxLen = _MAX_INT
  2313. if exact > 0:
  2314. self.maxLen = exact
  2315. self.minLen = exact
  2316. self.name = _ustr(self)
  2317. self.errmsg = "Expected " + self.name
  2318. self.mayIndexError = False
  2319. self.asKeyword = asKeyword
  2320. if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
  2321. if self.bodyCharsOrig == self.initCharsOrig:
  2322. self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
  2323. elif len(self.initCharsOrig) == 1:
  2324. self.reString = "%s[%s]*" % \
  2325. (re.escape(self.initCharsOrig),
  2326. _escapeRegexRangeChars(self.bodyCharsOrig),)
  2327. else:
  2328. self.reString = "[%s][%s]*" % \
  2329. (_escapeRegexRangeChars(self.initCharsOrig),
  2330. _escapeRegexRangeChars(self.bodyCharsOrig),)
  2331. if self.asKeyword:
  2332. self.reString = r"\b"+self.reString+r"\b"
  2333. try:
  2334. self.re = re.compile( self.reString )
  2335. except Exception:
  2336. self.re = None
  2337. def parseImpl( self, instring, loc, doActions=True ):
  2338. if self.re:
  2339. result = self.re.match(instring,loc)
  2340. if not result:
  2341. raise ParseException(instring, loc, self.errmsg, self)
  2342. loc = result.end()
  2343. return loc, result.group()
  2344. if not(instring[ loc ] in self.initChars):
  2345. raise ParseException(instring, loc, self.errmsg, self)
  2346. start = loc
  2347. loc += 1
  2348. instrlen = len(instring)
  2349. bodychars = self.bodyChars
  2350. maxloc = start + self.maxLen
  2351. maxloc = min( maxloc, instrlen )
  2352. while loc < maxloc and instring[loc] in bodychars:
  2353. loc += 1
  2354. throwException = False
  2355. if loc - start < self.minLen:
  2356. throwException = True
  2357. if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
  2358. throwException = True
  2359. if self.asKeyword:
  2360. if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
  2361. throwException = True
  2362. if throwException:
  2363. raise ParseException(instring, loc, self.errmsg, self)
  2364. return loc, instring[start:loc]
  2365. def __str__( self ):
  2366. try:
  2367. return super(Word,self).__str__()
  2368. except Exception:
  2369. pass
  2370. if self.strRepr is None:
  2371. def charsAsStr(s):
  2372. if len(s)>4:
  2373. return s[:4]+"..."
  2374. else:
  2375. return s
  2376. if ( self.initCharsOrig != self.bodyCharsOrig ):
  2377. self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
  2378. else:
  2379. self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
  2380. return self.strRepr
  2381. class Regex(Token):
  2382. r"""
  2383. Token for matching strings that match a given regular expression.
  2384. Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
  2385. If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
  2386. named parse results.
  2387. Example::
  2388. realnum = Regex(r"[+-]?\d+\.\d*")
  2389. date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
  2390. # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
  2391. roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
  2392. """
  2393. compiledREtype = type(re.compile("[A-Z]"))
  2394. def __init__( self, pattern, flags=0):
  2395. """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
  2396. super(Regex,self).__init__()
  2397. if isinstance(pattern, basestring):
  2398. if not pattern:
  2399. warnings.warn("null string passed to Regex; use Empty() instead",
  2400. SyntaxWarning, stacklevel=2)
  2401. self.pattern = pattern
  2402. self.flags = flags
  2403. try:
  2404. self.re = re.compile(self.pattern, self.flags)
  2405. self.reString = self.pattern
  2406. except sre_constants.error:
  2407. warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
  2408. SyntaxWarning, stacklevel=2)
  2409. raise
  2410. elif isinstance(pattern, Regex.compiledREtype):
  2411. self.re = pattern
  2412. self.pattern = \
  2413. self.reString = str(pattern)
  2414. self.flags = flags
  2415. else:
  2416. raise ValueError("Regex may only be constructed with a string or a compiled RE object")
  2417. self.name = _ustr(self)
  2418. self.errmsg = "Expected " + self.name
  2419. self.mayIndexError = False
  2420. self.mayReturnEmpty = True
  2421. def parseImpl( self, instring, loc, doActions=True ):
  2422. result = self.re.match(instring,loc)
  2423. if not result:
  2424. raise ParseException(instring, loc, self.errmsg, self)
  2425. loc = result.end()
  2426. d = result.groupdict()
  2427. ret = ParseResults(result.group())
  2428. if d:
  2429. for k in d:
  2430. ret[k] = d[k]
  2431. return loc,ret
  2432. def __str__( self ):
  2433. try:
  2434. return super(Regex,self).__str__()
  2435. except Exception:
  2436. pass
  2437. if self.strRepr is None:
  2438. self.strRepr = "Re:(%s)" % repr(self.pattern)
  2439. return self.strRepr
  2440. class QuotedString(Token):
  2441. r"""
  2442. Token for matching strings that are delimited by quoting characters.
  2443. Defined with the following parameters:
  2444. - quoteChar - string of one or more characters defining the quote delimiting string
  2445. - escChar - character to escape quotes, typically backslash (default=C{None})
  2446. - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
  2447. - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
  2448. - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
  2449. - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
  2450. - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
  2451. Example::
  2452. qs = QuotedString('"')
  2453. print(qs.searchString('lsjdf "This is the quote" sldjf'))
  2454. complex_qs = QuotedString('{{', endQuoteChar='}}')
  2455. print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
  2456. sql_qs = QuotedString('"', escQuote='""')
  2457. print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
  2458. prints::
  2459. [['This is the quote']]
  2460. [['This is the "quote"']]
  2461. [['This is the quote with "embedded" quotes']]
  2462. """
  2463. def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
  2464. super(QuotedString,self).__init__()
  2465. # remove white space from quote chars - wont work anyway
  2466. quoteChar = quoteChar.strip()
  2467. if not quoteChar:
  2468. warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
  2469. raise SyntaxError()
  2470. if endQuoteChar is None:
  2471. endQuoteChar = quoteChar
  2472. else:
  2473. endQuoteChar = endQuoteChar.strip()
  2474. if not endQuoteChar:
  2475. warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
  2476. raise SyntaxError()
  2477. self.quoteChar = quoteChar
  2478. self.quoteCharLen = len(quoteChar)
  2479. self.firstQuoteChar = quoteChar[0]
  2480. self.endQuoteChar = endQuoteChar
  2481. self.endQuoteCharLen = len(endQuoteChar)
  2482. self.escChar = escChar
  2483. self.escQuote = escQuote
  2484. self.unquoteResults = unquoteResults
  2485. self.convertWhitespaceEscapes = convertWhitespaceEscapes
  2486. if multiline:
  2487. self.flags = re.MULTILINE | re.DOTALL
  2488. self.pattern = r'%s(?:[^%s%s]' % \
  2489. ( re.escape(self.quoteChar),
  2490. _escapeRegexRangeChars(self.endQuoteChar[0]),
  2491. (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
  2492. else:
  2493. self.flags = 0
  2494. self.pattern = r'%s(?:[^%s\n\r%s]' % \
  2495. ( re.escape(self.quoteChar),
  2496. _escapeRegexRangeChars(self.endQuoteChar[0]),
  2497. (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
  2498. if len(self.endQuoteChar) > 1:
  2499. self.pattern += (
  2500. '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
  2501. _escapeRegexRangeChars(self.endQuoteChar[i]))
  2502. for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
  2503. )
  2504. if escQuote:
  2505. self.pattern += (r'|(?:%s)' % re.escape(escQuote))
  2506. if escChar:
  2507. self.pattern += (r'|(?:%s.)' % re.escape(escChar))
  2508. self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
  2509. self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
  2510. try:
  2511. self.re = re.compile(self.pattern, self.flags)
  2512. self.reString = self.pattern
  2513. except sre_constants.error:
  2514. warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
  2515. SyntaxWarning, stacklevel=2)
  2516. raise
  2517. self.name = _ustr(self)
  2518. self.errmsg = "Expected " + self.name
  2519. self.mayIndexError = False
  2520. self.mayReturnEmpty = True
  2521. def parseImpl( self, instring, loc, doActions=True ):
  2522. result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
  2523. if not result:
  2524. raise ParseException(instring, loc, self.errmsg, self)
  2525. loc = result.end()
  2526. ret = result.group()
  2527. if self.unquoteResults:
  2528. # strip off quotes
  2529. ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
  2530. if isinstance(ret,basestring):
  2531. # replace escaped whitespace
  2532. if '\\' in ret and self.convertWhitespaceEscapes:
  2533. ws_map = {
  2534. r'\t' : '\t',
  2535. r'\n' : '\n',
  2536. r'\f' : '\f',
  2537. r'\r' : '\r',
  2538. }
  2539. for wslit,wschar in ws_map.items():
  2540. ret = ret.replace(wslit, wschar)
  2541. # replace escaped characters
  2542. if self.escChar:
  2543. ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)
  2544. # replace escaped quotes
  2545. if self.escQuote:
  2546. ret = ret.replace(self.escQuote, self.endQuoteChar)
  2547. return loc, ret
  2548. def __str__( self ):
  2549. try:
  2550. return super(QuotedString,self).__str__()
  2551. except Exception:
  2552. pass
  2553. if self.strRepr is None:
  2554. self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
  2555. return self.strRepr
  2556. class CharsNotIn(Token):
  2557. """
  2558. Token for matching words composed of characters I{not} in a given set (will
  2559. include whitespace in matched characters if not listed in the provided exclusion set - see example).
  2560. Defined with string containing all disallowed characters, and an optional
  2561. minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
  2562. minimum value < 1 is not valid); the default values for C{max} and C{exact}
  2563. are 0, meaning no maximum or exact length restriction.
  2564. Example::
  2565. # define a comma-separated-value as anything that is not a ','
  2566. csv_value = CharsNotIn(',')
  2567. print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
  2568. prints::
  2569. ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
  2570. """
  2571. def __init__( self, notChars, min=1, max=0, exact=0 ):
  2572. super(CharsNotIn,self).__init__()
  2573. self.skipWhitespace = False
  2574. self.notChars = notChars
  2575. if min < 1:
  2576. raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")
  2577. self.minLen = min
  2578. if max > 0:
  2579. self.maxLen = max
  2580. else:
  2581. self.maxLen = _MAX_INT
  2582. if exact > 0:
  2583. self.maxLen = exact
  2584. self.minLen = exact
  2585. self.name = _ustr(self)
  2586. self.errmsg = "Expected " + self.name
  2587. self.mayReturnEmpty = ( self.minLen == 0 )
  2588. self.mayIndexError = False
  2589. def parseImpl( self, instring, loc, doActions=True ):
  2590. if instring[loc] in self.notChars:
  2591. raise ParseException(instring, loc, self.errmsg, self)
  2592. start = loc
  2593. loc += 1
  2594. notchars = self.notChars
  2595. maxlen = min( start+self.maxLen, len(instring) )
  2596. while loc < maxlen and \
  2597. (instring[loc] not in notchars):
  2598. loc += 1
  2599. if loc - start < self.minLen:
  2600. raise ParseException(instring, loc, self.errmsg, self)
  2601. return loc, instring[start:loc]
  2602. def __str__( self ):
  2603. try:
  2604. return super(CharsNotIn, self).__str__()
  2605. except Exception:
  2606. pass
  2607. if self.strRepr is None:
  2608. if len(self.notChars) > 4:
  2609. self.strRepr = "!W:(%s...)" % self.notChars[:4]
  2610. else:
  2611. self.strRepr = "!W:(%s)" % self.notChars
  2612. return self.strRepr
  2613. class White(Token):
  2614. """
  2615. Special matching class for matching whitespace. Normally, whitespace is ignored
  2616. by pyparsing grammars. This class is included when some whitespace structures
  2617. are significant. Define with a string containing the whitespace characters to be
  2618. matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
  2619. as defined for the C{L{Word}} class.
  2620. """
  2621. whiteStrs = {
  2622. " " : "<SPC>",
  2623. "\t": "<TAB>",
  2624. "\n": "<LF>",
  2625. "\r": "<CR>",
  2626. "\f": "<FF>",
  2627. }
  2628. def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
  2629. super(White,self).__init__()
  2630. self.matchWhite = ws
  2631. self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )
  2632. #~ self.leaveWhitespace()
  2633. self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
  2634. self.mayReturnEmpty = True
  2635. self.errmsg = "Expected " + self.name
  2636. self.minLen = min
  2637. if max > 0:
  2638. self.maxLen = max
  2639. else:
  2640. self.maxLen = _MAX_INT
  2641. if exact > 0:
  2642. self.maxLen = exact
  2643. self.minLen = exact
  2644. def parseImpl( self, instring, loc, doActions=True ):
  2645. if not(instring[ loc ] in self.matchWhite):
  2646. raise ParseException(instring, loc, self.errmsg, self)
  2647. start = loc
  2648. loc += 1
  2649. maxloc = start + self.maxLen
  2650. maxloc = min( maxloc, len(instring) )
  2651. while loc < maxloc and instring[loc] in self.matchWhite:
  2652. loc += 1
  2653. if loc - start < self.minLen:
  2654. raise ParseException(instring, loc, self.errmsg, self)
  2655. return loc, instring[start:loc]
  2656. class _PositionToken(Token):
  2657. def __init__( self ):
  2658. super(_PositionToken,self).__init__()
  2659. self.name=self.__class__.__name__
  2660. self.mayReturnEmpty = True
  2661. self.mayIndexError = False
  2662. class GoToColumn(_PositionToken):
  2663. """
  2664. Token to advance to a specific column of input text; useful for tabular report scraping.
  2665. """
  2666. def __init__( self, colno ):
  2667. super(GoToColumn,self).__init__()
  2668. self.col = colno
  2669. def preParse( self, instring, loc ):
  2670. if col(loc,instring) != self.col:
  2671. instrlen = len(instring)
  2672. if self.ignoreExprs:
  2673. loc = self._skipIgnorables( instring, loc )
  2674. while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
  2675. loc += 1
  2676. return loc
  2677. def parseImpl( self, instring, loc, doActions=True ):
  2678. thiscol = col( loc, instring )
  2679. if thiscol > self.col:
  2680. raise ParseException( instring, loc, "Text not in expected column", self )
  2681. newloc = loc + self.col - thiscol
  2682. ret = instring[ loc: newloc ]
  2683. return newloc, ret
  2684. class LineStart(_PositionToken):
  2685. """
  2686. Matches if current position is at the beginning of a line within the parse string
  2687. Example::
  2688. test = '''\
  2689. AAA this line
  2690. AAA and this line
  2691. AAA but not this one
  2692. B AAA and definitely not this one
  2693. '''
  2694. for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
  2695. print(t)
  2696. Prints::
  2697. ['AAA', ' this line']
  2698. ['AAA', ' and this line']
  2699. """
  2700. def __init__( self ):
  2701. super(LineStart,self).__init__()
  2702. self.errmsg = "Expected start of line"
  2703. def parseImpl( self, instring, loc, doActions=True ):
  2704. if col(loc, instring) == 1:
  2705. return loc, []
  2706. raise ParseException(instring, loc, self.errmsg, self)
  2707. class LineEnd(_PositionToken):
  2708. """
  2709. Matches if current position is at the end of a line within the parse string
  2710. """
  2711. def __init__( self ):
  2712. super(LineEnd,self).__init__()
  2713. self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
  2714. self.errmsg = "Expected end of line"
  2715. def parseImpl( self, instring, loc, doActions=True ):
  2716. if loc<len(instring):
  2717. if instring[loc] == "\n":
  2718. return loc+1, "\n"
  2719. else:
  2720. raise ParseException(instring, loc, self.errmsg, self)
  2721. elif loc == len(instring):
  2722. return loc+1, []
  2723. else:
  2724. raise ParseException(instring, loc, self.errmsg, self)
  2725. class StringStart(_PositionToken):
  2726. """
  2727. Matches if current position is at the beginning of the parse string
  2728. """
  2729. def __init__( self ):
  2730. super(StringStart,self).__init__()
  2731. self.errmsg = "Expected start of text"
  2732. def parseImpl( self, instring, loc, doActions=True ):
  2733. if loc != 0:
  2734. # see if entire string up to here is just whitespace and ignoreables
  2735. if loc != self.preParse( instring, 0 ):
  2736. raise ParseException(instring, loc, self.errmsg, self)
  2737. return loc, []
  2738. class StringEnd(_PositionToken):
  2739. """
  2740. Matches if current position is at the end of the parse string
  2741. """
  2742. def __init__( self ):
  2743. super(StringEnd,self).__init__()
  2744. self.errmsg = "Expected end of text"
  2745. def parseImpl( self, instring, loc, doActions=True ):
  2746. if loc < len(instring):
  2747. raise ParseException(instring, loc, self.errmsg, self)
  2748. elif loc == len(instring):
  2749. return loc+1, []
  2750. elif loc > len(instring):
  2751. return loc, []
  2752. else:
  2753. raise ParseException(instring, loc, self.errmsg, self)
  2754. class WordStart(_PositionToken):
  2755. """
  2756. Matches if the current position is at the beginning of a Word, and
  2757. is not preceded by any character in a given set of C{wordChars}
  2758. (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
  2759. use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
  2760. the string being parsed, or at the beginning of a line.
  2761. """
  2762. def __init__(self, wordChars = printables):
  2763. super(WordStart,self).__init__()
  2764. self.wordChars = set(wordChars)
  2765. self.errmsg = "Not at the start of a word"
  2766. def parseImpl(self, instring, loc, doActions=True ):
  2767. if loc != 0:
  2768. if (instring[loc-1] in self.wordChars or
  2769. instring[loc] not in self.wordChars):
  2770. raise ParseException(instring, loc, self.errmsg, self)
  2771. return loc, []
  2772. class WordEnd(_PositionToken):
  2773. """
  2774. Matches if the current position is at the end of a Word, and
  2775. is not followed by any character in a given set of C{wordChars}
  2776. (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
  2777. use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
  2778. the string being parsed, or at the end of a line.
  2779. """
  2780. def __init__(self, wordChars = printables):
  2781. super(WordEnd,self).__init__()
  2782. self.wordChars = set(wordChars)
  2783. self.skipWhitespace = False
  2784. self.errmsg = "Not at the end of a word"
  2785. def parseImpl(self, instring, loc, doActions=True ):
  2786. instrlen = len(instring)
  2787. if instrlen>0 and loc<instrlen:
  2788. if (instring[loc] in self.wordChars or
  2789. instring[loc-1] not in self.wordChars):
  2790. raise ParseException(instring, loc, self.errmsg, self)
  2791. return loc, []
  2792. class ParseExpression(ParserElement):
  2793. """
  2794. Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
  2795. """
  2796. def __init__( self, exprs, savelist = False ):
  2797. super(ParseExpression,self).__init__(savelist)
  2798. if isinstance( exprs, _generatorType ):
  2799. exprs = list(exprs)
  2800. if isinstance( exprs, basestring ):
  2801. self.exprs = [ ParserElement._literalStringClass( exprs ) ]
  2802. elif isinstance( exprs, Iterable ):
  2803. exprs = list(exprs)
  2804. # if sequence of strings provided, wrap with Literal
  2805. if all(isinstance(expr, basestring) for expr in exprs):
  2806. exprs = map(ParserElement._literalStringClass, exprs)
  2807. self.exprs = list(exprs)
  2808. else:
  2809. try:
  2810. self.exprs = list( exprs )
  2811. except TypeError:
  2812. self.exprs = [ exprs ]
  2813. self.callPreparse = False
  2814. def __getitem__( self, i ):
  2815. return self.exprs[i]
  2816. def append( self, other ):
  2817. self.exprs.append( other )
  2818. self.strRepr = None
  2819. return self
  2820. def leaveWhitespace( self ):
  2821. """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
  2822. all contained expressions."""
  2823. self.skipWhitespace = False
  2824. self.exprs = [ e.copy() for e in self.exprs ]
  2825. for e in self.exprs:
  2826. e.leaveWhitespace()
  2827. return self
  2828. def ignore( self, other ):
  2829. if isinstance( other, Suppress ):
  2830. if other not in self.ignoreExprs:
  2831. super( ParseExpression, self).ignore( other )
  2832. for e in self.exprs:
  2833. e.ignore( self.ignoreExprs[-1] )
  2834. else:
  2835. super( ParseExpression, self).ignore( other )
  2836. for e in self.exprs:
  2837. e.ignore( self.ignoreExprs[-1] )
  2838. return self
  2839. def __str__( self ):
  2840. try:
  2841. return super(ParseExpression,self).__str__()
  2842. except Exception:
  2843. pass
  2844. if self.strRepr is None:
  2845. self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
  2846. return self.strRepr
  2847. def streamline( self ):
  2848. super(ParseExpression,self).streamline()
  2849. for e in self.exprs:
  2850. e.streamline()
  2851. # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
  2852. # but only if there are no parse actions or resultsNames on the nested And's
  2853. # (likewise for Or's and MatchFirst's)
  2854. if ( len(self.exprs) == 2 ):
  2855. other = self.exprs[0]
  2856. if ( isinstance( other, self.__class__ ) and
  2857. not(other.parseAction) and
  2858. other.resultsName is None and
  2859. not other.debug ):
  2860. self.exprs = other.exprs[:] + [ self.exprs[1] ]
  2861. self.strRepr = None
  2862. self.mayReturnEmpty |= other.mayReturnEmpty
  2863. self.mayIndexError |= other.mayIndexError
  2864. other = self.exprs[-1]
  2865. if ( isinstance( other, self.__class__ ) and
  2866. not(other.parseAction) and
  2867. other.resultsName is None and
  2868. not other.debug ):
  2869. self.exprs = self.exprs[:-1] + other.exprs[:]
  2870. self.strRepr = None
  2871. self.mayReturnEmpty |= other.mayReturnEmpty
  2872. self.mayIndexError |= other.mayIndexError
  2873. self.errmsg = "Expected " + _ustr(self)
  2874. return self
  2875. def setResultsName( self, name, listAllMatches=False ):
  2876. ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
  2877. return ret
  2878. def validate( self, validateTrace=[] ):
  2879. tmp = validateTrace[:]+[self]
  2880. for e in self.exprs:
  2881. e.validate(tmp)
  2882. self.checkRecursion( [] )
  2883. def copy(self):
  2884. ret = super(ParseExpression,self).copy()
  2885. ret.exprs = [e.copy() for e in self.exprs]
  2886. return ret
  2887. class And(ParseExpression):
  2888. """
  2889. Requires all given C{ParseExpression}s to be found in the given order.
  2890. Expressions may be separated by whitespace.
  2891. May be constructed using the C{'+'} operator.
  2892. May also be constructed using the C{'-'} operator, which will suppress backtracking.
  2893. Example::
  2894. integer = Word(nums)
  2895. name_expr = OneOrMore(Word(alphas))
  2896. expr = And([integer("id"),name_expr("name"),integer("age")])
  2897. # more easily written as:
  2898. expr = integer("id") + name_expr("name") + integer("age")
  2899. """
  2900. class _ErrorStop(Empty):
  2901. def __init__(self, *args, **kwargs):
  2902. super(And._ErrorStop,self).__init__(*args, **kwargs)
  2903. self.name = '-'
  2904. self.leaveWhitespace()
  2905. def __init__( self, exprs, savelist = True ):
  2906. super(And,self).__init__(exprs, savelist)
  2907. self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
  2908. self.setWhitespaceChars( self.exprs[0].whiteChars )
  2909. self.skipWhitespace = self.exprs[0].skipWhitespace
  2910. self.callPreparse = True
  2911. def parseImpl( self, instring, loc, doActions=True ):
  2912. # pass False as last arg to _parse for first element, since we already
  2913. # pre-parsed the string as part of our And pre-parsing
  2914. loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
  2915. errorStop = False
  2916. for e in self.exprs[1:]:
  2917. if isinstance(e, And._ErrorStop):
  2918. errorStop = True
  2919. continue
  2920. if errorStop:
  2921. try:
  2922. loc, exprtokens = e._parse( instring, loc, doActions )
  2923. except ParseSyntaxException:
  2924. raise
  2925. except ParseBaseException as pe:
  2926. pe.__traceback__ = None
  2927. raise ParseSyntaxException._from_exception(pe)
  2928. except IndexError:
  2929. raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
  2930. else:
  2931. loc, exprtokens = e._parse( instring, loc, doActions )
  2932. if exprtokens or exprtokens.haskeys():
  2933. resultlist += exprtokens
  2934. return loc, resultlist
  2935. def __iadd__(self, other ):
  2936. if isinstance( other, basestring ):
  2937. other = ParserElement._literalStringClass( other )
  2938. return self.append( other ) #And( [ self, other ] )
  2939. def checkRecursion( self, parseElementList ):
  2940. subRecCheckList = parseElementList[:] + [ self ]
  2941. for e in self.exprs:
  2942. e.checkRecursion( subRecCheckList )
  2943. if not e.mayReturnEmpty:
  2944. break
  2945. def __str__( self ):
  2946. if hasattr(self,"name"):
  2947. return self.name
  2948. if self.strRepr is None:
  2949. self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
  2950. return self.strRepr
  2951. class Or(ParseExpression):
  2952. """
  2953. Requires that at least one C{ParseExpression} is found.
  2954. If two expressions match, the expression that matches the longest string will be used.
  2955. May be constructed using the C{'^'} operator.
  2956. Example::
  2957. # construct Or using '^' operator
  2958. number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
  2959. print(number.searchString("123 3.1416 789"))
  2960. prints::
  2961. [['123'], ['3.1416'], ['789']]
  2962. """
  2963. def __init__( self, exprs, savelist = False ):
  2964. super(Or,self).__init__(exprs, savelist)
  2965. if self.exprs:
  2966. self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
  2967. else:
  2968. self.mayReturnEmpty = True
  2969. def parseImpl( self, instring, loc, doActions=True ):
  2970. maxExcLoc = -1
  2971. maxException = None
  2972. matches = []
  2973. for e in self.exprs:
  2974. try:
  2975. loc2 = e.tryParse( instring, loc )
  2976. except ParseException as err:
  2977. err.__traceback__ = None
  2978. if err.loc > maxExcLoc:
  2979. maxException = err
  2980. maxExcLoc = err.loc
  2981. except IndexError:
  2982. if len(instring) > maxExcLoc:
  2983. maxException = ParseException(instring,len(instring),e.errmsg,self)
  2984. maxExcLoc = len(instring)
  2985. else:
  2986. # save match among all matches, to retry longest to shortest
  2987. matches.append((loc2, e))
  2988. if matches:
  2989. matches.sort(key=lambda x: -x[0])
  2990. for _,e in matches:
  2991. try:
  2992. return e._parse( instring, loc, doActions )
  2993. except ParseException as err:
  2994. err.__traceback__ = None
  2995. if err.loc > maxExcLoc:
  2996. maxException = err
  2997. maxExcLoc = err.loc
  2998. if maxException is not None:
  2999. maxException.msg = self.errmsg
  3000. raise maxException
  3001. else:
  3002. raise ParseException(instring, loc, "no defined alternatives to match", self)
  3003. def __ixor__(self, other ):
  3004. if isinstance( other, basestring ):
  3005. other = ParserElement._literalStringClass( other )
  3006. return self.append( other ) #Or( [ self, other ] )
  3007. def __str__( self ):
  3008. if hasattr(self,"name"):
  3009. return self.name
  3010. if self.strRepr is None:
  3011. self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
  3012. return self.strRepr
  3013. def checkRecursion( self, parseElementList ):
  3014. subRecCheckList = parseElementList[:] + [ self ]
  3015. for e in self.exprs:
  3016. e.checkRecursion( subRecCheckList )
  3017. class MatchFirst(ParseExpression):
  3018. """
  3019. Requires that at least one C{ParseExpression} is found.
  3020. If two expressions match, the first one listed is the one that will match.
  3021. May be constructed using the C{'|'} operator.
  3022. Example::
  3023. # construct MatchFirst using '|' operator
  3024. # watch the order of expressions to match
  3025. number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
  3026. print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]
  3027. # put more selective expression first
  3028. number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
  3029. print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]
  3030. """
  3031. def __init__( self, exprs, savelist = False ):
  3032. super(MatchFirst,self).__init__(exprs, savelist)
  3033. if self.exprs:
  3034. self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
  3035. else:
  3036. self.mayReturnEmpty = True
  3037. def parseImpl( self, instring, loc, doActions=True ):
  3038. maxExcLoc = -1
  3039. maxException = None
  3040. for e in self.exprs:
  3041. try:
  3042. ret = e._parse( instring, loc, doActions )
  3043. return ret
  3044. except ParseException as err:
  3045. if err.loc > maxExcLoc:
  3046. maxException = err
  3047. maxExcLoc = err.loc
  3048. except IndexError:
  3049. if len(instring) > maxExcLoc:
  3050. maxException = ParseException(instring,len(instring),e.errmsg,self)
  3051. maxExcLoc = len(instring)
  3052. # only got here if no expression matched, raise exception for match that made it the furthest
  3053. else:
  3054. if maxException is not None:
  3055. maxException.msg = self.errmsg
  3056. raise maxException
  3057. else:
  3058. raise ParseException(instring, loc, "no defined alternatives to match", self)
  3059. def __ior__(self, other ):
  3060. if isinstance( other, basestring ):
  3061. other = ParserElement._literalStringClass( other )
  3062. return self.append( other ) #MatchFirst( [ self, other ] )
  3063. def __str__( self ):
  3064. if hasattr(self,"name"):
  3065. return self.name
  3066. if self.strRepr is None:
  3067. self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
  3068. return self.strRepr
  3069. def checkRecursion( self, parseElementList ):
  3070. subRecCheckList = parseElementList[:] + [ self ]
  3071. for e in self.exprs:
  3072. e.checkRecursion( subRecCheckList )
class Each(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color:GREEN size:20 shape:TRIANGLE posn:20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # the whole Each can only be empty if every member can be empty
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # expression groups are computed lazily, on the first call to parseImpl
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        # Phase 0 (first call only): classify the member expressions and cache
        # the groups on self; initExprGroups is cleared so this runs once.
        if self.initExprGroups:
            # maps id(inner expr) -> enclosing Optional, so a match of the inner
            # expression can be recorded as a match of the Optional itself
            self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
            # inner expressions of explicit Optional(...) members
            opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            # any other member that is allowed to match nothing
            opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
            self.optionals = opt1 + opt2
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False
        # Phase 1: trial-parse (tryParse) to discover the order in which the
        # members appear; work on copies so the cached groups are not consumed.
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        matchOrder = []

        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    # record the enclosing Optional if there is one, else e itself
                    matchOrder.append(self.opt1map.get(id(e),e))
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            # stop once a full pass over the candidates matches nothing
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join(_ustr(e) for e in tmpReqd)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

        # Phase 2: parse for real, in the discovered order, starting again from
        # the original loc and honouring doActions.
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        # concatenate the per-expression results into a single ParseResults
        finalResults = sum(resultlist, ParseResults([]))
        return loc, finalResults

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # build and cache the "{a & b & c}" form on first use
        if self.strRepr is None:
            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        # every member is checked, with this element added to the trail
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
  3178. class ParseElementEnhance(ParserElement):
  3179. """
  3180. Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
  3181. """
  3182. def __init__( self, expr, savelist=False ):
  3183. super(ParseElementEnhance,self).__init__(savelist)
  3184. if isinstance( expr, basestring ):
  3185. if issubclass(ParserElement._literalStringClass, Token):
  3186. expr = ParserElement._literalStringClass(expr)
  3187. else:
  3188. expr = ParserElement._literalStringClass(Literal(expr))
  3189. self.expr = expr
  3190. self.strRepr = None
  3191. if expr is not None:
  3192. self.mayIndexError = expr.mayIndexError
  3193. self.mayReturnEmpty = expr.mayReturnEmpty
  3194. self.setWhitespaceChars( expr.whiteChars )
  3195. self.skipWhitespace = expr.skipWhitespace
  3196. self.saveAsList = expr.saveAsList
  3197. self.callPreparse = expr.callPreparse
  3198. self.ignoreExprs.extend(expr.ignoreExprs)
  3199. def parseImpl( self, instring, loc, doActions=True ):
  3200. if self.expr is not None:
  3201. return self.expr._parse( instring, loc, doActions, callPreParse=False )
  3202. else:
  3203. raise ParseException("",loc,self.errmsg,self)
  3204. def leaveWhitespace( self ):
  3205. self.skipWhitespace = False
  3206. self.expr = self.expr.copy()
  3207. if self.expr is not None:
  3208. self.expr.leaveWhitespace()
  3209. return self
  3210. def ignore( self, other ):
  3211. if isinstance( other, Suppress ):
  3212. if other not in self.ignoreExprs:
  3213. super( ParseElementEnhance, self).ignore( other )
  3214. if self.expr is not None:
  3215. self.expr.ignore( self.ignoreExprs[-1] )
  3216. else:
  3217. super( ParseElementEnhance, self).ignore( other )
  3218. if self.expr is not None:
  3219. self.expr.ignore( self.ignoreExprs[-1] )
  3220. return self
  3221. def streamline( self ):
  3222. super(ParseElementEnhance,self).streamline()
  3223. if self.expr is not None:
  3224. self.expr.streamline()
  3225. return self
  3226. def checkRecursion( self, parseElementList ):
  3227. if self in parseElementList:
  3228. raise RecursiveGrammarException( parseElementList+[self] )
  3229. subRecCheckList = parseElementList[:] + [ self ]
  3230. if self.expr is not None:
  3231. self.expr.checkRecursion( subRecCheckList )
  3232. def validate( self, validateTrace=[] ):
  3233. tmp = validateTrace[:]+[self]
  3234. if self.expr is not None:
  3235. self.expr.validate(tmp)
  3236. self.checkRecursion( [] )
  3237. def __str__( self ):
  3238. try:
  3239. return super(ParseElementEnhance,self).__str__()
  3240. except Exception:
  3241. pass
  3242. if self.strRepr is None and self.expr is not None:
  3243. self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
  3244. return self.strRepr
  3245. class FollowedBy(ParseElementEnhance):
  3246. """
  3247. Lookahead matching of the given parse expression. C{FollowedBy}
  3248. does I{not} advance the parsing position within the input string, it only
  3249. verifies that the specified parse expression matches at the current
  3250. position. C{FollowedBy} always returns a null token list.
  3251. Example::
  3252. # use FollowedBy to match a label only if it is followed by a ':'
  3253. data_word = Word(alphas)
  3254. label = data_word + FollowedBy(':')
  3255. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3256. OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
  3257. prints::
  3258. [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
  3259. """
  3260. def __init__( self, expr ):
  3261. super(FollowedBy,self).__init__(expr)
  3262. self.mayReturnEmpty = True
  3263. def parseImpl( self, instring, loc, doActions=True ):
  3264. self.expr.tryParse( instring, loc )
  3265. return loc, []
  3266. class NotAny(ParseElementEnhance):
  3267. """
  3268. Lookahead to disallow matching with the given parse expression. C{NotAny}
  3269. does I{not} advance the parsing position within the input string, it only
  3270. verifies that the specified parse expression does I{not} match at the current
  3271. position. Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
  3272. always returns a null token list. May be constructed using the '~' operator.
  3273. Example::
  3274. """
  3275. def __init__( self, expr ):
  3276. super(NotAny,self).__init__(expr)
  3277. #~ self.leaveWhitespace()
  3278. self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
  3279. self.mayReturnEmpty = True
  3280. self.errmsg = "Found unwanted token, "+_ustr(self.expr)
  3281. def parseImpl( self, instring, loc, doActions=True ):
  3282. if self.expr.canParseNext(instring, loc):
  3283. raise ParseException(instring, loc, self.errmsg, self)
  3284. return loc, []
  3285. def __str__( self ):
  3286. if hasattr(self,"name"):
  3287. return self.name
  3288. if self.strRepr is None:
  3289. self.strRepr = "~{" + _ustr(self.expr) + "}"
  3290. return self.strRepr
  3291. class _MultipleMatch(ParseElementEnhance):
  3292. def __init__( self, expr, stopOn=None):
  3293. super(_MultipleMatch, self).__init__(expr)
  3294. self.saveAsList = True
  3295. ender = stopOn
  3296. if isinstance(ender, basestring):
  3297. ender = ParserElement._literalStringClass(ender)
  3298. self.not_ender = ~ender if ender is not None else None
  3299. def parseImpl( self, instring, loc, doActions=True ):
  3300. self_expr_parse = self.expr._parse
  3301. self_skip_ignorables = self._skipIgnorables
  3302. check_ender = self.not_ender is not None
  3303. if check_ender:
  3304. try_not_ender = self.not_ender.tryParse
  3305. # must be at least one (but first see if we are the stopOn sentinel;
  3306. # if so, fail)
  3307. if check_ender:
  3308. try_not_ender(instring, loc)
  3309. loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False )
  3310. try:
  3311. hasIgnoreExprs = (not not self.ignoreExprs)
  3312. while 1:
  3313. if check_ender:
  3314. try_not_ender(instring, loc)
  3315. if hasIgnoreExprs:
  3316. preloc = self_skip_ignorables( instring, loc )
  3317. else:
  3318. preloc = loc
  3319. loc, tmptokens = self_expr_parse( instring, preloc, doActions )
  3320. if tmptokens or tmptokens.haskeys():
  3321. tokens += tmptokens
  3322. except (ParseException,IndexError):
  3323. pass
  3324. return loc, tokens
  3325. class OneOrMore(_MultipleMatch):
  3326. """
  3327. Repetition of one or more of the given expression.
  3328. Parameters:
  3329. - expr - expression that must match one or more times
  3330. - stopOn - (default=C{None}) - expression for a terminating sentinel
  3331. (only required if the sentinel would ordinarily match the repetition
  3332. expression)
  3333. Example::
  3334. data_word = Word(alphas)
  3335. label = data_word + FollowedBy(':')
  3336. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
  3337. text = "shape: SQUARE posn: upper left color: BLACK"
  3338. OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]
  3339. # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
  3340. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3341. OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
  3342. # could also be written as
  3343. (attr_expr * (1,)).parseString(text).pprint()
  3344. """
  3345. def __str__( self ):
  3346. if hasattr(self,"name"):
  3347. return self.name
  3348. if self.strRepr is None:
  3349. self.strRepr = "{" + _ustr(self.expr) + "}..."
  3350. return self.strRepr
  3351. class ZeroOrMore(_MultipleMatch):
  3352. """
  3353. Optional repetition of zero or more of the given expression.
  3354. Parameters:
  3355. - expr - expression that must match zero or more times
  3356. - stopOn - (default=C{None}) - expression for a terminating sentinel
  3357. (only required if the sentinel would ordinarily match the repetition
  3358. expression)
  3359. Example: similar to L{OneOrMore}
  3360. """
  3361. def __init__( self, expr, stopOn=None):
  3362. super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
  3363. self.mayReturnEmpty = True
  3364. def parseImpl( self, instring, loc, doActions=True ):
  3365. try:
  3366. return super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
  3367. except (ParseException,IndexError):
  3368. return loc, []
  3369. def __str__( self ):
  3370. if hasattr(self,"name"):
  3371. return self.name
  3372. if self.strRepr is None:
  3373. self.strRepr = "[" + _ustr(self.expr) + "]..."
  3374. return self.strRepr
  3375. class _NullToken(object):
  3376. def __bool__(self):
  3377. return False
  3378. __nonzero__ = __bool__
  3379. def __str__(self):
  3380. return ""
  3381. _optionalNotMatched = _NullToken()
  3382. class Optional(ParseElementEnhance):
  3383. """
  3384. Optional matching of the given expression.
  3385. Parameters:
  3386. - expr - expression that must match zero or more times
  3387. - default (optional) - value to be returned if the optional expression is not found.
  3388. Example::
  3389. # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
  3390. zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
  3391. zip.runTests('''
  3392. # traditional ZIP code
  3393. 12345
  3394. # ZIP+4 form
  3395. 12101-0001
  3396. # invalid ZIP
  3397. 98765-
  3398. ''')
  3399. prints::
  3400. # traditional ZIP code
  3401. 12345
  3402. ['12345']
  3403. # ZIP+4 form
  3404. 12101-0001
  3405. ['12101-0001']
  3406. # invalid ZIP
  3407. 98765-
  3408. ^
  3409. FAIL: Expected end of text (at char 5), (line:1, col:6)
  3410. """
  3411. def __init__( self, expr, default=_optionalNotMatched ):
  3412. super(Optional,self).__init__( expr, savelist=False )
  3413. self.saveAsList = self.expr.saveAsList
  3414. self.defaultValue = default
  3415. self.mayReturnEmpty = True
  3416. def parseImpl( self, instring, loc, doActions=True ):
  3417. try:
  3418. loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
  3419. except (ParseException,IndexError):
  3420. if self.defaultValue is not _optionalNotMatched:
  3421. if self.expr.resultsName:
  3422. tokens = ParseResults([ self.defaultValue ])
  3423. tokens[self.expr.resultsName] = self.defaultValue
  3424. else:
  3425. tokens = [ self.defaultValue ]
  3426. else:
  3427. tokens = []
  3428. return loc, tokens
  3429. def __str__( self ):
  3430. if hasattr(self,"name"):
  3431. return self.name
  3432. if self.strRepr is None:
  3433. self.strRepr = "[" + _ustr(self.expr) + "]"
  3434. return self.strRepr
  3435. class SkipTo(ParseElementEnhance):
  3436. """
  3437. Token for skipping over all undefined text until the matched expression is found.
  3438. Parameters:
  3439. - expr - target expression marking the end of the data to be skipped
  3440. - include - (default=C{False}) if True, the target expression is also parsed
  3441. (the skipped text and target expression are returned as a 2-element list).
  3442. - ignore - (default=C{None}) used to define grammars (typically quoted strings and
  3443. comments) that might contain false matches to the target expression
  3444. - failOn - (default=C{None}) define expressions that are not allowed to be
  3445. included in the skipped test; if found before the target expression is found,
  3446. the SkipTo is not a match
  3447. Example::
  3448. report = '''
  3449. Outstanding Issues Report - 1 Jan 2000
  3450. # | Severity | Description | Days Open
  3451. -----+----------+-------------------------------------------+-----------
  3452. 101 | Critical | Intermittent system crash | 6
  3453. 94 | Cosmetic | Spelling error on Login ('log|n') | 14
  3454. 79 | Minor | System slow when running too many reports | 47
  3455. '''
  3456. integer = Word(nums)
  3457. SEP = Suppress('|')
  3458. # use SkipTo to simply match everything up until the next SEP
  3459. # - ignore quoted strings, so that a '|' character inside a quoted string does not match
  3460. # - parse action will call token.strip() for each matched token, i.e., the description body
  3461. string_data = SkipTo(SEP, ignore=quotedString)
  3462. string_data.setParseAction(tokenMap(str.strip))
  3463. ticket_expr = (integer("issue_num") + SEP
  3464. + string_data("sev") + SEP
  3465. + string_data("desc") + SEP
  3466. + integer("days_open"))
  3467. for tkt in ticket_expr.searchString(report):
  3468. print tkt.dump()
  3469. prints::
  3470. ['101', 'Critical', 'Intermittent system crash', '6']
  3471. - days_open: 6
  3472. - desc: Intermittent system crash
  3473. - issue_num: 101
  3474. - sev: Critical
  3475. ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
  3476. - days_open: 14
  3477. - desc: Spelling error on Login ('log|n')
  3478. - issue_num: 94
  3479. - sev: Cosmetic
  3480. ['79', 'Minor', 'System slow when running too many reports', '47']
  3481. - days_open: 47
  3482. - desc: System slow when running too many reports
  3483. - issue_num: 79
  3484. - sev: Minor
  3485. """
  3486. def __init__( self, other, include=False, ignore=None, failOn=None ):
  3487. super( SkipTo, self ).__init__( other )
  3488. self.ignoreExpr = ignore
  3489. self.mayReturnEmpty = True
  3490. self.mayIndexError = False
  3491. self.includeMatch = include
  3492. self.asList = False
  3493. if isinstance(failOn, basestring):
  3494. self.failOn = ParserElement._literalStringClass(failOn)
  3495. else:
  3496. self.failOn = failOn
  3497. self.errmsg = "No match found for "+_ustr(self.expr)
  3498. def parseImpl( self, instring, loc, doActions=True ):
  3499. startloc = loc
  3500. instrlen = len(instring)
  3501. expr = self.expr
  3502. expr_parse = self.expr._parse
  3503. self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
  3504. self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None
  3505. tmploc = loc
  3506. while tmploc <= instrlen:
  3507. if self_failOn_canParseNext is not None:
  3508. # break if failOn expression matches
  3509. if self_failOn_canParseNext(instring, tmploc):
  3510. break
  3511. if self_ignoreExpr_tryParse is not None:
  3512. # advance past ignore expressions
  3513. while 1:
  3514. try:
  3515. tmploc = self_ignoreExpr_tryParse(instring, tmploc)
  3516. except ParseBaseException:
  3517. break
  3518. try:
  3519. expr_parse(instring, tmploc, doActions=False, callPreParse=False)
  3520. except (ParseException, IndexError):
  3521. # no match, advance loc in string
  3522. tmploc += 1
  3523. else:
  3524. # matched skipto expr, done
  3525. break
  3526. else:
  3527. # ran off the end of the input string without matching skipto expr, fail
  3528. raise ParseException(instring, loc, self.errmsg, self)
  3529. # build up return values
  3530. loc = tmploc
  3531. skiptext = instring[startloc:loc]
  3532. skipresult = ParseResults(skiptext)
  3533. if self.includeMatch:
  3534. loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
  3535. skipresult += mat
  3536. return loc, skipresult
  3537. class Forward(ParseElementEnhance):
  3538. """
  3539. Forward declaration of an expression to be defined later -
  3540. used for recursive grammars, such as algebraic infix notation.
  3541. When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
  3542. Note: take care when assigning to C{Forward} not to overlook precedence of operators.
  3543. Specifically, '|' has a lower precedence than '<<', so that::
  3544. fwdExpr << a | b | c
  3545. will actually be evaluated as::
  3546. (fwdExpr << a) | b | c
  3547. thereby leaving b and c out as parseable alternatives. It is recommended that you
  3548. explicitly group the values inserted into the C{Forward}::
  3549. fwdExpr << (a | b | c)
  3550. Converting to use the '<<=' operator instead will avoid this problem.
  3551. See L{ParseResults.pprint} for an example of a recursive parser created using
  3552. C{Forward}.
  3553. """
  3554. def __init__( self, other=None ):
  3555. super(Forward,self).__init__( other, savelist=False )
  3556. def __lshift__( self, other ):
  3557. if isinstance( other, basestring ):
  3558. other = ParserElement._literalStringClass(other)
  3559. self.expr = other
  3560. self.strRepr = None
  3561. self.mayIndexError = self.expr.mayIndexError
  3562. self.mayReturnEmpty = self.expr.mayReturnEmpty
  3563. self.setWhitespaceChars( self.expr.whiteChars )
  3564. self.skipWhitespace = self.expr.skipWhitespace
  3565. self.saveAsList = self.expr.saveAsList
  3566. self.ignoreExprs.extend(self.expr.ignoreExprs)
  3567. return self
  3568. def __ilshift__(self, other):
  3569. return self << other
  3570. def leaveWhitespace( self ):
  3571. self.skipWhitespace = False
  3572. return self
  3573. def streamline( self ):
  3574. if not self.streamlined:
  3575. self.streamlined = True
  3576. if self.expr is not None:
  3577. self.expr.streamline()
  3578. return self
  3579. def validate( self, validateTrace=[] ):
  3580. if self not in validateTrace:
  3581. tmp = validateTrace[:]+[self]
  3582. if self.expr is not None:
  3583. self.expr.validate(tmp)
  3584. self.checkRecursion([])
  3585. def __str__( self ):
  3586. if hasattr(self,"name"):
  3587. return self.name
  3588. return self.__class__.__name__ + ": ..."
  3589. # stubbed out for now - creates awful memory and perf issues
  3590. self._revertClass = self.__class__
  3591. self.__class__ = _ForwardNoRecurse
  3592. try:
  3593. if self.expr is not None:
  3594. retString = _ustr(self.expr)
  3595. else:
  3596. retString = "None"
  3597. finally:
  3598. self.__class__ = self._revertClass
  3599. return self.__class__.__name__ + ": " + retString
  3600. def copy(self):
  3601. if self.expr is not None:
  3602. return super(Forward,self).copy()
  3603. else:
  3604. ret = Forward()
  3605. ret <<= self
  3606. return ret
  3607. class _ForwardNoRecurse(Forward):
  3608. def __str__( self ):
  3609. return "..."
  3610. class TokenConverter(ParseElementEnhance):
  3611. """
  3612. Abstract subclass of C{ParseExpression}, for converting parsed results.
  3613. """
  3614. def __init__( self, expr, savelist=False ):
  3615. super(TokenConverter,self).__init__( expr )#, savelist )
  3616. self.saveAsList = False
  3617. class Combine(TokenConverter):
  3618. """
  3619. Converter to concatenate all matching tokens to a single string.
  3620. By default, the matching patterns must also be contiguous in the input string;
  3621. this can be disabled by specifying C{'adjacent=False'} in the constructor.
  3622. Example::
  3623. real = Word(nums) + '.' + Word(nums)
  3624. print(real.parseString('3.1416')) # -> ['3', '.', '1416']
  3625. # will also erroneously match the following
  3626. print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
  3627. real = Combine(Word(nums) + '.' + Word(nums))
  3628. print(real.parseString('3.1416')) # -> ['3.1416']
  3629. # no match when there are internal spaces
  3630. print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
  3631. """
  3632. def __init__( self, expr, joinString="", adjacent=True ):
  3633. super(Combine,self).__init__( expr )
  3634. # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
  3635. if adjacent:
  3636. self.leaveWhitespace()
  3637. self.adjacent = adjacent
  3638. self.skipWhitespace = True
  3639. self.joinString = joinString
  3640. self.callPreparse = True
  3641. def ignore( self, other ):
  3642. if self.adjacent:
  3643. ParserElement.ignore(self, other)
  3644. else:
  3645. super( Combine, self).ignore( other )
  3646. return self
  3647. def postParse( self, instring, loc, tokenlist ):
  3648. retToks = tokenlist.copy()
  3649. del retToks[:]
  3650. retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)
  3651. if self.resultsName and retToks.haskeys():
  3652. return [ retToks ]
  3653. else:
  3654. return retToks
  3655. class Group(TokenConverter):
  3656. """
  3657. Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.
  3658. Example::
  3659. ident = Word(alphas)
  3660. num = Word(nums)
  3661. term = ident | num
  3662. func = ident + Optional(delimitedList(term))
  3663. print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']
  3664. func = ident + Group(Optional(delimitedList(term)))
  3665. print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]
  3666. """
  3667. def __init__( self, expr ):
  3668. super(Group,self).__init__( expr )
  3669. self.saveAsList = True
  3670. def postParse( self, instring, loc, tokenlist ):
  3671. return [ tokenlist ]
  3672. class Dict(TokenConverter):
  3673. """
  3674. Converter to return a repetitive expression as a list, but also as a dictionary.
  3675. Each element can also be referenced using the first token in the expression as its key.
  3676. Useful for tabular report scraping when the first column can be used as a item key.
  3677. Example::
  3678. data_word = Word(alphas)
  3679. label = data_word + FollowedBy(':')
  3680. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
  3681. text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
  3682. attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3683. # print attributes as plain groups
  3684. print(OneOrMore(attr_expr).parseString(text).dump())
  3685. # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
  3686. result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
  3687. print(result.dump())
  3688. # access named fields as dict entries, or output as dict
  3689. print(result['shape'])
  3690. print(result.asDict())
  3691. prints::
  3692. ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
  3693. [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
  3694. - color: light blue
  3695. - posn: upper left
  3696. - shape: SQUARE
  3697. - texture: burlap
  3698. SQUARE
  3699. {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
  3700. See more examples at L{ParseResults} of accessing fields by results name.
  3701. """
  3702. def __init__( self, expr ):
  3703. super(Dict,self).__init__( expr )
  3704. self.saveAsList = True
  3705. def postParse( self, instring, loc, tokenlist ):
  3706. for i,tok in enumerate(tokenlist):
  3707. if len(tok) == 0:
  3708. continue
  3709. ikey = tok[0]
  3710. if isinstance(ikey,int):
  3711. ikey = _ustr(tok[0]).strip()
  3712. if len(tok)==1:
  3713. tokenlist[ikey] = _ParseResultsWithOffset("",i)
  3714. elif len(tok)==2 and not isinstance(tok[1],ParseResults):
  3715. tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
  3716. else:
  3717. dictvalue = tok.copy() #ParseResults(i)
  3718. del dictvalue[0]
  3719. if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()):
  3720. tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
  3721. else:
  3722. tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)
  3723. if self.resultsName:
  3724. return [ tokenlist ]
  3725. else:
  3726. return tokenlist
  3727. class Suppress(TokenConverter):
  3728. """
  3729. Converter for ignoring the results of a parsed expression.
  3730. Example::
  3731. source = "a, b, c,d"
  3732. wd = Word(alphas)
  3733. wd_list1 = wd + ZeroOrMore(',' + wd)
  3734. print(wd_list1.parseString(source))
  3735. # often, delimiters that are useful during parsing are just in the
  3736. # way afterward - use Suppress to keep them out of the parsed output
  3737. wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
  3738. print(wd_list2.parseString(source))
  3739. prints::
  3740. ['a', ',', 'b', ',', 'c', ',', 'd']
  3741. ['a', 'b', 'c', 'd']
  3742. (See also L{delimitedList}.)
  3743. """
  3744. def postParse( self, instring, loc, tokenlist ):
  3745. return []
  3746. def suppress( self ):
  3747. return self
  3748. class OnlyOnce(object):
  3749. """
  3750. Wrapper for parse actions, to ensure they are only called once.
  3751. """
  3752. def __init__(self, methodCall):
  3753. self.callable = _trim_arity(methodCall)
  3754. self.called = False
  3755. def __call__(self,s,l,t):
  3756. if not self.called:
  3757. results = self.callable(s,l,t)
  3758. self.called = True
  3759. return results
  3760. raise ParseException(s,l,"")
  3761. def reset(self):
  3762. self.called = False
  3763. def traceParseAction(f):
  3764. """
  3765. Decorator for debugging parse actions.
  3766. When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
  3767. When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.
  3768. Example::
  3769. wd = Word(alphas)
  3770. @traceParseAction
  3771. def remove_duplicate_chars(tokens):
  3772. return ''.join(sorted(set(''.join(tokens))))
  3773. wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
  3774. print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
  3775. prints::
  3776. >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
  3777. <<leaving remove_duplicate_chars (ret: 'dfjkls')
  3778. ['dfjkls']
  3779. """
  3780. f = _trim_arity(f)
  3781. def z(*paArgs):
  3782. thisFunc = f.__name__
  3783. s,l,t = paArgs[-3:]
  3784. if len(paArgs)>3:
  3785. thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
  3786. sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
  3787. try:
  3788. ret = f(*paArgs)
  3789. except Exception as exc:
  3790. sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
  3791. raise
  3792. sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
  3793. return ret
  3794. try:
  3795. z.__name__ = f.__name__
  3796. except AttributeError:
  3797. pass
  3798. return z
  3799. #
  3800. # global helpers
  3801. #
  3802. def delimitedList( expr, delim=",", combine=False ):
  3803. """
  3804. Helper to define a delimited list of expressions - the delimiter defaults to ','.
  3805. By default, the list elements and delimiters can have intervening whitespace, and
  3806. comments, but this can be overridden by passing C{combine=True} in the constructor.
  3807. If C{combine} is set to C{True}, the matching tokens are returned as a single token
  3808. string, with the delimiters included; otherwise, the matching tokens are returned
  3809. as a list of tokens, with the delimiters suppressed.
  3810. Example::
  3811. delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
  3812. delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
  3813. """
  3814. dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
  3815. if combine:
  3816. return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
  3817. else:
  3818. return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
  3819. def countedArray( expr, intExpr=None ):
  3820. """
  3821. Helper to define a counted list of expressions.
  3822. This helper defines a pattern of the form::
  3823. integer expr expr expr...
  3824. where the leading integer tells how many expr expressions follow.
  3825. The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
  3826. If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.
  3827. Example::
  3828. countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']
  3829. # in this parser, the leading integer value is given in binary,
  3830. # '10' indicating that 2 values are in the array
  3831. binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
  3832. countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd']
  3833. """
  3834. arrayExpr = Forward()
  3835. def countFieldParseAction(s,l,t):
  3836. n = t[0]
  3837. arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
  3838. return []
  3839. if intExpr is None:
  3840. intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
  3841. else:
  3842. intExpr = intExpr.copy()
  3843. intExpr.setName("arrayLen")
  3844. intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
  3845. return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
  3846. def _flatten(L):
  3847. ret = []
  3848. for i in L:
  3849. if isinstance(i,list):
  3850. ret.extend(_flatten(i))
  3851. else:
  3852. ret.append(i)
  3853. return ret
  3854. def matchPreviousLiteral(expr):
  3855. """
  3856. Helper to define an expression that is indirectly defined from
  3857. the tokens matched in a previous expression, that is, it looks
  3858. for a 'repeat' of a previous expression. For example::
  3859. first = Word(nums)
  3860. second = matchPreviousLiteral(first)
  3861. matchExpr = first + ":" + second
  3862. will match C{"1:1"}, but not C{"1:2"}. Because this matches a
  3863. previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
  3864. If this is not desired, use C{matchPreviousExpr}.
  3865. Do I{not} use with packrat parsing enabled.
  3866. """
  3867. rep = Forward()
  3868. def copyTokenToRepeater(s,l,t):
  3869. if t:
  3870. if len(t) == 1:
  3871. rep << t[0]
  3872. else:
  3873. # flatten t tokens
  3874. tflat = _flatten(t.asList())
  3875. rep << And(Literal(tt) for tt in tflat)
  3876. else:
  3877. rep << Empty()
  3878. expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
  3879. rep.setName('(prev) ' + _ustr(expr))
  3880. return rep
  3881. def matchPreviousExpr(expr):
  3882. """
  3883. Helper to define an expression that is indirectly defined from
  3884. the tokens matched in a previous expression, that is, it looks
  3885. for a 'repeat' of a previous expression. For example::
  3886. first = Word(nums)
  3887. second = matchPreviousExpr(first)
  3888. matchExpr = first + ":" + second
  3889. will match C{"1:1"}, but not C{"1:2"}. Because this matches by
  3890. expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
  3891. the expressions are evaluated first, and then compared, so
  3892. C{"1"} is compared with C{"10"}.
  3893. Do I{not} use with packrat parsing enabled.
  3894. """
  3895. rep = Forward()
  3896. e2 = expr.copy()
  3897. rep <<= e2
  3898. def copyTokenToRepeater(s,l,t):
  3899. matchTokens = _flatten(t.asList())
  3900. def mustMatchTheseTokens(s,l,t):
  3901. theseTokens = _flatten(t.asList())
  3902. if theseTokens != matchTokens:
  3903. raise ParseException("",0,"")
  3904. rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
  3905. expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
  3906. rep.setName('(prev) ' + _ustr(expr))
  3907. return rep
  3908. def _escapeRegexRangeChars(s):
  3909. #~ escape these chars: ^-]
  3910. for c in r"\^-]":
  3911. s = s.replace(c,_bslash+c)
  3912. s = s.replace("\n",r"\n")
  3913. s = s.replace("\t",r"\t")
  3914. return _ustr(s)
  3915. def oneOf( strs, caseless=False, useRegex=True ):
  3916. """
  3917. Helper to quickly define a set of alternative Literals, and makes sure to do
  3918. longest-first testing when there is a conflict, regardless of the input order,
  3919. but returns a C{L{MatchFirst}} for best performance.
  3920. Parameters:
  3921. - strs - a string of space-delimited literals, or a collection of string literals
  3922. - caseless - (default=C{False}) - treat all literals as caseless
  3923. - useRegex - (default=C{True}) - as an optimization, will generate a Regex
  3924. object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
  3925. if creating a C{Regex} raises an exception)
  3926. Example::
  3927. comp_oper = oneOf("< = > <= >= !=")
  3928. var = Word(alphas)
  3929. number = Word(nums)
  3930. term = var | number
  3931. comparison_expr = term + comp_oper + term
  3932. print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12"))
  3933. prints::
  3934. [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
  3935. """
  3936. if caseless:
  3937. isequal = ( lambda a,b: a.upper() == b.upper() )
  3938. masks = ( lambda a,b: b.upper().startswith(a.upper()) )
  3939. parseElementClass = CaselessLiteral
  3940. else:
  3941. isequal = ( lambda a,b: a == b )
  3942. masks = ( lambda a,b: b.startswith(a) )
  3943. parseElementClass = Literal
  3944. symbols = []
  3945. if isinstance(strs,basestring):
  3946. symbols = strs.split()
  3947. elif isinstance(strs, Iterable):
  3948. symbols = list(strs)
  3949. else:
  3950. warnings.warn("Invalid argument to oneOf, expected string or iterable",
  3951. SyntaxWarning, stacklevel=2)
  3952. if not symbols:
  3953. return NoMatch()
  3954. i = 0
  3955. while i < len(symbols)-1:
  3956. cur = symbols[i]
  3957. for j,other in enumerate(symbols[i+1:]):
  3958. if ( isequal(other, cur) ):
  3959. del symbols[i+j+1]
  3960. break
  3961. elif ( masks(cur, other) ):
  3962. del symbols[i+j+1]
  3963. symbols.insert(i,other)
  3964. cur = other
  3965. break
  3966. else:
  3967. i += 1
  3968. if not caseless and useRegex:
  3969. #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
  3970. try:
  3971. if len(symbols)==len("".join(symbols)):
  3972. return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
  3973. else:
  3974. return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
  3975. except Exception:
  3976. warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
  3977. SyntaxWarning, stacklevel=2)
  3978. # last resort, just use MatchFirst
  3979. return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
  3980. def dictOf( key, value ):
  3981. """
  3982. Helper to easily and clearly define a dictionary by specifying the respective patterns
  3983. for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
  3984. in the proper order. The key pattern can include delimiting markers or punctuation,
  3985. as long as they are suppressed, thereby leaving the significant key text. The value
  3986. pattern can include named results, so that the C{Dict} results can include named token
  3987. fields.
  3988. Example::
  3989. text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
  3990. attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3991. print(OneOrMore(attr_expr).parseString(text).dump())
  3992. attr_label = label
  3993. attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
  3994. # similar to Dict, but simpler call format
  3995. result = dictOf(attr_label, attr_value).parseString(text)
  3996. print(result.dump())
  3997. print(result['shape'])
  3998. print(result.shape) # object attribute access works too
  3999. print(result.asDict())
  4000. prints::
  4001. [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
  4002. - color: light blue
  4003. - posn: upper left
  4004. - shape: SQUARE
  4005. - texture: burlap
  4006. SQUARE
  4007. SQUARE
  4008. {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
  4009. """
  4010. return Dict( ZeroOrMore( Group ( key + value ) ) )
  4011. def originalTextFor(expr, asString=True):
  4012. """
  4013. Helper to return the original, untokenized text for a given expression. Useful to
  4014. restore the parsed fields of an HTML start tag into the raw tag text itself, or to
  4015. revert separate tokens with intervening whitespace back to the original matching
  4016. input text. By default, returns astring containing the original parsed text.
  4017. If the optional C{asString} argument is passed as C{False}, then the return value is a
  4018. C{L{ParseResults}} containing any results names that were originally matched, and a
  4019. single token containing the original matched text from the input string. So if
  4020. the expression passed to C{L{originalTextFor}} contains expressions with defined
  4021. results names, you must set C{asString} to C{False} if you want to preserve those
  4022. results name values.
  4023. Example::
  4024. src = "this is test <b> bold <i>text</i> </b> normal text "
  4025. for tag in ("b","i"):
  4026. opener,closer = makeHTMLTags(tag)
  4027. patt = originalTextFor(opener + SkipTo(closer) + closer)
  4028. print(patt.searchString(src)[0])
  4029. prints::
  4030. ['<b> bold <i>text</i> </b>']
  4031. ['<i>text</i>']
  4032. """
  4033. locMarker = Empty().setParseAction(lambda s,loc,t: loc)
  4034. endlocMarker = locMarker.copy()
  4035. endlocMarker.callPreparse = False
  4036. matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
  4037. if asString:
  4038. extractText = lambda s,l,t: s[t._original_start:t._original_end]
  4039. else:
  4040. def extractText(s,l,t):
  4041. t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
  4042. matchExpr.setParseAction(extractText)
  4043. matchExpr.ignoreExprs = expr.ignoreExprs
  4044. return matchExpr
  4045. def ungroup(expr):
  4046. """
  4047. Helper to undo pyparsing's default grouping of And expressions, even
  4048. if all but one are non-empty.
  4049. """
  4050. return TokenConverter(expr).setParseAction(lambda t:t[0])
  4051. def locatedExpr(expr):
  4052. """
  4053. Helper to decorate a returned token with its starting and ending locations in the input string.
  4054. This helper adds the following results names:
  4055. - locn_start = location where matched expression begins
  4056. - locn_end = location where matched expression ends
  4057. - value = the actual parsed results
  4058. Be careful if the input text contains C{<TAB>} characters, you may want to call
  4059. C{L{ParserElement.parseWithTabs}}
  4060. Example::
  4061. wd = Word(alphas)
  4062. for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
  4063. print(match)
  4064. prints::
  4065. [[0, 'ljsdf', 5]]
  4066. [[8, 'lksdjjf', 15]]
  4067. [[18, 'lkkjj', 23]]
  4068. """
  4069. locator = Empty().setParseAction(lambda s,l,t: l)
  4070. return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
  4071. # convenience constants for positional expressions
  4072. empty = Empty().setName("empty")
  4073. lineStart = LineStart().setName("lineStart")
  4074. lineEnd = LineEnd().setName("lineEnd")
  4075. stringStart = StringStart().setName("stringStart")
  4076. stringEnd = StringEnd().setName("stringEnd")
  4077. _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
  4078. _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
  4079. _escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
  4080. _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
  4081. _charRange = Group(_singleChar + Suppress("-") + _singleChar)
  4082. _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
  4083. def srange(s):
  4084. r"""
  4085. Helper to easily define string ranges for use in Word construction. Borrows
  4086. syntax from regexp '[]' string range definitions::
  4087. srange("[0-9]") -> "0123456789"
  4088. srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
  4089. srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
  4090. The input string must be enclosed in []'s, and the returned string is the expanded
  4091. character set joined into a single string.
  4092. The values enclosed in the []'s may be:
  4093. - a single character
  4094. - an escaped character with a leading backslash (such as C{\-} or C{\]})
  4095. - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
  4096. (C{\0x##} is also supported for backwards compatibility)
  4097. - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
  4098. - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
  4099. - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
  4100. """
  4101. _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
  4102. try:
  4103. return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
  4104. except Exception:
  4105. return ""
  4106. def matchOnlyAtCol(n):
  4107. """
  4108. Helper method for defining parse actions that require matching at a specific
  4109. column in the input text.
  4110. """
  4111. def verifyCol(strg,locn,toks):
  4112. if col(locn,strg) != n:
  4113. raise ParseException(strg,locn,"matched token not at column %d" % n)
  4114. return verifyCol
  4115. def replaceWith(replStr):
  4116. """
  4117. Helper method for common parse actions that simply return a literal value. Especially
  4118. useful when used with C{L{transformString<ParserElement.transformString>}()}.
  4119. Example::
  4120. num = Word(nums).setParseAction(lambda toks: int(toks[0]))
  4121. na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
  4122. term = na | num
  4123. OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
  4124. """
  4125. return lambda s,l,t: [replStr]
  4126. def removeQuotes(s,l,t):
  4127. """
  4128. Helper parse action for removing quotation marks from parsed quoted strings.
  4129. Example::
  4130. # by default, quotation marks are included in parsed results
  4131. quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
  4132. # use removeQuotes to strip quotation marks from parsed results
  4133. quotedString.setParseAction(removeQuotes)
  4134. quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
  4135. """
  4136. return t[0][1:-1]
  4137. def tokenMap(func, *args):
  4138. """
  4139. Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional
  4140. args are passed, they are forwarded to the given function as additional arguments after
  4141. the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
  4142. parsed data to an integer using base 16.
  4143. Example (compare the last to example in L{ParserElement.transformString}::
  4144. hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
  4145. hex_ints.runTests('''
  4146. 00 11 22 aa FF 0a 0d 1a
  4147. ''')
  4148. upperword = Word(alphas).setParseAction(tokenMap(str.upper))
  4149. OneOrMore(upperword).runTests('''
  4150. my kingdom for a horse
  4151. ''')
  4152. wd = Word(alphas).setParseAction(tokenMap(str.title))
  4153. OneOrMore(wd).setParseAction(' '.join).runTests('''
  4154. now is the winter of our discontent made glorious summer by this sun of york
  4155. ''')
  4156. prints::
  4157. 00 11 22 aa FF 0a 0d 1a
  4158. [0, 17, 34, 170, 255, 10, 13, 26]
  4159. my kingdom for a horse
  4160. ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
  4161. now is the winter of our discontent made glorious summer by this sun of york
  4162. ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
  4163. """
  4164. def pa(s,l,t):
  4165. return [func(tokn, *args) for tokn in t]
  4166. try:
  4167. func_name = getattr(func, '__name__',
  4168. getattr(func, '__class__').__name__)
  4169. except Exception:
  4170. func_name = str(func)
  4171. pa.__name__ = func_name
  4172. return pa
# Parse action built with tokenMap: each token is converted with _ustr and then upper-cased.
upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

# Parse action built with tokenMap: each token is converted with _ustr and then lower-cased.
downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
  4177. def _makeTags(tagStr, xml):
  4178. """Internal helper to construct opening and closing tag expressions, given a tag name"""
  4179. if isinstance(tagStr,basestring):
  4180. resname = tagStr
  4181. tagStr = Keyword(tagStr, caseless=not xml)
  4182. else:
  4183. resname = tagStr.name
  4184. tagAttrName = Word(alphas,alphanums+"_-:")
  4185. if (xml):
  4186. tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
  4187. openTag = Suppress("<") + tagStr("tag") + \
  4188. Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
  4189. Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
  4190. else:
  4191. printablesLessRAbrack = "".join(c for c in printables if c not in ">")
  4192. tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
  4193. openTag = Suppress("<") + tagStr("tag") + \
  4194. Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
  4195. Optional( Suppress("=") + tagAttrValue ) ))) + \
  4196. Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
  4197. closeTag = Combine(_L("</") + tagStr + ">")
  4198. openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname)
  4199. closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % resname)
  4200. openTag.tag = resname
  4201. closeTag.tag = resname
  4202. return openTag, closeTag
  4203. def makeHTMLTags(tagStr):
  4204. """
  4205. Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
  4206. tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.
  4207. Example::
  4208. text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
  4209. # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
  4210. a,a_end = makeHTMLTags("A")
  4211. link_expr = a + SkipTo(a_end)("link_text") + a_end
  4212. for link in link_expr.searchString(text):
  4213. # attributes in the <A> tag (like "href" shown here) are also accessible as named results
  4214. print(link.link_text, '->', link.href)
  4215. prints::
  4216. pyparsing -> http://pyparsing.wikispaces.com
  4217. """
  4218. return _makeTags( tagStr, False )
  4219. def makeXMLTags(tagStr):
  4220. """
  4221. Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
  4222. tags only in the given upper/lower case.
  4223. Example: similar to L{makeHTMLTags}
  4224. """
  4225. return _makeTags( tagStr, True )
  4226. def withAttribute(*args,**attrDict):
  4227. """
  4228. Helper to create a validating parse action to be used with start tags created
  4229. with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
  4230. with a required attribute value, to avoid false matches on common tags such as
  4231. C{<TD>} or C{<DIV>}.
  4232. Call C{withAttribute} with a series of attribute names and values. Specify the list
  4233. of filter attributes names and values as:
  4234. - keyword arguments, as in C{(align="right")}, or
  4235. - as an explicit dict with C{**} operator, when an attribute name is also a Python
  4236. reserved word, as in C{**{"class":"Customer", "align":"right"}}
  4237. - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
  4238. For attribute names with a namespace prefix, you must use the second form. Attribute
  4239. names are matched insensitive to upper/lower case.
  4240. If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
  4241. To verify that the attribute exists, but without specifying a value, pass
  4242. C{withAttribute.ANY_VALUE} as the value.
  4243. Example::
  4244. html = '''
  4245. <div>
  4246. Some text
  4247. <div type="grid">1 4 0 1 0</div>
  4248. <div type="graph">1,3 2,3 1,1</div>
  4249. <div>this has no type</div>
  4250. </div>
  4251. '''
  4252. div,div_end = makeHTMLTags("div")
  4253. # only match div tag having a type attribute with value "grid"
  4254. div_grid = div().setParseAction(withAttribute(type="grid"))
  4255. grid_expr = div_grid + SkipTo(div | div_end)("body")
  4256. for grid_header in grid_expr.searchString(html):
  4257. print(grid_header.body)
  4258. # construct a match with any div tag having a type attribute, regardless of the value
  4259. div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
  4260. div_expr = div_any_type + SkipTo(div | div_end)("body")
  4261. for div_header in div_expr.searchString(html):
  4262. print(div_header.body)
  4263. prints::
  4264. 1 4 0 1 0
  4265. 1 4 0 1 0
  4266. 1,3 2,3 1,1
  4267. """
  4268. if args:
  4269. attrs = args[:]
  4270. else:
  4271. attrs = attrDict.items()
  4272. attrs = [(k,v) for k,v in attrs]
  4273. def pa(s,l,tokens):
  4274. for attrName,attrValue in attrs:
  4275. if attrName not in tokens:
  4276. raise ParseException(s,l,"no matching attribute " + attrName)
  4277. if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
  4278. raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
  4279. (attrName, tokens[attrName], attrValue))
  4280. return pa
  4281. withAttribute.ANY_VALUE = object()
  4282. def withClass(classname, namespace=''):
  4283. """
  4284. Simplified version of C{L{withAttribute}} when matching on a div class - made
  4285. difficult because C{class} is a reserved word in Python.
  4286. Example::
  4287. html = '''
  4288. <div>
  4289. Some text
  4290. <div class="grid">1 4 0 1 0</div>
  4291. <div class="graph">1,3 2,3 1,1</div>
  4292. <div>this &lt;div&gt; has no class</div>
  4293. </div>
  4294. '''
  4295. div,div_end = makeHTMLTags("div")
  4296. div_grid = div().setParseAction(withClass("grid"))
  4297. grid_expr = div_grid + SkipTo(div | div_end)("body")
  4298. for grid_header in grid_expr.searchString(html):
  4299. print(grid_header.body)
  4300. div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
  4301. div_expr = div_any_type + SkipTo(div | div_end)("body")
  4302. for div_header in div_expr.searchString(html):
  4303. print(div_header.body)
  4304. prints::
  4305. 1 4 0 1 0
  4306. 1 4 0 1 0
  4307. 1,3 2,3 1,1
  4308. """
  4309. classattr = "%s:class" % namespace if namespace else "class"
  4310. return withAttribute(**{classattr : classname})
# Associativity markers for the operator definitions passed to infixNotation.
# LEFT and RIGHT are two distinct, otherwise featureless sentinel objects;
# infixNotation compares the rightLeftAssoc member of each operator tuple
# against them.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
  4314. def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
  4315. """
  4316. Helper method for constructing grammars of expressions made up of
  4317. operators working in a precedence hierarchy. Operators may be unary or
  4318. binary, left- or right-associative. Parse actions can also be attached
  4319. to operator expressions. The generated parser will also recognize the use
  4320. of parentheses to override operator precedences (see example below).
  4321. Note: if you define a deep operator list, you may see performance issues
  4322. when using infixNotation. See L{ParserElement.enablePackrat} for a
  4323. mechanism to potentially improve your parser performance.
  4324. Parameters:
  4325. - baseExpr - expression representing the most basic element for the nested
  4326. - opList - list of tuples, one for each operator precedence level in the
  4327. expression grammar; each tuple is of the form
  4328. (opExpr, numTerms, rightLeftAssoc, parseAction), where:
  4329. - opExpr is the pyparsing expression for the operator;
  4330. may also be a string, which will be converted to a Literal;
  4331. if numTerms is 3, opExpr is a tuple of two expressions, for the
  4332. two operators separating the 3 terms
  4333. - numTerms is the number of terms for this operator (must
  4334. be 1, 2, or 3)
  4335. - rightLeftAssoc is the indicator whether the operator is
  4336. right or left associative, using the pyparsing-defined
  4337. constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
  4338. - parseAction is the parse action to be associated with
  4339. expressions matching this operator expression (the
  4340. parse action tuple member may be omitted); if the parse action
  4341. is passed a tuple or list of functions, this is equivalent to
  4342. calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
  4343. - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
  4344. - rpar - expression for matching right-parentheses (default=C{Suppress(')')})
  4345. Example::
  4346. # simple example of four-function arithmetic with ints and variable names
  4347. integer = pyparsing_common.signed_integer
  4348. varname = pyparsing_common.identifier
  4349. arith_expr = infixNotation(integer | varname,
  4350. [
  4351. ('-', 1, opAssoc.RIGHT),
  4352. (oneOf('* /'), 2, opAssoc.LEFT),
  4353. (oneOf('+ -'), 2, opAssoc.LEFT),
  4354. ])
  4355. arith_expr.runTests('''
  4356. 5+3*6
  4357. (5+3)*6
  4358. -2--11
  4359. ''', fullDump=False)
  4360. prints::
  4361. 5+3*6
  4362. [[5, '+', [3, '*', 6]]]
  4363. (5+3)*6
  4364. [[[5, '+', 3], '*', 6]]
  4365. -2--11
  4366. [[['-', 2], '-', ['-', 11]]]
  4367. """
  4368. ret = Forward()
  4369. lastExpr = baseExpr | ( lpar + ret + rpar )
  4370. for i,operDef in enumerate(opList):
  4371. opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
  4372. termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
  4373. if arity == 3:
  4374. if opExpr is None or len(opExpr) != 2:
  4375. raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
  4376. opExpr1, opExpr2 = opExpr
  4377. thisExpr = Forward().setName(termName)
  4378. if rightLeftAssoc == opAssoc.LEFT:
  4379. if arity == 1:
  4380. matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
  4381. elif arity == 2:
  4382. if opExpr is not None:
  4383. matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
  4384. else:
  4385. matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
  4386. elif arity == 3:
  4387. matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
  4388. Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
  4389. else:
  4390. raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
  4391. elif rightLeftAssoc == opAssoc.RIGHT:
  4392. if arity == 1:
  4393. # try to avoid LR with this extra test
  4394. if not isinstance(opExpr, Optional):
  4395. opExpr = Optional(opExpr)
  4396. matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
  4397. elif arity == 2:
  4398. if opExpr is not None:
  4399. matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
  4400. else:
  4401. matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
  4402. elif arity == 3:
  4403. matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
  4404. Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
  4405. else:
  4406. raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
  4407. else:
  4408. raise ValueError("operator must indicate right or left associativity")
  4409. if pa:
  4410. if isinstance(pa, (tuple, list)):
  4411. matchExpr.setParseAction(*pa)
  4412. else:
  4413. matchExpr.setParseAction(pa)
  4414. thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
  4415. lastExpr = thisExpr
  4416. ret <<= lastExpr
  4417. return ret
  4418. operatorPrecedence = infixNotation
  4419. """(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""
  4420. dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
  4421. sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
  4422. quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
  4423. Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
  4424. unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
  4425. def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
  4426. """
  4427. Helper method for defining nested lists enclosed in opening and closing
  4428. delimiters ("(" and ")" are the default).
  4429. Parameters:
  4430. - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
  4431. - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
  4432. - content - expression for items within the nested lists (default=C{None})
  4433. - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})
  4434. If an expression is not provided for the content argument, the nested
  4435. expression will capture all whitespace-delimited content between delimiters
  4436. as a list of separate values.
  4437. Use the C{ignoreExpr} argument to define expressions that may contain
  4438. opening or closing characters that should not be treated as opening
  4439. or closing characters for nesting, such as quotedString or a comment
  4440. expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
  4441. The default is L{quotedString}, but if no expressions are to be ignored,
  4442. then pass C{None} for this argument.
  4443. Example::
  4444. data_type = oneOf("void int short long char float double")
  4445. decl_data_type = Combine(data_type + Optional(Word('*')))
  4446. ident = Word(alphas+'_', alphanums+'_')
  4447. number = pyparsing_common.number
  4448. arg = Group(decl_data_type + ident)
  4449. LPAR,RPAR = map(Suppress, "()")
  4450. code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))
  4451. c_function = (decl_data_type("type")
  4452. + ident("name")
  4453. + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
  4454. + code_body("body"))
  4455. c_function.ignore(cStyleComment)
  4456. source_code = '''
  4457. int is_odd(int x) {
  4458. return (x%2);
  4459. }
  4460. int dec_to_hex(char hchar) {
  4461. if (hchar >= '0' && hchar <= '9') {
  4462. return (ord(hchar)-ord('0'));
  4463. } else {
  4464. return (10+ord(hchar)-ord('A'));
  4465. }
  4466. }
  4467. '''
  4468. for func in c_function.searchString(source_code):
  4469. print("%(name)s (%(type)s) args: %(args)s" % func)
  4470. prints::
  4471. is_odd (int) args: [['int', 'x']]
  4472. dec_to_hex (int) args: [['char', 'hchar']]
  4473. """
  4474. if opener == closer:
  4475. raise ValueError("opening and closing strings cannot be the same")
  4476. if content is None:
  4477. if isinstance(opener,basestring) and isinstance(closer,basestring):
  4478. if len(opener) == 1 and len(closer)==1:
  4479. if ignoreExpr is not None:
  4480. content = (Combine(OneOrMore(~ignoreExpr +
  4481. CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
  4482. ).setParseAction(lambda t:t[0].strip()))
  4483. else:
  4484. content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
  4485. ).setParseAction(lambda t:t[0].strip()))
  4486. else:
  4487. if ignoreExpr is not None:
  4488. content = (Combine(OneOrMore(~ignoreExpr +
  4489. ~Literal(opener) + ~Literal(closer) +
  4490. CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
  4491. ).setParseAction(lambda t:t[0].strip()))
  4492. else:
  4493. content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
  4494. CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
  4495. ).setParseAction(lambda t:t[0].strip()))
  4496. else:
  4497. raise ValueError("opening and closing arguments must be strings if no content expression is given")
  4498. ret = Forward()
  4499. if ignoreExpr is not None:
  4500. ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
  4501. else:
  4502. ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
  4503. ret.setName('nested %s%s expression' % (opener,closer))
  4504. return ret
  4505. def indentedBlock(blockStatementExpr, indentStack, indent=True):
  4506. """
  4507. Helper method for defining space-delimited indentation blocks, such as
  4508. those used to define block statements in Python source code.
  4509. Parameters:
  4510. - blockStatementExpr - expression defining syntax of statement that
  4511. is repeated within the indented block
  4512. - indentStack - list created by caller to manage indentation stack
  4513. (multiple statementWithIndentedBlock expressions within a single grammar
  4514. should share a common indentStack)
  4515. - indent - boolean indicating whether block must be indented beyond the
  4516. the current level; set to False for block of left-most statements
  4517. (default=C{True})
  4518. A valid block must contain at least one C{blockStatement}.
  4519. Example::
  4520. data = '''
  4521. def A(z):
  4522. A1
  4523. B = 100
  4524. G = A2
  4525. A2
  4526. A3
  4527. B
  4528. def BB(a,b,c):
  4529. BB1
  4530. def BBA():
  4531. bba1
  4532. bba2
  4533. bba3
  4534. C
  4535. D
  4536. def spam(x,y):
  4537. def eggs(z):
  4538. pass
  4539. '''
  4540. indentStack = [1]
  4541. stmt = Forward()
  4542. identifier = Word(alphas, alphanums)
  4543. funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
  4544. func_body = indentedBlock(stmt, indentStack)
  4545. funcDef = Group( funcDecl + func_body )
  4546. rvalue = Forward()
  4547. funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
  4548. rvalue << (funcCall | identifier | Word(nums))
  4549. assignment = Group(identifier + "=" + rvalue)
  4550. stmt << ( funcDef | assignment | identifier )
  4551. module_body = OneOrMore(stmt)
  4552. parseTree = module_body.parseString(data)
  4553. parseTree.pprint()
  4554. prints::
  4555. [['def',
  4556. 'A',
  4557. ['(', 'z', ')'],
  4558. ':',
  4559. [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
  4560. 'B',
  4561. ['def',
  4562. 'BB',
  4563. ['(', 'a', 'b', 'c', ')'],
  4564. ':',
  4565. [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
  4566. 'C',
  4567. 'D',
  4568. ['def',
  4569. 'spam',
  4570. ['(', 'x', 'y', ')'],
  4571. ':',
  4572. [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
  4573. """
  4574. def checkPeerIndent(s,l,t):
  4575. if l >= len(s): return
  4576. curCol = col(l,s)
  4577. if curCol != indentStack[-1]:
  4578. if curCol > indentStack[-1]:
  4579. raise ParseFatalException(s,l,"illegal nesting")
  4580. raise ParseException(s,l,"not a peer entry")
  4581. def checkSubIndent(s,l,t):
  4582. curCol = col(l,s)
  4583. if curCol > indentStack[-1]:
  4584. indentStack.append( curCol )
  4585. else:
  4586. raise ParseException(s,l,"not a subentry")
  4587. def checkUnindent(s,l,t):
  4588. if l >= len(s): return
  4589. curCol = col(l,s)
  4590. if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
  4591. raise ParseException(s,l,"not an unindent")
  4592. indentStack.pop()
  4593. NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
  4594. INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
  4595. PEER = Empty().setParseAction(checkPeerIndent).setName('')
  4596. UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
  4597. if indent:
  4598. smExpr = Group( Optional(NL) +
  4599. #~ FollowedBy(blockStatementExpr) +
  4600. INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
  4601. else:
  4602. smExpr = Group( Optional(NL) +
  4603. (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
  4604. blockStatementExpr.ignore(_bslash + LineEnd())
  4605. return smExpr.setName('indented block')
  4606. alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
  4607. punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
  4608. anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
  4609. _htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
  4610. commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
  4611. def replaceHTMLEntity(t):
  4612. """Helper parser action to replace common HTML entities with their special characters"""
  4613. return _htmlEntityMap.get(t.entity)
  4614. # it's easy to get these comment structures wrong - they're very common, so may as well make them available
  4615. cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
  4616. "Comment of the form C{/* ... */}"
  4617. htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
  4618. "Comment of the form C{<!-- ... -->}"
  4619. restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
  4620. dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
  4621. "Comment of the form C{// ... (to end of line)}"
  4622. cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
  4623. "Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"
  4624. javaStyleComment = cppStyleComment
  4625. "Same as C{L{cppStyleComment}}"
  4626. pythonStyleComment = Regex(r"#.*").setName("Python style comment")
  4627. "Comment of the form C{# ... (to end of line)}"
  4628. _commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
  4629. Optional( Word(" \t") +
  4630. ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
  4631. commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
  4632. """(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
  4633. This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
  4634. # some other useful expressions - using lower-case class name since we are really using this as a namespace
  4635. class pyparsing_common:
  4636. """
  4637. Here are some common low-level expressions that may be useful in jump-starting parser development:
  4638. - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
  4639. - common L{programming identifiers<identifier>}
  4640. - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
  4641. - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
  4642. - L{UUID<uuid>}
  4643. - L{comma-separated list<comma_separated_list>}
  4644. Parse actions:
  4645. - C{L{convertToInteger}}
  4646. - C{L{convertToFloat}}
  4647. - C{L{convertToDate}}
  4648. - C{L{convertToDatetime}}
  4649. - C{L{stripHTMLTags}}
  4650. - C{L{upcaseTokens}}
  4651. - C{L{downcaseTokens}}
  4652. Example::
  4653. pyparsing_common.number.runTests('''
  4654. # any int or real number, returned as the appropriate type
  4655. 100
  4656. -100
  4657. +100
  4658. 3.14159
  4659. 6.02e23
  4660. 1e-12
  4661. ''')
  4662. pyparsing_common.fnumber.runTests('''
  4663. # any int or real number, returned as float
  4664. 100
  4665. -100
  4666. +100
  4667. 3.14159
  4668. 6.02e23
  4669. 1e-12
  4670. ''')
  4671. pyparsing_common.hex_integer.runTests('''
  4672. # hex numbers
  4673. 100
  4674. FF
  4675. ''')
  4676. pyparsing_common.fraction.runTests('''
  4677. # fractions
  4678. 1/2
  4679. -3/4
  4680. ''')
  4681. pyparsing_common.mixed_integer.runTests('''
  4682. # mixed fractions
  4683. 1
  4684. 1/2
  4685. -3/4
  4686. 1-3/4
  4687. ''')
  4688. import uuid
  4689. pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
  4690. pyparsing_common.uuid.runTests('''
  4691. # uuid
  4692. 12345678-1234-5678-1234-567812345678
  4693. ''')
  4694. prints::
  4695. # any int or real number, returned as the appropriate type
  4696. 100
  4697. [100]
  4698. -100
  4699. [-100]
  4700. +100
  4701. [100]
  4702. 3.14159
  4703. [3.14159]
  4704. 6.02e23
  4705. [6.02e+23]
  4706. 1e-12
  4707. [1e-12]
  4708. # any int or real number, returned as float
  4709. 100
  4710. [100.0]
  4711. -100
  4712. [-100.0]
  4713. +100
  4714. [100.0]
  4715. 3.14159
  4716. [3.14159]
  4717. 6.02e23
  4718. [6.02e+23]
  4719. 1e-12
  4720. [1e-12]
  4721. # hex numbers
  4722. 100
  4723. [256]
  4724. FF
  4725. [255]
  4726. # fractions
  4727. 1/2
  4728. [0.5]
  4729. -3/4
  4730. [-0.75]
  4731. # mixed fractions
  4732. 1
  4733. [1]
  4734. 1/2
  4735. [0.5]
  4736. -3/4
  4737. [-0.75]
  4738. 1-3/4
  4739. [1.75]
  4740. # uuid
  4741. 12345678-1234-5678-1234-567812345678
  4742. [UUID('12345678-1234-5678-1234-567812345678')]
  4743. """
  4744. convertToInteger = tokenMap(int)
  4745. """
  4746. Parse action for converting parsed integers to Python int
  4747. """
  4748. convertToFloat = tokenMap(float)
  4749. """
  4750. Parse action for converting parsed numbers to Python float
  4751. """
  4752. integer = Word(nums).setName("integer").setParseAction(convertToInteger)
  4753. """expression that parses an unsigned integer, returns an int"""
  4754. hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
  4755. """expression that parses a hexadecimal integer, returns an int"""
  4756. signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
  4757. """expression that parses an integer with optional leading sign, returns an int"""
  4758. fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
  4759. """fractional expression of an integer divided by an integer, returns a float"""
  4760. fraction.addParseAction(lambda t: t[0]/t[-1])
  4761. mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
  4762. """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
  4763. mixed_integer.addParseAction(sum)
  4764. real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
  4765. """expression that parses a floating point number and returns a float"""
  4766. sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
  4767. """expression that parses a floating point number with optional scientific notation and returns a float"""
  4768. # streamlining this expression makes the docs nicer-looking
  4769. number = (sci_real | real | signed_integer).streamline()
  4770. """any numeric expression, returns the corresponding Python type"""
  4771. fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
  4772. """any int or real number, returned as float"""
  4773. identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
  4774. """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
  4775. ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
  4776. "IPv4 address (C{0.0.0.0 - 255.255.255.255})"
  4777. _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
  4778. _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
  4779. _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
  4780. _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
  4781. _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
  4782. ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
  4783. "IPv6 address (long, short, or mixed form)"
  4784. mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
  4785. "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
  4786. @staticmethod
  4787. def convertToDate(fmt="%Y-%m-%d"):
  4788. """
  4789. Helper to create a parse action for converting parsed date string to Python datetime.date
  4790. Params -
  4791. - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})
  4792. Example::
  4793. date_expr = pyparsing_common.iso8601_date.copy()
  4794. date_expr.setParseAction(pyparsing_common.convertToDate())
  4795. print(date_expr.parseString("1999-12-31"))
  4796. prints::
  4797. [datetime.date(1999, 12, 31)]
  4798. """
  4799. def cvt_fn(s,l,t):
  4800. try:
  4801. return datetime.strptime(t[0], fmt).date()
  4802. except ValueError as ve:
  4803. raise ParseException(s, l, str(ve))
  4804. return cvt_fn
  4805. @staticmethod
  4806. def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
  4807. """
  4808. Helper to create a parse action for converting parsed datetime string to Python datetime.datetime
  4809. Params -
  4810. - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})
  4811. Example::
  4812. dt_expr = pyparsing_common.iso8601_datetime.copy()
  4813. dt_expr.setParseAction(pyparsing_common.convertToDatetime())
  4814. print(dt_expr.parseString("1999-12-31T23:59:59.999"))
  4815. prints::
  4816. [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
  4817. """
  4818. def cvt_fn(s,l,t):
  4819. try:
  4820. return datetime.strptime(t[0], fmt)
  4821. except ValueError as ve:
  4822. raise ParseException(s, l, str(ve))
  4823. return cvt_fn
  4824. iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
  4825. "ISO8601 date (C{yyyy-mm-dd})"
  4826. iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
  4827. "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"
  4828. uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
  4829. "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
  4830. _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
  4831. @staticmethod
  4832. def stripHTMLTags(s, l, tokens):
  4833. """
  4834. Parse action to remove HTML tags from web page HTML source
  4835. Example::
  4836. # strip HTML links from normal text
  4837. text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
  4838. td,td_end = makeHTMLTags("TD")
  4839. table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
  4840. print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
  4841. """
  4842. return pyparsing_common._html_stripper.transformString(tokens[0])
  4843. _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
  4844. + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
  4845. comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
  4846. """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
  4847. upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
  4848. """Parse action to convert tokens to upper case."""
  4849. downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
  4850. """Parse action to convert tokens to lower case."""
if __name__ == "__main__":
    # Demo run when the module is executed as a script: builds a small
    # SQL SELECT grammar, runs runTests on it, then runs runTests on
    # several pyparsing_common expressions.

    # keywords, matched without regard to case
    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # dotted names are combined into a single token and upper-cased
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    # either '*' or an explicit list of column names
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    # full statement; the parts are exposed as named results
    simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY
        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC
        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual
        # multiple tables
        Select A, B, C from Sys.dual, Table2
        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual
        # incomplete command - should fail
        Select
        # invalid column name - should fail
        Select ^^^ frox Sys.dual
        """)

    # any int or real number, returned as the corresponding Python type
    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # hexadecimal integers, returned as int
    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # replace the uuid expression's parse action so matches become
    # uuid.UUID objects (this changes the shared pyparsing_common.uuid)
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)