fparser Reference Guide  0.0.14
pattern_tools.py
1 # Modified work Copyright (c) 2017-2022 Science and Technology
2 # Facilities Council.
3 # Original work Copyright (c) 1999-2008 Pearu Peterson
4 
5 # All rights reserved.
6 
7 # Modifications made as part of the fparser project are distributed
8 # under the following license:
9 
10 # Redistribution and use in source and binary forms, with or without
11 # modification, are permitted provided that the following conditions are
12 # met:
13 
14 # 1. Redistributions of source code must retain the above copyright
15 # notice, this list of conditions and the following disclaimer.
16 
17 # 2. Redistributions in binary form must reproduce the above copyright
18 # notice, this list of conditions and the following disclaimer in the
19 # documentation and/or other materials provided with the distribution.
20 
21 # 3. Neither the name of the copyright holder nor the names of its
22 # contributors may be used to endorse or promote products derived from
23 # this software without specific prior written permission.
24 
25 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 
37 # --------------------------------------------------------------------
38 
39 # The original software (in the f2py project) was distributed under
40 # the following license:
41 
42 # Redistribution and use in source and binary forms, with or without
43 # modification, are permitted provided that the following conditions are met:
44 
45 # a. Redistributions of source code must retain the above copyright notice,
46 # this list of conditions and the following disclaimer.
47 # b. Redistributions in binary form must reproduce the above copyright
48 # notice, this list of conditions and the following disclaimer in the
49 # documentation and/or other materials provided with the distribution.
50 # c. Neither the name of the F2PY project nor the names of its
51 # contributors may be used to endorse or promote products derived from
52 # this software without specific prior written permission.
53 
54 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
55 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 # ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
58 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
61 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
64 # DAMAGE.
65 
66 """
67 Tools for constructing patterns.
68 
69 Permission to use, modify, and distribute this software is given under the
70 terms of the NumPy License. See http://scipy.org.
71 
72 NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
73 Author: Pearu Peterson <pearu@cens.ioc.ee>
74 Created: Oct 2006
75 
76 """
77 import re
78 
79 dollar_ok = True
80 
81 
82 class Pattern:
83  """
84  p1 | p2 -> <p1> | <p2>
85  p1 + p2 -> <p1> <p2>
86  p1 & p2 -> <p1><p2>
87  ~p1 -> [ <p1> ]
88  ~~p1 -> [ <p1> ]...
89  ~~~p1 -> <p1> [ <p1> ]...
90  ~~~~p1 -> ~~~p1
91  abs(p1) -> whole string match of <p1>
92  p1.named(name) -> match of <p1> has name
93  p1.match(string) -> return string match with <p1>
94  p1.flags(<re.I,..>)
95  p1.rsplit(..) -> split a string from the rightmost p1 occurrence
96  p1.lsplit(..) -> split a string from the leftmost p1 occurrence
97  """
98 
99  _special_symbol_map = {
100  ".": "[.]",
101  "*": "[*]",
102  "+": "[+]",
103  "|": "[|]",
104  "(": r"\(",
105  ")": r"\)",
106  "[": r"\[",
107  "]": r"\]",
108  "^": "[^]",
109  "$": "[$]",
110  "?": "[?]",
111  "{": r"\{",
112  "}": r"\}",
113  ">": "[>]",
114  "<": "[<]",
115  "=": "[=]",
116  }
117 
118  def __init__(self, label, pattern, optional=0, flags=0, value=None):
119  self.label = label
120  self.pattern = pattern
121  self.optional = optional
122  self._flags = flags
123  self.value = value
124 
125  def flags(self, *flags):
126  f = self._flags
127  for f1 in flags:
128  f = f | f1
129  return Pattern(
130  self.label, self.pattern, optional=self.optional, flags=f, value=self.value
131  )
132 
133  def get_compiled(self):
134  try:
135  return self._compiled_pattern
136  except AttributeError:
137  self._compiled_pattern = compiled = re.compile(self.pattern, self._flags)
138  return compiled
139 
140  def match(self, string):
141  return self.get_compiled().match(string)
142 
143  def search(self, string):
144  return self.get_compiled().search(string)
145 
146  def rsplit(self, string, is_add=False):
147  """
148  Return (<lhs>, <pattern_match>, <rhs>) where
149  string = lhs + pattern_match + rhs
150  and rhs does not contain pattern_match.
151  If no pattern_match is found in string, return None.
152  """
153  compiled = self.get_compiled()
154  t = compiled.split(string)
155  if is_add:
156  n = "".join(t[-3:]).replace(" ", "")
157  if abs_real_literal_constant.match(n):
158  t = t[:-3] + [n]
159  if len(t) < 3:
160  return
161  if "" in t[1:-1]:
162  return
163  rhs = t[-1].strip()
164  pattern_match = t[-2].strip()
165  assert abs(self).match(pattern_match), repr((self, string, t, pattern_match))
166  lhs = ("".join(t[:-2])).strip()
167  return lhs, pattern_match, rhs
168 
169  def lsplit(self, string):
170  """
171  Return (<lhs>, <pattern_match>, <rhs>) where
172  string = lhs + pattern_match + rhs
173  and rhs does not contain pattern_match.
174  If no pattern_match is found in string, return None.
175  """
176  compiled = self.get_compiled()
177  t = compiled.split(string) # can be optimized
178  if len(t) < 3:
179  return
180  lhs = t[0].strip()
181  pattern_match = t[1].strip()
182  rhs = ("".join(t[2:])).strip()
183  assert abs(self).match(pattern_match), repr(pattern_match)
184  return lhs, pattern_match, rhs
185 
186  def __abs__(self):
187  return Pattern(
188  self.label,
189  r"\A" + self.pattern + r"\Z",
190  flags=self._flags,
191  value=self.value,
192  )
193 
194  def __repr__(self):
195  return "%s(%r, %r)" % (self.__class__.__name__, self.label, self.pattern)
196 
197  def __or__(self, other):
198  label = "( %s OR %s )" % (self.label, other.label)
199  if self.pattern == other.pattern:
200  pattern = self.pattern
201  flags = self._flags
202  else:
203  pattern = "(%s|%s)" % (self.pattern, other.pattern)
204  flags = self._flags | other._flags
205  return Pattern(label, pattern, flags=flags)
206 
207  def __and__(self, other):
208  if isinstance(other, Pattern):
209  label = "%s%s" % (self.label, other.label)
210  pattern = self.pattern + other.pattern
211  flags = self._flags | other._flags
212  else:
213  assert isinstance(other, str), repr(other)
214  label = "%s%s" % (self.label, other)
215  pattern = self.pattern + other
216  flags = self._flags
217  return Pattern(label, pattern, flags=flags)
218 
219  def __rand__(self, other):
220  assert isinstance(other, str), repr(other)
221  label = "%s%s" % (other, self.label)
222  pattern = other + self.pattern
223  return Pattern(label, pattern, flags=self._flags)
224 
225  def __invert__(self):
226  if self.optional:
227  if self.optional == 1:
228  return Pattern(
229  self.label + "...",
230  self.pattern[:-1] + "*",
231  optional=2,
232  flags=self._flags,
233  )
234  if self.optional == 2:
235  return Pattern(
236  "%s %s" % (self.label[1:-4].strip(), self.label),
237  self.pattern[:-1] + "+",
238  optional=3,
239  flags=self._flags,
240  )
241  return self
242  label = "[ %s ]" % (self.label)
243  pattern = "(%s)?" % (self.pattern)
244  return Pattern(label, pattern, optional=1, flags=self._flags)
245 
246  def __add__(self, other):
247  if isinstance(other, Pattern):
248  label = "%s %s" % (self.label, other.label)
249  pattern = self.pattern + r"\s*" + other.pattern
250  flags = self._flags | other._flags
251  else:
252  assert isinstance(other, str), repr(other)
253  label = "%s %s" % (self.label, other)
254  other = self._special_symbol_map.get(other, other)
255  pattern = self.pattern + r"\s*" + other
256  flags = self._flags
257  return Pattern(label, pattern, flags=flags)
258 
259  def __radd__(self, other):
260  assert isinstance(other, str), repr(other)
261  label = "%s %s" % (other, self.label)
262  other = self._special_symbol_map.get(other, other)
263  pattern = other + r"\s*" + self.pattern
264  return Pattern(label, pattern, flags=self._flags)
265 
266  def named(self, name=None):
267  if name is None:
268  label = self.label
269  assert label[0] + label[-1] == "<>" and " " not in label, repr(label)
270  else:
271  label = "<%s>" % (name)
272  pattern = "(?P%s%s)" % (label.replace("-", "_"), self.pattern)
273  return Pattern(label, pattern, flags=self._flags, value=self.value)
274 
275  def rename(self, label):
276  if label[0] + label[-1] != "<>":
277  label = "<%s>" % (label)
278  return Pattern(
279  label,
280  self.pattern,
281  optional=self.optional,
282  flags=self._flags,
283  value=self.value,
284  )
285 
286  def __call__(self, string):
287  m = self.match(string)
288  if m is None:
289  return
290  if self.value is not None:
291  return self.value
292  return m.group()
293 
294 
295 # Predefined patterns
296 
297 
298 letter = Pattern("<letter>", "[A-Z]", flags=re.I)
299 if dollar_ok:
300  name = Pattern("<name>", r"[A-Z][\w$]*", flags=re.I)
301 else:
302  name = Pattern("<name>", r"[A-Z]\w*", flags=re.I)
303 # file_name pattern is start of match '^' to end of match '$', either
304 # match a single character that is not space '\S', or '|' a single
305 # character that is not space at the start '\S' and end '\S' of the
306 # match with anything '.*' inbetween.
307 file_name = Pattern("<file_name>", r"^(\S|\S.*\S)$", flags=re.I)
308 macro_name = Pattern("<macro_name>", r"[A-Z_]\w*", flags=re.I)
309 abs_macro_name = abs(macro_name)
310 digit = Pattern("<digit>", r"\d")
311 underscore = Pattern("<underscore>", "_")
312 binary_digit = Pattern("<binary-digit>", r"[01]")
313 octal_digit = Pattern("<octal-digit>", r"[0-7]")
314 hex_digit = Pattern("<hex-digit>", r"[\dA-F]", flags=re.I)
315 
316 digit_string = Pattern("<digit-string>", r"\d+")
317 abs_digit_string = abs(digit_string)
318 abs_digit_string_named = abs(digit_string.named("value"))
319 binary_digit_string = Pattern("<binary-digit-string>", r"[01]+")
320 octal_digit_string = Pattern("<octal-digit-string>", r"[0-7]+")
321 hex_digit_string = Pattern("<hex-digit-string>", r"[\dA-F]+", flags=re.I)
322 
323 sign = Pattern("<sign>", r"[+-]")
324 exponent_letter = Pattern("<exponent-letter>", r"[ED]", flags=re.I)
325 
326 alphanumeric_character = Pattern("<alphanumeric-character>", r"\w") # [A-Z0-9_]
327 special_character = Pattern(
328  "<special-character>", r'[ =+-*/\()[\]{},.:;!"%&~<>?,\'`^|$#@]'
329 )
330 character = alphanumeric_character | special_character
331 
332 kind_param = digit_string | name
333 kind_param_named = kind_param.named("kind-param")
334 signed_digit_string = ~sign + digit_string
335 int_literal_constant = digit_string + ~("_" + kind_param)
336 signed_int_literal_constant = ~sign + int_literal_constant
337 int_literal_constant_named = digit_string.named("value") + ~("_" + kind_param_named)
338 signed_int_literal_constant_named = (~sign + digit_string).named("value") + ~(
339  "_" + kind_param_named
340 )
341 
342 binary_constant = (
343  "B" + ("'" & binary_digit_string & "'" | '"' & binary_digit_string & '"')
344 ).flags(re.I)
345 octal_constant = (
346  "O" + ("'" & octal_digit_string & "'" | '"' & octal_digit_string & '"')
347 ).flags(re.I)
348 hex_constant = (
349  "Z" + ("'" & hex_digit_string & "'" | '"' & hex_digit_string & '"')
350 ).flags(re.I)
351 boz_literal_constant = binary_constant | octal_constant | hex_constant
352 
353 exponent = signed_digit_string
354 significand = digit_string + "." + ~digit_string | "." + digit_string
355 real_literal_constant = significand + ~(exponent_letter + exponent) + ~(
356  "_" + kind_param
357 ) | digit_string + exponent_letter + exponent + ~("_" + kind_param)
358 real_literal_constant_named = (
359  significand + ~(exponent_letter + exponent)
360  | digit_string + exponent_letter + exponent
361 ).named("value") + ~("_" + kind_param_named)
362 signed_real_literal_constant_named = (
363  ~sign
364  + (
365  significand + ~(exponent_letter + exponent)
366  | digit_string + exponent_letter + exponent
367  )
368 ).named("value") + ~("_" + kind_param_named)
369 signed_real_literal_constant = ~sign + real_literal_constant
370 
371 named_constant = name
372 real_part = signed_int_literal_constant | signed_real_literal_constant | named_constant
373 imag_part = real_part
374 complex_literal_constant = "(" + real_part + "," + imag_part + ")"
375 
376 a_n_rep_char = Pattern("<alpha-numeric-rep-char>", r"\w")
377 rep_char = Pattern("<rep-char>", r".")
378 char_literal_constant = ~(kind_param + "_") + (
379  "'" + ~~rep_char + "'" | '"' + ~~rep_char + '"'
380 )
381 a_n_char_literal_constant_named1 = ~(kind_param_named + "_") + (
382  ~~~("'" + ~~a_n_rep_char + "'")
383 ).named("value")
384 a_n_char_literal_constant_named2 = ~(kind_param_named + "_") + (
385  ~~~('"' + ~~a_n_rep_char + '"')
386 ).named("value")
387 
388 logical_literal_constant = (r"[.]\s*(TRUE|FALSE)\s*[.]" + ~("_" + kind_param)).flags(
389  re.I
390 )
391 logical_literal_constant_named = Pattern(
392  "<value>", r"[.]\s*(TRUE|FALSE)\s*[.]", flags=re.I
393 ).named() + ~("_" + kind_param_named)
394 literal_constant = (
395  int_literal_constant
396  | real_literal_constant
397  | complex_literal_constant
398  | logical_literal_constant
399  | char_literal_constant
400  | boz_literal_constant
401 )
402 constant = literal_constant | named_constant
403 int_constant = int_literal_constant | boz_literal_constant | named_constant
404 char_constant = char_literal_constant | named_constant
405 
406 # assume that replace_string_map is applied:
407 part_ref = name + ~((r"[(]" + name + r"[)]"))
408 data_ref = part_ref + ~~~(r"[%]" + part_ref)
409 primary = constant | name | data_ref | (r"[(]" + name + r"[)]")
410 
411 power_op = Pattern("<power-op>", r"(?<![*])[*]{2}(?![*])")
412 mult_op = Pattern("<mult-op>", r"(?<![*])[*](?![*])|(?<![/])[/](?![/])")
413 add_op = Pattern("<add-op>", r"[+-]")
414 concat_op = Pattern("<concat-op>", r"(?<![/])[/]\s*[/](?![/])")
415 rel_op = Pattern(
416  "<rel-op>",
417  r"[.]\s*EQ\s*[.]|[.]\s*NE\s*[.]|[.]\s*LT\s*[.]|[.]\s*LE\s*[.]|"
418  r"[.]\s*GT\s*[.]|[.]\s*GE\s*[.]|[=]{2}|/[=]|[<][=]|[<]|[>][=]|[>]",
419  flags=re.I,
420 )
421 not_op = Pattern("<not-op>", r"[.]\s*NOT\s*[.]", flags=re.I)
422 and_op = Pattern("<and-op>", r"[.]\s*AND\s*[.]", flags=re.I)
423 or_op = Pattern("<or-op>", r"[.]\s*OR\s*[.]", flags=re.I)
424 equiv_op = Pattern("<equiv-op>", r"[.]\s*EQV\s*[.]|[.]\s*NEQV\s*[.]", flags=re.I)
425 percent_op = Pattern("<percent-op>", r"%", flags=re.I)
426 intrinsic_operator = (
427  power_op
428  | mult_op
429  | add_op
430  | concat_op
431  | rel_op
432  | not_op
433  | and_op
434  | or_op
435  | equiv_op
436 )
437 extended_intrinsic_operator = intrinsic_operator
438 
439 defined_unary_op = Pattern("<defined-unary-op>", r"[.]\s*[A-Z]+\s*[.]", flags=re.I)
440 defined_binary_op = Pattern("<defined-binary-op>", r"[.]\s*[A-Z]+\s*[.]", flags=re.I)
441 defined_operator = defined_unary_op | defined_binary_op | extended_intrinsic_operator
442 abs_defined_operator = abs(defined_operator)
443 defined_op = Pattern("<defined-op>", "[.][A-Z]+[.]", flags=re.I)
444 abs_defined_op = abs(defined_op)
445 
446 non_defined_binary_op = intrinsic_operator | logical_literal_constant
447 
448 label = Pattern("<label>", r"\d{1,5}")
449 abs_label = abs(label)
450 
451 keyword = name
452 keyword_equal = keyword + "="
453 
454 abs_constant = abs(constant)
455 abs_literal_constant = abs(literal_constant)
456 abs_int_literal_constant = abs(int_literal_constant)
457 abs_signed_int_literal_constant = abs(signed_int_literal_constant)
458 abs_signed_int_literal_constant_named = abs(signed_int_literal_constant_named)
459 abs_int_literal_constant_named = abs(int_literal_constant_named)
460 abs_real_literal_constant = abs(real_literal_constant)
461 abs_signed_real_literal_constant = abs(signed_real_literal_constant)
462 abs_signed_real_literal_constant_named = abs(signed_real_literal_constant_named)
463 abs_real_literal_constant_named = abs(real_literal_constant_named)
464 abs_complex_literal_constant = abs(complex_literal_constant)
465 abs_logical_literal_constant = abs(logical_literal_constant)
466 abs_char_literal_constant = abs(char_literal_constant)
467 abs_boz_literal_constant = abs(boz_literal_constant)
468 abs_name = abs(name)
469 abs_a_n_char_literal_constant_named1 = abs(a_n_char_literal_constant_named1)
470 abs_a_n_char_literal_constant_named2 = abs(a_n_char_literal_constant_named2)
471 abs_logical_literal_constant_named = abs(logical_literal_constant_named)
472 abs_binary_constant = abs(binary_constant)
473 abs_octal_constant = abs(octal_constant)
474 abs_hex_constant = abs(hex_constant)
475 
476 intrinsic_type_name = Pattern(
477  "<intrinsic-type-name>",
478  r"(INTEGER|REAL|COMPLEX|LOGICAL|CHARACTER|DOUBLE\s*COMPLEX|"
479  r"DOUBLE\s*PRECISION|BYTE)",
480  flags=re.I,
481 )
482 abs_intrinsic_type_name = abs(intrinsic_type_name)
483 double_complex_name = Pattern(
484  "<double-complex-name>", r"DOUBLE\s*COMPLEX", flags=re.I, value="DOUBLE COMPLEX"
485 )
486 double_precision_name = Pattern(
487  "<double-precision-name>",
488  r"DOUBLE\s*PRECISION",
489  flags=re.I,
490  value="DOUBLE PRECISION",
491 )
492 abs_double_complex_name = abs(double_complex_name)
493 abs_double_precision_name = abs(double_precision_name)
494 
495 access_spec = Pattern("<access-spec>", r"PUBLIC|PRIVATE", flags=re.I)
496 abs_access_spec = abs(access_spec)
497 
498 implicit_none = Pattern(
499  "<implicit-none>", r"IMPLICIT\s*NONE", flags=re.I, value="IMPLICIT NONE"
500 )
501 abs_implicit_none = abs(implicit_none)
502 
503 attr_spec = Pattern(
504  "<attr-spec>",
505  r"(ALLOCATABLE|ASYNCHRONOUS|EXTERNAL|INTENT|INTRINSIC|"
506  "OPTIONAL|PARAMETER|POINTER|PROTECTED|SAVE|TARGET|VALUE|VOLATILE)",
507  flags=re.I,
508 )
509 abs_attr_spec = abs(attr_spec)
510 
511 attr_spec_f08 = Pattern(
512  "<attr-spec>",
513  r"({})".format(
514  "|".join( # extend attr_spec with attribute CONTIGUOUS
515  sorted(attr_spec.pattern.strip("()").split("|") + ["CONTIGUOUS"])
516  )
517  ),
518  flags=re.I,
519 )
520 abs_attr_spec_f08 = abs(attr_spec_f08)
521 
522 dimension = Pattern("<dimension>", r"DIMENSION", flags=re.I)
523 abs_dimension = abs(dimension)
524 
525 intent = Pattern("<intent>", r"INTENT", flags=re.I)
526 abs_intent = abs(intent)
527 
528 intent_spec = Pattern("<intent-spec>", r"INOUT|IN|OUT", flags=re.I)
529 abs_intent_spec = abs(intent_spec)
530 
531 function = Pattern("<function>", r"FUNCTION", flags=re.I)
532 subroutine = Pattern("<subroutine>", r"SUBROUTINE", flags=re.I)
533 
534 select_case = Pattern(
535  "<select-case>", r"SELECT\s*CASE", flags=re.I, value="SELECT CASE"
536 )
537 abs_select_case = abs(select_case)
def rsplit(self, string, is_add=False)