fparser Reference Guide  0.0.14
splitline.py
1 #!/usr/bin/env python
2 
3 # Modified work Copyright (c) 2017-2022 Science and Technology
4 # Facilities Council.
5 # Modified work Copyright (c) 2017 by J. Henrichs, Bureau of Meteorology
6 # Original work Copyright (c) 1999-2008 Pearu Peterson
7 
8 # All rights reserved.
9 
10 # Modifications made as part of the fparser project are distributed
11 # under the following license:
12 
13 # Redistribution and use in source and binary forms, with or without
14 # modification, are permitted provided that the following conditions are
15 # met:
16 
17 # 1. Redistributions of source code must retain the above copyright
18 # notice, this list of conditions and the following disclaimer.
19 
20 # 2. Redistributions in binary form must reproduce the above copyright
21 # notice, this list of conditions and the following disclaimer in the
22 # documentation and/or other materials provided with the distribution.
23 
24 # 3. Neither the name of the copyright holder nor the names of its
25 # contributors may be used to endorse or promote products derived from
26 # this software without specific prior written permission.
27 
28 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 
40 # --------------------------------------------------------------------
41 
42 # The original software (in the f2py project) was distributed under
43 # the following license:
44 
45 # Redistribution and use in source and binary forms, with or without
46 # modification, are permitted provided that the following conditions are met:
47 
48 # a. Redistributions of source code must retain the above copyright notice,
49 # this list of conditions and the following disclaimer.
50 # b. Redistributions in binary form must reproduce the above copyright
51 # notice, this list of conditions and the following disclaimer in the
52 # documentation and/or other materials provided with the distribution.
53 # c. Neither the name of the F2PY project nor the names of its
54 # contributors may be used to endorse or promote products derived from
55 # this software without specific prior written permission.
56 
57 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
58 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60 # ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
61 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
63 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
64 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
67 # DAMAGE.
68 
69 """
70 Defines LineSplitter and helper functions.
71 
72 Original Author: Pearu Peterson <pearu@cens.ioc.ee>
73 First version created: May 2006
74 
75 -----
76 """
77 
78 
79 import re
80 
81 
class String(str):
    """Marker type: a ``str`` holding the text of a quoted string.

    It adds no behaviour of its own; code in this module uses
    ``isinstance(item, String)`` to tell quoted items apart from
    ordinary text.
    """
84 
85 
class ParenString(str):
    """Marker type: a ``str`` holding a bracketed part of a line.

    It adds no behaviour of its own; code in this module uses
    ``isinstance(item, ParenString)`` to recognise bracketed items.
    """
88 
89 
# Names exported by a star-import of this module. Note that ParenString and
# StringReplaceDict are not listed here.
__all__ = ["String", "string_replace_map", "splitquote", "splitparen"]
91 
# Returns every string-constant placeholder found in a piece of text.
_f2py_str_findall = re.compile(r"_F2PY_STRING_CONSTANT_\d+_").findall

# Both matchers succeed only when the whole text consists of 'word'
# characters (the empty string also matches).
_word_only = re.compile(r"\w*\Z", re.I)
_is_name = _word_only.match
_is_simple_str = _word_only.match

# Returns every placeholder (string constant, real constant or expression
# tuple) found in a piece of text.
_f2py_findall = re.compile(
    r"(_F2PY_STRING_CONSTANT_\d+_|F2PY_REAL_CONSTANT_\d+_|F2PY_EXPR_TUPLE_\d+)"
).findall

# A valid exponential constant must begin with a digit or a '.' (and be
# preceded by a non-'word' character or the start of the string).
# '.' has to be excluded from the non-word characters because otherwise,
# in a string such as ".5d0", the leading '.' would be consumed by the
# non-capturing group. Since that first group is non-capturing (?:),
# the matched literal is available as group 1.
# R417 for real-literal-constant does not permit whitespace.
exponential_constant = re.compile(
    r"(?:[^\w.]|^)((\d+[.]\d*|\d*[.]\d+|\d+)[edED][+-]?\d+(_\w+)?)"
)
108 
109 
class StringReplaceDict(dict):
    """
    Dictionary mapping placeholder symbols to the text they stand for,
    as returned by the string_replace_map() function. Instances are
    callable: calling one with a line puts the stored text back in place
    of the placeholders found in that line.
    """

    def __call__(self, line):
        """
        :param str line: text that may contain placeholder symbols.

        :returns: the text with each placeholder known to this map \
            replaced by its stored value.
        :rtype: str

        """
        # The placeholders are looked up once, in the line as it was
        # passed in; those without an entry in this map are left alone.
        known = [name for name in _f2py_findall(line) if name in self]
        restored = line
        for name in known:
            # Replace a single occurrence per match so that a later, longer
            # placeholder that merely starts with this one (e.g.
            # 'F2PY_EXPR_TUPLE_10' versus 'F2PY_EXPR_TUPLE_1') is not
            # touched by this substitution.
            restored = restored.replace(name, self[name], 1)
        return restored
126 
127 
def memoize(function):
    """Simple memoization decorator.

    Results are kept in a dictionary private to the decorated function and
    are never evicted. All positional and keyword argument values must be
    hashable. Keyword arguments are matched by name, irrespective of the
    order in which they are given.

    :param function: The function to memoize.
    :type function: Callable

    :returns: a wrapper that calls `function` at most once per distinct \
        set of arguments and returns the stored result afterwards.
    :rtype: Callable

    Note: the standard library's functools.lru_cache(maxsize=...) offers a
    bounded alternative, which may suit lines that have temporal locality.

    """
    # Local import: this function is the only user of functools here.
    import functools

    memo = {}
    # Private marker for "nothing stored yet", so that a stored result of
    # None is not mistaken for a cache miss.
    missing = object()

    @functools.wraps(function)
    def wrapper(*args, **kwargs):
        # Positional and keyword arguments are kept apart in the key so
        # they cannot be confused with each other. Keyword names are
        # unique, so sorting never has to compare the values.
        key = (args, tuple(sorted(kwargs.items())))
        result = memo.get(key, missing)
        if result is missing:
            result = function(*args, **kwargs)
            memo[key] = result
        return result

    return wrapper
154 
155 
@memoize
def string_replace_map(line, lower=False):
    """
    #. Replaces string constants with symbol `'_F2PY_STRING_CONSTANT_<index>_'`
    #. Replaces (`expression`) with symbol `(F2PY_EXPR_TUPLE_<index>)`
    #. Replaces real numerical constants containing an exponent with symbol
       `F2PY_REAL_CONSTANT_<index>_`

    Applying the returned map to the returned line restores the text that
    the placeholders stand for. Every replaced string constant and every
    replaced bracketed expression gets its own index; identical real
    constants share one placeholder.

    Because of the @memoize decorator, repeated calls with the same
    arguments return the same objects, so the returned map should be
    treated as read-only.

    :param str line: the line of text in which to perform substitutions.
    :param bool lower: whether or not the call to splitquote() should return \
        items as lowercase (default is to leave the case unchanged).

    :returns: a new line and the replacement map.
    :rtype: 2-tuple of str and \
        :py:class:`fparser.common.splitline.StringReplaceDict`

    """
    string_map = StringReplaceDict()

    # Step 1: string constants. Strings whose content is made of word
    # characters only are left in place.
    str_idx = 0
    items = []
    for item in splitquote(line, lower=lower)[0]:
        if isinstance(item, String) and not _is_simple_str(item[1:-1]):
            str_idx += 1
            key = "_F2PY_STRING_CONSTANT_{0}_".format(str_idx)
            string_map[key] = item[1:-1]
            # Keep the original first and last characters (the quotes)
            # around the placeholder.
            items.append(item[0] + key + item[-1])
        else:
            items.append(item)
    newline = "".join(items)

    # Step 2: real constants with an exponent. The substitution is done
    # at the position of each regex match, so text that merely contains
    # the same characters (e.g. the tail of the name 'x1e3', or the start
    # of '1.0e3_wp' when '1.0e3' also occurs) is left untouched.
    const_lookup = {}  # literal text -> placeholder, for this step only
    const_keys = []

    def _replace_constant(match):
        literal = match.group(1)
        key = const_lookup.get(literal)
        if key is None:
            key = "F2PY_REAL_CONSTANT_{0}_".format(len(const_lookup) + 1)
            const_lookup[literal] = key
            string_map[key] = literal
            const_keys.append(key)
        # The overall match may start with one non-word character that
        # precedes the literal; it has to be kept.
        prefix = match.string[match.start() : match.start(1)]
        return prefix + key

    newline = exponential_constant.sub(_replace_constant, newline)

    # Step 3: top-level bracketed expressions. Brackets that only contain
    # a name (word characters, optionally surrounded by blanks) are left
    # in place.
    parens_idx = 0
    items = []
    expr_keys = []
    for item in splitparen(newline):
        if isinstance(item, ParenString) and not _is_name(item[1:-1].strip()):
            parens_idx += 1
            key = "F2PY_EXPR_TUPLE_{0}".format(parens_idx)
            string_map[key] = item[1:-1].strip()
            expr_keys.append(key)
            items.append(item[0] + key + item[-1])
        else:
            items.append(item)

    # Ensure that any entries in the map do not themselves contain
    # substitutions (a bracketed expression is recorded after strings and
    # constants inside it have already been replaced by placeholders).
    for key in expr_keys + const_keys:
        entry = string_map[key]
        for inc_key in _f2py_findall(entry):
            # Text that only looks like a placeholder but has no entry in
            # the map is left as it is.
            if inc_key in string_map:
                entry = entry.replace(inc_key, string_map[inc_key], 1)
        string_map[key] = entry

    return "".join(items), string_map
240 
241 
def splitquote(line, stopchar=None, lower=False, quotechars="\"'"):
    """
    Fast LineSplitter: splits a line into quoted and unquoted parts.

    The line is scanned character by character. Text outside quotes is
    collected into plain ``str`` items; text inside quotes, together with
    the quote characters themselves, is collected into
    :py:class:`String` items. A quote character that follows an odd
    number of consecutive backslashes is not treated as a delimiter.

    :param str line: the text to split.
    :param stopchar: the quote character that ends a string which is \
        already open when `line` starts, or None if scanning starts \
        outside a string.
    :type stopchar: str or NoneType
    :param bool lower: whether to lowercase the unquoted items (quoted \
        items are never changed).
    :param str quotechars: the characters that can open a string.

    :returns: the list of items, and the quote character of a string \
        that is still open at the end of the line (None if there is none).
    :rtype: 2-tuple of (list of str, str or NoneType)

    """
    items = []
    i = 0
    while 1:
        # Fetch the next character; running off the end of the line ends
        # the scan.
        try:
            char = line[i]
            i += 1
        except IndexError:
            break
        # Characters of the item currently being collected.
        l = []
        l_append = l.append
        # Number of consecutive backslashes seen immediately before `char`.
        nofslashes = 0
        if stopchar is None:
            # search for string start
            while 1:
                if char in quotechars and not nofslashes % 2:
                    # Unescaped quote: remember it as the closing character
                    # and step back so that the next pass of the outer loop
                    # reads this quote again as the start of the string.
                    stopchar = char
                    i -= 1
                    break
                if char == "\\":
                    nofslashes += 1
                else:
                    nofslashes = 0
                l_append(char)
                try:
                    char = line[i]
                    i += 1
                except IndexError:
                    break
            if not l:
                # The quote was the first character read; nothing to store.
                continue
            item = "".join(l)
            if lower:
                item = item.lower()
            items.append(item)
            continue
        if char == stopchar:
            # string starts with quotechar
            # NOTE(review): this branch is also taken when `stopchar` was
            # passed in by the caller and the first character of the line
            # equals it - confirm that this is the intended handling of a
            # continued string.
            l_append(char)
            try:
                char = line[i]
                i += 1
            except IndexError:
                # The line ends right after the quote: store what has been
                # collected and stop; `stopchar` stays set.
                if l:
                    item = String("".join(l))
                    items.append(item)
                break
        # else continued string
        while 1:
            if char == stopchar and not nofslashes % 2:
                # Unescaped closing quote: the string is complete.
                l_append(char)
                stopchar = None
                break
            if char == "\\":
                nofslashes += 1
            else:
                nofslashes = 0
            l_append(char)
            try:
                char = line[i]
                i += 1
            except IndexError:
                # End of line inside the string: `stopchar` stays set.
                break
        if l:
            item = String("".join(l))
            items.append(item)
    return items, stopchar
312 
313 
def splitparen(line, paren_open="([", paren_close=")]"):
    """
    Cuts a line into its top-level bracketed parts and the text between
    them. E.g.: "a( (1+2)*3) = b(x)" becomes:
    ["a", "( (1+2)*3)", " = b", "(x)"]

    Brackets inside quotes, and any character that follows an odd number
    of backslashes, are ignored. The text in front of a top-level opening
    bracket is always emitted, even when it is empty. A bracket that is
    never closed is returned as part of the trailing plain text.

    :param str line: the string to split.
    :param str paren_open: The characters that define an open parentheses.
    :param str paren_close: The characters that define a closing parentheses.
    :return: List of parenthesised and not-parenthesised parts; the \
        parenthesised ones are ParenString instances.
    :rtype: list of str

    The paren_open and paren_close strings must be matched in order:
    paren_open[x] is closed by paren_close[x].
    """

    assert len(paren_open) == len(paren_close)

    parts = []
    part_start = 0  # Index at which the part being read begins.
    pending_close = []  # Closing brackets still expected, innermost last.
    escaped = False  # True after an odd number of consecutive "\".
    open_quote = None  # Quote character of the string being read, if any.

    for pos, current in enumerate(line):
        if current == "\\":
            escaped = not escaped
            continue

        if escaped:
            # This character is escaped, so it is neither a quote nor a
            # bracket.
            escaped = False
            continue

        if open_quote is not None:
            # Inside a string: only its closing quote is of interest.
            if current == open_quote:
                open_quote = None
            continue

        if current in ("'", '"'):
            open_quote = current
            continue

        which = paren_open.find(current)
        if which != -1:
            if not pending_close:
                # A top-level bracket opens: close off the text before it.
                parts.append(line[part_start:pos])
                part_start = pos
            pending_close.append(paren_close[which])
        elif pending_close and current == pending_close[-1]:
            pending_close.pop()
            if not pending_close:
                # The top-level bracket has been closed.
                parts.append(ParenString(line[part_start : pos + 1]))
                part_start = pos + 1

    # Whatever is left over becomes the final part.
    if part_start != len(line):
        parts.append(line[part_start:])
    return parts