# Source code for fparser.common.splitline

#!/usr/bin/env python

# Modified work Copyright (c) 2017-2022 Science and Technology
# Facilities Council.
# Modified work Copyright (c) 2017 by J. Henrichs, Bureau of Meteorology
# Original work Copyright (c) 1999-2008 Pearu Peterson

# All rights reserved.

# Modifications made as part of the fparser project are distributed
# under the following license:

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:

# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.

# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.

# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# --------------------------------------------------------------------

# The original software (in the f2py project) was distributed under
# the following license:

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:

#   a. Redistributions of source code must retain the above copyright notice,
#      this list of conditions and the following disclaimer.
#   b. Redistributions in binary form must reproduce the above copyright
#      notice, this list of conditions and the following disclaimer in the
#      documentation and/or other materials provided with the distribution.
#   c. Neither the name of the F2PY project nor the names of its
#      contributors may be used to endorse or promote products derived from
#      this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.

"""
Defines LineSplitter and helper functions.

Original Author: Pearu Peterson <pearu@cens.ioc.ee>
First version created: May 2006

"""

import functools
import re
from typing import List, Tuple, Optional, Union


class String(str):
    """
    Marker type for a *quoted* string.

    Behaves exactly like :py:class:`str`; the distinct type only lets
    callers tell quoted parts of a line apart from unquoted ones with an
    ``isinstance`` check.

    """
class ParenString(str):
    """Class representing a parenthesis string."""


__all__ = ["String", "string_replace_map", "splitquote", "splitparen"]

_f2py_str_findall = re.compile(r"_F2PY_STRING_CONSTANT_\d+_").findall
_is_name = re.compile(r"\w*\Z", re.I).match
_is_simple_str = re.compile(r"\w*\Z", re.I).match
# Matches any of the three kinds of substitution symbol created by
# string_replace_map(). The compiled pattern is kept (as well as its
# 'findall' method) so that symbols can be substituted at the exact
# position at which they were matched.
_f2py_symbol = re.compile(
    r"(_F2PY_STRING_CONSTANT_\d+_|F2PY_REAL_CONSTANT_\d+_|"
    r"F2PY_EXPR_TUPLE_\d+)"
)
_f2py_findall = _f2py_symbol.findall
# A valid exponential constant must begin with a digit or a '.' (and be
# preceded by a non-'word' character or the start of the string).
# We have to exclude '.' from the match for a non-word character as
# otherwise, in a string such as ".5d0", it would be matched by the
# non-capturing group. Since the first group is non-capturing (?:),
# the matched literal is in group 1.
# R417 for real-literal-constant does not permit whitespace.
exponential_constant = re.compile(
    r"(?:[^\w.]|^)((\d+[.]\d*|\d*[.]\d+|\d+)[edED][+-]?\d+(_\w+)?)"
)


class StringReplaceDict(dict):
    """
    Dictionary object that is callable for applying map returned
    by string_replace_map() function.
    """

    def __call__(self, line):
        """
        Replace every substitution symbol in `line` that is a key of this
        dictionary with the corresponding value.

        :param str line: text that may contain substitution symbols.

        :returns: `line` with all known symbols replaced by their values. \
            Symbols that are not keys of this dictionary are left untouched.
        :rtype: str

        """

        def _restore(match):
            """
            :returns: the map entry for the matched symbol or the symbol \
                itself if there is no such entry.
            """
            symbol = match.group(0)
            return self.get(symbol, symbol)

        # A single regex pass replaces each symbol at the position where it
        # was matched. This is what guarantees that e.g. 'F2PY_EXPR_TUPLE_1'
        # is never substituted inside 'F2PY_EXPR_TUPLE_10' and that text
        # already substituted is not scanned again.
        return _f2py_symbol.sub(_restore, line)


def memoize(function):
    """Simple memoization decorator.

    Results are stored in an unbounded dictionary keyed on the positional
    and keyword arguments of the call, which must therefore be hashable.
    On a cache hit the *same* object that was stored is returned so
    callers must not modify returned values.

    :param function: The function to memoize.
    :type function: Callable

    :returns: a wrapper around `function` that caches its results.
    :rtype: Callable

    TODO # 472 Python 3.9 comes with a thread-safe and more efficient cache
    as it can be bounded and we are interested in lines that have temporal
    locality. It's the: @functools.lru_cache(maxsize=8)

    """
    memo = {}
    # Private sentinel so that a legitimately-returned None is cached too.
    missing = object()

    # 'wraps' preserves the name and docstring of the decorated function.
    @functools.wraps(function)
    def wrapper(*args, **kwargs):
        # Keep positional and keyword arguments in separate parts of the
        # key so that f("a", "lower", True) and f("a", lower=True) can
        # never share a cache entry. Keyword names are unique so sorting
        # only ever compares the names.
        key = (args, tuple(sorted(kwargs.items())))
        result = memo.get(key, missing)
        if result is missing:
            result = function(*args, **kwargs)
            memo[key] = result
        return result

    return wrapper


@memoize
def string_replace_map(line, lower=False):
    """
    #. Replaces string constants with symbol
       `'_F2PY_STRING_CONSTANT_<index>_'`
    #. Replaces (`expression`) with symbol `(F2PY_EXPR_TUPLE_<index>)`
    #. Replaces real numerical constants containing an exponent with symbol
       `F2PY_REAL_CONSTANT_<index>_`

    Each quoted string and each parenthesised expression that is replaced
    gets its own symbol. Repeated occurrences of the same exponential
    constant share one symbol. Entries stored in the map never contain
    substitution symbols created by this call.

    The result is memoized so the returned map must be treated as
    read-only.

    :param str line: the line of text in which to perform substitutions.
    :param bool lower: whether or not the call to splitquote() should return
        items as lowercase (default is to leave the case unchanged).

    :returns: a new line and the replacement map.
    :rtype: Tuple[str, :py:class:`fparser.common.splitline.StringReplaceDict`]

    """
    string_map = StringReplaceDict()

    # Step 1: quoted strings. The surrounding quotation characters are
    # kept in the line; only the content is replaced by a symbol.
    items = []
    str_idx = 0
    for item in splitquote(line, lower=lower)[0]:
        if isinstance(item, String) and not _is_simple_str(item[1:-1]):
            str_idx += 1
            key = "_F2PY_STRING_CONSTANT_{0}_".format(str_idx)
            string_map[key] = item[1:-1]
            items.append(item[0] + key + item[-1])
        else:
            items.append(item)
    newline = "".join(items)

    # Step 2: real constants with an exponent. The lookup used to share
    # symbols between identical literals is private to this step so that a
    # constant can never be given the symbol of a string or expression that
    # happens to have the same text.
    const_keys = []
    const_lookup = {}

    def _symbol_for_constant(match):
        """
        :returns: the replacement text for one match of \
            `exponential_constant`.
        """
        # Group 1 is the literal *without* any preceding non-word
        # character.
        literal = match.group(1)
        key = const_lookup.get(literal)
        if key is None:
            key = "F2PY_REAL_CONSTANT_{0}_".format(len(const_keys) + 1)
            const_lookup[literal] = key
            string_map[key] = literal
            const_keys.append(key)
        # Put back the delimiter (if any) consumed by the non-capturing
        # group in front of the literal.
        return match.group(0)[: match.start(1) - match.start()] + key

    # Only the matched spans are replaced. A plain str.replace() of the
    # literal would also rewrite it where it is merely a substring of
    # another token, e.g. the name 'x1e5' in "x1e5 = 1e5".
    newline = exponential_constant.sub(_symbol_for_constant, newline)

    # Step 3: top-level parenthesised expressions. The brackets themselves
    # are kept in the line.
    items = []
    expr_keys = []
    for item in splitparen(newline):
        if isinstance(item, ParenString) and not _is_name(item[1:-1].strip()):
            key = "F2PY_EXPR_TUPLE_{0}".format(len(expr_keys) + 1)
            string_map[key] = item[1:-1].strip()
            expr_keys.append(key)
            items.append(item[0] + key + item[-1])
        else:
            items.append(item)

    # Ensure that any entries in the map do not themselves contain
    # substitutions. An expression was extracted after the strings and
    # constants within it had already been replaced by symbols so those
    # must be expanded again. Symbol-like text that is not in the map is
    # left as it is.
    for key in expr_keys + const_keys:
        string_map[key] = string_map(string_map[key])

    return "".join(items), string_map
def _next_quote(line: str, quote_char: Optional[str] = None, start: int = 0) -> int: """ Find the location of the first quotation char from the specified start position (defaults to the beginning of the string). In Fortran, quotation marks within quoted strings are escaped through repetition, i.e. '""' means '"' and "''" means "'". If the `quote_char` argument is supplied then this is taken to mean that we are searching within a quoted string and therefore any repeated quotation marks are interpreted as escaped quotation marks. :param line: the line of text to search. :param quote_char: the specific quotation character to search for. If it is not specified then both ' and " are searched for. :param start: the position in the line from which to search. :returns: the index of the quotation char in the supplied string or -1 if none is found. """ line_len = len(line) i = start if quote_char: target_quote_chars = [quote_char] else: target_quote_chars = ["'", '"'] while i < line_len: if line[i] in target_quote_chars: if quote_char and i < line_len - 1 and line[i + 1] == line[i]: # We're inside a quoted string so this is an escaped quotation # character ('' or ""). i += 2 continue return i i += 1 return -1
def splitquote(
    line: str, stopchar: Optional[str] = None, lower: bool = False
) -> Tuple[List[Union[String, str]], Optional[str]]:
    """
    Break a line of text into its quoted and unquoted parts.

    A line may continue a quoted string that was opened on a previous
    line. In that case the caller passes the quotation character that is
    still open as `stopchar` and the start of the line (up to and
    including the matching closing quote) is treated as quoted text.

    :param str line: the line to split.
    :param stopchar: the quote character that will terminate an existing
        quoted string or None otherwise.
    :param lower: whether or not to convert the non-quoted parts of the
        line to lowercase.

    :returns: tuple containing a list of the parts of the line split into
        those parts that are not quoted strings and those parts that are
        (as instances of String) as well as the quote character
        corresponding with any quoted string that has not been closed
        before the end of the line.

    """

    def _unquoted(text: str) -> str:
        """
        :returns: the supplied text, lower-cased if the 'lower' argument
            to the parent routine is True.
        """
        return text.lower() if lower else text

    parts = []
    cursor = 0

    if stopchar:
        # The line begins inside a quoted region opened earlier.
        closing = _next_quote(line, quote_char=stopchar)
        if closing == -1:
            # Still no closing quote: the whole line is quoted text.
            return [String(line)], stopchar
        # Step past the closing quote so that it is part of the string.
        cursor = closing + 1
        parts.append(String(line[:cursor]))

    total = len(line)
    while cursor < total:
        opening = _next_quote(line, start=cursor)
        if opening == -1:
            # Nothing but unquoted text remains.
            parts.append(_unquoted(line[cursor:]))
            break
        if opening > cursor:
            parts.append(_unquoted(line[cursor:opening]))
        quote = line[opening]
        closing = _next_quote(line, quote_char=quote, start=opening + 1)
        if closing == -1:
            # The string is not closed on this line; report which
            # quotation character is still open.
            parts.append(String(line[opening:]))
            return parts, quote
        cursor = closing + 1
        parts.append(String(line[opening:cursor]))

    return parts, None
def splitparen(line, paren_open="([", paren_close=")]"):
    """
    Splits a line into top-level parenthesis and not-parenthesised
    parts. E.g.: "a( (1+2)*3) = b(x)" becomes:
    ["a", "( (1+2)*3)", " = b", "(x)"]

    Brackets that appear inside single or double quotes, or directly
    after an unpaired backslash, are treated as ordinary text. If a
    top-level bracket is never closed, the text from that bracket to the
    end of the line is returned as an ordinary string.

    :param str line: the string to split.
    :param str paren_open: The characters that define an open parentheses.
    :param str paren_close: The characters that define a closing
        parentheses.

    :return: List of parenthesised and not-parenthesised parts. Completed \
        top-level bracketed parts are instances of ParenString.
    :rtype: list of str

    The paren_open and paren_close strings must be matched in order:
    paren_open[x] is closed by paren_close[x].

    """
    assert len(paren_open) == len(paren_close)

    # Closing character required by each opening character. 'setdefault'
    # keeps the first pairing should an opening character be repeated.
    closer_for = {}
    for opener, closer in zip(paren_open, paren_close):
        closer_for.setdefault(opener, closer)

    parts = []
    pending = []  # Closing brackets still expected, innermost last
    escaped = False  # True after an odd number of consecutive "\"
    quote = None  # The quote character of the string being read, if any
    begin = 0  # Index at which the current part starts

    for pos, char in enumerate(line):
        if char == "\\":
            escaped = not escaped
        elif escaped:
            # This character is escaped so it is neither a real quote nor
            # a real bracket.
            escaped = False
        elif quote is not None:
            # Inside a quoted string: only the matching quote matters.
            if char == quote:
                quote = None
        elif char in ("'", '"'):
            quote = char
        elif char in closer_for:
            if not pending:
                # A new top-level bracketed part starts here.
                parts.append(line[begin:pos])
                begin = pos
            pending.append(closer_for[char])
        elif pending and char == pending[-1]:
            pending.pop()
            if not pending:
                # That was the bracket closing the top-level part.
                parts.append(ParenString(line[begin : pos + 1]))
                begin = pos + 1

    # Whatever is left over forms a final, separate part.
    if begin != len(line):
        parts.append(line[begin:])
    return parts