Source code for fparser.common.splitline

#!/usr/bin/env python

# Modified work Copyright (c) 2017-2022 Science and Technology
# Facilities Council.
# Modified work Copyright (c) 2017 by J. Henrichs, Bureau of Meteorology
# Original work Copyright (c) 1999-2008 Pearu Peterson

# All rights reserved.

# Modifications made as part of the fparser project are distributed
# under the following license:

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:

# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.

# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.

# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# --------------------------------------------------------------------

# The original software (in the f2py project) was distributed under
# the following license:

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:

#   a. Redistributions of source code must retain the above copyright notice,
#      this list of conditions and the following disclaimer.
#   b. Redistributions in binary form must reproduce the above copyright
#      notice, this list of conditions and the following disclaimer in the
#      documentation and/or other materials provided with the distribution.
#   c. Neither the name of the F2PY project nor the names of its
#      contributors may be used to endorse or promote products derived from
#      this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.

"""
Defines LineSplitter and helper functions.

Original Author: Pearu Peterson <pearu@cens.ioc.ee>
First version created: May 2006

"""

import re
from typing import List, Tuple, Optional, Union



[docs]
class String(str):
    """Class used to represent a *quoted* string."""



class ParenString(str):
    """Class representing a parenthesis string."""


__all__ = ["String", "string_replace_map", "splitquote", "splitparen"]

_f2py_str_findall = re.compile(r"_F2PY_STRING_CONSTANT_\d+_").findall
_is_name = re.compile(r"\w*\Z", re.I).match
_is_simple_str = re.compile(r"\w*\Z", re.I).match
_f2py_findall = re.compile(
    r"(_F2PY_STRING_CONSTANT_\d+_|F2PY_REAL_CONSTANT_\d+_|" r"F2PY_EXPR_TUPLE_\d+)"
).findall
# A valid exponential constant must begin with a digit or a '.' (and be
# preceeded by a non-'word' character or the start of the string).
# We have to exclude '.' from the match for a non-word character as
# otherwise, in a string such as ".5d0", it would be matched by the
# non-capturing group. Since the first group is non-capturing (?:),
# the matched literal is in group 1.
# R417 for real-literal-constant does not permit whitespace.
exponential_constant = re.compile(
    r"(?:[^\w.]|^)((\d+[.]\d*|\d*[.]\d+|\d+)[edED][+-]?\d+(_\w+)?)"
)


class StringReplaceDict(dict):
    """
    Dictionary object that is callable for applying map returned
    by string_replace_map() function.
    """

    def __call__(self, line):
        for key in _f2py_findall(line):
            if key in self:
                # We only replace the occurrence of `key` corresponding to
                # the current result of the findall. This prevents the
                # 'replace' also affecting subsequent matches that may
                # have key as a substring (e.g. 'F2PY_EXPR_TUPLE_10'
                # contains 'F2PY_EXPR_TUPLE_1').
                line = line.replace(key, self[key], 1)
        return line


def memoize(function):
    """Simple memoization decorator.

    :param function: The function to memoize.
    :type function: Callable

    TODO # 472 Python 3.9 comes with a thread-safe and more efficient cache as
    it can be bounded and we are interested in lines that have temporal
    locality. It's the: @functools.lru_cache(maxsize=8)

    """
    memo = {}

    def wrapper(*args, **kwargs):
        key = args
        if kwargs:
            for item in kwargs.items():
                key += item
        result = memo.get(key, None)
        if result is not None:
            return result
        result = function(*args, **kwargs)
        memo[key] = result
        return result

    return wrapper


@memoize

[docs]
def string_replace_map(line, lower=False):
    """
    #. Replaces string constants with symbol `'_F2PY_STRING_CONSTANT_<index>_'`
    #. Replaces (`expression`) with symbol `(F2PY_EXPR_TUPLE_<index>)`
    #. Replaces real numerical constants containing an exponent with symbol
       `F2PY_REAL_CONSTANT_<index>_`

    :param str line: the line of text in which to perform substitutions.
    :param bool lower: whether or not the call to splitquote() should return
        items as lowercase (default is to leave the case unchanged).

    :returns: a new line and the replacement map.
    :rtype: Tuple[str, :py:class:`fparser.common.splitline.StringReplaceDict`]

    """

    str_idx = 0
    const_idx = 0
    parens_idx = 0

    items = []
    string_map = StringReplaceDict()
    rev_string_map = {}
    for item in splitquote(line, lower=lower)[0]:
        if isinstance(item, String) and not _is_simple_str(item[1:-1]):
            key = rev_string_map.get(item)
            if key is None:
                str_idx += 1
                key = "_F2PY_STRING_CONSTANT_{0}_".format(str_idx)
                trimmed = item[1:-1]
                string_map[key] = trimmed
                rev_string_map[trimmed] = key
            items.append(item[0] + key + item[-1])
        else:
            items.append(item)
    newline = "".join(items)

    const_keys = []
    for item in exponential_constant.finditer(newline):
        # Get the first captured group as that corresponds to the literal
        # *without* any preceding non-word character.
        found = item.group(1)

        key = rev_string_map.get(found)
        if key is None:
            const_idx += 1
            key = "F2PY_REAL_CONSTANT_{0}_".format(const_idx)
            string_map[key] = found
            rev_string_map[found] = key
            const_keys.append(key)
        newline = newline.replace(found, key)

    items = []
    expr_keys = []
    for item in splitparen(newline):
        if isinstance(item, ParenString) and not _is_name(item[1:-1].strip()):
            key = rev_string_map.get(item)
            if key is None:
                parens_idx += 1
                key = "F2PY_EXPR_TUPLE_{0}".format(parens_idx)
                trimmed = item[1:-1].strip()
                string_map[key] = trimmed
                rev_string_map[trimmed] = key
                expr_keys.append(key)
            items.append(item[0] + key + item[-1])
        else:
            items.append(item)

    # Ensure that any entries in the map do not themselves contain
    # substitutions
    found_keys = set()
    for key in expr_keys + const_keys:
        entry = string_map[key]
        # Find any keys within this map entry
        included_keys = _f2py_findall(entry)
        if included_keys:
            found_keys = found_keys.union(included_keys)
            for inc_key in included_keys:
                entry = entry.replace(inc_key, string_map[inc_key], 1)
            string_map[key] = entry

    return "".join(items), string_map



def _next_quote(line: str, quote_char: Optional[str] = None, start: int = 0) -> int:
    """
    Find the location of the first quotation char from the specified start position
    (defaults to the beginning of the string).

    In Fortran, quotation marks within quoted strings are escaped through
    repetition, i.e. '""' means '"' and "''" means "'". If the `quote_char` argument
    is supplied then this is taken to mean that we are searching within a quoted
    string and therefore any repeated quotation marks are interpreted as escaped
    quotation marks.

    :param line: the line of text to search.
    :param quote_char: the specific quotation character to search for. If it is not
        specified then both ' and " are searched for.
    :param start: the position in the line from which to search.

    :returns: the index of the quotation char in the supplied string or -1 if
              none is found.
    """
    line_len = len(line)
    i = start
    if quote_char:
        target_quote_chars = [quote_char]
    else:
        target_quote_chars = ["'", '"']

    while i < line_len:
        if line[i] in target_quote_chars:

            if quote_char and i < line_len - 1 and line[i + 1] == line[i]:
                # We're inside a quoted string so this is an escaped quotation
                # character ('' or "").
                i += 2
                continue
            return i
        i += 1
    return -1



[docs]
def splitquote(
    line: str, stopchar: Optional[str] = None, lower: bool = False
) -> Tuple[List[Union[String, str]], Optional[str]]:
    """
    Splits the supplied line of text into parts consisting of regions that
    are not contained within quotes and those that are.

    Allows for the processing of a line that follows on from a previous one
    where a quoted string was begun but not closed by supporting the
    current closing quotation character to be specified.

    :param str line: the line to split.
    :param stopchar: the quote character that will terminate an
                     existing quoted string or None otherwise.
    :param lower: whether or not to convert the non-quoted parts of the line
                  to lowercase.

    :returns: tuple containing a list of the parts of the line split into
              those parts that are not quoted strings and those parts that are
              (as instances of String) as well as the quote character
              corresponding with any quoted string that has not been closed
              before the end of the line.

    """

    def _lower(text: str):
        """
        :returns: the supplied text lower-cased if the 'lower' argument to
                  the parent routine is True.
        """
        if lower:
            return text.lower()
        return text

    segments = []
    i = 0
    pos = 0
    n = len(line)
    if stopchar:
        # We start inside an existing quoted region.
        end = _next_quote(line, quote_char=stopchar)
        if end != -1:
            # Has to be 'end+1' to include quotation char.
            segments.append(String(line[pos : end + 1]))
            pos = end + 1
        else:
            # Didn't find a closing quotation char.
            return [String(line)], stopchar

    while pos < n:
        start = _next_quote(line, start=pos)
        if start == -1:
            # No opening quotation char found
            segments.append(_lower(line[pos:]))
            return segments, None
        if start != pos:
            segments.append(_lower(line[pos:start]))
        end = _next_quote(line, quote_char=line[start], start=start + 1)
        if end == -1:
            # Didn't find a closing quotation char.
            segments.append(String(line[start:]))
            return segments, line[start]
        segments.append(String(line[start : end + 1]))
        pos = end + 1

    return segments, None




[docs]
def splitparen(line, paren_open="([", paren_close=")]"):
    """
    Splits a line into top-level parenthesis and not-parenthesised
    parts. E.g.: "a( (1+2)*3) = b(x)" becomes:
    ["a", "( (1+2)*3)", " = b", "(x)"]
    :param str line: the string to split.
    :param str paren_open: The characters that define an open parentheses.
    :param str paren_close: The characters that define a closing parentheses.
    :return: List of parenthesised and not-parenthesised parts
    :rtype: list of str
    The paren_open and paren_close strings must be matched in order:
    paren_open[x] is closed by paren_close[x].
    """

    assert len(paren_open) == len(paren_close)

    items = []  # Result list
    num_backslashes = 0  # Counts consecutive "\" characters
    # Empty if outside quotes, or set to the starting (and therefore
    # also the ending) quote character while reading text inside quotes.
    inside_quotes_char = ""
    start = 0  # Index of start of current part.
    stack = []  # Stack keeping track of required closing brackets

    for idx, char in enumerate(line):
        if char == "\\":
            num_backslashes = (num_backslashes + 1) % 2
            continue

        # We had an odd number of \, so the next character is neither
        # a real quote or parenthesis character, and can just be added.
        if num_backslashes == 1:
            num_backslashes = 0
            continue

        # If we are reading a quote, keep on reading till closing
        # quote is reached
        if inside_quotes_char != "":
            # Reset inside_quotes_char if we find the closing quote
            if char == inside_quotes_char:
                inside_quotes_char = ""
            continue

        if char == "'" or char == '"':
            inside_quotes_char = char
            continue

        pos = paren_open.find(char)
        if pos > -1:
            if len(stack) == 0:
                # New part starts:
                items.append(line[start:idx])
                start = idx
            stack.append(paren_close[pos])
            continue

        # Found closing bracket
        if len(stack) > 0 and char == stack[-1]:
            stack.pop()
            if len(stack) == 0:
                # Found last closing bracket
                items.append(ParenString(line[start : idx + 1]))
                start = idx + 1

    # Add any leftover characters as a separate item
    if start != len(line):
        items.append(line[start:])
    return items