Skip to content

str_utils

sc_crawler.str_utils #

wrap #

wrap(text, before=' ', after=' ')

Wrap string between before/after strings (default to spaces) if not empty.

Parameters:

Name Type Description Default
text str

A string.

required
before str

Characters to be added before the text.

' '
after str

Characters to be added after the text.

' '
Source code in sc_crawler/str_utils.py
def wrap(text: str, before: str = " ", after: str = " ") -> str:
    """Surround a non-empty string with a prefix and a suffix.

    An empty string is handed back untouched, so no stray padding is produced.

    Args:
        text: The string to be wrapped.
        before: Prefix placed in front of `text` (a single space by default).
        after: Suffix placed behind `text` (a single space by default).

    Returns:
        `text` enclosed in `before` and `after`, or `text` itself when it
        is the empty string.
    """
    if text == "":
        return text
    return before + text + after

space_after #

space_after(text)

Add space after string if not empty.

Source code in sc_crawler/str_utils.py
def space_after(text: str) -> str:
    """Append a single space to a string unless the string is empty."""
    # Drop the leading padding and spell out the trailing space explicitly.
    return wrap(text, before="", after=" ")

snake_case #

snake_case(text)

Convert CamelCase to snake_case.

Parameters:

Name Type Description Default
text str

A CamelCase text.

required

Returns:

Type Description
str

snake_case version of the text.

Examples:

>>> snake_case('DescriptionToComment')
'description_to_comment'
Source code in sc_crawler/str_utils.py
def snake_case(text: str) -> str:
    """Convert CamelCase to snake_case.

    Args:
        text: A CamelCase text.

    Returns:
        snake_case version of the text.

    Examples:
        >>> snake_case('DescriptionToComment')
        'description_to_comment'
    """
    return "_".join(sub("([A-Z][a-z]+)", r" \1", text).split()).lower()

plural #

plural(text)

Super basic implementation of pluralizing an English word.

Note that grammar exceptions are not handled, so better to use a proper NLP method for real use-cases.

Parameters:

Name Type Description Default
text str

A singular noun.

required

Returns:

Type Description
str

Plural form of the noun.

Examples:

>>> plural('dog')
'dogs'
>>> plural('boy') # :facepalm:
'boies'
Source code in sc_crawler/str_utils.py
def plural(text: str) -> str:
    """Super basic implementation of pluralizing an English word.

    Note that grammar exceptions are not handled, so better to use a
    proper NLP method for real use-cases.

    Rules applied, in order:

    1. Words ending in "s", "x", "z", or in "h" preceded by a character
       other than a vowel or one of "d", "g", "k", "p", "r", "t"
       (e.g. "ch", "sh") get "es" appended.
    2. Words ending in a non-vowel followed by "y" have the "y" replaced
       with "ies".
    3. Everything else gets "s" appended.

    Args:
        text: A singular noun.

    Returns:
        Plural form of the noun.

    Examples:
        >>> plural('dog')
        'dogs'
        >>> plural('boy')
        'boys'
        >>> plural('city')
        'cities'
        >>> plural('box')
        'boxes'
    """
    if search("[sxz]$", text) or search("[^aeioudgkprt]h$", text):
        return sub("$", "es", text)
    # Only a consonant + "y" ending turns into "ies"; a vowel + "y" ending
    # (boy, day, key) falls through to the plain "s" suffix below.
    if search("[^aeiou]y$", text):
        return sub("y$", "ies", text)
    return text + "s"

extract_last_number #

extract_last_number(text)

Extract the last non-negative number from a string.

Parameters:

Name Type Description Default
text str

The input string from which to extract the number.

required

Returns:

Type Description
Union[float, None]

The last non-negative number found in the string, or None if no number is found.

Examples:

>>> extract_last_number("foo42")
42.0
>>> extract_last_number("foo24.42bar")
24.42
Source code in sc_crawler/str_utils.py
def extract_last_number(text: str) -> Union[float, None]:
    """Extract the last non-negative number from a string.

    A number is a run of digits with an optional fractional part
    (e.g. "42", "24.42", "42."), or a fractional part with a leading dot
    (e.g. ".5"). Dots that are not part of such a number are ignored, so
    plain punctuation never triggers a conversion error.

    Args:
        text: The input string from which to extract the number.

    Returns:
        The last non-negative number found in the string, or None if no number is found.

    Examples:
        >>> extract_last_number("foo42")
        42.0
        >>> extract_last_number("foo24.42bar")
        24.42
        >>> extract_last_number("v1.2.3")
        2.3
        >>> extract_last_number("No digits here.") is None
        True
    """
    # The capture only accepts well-formed decimals, so `float()` cannot fail
    # on dot-only or multi-dot runs such as "." or "1.2.3". The trailing
    # `[^0-9]*$` guarantees that no further digit follows the captured number.
    match = search(r"(\d+\.?\d*|\.\d+)[^0-9]*$", text)
    return float(match.group(1)) if match else None