# -*- coding: utf-8 -*-
"""String functions.
.. versionadded:: 1.1.0
"""
import math
import re
import unicodedata
import pydash as pyd
from .helpers import NoValue
from .objects import to_string
from ._compat import (
html_unescape,
iteritems,
parse_qsl,
text_type,
urlencode,
urlsplit,
urlunsplit,
_range
)
__all__ = (
'camel_case',
'capitalize',
'chop',
'chop_right',
'chars',
'clean',
'count_substr',
'deburr',
'decapitalize',
'ends_with',
'ensure_ends_with',
'ensure_starts_with',
'escape',
'escape_reg_exp',
'has_substr',
'human_case',
'insert_substr',
'join',
'kebab_case',
'lines',
'lower_case',
'lower_first',
'number_format',
'pad',
'pad_end',
'pad_start',
'pascal_case',
'predecessor',
'prune',
'quote',
'reg_exp_js_match',
'reg_exp_js_replace',
'reg_exp_replace',
'repeat',
'replace',
'replace_end',
'replace_start',
'separator_case',
'series_phrase',
'series_phrase_serial',
'slugify',
'snake_case',
'split',
'start_case',
'starts_with',
'strip_tags',
'substr_left',
'substr_left_end',
'substr_right',
'substr_right_end',
'successor',
'surround',
'swap_case',
'title_case',
'to_lower',
'to_upper',
'trim',
'trim_end',
'trim_start',
'truncate',
'unescape',
'unquote',
'upper_case',
'upper_first',
'url',
'words',
)
HTML_ESCAPES = {
'&': '&',
'<': '<',
'>': '>',
'"': '"',
"'": ''',
'`': '`'
}
DEBURRED_LETTERS = {
'\xC0': 'A',
'\xC1': 'A',
'\xC2': 'A',
'\xC3': 'A',
'\xC4': 'A',
'\xC5': 'A',
'\xE0': 'a',
'\xE1': 'a',
'\xE2': 'a',
'\xE3': 'a',
'\xE4': 'a',
'\xE5': 'a',
'\xC7': 'C',
'\xE7': 'c',
'\xD0': 'D',
'\xF0': 'd',
'\xC8': 'E',
'\xC9': 'E',
'\xCA': 'E',
'\xCB': 'E',
'\xE8': 'e',
'\xE9': 'e',
'\xEA': 'e',
'\xEB': 'e',
'\xCC': 'I',
'\xCD': 'I',
'\xCE': 'I',
'\xCF': 'I',
'\xEC': 'i',
'\xED': 'i',
'\xEE': 'i',
'\xEF': 'i',
'\xD1': 'N',
'\xF1': 'n',
'\xD2': 'O',
'\xD3': 'O',
'\xD4': 'O',
'\xD5': 'O',
'\xD6': 'O',
'\xD8': 'O',
'\xF2': 'o',
'\xF3': 'o',
'\xF4': 'o',
'\xF5': 'o',
'\xF6': 'o',
'\xF8': 'o',
'\xD9': 'U',
'\xDA': 'U',
'\xDB': 'U',
'\xDC': 'U',
'\xF9': 'u',
'\xFA': 'u',
'\xFB': 'u',
'\xFC': 'u',
'\xDD': 'Y',
'\xFD': 'y',
'\xFF': 'y',
'\xC6': 'Ae',
'\xE6': 'ae',
'\xDE': 'Th',
'\xFE': 'th',
'\xDF': 'ss',
'\xD7': ' ',
'\xF7': ' '
}
# Use Javascript style regex to make Lo-Dash compatibility easier.
UPPER = '[A-Z\\xC0-\\xD6\\xD8-\\xDE]'
LOWER = '[a-z\\xDf-\\xF6\\xF8-\\xFF]+'
RE_WORDS = ('/{upper}+(?={upper}{lower})|{upper}?{lower}|{upper}+|[0-9]+/g'
.format(upper=UPPER, lower=LOWER))
RE_LATIN1 = '/[\xC0-\xFF]/g'
[docs]def camel_case(text):
"""Converts `text` to camel case.
Args:
text (str): String to convert.
Returns:
str: String converted to camel case.
Example:
>>> camel_case('FOO BAR_bAz')
'fooBarBAz'
.. versionadded:: 1.1.0
"""
text = ''.join(word.title() for word in words(text))
return text[:1].lower() + text[1:]
[docs]def capitalize(text, strict=True):
"""Capitalizes the first character of `text`.
Args:
text (str): String to capitalize.
strict (bool, optional): Whether to cast rest of string to lower
case. Defaults to ``True``.
Returns:
str: Capitalized string.
Example:
>>> capitalize('once upon a TIME')
'Once upon a time'
>>> capitalize('once upon a TIME', False)
'Once upon a TIME'
.. versionadded:: 1.1.0
.. versionchanged:: 3.0.0
Added `strict` option.
"""
text = pyd.to_string(text)
return (text.capitalize() if strict
else text[:1].upper() + text[1:])
[docs]def chars(text):
"""Split `text` into a list of single characters.
Args:
text (str): String to split up.
Returns:
list: List of individual characters.
Example:
>>> chars('onetwo')
['o', 'n', 'e', 't', 'w', 'o']
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return list(text)
[docs]def chop(text, step):
"""Break up `text` into intervals of length `step`.
Args:
text (str): String to chop.
step (int): Interval to chop `text`.
Returns:
list: List of chopped characters.
If `text` is `None` an empty list is returned.
Example:
>>> chop('abcdefg', 3)
['abc', 'def', 'g']
.. versionadded:: 3.0.0
"""
if text is None:
return []
text = pyd.to_string(text)
if step <= 0:
chopped = [text]
else:
chopped = [text[i:i + step] for i in _range(0, len(text), step)]
return chopped
[docs]def chop_right(text, step):
"""Like :func:`chop` except `text` is chopped from right.
Args:
text (str): String to chop.
step (int): Interval to chop `text`.
Returns:
list: List of chopped characters.
Example:
>>> chop_right('abcdefg', 3)
['a', 'bcd', 'efg']
.. versionadded:: 3.0.0
"""
if text is None:
return []
text = pyd.to_string(text)
if step <= 0:
chopped = [text]
else:
text_len = len(text)
chopped = [text[-(i + step):text_len - i]
for i in _range(0, text_len, step)][::-1]
return chopped
[docs]def clean(text):
"""Trim and replace multiple spaces with a single space.
Args:
text (str): String to clean.
Returns:
str: Cleaned string.
Example:
>>> clean('a b c d')
'a b c d'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return ' '.join(pyd.compact(text.split()))
[docs]def count_substr(text, subtext):
"""Count the occurrences of `subtext` in `text`.
Args:
text (str): Source string to count from.
subtext (str): String to count.
Returns:
int: Number of occurrences of `subtext` in `text`.
Example:
>>> count_substr('aabbccddaabbccdd', 'bc')
2
.. versionadded:: 3.0.0
"""
if text is None or subtext is None:
return 0
text = pyd.to_string(text)
subtext = pyd.to_string(subtext)
return text.count(subtext)
[docs]def deburr(text):
"""Deburrs `text` by converting latin-1 supplementary letters to basic
latin letters.
Args:
text (str): String to deburr.
Returns:
str: Deburred string.
Example:
>>> deburr('déjà vu')
'...
>>> 'deja vu'
'deja vu'
.. versionadded:: 2.0.0
"""
text = pyd.to_string(text)
return reg_exp_js_replace(
text,
RE_LATIN1,
lambda match: DEBURRED_LETTERS.get(match.group(), match.group()))
[docs]def decapitalize(text):
"""Decaptitalizes the first character of `text`.
Args:
text (str): String to decapitalize.
Returns:
str: Decapitalized string.
Example:
>>> decapitalize('FOO BAR')
'fOO BAR'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return text[:1].lower() + text[1:]
[docs]def ends_with(text, target, position=None):
"""Checks if `text` ends with a given target string.
Args:
text (str): String to check.
target (str): String to check for.
position (int, optional): Position to search from. Defaults to
end of `text`.
Returns:
bool: Whether `text` ends with `target`.
Example:
>>> ends_with('abc def', 'def')
True
>>> ends_with('abc def', 4)
False
.. versionadded:: 1.1.0
"""
target = pyd.to_string(target)
text = pyd.to_string(text)
if position is None:
position = len(text)
return text[:position].endswith(target)
[docs]def ensure_ends_with(text, suffix):
"""Append a given suffix to a string, but only if the source string does
not end with that suffix.
Args:
text (str): Source string to append `suffix` to.
suffix (str): String to append to the source string if the source
string does not end with `suffix`.
Returns:
str: source string possibly extended by `suffix`.
Example:
>>> ensure_ends_with('foo bar', '!')
'foo bar!'
>>> ensure_ends_with('foo bar!', '!')
'foo bar!'
.. versionadded:: 2.4.0
"""
text = pyd.to_string(text)
suffix = pyd.to_string(suffix)
return text if text.endswith(suffix) else '{0}{1}'.format(text, suffix)
[docs]def ensure_starts_with(text, prefix):
"""Prepend a given prefix to a string, but only if the source string does
not start with that prefix.
Args:
text (str): Source string to prepend `prefix` to.
suffix (str): String to prepend to the source string if the source
string does not start with `prefix`.
Returns:
str: source string possibly prefixed by `prefix`
Example:
>>> ensure_starts_with('foo bar', 'Oh my! ')
'Oh my! foo bar'
>>> ensure_starts_with('Oh my! foo bar', 'Oh my! ')
'Oh my! foo bar'
.. versionadded:: 2.4.0
"""
text = pyd.to_string(text)
prefix = pyd.to_string(prefix)
return text if text.startswith(prefix) else '{1}{0}'.format(text, prefix)
[docs]def escape(text):
r"""Converts the characters ``&``, ``<``, ``>``, ``"``, ``'``, and ``\```
in `text` to their corresponding HTML entities.
Args:
text (str): String to escape.
Returns:
str: HTML escaped string.
Example:
>>> escape('"1 > 2 && 3 < 4"')
'"1 > 2 && 3 < 4"'
.. versionadded:: 1.0.0
.. versionchanged:: 1.1.0
Moved function to :mod:`pydash.strings`.
"""
text = pyd.to_string(text)
# NOTE: Not using _compat.html_escape because Lo-Dash escapes certain chars
# differently (e.g. "'" isn't escaped by html_escape() but is by Lo-Dash).
return ''.join(HTML_ESCAPES.get(char, char) for char in text)
[docs]def escape_reg_exp(text):
"""Escapes the RegExp special characters in `text`.
Args:
text (str): String to escape.
Returns:
str: RegExp escaped string.
Example:
>>> escape_reg_exp('[()]')
'\\\\[\\\\(\\\\)\\\\]'
.. versionadded:: 1.1.0
.. versionchanged:: 4.0.0
Removed alias ``escape_re``
"""
text = pyd.to_string(text)
return re.escape(text)
[docs]def has_substr(text, subtext):
"""Returns whether `subtext` is included in `text`.
Args:
text (str): String to search.
subtext (str): String to search for.
Returns:
bool: Whether `subtext` is found in `text`.
Example:
>>> has_substr('abcdef', 'bc')
True
>>> has_substr('abcdef', 'bb')
False
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
subtext = pyd.to_string(subtext)
return text.find(subtext) >= 0
[docs]def human_case(text):
"""Converts `text` to human case which has only the first letter
capitalized and each word separated by a space.
Args:
text (str): String to convert.
Returns:
str: String converted to human case.
Example:
>>> human_case('abc-def_hij lmn')
'Abc def hij lmn'
>>> human_case('user_id')
'User'
.. versionadded:: 3.0.0
"""
return (pyd.chain(text)
.snake_case()
.reg_exp_replace('_id$', '')
.replace('_', ' ')
.capitalize()
.value())
[docs]def insert_substr(text, index, subtext):
"""Insert `subtext` in `text` starting at position `index`.
Args:
text (str): String to add substring to.
index (int): String index to insert into.
subtext (str): String to insert.
Returns:
str: Modified string.
Example:
>>> insert_substr('abcdef', 3, '--')
'abc--def'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
subtext = pyd.to_string(subtext)
return text[:index] + subtext + text[index:]
[docs]def join(array, separator=''):
"""Joins an iterable into a string using `separator` between each element.
Args:
array (iterable): Iterable to implode.
separator (str, optional): Separator to using when joining. Defaults to
``''``.
Returns:
str: Joined string.
Example:
>>> join(['a', 'b', 'c']) == 'abc'
True
>>> join([1, 2, 3, 4], '&') == '1&2&3&4'
True
>>> join('abcdef', '-') == 'a-b-c-d-e-f'
True
.. versionadded:: 2.0.0
.. versionchanged:: 4.0.0
Removed alias ``implode``.
"""
return pyd.to_string(separator).join(pyd.map_(array or (), pyd.to_string))
[docs]def kebab_case(text):
"""Converts `text` to kebab case (a.k.a. spinal case).
Args:
text (str): String to convert.
Returns:
str: String converted to kebab case.
Example:
>>> kebab_case('a b c_d-e!f')
'a-b-c-d-e-f'
.. versionadded:: 1.1.0
"""
return separator_case(text, '-')
[docs]def lines(text):
r"""Split lines in `text` into an array.
Args:
text (str): String to split.
Returns:
list: String split by lines.
Example:
>>> lines('a\nb\r\nc')
['a', 'b', 'c']
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return text.splitlines()
[docs]def lower_case(text):
"""Converts string to lower case as space separated words.
Args:
text (str): String to convert.
Returns:
str: String converted to lower case as space separated words.
Example:
>>> lower_case('fooBar')
'foo bar'
>>> lower_case('--foo-Bar--')
'foo bar'
>>> lower_case('/?*Foo10/;"B*Ar')
'foo 10 b ar'
.. versionadded:: 4.0.0
"""
return ' '.join(words(text)).lower()
[docs]def lower_first(text):
"""Converts the first character of string to lower case.
Args:
text (str): String passed in by the user.
Returns:
str: String in which the first character is converted to lower case.
Example:
>>> lower_first('FRED')
'fRED'
>>> lower_first('Foo Bar')
'foo Bar'
>>> lower_first('1foobar')
'1foobar'
>>> lower_first(';foobar')
';foobar'
.. versionadded:: 4.0.0
"""
return text[:1].lower() + text[1:]
return text
[docs]def pad(text, length, chars=' '):
"""Pads `text` on the left and right sides if it is shorter than the
given padding length. The `chars` string may be truncated if the number of
padding characters can't be evenly divided by the padding length.
Args:
text (str): String to pad.
length (int): Amount to pad.
chars (str, optional): Characters to pad with. Defaults to ``" "``.
Returns:
str: Padded string.
Example:
>>> pad('abc', 5)
' abc '
>>> pad('abc', 6, 'x')
'xabcxx'
>>> pad('abc', 5, '...')
'.abc.'
.. versionadded:: 1.1.0
.. versionchanged:: 3.0.0
Fix handling of multiple `chars` so that padded string isn't over
padded.
"""
# pylint: disable=redefined-outer-name
text = pyd.to_string(text)
text_len = len(text)
if text_len >= length:
return text
mid = (length - text_len) / 2.0
left_len = int(math.floor(mid))
right_len = int(math.ceil(mid))
chars = pad_end('', right_len, chars)
return chars[:left_len] + text + chars
[docs]def pad_end(text, length, chars=' '):
"""Pads `text` on the right side if it is shorter than the given padding
length. The `chars` string may be truncated if the number of padding
characters can't be evenly divided by the padding length.
Args:
text (str): String to pad.
length (int): Amount to pad.
chars (str, optional): Characters to pad with. Defaults to ``" "``.
Returns:
str: Padded string.
Example:
>>> pad_end('abc', 5)
'abc '
>>> pad_end('abc', 5, '.')
'abc..'
.. versionadded:: 1.1.0
.. versionchanged:: 4.0.0
Renamed from ``pad_right`` to ``pad_end``.
"""
# pylint: disable=redefined-outer-name
text = pyd.to_string(text)
length = max((length, len(text)))
return (text + repeat(chars, length))[:length]
[docs]def pad_start(text, length, chars=' '):
"""Pads `text` on the left side if it is shorter than the given padding
length. The `chars` string may be truncated if the number of padding
characters can't be evenly divided by the padding length.
Args:
text (str): String to pad.
length (int): Amount to pad.
chars (str, optional): Characters to pad with. Defaults to ``" "``.
Returns:
str: Padded string.
Example:
>>> pad_start('abc', 5)
' abc'
>>> pad_start('abc', 5, '.')
'..abc'
.. versionadded:: 1.1.0
.. versionchanged:: 4.0.0
Renamed from ``pad_left`` to ``pad_start``.
"""
# pylint: disable=redefined-outer-name
text = pyd.to_string(text)
length = max((length, len(text)))
return (repeat(chars, length) + text)[-length:]
[docs]def pascal_case(text, strict=True):
"""Like :func:`camel_case` except the first letter is capitalized.
Args:
text (str): String to convert.
strict (bool, optional): Whether to cast rest of string to lower case.
Defaults to ``True``.
Returns:
str: String converted to class case.
Example:
>>> pascal_case('FOO BAR_bAz')
'FooBarBaz'
>>> pascal_case('FOO BAR_bAz', False)
'FooBarBAz'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
if strict:
text = text.lower()
return capitalize(camel_case(text), strict=False)
[docs]def predecessor(char):
"""Return the predecessor character of `char`.
Args:
char (str): Character to find the predecessor of.
Returns:
str: Predecessor character.
Example:
>>> predecessor('c')
'b'
>>> predecessor('C')
'B'
>>> predecessor('3')
'2'
.. versionadded:: 3.0.0
"""
char = pyd.to_string(char)
return chr(ord(char) - 1)
[docs]def prune(text, length=0, omission='...'):
"""Like :func:`truncate` except it ensures that the pruned string doesn't
exceed the original length, i.e., it avoids half-chopped words when
truncating. If the pruned text + `omission` text is longer than the
original text, then the original text is returned.
Args:
text (str): String to prune.
length (int, optional): Target prune length. Defaults to ``0``.
omission (str, optional): Omission text to append to the end of the
pruned string. Defaults to ``'...'``.
Returns:
str: Pruned string.
Example:
>>> prune('Fe fi fo fum', 5)
'Fe fi...'
>>> prune('Fe fi fo fum', 6)
'Fe fi...'
>>> prune('Fe fi fo fum', 7)
'Fe fi...'
>>> prune('Fe fi fo fum', 8, ',,,')
'Fe fi fo,,,'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
text_len = len(text)
omission_len = len(omission)
if text_len <= length:
return text
# Replace non-alphanumeric chars with whitespace.
def repl(match):
char = match.group(0)
return ' ' if char.upper() == char.lower() else char
subtext = reg_exp_replace(text[:length + 1], r'.(?=\W*\w*$)', repl)
if re.match(r'\w\w', subtext[-2:]):
# Last two characters are alphanumeric. Remove last "word" from end of
# string so that we prune to the next whole word.
subtext = reg_exp_replace(subtext, r'\s*\S+$', '')
else:
# Last character (at least) is whitespace. So remove that character as
# well as any other whitespace.
subtext = subtext[:-1].rstrip()
subtext_len = len(subtext)
# Only add omission text if doing so will result in a string that is
# equal two or smaller in length than the original.
if (subtext_len + omission_len) <= text_len:
text = text[:subtext_len] + omission
return text
[docs]def quote(text, quote_char='"'):
"""Quote a string with another string.
Args:
text (str): String to be quoted.
quote_char (str, optional): the quote character. Defaults to ``"``.
Returns:
str: the quoted string.
Example:
>>> quote('To be or not to be')
'"To be or not to be"'
>>> quote('To be or not to be', "'")
"'To be or not to be'"
.. versionadded:: 2.4.0
"""
return surround(text, quote_char)
[docs]def reg_exp_js_match(text, reg_exp):
"""Return list of matches using Javascript style regular expression.
Args:
text (str): String to evaluate.
reg_exp (str): Javascript style regular expression.
Returns:
list: List of matches.
Example:
>>> reg_exp_js_match('aaBBcc', '/bb/')
[]
>>> reg_exp_js_match('aaBBcc', '/bb/i')
['BB']
>>> reg_exp_js_match('aaBBccbb', '/bb/i')
['BB']
>>> reg_exp_js_match('aaBBccbb', '/bb/gi')
['BB', 'bb']
.. versionadded:: 2.0.0
.. versionchanged:: 3.0.0
Reordered arguments to make `text` first.
.. versionchanged:: 4.0.0
Renamed from ``js_match`` to ``reg_exp_js_match``.
"""
text = pyd.to_string(text)
return js_to_py_re_find(reg_exp)(text)
[docs]def reg_exp_js_replace(text, reg_exp, repl):
"""Replace `text` with `repl` using Javascript style regular expression to
find matches.
Args:
text (str): String to evaluate.
reg_exp (str): Javascript style regular expression.
repl (str): Replacement string.
Returns:
str: Modified string.
Example:
>>> reg_exp_js_replace('aaBBcc', '/bb/', 'X')
'aaBBcc'
>>> reg_exp_js_replace('aaBBcc', '/bb/i', 'X')
'aaXcc'
>>> reg_exp_js_replace('aaBBccbb', '/bb/i', 'X')
'aaXccbb'
>>> reg_exp_js_replace('aaBBccbb', '/bb/gi', 'X')
'aaXccX'
.. versionadded:: 2.0.0
.. versionchanged:: 3.0.0
Reordered arguments to make `text` first.
.. versionchanged:: 4.0.0
Renamed from ``js_replace`` to ``reg_exp_js_replace``.
"""
text = pyd.to_string(text)
if not pyd.is_function(repl):
repl = pyd.to_string(repl)
return js_to_py_reg_exp_replace(reg_exp)(text, repl)
[docs]def reg_exp_replace(text, pattern, repl, ignore_case=False, count=0):
"""Replace occurrences of regex `pattern` with `repl` in `text`.
Optionally, ignore case when replacing. Optionally, set `count` to limit
number of replacements.
Args:
text (str): String to replace.
pattern (str): String pattern to find and replace.
repl (str): String to substitute `pattern` with.
ignore_clase (bool, optional): Whether to ignore case when replacing.
Defaults to ``False``.
count (int, optional): Maximum number of occurrences to replace.
Defaults to ``0`` which replaces all.
Returns:
str: Replaced string.
Example:
>>> reg_exp_replace('aabbcc', 'b', 'X')
'aaXXcc'
>>> reg_exp_replace('aabbcc', 'B', 'X', ignore_case=True)
'aaXXcc'
>>> reg_exp_replace('aabbcc', 'b', 'X', count=1)
'aaXbcc'
>>> reg_exp_replace('aabbcc', '[ab]', 'X')
'XXXXcc'
.. versionadded:: 3.0.0
.. versionchanged:: 4.0.0
Renamed from ``re_replace`` to ``reg_exp_replace``.
"""
if pattern is None:
return pyd.to_string(text)
return replace(text,
pattern,
repl,
ignore_case=ignore_case,
count=count,
escape=False)
[docs]def repeat(text, n=0):
"""Repeats the given string `n` times.
Args:
text (str): String to repeat.
n (int, optional): Number of times to repeat the string.
Returns:
str: Repeated string.
Example:
>>> repeat('.', 5)
'.....'
.. versionadded:: 1.1.0
"""
return pyd.to_string(text) * int(n)
[docs]def replace(text,
pattern,
repl,
ignore_case=False,
count=0,
escape=True,
from_start=False,
from_end=False):
"""Replace occurrences of `pattern` with `repl` in `text`. Optionally,
ignore case when replacing. Optionally, set `count` to limit number of
replacements.
Args:
text (str): String to replace.
pattern (str): String pattern to find and replace.
repl (str): String to substitute `pattern` with.
ignore_clase (bool, optional): Whether to ignore case when replacing.
Defaults to ``False``.
count (int, optional): Maximum number of occurrences to replace.
Defaults to ``0`` which replaces all.
escape (bool, optional): Whether to escape `pattern` when searching.
This is needed if a literal replacement is desired when `pattern`
may contain special regular expression characters. Defaults to
``True``.
from_start (bool, optional): Whether to limit replacement to start of
string.
from_end (bool, optional): Whether to limit replacement to end of
string.
Returns:
str: Replaced string.
Example:
>>> replace('aabbcc', 'b', 'X')
'aaXXcc'
>>> replace('aabbcc', 'B', 'X', ignore_case=True)
'aaXXcc'
>>> replace('aabbcc', 'b', 'X', count=1)
'aaXbcc'
>>> replace('aabbcc', '[ab]', 'X')
'aabbcc'
>>> replace('aabbcc', '[ab]', 'X', escape=False)
'XXXXcc'
.. versionadded:: 3.0.0
.. versionchanged:: 4.1.0
Added ``from_start`` and ``from_end`` arguments.
"""
text = pyd.to_string(text)
if pattern is None:
return text
pattern = pyd.to_string(pattern)
if escape:
pattern = re.escape(pattern)
if from_start and not pattern.startswith('^'):
pattern = '^' + pattern
if from_end and not pattern.endswith('$'):
pattern += '$'
if not pyd.is_function(repl):
repl = pyd.to_string(repl)
flags = re.IGNORECASE if ignore_case else 0
return re.sub(pattern, repl, text, count=count, flags=flags)
[docs]def replace_end(text,
pattern,
repl,
ignore_case=False,
escape=True):
"""Like :func:`replace` except it only replaces `text` with `repl` if
`pattern` mathces the end of `text`.
Args:
text (str): String to replace.
pattern (str): String pattern to find and replace.
repl (str): String to substitute `pattern` with.
ignore_clase (bool, optional): Whether to ignore case when replacing.
Defaults to ``False``.
escape (bool, optional): Whether to escape `pattern` when searching.
This is needed if a literal replacement is desired when `pattern`
may contain special regular expression characters. Defaults to
``True``.
Returns:
str: Replaced string.
Example:
>>> replace_end('aabbcc', 'b', 'X')
'aabbcc'
>>> replace_end('aabbcc', 'c', 'X')
'aabbcX'
.. versionadded:: 4.1.0
"""
return replace(text,
pattern,
repl,
ignore_case=ignore_case,
escape=escape,
from_end=True)
[docs]def replace_start(text,
pattern,
repl,
ignore_case=False,
escape=True):
"""Like :func:`replace` except it only replaces `text` with `repl` if
`pattern` mathces the start of `text`.
Args:
text (str): String to replace.
pattern (str): String pattern to find and replace.
repl (str): String to substitute `pattern` with.
ignore_clase (bool, optional): Whether to ignore case when replacing.
Defaults to ``False``.
escape (bool, optional): Whether to escape `pattern` when searching.
This is needed if a literal replacement is desired when `pattern`
may contain special regular expression characters. Defaults to
``True``.
Returns:
str: Replaced string.
Example:
>>> replace_start('aabbcc', 'b', 'X')
'aabbcc'
>>> replace_start('aabbcc', 'a', 'X')
'Xabbcc'
.. versionadded:: 4.1.0
"""
return replace(text,
pattern,
repl,
ignore_case=ignore_case,
escape=escape,
from_start=True)
[docs]def separator_case(text, separator):
"""Splits `text` on words and joins with `separator`.
Args:
text (str): String to convert.
separator (str): Separator to join words with.
Returns:
str: Converted string.
Example:
>>> separator_case('a!!b___c.d', '-')
'a-b-c-d'
.. versionadded:: 3.0.0
"""
return separator.join(word.lower()
for word in words(text) if word)
[docs]def series_phrase(items, separator=', ', last_separator=' and ', serial=False):
"""Join items into a grammatical series phrase, e.g., ``"item1, item2,
item3 and item4"``.
Args:
items (list): List of string items to join.
separator (str, optional): Item separator. Defaults to ``', '``.
last_separator (str, optional): Last item separator. Defaults to
``' and '``.
serial (bool, optional): Whether to include `separator` with
`last_separator` when number of items is greater than 2. Defaults
to ``False``.
Returns:
str: Joined string.
Example:
Example:
>>> series_phrase(['apples', 'bananas', 'peaches'])
'apples, bananas and peaches'
>>> series_phrase(['apples', 'bananas', 'peaches'], serial=True)
'apples, bananas, and peaches'
>>> series_phrase(['apples', 'bananas', 'peaches'], '; ', ', or ')
'apples; bananas, or peaches'
.. versionadded:: 3.0.0
"""
items = pyd.chain(items).map(pyd.to_string).compact().value()
item_count = len(items)
separator = pyd.to_string(separator)
last_separator = pyd.to_string(last_separator)
if item_count > 2 and serial:
last_separator = separator.rstrip() + last_separator
if item_count >= 2:
items = items[:-2] + [last_separator.join(items[-2:])]
return separator.join(items)
[docs]def series_phrase_serial(items, separator=', ', last_separator=' and '):
"""Join items into a grammatical series phrase using a serial separator,
e.g., ``"item1, item2, item3, and item4"``.
Args:
items (list): List of string items to join.
separator (str, optional): Item separator. Defaults to ``', '``.
last_separator (str, optional): Last item separator. Defaults to
``' and '``.
Returns:
str: Joined string.
Example:
>>> series_phrase_serial(['apples', 'bananas', 'peaches'])
'apples, bananas, and peaches'
.. versionadded:: 3.0.0
"""
return series_phrase(items, separator, last_separator, serial=True)
[docs]def slugify(text, separator='-'):
"""Convert `text` into an ASCII slug which can be used safely in URLs.
Incoming `text` is converted to unicode and noramlzied using the ``NFKD``
form. This results in some accented characters being converted to their
ASCII "equivalent" (e.g. ``é`` is converted to ``e``). Leading and trailing
whitespace is trimmed and any remaining whitespace or other special
characters without an ASCII equivalent are replaced with ``-``.
Args:
text (str): String to slugify.
separator (str, optional): Separator to use. Defaults to ``'-'``.
Returns:
str: Slugified string.
Example:
>>> slugify('This is a slug.') == 'this-is-a-slug'
True
>>> slugify('This is a slug.', '+') == 'this+is+a+slug'
True
.. versionadded:: 3.0.0
"""
normalized = (unicodedata.normalize('NFKD', text_type(pyd.to_string(text)))
.encode('ascii', 'ignore')
.decode('utf8'))
return separator_case(normalized, separator)
[docs]def snake_case(text):
"""Converts `text` to snake case.
Args:
text (str): String to convert.
Returns:
str: String converted to snake case.
Example:
>>> snake_case('This is Snake Case!')
'this_is_snake_case'
.. versionadded:: 1.1.0
.. versionchanged:: 4.0.0
Removed alias ``underscore_case``.
"""
return separator_case(text, '_')
[docs]def split(text, separator=NoValue):
"""Splits `text` on `separator`. If `separator` not provided, then `text`
is split on whitespace. If `separator` is falsey, then `text` is split on
every character.
Args:
text (str): String to explode.
separator (str, optional): Separator string to split on. Defaults to
``NoValue``.
Returns:
list: Split string.
Example:
>>> split('one potato, two potatoes, three potatoes, four!')
['one', 'potato,', 'two', 'potatoes,', 'three', 'potatoes,', 'four!']
>>> split('one potato, two potatoes, three potatoes, four!', ',')
['one potato', ' two potatoes', ' three potatoes', ' four!']
.. versionadded:: 2.0.0
.. versionchanged:: 3.0.0
Changed `separator` default to ``NoValue`` and supported splitting on
whitespace by default.
.. versionchanged:: 4.0.0
Removed alias ``explode``.
"""
text = pyd.to_string(text)
if separator is NoValue:
ret = text.split()
elif separator:
ret = text.split(separator)
else:
ret = chars(text)
return ret
[docs]def start_case(text):
"""Convert `text` to start case.
Args:
text (str): String to convert.
Returns:
str: String converted to start case.
Example:
>>> start_case("fooBar")
'Foo Bar'
.. versionadded:: 3.1.0
"""
text = pyd.to_string(text)
return ' '.join(capitalize(word, strict=False) for word in words(text))
[docs]def starts_with(text, target, position=0):
"""Checks if `text` starts with a given target string.
Args:
text (str): String to check.
target (str): String to check for.
position (int, optional): Position to search from. Defaults to
beginning of `text`.
Returns:
bool: Whether `text` starts with `target`.
Example:
>>> starts_with('abcdef', 'a')
True
>>> starts_with('abcdef', 'b')
False
>>> starts_with('abcdef', 'a', 1)
False
.. versionadded:: 1.1.0
"""
text = pyd.to_string(text)
target = pyd.to_string(target)
return text[position:].startswith(target)
return reg_exp_replace(text, r'<\/?[^>]+>', '')
[docs]def substr_left(text, subtext):
"""Searches `text` from left-to-right for `subtext` and returns a substring
consisting of the characters in `text` that are to the left of `subtext` or
all string if no match found.
Args:
text (str): String to partition.
subtext (str): String to search for.
Returns:
str: Substring to left of `subtext`.
Example:
>>> substr_left('abcdefcdg', 'cd')
'ab'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return text.partition(subtext)[0] if subtext else text
[docs]def substr_left_end(text, subtext):
"""Searches `text` from right-to-left for `subtext` and returns a substring
consisting of the characters in `text` that are to the left of `subtext`
or all string if no match found.
Args:
text (str): String to partition.
subtext (str): String to search for.
Returns:
str: Substring to left of `subtext`.
Example:
>>> substr_left_end('abcdefcdg', 'cd')
'abcdef'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return text.rpartition(subtext)[0] or text if subtext else text
[docs]def substr_right(text, subtext):
"""Searches `text` from right-to-left for `subtext` and returns a substring
consisting of the characters in `text` that are to the right of `subtext`
or all string if no match found.
Args:
text (str): String to partition.
subtext (str): String to search for.
Returns:
str: Substring to right of `subtext`.
Example:
>>> substr_right('abcdefcdg', 'cd')
'efcdg'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return text.partition(subtext)[2] or text if subtext else text
[docs]def substr_right_end(text, subtext):
"""Searches `text` from left-to-right for `subtext` and returns a substring
consisting of the characters in `text` that are to the right of `subtext`
or all string if no match found.
Args:
text (str): String to partition.
subtext (str): String to search for.
Returns:
str: Substring to right of `subtext`.
Example:
>>> substr_right_end('abcdefcdg', 'cd')
'g'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return text.rpartition(subtext)[2] if subtext else text
[docs]def successor(char):
"""Return the successor character of `char`.
Args:
char (str): Character to find the successor of.
Returns:
str: Successor character.
Example:
>>> successor('b')
'c'
>>> successor('B')
'C'
>>> successor('2')
'3'
.. versionadded:: 3.0.0
"""
char = pyd.to_string(char)
return chr(ord(char) + 1)
[docs]def surround(text, wrapper):
"""Surround a string with another string.
Args:
text (str): String to surround with `wrapper`.
wrapper (str): String by which `text` is to be surrounded.
Returns:
str: Surrounded string.
Example:
>>> surround('abc', '"')
'"abc"'
>>> surround('abc', '!')
'!abc!'
.. versionadded:: 2.4.0
"""
return '{1}{0}{1}'.format(pyd.to_string(text), pyd.to_string(wrapper))
[docs]def swap_case(text):
"""Swap case of `text` characters.
Args:
text (str): String to swap case.
Returns:
str: String with swapped case.
Example:
>>> swap_case('aBcDeF')
'AbCdEf'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
return text.swapcase()
[docs]def title_case(text):
"""Convert `text` to title case.
Args:
text (str): String to convert.
Returns:
str: String converted to title case.
Example:
>>> title_case("bob's shop")
"Bob's Shop"
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
# NOTE: Can't use text.title() since it doesn't handle apostrophes.
return ' '.join(word.capitalize() for word in re.split(' ', text))
[docs]def to_lower(text):
"""Converts the given :attr:`text` to lower text.
Args:
text (str): String to convert.
Returns:
str: String converted to lower case.
Example:
>>> to_lower('--Foo-Bar--')
'--foo-bar--'
>>> to_lower('fooBar')
'foobar'
>>> to_lower('__FOO_BAR__')
'__foo_bar__'
.. versionadded:: 4.0.0
"""
return to_string(text).lower()
[docs]def to_upper(text):
"""Converts the given :attr:`text` to upper text.
Args:
text (str): String to convert.
Returns:
str: String converted to upper case.
Example:
>>> to_upper('--Foo-Bar--')
'--FOO-BAR--'
>>> to_upper('fooBar')
'FOOBAR'
>>> to_upper('__FOO_BAR__')
'__FOO_BAR__'
.. versionadded:: 4.0.0
"""
return to_string(text).upper()
[docs]def trim(text, chars=None):
r"""Removes leading and trailing whitespace or specified characters from
`text`.
Args:
text (str): String to trim.
chars (str, optional): Specific characters to remove.
Returns:
str: Trimmed string.
Example:
>>> trim(' abc efg\r\n ')
'abc efg'
.. versionadded:: 1.1.0
"""
# pylint: disable=redefined-outer-name
text = pyd.to_string(text)
return text.strip(chars)
[docs]def trim_end(text, chars=None):
r"""Removes trailing whitespace or specified characters from `text`.
Args:
text (str): String to trim.
chars (str, optional): Specific characters to remove.
Returns:
str: Trimmed string.
Example:
>>> trim_end(' abc efg\r\n ')
' abc efg'
.. versionadded:: 1.1.0
.. versionchanged:: 4.0.0
Renamed from ``trim_right`` to ``trim_end``.
"""
text = pyd.to_string(text)
return text.rstrip(chars)
[docs]def trim_start(text, chars=None):
r"""Removes leading whitespace or specified characters from `text`.
Args:
text (str): String to trim.
chars (str, optional): Specific characters to remove.
Returns:
str: Trimmed string.
Example:
>>> trim_start(' abc efg\r\n ')
'abc efg\r\n '
.. versionadded:: 1.1.0
.. versionchanged:: 4.0.0
Renamed from ``trim_left`` to ``trim_start``.
"""
text = pyd.to_string(text)
return text.lstrip(chars)
[docs]def truncate(text, length=30, omission='...', separator=None):
"""Truncates `text` if it is longer than the given maximum string length.
The last characters of the truncated string are replaced with the omission
string which defaults to ``...``.
Args:
text (str): String to truncate.
length (int, optional): Maximum string length. Defaults to ``30``.
omission (str, optional): String to indicate text is omitted.
separator (mixed, optional): Separator pattern to truncate to.
Returns:
str: Truncated string.
Example:
>>> truncate('hello world', 5)
'he...'
>>> truncate('hello world', 5, '..')
'hel..'
>>> truncate('hello world', 10)
'hello w...'
>>> truncate('hello world', 10, separator=' ')
'hello...'
.. versionadded:: 1.1.0
.. versionchanged:: 4.0.0
Removed alias ``trunc``.
"""
text = pyd.to_string(text)
if len(text) <= length:
return text
omission_len = len(omission)
text_len = length - omission_len
text = text[:text_len]
trunc_len = len(text)
if pyd.is_string(separator):
trunc_len = text.rfind(separator)
elif pyd.is_reg_exp(separator):
last = None
for match in separator.finditer(text):
last = match
if last is not None:
trunc_len = last.start()
return text[:trunc_len] + omission
[docs]def unescape(text):
"""The inverse of :func:`escape`. This method converts the HTML entities
``&``, ``<``, ``>``, ``"``, ``'``, and ````` in
`text` to their corresponding characters.
Args:
text (str): String to unescape.
Returns:
str: HTML unescaped string.
Example:
>>> results = unescape('"1 > 2 && 3 < 4"')
>>> results == '"1 > 2 && 3 < 4"'
True
.. versionadded:: 1.0.0
.. versionchanged:: 1.1.0
Moved to :mod:`pydash.strings`.
"""
text = pyd.to_string(text)
return html_unescape(text)
[docs]def upper_case(text):
"""Converts string to upper case, as space separated words.
Args:
text (str): String to be converted to uppercase.
Returns:
str: String converted to uppercase, as space separated words.
Example:
>>> upper_case('--foo-bar--')
'FOO BAR'
>>> upper_case('fooBar')
'FOO BAR'
>>> upper_case('/?*Foo10/;"B*Ar')
'FOO 10 B AR'
.. versionadded:: 4.0.0
"""
return ' '.join(words(text)).upper()
[docs]def upper_first(text):
"""Converts the first character of string to upper case.
Args:
text (str): String passed in by the user.
Returns:
str: String in which the first character is converted to upper case.
Example:
>>> upper_first('fred')
'Fred'
>>> upper_first('foo bar')
'Foo bar'
>>> upper_first('1foobar')
'1foobar'
>>> upper_first(';foobar')
';foobar'
.. versionadded:: 4.0.0
"""
return text[:1].upper() + text[1:]
[docs]def unquote(text, quote_char='"'):
"""Unquote `text` by removing `quote_char` if `text` begins and ends with
it.
Args:
text (str): String to unquote.
Returns:
str: Unquoted string.
Example:
>>> unquote('"abc"')
'abc'
>>> unquote('"abc"', '#')
'"abc"'
>>> unquote('#abc', '#')
'#abc'
>>> unquote('#abc#', '#')
'abc'
.. versionadded:: 3.0.0
"""
text = pyd.to_string(text)
inner = text[1:-1]
if text == '{0}{1}{0}'.format(quote_char, inner):
text = inner
return text
[docs]def url(*paths, **params):
"""Combines a series of URL paths into a single URL. Optionally, pass in
keyword arguments to append query parameters.
Args:
paths (str): URL paths to combine.
Keyword Args:
params (str, optional): Query parameters.
Returns:
str: URL string.
Example:
>>> link = url('a', 'b', ['c', 'd'], '/', q='X', y='Z')
>>> path, params = link.split('?')
>>> path == 'a/b/c/d/'
True
>>> set(params.split('&')) == set(['q=X', 'y=Z'])
True
.. versionadded:: 2.2.0
"""
paths = pyd.chain(paths).flatten_deep().map(pyd.to_string).value()
paths_list = []
params_list = flatten_url_params(params)
for path in paths:
scheme, netloc, path, query, fragment = urlsplit(path)
query = parse_qsl(query)
params_list += query
paths_list.append(urlunsplit((scheme, netloc, path, '', fragment)))
path = delimitedpathjoin('/', *paths_list)
scheme, netloc, path, query, fragment = urlsplit(path)
query = urlencode(params_list)
return urlunsplit((scheme, netloc, path, query, fragment))
[docs]def words(text, pattern=None):
"""Return list of words contained in `text`.
Args:
text (str): String to split.
pattern (str, optional): Custom pattern to split words on. Defaults to
``None``.
Returns:
list: List of words.
Example:
>>> words('a b, c; d-e')
['a', 'b', 'c', 'd', 'e']
>>> words('fred, barney, & pebbles', '/[^, ]+/g')
['fred', 'barney', '&', 'pebbles']
.. versionadded:: 2.0.0
.. versionchanged:: 3.2.0
Added `pattern` argument.
.. versionchanged:: 3.2.0
Improved matching for one character words.
"""
return reg_exp_js_match(text, pattern or RE_WORDS)
#
# Utility functions not a part of main API
#
def js_to_py_re_find(reg_exp):
"""Return Python regular expression matching function based on Javascript
style regexp.
"""
pattern, options = reg_exp[1:].rsplit('/', 1)
flags = re.I if 'i' in options else 0
def find(text):
if 'g' in options:
results = re.findall(pattern, text, flags=flags)
else:
results = re.search(pattern, text, flags=flags)
if results:
results = [results.group()]
else:
results = []
return results
return find
def js_to_py_reg_exp_replace(reg_exp):
"""Return Python regular expression substitution function based on
Javascript style regexp.
"""
pattern, options = reg_exp[1:].rsplit('/', 1)
count = 0 if 'g' in options else 1
ignore_case = 'i' in options
def _replace(text, repl):
return reg_exp_replace(text,
pattern,
repl,
ignore_case=ignore_case,
count=count)
return _replace
def delimitedpathjoin(delimiter, *paths):
"""Join delimited path using specified delimiter.
>>> assert delimitedpathjoin('.', '') == ''
>>> assert delimitedpathjoin('.', '.') == '.'
>>> assert delimitedpathjoin('.', ['', '.a']) == '.a'
>>> assert delimitedpathjoin('.', ['a', '.']) == 'a.'
>>> assert delimitedpathjoin('.', ['', '.a', '', '', 'b']) == '.a.b'
>>> ret = '.a.b.c.d.e.'
>>> assert delimitedpathjoin('.', ['.a.', 'b.', '.c', 'd', 'e.']) == ret
>>> assert delimitedpathjoin('.', ['a', 'b', 'c']) == 'a.b.c'
>>> ret = 'a.b.c.d.e.f'
>>> assert delimitedpathjoin('.', ['a.b', '.c.d.', '.e.f']) == ret
>>> ret = '.a.b.c.1.'
>>> assert delimitedpathjoin('.', '.', 'a', 'b', 'c', 1, '.') == ret
>>> assert delimitedpathjoin('.', []) == ''
"""
paths = [pyd.to_string(path) for path in pyd.flatten_deep(paths) if path]
if len(paths) == 1:
# Special case where there's no need to join anything.
# Doing this because if path==[delimiter], then an extra delimiter
# would be added if the else clause ran instead.
path = paths[0]
else:
leading = delimiter if paths and paths[0].startswith(delimiter) else ''
trailing = delimiter if paths and paths[-1].endswith(delimiter) else ''
middle = delimiter.join([path.strip(delimiter)
for path in paths if path.strip(delimiter)])
path = ''.join([leading, middle, trailing])
return path
def flatten_url_params(params):
"""Flatten URL params into list of tuples. If any param value is a list or
tuple, then map each value to the param key.
>>> params = [('a', 1), ('a', [2, 3])]
>>> assert flatten_url_params(params) == [('a', 1), ('a', 2), ('a', 3)]
>>> params = {'a': [1, 2, 3]}
>>> assert flatten_url_params(params) == [('a', 1), ('a', 2), ('a', 3)]
"""
if isinstance(params, dict):
params = list(iteritems(params))
flattened = []
for param, value in params:
if isinstance(value, (list, tuple)):
flattened += zip([param] * len(value), value)
else:
flattened.append((param, value))
return flattened