
wg_utilities.functions

Useful functions.

DatetimeFixedUnit

Bases: Enum

Enum for fixed units of time (i.e. not a month or a year).

Values are in seconds.

Source code in wg_utilities/functions/datetime_helpers.py
class DatetimeFixedUnit(Enum):
    """Enum for fixed units of time (i.e. not a month or a year).

    Values are in seconds.
    """

    WEEK = 604800
    DAY = 86400
    HOUR = 3600
    MINUTE = 60
    SECOND = 1
    MILLISECOND = 1e-3
    MICROSECOND = 1e-6
    NANOSECOND = 1e-9

backoff(exceptions=Exception, /, logger=None, *, max_tries=10, max_delay=60, timeout=3600)

Apply an exponential backoff to the decorated function.

The function will be called until it succeeds, the maximum number of tries is reached, or the timeout expires (up to 24 hours, configurable via timeout).

** Be Careful! ** Setting max_tries, max_delay, and timeout to 0 will retry as fast as possible for a whole day! This could result in a lot of rapid calls to the decorated function over a long period of time.

Parameters:

Name Type Description Default
exceptions type[Exception] | tuple[type[Exception], ...]

the exception(s) to catch

Exception
logger Logger

optional logger for logging the exception

None
max_tries int

the maximum number of tries. Setting to 0 will retry forever.

10
max_delay int

the maximum delay in seconds between tries. Setting to 0 will retry as fast as possible.

60
timeout int

the maximum time to wait for the decorated function to complete, in seconds. Setting to 0 will retry for a whole day. Defaults to 1 hour.

3600

Returns:

Name Type Description
Callable Callable[[Callable[P, R]], Callable[P, R]]

the actual decorator

Source code in wg_utilities/functions/decorators.py
def backoff(
    exceptions: type[Exception] | tuple[type[Exception], ...] = Exception,
    /,
    logger: Logger | None = None,
    *,
    max_tries: int = 10,
    max_delay: int = 60,
    timeout: int = 3600,
) -> Callable[[Callable[P, R]], Callable[P, R]]:
    """Apply an exponential backoff to the decorated function.

    The function will be called until it succeeds, the maximum number of tries is
    reached, or the timeout expires (up to 24 hours, configurable via `timeout`).

    ** Be Careful! **
    Setting `max_tries`, `max_delay`, and `timeout` to 0 will retry as fast as possible for a whole day!
    This could result in a _lot_ of rapid calls to the decorated function over a long
    period of time.

    Args:
        exceptions (type[Exception] | tuple[type[Exception], ...]): the exception(s) to catch
        logger (Logger): optional logger for logging the exception
        max_tries (int): the maximum number of tries. Setting to 0 will retry forever.
        max_delay (int): the maximum delay in seconds between tries. Setting to 0 will
            retry as fast as possible.
        timeout (int): the maximum time to wait for the decorated function to complete,
            in seconds. Setting to 0 will retry for a whole day. Defaults to 1 hour.

    Returns:
        Callable: the actual decorator
    """

    timeout = 86400 if timeout <= 0 else min(timeout, 86400)

    def _decorator(func: Callable[P, R]) -> Callable[P, R]:
        """Apply an exponential backoff to the decorated function.

        The function will be called until it succeeds or the maximum number of tries,
        with an exponential delay between tries (up to the maximum delay).

        Args:
            func (Callable): the function being wrapped

        Returns:
            Callable: the inner function
        """

        @wraps(func)
        def worker(*args: P.args, **kwargs: P.kwargs) -> R:
            """Try to run the decorated function and calls the callback function.

            Args:
                *args (Any): any args passed to the inner func
                **kwargs (Any): any kwargs passed to the inner func

            Returns:
                Any: the result of the wrapped function

            Raises:
                Exception: any exception from the decorated function
            """

            start_time = time()

            tries = 0
            delay = 0.1
            while True:
                try:
                    return func(*args, **kwargs)
                except exceptions as exc:  # noqa: PERF203
                    if logger is not None:
                        logger.warning(
                            "Exception caught in backoff decorator (attempt %i/%i, waiting for %fs): %s %s",
                            tries,
                            max_tries,
                            delay,
                            type(exc).__name__,
                            exc,
                        )
                    tries += 1

                    if 0 < max_tries <= tries or (timeout <= (time() - start_time)):
                        raise

                    sleep(delay)
                    delay = min(delay * (2 + random()), max_delay)  # noqa: S311

        return worker

    return _decorator
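
A minimal usage sketch (not from the library's own docs; fetch_flaky_data is a hypothetical function): the decorator is applied with the exception type(s) to retry on, plus the keyword-only limits.

>>> from wg_utilities.functions import backoff
>>> @backoff(ConnectionError, max_tries=5, max_delay=10, timeout=120)
... def fetch_flaky_data():
...     ...  # e.g. an HTTP call that occasionally raises ConnectionError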

chunk_list(lst, chunk_len)

Yield successive n-sized chunks from lst.

Examples:

>>> chunk_list([1, 2, 3, 4, 5, 6, 7], 2)
[1, 2]
[3, 4]
[5, 6]
[7]

Parameters:

Name Type Description Default
lst list

the list to split into chunks

required
chunk_len int

number of items per chunk

required

Yields:

Name Type Description
list list[Any]

an N-sized chunk of the main list

Raises:

Type Description
ValueError

if chunk_len is less than 1

Source code in wg_utilities/functions/_functions.py
def chunk_list(lst: list[Any], chunk_len: int) -> Generator[list[Any], None, None]:
    """Yield successive n-sized chunks from lst.

    Examples:
        >>> chunk_list([1, 2, 3, 4, 5, 6, 7], 2)
        [1, 2]
        [3, 4]
        [5, 6]
        [7]

    Args:
        lst (list): the list to split into chunks
        chunk_len (int): number of items per chunk

    Yields:
        list: an N-sized chunk of the main list

    Raises:
        ValueError: if chunk_len is less than 1
    """

    if chunk_len < 1:
        raise ValueError("`chunk_len` must be a positive integer")

    for i in range(0, len(lst), chunk_len):
        yield lst[i : i + chunk_len]

cleanse_string(value, *, whitespace_amount=None, preserve_newlines=False)

Remove all non-alphanumeric characters from a string.

Parameters:

Name Type Description Default
value str

the input string value

required
whitespace_amount int

the number of spaces to replace each run of whitespace with. Setting to 0 preserves all whitespace as-is, 1 collapses each run to a single space, and so on. Defaults to None, which removes all whitespace.

None
preserve_newlines bool

whether to preserve newlines in the string.

False

Returns:

Name Type Description
str str

the cleansed string

Source code in wg_utilities/functions/string_manipulation.py
def cleanse_string(
    value: str,
    *,
    whitespace_amount: int | None = None,
    preserve_newlines: bool = False,
) -> str:
    """Remove all non-alphanumeric characters from a string.

    Args:
        value (str): the input string value
        whitespace_amount (int, optional): the number of spaces to replace each run of
            whitespace with. Setting to 0 preserves all whitespace as-is, 1 collapses
            each run to a single space, and so on. Defaults to None, which removes all whitespace.
        preserve_newlines (bool, optional): whether to preserve newlines in the string.

    Returns:
        str: the cleansed string
    """
    inner_pattern = "a-zA-Z0-9"

    if preserve_newlines:
        inner_pattern += "\n"

    if whitespace_amount is None:
        return sub(rf"[^{inner_pattern}]", "", value)

    if whitespace_amount == 0:
        return sub(rf"[^{inner_pattern}\s]", "", value)

    return sub(r"\s+", " " * whitespace_amount, sub(rf"[^{inner_pattern}\s]", "", value))
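
Example usage (a sketch based on the source above, not an official doctest):

>>> from wg_utilities.functions import cleanse_string
>>> cleanse_string("Hello, World! 123")
'HelloWorld123'
>>> cleanse_string("Hello, World! 123", whitespace_amount=1)
'Hello World 123'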

flatten_dict(nested_dict, *, join_char='.', exclude_keys=None, exact_keys=False, _parent_key='')

Flatten a nested dictionary into a single level dictionary.

This function recursively traverses a dictionary and flattens any nested JSON so the resultant dict has no values of type dict. This allows for easier processing into Redshift.

Examples:

>>> flatten_dict(
...     {
...         "one": 1,
...         "two": {
...             "three": 3,
...             "four": 4,
...         },
...         "five": {"six": 6},
...     },
...     join_char="-",
...     exclude_keys=["five"],
... )
{
    "one": 1,
    "two-three": 3,
    "two-four": 4,
    "five": {"six": 6}
}
>>> flatten_dict(
...     {
...         "one": 1,
...         "two": {
...             "three": 3,
...             "four": 4,
...         },
...         "five": {"two": {"six": 6}},
...     },
...     join_char="-",
...     exclude_keys=["five-two"],
...     exact_keys=True,
... )

Parameters:

Name Type Description Default
nested_dict dict

the dict to be flattened

required
join_char str

the character(s) to use when joining nested keys to form a single key

'.'
exclude_keys list

list of keys to exclude when flattening the dict

None
exact_keys bool

whether exclude_keys contains the exact flattened key(s), e.g. for {"one": {"two": {"three": 3}}} the exact key would be one.two, or whether keys should be excluded regardless of their parent

False
_parent_key str

the string that keeps track of all the nested keys, for the initial use this should be an empty string, which is the default

''

Returns:

Name Type Description
dict dict[str, Any]

a flattened dict

Source code in wg_utilities/functions/_functions.py
def flatten_dict(
    nested_dict: dict[str, object],
    *,
    join_char: str = ".",
    exclude_keys: list[str] | None = None,
    exact_keys: bool = False,
    _parent_key: str = "",
) -> dict[str, Any]:
    """Flatten a nested dictionary into a single level dictionary.

    This function recursively traverses a dictionary and flattens any nested JSON
    so the resultant dict has no values of type dict. This allows for easier processing
    into Redshift.

    Examples:
        >>> flatten_dict(
        ...     {
        ...         "one": 1,
        ...         "two": {
        ...             "three": 3,
        ...             "four": 4,
        ...         },
        ...         "five": {"six": 6},
        ...     },
        ...     join_char="-",
        ...     exclude_keys=["five"],
        ... )
        {
            "one": 1,
            "two-three": 3,
            "two-four": 4,
            "five": {"six": 6}
        }

        >>> flatten_dict(
        ...     {
        ...         "one": 1,
        ...         "two": {
        ...             "three": 3,
        ...             "four": 4,
        ...         },
        ...         "five": {"two": {"six": 6}},
        ...     },
        ...     join_char="-",
        ...     exclude_keys=["five-two"],
        ...     exact_keys=True,
        ... )

    Args:
        nested_dict (dict): the dict to be flattened
        join_char (str): the character(s) to use when joining nested keys to form a
            single key
        exclude_keys (list): list of keys to exclude when flattening the dict
        exact_keys (bool): whether `exclude_keys` contains the exact flattened key(s),
            e.g. for `{"one": {"two": {"three": 3}}}` the exact key would be `one.two`,
            or whether keys should be excluded regardless of their parent
        _parent_key (str): the string that keeps track of all the nested keys,
            for the initial use this should be an empty string, which is the
            default

    Returns:
        dict: a flattened dict
    """
    output = {}

    for k, v in nested_dict.items():
        new_parent_key = (
            k if not _parent_key or (not exact_keys) else join_char.join([_parent_key, k])
        )
        if (
            isinstance(v, dict)
            and len(v) > 0
            and new_parent_key not in (e_keys := exclude_keys or [])
        ):
            output.update(
                {
                    join_char.join([str(k), str(k2)]): v2
                    for k2, v2 in flatten_dict(
                        v,
                        join_char=join_char,
                        exclude_keys=e_keys,
                        exact_keys=exact_keys,
                        _parent_key=new_parent_key,
                    ).items()
                },
            )
        else:
            output[k] = v

    return output
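
The second doctest above omits its output. Tracing the source shown here, a sketch of the exact_keys=True behaviour with a slightly smaller input (output assumed from the implementation, not taken from the library's docs):

>>> from wg_utilities.functions import flatten_dict
>>> flatten_dict(
...     {"one": 1, "two": {"three": 3}, "five": {"two": {"six": 6}}},
...     join_char="-",
...     exclude_keys=["five-two"],
...     exact_keys=True,
... )
{'one': 1, 'two-three': 3, 'five-two': {'six': 6}}

Only the exact flattened key "five-two" is left unflattened; the top-level "two" key is still expanded.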

force_mkdir(target_path, *, path_is_file=False)

Create all directories needed for the given path.

Parameters:

Name Type Description Default
target_path Path

the path to the directory which needs to be created

required
path_is_file bool

flag for whether the path is for a file, in which case the final part of the path will not be created

False

Returns:

Name Type Description
Path Path

the target_path that was passed in

Source code in wg_utilities/functions/file_management.py
def force_mkdir(target_path: Path, *, path_is_file: bool = False) -> Path:
    """Create all directories needed for the given path.

    Args:
        target_path (Path): the path to the directory which needs to be created
        path_is_file (bool): flag for whether the path is for a file, in which case
            the final part of the path will not be created

    Returns:
        Path: the target_path that was passed in
    """
    if path_is_file:
        target_path.parent.mkdir(exist_ok=True, parents=True)
    else:
        target_path.mkdir(exist_ok=True, parents=True)

    return target_path
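
A usage sketch (the path is illustrative): with path_is_file=True only the parent directories are created, and the original path is returned unchanged.

>>> from pathlib import Path
>>> from wg_utilities.functions import force_mkdir
>>> log_file = force_mkdir(Path("/tmp/my_app/logs/app.log"), path_is_file=True)
>>> # /tmp/my_app/logs/ now exists; app.log itself is not created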

process_json_object(obj, /, *, target_type, target_processor_func, pass_on_fail=True, log_op_func_failures=False, single_keys_to_remove=None)

Generic entry point to process dicts and/or lists.

Raises:

Type Description
InvalidJsonObjectError

if an invalid JSON object/array is passed

Source code in wg_utilities/functions/json.py
def process_json_object(
    obj: JSONObj | JSONArr,
    /,
    *,
    target_type: type[V] | tuple[type[V], ...],
    target_processor_func: TargetProcessorFunc[V],
    pass_on_fail: bool = True,
    log_op_func_failures: bool = False,
    single_keys_to_remove: Sequence[str] | None = None,
) -> None:
    """Generic entry point to process dicts and/or lists.

    Raises:
        InvalidJsonObjectError: if an invalid JSON object/array is passed
    """

    if isinstance(obj, dict):
        traverse_dict(
            obj,
            target_type=target_type,
            target_processor_func=target_processor_func,
            pass_on_fail=pass_on_fail,
            log_op_func_failures=log_op_func_failures,
            single_keys_to_remove=single_keys_to_remove,
        )
    elif isinstance(obj, list):
        process_list(
            obj,
            target_type=target_type,
            target_processor_func=target_processor_func,
            pass_on_fail=pass_on_fail,
            log_op_func_failures=log_op_func_failures,
            single_keys_to_remove=single_keys_to_remove,
        )
    else:
        raise InvalidJsonObjectError(obj)
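
A usage sketch (not from the library's docs): upper-casing every string value in a mixed dict/list structure, in place. The processor function must accept the keyword-only dict_key and list_index arguments.

>>> from wg_utilities.functions import process_json_object
>>> data = {"name": "alice", "tags": ["admin", "dev"], "meta": {"city": "london"}}
>>> def upper(value, *, dict_key=None, list_index=None):
...     return value.upper()
>>> process_json_object(data, target_type=str, target_processor_func=upper)
>>> data
{'name': 'ALICE', 'tags': ['ADMIN', 'DEV'], 'meta': {'city': 'LONDON'}}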

process_list(lst, /, *, target_type, target_processor_func, pass_on_fail=True, log_op_func_failures=False, single_keys_to_remove=None)

Iterate through a list, applying target_processor_func to any target_type instances.

This is used in close conjunction with traverse_dict to recursively process a JSON object and apply a given function to any values of a given type across the whole object.

Failures in the given function can be ignored by setting pass_on_fail to True, and/or logged by setting log_op_func_failures to True. If both are set to True, then the function will log the failure and then continue.

Parameters:

Name Type Description Default
lst list

the list to iterate through

required
target_type type

the target type to apply functions to

required
target_processor_func Callable

a function to apply to instances of target_type

required
pass_on_fail bool

ignore failure in either op function

True
log_op_func_failures bool

log any failures in either op function

False
single_keys_to_remove list

a list of keys that can be "expanded" up to the parent key

None

Raises:

Type Description
Exception

if the target_processor_func fails and pass_on_fail is False

Source code in wg_utilities/functions/json.py
def process_list(
    lst: list[JSONVal],
    /,
    *,
    target_type: type[V] | tuple[type[V], ...],
    target_processor_func: TargetProcessorFunc[V],
    pass_on_fail: bool = True,
    log_op_func_failures: bool = False,
    single_keys_to_remove: Sequence[str] | None = None,
) -> None:
    """Iterate through a list, applying `list_op_func` to any `target_type` instances.

    This is used in close conjunction with `traverse_dict` to recursively process
    a JSON object and apply a given function to any values of a given type across the
    whole object.

    Failures in the given function can be ignored by setting `pass_on_fail` to `True`,
    and/or logged by setting `log_op_func_failures` to `True`. If both are set to
    `True`, then the function will log the failure and then continue.

    Args:
        lst (list): the list to iterate through
        target_type (type): the target type to apply functions to
        target_processor_func (Callable): a function to apply to instances of `target_type`
        pass_on_fail (bool): ignore failure in either op function
        log_op_func_failures (bool): log any failures in either op function
        single_keys_to_remove (list): a list of keys that can be "expanded" up to the parent key

    Raises:
        Exception: if the `target_processor_func` fails and `pass_on_fail` is False
    """
    for i, elem in enumerate(lst):
        if isinstance(elem, target_type):
            try:
                lst[i] = target_processor_func(cast(V, elem), list_index=i)
            except Exception:
                if log_op_func_failures:
                    LOGGER.exception("Unable to process item at index %i", i)

                if not pass_on_fail:
                    raise

        # If the new(?) value is a dict/list, then it needs to be processed
        # before continuing to the next elem in this list
        if isinstance(lst[i], dict | list):
            process_json_object(
                lst[i],  # type: ignore[arg-type]
                target_type=target_type,
                target_processor_func=target_processor_func,
                pass_on_fail=pass_on_fail,
                log_op_func_failures=log_op_func_failures,
                single_keys_to_remove=single_keys_to_remove,
            )

run_cmd(cmd, *, exit_on_error=True, shell=False)

Run commands on the command line.

Parameters:

Name Type Description Default
cmd str

the command to run in the user's terminal

required
exit_on_error bool

whether to raise a RuntimeError if the command returns a non-zero exit code

True
shell bool

flag for running command in shell

False

Returns:

Name Type Description
str str

the output of the command

str str

the error from the command, if it errored

Raises:

Type Description
RuntimeError

if the command has a non-zero exit code

Source code in wg_utilities/functions/processes.py
def run_cmd(
    cmd: str,
    *,
    exit_on_error: bool = True,
    shell: bool = False,
) -> tuple[str, str]:
    """Run commands on the command line.

    Args:
        cmd (str): the command to run in the user's terminal
        exit_on_error (bool): whether to raise a RuntimeError if the command returns a non-zero exit code
        shell (bool): flag for running command in shell

    Returns:
        str: the output of the command
        str: the error from the command, if it errored

    Raises:
        RuntimeError: if the command has a non-zero exit code
    """

    LOGGER.debug("Running command `%s`", cmd)

    popen_input = cmd if shell else COMMAND_PATTERN.split(cmd)[1::2]

    with Popen(
        popen_input,
        stdout=PIPE,
        stderr=PIPE,
        shell=shell,  # noqa: S603
    ) as process:
        output, error = process.communicate()

        error_str = error.decode("utf-8").strip()

        if process.returncode != 0:
            if exit_on_error:
                raise RuntimeError(error_str)

            LOGGER.error(error_str)  # pragma: no cover

    return output.decode("utf-8").strip(), error_str
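
A usage sketch (assumes a POSIX environment where echo is on the PATH):

>>> from wg_utilities.functions import run_cmd
>>> output, error = run_cmd("echo hello")
>>> output
'hello'
>>> error
''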

set_nested_value(*, json_obj, keys, target_value, final_key=None)

Update a nested value in a dictionary.

Parameters:

Name Type Description Default
json_obj dict

the JSON object to update

required
keys list

a list of keys used to traverse the dictionary

required
target_value Any

the value to set at the given location/path

required
final_key str

the final key, the value of which we're actually setting

None
Source code in wg_utilities/functions/json.py
def set_nested_value(
    *,
    json_obj: dict[Any, Any],
    keys: list[str],
    target_value: Any,
    final_key: str | None = None,
) -> None:
    """Update a nested value in a dictionary.

    Args:
        json_obj (dict): the JSON object to update
        keys (list): a list of keys used to traverse the dictionary
        target_value (Any): the value to set at the given location/path
        final_key (str): the final key, the value of which we're actually setting
    """

    final_key = final_key or keys.pop()

    if len(keys) > 0:
        set_nested_value(
            json_obj=json_obj.get(keys.pop(0), {}),
            keys=keys,
            target_value=target_value,
            final_key=final_key,
        )
    else:
        json_obj[final_key] = target_value
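
A usage sketch (the config dict is hypothetical): keys is the full path to the value, including the final key, and the dict is updated in place.

>>> from wg_utilities.functions import set_nested_value
>>> config = {"logging": {"handlers": {"level": "INFO"}}}
>>> set_nested_value(
...     json_obj=config,
...     keys=["logging", "handlers", "level"],
...     target_value="DEBUG",
... )
>>> config
{'logging': {'handlers': {'level': 'DEBUG'}}}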

subclasses_recursive(typ, /, *, class_filter=None, track_visited=False, __visited=None)

Get all subclasses of a class recursively.

Parameters:

Name Type Description Default
typ type

the class to get the subclasses of

required
class_filter None

a function to filter the subclasses

None
track_visited bool

whether to track visited subclasses. Useful for avoiding infinite loops. Defaults to False.

False

Yields:

Name Type Description
type type[Any]

a subclass of the given class

Source code in wg_utilities/functions/subclasses.py
def subclasses_recursive(
    typ: type[Any],
    /,
    *,
    class_filter: None | Callable[[type[Any]], bool] = None,
    track_visited: bool = False,
    __visited: set[type[Any]] | None = None,
) -> Generator[type[Any], None, None]:
    """Get all subclasses of a class recursively.

    Args:
        typ (type): the class to get the subclasses of
        class_filter (None, optional): a function to filter the subclasses
        track_visited (bool, optional): whether to track visited subclasses. Useful for avoiding
            infinite loops. Defaults to False.

    Yields:
        type: a subclass of the given class
    """

    for subclass in typ.__subclasses__():
        if track_visited:
            __visited = __visited or set()
            if subclass in __visited:
                continue

            __visited.add(subclass)

        if class_filter is None or class_filter(subclass):
            yield subclass

        yield from subclasses_recursive(
            subclass,
            class_filter=class_filter,
            track_visited=track_visited,
            __visited=__visited,  # type: ignore[call-arg]
        )
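
A usage sketch with a small, hypothetical class hierarchy:

>>> from wg_utilities.functions import subclasses_recursive
>>> class Animal: ...
>>> class Dog(Animal): ...
>>> class Puppy(Dog): ...
>>> [cls.__name__ for cls in subclasses_recursive(Animal)]
['Dog', 'Puppy']
>>> [
...     cls.__name__
...     for cls in subclasses_recursive(Animal, class_filter=lambda c: c.__name__.startswith("P"))
... ]
['Puppy']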

traverse_dict(obj, /, *, target_type, target_processor_func, pass_on_fail=True, log_op_func_failures=False, single_keys_to_remove=None)

Traverse a dict, applying target_processor_func to any values of type target_type.

Parameters:

Name Type Description Default
obj dict

the JSON object to traverse

required
target_type type

the target type to apply functions to

required
target_processor_func Callable

a function to apply to instances of target_type

required
pass_on_fail bool

ignore failure in either op function

True
log_op_func_failures bool

log any failures in either op function

False
single_keys_to_remove list

a list of keys that can be "expanded" up to the parent key from a dict of length one, e.g.:

{
    "parent_1": "something",
    "parent_2": {
        "uselessKey": "actual value"
    }
}

would go to

{
    "parent_1": "something",
    "parent_2": "actual value"
}

None

Raises:

Type Description
Exception

if the target_processor_func fails and pass_on_fail is False

Source code in wg_utilities/functions/json.py
def traverse_dict(  # noqa: PLR0912
    obj: JSONObj,
    /,
    *,
    target_type: type[V] | tuple[type[V], ...],
    target_processor_func: TargetProcessorFunc[V],
    pass_on_fail: bool = True,
    log_op_func_failures: bool = False,
    single_keys_to_remove: Sequence[str] | None = None,
) -> None:
    """Traverse dict, applying`target_processor_func` to any values of type `target_type`.

    Args:
        obj (dict): the JSON object to traverse
        target_type (type): the target type to apply functions to
        target_processor_func (Callable): a function to apply to instances of `target_type`
        pass_on_fail (bool): ignore failure in either op function
        log_op_func_failures (bool): log any failures in either op function
        single_keys_to_remove (list): a list of keys that can be "expanded" up to the parent key from a dict of
            length one, e.g.:
            ... {
            ...     "parent_1": "something",
            ...     "parent_2": {
            ...         "uselessKey": "actual value"
            ...     }
            ... }
            would go to
            ... {
            ...     "parent_1": "something",
            ...     "parent_2": "actual value"
            ... }

    Raises:
        Exception: if the `target_processor_func` fails and `pass_on_fail` is False
    """
    for k, v in obj.items():
        if isinstance(v, target_type):
            try:
                obj.update({k: target_processor_func(cast(V, v), dict_key=k)})
                if isinstance(obj[k], dict):
                    traverse_dict(
                        # If a dict has been created from a non-dict type (e.g. `loads("{...}")`,
                        # then we need to traverse the current object again, as the new dict may
                        # contain more instances of `target_type`. Otherwise, traverse
                        # the dict (that already existed).
                        obj if target_type is not dict else cast(JSONObj, obj[k]),
                        target_type=target_type,
                        target_processor_func=target_processor_func,
                        pass_on_fail=pass_on_fail,
                        log_op_func_failures=log_op_func_failures,
                        single_keys_to_remove=single_keys_to_remove,
                    )
            except Exception:
                if log_op_func_failures:
                    LOGGER.exception("Unable to process item with key %s", k)
                if not pass_on_fail:
                    raise

            continue

        if isinstance(v, dict):
            matched_single_key = False
            if (
                len(v) == 1
                and single_keys_to_remove is not None
                and (only_key := next(iter(v.keys()))) in single_keys_to_remove
            ):
                matched_single_key = True
                if isinstance(value := v.get(only_key), target_type):
                    try:
                        value = target_processor_func(cast(V, value), dict_key=only_key)
                    except Exception:
                        if log_op_func_failures:
                            LOGGER.exception(
                                "Unable to process item with key %s",
                                k,
                            )
                        if not pass_on_fail:
                            raise

                if isinstance(value, dict):
                    # Wrap the value, so that if the top level key is one
                    # of `single_keys_to_remove` then it's processed
                    # correctly
                    tmp_wrapper: JSONObj = {"-": value}
                    traverse_dict(
                        tmp_wrapper,
                        target_type=target_type,
                        target_processor_func=target_processor_func,
                        pass_on_fail=pass_on_fail,
                        log_op_func_failures=log_op_func_failures,
                        single_keys_to_remove=single_keys_to_remove,
                    )

                    value = tmp_wrapper["-"]

                obj[k] = value

            if not matched_single_key:
                traverse_dict(
                    v,
                    target_type=target_type,
                    target_processor_func=target_processor_func,
                    pass_on_fail=pass_on_fail,
                    log_op_func_failures=log_op_func_failures,
                    single_keys_to_remove=single_keys_to_remove,
                )

            continue

        if isinstance(v, list):
            process_list(
                v,
                target_type=target_type,
                target_processor_func=target_processor_func,
                pass_on_fail=pass_on_fail,
                log_op_func_failures=log_op_func_failures,
                single_keys_to_remove=single_keys_to_remove,
            )
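
A sketch showing single_keys_to_remove (the data is illustrative): the single-key wrapper dict under parent_2 is collapsed up to its parent, and every string value is processed.

>>> from wg_utilities.functions import traverse_dict
>>> data = {"parent_1": "something", "parent_2": {"uselessKey": "actual value"}}
>>> traverse_dict(
...     data,
...     target_type=str,
...     target_processor_func=lambda v, **_: v.upper(),
...     single_keys_to_remove=["uselessKey"],
... )
>>> data
{'parent_1': 'SOMETHING', 'parent_2': 'ACTUAL VALUE'}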

try_float(v, default=0.0)

Try to cast a value to a float, returning a default if it fails.

Examples:

>>> try_float("12.34")
12.34
>>> try_float("ABC", -1)
-1
>>> try_float(1.2, 10)
1.2

Parameters:

Name Type Description Default
v Union[str, bytes, bytearray, SupportsFloat, _SupportsIndex]

The value to be cast to a float

required
default object

The value to be returned if the casting fails

0.0

Returns:

Name Type Description
float object

The value passed in, in float format, or the default

Source code in wg_utilities/functions/_functions.py
def try_float(v: Any, default: Any = 0.0) -> object:
    """Try to cast a value to a float, and returns a default if it fails.

    Examples:
        >>> try_float("12.34")
        12.34

        >>> try_float("ABC", -1)
        -1

        >>> try_float(1.2, 10)
        1.2

    Args:
        v (Union[str, bytes, bytearray, SupportsFloat, _SupportsIndex]): The
            value to be cast to a float
        default (object): The value to be returned if the casting fails

    Returns:
        float: The value passed in, in float format, or the default
    """

    try:
        return float(v)
    except (ValueError, TypeError):
        return default

user_data_dir(*, project_name='WgUtilities', file_name=None, _platform=platform)

Get OS specific data directory path.

Typical user data directories are:

macOS: ~/Library/Application Support
Unix: ~/.local/share (or $XDG_DATA_HOME, if defined)
Win 10: C:\Users\<username>\AppData\Local

For Unix, we follow the XDG spec and support $XDG_DATA_HOME if defined.

Parameters:

Name Type Description Default
project_name str

the name of the project which the utils are running in

'WgUtilities'
file_name Optional[str]

file to be fetched from the data dir

None
_platform str

the platform to get the data dir for

platform

Returns:

Name Type Description
Path Path

full path to the user-specific data dir

Source code in wg_utilities/functions/file_management.py
def user_data_dir(
    *,
    project_name: str = "WgUtilities",
    file_name: str | None = None,
    _platform: str = platform,
) -> Path:
    r"""Get OS specific data directory path.

    Typical user data directories are:
        macOS:    ~/Library/Application Support
        Unix:     ~/.local/share   # or in $XDG_DATA_HOME, if defined
        Win 10:   C:\\Users\\<username>\\AppData\\Local

    For Unix, we follow the XDG spec and support $XDG_DATA_HOME if defined.

    Args:
        project_name (str): the name of the project which the utils are running in
        file_name (Optional[str]): file to be fetched from the data dir
        _platform (str): the platform to get the data dir for

    Returns:
        Path: full path to the user-specific data dir
    """

    # get os specific path
    if _platform.startswith("win"):
        os_path = environ["LOCALAPPDATA"]
    elif _platform.startswith("darwin"):
        os_path = "~/Library/Application Support"
    else:
        # linux
        os_path = getenv("XDG_DATA_HOME", "~/.local/share")

    path = Path(os_path) / project_name

    if file_name:
        return force_mkdir(path.expanduser() / file_name, path_is_file=True)

    return path.expanduser()
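
A usage sketch ("MyProject" and "settings.json" are illustrative). Calling with file_name also creates the directory tree via force_mkdir; without it, the path is returned as-is.

>>> from wg_utilities.functions import user_data_dir
>>> data_dir = user_data_dir(project_name="MyProject")
>>> # e.g. ~/.local/share/MyProject on Linux, ~/Library/Application Support/MyProject on macOS
>>> settings = user_data_dir(project_name="MyProject", file_name="settings.json")
>>> # the MyProject directory is created if missing; settings.json itself is not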

utcnow(unit=None)

datetime.utcnow with optional unit conversion.

Gets the current UTC time and returns it in a chosen unit. If no unit is provided then it is just returned as a datetime.

Parameters:

Name Type Description Default
unit DatetimeFixedUnit

the unit in which to provide the current datetime

None

Returns:

Name Type Description
Union [datetime, int]

the current UTC datetime in the chosen unit

Source code in wg_utilities/functions/datetime_helpers.py
def utcnow(unit: DatetimeFixedUnit | None = None) -> datetime | int:
    """`datetime.utcnow` with optional unit conversion.

    Gets the current UTC time and returns it in a chosen unit. If no unit is
    provided then it is just returned as a datetime.

    Args:
        unit (DatetimeFixedUnit): the unit in which to provide the current datetime

    Returns:
        Union([datetime, int]): the current UTC datetime in the chosen unit
    """

    if not unit:
        return datetime.now(UTC)

    return int(datetime.now(UTC).timestamp() / unit.value)
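
A usage sketch pairing utcnow with DatetimeFixedUnit (both assumed to be importable from wg_utilities.functions, as documented above):

>>> from wg_utilities.functions import DatetimeFixedUnit, utcnow
>>> now = utcnow()                            # timezone-aware datetime in UTC
>>> seconds = utcnow(DatetimeFixedUnit.SECOND)       # current Unix timestamp as an int
>>> millis = utcnow(DatetimeFixedUnit.MILLISECOND)   # the same timestamp, in milliseconds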

datetime_helpers

Helper functions for all things date and time related.

DatetimeFixedUnit

Bases: Enum

Enum for fixed units of time (i.e. not a month or a year).

Values are in seconds.

Source code in wg_utilities/functions/datetime_helpers.py
class DatetimeFixedUnit(Enum):
    """Enum for fixed units of time (i.e. not a month or a year).

    Values are in seconds.
    """

    WEEK = 604800
    DAY = 86400
    HOUR = 3600
    MINUTE = 60
    SECOND = 1
    MILLISECOND = 1e-3
    MICROSECOND = 1e-6
    NANOSECOND = 1e-9

utcnow(unit=None)

datetime.utcnow with optional unit conversion.

Gets the current UTC time and returns it in a chosen unit. If no unit is provided then it is just returned as a datetime.

Parameters:

Name Type Description Default
unit DatetimeFixedUnit

the unit in which to provide the current datetime

None

Returns:

Name Type Description
Union [datetime, int]

the current UTC datetime in the chosen unit

Source code in wg_utilities/functions/datetime_helpers.py
def utcnow(unit: DatetimeFixedUnit | None = None) -> datetime | int:
    """`datetime.utcnow` with optional unit conversion.

    Gets the current UTC time and returns it in a chosen unit. If no unit is
    provided then it is just returned as a datetime.

    Args:
        unit (DatetimeFixedUnit): the unit in which to provide the current datetime

    Returns:
        Union([datetime, int]): the current UTC datetime in the chosen unit
    """

    if not unit:
        return datetime.now(UTC)

    return int(datetime.now(UTC).timestamp() / unit.value)

decorators

Custom decorators.

backoff(exceptions=Exception, /, logger=None, *, max_tries=10, max_delay=60, timeout=3600)

Apply an exponential backoff to the decorated function.

The function will be called until it succeeds, the maximum number of tries is reached, or the timeout expires (up to 24 hours, configurable via timeout).

** Be Careful! ** Setting max_tries, max_delay, and timeout to 0 will retry as fast as possible for a whole day! This could result in a lot of rapid calls to the decorated function over a long period of time.

Parameters:

Name Type Description Default
exceptions type[Exception] | tuple[type[Exception], ...]

the exception(s) to catch

Exception
logger Logger

optional logger for logging the exception

None
max_tries int

the maximum number of tries. Setting to 0 will retry forever.

10
max_delay int

the maximum delay in seconds between tries. Setting to 0 will retry as fast as possible.

60
timeout int

the maximum time to wait for the decorated function to complete, in seconds. Setting to 0 will retry for a whole day. Defaults to 1 hour.

3600

Returns:

Name Type Description
Callable Callable[[Callable[P, R]], Callable[P, R]]

the actual decorator

Source code in wg_utilities/functions/decorators.py
def backoff(
    exceptions: type[Exception] | tuple[type[Exception], ...] = Exception,
    /,
    logger: Logger | None = None,
    *,
    max_tries: int = 10,
    max_delay: int = 60,
    timeout: int = 3600,
) -> Callable[[Callable[P, R]], Callable[P, R]]:
    """Apply an exponential backoff to the decorated function.

    The function will be called until it succeeds, the maximum number of tries is
    reached, or the timeout expires (up to 24 hours, configurable via `timeout`).

    ** Be Careful! **
    Setting `max_tries`, `max_delay`, and `timeout` to 0 will retry as fast as possible for a whole day!
    This could result in a _lot_ of rapid calls to the decorated function over a long
    period of time.

    Args:
        exceptions (type[Exception] | tuple[type[Exception], ...]): the exception(s) to catch
        logger (Logger): optional logger for logging the exception
        max_tries (int): the maximum number of tries. Setting to 0 will retry forever.
        max_delay (int): the maximum delay in seconds between tries. Setting to 0 will
            retry as fast as possible.
        timeout (int): the maximum time to wait for the decorated function to complete,
            in seconds. Setting to 0 will retry for a whole day. Defaults to 1 hour.

    Returns:
        Callable: the actual decorator
    """

    timeout = 86400 if timeout <= 0 else min(timeout, 86400)

    def _decorator(func: Callable[P, R]) -> Callable[P, R]:
        """Apply an exponential backoff to the decorated function.

        The function will be called until it succeeds or the maximum number of tries,
        with an exponential delay between tries (up to the maximum delay).

        Args:
            func (Callable): the function being wrapped

        Returns:
            Callable: the inner function
        """

        @wraps(func)
        def worker(*args: P.args, **kwargs: P.kwargs) -> R:
            """Try to run the decorated function and calls the callback function.

            Args:
                *args (Any): any args passed to the inner func
                **kwargs (Any): any kwargs passed to the inner func

            Returns:
                Any: the result of the wrapped function

            Raises:
                Exception: any exception from the decorated function
            """

            start_time = time()

            tries = 0
            delay = 0.1
            while True:
                try:
                    return func(*args, **kwargs)
                except exceptions as exc:  # noqa: PERF203
                    if logger is not None:
                        logger.warning(
                            "Exception caught in backoff decorator (attempt %i/%i, waiting for %fs): %s %s",
                            tries,
                            max_tries,
                            delay,
                            type(exc).__name__,
                            exc,
                        )
                    tries += 1

                    if 0 < max_tries <= tries or (timeout <= (time() - start_time)):
                        raise

                    sleep(delay)
                    delay = min(delay * (2 + random()), max_delay)  # noqa: S311

        return worker

    return _decorator

file_management

Set of functions for specifically managing files and directories.

force_mkdir(target_path, *, path_is_file=False)

Create all directories needed for the given path.

Parameters:

Name Type Description Default
target_path Path

the path to the directory which needs to be created

required
path_is_file bool

flag for whether the path is for a file, in which case the final part of the path will not be created

False

Returns:

Name Type Description
Path Path

the target_path that was passed in

Source code in wg_utilities/functions/file_management.py
def force_mkdir(target_path: Path, *, path_is_file: bool = False) -> Path:
    """Create all directories needed for the given path.

    Args:
        target_path (Path): the path to the directory which needs to be created
        path_is_file (bool): flag for whether the path is for a file, in which case
            the final part of the path will not be created

    Returns:
        Path: the target_path that was passed in
    """
    if path_is_file:
        target_path.parent.mkdir(exist_ok=True, parents=True)
    else:
        target_path.mkdir(exist_ok=True, parents=True)

    return target_path

user_data_dir(*, project_name='WgUtilities', file_name=None, _platform=platform)

Get OS specific data directory path.

Typical user data directories are:

macOS: ~/Library/Application Support
Unix: ~/.local/share (or $XDG_DATA_HOME, if defined)
Win 10: C:\Users\<username>\AppData\Local

For Unix, we follow the XDG spec and support $XDG_DATA_HOME if defined.

Parameters:

Name Type Description Default
project_name str

the name of the project which the utils are running in

'WgUtilities'
file_name Optional[str]

file to be fetched from the data dir

None
_platform str

the platform to get the data dir for

platform

Returns:

Name Type Description
Path Path

full path to the user-specific data dir

Source code in wg_utilities/functions/file_management.py
def user_data_dir(
    *,
    project_name: str = "WgUtilities",
    file_name: str | None = None,
    _platform: str = platform,
) -> Path:
    r"""Get OS specific data directory path.

    Typical user data directories are:
        macOS:    ~/Library/Application Support
        Unix:     ~/.local/share   # or in $XDG_DATA_HOME, if defined
        Win 10:   C:\\Users\\<username>\\AppData\\Local

    For Unix, we follow the XDG spec and support $XDG_DATA_HOME if defined.

    Args:
        project_name (str): the name of the project which the utils are running in
        file_name (Optional[str]): file to be fetched from the data dir
        _platform (str): the platform to get the data dir for

    Returns:
        Path: full path to the user-specific data dir
    """

    # get os specific path
    if _platform.startswith("win"):
        os_path = environ["LOCALAPPDATA"]
    elif _platform.startswith("darwin"):
        os_path = "~/Library/Application Support"
    else:
        # linux
        os_path = getenv("XDG_DATA_HOME", "~/.local/share")

    path = Path(os_path) / project_name

    if file_name:
        return force_mkdir(path.expanduser() / file_name, path_is_file=True)

    return path.expanduser()

json

Useful functions for working with JSON/dictionaries.

InvalidJsonObjectError

Bases: Exception

Raised when an invalid JSON object/array is passed to process_json_object.

Source code in wg_utilities/functions/json.py
class InvalidJsonObjectError(Exception):
    """Raised when an invalid JSON object/array is passed to `process_json_object`."""

    def __init__(self, obj: Any) -> None:
        """Initialize the exception."""
        super().__init__(
            f"Input object must be a dict or list, not {type(obj)!r}",
        )

__init__(obj)

Initialize the exception.

Source code in wg_utilities/functions/json.py
def __init__(self, obj: Any) -> None:
    """Initialize the exception."""
    super().__init__(
        f"Input object must be a dict or list, not {type(obj)!r}",
    )

TargetProcessorFunc

Bases: Protocol[V_contra]

Typing protocol for the user-defined function passed into the below functions.

Source code in wg_utilities/functions/json.py
class TargetProcessorFunc(Protocol[V_contra]):
    """Typing protocol for the user-defined function passed into the below functions."""

    def __call__(
        self,
        value: V_contra,
        *,
        dict_key: str | None = None,
        list_index: int | None = None,
    ) -> JSONVal:
        """The function to be called on each value in the JSON object."""

__call__(value, *, dict_key=None, list_index=None)

The function to be called on each value in the JSON object.

Source code in wg_utilities/functions/json.py
def __call__(
    self,
    value: V_contra,
    *,
    dict_key: str | None = None,
    list_index: int | None = None,
) -> JSONVal:
    """The function to be called on each value in the JSON object."""

process_json_object(obj, /, *, target_type, target_processor_func, pass_on_fail=True, log_op_func_failures=False, single_keys_to_remove=None)

Generic entry point to process dicts and/or lists.

Raises:

Type Description
InvalidJsonObjectError

if an invalid JSON object/array is passed

Source code in wg_utilities/functions/json.py
def process_json_object(
    obj: JSONObj | JSONArr,
    /,
    *,
    target_type: type[V] | tuple[type[V], ...],
    target_processor_func: TargetProcessorFunc[V],
    pass_on_fail: bool = True,
    log_op_func_failures: bool = False,
    single_keys_to_remove: Sequence[str] | None = None,
) -> None:
    """Generic entry point to process dicts and/or lists.

    Raises:
        InvalidJsonObjectError: if an invalid JSON object/array is passed
    """

    if isinstance(obj, dict):
        traverse_dict(
            obj,
            target_type=target_type,
            target_processor_func=target_processor_func,
            pass_on_fail=pass_on_fail,
            log_op_func_failures=log_op_func_failures,
            single_keys_to_remove=single_keys_to_remove,
        )
    elif isinstance(obj, list):
        process_list(
            obj,
            target_type=target_type,
            target_processor_func=target_processor_func,
            pass_on_fail=pass_on_fail,
            log_op_func_failures=log_op_func_failures,
            single_keys_to_remove=single_keys_to_remove,
        )
    else:
        raise InvalidJsonObjectError(obj)

process_list(lst, /, *, target_type, target_processor_func, pass_on_fail=True, log_op_func_failures=False, single_keys_to_remove=None)

Iterate through a list, applying target_processor_func to any target_type instances.

This is used in close conjunction with traverse_dict to recursively process a JSON object and apply a given function to any values of a given type across the whole object.

Failures in the given function can be ignored by setting pass_on_fail to True, and/or logged by setting log_op_func_failures to True. If both are set to True, then the function will log the failure and then continue.

Parameters:

Name Type Description Default
lst list

the list to iterate through

required
target_type type

the target type to apply functions to

required
target_processor_func Callable

a function to apply to instances of target_type

required
pass_on_fail bool

ignore failure in either op function

True
log_op_func_failures bool

log any failures in either op function

False
single_keys_to_remove list

a list of keys that can be "expanded" up to the parent key

None

Raises:

Type Description
Exception

if the target_processor_func fails and pass_on_fail is False

Source code in wg_utilities/functions/json.py
def process_list(
    lst: list[JSONVal],
    /,
    *,
    target_type: type[V] | tuple[type[V], ...],
    target_processor_func: TargetProcessorFunc[V],
    pass_on_fail: bool = True,
    log_op_func_failures: bool = False,
    single_keys_to_remove: Sequence[str] | None = None,
) -> None:
    """Iterate through a list, applying `list_op_func` to any `target_type` instances.

    This is used in close conjunction with `traverse_dict` to recursively process
    a JSON object and apply a given function to any values of a given type across the
    whole object.

    Failures in the given function can be ignored by setting `pass_on_fail` to `True`,
    and/or logged by setting `log_op_func_failures` to `True`. If both are set to
    `True`, then the function will log the failure and then continue.

    Args:
        lst (list): the list to iterate through
        target_type (type): the target type to apply functions to
        target_processor_func (Callable): a function to apply to instances of `target_type`
        pass_on_fail (bool): ignore failure in either op function
        log_op_func_failures (bool): log any failures in either op function
        single_keys_to_remove (list): a list of keys that can be "expanded" up to the parent key

    Raises:
        Exception: if the `target_processor_func` fails and `pass_on_fail` is False
    """
    for i, elem in enumerate(lst):
        if isinstance(elem, target_type):
            try:
                lst[i] = target_processor_func(cast(V, elem), list_index=i)
            except Exception:
                if log_op_func_failures:
                    LOGGER.exception("Unable to process item at index %i", i)

                if not pass_on_fail:
                    raise

        # If the new(?) value is a dict/list, then it needs to be processed
        # before continuing to the next elem in this list
        if isinstance(lst[i], dict | list):
            process_json_object(
                lst[i],  # type: ignore[arg-type]
                target_type=target_type,
                target_processor_func=target_processor_func,
                pass_on_fail=pass_on_fail,
                log_op_func_failures=log_op_func_failures,
                single_keys_to_remove=single_keys_to_remove,
            )

set_nested_value(*, json_obj, keys, target_value, final_key=None)

Update a nested value in a dictionary.

Parameters:

Name Type Description Default
json_obj dict

the JSON object to update

required
keys list

a list of keys used to traverse the dictionary

required
target_value Any

the value to set at the given location/path

required
final_key str

the final key, the value of which we're actually setting

None
Source code in wg_utilities/functions/json.py
def set_nested_value(
    *,
    json_obj: dict[Any, Any],
    keys: list[str],
    target_value: Any,
    final_key: str | None = None,
) -> None:
    """Update a nested value in a dictionary.

    Args:
        json_obj (dict): the JSON object to update
        keys (list): a list of keys used to traverse the dictionary
        target_value (Any): the value to set at the given location/path
        final_key (str): the final key, the value of which we're actually setting
    """

    final_key = final_key or keys.pop()

    if len(keys) > 0:
        set_nested_value(
            json_obj=json_obj.get(keys.pop(0), {}),
            keys=keys,
            target_value=target_value,
            final_key=final_key,
        )
    else:
        json_obj[final_key] = target_value

traverse_dict(obj, /, *, target_type, target_processor_func, pass_on_fail=True, log_op_func_failures=False, single_keys_to_remove=None)

Traverse a dict, applying target_processor_func to any values of type target_type.

Parameters:

Name Type Description Default
obj dict

the JSON object to traverse

required
target_type type

the target type to apply functions to

required
target_processor_func Callable

a function to apply to instances of target_type

required
pass_on_fail bool

ignore failure in either op function

True
log_op_func_failures bool

log any failures in either op function

False
single_keys_to_remove list

a list of keys that can be "expanded" up to the parent key from a dict of length one, e.g.:

{
    "parent_1": "something",
    "parent_2": {
        "uselessKey": "actual value"
    }
}

would go to

{
    "parent_1": "something",
    "parent_2": "actual value"
}

None

Raises:

Type Description
Exception

if the target_processor_func fails and pass_on_fail is False

Source code in wg_utilities/functions/json.py
def traverse_dict(  # noqa: PLR0912
    obj: JSONObj,
    /,
    *,
    target_type: type[V] | tuple[type[V], ...],
    target_processor_func: TargetProcessorFunc[V],
    pass_on_fail: bool = True,
    log_op_func_failures: bool = False,
    single_keys_to_remove: Sequence[str] | None = None,
) -> None:
    """Traverse dict, applying`target_processor_func` to any values of type `target_type`.

    Args:
        obj (dict): the JSON object to traverse
        target_type (type): the target type to apply functions to
        target_processor_func (Callable): a function to apply to instances of `target_type`
        pass_on_fail (bool): ignore failure in either op function
        log_op_func_failures (bool): log any failures in either op function
        single_keys_to_remove (list): a list of keys that can be "expanded" up to the parent key from a dict of
            length one, e.g.:
            ... {
            ...     "parent_1": "something",
            ...     "parent_2": {
            ...         "uselessKey": "actual value"
            ...     }
            ... }
            would go to
            ... {
            ...     "parent_1": "something",
            ...     "parent_2": "actual value"
            ... }

    Raises:
        Exception: if the `target_processor_func` fails and `pass_on_fail` is False
    """
    for k, v in obj.items():
        if isinstance(v, target_type):
            try:
                obj.update({k: target_processor_func(cast(V, v), dict_key=k)})
                if isinstance(obj[k], dict):
                    traverse_dict(
                        # If a dict has been created from a non-dict type (e.g. `loads("{...}")`),
                        # then we need to traverse the current object again, as the new dict may
                        # contain more instances of `target_type`. Otherwise, traverse
                        # the dict (that already existed).
                        obj if target_type is not dict else cast(JSONObj, obj[k]),
                        target_type=target_type,
                        target_processor_func=target_processor_func,
                        pass_on_fail=pass_on_fail,
                        log_op_func_failures=log_op_func_failures,
                        single_keys_to_remove=single_keys_to_remove,
                    )
            except Exception:
                if log_op_func_failures:
                    LOGGER.exception("Unable to process item with key %s", k)
                if not pass_on_fail:
                    raise

            continue

        if isinstance(v, dict):
            matched_single_key = False
            if (
                len(v) == 1
                and single_keys_to_remove is not None
                and (only_key := next(iter(v.keys()))) in single_keys_to_remove
            ):
                matched_single_key = True
                if isinstance(value := v.get(only_key), target_type):
                    try:
                        value = target_processor_func(cast(V, value), dict_key=only_key)
                    except Exception:
                        if log_op_func_failures:
                            LOGGER.exception(
                                "Unable to process item with key %s",
                                k,
                            )
                        if not pass_on_fail:
                            raise

                if isinstance(value, dict):
                    # Wrap the value, so that if the top level key is one
                    # of `single_keys_to_remove` then it's processed
                    # correctly
                    tmp_wrapper: JSONObj = {"-": value}
                    traverse_dict(
                        tmp_wrapper,
                        target_type=target_type,
                        target_processor_func=target_processor_func,
                        pass_on_fail=pass_on_fail,
                        log_op_func_failures=log_op_func_failures,
                        single_keys_to_remove=single_keys_to_remove,
                    )

                    value = tmp_wrapper["-"]

                obj[k] = value

            if not matched_single_key:
                traverse_dict(
                    v,
                    target_type=target_type,
                    target_processor_func=target_processor_func,
                    pass_on_fail=pass_on_fail,
                    log_op_func_failures=log_op_func_failures,
                    single_keys_to_remove=single_keys_to_remove,
                )

            continue

        if isinstance(v, list):
            process_list(
                v,
                target_type=target_type,
                target_processor_func=target_processor_func,
                pass_on_fail=pass_on_fail,
                log_op_func_failures=log_op_func_failures,
                single_keys_to_remove=single_keys_to_remove,
            )
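
A short usage sketch with hypothetical data (not from the library's docs): uppercase every str value in a nested dict. The processor receives each value plus a dict_key keyword argument, as in the source above.

from wg_utilities.functions.json import traverse_dict

payload = {
    "name": "wg-utilities",
    "meta": {"description": "useful functions"},
}

def upper(value: str, *, dict_key: str | None = None) -> str:
    # Called for every value of `target_type`, with the key it was found under.
    return value.upper()

traverse_dict(
    payload,
    target_type=str,
    target_processor_func=upper,
    pass_on_fail=False,  # re-raise processing errors instead of swallowing them
)

assert payload == {"name": "WG-UTILITIES", "meta": {"description": "USEFUL FUNCTIONS"}}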

processes

Set of functions for managing processes.

run_cmd(cmd, *, exit_on_error=True, shell=False)

Run commands on the command line.

Parameters:

Name Type Description Default
cmd str

the command to run in the user's terminal

required
exit_on_error bool

whether to raise a RuntimeError (and so halt the script) if the command fails

True
shell bool

flag for running command in shell

False

Returns:

Name Type Description
str str

the output of the command

str str

the error from the command, if it errored

Raises:

Type Description
RuntimeError

if the command has a non-zero exit code

Source code in wg_utilities/functions/processes.py
def run_cmd(
    cmd: str,
    *,
    exit_on_error: bool = True,
    shell: bool = False,
) -> tuple[str, str]:
    """Run commands on the command line.

    Args:
        cmd (str): the command to run in the user's terminal
        exit_on_error (bool): whether to raise a RuntimeError (and so halt the script) if the command fails
        shell (bool): flag for running command in shell

    Returns:
        str: the output of the command
        str: the error from the command, if it errored

    Raises:
        RuntimeError: if the command has a non-zero exit code
    """

    LOGGER.debug("Running command `%s`", cmd)

    popen_input = cmd if shell else COMMAND_PATTERN.split(cmd)[1::2]

    with Popen(
        popen_input,
        stdout=PIPE,
        stderr=PIPE,
        shell=shell,  # noqa: S603
    ) as process:
        output, error = process.communicate()

        error_str = error.decode("utf-8").strip()

        if process.returncode != 0:
            if exit_on_error:
                raise RuntimeError(error_str)

            LOGGER.error(error_str)  # pragma: no cover

    return output.decode("utf-8").strip(), error_str
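
A hedged example of both paths; it assumes a POSIX environment with echo and ls on the PATH, and that COMMAND_PATTERN tokenises the command on whitespace.

from wg_utilities.functions.processes import run_cmd

# Successful command: stdout and stderr are returned as stripped strings.
output, error = run_cmd("echo hello world")
assert output == "hello world" and error == ""

# Failing command with exit_on_error=False: the error is logged and returned
# rather than raised as a RuntimeError.
output, error = run_cmd("ls /definitely-not-a-real-path", exit_on_error=False)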

string_manipulation

Set of functions for string manipulation.

cleanse_string(value, *, whitespace_amount=None, preserve_newlines=False)

Remove all non-alphanumeric characters from a string.

Parameters:

Name Type Description Default
value str

the input string value

required
whitespace_amount int

the number of spaces to replace each run of whitespace with. Setting to 0 preserves all whitespace unchanged, 1 collapses each run to a single space, and so on. Defaults to None, which removes all whitespace.

None
preserve_newlines bool

whether to preserve newlines in the string.

False

Returns:

Name Type Description
str str

the cleansed string

Source code in wg_utilities/functions/string_manipulation.py
def cleanse_string(
    value: str,
    *,
    whitespace_amount: int | None = None,
    preserve_newlines: bool = False,
) -> str:
    """Remove all non-alphanumeric characters from a string.

    Args:
        value (str): the input string value
        whitespace_amount (int, optional): the number of spaces to replace each run of
            whitespace with. Setting to 0 preserves all whitespace unchanged, 1 collapses
            each run to a single space, and so on. Defaults to None, which removes all whitespace.
        preserve_newlines (bool, optional): whether to preserve newlines in the string.

    Returns:
        str: the cleansed string
    """
    inner_pattern = "a-zA-Z0-9"

    if preserve_newlines:
        inner_pattern += "\n"

    if whitespace_amount is None:
        return sub(rf"[^{inner_pattern}]", "", value)

    if whitespace_amount == 0:
        return sub(rf"[^{inner_pattern}\s]", "", value)

    return sub(r"\s+", " " * whitespace_amount, sub(rf"[^{inner_pattern}\s]", "", value))

subclasses

Get all subclasses of a class recursively.

subclasses_recursive(typ, /, *, class_filter=None, track_visited=False, __visited=None)

Get all subclasses of a class recursively.

Parameters:

Name Type Description Default
typ type

the class to get the subclasses of

required
class_filter Callable | None

a function to filter the subclasses

None
track_visited bool

whether to track visited subclasses. Useful for avoiding infinite loops. Defaults to False.

False

Yields:

Name Type Description
type type[Any]

a subclass of the given class

Source code in wg_utilities/functions/subclasses.py
def subclasses_recursive(
    typ: type[Any],
    /,
    *,
    class_filter: None | Callable[[type[Any]], bool] = None,
    track_visited: bool = False,
    __visited: set[type[Any]] | None = None,
) -> Generator[type[Any], None, None]:
    """Get all subclasses of a class recursively.

    Args:
        typ (type): the class to get the subclasses of
        class_filter (Callable | None, optional): a function to filter the subclasses
        track_visited (bool, optional): whether to track visited subclasses. Useful for avoiding
            infinite loops. Defaults to False.

    Yields:
        type: a subclass of the given class
    """

    for subclass in typ.__subclasses__():
        if track_visited:
            __visited = __visited or set()
            if subclass in __visited:
                continue

            __visited.add(subclass)

        if class_filter is None or class_filter(subclass):
            yield subclass

        yield from subclasses_recursive(
            subclass,
            class_filter=class_filter,
            track_visited=track_visited,
            __visited=__visited,  # type: ignore[call-arg]
        )
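
A small sketch with hypothetical classes; the yield order shown relies on CPython returning __subclasses__() in definition order.

from wg_utilities.functions.subclasses import subclasses_recursive

class Animal: ...
class Dog(Animal): ...
class Puppy(Dog): ...
class Cat(Animal): ...

# Depth-first: each direct subclass is yielded before its own subclasses are explored.
assert list(subclasses_recursive(Animal)) == [Dog, Puppy, Cat]

# class_filter limits what is yielded, but the recursion still descends through
# filtered-out classes.
assert list(
    subclasses_recursive(Animal, class_filter=lambda cls: cls.__name__.startswith("P")),
) == [Puppy]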

xml

Helper functions specifically for parsing/manipulating XML.

get_nsmap(*, root=None, xml_doc=None, warn_on_defaults=False)

Get the namespace map for an XML document.

Parameters:

Name Type Description Default
root Element

an lxml Element from an XML document

None
xml_doc str

a raw XML document

None
warn_on_defaults bool

log a warning when an empty prefix is found and converted to a default value

False

Returns:

Name Type Description
dict dict[str, str]

a namespace mapping for the provided XML

Raises:

Type Description
ValueError

if neither argument is provided

Source code in wg_utilities/functions/xml.py
def get_nsmap(
    *,
    root: etree._Element | None = None,
    xml_doc: str | None = None,
    warn_on_defaults: bool = False,
) -> dict[str, str]:
    """Get the namespace map for an XML document.

    Args:
        root (Element): an lxml Element from an XML document
        xml_doc (str): a raw XML document
        warn_on_defaults (bool): log a warning when an empty prefix is found and converted to a default value

    Returns:
        dict: a namespace mapping for the provided XML

    Raises:
        ValueError: if neither argument is provided
    """
    if root is None:
        if xml_doc is None:
            raise ValueError("One of `root` or `xml_doc` should be non-null")

        root = etree.fromstring(xml_doc.encode())  # noqa: S320

    nsmap = {}
    default_count = 0
    processed_urls = set()

    prefix: str
    url: str
    for prefix, url in root.xpath(  # type: ignore[misc,assignment,union-attr]
        "//namespace::*",
    ):
        if url in processed_urls:
            continue

        if prefix:
            nsmap[prefix] = url
        else:
            default_prefix = f"default_{default_count}"
            default_count += 1
            if warn_on_defaults:
                LOGGER.warning(
                    "Adding namespace url `%s` with prefix key `%s`",
                    url,
                    default_prefix,
                )

            nsmap[default_prefix] = url

        processed_urls.add(url)

    return nsmap
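
A short sketch against a hypothetical Atom snippet. The unprefixed namespace is keyed as default_0; note that lxml's namespace axis also reports the built-in xml namespace, so the returned mapping may contain an xml entry as well.

from wg_utilities.functions.xml import get_nsmap

xml_doc = """<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
  <title>Example</title>
</feed>"""

nsmap = get_nsmap(xml_doc=xml_doc, warn_on_defaults=True)

assert nsmap["default_0"] == "http://www.w3.org/2005/Atom"  # default (unprefixed) namespace
assert nsmap["media"] == "http://search.yahoo.com/mrss/"  # prefixed namespaces keep their prefix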