
wg_utilities.functions

Useful functions.

DatetimeFixedUnit

Bases: Enum

Enum for fixed units of time (i.e. not a month or a year).

Values are in seconds.

Source code in wg_utilities/functions/datetime_helpers.py
class DatetimeFixedUnit(Enum):
    """Enum for fixed units of time (i.e. not a month or a year).

    Values are in seconds.
    """

    WEEK = 604800
    DAY = 86400
    HOUR = 3600
    MINUTE = 60
    SECOND = 1
    MILLISECOND = 1e-3
    MICROSECOND = 1e-6
    NANOSECOND = 1e-9

backoff(exceptions=Exception, /, logger=None, *, max_tries=10, max_delay=60, timeout=3600)

Apply an exponential backoff to the decorated function.

The function will be called until it succeeds, the maximum number of tries is reached, or the timeout expires (up to 24 hours, configurable via timeout).

** Be Careful! ** Setting max_tries, max_delay, and timeout to 0 will retry as fast as possible for a whole day! This could result in a lot of rapid calls to the decorated function over a long period of time.

Parameters:

Name Type Description Default
exceptions type[Exception] | tuple[type[Exception], ...]

the exception(s) to catch

Exception
logger Logger

optional logger for logging the exception

None
max_tries int

the maximum number of tries. Setting to 0 will retry forever.

10
max_delay int

the maximum delay in seconds between tries. Setting to 0 will retry as fast as possible.

60
timeout int

the maximum time to wait for the decorated function to complete, in seconds. Setting to 0 will retry for a whole day. Defaults to 1 hour.

3600

Returns:

Name Type Description
Callable Callable[[Callable[P, R]], Callable[P, R]]

the actual decorator

Source code in wg_utilities/functions/decorators.py
def backoff(
    exceptions: type[Exception] | tuple[type[Exception], ...] = Exception,
    /,
    logger: Logger | None = None,
    *,
    max_tries: int = 10,
    max_delay: int = 60,
    timeout: int = 3600,
) -> Callable[[Callable[P, R]], Callable[P, R]]:
    """Apply an exponential backoff to the decorated function.

    The function will be called until it succeeds, the maximum number of tries is
    reached, or the timeout expires (up to 24 hours, configurable via `timeout`).

    ** Be Careful! **
    Setting `max_tries`, `max_delay`, and `timeout` to 0 will retry as fast as possible for a whole day!
    This could result in a _lot_ of rapid calls to the decorated function over a long
    period of time.

    Args:
        exceptions (type[Exception] | tuple[type[Exception], ...]): the exception(s) to catch
        logger (Logger): optional logger for logging the exception
        max_tries (int): the maximum number of tries. Setting to 0 will retry forever.
        max_delay (int): the maximum delay in seconds between tries. Setting to 0 will
            retry as fast as possible.
        timeout (int): the maximum time to wait for the decorated function to complete,
            in seconds. Setting to 0 will retry for a whole day. Defaults to 1 hour.

    Returns:
        Callable: the actual decorator
    """

    timeout = 86400 if timeout <= 0 else min(timeout, 86400)

    def _decorator(func: Callable[P, R]) -> Callable[P, R]:
        """Apply an exponential backoff to the decorated function.

        The function will be called until it succeeds or the maximum number of tries,
        with an exponential delay between tries (up to the maximum delay).

        Args:
            func (Callable): the function being wrapped

        Returns:
            Callable: the inner function
        """

        @wraps(func)
        def worker(*args: P.args, **kwargs: P.kwargs) -> R:
            """Try to run the decorated function and calls the callback function.

            Args:
                *args (Any): any args passed to the inner func
                **kwargs (Any): any kwargs passed to the inner func

            Returns:
                Any: the result of the wrapped function

            Raises:
                Exception: any exception from the decorated function
            """

            start_time = time()

            tries = 0
            delay = 0.1
            while True:
                try:
                    return func(*args, **kwargs)
                except exceptions as exc:  # noqa: PERF203
                    if logger is not None:
                        logger.warning(
                            "Exception caught in backoff decorator (attempt %i/%i, waiting for %fs): %s %s",
                            tries,
                            max_tries,
                            delay,
                            type(exc).__name__,
                            exc,
                        )
                    tries += 1

                    if 0 < max_tries <= tries or (timeout <= (time() - start_time)):
                        raise

                    sleep(delay)
                    delay = min(delay * (2 + random()), max_delay)  # noqa: S311

        return worker

    return _decorator
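
A minimal usage sketch (not from the library's own docs; fetch_flaky_data is a hypothetical function): the decorator is applied with the exception type(s) to retry on, plus the keyword-only limits.

>>> from wg_utilities.functions import backoff
>>> @backoff(ConnectionError, max_tries=5, max_delay=10, timeout=120)
... def fetch_flaky_data():
...     ...  # e.g. an HTTP call that occasionally raises ConnectionError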

chunk_list(lst, chunk_len)

Yield successive n-sized chunks from lst.

Examples:

>>> chunk_list([1, 2, 3, 4, 5, 6, 7], 2)
[1, 2]
[3, 4]
[5, 6]
[7]

Parameters:

Name Type Description Default
lst list

the list to split into chunks

required
chunk_len int

number of items per chunk

required

Yields:

Name Type Description
list list[Any]

an N-sized chunk of the main list

Raises:

Type Description
ValueError

if chunk_len is less than 1

Source code in wg_utilities/functions/_functions.py
def chunk_list(lst: list[Any], chunk_len: int) -> Generator[list[Any], None, None]:
    """Yield successive n-sized chunks from lst.

    Examples:
        >>> chunk_list([1, 2, 3, 4, 5, 6, 7], 2)
        [1, 2]
        [3, 4]
        [5, 6]
        [7]

    Args:
        lst (list): the list to split into chunks
        chunk_len (int): number of items per chunk

    Yields:
        list: an N-sized chunk of the main list

    Raises:
        ValueError: if chunk_len is less than 1
    """

    if chunk_len < 1:
        raise ValueError("`chunk_len` must be a positive integer")

    for i in range(0, len(lst), chunk_len):
        yield lst[i : i + chunk_len]

cleanse_string(value, *, whitespace_amount=None, preserve_newlines=False)

Remove all non-alphanumeric characters from a string.

Parameters:

Name Type Description Default
value str

the input string value

required
whitespace_amount int

the number of spaces to replace each run of whitespace with. Setting to 0 preserves all whitespace as-is, 1 collapses each run to a single space, and so on. Defaults to None, which removes all whitespace.

None
preserve_newlines bool

whether to preserve newlines in the string.

False

Returns:

Name Type Description
str str

the cleansed string

Source code in wg_utilities/functions/string_manipulation.py
def cleanse_string(
    value: str,
    *,
    whitespace_amount: int | None = None,
    preserve_newlines: bool = False,
) -> str:
    """Remove all non-alphanumeric characters from a string.

    Args:
        value (str): the input string value
        whitespace_amount (int, optional): the number of spaces to replace each run of
            whitespace with. Setting to 0 preserves all whitespace as-is, 1 collapses
            each run to a single space, and so on. Defaults to None, which removes all whitespace.
        preserve_newlines (bool, optional): whether to preserve newlines in the string.

    Returns:
        str: the cleansed string
    """
    inner_pattern = "a-zA-Z0-9"

    if preserve_newlines:
        inner_pattern += "\n"

    if whitespace_amount is None:
        return sub(rf"[^{inner_pattern}]", "", value)

    if whitespace_amount == 0:
        return sub(rf"[^{inner_pattern}\s]", "", value)

    return sub(r"\s+", " " * whitespace_amount, sub(rf"[^{inner_pattern}\s]", "", value))
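
Example usage (a sketch based on the source above, not an official doctest):

>>> from wg_utilities.functions import cleanse_string
>>> cleanse_string("Hello, World! 123")
'HelloWorld123'
>>> cleanse_string("Hello, World! 123", whitespace_amount=1)
'Hello World 123'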

flatten_dict(nested_dict, *, join_char='.', exclude_keys=None, exact_keys=False, _parent_key='')

Flatten a nested dictionary into a single level dictionary.

This function recursively traverses a dictionary and flattens any nested JSON so the resultant dict has no values of type dict. This allows for easier processing into Redshift.

Examples:

>>> flatten_dict(
...     {
...         "one": 1,
...         "two": {
...             "three": 3,
...             "four": 4,
...         },
...         "five": {"six": 6},
...     },
...     join_char="-",
...     exclude_keys=["five"],
... )
{
    "one": 1,
    "two-three": 3,
    "two-four": 4,
    "five": {"six": 6}
}
>>> flatten_dict(
...     {
...         "one": 1,
...         "two": {
...             "three": 3,
...             "four": 4,
...         },
...         "five": {"two": {"six": 6}},
...     },
...     join_char="-",
...     exclude_keys=["five-two"],
...     exact_keys=True,
... )

Parameters:

Name Type Description Default
nested_dict dict

the dict to be flattened

required
join_char str

the character(s) to use when joining nested keys to form a single key

'.'
exclude_keys list

list of keys to exclude when flattening the dict

None
exact_keys bool

whether exclude_keys contains the exact flattened key(s), e.g. for {"one": {"two": {"three": 3}}} the exact key would be one.two, or whether keys should be excluded regardless of their parent

False
_parent_key str

the string that keeps track of all the nested keys, for the initial use this should be an empty string, which is the default

''

Returns:

Name Type Description
dict dict[str, Any]

a flattened dict

Source code in wg_utilities/functions/_functions.py
def flatten_dict(
    nested_dict: dict[str, object],
    *,
    join_char: str = ".",
    exclude_keys: list[str] | None = None,
    exact_keys: bool = False,
    _parent_key: str = "",
) -> dict[str, Any]:
    """Flatten a nested dictionary into a single level dictionary.

    This function recursively traverses a dictionary and flattens any nested JSON
    so the resultant dict has no values of type dict. This allows for easier processing
    into Redshift.

    Examples:
        >>> flatten_dict(
        ...     {
        ...         "one": 1,
        ...         "two": {
        ...             "three": 3,
        ...             "four": 4,
        ...         },
        ...         "five": {"six": 6},
        ...     },
        ...     join_char="-",
        ...     exclude_keys=["five"],
        ... )
        {
            "one": 1,
            "two-three": 3,
            "two-four": 4,
            "five": {"six": 6}
        }

        >>> flatten_dict(
        ...     {
        ...         "one": 1,
        ...         "two": {
        ...             "three": 3,
        ...             "four": 4,
        ...         },
        ...         "five": {"two": {"six": 6}},
        ...     },
        ...     join_char="-",
        ...     exclude_keys=["five-two"],
        ...     exact_keys=True,
        ... )

    Args:
        nested_dict (dict): the dict to be flattened
        join_char (str): the character(s) to use when joining nested keys to form a
            single key
        exclude_keys (list): list of keys to exclude when flattening the dict
        exact_keys (bool): whether `exclude_keys` contains the exact flattened key(s),
            e.g. for `{"one": {"two": {"three": 3}}}` the exact key would be `one.two`,
            or whether keys should be excluded regardless of their parent
        _parent_key (str): the string that keeps track of all the nested keys,
            for the initial use this should be an empty string, which is the
            default

    Returns:
        dict: a flattened dict
    """
    output = {}

    for k, v in nested_dict.items():
        new_parent_key = (
            k if not _parent_key or (not exact_keys) else join_char.join([_parent_key, k])
        )
        if (
            isinstance(v, dict)
            and len(v) > 0
            and new_parent_key not in (e_keys := exclude_keys or [])
        ):
            output.update(
                {
                    join_char.join([str(k), str(k2)]): v2
                    for k2, v2 in flatten_dict(
                        v,
                        join_char=join_char,
                        exclude_keys=e_keys,
                        exact_keys=exact_keys,
                        _parent_key=new_parent_key,
                    ).items()
                },
            )
        else:
            output[k] = v

    return output
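
The second doctest above omits its output. Tracing the source shown here, a sketch of the exact_keys=True behaviour with a slightly smaller input (output assumed from the implementation, not taken from the library's docs):

>>> from wg_utilities.functions import flatten_dict
>>> flatten_dict(
...     {"one": 1, "two": {"three": 3}, "five": {"two": {"six": 6}}},
...     join_char="-",
...     exclude_keys=["five-two"],
...     exact_keys=True,
... )
{'one': 1, 'two-three': 3, 'five-two': {'six': 6}}

Only the exact flattened key "five-two" is left unflattened; the top-level "two" key is still expanded.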

force_mkdir(target_path, *, path_is_file=False)

Create all directories needed for the given path.

Parameters:

Name Type Description Default
target_path Path

the path to the directory which needs to be created

required
path_is_file bool

flag for whether the path is for a file, in which case the final part of the path will not be created

False

Returns:

Name Type Description
Path Path

the target_path that was passed in

Source code in wg_utilities/functions/file_management.py
def force_mkdir(target_path: Path, *, path_is_file: bool = False) -> Path:
    """Create all directories needed for the given path.

    Args:
        target_path (Path): the path to the directory which needs to be created
        path_is_file (bool): flag for whether the path is for a file, in which case
            the final part of the path will not be created

    Returns:
        Path: the target_path that was passed in
    """
    if path_is_file:
        target_path.parent.mkdir(exist_ok=True, parents=True)
    else:
        target_path.mkdir(exist_ok=True, parents=True)

    return target_path
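
A usage sketch (the path is illustrative): with path_is_file=True only the parent directories are created, and the original path is returned unchanged.

>>> from pathlib import Path
>>> from wg_utilities.functions import force_mkdir
>>> log_file = force_mkdir(Path("/tmp/my_app/logs/app.log"), path_is_file=True)
>>> # /tmp/my_app/logs/ now exists; app.log itself is not created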

process_json_object(obj, /, *, target_type, target_processor_func, pass_on_fail=True, log_op_func_failures=False, single_keys_to_remove=None)

Generic entry point to process dicts and/or lists.

Raises:

Type Description
InvalidJsonObjectError

if an invalid JSON object/array is passed

Source code in wg_utilities/functions/json.py
def process_json_object(
    obj: JSONObj | JSONArr,
    /,
    *,
    target_type: type[V] | tuple[type[V], ...],
    target_processor_func: TargetProcessorFunc[V],
    pass_on_fail: bool = True,
    log_op_func_failures: bool = False,
    single_keys_to_remove: Sequence[str] | None = None,
) -> None:
    """Generic entry point to process dicts and/or lists.

    Raises:
        InvalidJsonObjectError: if an invalid JSON object/array is passed
    """

    if isinstance(obj, dict):
        traverse_dict(
            obj,
            target_type=target_type,
            target_processor_func=target_processor_func,
            pass_on_fail=pass_on_fail,
            log_op_func_failures=log_op_func_failures,
            single_keys_to_remove=single_keys_to_remove,
        )
    elif isinstance(obj, list):
        process_list(
            obj,
            target_type=target_type,
            target_processor_func=target_processor_func,
            pass_on_fail=pass_on_fail,
            log_op_func_failures=log_op_func_failures,
            single_keys_to_remove=single_keys_to_remove,
        )
    else:
        raise InvalidJsonObjectError(obj)
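
A usage sketch (not from the library's docs): upper-casing every string value in a mixed dict/list structure, in place. The processor function must accept the keyword-only dict_key and list_index arguments.

>>> from wg_utilities.functions import process_json_object
>>> data = {"name": "alice", "tags": ["admin", "dev"], "meta": {"city": "london"}}
>>> def upper(value, *, dict_key=None, list_index=None):
...     return value.upper()
>>> process_json_object(data, target_type=str, target_processor_func=upper)
>>> data
{'name': 'ALICE', 'tags': ['ADMIN', 'DEV'], 'meta': {'city': 'LONDON'}}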

process_list(lst, /, *, target_type, target_processor_func, pass_on_fail=True, log_op_func_failures=False, single_keys_to_remove=None)

Iterate through a list, applying target_processor_func to any target_type instances.

This is used in close conjunction with traverse_dict to recursively process a JSON object and apply a given function to any values of a given type across the whole object.

Failures in the given function can be ignored by setting pass_on_fail to True, and/or logged by setting log_op_func_failures to True. If both are set to True, then the function will log the failure and then continue.

Parameters:

Name Type Description Default
lst list

the list to iterate through

required
target_type type

the target type to apply functions to

required
target_processor_func Callable

a function to apply to instances of target_type

required
pass_on_fail bool

ignore failure in either op function

True
log_op_func_failures bool

log any failures in either op function

False
single_keys_to_remove list

a list of keys that can be "expanded" up to the parent key

None

Raises:

Type Description
Exception

if the target_processor_func fails and pass_on_fail is False

Source code in wg_utilities/functions/json.py
def process_list(
    lst: list[JSONVal],
    /,
    *,
    target_type: type[V] | tuple[type[V], ...],
    target_processor_func: TargetProcessorFunc[V],
    pass_on_fail: bool = True,
    log_op_func_failures: bool = False,
    single_keys_to_remove: Sequence[str] | None = None,
) -> None:
    """Iterate through a list, applying `list_op_func` to any `target_type` instances.

    This is used in close conjunction with `traverse_dict` to recursively process
    a JSON object and apply a given function to any values of a given type across the
    whole object.

    Failures in the given function can be ignored by setting `pass_on_fail` to `True`,
    and/or logged by setting `log_op_func_failures` to `True`. If both are set to
    `True`, then the function will log the failure and then continue.

    Args:
        lst (list): the list to iterate through
        target_type (type): the target type to apply functions to
        target_processor_func (Callable): a function to apply to instances of `target_type`
        pass_on_fail (bool): ignore failure in either op function
        log_op_func_failures (bool): log any failures in either op function
        single_keys_to_remove (list): a list of keys that can be "expanded" up to the parent key

    Raises:
        Exception: if the `target_processor_func` fails and `pass_on_fail` is False
    """
    for i, elem in enumerate(lst):
        if isinstance(elem, target_type):
            try:
                lst[i] = target_processor_func(cast(V, elem), list_index=i)
            except Exception:
                if log_op_func_failures:
                    LOGGER.exception("Unable to process item at index %i", i)

                if not pass_on_fail:
                    raise

        # If the new(?) value is a dict/list, then it needs to be processed
        # before continuing to the next elem in this list
        if isinstance(lst[i], dict | list):
            process_json_object(
                lst[i],  # type: ignore[arg-type]
                target_type=target_type,
                target_processor_func=target_processor_func,
                pass_on_fail=pass_on_fail,
                log_op_func_failures=log_op_func_failures,
                single_keys_to_remove=single_keys_to_remove,
            )

run_cmd(cmd, *, exit_on_error=True, shell=False)

Run commands on the command line.

Parameters:

Name Type Description Default
cmd str

the command to run in the user's terminal

required
exit_on_error bool

whether to raise a RuntimeError if the command returns a non-zero exit code

True
shell bool

flag for running command in shell

False

Returns:

Name Type Description
str str

the output of the command

str str

the error from the command, if it errored

Raises:

Type Description
RuntimeError

if the command has a non-zero exit code

Source code in wg_utilities/functions/processes.py
def run_cmd(
    cmd: str,
    *,
    exit_on_error: bool = True,
    shell: bool = False,
) -> tuple[str, str]:
    """Run commands on the command line.

    Args:
        cmd (str): the command to run in the user's terminal
        exit_on_error (bool): whether to raise a RuntimeError if the command returns a non-zero exit code
        shell (bool): flag for running command in shell

    Returns:
        str: the output of the command
        str: the error from the command, if it errored

    Raises:
        RuntimeError: if the command has a non-zero exit code
    """

    LOGGER.debug("Running command `%s`", cmd)

    popen_input = cmd if shell else COMMAND_PATTERN.split(cmd)[1::2]

    with Popen(
        popen_input,
        stdout=PIPE,
        stderr=PIPE,
        shell=shell,  # noqa: S603
    ) as process:
        output, error = process.communicate()

        error_str = error.decode("utf-8").strip()

        if process.returncode != 0:
            if exit_on_error:
                raise RuntimeError(error_str)

            LOGGER.error(error_str)  # pragma: no cover

    return output.decode("utf-8").strip(), error_str
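
A usage sketch (assumes a POSIX environment where echo is on the PATH):

>>> from wg_utilities.functions import run_cmd
>>> output, error = run_cmd("echo hello")
>>> output
'hello'
>>> error
''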

set_nested_value(*, json_obj, keys, target_value, final_key=None)

Update a nested value in a dictionary.

Parameters:

Name Type Description Default
json_obj dict

the JSON object to update

required
keys list

a list of keys used to traverse the dictionary

required
target_value Any

the value to set at the given location/path

required
final_key str

the final key, the value of which we're actually setting

None
Source code in wg_utilities/functions/json.py
def set_nested_value(
    *,
    json_obj: dict[Any, Any],
    keys: list[str],
    target_value: Any,
    final_key: str | None = None,
) -> None:
    """Update a nested value in a dictionary.

    Args:
        json_obj (dict): the JSON object to update
        keys (list): a list of keys used to traverse the dictionary
        target_value (Any): the value to set at the given location/path
        final_key (str): the final key, the value of which we're actually setting
    """

    final_key = final_key or keys.pop()

    if len(keys) > 0:
        set_nested_value(
            json_obj=json_obj.get(keys.pop(0), {}),
            keys=keys,
            target_value=target_value,
            final_key=final_key,
        )
    else:
        json_obj[final_key] = target_value
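
A usage sketch (the config dict is hypothetical): keys is the full path to the value, including the final key, and the dict is updated in place.

>>> from wg_utilities.functions import set_nested_value
>>> config = {"logging": {"handlers": {"level": "INFO"}}}
>>> set_nested_value(
...     json_obj=config,
...     keys=["logging", "handlers", "level"],
...     target_value="DEBUG",
... )
>>> config
{'logging': {'handlers': {'level': 'DEBUG'}}}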

subclasses_recursive(typ, /, *, class_filter=None, track_visited=False, __visited=None)

Get all subclasses of a class recursively.

Parameters:

Name Type Description Default
typ type

the class to get the subclasses of

required
class_filter None

a function to filter the subclasses

None
track_visited bool

whether to track visited subclasses. Useful for avoiding infinite loops. Defaults to False.

False

Yields:

Name Type Description
type type[Any]

a subclass of the given class

Source code in wg_utilities/functions/subclasses.py
def subclasses_recursive(
    typ: type[Any],
    /,
    *,
    class_filter: None | Callable[[type[Any]], bool] = None,
    track_visited: bool = False,
    __visited: set[type[Any]] | None = None,
) -> Generator[type[Any], None, None]:
    """Get all subclasses of a class recursively.

    Args:
        typ (type): the class to get the subclasses of
        class_filter (None, optional): a function to filter the subclasses
        track_visited (bool, optional): whether to track visited subclasses. Useful for avoiding
            infinite loops. Defaults to False.

    Yields:
        type: a subclass of the given class
    """

    for subclass in typ.__subclasses__():
        if track_visited:
            __visited = __visited or set()
            if subclass in __visited:
                continue

            __visited.add(subclass)

        if class_filter is None or class_filter(subclass):
            yield subclass

        yield from subclasses_recursive(
            subclass,
            class_filter=class_filter,
            track_visited=track_visited,
            __visited=__visited,  # type: ignore[call-arg]
        )
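
A usage sketch with a small, hypothetical class hierarchy:

>>> from wg_utilities.functions import subclasses_recursive
>>> class Animal: ...
>>> class Dog(Animal): ...
>>> class Puppy(Dog): ...
>>> [cls.__name__ for cls in subclasses_recursive(Animal)]
['Dog', 'Puppy']
>>> [
...     cls.__name__
...     for cls in subclasses_recursive(Animal, class_filter=lambda c: c.__name__.startswith("P"))
... ]
['Puppy']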

traverse_dict(obj, /, *, target_type, target_processor_func, pass_on_fail=True, log_op_func_failures=False, single_keys_to_remove=None)

Traverse a dict, applying target_processor_func to any values of type target_type.

Parameters:

Name Type Description Default
obj dict

the JSON object to traverse

required
target_type type

the target type to apply functions to

required
target_processor_func Callable

a function to apply to instances of target_type

required
pass_on_fail bool

ignore failure in either op function

True
log_op_func_failures bool

log any failures in either op function

False
single_keys_to_remove list

a list of keys that can be "expanded" up to the parent key from a dict of length one, e.g.:

{
    "parent_1": "something",
    "parent_2": {
        "uselessKey": "actual value"
    }
}

would go to

{
    "parent_1": "something",
    "parent_2": "actual value"
}

None

Raises:

Type Description
Exception

if the target_processor_func fails and pass_on_fail is False

Source code in wg_utilities/functions/json.py
def traverse_dict(  # noqa: PLR0912
    obj: JSONObj,
    /,
    *,
    target_type: type[V] | tuple[type[V], ...],
    target_processor_func: TargetProcessorFunc[V],
    pass_on_fail: bool = True,
    log_op_func_failures: bool = False,
    single_keys_to_remove: Sequence[str] | None = None,
) -> None:
    """Traverse dict, applying`target_processor_func` to any values of type `target_type`.

    Args:
        obj (dict): the JSON object to traverse
        target_type (type): the target type to apply functions to
        target_processor_func (Callable): a function to apply to instances of `target_type`
        pass_on_fail (bool): ignore failure in either op function
        log_op_func_failures (bool): log any failures in either op function
        single_keys_to_remove (list): a list of keys that can be "expanded" up to the parent key from a dict of
            length one, e.g.:
            ... {
            ...     "parent_1": "something",
            ...     "parent_2": {
            ...         "uselessKey": "actual value"
            ...     }
            ... }
            would go to
            ... {
            ...     "parent_1": "something",
            ...     "parent_2": "actual value"
            ... }

    Raises:
        Exception: if the `target_processor_func` fails and `pass_on_fail` is False
    """
    for k, v in obj.items():
        if isinstance(v, target_type):
            try:
                obj.update({k: target_processor_func(cast(V, v), dict_key=k)})
                if isinstance(obj[k], dict):
                    traverse_dict(
                        # If a dict has been created from a non-dict type (e.g. `loads("{...}")`,
                        # then we need to traverse the current object again, as the new dict may
                        # contain more instances of `target_type`. Otherwise, traverse
                        # the dict (that already existed).
                        obj if target_type is not dict else cast(JSONObj, obj[k]),
                        target_type=target_type,
                        target_processor_func=target_processor_func,
                        pass_on_fail=pass_on_fail,
                        log_op_func_failures=log_op_func_failures,
                        single_keys_to_remove=single_keys_to_remove,
                    )
            except Exception:
                if log_op_func_failures:
                    LOGGER.exception("Unable to process item with key %s", k)
                if not pass_on_fail:
                    raise

            continue

        if isinstance(v, dict):
            matched_single_key = False
            if (
                len(v) == 1
                and single_keys_to_remove is not None
                and (only_key := next(iter(v.keys()))) in single_keys_to_remove
            ):
                matched_single_key = True
                if isinstance(value := v.get(only_key), target_type):
                    try:
                        value = target_processor_func(cast(V, value), dict_key=only_key)
                    except Exception:
                        if log_op_func_failures:
                            LOGGER.exception(
                                "Unable to process item with key %s",
                                k,
                            )
                        if not pass_on_fail:
                            raise

                if isinstance(value, dict):
                    # Wrap the value, so that if the top level key is one
                    # of `single_keys_to_remove` then it's processed
                    # correctly
                    tmp_wrapper: JSONObj = {"-": value}
                    traverse_dict(
                        tmp_wrapper,
                        target_type=target_type,
                        target_processor_func=target_processor_func,
                        pass_on_fail=pass_on_fail,
                        log_op_func_failures=log_op_func_failures,
                        single_keys_to_remove=single_keys_to_remove,
                    )

                    value = tmp_wrapper["-"]

                obj[k] = value

            if not matched_single_key:
                traverse_dict(
                    v,
                    target_type=target_type,
                    target_processor_func=target_processor_func,
                    pass_on_fail=pass_on_fail,
                    log_op_func_failures=log_op_func_failures,
                    single_keys_to_remove=single_keys_to_remove,
                )

            continue

        if isinstance(v, list):
            process_list(
                v,
                target_type=target_type,
                target_processor_func=target_processor_func,
                pass_on_fail=pass_on_fail,
                log_op_func_failures=log_op_func_failures,
                single_keys_to_remove=single_keys_to_remove,
            )
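
A sketch showing single_keys_to_remove (the data is illustrative): the single-key wrapper dict under parent_2 is collapsed up to its parent, and every string value is processed.

>>> from wg_utilities.functions import traverse_dict
>>> data = {"parent_1": "something", "parent_2": {"uselessKey": "actual value"}}
>>> traverse_dict(
...     data,
...     target_type=str,
...     target_processor_func=lambda v, **_: v.upper(),
...     single_keys_to_remove=["uselessKey"],
... )
>>> data
{'parent_1': 'SOMETHING', 'parent_2': 'ACTUAL VALUE'}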

try_float(v, default=0.0)

Try to cast a value to a float, returning a default if it fails.

Examples:

>>> try_float("12.34")
12.34
>>> try_float("ABC", -1)
-1
>>> try_float(1.2, 10)
1.2

Parameters:

Name Type Description Default
v Union[str, bytes, bytearray, SupportsFloat, _SupportsIndex]

The value to be cast to a float

required
default object

The value to be returned if the casting fails

0.0

Returns:

Name Type Description
float object

The value passed in, in float format, or the default

Source code in wg_utilities/functions/_functions.py
def try_float(v: Any, default: Any = 0.0) -> object:
    """Try to cast a value to a float, and returns a default if it fails.

    Examples:
        >>> try_float("12.34")
        12.34

        >>> try_float("ABC", -1)
        -1

        >>> try_float(1.2, 10)
        1.2

    Args:
        v (Union[str, bytes, bytearray, SupportsFloat, _SupportsIndex]): The
            value to be cast to a float
        default (object): The value to be returned if the casting fails

    Returns:
        float: The value passed in, in float format, or the default
    """

    try:
        return float(v)
    except (ValueError, TypeError):
        return default

user_data_dir(*, project_name='WgUtilities', file_name=None, _platform=platform)

Get OS specific data directory path.

Typical user data directories are:

macOS: ~/Library/Application Support
Unix: ~/.local/share (or $XDG_DATA_HOME, if defined)
Win 10: C:\Users\<username>\AppData\Local

For Unix, we follow the XDG spec and support $XDG_DATA_HOME if defined.

Parameters:

Name Type Description Default
project_name str

the name of the project which the utils are running in

'WgUtilities'
file_name Optional[str]

file to be fetched from the data dir

None
_platform str

the platform to get the data dir for

platform

Returns:

Name Type Description
Path Path

full path to the user-specific data dir

Source code in wg_utilities/functions/file_management.py
def user_data_dir(
    *,
    project_name: str = "WgUtilities",
    file_name: str | None = None,
    _platform: str = platform,
) -> Path:
    r"""Get OS specific data directory path.

    Typical user data directories are:
        macOS:    ~/Library/Application Support
        Unix:     ~/.local/share   # or in $XDG_DATA_HOME, if defined
        Win 10:   C:\\Users\\<username>\\AppData\\Local

    For Unix, we follow the XDG spec and support $XDG_DATA_HOME if defined.

    Args:
        project_name (str): the name of the project which the utils are running in
        file_name (Optional[str]): file to be fetched from the data dir
        _platform (str): the platform to get the data dir for

    Returns:
        Path: full path to the user-specific data dir
    """

    # get os specific path
    if _platform.startswith("win"):
        os_path = environ["LOCALAPPDATA"]
    elif _platform.startswith("darwin"):
        os_path = "~/Library/Application Support"
    else:
        # linux
        os_path = getenv("XDG_DATA_HOME", "~/.local/share")

    path = Path(os_path) / project_name

    if file_name:
        return force_mkdir(path.expanduser() / file_name, path_is_file=True)

    return path.expanduser()
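
A usage sketch ("MyProject" and "settings.json" are illustrative). Calling with file_name also creates the directory tree via force_mkdir; without it, the path is returned as-is.

>>> from wg_utilities.functions import user_data_dir
>>> data_dir = user_data_dir(project_name="MyProject")
>>> # e.g. ~/.local/share/MyProject on Linux, ~/Library/Application Support/MyProject on macOS
>>> settings = user_data_dir(project_name="MyProject", file_name="settings.json")
>>> # the MyProject directory is created if missing; settings.json itself is not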

utcnow(unit=None)

datetime.utcnow with optional unit conversion.

Gets the current UTC time and returns it in a chosen unit. If no unit is provided then it is just returned as a datetime.

Parameters:

Name Type Description Default
unit DatetimeFixedUnit

the unit in which to provide the current datetime

None

Returns:

Name Type Description
Union [datetime, int]

the current UTC datetime in the chosen unit

Source code in wg_utilities/functions/datetime_helpers.py
def utcnow(unit: DatetimeFixedUnit | None = None) -> datetime | int:
    """`datetime.utcnow` with optional unit conversion.

    Gets the current UTC time and returns it in a chosen unit. If no unit is
    provided then it is just returned as a datetime.

    Args:
        unit (DatetimeFixedUnit): the unit in which to provide the current datetime

    Returns:
        Union([datetime, int]): the current UTC datetime in the chosen unit
    """

    if not unit:
        return datetime.now(UTC)

    return int(datetime.now(UTC).timestamp() / unit.value)
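
A usage sketch pairing utcnow with DatetimeFixedUnit (both assumed to be importable from wg_utilities.functions, as documented above):

>>> from wg_utilities.functions import DatetimeFixedUnit, utcnow
>>> now = utcnow()                            # timezone-aware datetime in UTC
>>> seconds = utcnow(DatetimeFixedUnit.SECOND)       # current Unix timestamp as an int
>>> millis = utcnow(DatetimeFixedUnit.MILLISECOND)   # the same timestamp, in milliseconds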

datetime_helpers

Helper functions for all things date and time related.

DatetimeFixedUnit

Bases: Enum

Enum for fixed units of time (i.e. not a month or a year).

Values are in seconds.

Source code in wg_utilities/functions/datetime_helpers.py
class DatetimeFixedUnit(Enum):
    """Enum for fixed units of time (i.e. not a month or a year).

    Values are in seconds.
    """

    WEEK = 604800
    DAY = 86400
    HOUR = 3600
    MINUTE = 60
    SECOND = 1
    MILLISECOND = 1e-3
    MICROSECOND = 1e-6
    NANOSECOND = 1e-9

utcnow(unit=None)

datetime.utcnow with optional unit conversion.

Gets the current UTC time and returns it in a chosen unit. If no unit is provided then it is just returned as a datetime.

Parameters:

Name Type Description Default
unit DatetimeFixedUnit

the unit in which to provide the current datetime

None

Returns:

Name Type Description
Union [datetime, int]

the current UTC datetime in the chosen unit

Source code in wg_utilities/functions/datetime_helpers.py
def utcnow(unit: DatetimeFixedUnit | None = None) -> datetime | int:
    """`datetime.utcnow` with optional unit conversion.

    Gets the current UTC time and returns it in a chosen unit. If no unit is
    provided then it is just returned as a datetime.

    Args:
        unit (DatetimeFixedUnit): the unit in which to provide the current datetime

    Returns:
        Union([datetime, int]): the current UTC datetime in the chosen unit
    """

    if not unit:
        return datetime.now(UTC)

    return int(datetime.now(UTC).timestamp() / unit.value)

decorators

Custom decorators.

backoff(exceptions=Exception, /, logger=None, *, max_tries=10, max_delay=60, timeout=3600)

Apply an exponential backoff to the decorated function.

The function will be called until it succeeds, the maximum number of tries is reached, or the timeout expires (up to 24 hours, configurable via timeout).

** Be Careful! ** Setting max_tries, max_delay, and timeout to 0 will retry as fast as possible for a whole day! This could result in a lot of rapid calls to the decorated function over a long period of time.

Parameters:

Name Type Description Default
exceptions type[Exception] | tuple[type[Exception], ...]

the exception(s) to catch

Exception
logger Logger

optional logger for logging the exception

None
max_tries int

the maximum number of tries. Setting to 0 will retry forever.

10
max_delay int

the maximum delay in seconds between tries. Setting to 0 will retry as fast as possible.

60
timeout int

the maximum time to wait for the decorated function to complete, in seconds. Setting to 0 will retry for a whole day. Defaults to 1 hour.

3600

Returns:

Name Type Description
Callable Callable[[Callable[P, R]], Callable[P, R]]

the actual decorator

Source code in wg_utilities/functions/decorators.py
def backoff(
    exceptions: type[Exception] | tuple[type[Exception], ...] = Exception,
    /,
    logger: Logger | None = None,
    *,
    max_tries: int = 10,
    max_delay: int = 60,
    timeout: int = 3600,
) -> Callable[[Callable[P, R]], Callable[P, R]]:
    """Apply an exponential backoff to the decorated function.

    The function will be called until it succeeds, the maximum number of tries is
    reached, or the timeout expires (up to 24 hours, configurable via `timeout`).

    ** Be Careful! **
    Setting `max_tries`, `max_delay`, and `timeout` to 0 will retry as fast as possible for a whole day!
    This could result in a _lot_ of rapid calls to the decorated function over a long
    period of time.

    Args:
        exceptions (type[Exception] | tuple[type[Exception], ...]): the exception(s) to catch
        logger (Logger): optional logger for logging the exception
        max_tries (int): the maximum number of tries. Setting to 0 will retry forever.
        max_delay (int): the maximum delay in seconds between tries. Setting to 0 will
            retry as fast as possible.
        timeout (int): the maximum time to wait for the decorated function to complete,
            in seconds. Setting to 0 will retry for a whole day. Defaults to 1 hour.

    Returns:
        Callable: the actual decorator
    """

    timeout = 86400 if timeout <= 0 else min(timeout, 86400)

    def _decorator(func: Callable[P, R]) -> Callable[P, R]:
        """Apply an exponential backoff to the decorated function.

        The function will be called until it succeeds or the maximum number of tries,
        with an exponential delay between tries (up to the maximum delay).

        Args:
            func (Callable): the function being wrapped

        Returns:
            Callable: the inner function
        """

        @wraps(func)
        def worker(*args: P.args, **kwargs: P.kwargs) -> R:
            """Try to run the decorated function and calls the callback function.

            Args:
                *args (Any): any args passed to the inner func
                **kwargs (Any): any kwargs passed to the inner func

            Returns:
                Any: the result of the wrapped function

            Raises:
                Exception: any exception from the decorated function
            """

            start_time = time()

            tries = 0
            delay = 0.1
            while True:
                try:
                    return func(*args, **kwargs)
                except exceptions as exc:  # noqa: PERF203
                    if logger is not None:
                        logger.warning(
                            "Exception caught in backoff decorator (attempt %i/%i, waiting for %fs): %s %s",
                            tries,
                            max_tries,
                            delay,
                            type(exc).__name__,
                            exc,
                        )
                    tries += 1

                    if 0 < max_tries <= tries or (timeout <= (time() - start_time)):
                        raise

                    sleep(delay)
                    delay = min(delay * (2 + random()), max_delay)  # noqa: S311

        return worker

    return _decorator

file_management

Set of functions for specifically managing files and directories.

force_mkdir(target_path, *, path_is_file=False)

Create all directories needed for the given path.

Parameters:

Name Type Description Default
target_path Path

the path to the directory which needs to be created

required
path_is_file bool

flag for whether the path is for a file, in which case the final part of the path will not be created

False

Returns:

Name Type Description
Path Path

the target_path that was passed in

Source code in wg_utilities/functions/file_management.py
def force_mkdir(target_path: Path, *, path_is_file: bool = False) -> Path:
    """Create all directories needed for the given path.

    Args:
        target_path (Path): the path to the directory which needs to be created
        path_is_file (bool): flag for whether the path is for a file, in which case
            the final part of the path will not be created

    Returns:
        Path: the target_path that was passed in
    """
    if path_is_file:
        target_path.parent.mkdir(exist_ok=True, parents=True)
    else:
        target_path.mkdir(exist_ok=True, parents=True)

    return target_path

user_data_dir(*, project_name='WgUtilities', file_name=None, _platform=platform)

Get OS specific data directory path.

Typical user data directories are:

macOS: ~/Library/Application Support
Unix: ~/.local/share (or $XDG_DATA_HOME, if defined)
Win 10: C:\Users\<username>\AppData\Local

For Unix, we follow the XDG spec and support $XDG_DATA_HOME if defined.

Parameters:

Name Type Description Default
project_name str

the name of the project which the utils are running in

'WgUtilities'
file_name Optional[str]

file to be fetched from the data dir

None
_platform str

the platform to get the data dir for

platform

Returns:

Name Type Description
Path Path

full path to the user-specific data dir

Source code in wg_utilities/functions/file_management.py
def user_data_dir(
    *,
    project_name: str = "WgUtilities",
    file_name: str | None = None,
    _platform: str = platform,
) -> Path:
    r"""Get OS specific data directory path.

    Typical user data directories are:
        macOS:    ~/Library/Application Support
        Unix:     ~/.local/share   # or in $XDG_DATA_HOME, if defined
        Win 10:   C:\\Users\\<username>\\AppData\\Local

    For Unix, we follow the XDG spec and support $XDG_DATA_HOME if defined.

    Args:
        project_name (str): the name of the project which the utils are running in
        file_name (Optional[str]): file to be fetched from the data dir
        _platform (str): the platform to get the data dir for

    Returns:
        Path: full path to the user-specific data dir
    """

    # get os specific path
    if _platform.startswith("win"):
        os_path = environ["LOCALAPPDATA"]
    elif _platform.startswith("darwin"):
        os_path = "~/Library/Application Support"
    else:
        # linux
        os_path = getenv("XDG_DATA_HOME", "~/.local/share")

    path = Path(os_path) / project_name

    if file_name:
        return force_mkdir(path.expanduser() / file_name, path_is_file=True)

    return path.expanduser()

json

Useful functions for working with JSON/dictionaries.

InvalidJsonObjectError

Bases: Exception

Raised when an invalid JSON object/array is passed to process_json_object.

Source code in wg_utilities/functions/json.py
class InvalidJsonObjectError(Exception):
    """Raised when an invalid JSON object/array is passed to `process_json_object`."""

    def __init__(self, obj: Any) -> None:
        """Initialize the exception."""
        super().__init__(
            f"Input object must be a dict or list, not {type(obj)!r}",
        )

__init__(obj)

Initialize the exception.

Source code in wg_utilities/functions/json.py
def __init__(self, obj: Any) -> None:
    """Initialize the exception."""
    super().__init__(
        f"Input object must be a dict or list, not {type(obj)!r}",
    )

TargetProcessorFunc

Bases: Protocol[V_contra]

Typing protocol for the user-defined function passed into the below functions.

Source code in wg_utilities/functions/json.py
class TargetProcessorFunc(Protocol[V_contra]):
    """Typing protocol for the user-defined function passed into the below functions."""

    def __call__(
        self,
        value: V_contra,
        *,
        dict_key: str | None = None,
        list_index: int | None = None,
    ) -> JSONVal:
        """The function to be called on each value in the JSON object."""

__call__(value, *, dict_key=None, list_index=None)

The function to be called on each value in the JSON object.

Source code in wg_utilities/functions/json.py
def __call__(
    self,
    value: V_contra,
    *,
    dict_key: str | None = None,
    list_index: int | None = None,
) -> JSONVal:
    """The function to be called on each value in the JSON object."""

process_json_object(obj, /, *, target_type, target_processor_func, pass_on_fail=True, log_op_func_failures=False, single_keys_to_remove=None)

Generic entry point to process dicts and/or lists.

Raises:

Type Description
InvalidJsonObjectError

if an invalid JSON object/array is passed

Source code in wg_utilities/functions/json.py
def process_json_object(
    obj: JSONObj | JSONArr,
    /,
    *,
    target_type: type[V] | tuple[type[V], ...],
    target_processor_func: TargetProcessorFunc[V],
    pass_on_fail: bool = True,
    log_op_func_failures: bool = False,
    single_keys_to_remove: Sequence[str] | None = None,
) -> None:
    """Generic entry point to process dicts and/or lists.

    Raises:
        InvalidJsonObjectError: if an invalid JSON object/array is passed
    """

    if isinstance(obj, dict):
        traverse_dict(
            obj,
            target_type=target_type,
            target_processor_func=target_processor_func,
            pass_on_fail=pass_on_fail,
            log_op_func_failures=log_op_func_failures,
            single_keys_to_remove=single_keys_to_remove,
        )
    elif isinstance(obj, list):
        process_list(
            obj,
            target_type=target_type,
            target_processor_func=target_processor_func,
            pass_on_fail=pass_on_fail,
            log_op_func_failures=log_op_func_failures,
            single_keys_to_remove=single_keys_to_remove,
        )
    else:
        raise InvalidJsonObjectError(obj)

process_list(lst, /, *, target_type, target_processor_func, pass_on_fail=True, log_op_func_failures=False, single_keys_to_remove=None)

Iterate through a list, applying target_processor_func to any target_type instances.

This is used in close conjunction with traverse_dict to recursively process a JSON object and apply a given function to any values of a given type across the whole object.

Failures in the given function can be ignored by setting pass_on_fail to True, and/or logged by setting log_op_func_failures to True. If both are set to True, then the function will log the failure and then continue.

Parameters:

Name Type Description Default
lst list

the list to iterate through

required
target_type type

the target type to apply functions to

required
target_processor_func Callable

a function to apply to instances of target_type

required
pass_on_fail bool

ignore failure in either op function

True
log_op_func_failures bool

log any failures in either op function

False
single_keys_to_remove list

a list of keys that can be "expanded" up to the parent key

None

Raises:

Type Description
Exception

if the target_processor_func fails and pass_on_fail is False

Source code in wg_utilities/functions/json.py
def process_list(
    lst: list[JSONVal],
    /,
    *,
    target_type: type[V] | tuple[type[V], ...],
    target_processor_func: TargetProcessorFunc[V],
    pass_on_fail: bool = True,
    log_op_func_failures: bool = False,
    single_keys_to_remove: Sequence[str] | None = None,
) -> None:
    """Iterate through a list, applying `list_op_func` to any `target_type` instances.

    This is used in close conjunction with `traverse_dict` to recursively process
    a JSON object and apply a given function to any values of a given type across the
    whole object.

    Failures in the given function can be ignored by setting `pass_on_fail` to `True`,
    and/or logged by setting `log_op_func_failures` to `True`. If both are set to
    `True`, then the function will log the failure and then continue.

    Args:
        lst (list): the list to iterate through
        target_type (type): the target type to apply functions to
        target_processor_func (Callable): a function to apply to instances of `target_type`
        pass_on_fail (bool): ignore failure in either op function
        log_op_func_failures (bool): log any failures in either op function
        single_keys_to_remove (list): a list of keys that can be "expanded" up to the parent key

    Raises:
        Exception: if the `target_processor_func` fails and `pass_on_fail` is False
    """
    for i, elem in enumerate(lst):
        if isinstance(elem, target_type):
            try:
                lst[i] = target_processor_func(cast(V, elem), list_index=i)
            except Exception:
                if log_op_func_failures:
                    LOGGER.exception("Unable to process item at index %i", i)

                if not pass_on_fail:
                    raise

        # If the new(?) value is a dict/list, then it needs to be processed
        # before continuing to the next elem in this list
        if isinstance(lst[i], dict | list):
            process_json_object(
                lst[i],  # type: ignore[arg-type]
                target_type=target_type,
                target_processor_func=target_processor_func,
                pass_on_fail=pass_on_fail,
                log_op_func_failures=log_op_func_failures,
                single_keys_to_remove=single_keys_to_remove,
            )

set_nested_value(*, json_obj, keys, target_value, final_key=None)

Update a nested value in a dictionary.

Parameters:

Name Type Description Default
json_obj dict

the JSON object to update

required
keys list

a list of keys used to traverse the dictionary

required
target_value Any

the value to set at the given location/path

required
final_key str

the final key, the value of which we're actually setting

None
Source code in wg_utilities/functions/json.py
def set_nested_value(
    *,
    json_obj: dict[Any, Any],
    keys: list[str],
    target_value: Any,
    final_key: str | None = None,
) -> None:
    """Update a nested value in a dictionary.

    Args:
        json_obj (dict): the JSON object to update
        keys (list): a list of keys used to traverse the dictionary
        target_value (Any): the value to set at the given location/path
        final_key (str): the final key, the value of which we're actually setting
    """

    final_key = final_key or keys.pop()

    if len(keys) > 0:
        set_nested_value(
            json_obj=json_obj.get(keys.pop(0), {}),
            keys=keys,
            target_value=target_value,
            final_key=final_key,
        )
    else:
        json_obj[final_key] = target_value

traverse_dict(obj, /, *, target_type, target_processor_func, pass_on_fail=True, log_op_func_failures=False, single_keys_to_remove=None)

Traverse a dict, applying target_processor_func to any values of type target_type.

Parameters:

Name Type Description Default
obj dict

the JSON object to traverse

required
target_type type

the target type to apply functions to

required
target_processor_func Callable

a function to apply to instances of target_type

required
pass_on_fail bool

ignore failure in either op function

True
log_op_func_failures bool

log any failures in either op function

False
single_keys_to_remove list

a list of keys that can be "expanded" up to the parent key from a dict of length one, e.g.:

{
    "parent_1": "something",
    "parent_2": {
        "uselessKey": "actual value"
    }
}

would go to

{
    "parent_1": "something",
    "parent_2": "actual value"
}

None

Raises:

Type Description
Exception

if the target_processor_func fails and pass_on_fail is False

Source code in wg_utilities/functions/json.py
def traverse_dict(  # noqa: PLR0912
    obj: JSONObj,
    /,
    *,
    target_type: type[V] | tuple[type[V], ...],
    target_processor_func: TargetProcessorFunc[V],
    pass_on_fail: bool = True,
    log_op_func_failures: bool = False,
    single_keys_to_remove: Sequence[str] | None = None,
) -> None:
    """Traverse dict, applying`target_processor_func` to any values of type `target_type`.

    Args:
        obj (dict): the JSON object to traverse
        target_type (type): the target type to apply functions to
        target_processor_func (Callable): a function to apply to instances of `target_type`
        pass_on_fail (bool): ignore failure in either op function
        log_op_func_failures (bool): log any failures in either op function
        single_keys_to_remove (list): a list of keys that can be "expanded" up to the parent key from a dict of
            length one, e.g.:
            ... {
            ...     "parent_1": "something",
            ...     "parent_2": {
            ...         "uselessKey": "actual value"
            ...     }
            ... }
            would go to
            ... {
            ...     "parent_1": "something",
            ...     "parent_2": "actual value"
            ... }

    Raises:
        Exception: if the `target_processor_func` fails and `pass_on_fail` is False
    """
    for k, v in obj.items():
        if isinstance(v, target_type):
            try:
                obj.update({k: target_processor_func(cast(V, v), dict_key=k)})
                if isinstance(obj[k], dict):
                    traverse_dict(
                        # If a dict has been created from a non-dict type (e.g. `loads("{...}")`),
                        # then we need to traverse the current object again, as the new dict may
                        # contain more instances of `target_type`. Otherwise, traverse
                        # the dict (that already existed).
                        obj if target_type is not dict else cast(JSONObj, obj[k]),
                        target_type=target_type,
                        target_processor_func=target_processor_func,
                        pass_on_fail=pass_on_fail,
                        log_op_func_failures=log_op_func_failures,
                        single_keys_to_remove=single_keys_to_remove,
                    )
            except Exception:
                if log_op_func_failures:
                    LOGGER.exception("Unable to process item with key %s", k)
                if not pass_on_fail:
                    raise

            continue

        if isinstance(v, dict):
            matched_single_key = False
            if (
                len(v) == 1
                and single_keys_to_remove is not None
                and (only_key := next(iter(v.keys()))) in single_keys_to_remove
            ):
                matched_single_key = True
                if isinstance(value := v.get(only_key), target_type):
                    try:
                        value = target_processor_func(cast(V, value), dict_key=only_key)
                    except Exception:
                        if log_op_func_failures:
                            LOGGER.exception(
                                "Unable to process item with key %s",
                                k,
                            )
                        if not pass_on_fail:
                            raise

                if isinstance(value, dict):
                    # Wrap the value, so that if the top level key is one
                    # of `single_keys_to_remove` then it's processed
                    # correctly
                    tmp_wrapper: JSONObj = {"-": value}
                    traverse_dict(
                        tmp_wrapper,
                        target_type=target_type,
                        target_processor_func=target_processor_func,
                        pass_on_fail=pass_on_fail,
                        log_op_func_failures=log_op_func_failures,
                        single_keys_to_remove=single_keys_to_remove,
                    )

                    value = tmp_wrapper["-"]

                obj[k] = value

            if not matched_single_key:
                traverse_dict(
                    v,
                    target_type=target_type,
                    target_processor_func=target_processor_func,
                    pass_on_fail=pass_on_fail,
                    log_op_func_failures=log_op_func_failures,
                    single_keys_to_remove=single_keys_to_remove,
                )

            continue

        if isinstance(v, list):
            process_list(
                v,
                target_type=target_type,
                target_processor_func=target_processor_func,
                pass_on_fail=pass_on_fail,
                log_op_func_failures=log_op_func_failures,
                single_keys_to_remove=single_keys_to_remove,
            )
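
A short usage sketch with hypothetical data (not from the library's docs): uppercase every str value in a nested dict. The processor receives each value plus a dict_key keyword argument, as in the source above.

from wg_utilities.functions.json import traverse_dict

payload = {
    "name": "wg-utilities",
    "meta": {"description": "useful functions"},
}

def upper(value: str, *, dict_key: str | None = None) -> str:
    # Called for every value of `target_type`, with the key it was found under.
    return value.upper()

traverse_dict(
    payload,
    target_type=str,
    target_processor_func=upper,
    pass_on_fail=False,  # re-raise processing errors instead of swallowing them
)

assert payload == {"name": "WG-UTILITIES", "meta": {"description": "USEFUL FUNCTIONS"}}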

processes

Set of functions for managing processes.

run_cmd(cmd, *, exit_on_error=True, shell=False)

Run commands on the command line.

Parameters:

Name Type Description Default
cmd str

the command to run in the user's terminal

required
exit_on_error bool

whether to raise a RuntimeError (and so halt the script) if the command fails

True
shell bool

flag for running command in shell

False

Returns:

Name Type Description
str str

the output of the command

str str

the error from the command, if it errored

Raises:

Type Description
RuntimeError

if the command has a non-zero exit code

Source code in wg_utilities/functions/processes.py
def run_cmd(
    cmd: str,
    *,
    exit_on_error: bool = True,
    shell: bool = False,
) -> tuple[str, str]:
    """Run commands on the command line.

    Args:
        cmd (str): the command to run in the user's terminal
        exit_on_error (bool): whether to raise a RuntimeError (and so halt the script) if the command fails
        shell (bool): flag for running command in shell

    Returns:
        str: the output of the command
        str: the error from the command, if it errored

    Raises:
        RuntimeError: if the command has a non-zero exit code
    """

    LOGGER.debug("Running command `%s`", cmd)

    popen_input = cmd if shell else COMMAND_PATTERN.split(cmd)[1::2]

    with Popen(
        popen_input,
        stdout=PIPE,
        stderr=PIPE,
        shell=shell,  # noqa: S603
    ) as process:
        output, error = process.communicate()

        error_str = error.decode("utf-8").strip()

        if process.returncode != 0:
            if exit_on_error:
                raise RuntimeError(error_str)

            LOGGER.error(error_str)  # pragma: no cover

    return output.decode("utf-8").strip(), error_str
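
A hedged example of both paths; it assumes a POSIX environment with echo and ls on the PATH, and that COMMAND_PATTERN tokenises the command on whitespace.

from wg_utilities.functions.processes import run_cmd

# Successful command: stdout and stderr are returned as stripped strings.
output, error = run_cmd("echo hello world")
assert output == "hello world" and error == ""

# Failing command with exit_on_error=False: the error is logged and returned
# rather than raised as a RuntimeError.
output, error = run_cmd("ls /definitely-not-a-real-path", exit_on_error=False)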

string_manipulation

Set of functions for string manipulation.

cleanse_string(value, *, whitespace_amount=None, preserve_newlines=False)

Remove all non-alphanumeric characters from a string.

Parameters:

Name Type Description Default
value str

the input string value

required
whitespace_amount int

the number of spaces to replace each run of whitespace with. Setting to 0 preserves all whitespace unchanged, 1 collapses each run to a single space, and so on. Defaults to None, which removes all whitespace.

None
preserve_newlines bool

whether to preserve newlines in the string.

False

Returns:

Name Type Description
str str

the cleansed string

Source code in wg_utilities/functions/string_manipulation.py
def cleanse_string(
    value: str,
    *,
    whitespace_amount: int | None = None,
    preserve_newlines: bool = False,
) -> str:
    """Remove all non-alphanumeric characters from a string.

    Args:
        value (str): the input string value
        whitespace_amount (int, optional): the number of spaces to replace each run of
            whitespace with. Setting to 0 preserves all whitespace unchanged, 1 collapses
            each run to a single space, and so on. Defaults to None, which removes all whitespace.
        preserve_newlines (bool, optional): whether to preserve newlines in the string.

    Returns:
        str: the cleansed string
    """
    inner_pattern = "a-zA-Z0-9"

    if preserve_newlines:
        inner_pattern += "\n"

    if whitespace_amount is None:
        return sub(rf"[^{inner_pattern}]", "", value)

    if whitespace_amount == 0:
        return sub(rf"[^{inner_pattern}\s]", "", value)

    return sub(r"\s+", " " * whitespace_amount, sub(rf"[^{inner_pattern}\s]", "", value))

subclasses

Get all subclasses of a class recursively.

subclasses_recursive(typ, /, *, class_filter=None, track_visited=False, __visited=None)

Get all subclasses of a class recursively.

Parameters:

Name Type Description Default
typ type

the class to get the subclasses of

required
class_filter Callable | None

a function to filter the subclasses

None
track_visited bool

whether to track visited subclasses. Useful for avoiding infinite loops. Defaults to False.

False

Yields:

Name Type Description
type type[Any]

a subclass of the given class

Source code in wg_utilities/functions/subclasses.py
def subclasses_recursive(
    typ: type[Any],
    /,
    *,
    class_filter: None | Callable[[type[Any]], bool] = None,
    track_visited: bool = False,
    __visited: set[type[Any]] | None = None,
) -> Generator[type[Any], None, None]:
    """Get all subclasses of a class recursively.

    Args:
        typ (type): the class to get the subclasses of
        class_filter (Callable | None, optional): a function to filter the subclasses
        track_visited (bool, optional): whether to track visited subclasses. Useful for avoiding
            infinite loops. Defaults to False.

    Yields:
        type: a subclass of the given class
    """

    for subclass in typ.__subclasses__():
        if track_visited:
            __visited = __visited or set()
            if subclass in __visited:
                continue

            __visited.add(subclass)

        if class_filter is None or class_filter(subclass):
            yield subclass

        yield from subclasses_recursive(
            subclass,
            class_filter=class_filter,
            track_visited=track_visited,
            __visited=__visited,  # type: ignore[call-arg]
        )
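
A small sketch with hypothetical classes; the yield order shown relies on CPython returning __subclasses__() in definition order.

from wg_utilities.functions.subclasses import subclasses_recursive

class Animal: ...
class Dog(Animal): ...
class Puppy(Dog): ...
class Cat(Animal): ...

# Depth-first: each direct subclass is yielded before its own subclasses are explored.
assert list(subclasses_recursive(Animal)) == [Dog, Puppy, Cat]

# class_filter limits what is yielded, but the recursion still descends through
# filtered-out classes.
assert list(
    subclasses_recursive(Animal, class_filter=lambda cls: cls.__name__.startswith("P")),
) == [Puppy]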

xml

Helper functions specifically for parsing/manipulating XML.

get_nsmap(*, root=None, xml_doc=None, warn_on_defaults=False)

Get the namespace map for an XML document.

Parameters:

Name Type Description Default
root Element

an lxml Element from an XML document

None
xml_doc str

a raw XML document

None
warn_on_defaults bool

log a warning when an empty prefix is found and converted to a default value

False

Returns:

Name Type Description
dict dict[str, str]

a namespace mapping for the provided XML

Raises:

Type Description
ValueError

if neither argument is provided

Source code in wg_utilities/functions/xml.py
def get_nsmap(
    *,
    root: etree._Element | None = None,
    xml_doc: str | None = None,
    warn_on_defaults: bool = False,
) -> dict[str, str]:
    """Get the namespace map for an XML document.

    Args:
        root (Element): an lxml Element from an XML document
        xml_doc (str): a raw XML document
        warn_on_defaults (bool): log a warning when an empty prefix is found and converted to a default value

    Returns:
        dict: a namespace mapping for the provided XML

    Raises:
        ValueError: if neither argument is provided
    """
    if root is None:
        if xml_doc is None:
            raise ValueError("One of `root` or `xml_doc` should be non-null")

        root = etree.fromstring(xml_doc.encode())  # noqa: S320

    nsmap = {}
    default_count = 0
    processed_urls = set()

    prefix: str
    url: str
    for prefix, url in root.xpath(  # type: ignore[misc,assignment,union-attr]
        "//namespace::*",
    ):
        if url in processed_urls:
            continue

        if prefix:
            nsmap[prefix] = url
        else:
            default_prefix = f"default_{default_count}"
            default_count += 1
            if warn_on_defaults:
                LOGGER.warning(
                    "Adding namespace url `%s` with prefix key `%s`",
                    url,
                    default_prefix,
                )

            nsmap[default_prefix] = url

        processed_urls.add(url)

    return nsmap
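
A short sketch against a hypothetical Atom snippet. The unprefixed namespace is keyed as default_0; note that lxml's namespace axis also reports the built-in xml namespace, so the returned mapping may contain an xml entry as well.

from wg_utilities.functions.xml import get_nsmap

xml_doc = """<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
  <title>Example</title>
</feed>"""

nsmap = get_nsmap(xml_doc=xml_doc, warn_on_defaults=True)

assert nsmap["default_0"] == "http://www.w3.org/2005/Atom"  # default (unprefixed) namespace
assert nsmap["media"] == "http://search.yahoo.com/mrss/"  # prefixed namespaces keep their prefix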