Source code for scholar_flux.utils.paths.processing_path

# /utils/paths/processing_path.py
"""Implements the ProcessingPath that is the most fundamental component in the scholar_flux JSON path processing trie
implementation.

The ProcessingPath is used to store a path processing representation that allows for extensive flexibility in the
creation, filtering, and discovery of nested keys in JSON structures.

"""
from __future__ import annotations
from typing import Union
import re
import logging
from copy import deepcopy
from typing import Optional, List, Tuple, Pattern, ClassVar
from dataclasses import dataclass, field
from scholar_flux.exceptions.path_exceptions import (
    InvalidProcessingPathError,
    InvalidPathDelimiterError,
    InvalidComponentTypeError,
    PathIndexingError,
)

# Configure logging
logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class ProcessingPath:
    """A utility class to handle path operations for processing and flattening dictionaries.

    Args:
        components (Union[str, int, Tuple[str, ...], List[str], List[int], List[str | int]]):
            The initial path, either as a string or a list of strings. Any integers will be auto-converted
            to strings in the process of formatting the components of the path
        component_types (Optional[Union[Tuple[str, ...], List[str]]]):
           Optional metadata fields that can be used to annotate specific components of a path
        delimiter (str):
            The delimiter used to separate components in the path.

    Raises:
        InvalidProcessingPathError: If the path is neither a string nor a list of strings.
        InvalidPathDelimiterError: If the delimiter is invalid.

    Attributes:
        components (Tuple[str, ...]): A tuple of path components.
        delimiter (str): The delimiter used to separate components in the path.

    Examples:

        >>> from scholar_flux.utils import ProcessingPath
        >>> abc_path = ProcessingPath(['a', 'b', 'c'], delimiter ='//')
        >>> updated_path = abc_path / 'd'
        >>> assert updated_path.depth > 3 and updated_path[-1] == 'd'
        # OUTPUT: True
        >>> assert str(updated_path) == 'a//b//c//d'
        >>> assert updated_path.has_ancestor(abc_path)

    """

    components: Tuple[str, ...] = field(init=False)
    component_types: Optional[Tuple[str, ...]] = field(init=False, default=None)
    delimiter: str = field(init=False, default="")
    DEFAULT_DELIMITER: ClassVar[str] = "."  # Class-level default delimiter


[docs]
    def __init__(
        self,
        components: Union[str, int, Tuple[str, ...], List[str], List[int], List[str | int]] = (),
        component_types: Optional[Union[Tuple[str, ...], List[str]]] = None,
        delimiter: Optional[str] = None,
    ):
        """Initializes the ProcessingPath. The inputs are first validated to ensure that the path components and
        delimiters are valid.

        Args:
            components: (Union[str, int, Tuple[str, ...], List[str], List[int], List[str | int]]):
                 The current path keys describing the path where each key represents a nested key in a JSON structure
            component_types: (Optional[Union[Tuple[str, ...], List[str]]]):
                An iterable of component types (used to annotate the components)
            delimiter: (Optional[str]):
                The separator used to indicate separate nested keys in a JSON structure. Defaults to the class default
                if not directly specified.

        """
        # Use object.__setattr__ to bypass immutability restrictions
        object.__setattr__(
            self,
            "delimiter",
            self._validate_delimiter(delimiter if delimiter is not None else self.DEFAULT_DELIMITER),
        )
        object.__setattr__(self, "components", self._validate_and_split_path(components))
        object.__setattr__(self, "component_types", self._validate_component_types(component_types))


    @staticmethod
    def _validate_delimiter(delimiter: str) -> str:
        """Validate the provided delimiter to ensure it's suitable for use in a ProcessingPath.

        Args:
            delimiter (str): The delimiter to validate.

        Returns:
            str: The validated delimiter.

        Raises:
            InvalidPathDelimiterError: If the delimiter is not a valid string.

        """

        if not isinstance(delimiter, str) or not delimiter:
            raise InvalidPathDelimiterError("Delimiter must be a non-empty string.")

        if delimiter.isspace():
            raise InvalidPathDelimiterError("Delimiter must not be a whitespace character.")

        if not len(set(delimiter).intersection(set(r"\/:<>|.%"))) > 0:
            raise InvalidPathDelimiterError(
                rf"Delimiter must not contain special characters like \ / : % * ? \" |\n received delimiter={delimiter}"
            )
        return delimiter

    def _validate_and_split_path(
        self, path: Union[str, int, Tuple[str, ...], List[str], List[int], List[str | int]]
    ) -> Tuple[str, ...]:
        """Helper method used to validate and prepare a tuple of path components on the instantiation of the
        ProcessingPath. After validation, the path is prepared, split and formatted depending on its input type.

        Args:
            path (Union[str, int, Tuple[str, ...], List[str], List[int], List[str | int]]):
                The path to validate and split. Note that all integers are auto-converted to strings

        Returns:
            Tuple[str, ...]: The validated tuple of path components.

        Raises:
            InvalidProcessingPathError: If the path is not a string, tuple, or a list of strings.

        """
        # If the path is a string, split it using the delimiter
        if isinstance(path, str):
            path = path.split(self.delimiter)

        if isinstance(path, int):
            path = [str(path)]

        # Convert tuple to list for easier manipulation
        if isinstance(path, tuple):
            path = list(path)

        # If path is the root ('') or an empty list/tuple, return root node
        if path is None or path in ("", [""], [], ("",)):
            return ("",)

        # Ensure the path is a list of non-empty strings
        if not isinstance(path, list) or not all(isinstance(p, (str, int)) for p in path):
            raise InvalidProcessingPathError("Path must be a list or tuple of strings.")

        ## Add a root path indicator
        # if not path[0] == '':
        #    path = [''] + path[1:]

        # Check for empty components in the path after stripping whitespace
        if any(not p.strip() for p in path[1:] if isinstance(p, str)):
            raise InvalidProcessingPathError("Non-root path components must be non-empty strings.")

        # Return the validated path as a tuple
        return tuple(str(p) if isinstance(p, int) else p for p in path)

    def _validate_component_types(
        self, component_types: Optional[Union[str, Tuple[str, ...], List[str]]] = None
    ) -> Optional[Tuple[str, ...]]:
        """Helper method that validates the component types that serve as metadata for each component. Upon validation,
        the component types are split into a tuple of strings depending on their initial type.

        Args:
            path (Union[str, Tuple[str, ...], List[str]]): The path to validate and split.

        Returns:
            Tuple[str, ...]: The validated tuple of path components.

        Raises:
            InvalidProcessingPathError: If the path is not a string, tuple, or a list of strings.

        """

        if component_types is None:
            return None
        # If the path is a string, split it using the delimiter
        if isinstance(component_types, str):
            component_types = component_types.split(self.delimiter)

        # Convert tuple to list for easier manipulation
        if isinstance(component_types, tuple):
            component_types = list(component_types)

        # Ensure the component_types is a list of non-empty strings or types
        if not isinstance(component_types, list) or not all(isinstance(p, str) for p in component_types):
            raise InvalidProcessingPathError("Path must be a list or tuple of strings or types.")

        if not component_types:
            return None

        if any(not p.strip() for p in component_types[1:]):
            raise InvalidProcessingPathError("Path components must be non-empty strings.")

        # determine length difference between node components and
        component_type_length_diff = self.depth - len(component_types)

        if not component_type_length_diff == 0:

            raise InvalidComponentTypeError(
                "When specified, the length of component_type must match the "
                f"depth of the Path:\n Component Type Length = {len(component_types)}, "
                f"Length of Components received: {len(component_types)}. "
            )

        return tuple(component_types)


[docs]
    @staticmethod
    def infer_delimiter(
        path: Union[str, ProcessingPath],
        delimiters: list[str] = ["<>", "//", "/", ">", "<", "\\", "%", "."],
    ) -> Optional[str]:
        """Infer the delimiter used in the path string based on its string representation.

        Args:
            path (Union[str,ProcessingPath]): The path string to infer the delimiter from.
            delimiters (List[str]): A list of common delimiters to search for in the path.
            default_delimiter (str): The default delimiter to use if no delimiter is found.
        Returns:
            str: The inferred delimiter.

        """
        str_path = path.to_string() if isinstance(path, ProcessingPath) else path

        for delimiter in delimiters:
            if delimiter in str_path:
                return delimiter

        return None



[docs]
    def update_delimiter(self, new_delimiter: str) -> ProcessingPath:
        """Update the delimiter of the current ProcessingPath with the provided new delimiter.

        This method creates a new ProcessingPath instance with the same components but replaces
        the existing delimiter with the specified `new_delimiter`.

        Args:
            new_delimiter (str): The new delimiter to replace the current one.

        Returns:
            ProcessingPath: A new ProcessingPath instance with the updated delimiter.

        Raises:
            InvalidPathDelimiterError: If the provided `new_delimiter` is not valid.

        Example:
            >>> processing_path = ProcessingPath('a.b.c', delimiter='.')
            >>> updated_path = processing_path.update_delimiter('/')
            >>> print(updated_path)  # Output: ProcessingPath(a/b/c)

        """
        validated_delimiter = self._validate_delimiter(new_delimiter)
        return ProcessingPath(self.components, self.component_types, validated_delimiter)



[docs]
    @classmethod
    def to_processing_path(
        cls,
        path: Union[ProcessingPath, str, int, List[str], List[int], List[str | int]],
        component_types: Optional[list | tuple] = None,
        delimiter: Optional[str] = None,
        infer_delimiter: bool = False,
    ) -> ProcessingPath:
        """Convert an input to a ProcessingPath instance if it's not already.

        Args:
            path (Union[ProcessingPath, str, int, List[str], List[int], List[str | int]]): The input path to convert.
            component_types (list|tuple): The type of component associated with each path element
            delimiter (str): The delimiter to use if the input is a string.

        Returns:
            ProcessingPath: A ProcessingPath instance.

        Raises:
            InvalidProcessingPathError: If the input cannot be converted to a valid ProcessingPath.

        """
        if isinstance(path, cls):
            return path

        if delimiter is None:

            if not infer_delimiter:
                # logger.debug(f"Infer delimiter is set to False. Default delimiter {cls.DEFAULT_DELIMITER} will be used")
                delimiter = cls.DEFAULT_DELIMITER
            else:
                if isinstance(path, (list, int)):
                    raise InvalidProcessingPathError("Cannot infer delimiter for list of strings or integers")
                return cls.with_inferred_delimiter(path, component_types)

        if isinstance(path, (str, int, list)):
            return cls(path, component_types, delimiter)
        else:
            raise InvalidProcessingPathError(f"Cannot convert {type(path)} to {cls.__name__}.")



[docs]
    @classmethod
    def with_inferred_delimiter(
        cls,
        path: Union[ProcessingPath, str],
        component_types: Optional[List | Tuple] = None,
    ) -> ProcessingPath:
        """Converts an input to a ProcessingPath instance if it's not already a processing path.

        Args:
            path (Union[ProcessingPath, str, List[str]]): The input path to convert.
            delimiter (str): The delimiter to use if the input is a string.
            component_type (list|tuple): The type of component associated with each path element

        Returns:
            ProcessingPath: A ProcessingPath instance.

        Raises:
            InvalidProcessingPathError: If the input cannot be converted to a valid ProcessingPath.

        """
        if not isinstance(path, (ProcessingPath, str)):
            raise InvalidProcessingPathError(f"Cannot infer delimiter for {type(path)}")

        path = path.to_string() if isinstance(path, ProcessingPath) else path
        delimiter = cls.infer_delimiter(path) or cls.DEFAULT_DELIMITER
        return cls(path, component_types, delimiter)


    @property
    def is_root(self) -> bool:
        """Check if the path represents the root node.

        Returns:
            bool: True if the path is root, False otherwise.

        """
        return self.components == ("",)

    def __str__(self) -> str:
        """Convert the ProcessingPath to its string representation.

        Returns:
            str: The string representation of the ProcessingPath.

        """
        return self.delimiter.join(self.components)

    def __getitem__(self, index: Union[int, slice]) -> ProcessingPath:
        """Retrieve a subset of the ProcessingPath components using indexing or slicing.

        Args:
            index (Union[int, slice]): The index or slice to retrieve components.

        Returns:
            ProcessingPath: A new ProcessingPath object with the selected components.

        Raises:
            IndexError: If the index is out of range.

        """
        if isinstance(index, int):
            return ProcessingPath(
                self.components[index],
                ((self.component_types[index],) if self.component_types is not None else None),
                self.delimiter,
            )
        elif isinstance(index, slice):
            start, stop, step = index.indices(len(self.components))
            return ProcessingPath(
                self.components[start:stop:step],
                (self.component_types[start:stop:step] if self.component_types is not None else None),
                self.delimiter,
            )
        else:
            raise IndexError(f"Invalid index for ProcessingPath: received {index}")


[docs]
    def append(self, component: int | str, component_type: Optional[str] = None) -> ProcessingPath:
        """Append a component to the path and return a new ProcessingPath object.

        Args:
            component (str): The component to append.

        Returns:
            ProcessingPath: A new ProcessingPath object with the appended component.

        Raises:
            InvalidProcessingPathError: If the component is not a non-empty string.

        """
        if not isinstance(component, (int, str)) or component is None or component == "":
            raise InvalidProcessingPathError("Component must be a non-empty string.")
        if self.component_types and (not isinstance(component_type, str) or not component_type):
            raise InvalidProcessingPathError(
                "Component Type must be a non-empty string/type when a pre-existing component type is not None"
            )
        return ProcessingPath(
            self.components + (str(component),),
            (
                self.component_types + (component_type,)
                if self.component_types is not None and component_type is not None
                else None
            ),
            self.delimiter,
        )


    @property
    def depth(self) -> int:
        """Return the depth of the path.

        Returns:
            int: The number of components in the path.

        """
        return len(self.components) if self.components != ("",) else 0

    @property
    def record_index(self) -> int:
        """Extract the first element of the current path to determine the record number if the current path  refers back
        to a paginated structure.

        Returns:
            int: The first value, converted to an integer if possible

        Raises:
           PathIndexingError: if the first element of the path is not a numerical index

        """
        try:
            idx = self.components[0]
            return int(idx)
        except (IndexError, TypeError) as e:
            raise PathIndexingError(
                f"The first element of the current path, '{self}',  cannot be indexed as a record: {e}"
            )


[docs]
    def is_ancestor_of(self, path: Union[str, ProcessingPath]) -> bool:
        """Determine whether the current path (self) is equal to or a subset/descendant path of the specified path.

        Args:
            path (ProcessingPath): The potential superset of (self) ProcessingPath.

        Returns:
            bool: True if 'self' is a subset of 'path'. False Otherwise.

        """
        if self.is_root:
            return True  # Root path is a subset of any path

        if isinstance(path, str):
            path = ProcessingPath.to_processing_path(path, delimiter=self.delimiter)

        if self.depth >= path.depth:
            return False

        return self[: self.depth] == path[: self.depth]



[docs]
    def has_ancestor(self, path: Union[str, ProcessingPath]) -> bool:
        """Determine whether the provided path is equal to or a subset/descendant of the current path (self).

        Args:
            path (ProcessingPath): The potential subset/descendant of (self) ProcessingPath.

        Returns:
            bool: True if 'self' is a superset of 'path'. False Otherwise.

        """
        if self.is_root:
            return False  # Root path is a subset of any path

        if isinstance(path, str):
            path = ProcessingPath.to_processing_path(path, delimiter=self.delimiter)

        if self.depth <= path.depth:
            return False

        return self[: path.depth] == path[: path.depth]



[docs]
    def replace(self, old: str, new: str) -> ProcessingPath:
        """Replace occurrences of a component in the path.

        Args:
            old (str): The component to replace.
            new (str): The new component to replace the old one with.

        Returns:
            ProcessingPath: A new ProcessingPath object with the replaced components.

        Raises:
            InvalidProcessingPathError: If the replacement arguments are not strings.

        """
        if not isinstance(old, str) or not isinstance(new, str):
            raise InvalidProcessingPathError(
                f"Replacement arguments must be strings, received: old = {old}, new = {new}"
            )
        return ProcessingPath(
            tuple(new if comp == old else comp for comp in self.components),
            self.component_types,
            self.delimiter,
        )



[docs]
    def replace_path(
        self,
        old: Union[str, ProcessingPath],
        new: Union[str, ProcessingPath],
        component_types: Optional[List | Tuple] = None,
    ) -> ProcessingPath:
        """Replace an ancestor path or full path in the current ProcessingPath with a new path.

        Args:
            old (Union[str, ProcessingPath]): The path to replace.
            new (Union[str, ProcessingPath]): The new path to replace the old path ancestor or full path with.

        Returns:
            ProcessingPath: A new ProcessingPath object with the replaced components.

        Raises:
            InvalidProcessingPathError: If the replacement arguments are not strings or ProcessingPaths.

        """
        if not isinstance(old, (str, ProcessingPath)) or not isinstance(new, (str, ProcessingPath)):
            raise InvalidProcessingPathError(
                f"Replacement arguments must be strings or ProcessingPaths, received: old = {old}, new = {new}"
            )

        old = ProcessingPath.to_processing_path(old, None, self.delimiter)
        new = ProcessingPath.to_processing_path(
            new,
            component_types if self.component_types is not None else None,
            self.delimiter,
        )
        if self == old:
            return new

        if self.has_ancestor(old):
            return new / self[old.depth :]

        return self


    def __hash__(self) -> int:
        """Compute a hash value for the ProcessingPath based on its components.

        Returns:
            int: The hash value of the ProcessingPath.

        """
        # return hash((self.components, self.delimiter))
        return hash(str(self))

    def __contains__(self, value: object) -> bool:
        """Method for indicating whether a partial path is contained within the ProcessingPath.

        This method uses string operations to account for delimiters to determine whether there is a match

        """

        if not isinstance(value, (list, tuple, str, ProcessingPath)):
            return False

        if isinstance(value, (list, tuple)):
            path = ProcessingPath(value)
        else:
            path = ProcessingPath.with_inferred_delimiter(value)

        return path.update_delimiter("<<>>").to_string() in self.update_delimiter("<<>>").to_string()

    def __lt__(self, path: ProcessingPath) -> bool:
        """Check if the current path is a subset of the given path and has a different depth.

        Args:
            path (ProcessingPath): The path to compare against.

        Returns:
            bool: True if self is a subset of path and has a different depth, otherwise False.

        """
        path = ProcessingPath.to_processing_path(path, delimiter=self.delimiter)
        return self._to_alphanum() < path._to_alphanum()

    def __le__(self, path: ProcessingPath) -> bool:
        """Check if the current path is equal to or a subset of the given path.

        Args:
            path (ProcessingPath): The path to compare against.

        Returns:
            bool: True if self is equal to or a subset of path, otherwise False.

        """
        path = ProcessingPath.to_processing_path(path, delimiter=self.delimiter)
        return self._to_alphanum() <= path._to_alphanum()

    def __gt__(self, path: ProcessingPath) -> bool:
        """Check if the current path strictly contains the given path.

        Args:
            path (ProcessingPath): The path to compare against.

        Returns:
            bool: True if self strictly contains path, otherwise False.

        """
        path = ProcessingPath.to_processing_path(path, delimiter=self.delimiter)
        return self._to_alphanum() > path._to_alphanum()

    def __ge__(self, path: ProcessingPath) -> bool:
        """Check if the current path is equal to or strictly contains the given path.

        Args:
            path (ProcessingPath): The path to compare against.

        Returns:
            bool: True if self is equal to or strictly contains path, otherwise False.

        """
        path = ProcessingPath.to_processing_path(path, delimiter=self.delimiter)
        return self._to_alphanum() >= path._to_alphanum()

    def __eq__(self, other: object) -> bool:
        """Check equality with another ProcessingPath, string, or list of strings.

        Args:
            other (object): The object to compare with.

        Returns:
            bool: True if the objects are equal, False otherwise.

        """
        if isinstance(other, ProcessingPath):
            return self.components == other.components and self.delimiter == other.delimiter
        elif isinstance(other, str):
            return self.components == ProcessingPath(other, delimiter=self.delimiter).components
        elif isinstance(other, (tuple, list)):
            return self.components == tuple(other)
        return False

    def __ne__(self, other: object) -> bool:
        """Check inequality with another ProcessingPath, string, or list of strings.

        Args:
            other (object): The object to compare with.

        Returns:
            bool: True if the objects are not equal, False otherwise.

        """
        return not self.__eq__(other)

    def __bool__(self) -> bool:
        """Determine whether the current path is empty or non-empty.

        Returns:
            bool: Indicates whether the number of components is non-zero

        """
        return not self.is_root

    def __len__(self) -> int:
        """Helper method that retrieves the total number of components from the processing path.

        Excludes the root component in the calculation.

        Returns:
            int: The total number of components in the processing path

        """
        return self.depth

    def __truediv__(self, other: Union[ProcessingPath, str]) -> ProcessingPath:
        """Concatenate the ProcessingPath with another path using the '/' operator.

        Args:
            other (Union[ProcessingPath, str]): The other path to concatenate.

        Returns:
            ProcessingPath: A new ProcessingPath object with concatenated components.

        Raises:
            InvalidProcessingPathError: If the other path is neither a ProcessingPath nor a string.

        """
        new_component_types = None
        if isinstance(other, ProcessingPath):
            if other.is_root:
                return self.copy()
            new_components = self.components + other.components
            if self.component_types and other.component_types:
                new_component_types = self.component_types + other.component_types
        elif isinstance(other, str):
            if other == "":
                return self.copy()
            new_components = self.components + (other,)
        else:
            raise InvalidProcessingPathError(f"Can only concatenate with a ProcessingPath or string. Received: {other}")
        return ProcessingPath(new_components, new_component_types, delimiter=self.delimiter)


[docs]
    def sorted(self) -> ProcessingPath:
        """Returns a sorted ProcessingPath from the current_path. Elements are sorted by component in alphabetical
        order.

        Returns:
           ProcessingPath: A new ProcessingPath object with the same components/types in a reversed order

        """

        ordered_indices, _ = zip(
            *sorted(
                enumerate(self._to_alphanum().split(self.delimiter)[1:]),
                key=lambda comp: comp[1],
            )
        )
        ordered_components = [self.components[i] for i in ordered_indices]
        ordered_component_types = [self.component_types[i] for i in ordered_indices] if self.component_types else None

        return ProcessingPath(ordered_components, ordered_component_types, self.delimiter)


    def __reversed__(self) -> ProcessingPath:
        """Helper method to reverse the ProcessingPath using the `reversed` built-in method."""
        return self.reversed()


[docs]
    def reversed(self) -> ProcessingPath:
        """Returns a reversed ProcessingPath from the current_path.

        Returns:
           ProcessingPath: A new ProcessingPath object with the same components/types in a reversed order

        """
        return ProcessingPath(
            self.components[::-1],
            self.component_types[::-1] if self.component_types is not None else None,
            self.delimiter,
        )



[docs]
    def copy(self) -> ProcessingPath:
        """Create a copy of the ProcessingPath.

        Returns:
            ProcessingPath: A new ProcessingPath object with the same components and delimiter.

        """
        return deepcopy(self)



[docs]
    def to_string(self) -> str:
        """Get the string representation of the ProcessingPath.

        Returns:
            str: The string representation of the ProcessingPath.

        """
        return str(self)



[docs]
    def to_pattern(self, escape_all=False) -> Pattern:
        """Convert the ProcessingPath to a regular expression pattern.

        Returns:
            Pattern: The regular expression pattern representing the ProcessingPath.

        """
        return (
            re.compile(re.escape(self.delimiter).join(self.components))
            if not escape_all
            else re.compile(re.escape(self.delimiter.join(self.components)))
        )



[docs]
    def to_list(self) -> List[str]:
        """Convert the ProcessingPath to a list of components.

        Returns:
            List[str]: A list of components in the ProcessingPath.

        """
        return list(self.components)


    def _to_alphanum(self, pad: int = 8, depth_first: bool = True) -> str:
        """Generate an alphanumeric representation of the path, padding numeric components for sorting.

        Args:
            pad (int): The number of digits to pad numeric components (default is 8).
            depth_first (bool): Determines whether to sort, accounting for depth first prior to human sorted alphabetical path.

        Returns:
            str: The alphanumeric string representation of the ProcessingPath.

        Raises:
            InvalidProcessingPathError: If an error occurs during conversion.

        """
        try:
            padded_components = [
                re.sub(
                    r"(^[a-zA-Z_\.\-]*)(\d+)$",
                    lambda x: f"{x.group(1)!r}{x.group(2).zfill(pad)!r}",
                    comp,
                )
                for comp in self.components
            ]
            return self.delimiter.join([str(self.depth)] + padded_components if depth_first else padded_components)
        except Exception as e:
            raise InvalidProcessingPathError(f"Error generating alphanumeric representation for path '{self}': {e}")

    def _filter_indices_list(
        self, indices: List[int], include_matches: bool = False
    ) -> Tuple[Tuple[str, ...], Optional[Tuple[str, ...]]]:
        """Filter the current ProcessingPath using a list of indices and returns components and component types as a
        tuple."""
        filtered_components = tuple(
            [component for index, component in enumerate(self.components) if (index in indices) == include_matches]
        )

        filtered_component_types = None
        if self.component_types is not None:
            filtered_component_types = tuple(
                [
                    component_type
                    for index, component_type in enumerate(self.component_types)
                    if (index in indices) == include_matches
                ]
            )
        return filtered_components, filtered_component_types


[docs]
    def remove_indices(self, num: int = -1, reverse: bool = False) -> ProcessingPath:
        """Drop numeric (index-like) components from the path.

        Args:
            num (int): How many numeric components to drop. A negative value drops all of them (default is -1).
            reverse (bool):
                When True, numeric components are counted from the end of the path, so ``num``
                applies to the right-most ones first (default is False).

        Returns:
            ProcessingPath:
                A new ProcessingPath without the selected numeric components. When the path holds
                no numeric components at all, a copy of the current path is returned instead.

        """
        numeric_positions = [position for position, part in enumerate(self.components) if part.isdigit()]

        if not numeric_positions:
            return self.copy()

        if reverse:
            numeric_positions = numeric_positions[::-1]

        # A negative `num` means "no limit"; otherwise only the first `num` candidates are dropped
        positions_to_drop = numeric_positions if num < 0 else numeric_positions[:num]

        remaining_components, remaining_types = self._filter_indices_list(positions_to_drop, include_matches=False)
        return ProcessingPath(remaining_components, remaining_types, self.delimiter)



[docs]
    def replace_indices(self, placeholder: str = "i") -> ProcessingPath:
        """Swap every numeric component of the path for a placeholder.

        Args:
            placeholder (str): The text substituted for each numeric component (default is 'i').

        Returns:
            ProcessingPath:
                A new ProcessingPath with the same component types and delimiter, where each
                all-digit component has been replaced by the placeholder.

        """
        substituted = []
        for part in self.components:
            substituted.append(placeholder if part.isdigit() else part)
        return ProcessingPath(tuple(substituted), self.component_types, self.delimiter)



[docs]
    def get_parent(self, step: int = 1) -> Optional[ProcessingPath]:
        """Get the ancestor path of the current ProcessingPath by the specified number of steps.

        This method navigates up the path structure by the given number of steps. If the step count is greater than or
        equal to the depth of the current path, or if the path is already the root, it returns None. If the step count
        equals the current depth, it returns the root ProcessingPath.

        Args:
            step (int): The number of levels up to retrieve. 1 for parent, 2 for grandparent, etc. (default is 1).

        Returns:
            Optional[ProcessingPath]:
                - The ancestor ProcessingPath if the step is within the path depth.
                - The root ProcessingPath if step equals the depth of the current path.
                - None if the step is greater than the current depth or if the path is already the root.

        Raises:
            ValueError: If the step is less than 1.

        """
        if step < 1:
            raise ValueError("Step must be greater than or equal to 1.")
        if self.is_root or step > len(self.components):
            return None
        if step == len(self.components):
            return ProcessingPath([""])
        return ProcessingPath(
            self.components[:-step],
            self.component_types[:-step] if self.component_types else None,
            self.delimiter,
        )



[docs]
    def get_ancestors(self) -> List[Optional[ProcessingPath]]:
        """Get all parent paths of the current ProcessingPath by the specified number of steps.

        Returns:
            List[Optional[ProcessingPath]]:
                - Contains a list of all ancestor paths for the current path
                - If the depth of the path is 1, an empty list is returned

        """
        if self.is_root:
            return []
        return [self.get_parent(i) for i in range(1, self.depth)]



[docs]
    def remove(self, removal_list: List[str]) -> ProcessingPath:
        """Build a new path that omits every component named in ``removal_list``.

        Args:
            removal_list (List[str]): The component values to drop from the path.

        Returns:
            ProcessingPath:
                A new ProcessingPath holding only the components (and matching component types)
                that do not appear in ``removal_list``.

        """
        positions_to_keep = []
        for position, part in enumerate(self.components):
            if part in removal_list:
                continue
            positions_to_keep.append(position)

        kept_components, kept_types = self._filter_indices_list(positions_to_keep, include_matches=True)
        return ProcessingPath(kept_components, kept_types, self.delimiter)



[docs]
    def remove_by_type(self, removal_list: List[str], raise_on_error: bool = False) -> ProcessingPath:
        """Build a new path that omits every component whose component type is in ``removal_list``.

        Args:
            removal_list (List[str]): The component types to drop from the path.
            raise_on_error (bool):
                Controls what happens when the path has no component types. When True an error is
                raised; when False (default) a warning is logged and a copy of the path is returned.

        Returns:
            ProcessingPath:
                A new ProcessingPath without the components of the listed types, or a copy of the
                current path when no component types are available and ``raise_on_error`` is False.

        Raises:
            InvalidComponentTypeError: If the path has no component types and ``raise_on_error`` is True.

        """
        if self.component_types is not None:
            positions_to_keep = [
                position for position, kind in enumerate(self.component_types) if kind not in removal_list
            ]
            kept_components, kept_types = self._filter_indices_list(positions_to_keep, include_matches=True)
            return ProcessingPath(kept_components, kept_types, self.delimiter)

        # Nothing to filter on: either fail loudly or fall back to an unchanged copy
        if raise_on_error:
            raise InvalidComponentTypeError("The ProcessingPath has no component type is available to filter on")

        logger.warning("The ProcessingPath has no component type is available to filter on. Skipping filtering")
        return self.copy()



[docs]
    def info_content(self, non_informative: List[str]) -> int:
        """Count the components that remain once the non-informative ones are removed.

        Args:
            non_informative (List[str]): Component values that should not be counted.

        Returns:
            int: The number of components left after removing those listed in ``non_informative``.

        """
        return len(self.remove(non_informative).components)



[docs]
    def get_name(self, max_components: int = 1) -> ProcessingPath:
        """Generate a path name based on the last 'max_components' components of the path.

        Args:
            max_components (int): The maximum number of components to include in the name (default is 1).

        Returns:
            ProcessingPath: A new ProcessingPath object representing the generated name.

        """
        return ProcessingPath(
            self.components[-max_components:],
            (self.component_types[-max_components:] if self.component_types is not None else None),
            self.delimiter,
        )



[docs]
    @classmethod
    def keep_descendants(cls, paths: List[ProcessingPath]) -> List[ProcessingPath]:
        """Filters a list of paths and keeps only descendants."""
        if not paths:
            return []

        prepared_paths = [path if isinstance(path, cls) else cls.with_inferred_delimiter(path) for path in paths]

        sorted_paths = sorted(prepared_paths, key=lambda path: path.depth)
        max_depth = sorted_paths[-1].depth

        result = [
            current_path
            for i, current_path in enumerate(sorted_paths)
            if current_path.depth == max_depth
            or not any(current_path.is_ancestor_of(p) for p in sorted_paths[i + 1 :] if current_path.depth < p.depth)
        ]

        return result



[docs]
    def group(self, last_only: bool = False) -> ProcessingPath:
        """Produce a grouping key for the path by dropping a trailing index and masking the remaining indices.

        When the last component is numeric, or the last component type is "list", the right-most numeric
        component is removed. All numeric components that remain are then replaced with the default
        placeholder (i). This is useful for grouping paths into lists or sets in order to consolidate
        record fields.

        Args:
            last_only (bool):
                Intended to choose between replacing all list indices and removing only the last.
                NOTE: this flag is accepted but is not consulted by the current implementation.

        Returns:
            ProcessingPath:
                A ProcessingPath instance with the last numeric component removed (when applicable) and
                the remaining indices replaced. A falsy path is returned unchanged.

        """
        if not self:
            return self

        trailing_is_list = self.component_types and self.component_types[-1] == "list"
        trailing_is_numeric = self.components[-1].isnumeric()

        # Strip one index, counted from the end, before masking whatever indices are left
        base_path = self.remove_indices(num=1, reverse=True) if (trailing_is_list or trailing_is_numeric) else self
        return base_path.replace_indices()


    def __repr__(self) -> str:
        """Get the string representation of the ProcessingPath object.

        Returns:
            str: The string representation of the ProcessingPath.

        """
        return f"ProcessingPath(components={self.delimiter.join(self.components)}, component_types={self.delimiter.join(self.component_types) if self.component_types else None})"


# Public API of this module: only ProcessingPath is exported by `from <module> import *`.
__all__ = ["ProcessingPath"]