Source code for scholar_flux.utils.paths.path_nodes

#  /utils/paths/path_nodes.py
"""The scholar_flux.utils.paths.path_nodes module implements the basic PathNode data class necessary to represent a
terminal path-value combination within a nested JSON structure.

This data structure forms the basis of path processing that scholar_flux uses to process, filter, and flatten JSON data
sets.

"""
from __future__ import annotations
from typing import Union
import logging
import copy
from typing import Any, ClassVar
from dataclasses import dataclass
from typing_extensions import Self
from scholar_flux.utils.paths.processing_path import ProcessingPath
from scholar_flux.exceptions.path_exceptions import (
    InvalidProcessingPathError,
    InvalidPathNodeError,
)

# Module-level logger, named after this module via __name__
logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class PathNode:
    """A frozen dataclass that acts as a wrapper for path-terminal value pairs in nested JSON structures.

    The PathNode consists of a value of any type and a ProcessingPath instance that indicates where a terminal-value was
    found. This class simplifies the process of manipulating and flattening data structures originating from JSON data.

    Attributes:
        path (ProcessingPath): The terminal path where the value was located.
        value (Any): The value to associate with the current path.
        DEFAULT_DELIMITER (ClassVar[str]): Class-level constant taken from ProcessingPath.DEFAULT_DELIMITER.

    """

    path: ProcessingPath
    value: Any
    # Declared as a ClassVar, so it is a class-level constant rather than a dataclass field;
    # its value is read from ProcessingPath.DEFAULT_DELIMITER when the class is defined.
    DEFAULT_DELIMITER: ClassVar[str] = ProcessingPath.DEFAULT_DELIMITER

    def __post_init__(self):
        """Verify, right after initialization, that the object stored as `path` is a ProcessingPath.

        Raises:
            InvalidPathNodeError: If the value passed as a path is not a ProcessingPath instance

        """
        # Nothing to do when the path already has the expected type
        if isinstance(self.path, ProcessingPath):
            return

        raise InvalidPathNodeError(
            f"Error creating PathNode: expected a ProcessingPath for path, received {type(self.path)}"
        )


[docs]
    @classmethod
    def to_path_node(
        cls, path: Union[ProcessingPath, str, int, list[str], list[int], list[str | int]], value: Any, **path_kwargs
    ) -> Self:
        """Build a node from the components used to create a path plus the value the node should carry.

        Args:
            path (Union[ProcessingPath, str, int, list[str], list[int], list[str | int]]):
                The path to assign to the node. It is always handed to ProcessingPath.to_processing_path,
                together with `path_kwargs`, before the node is created.
            value (Any): The value to associate with the new node
            **path_kwargs: Additional keyword arguments forwarded to ProcessingPath.to_processing_path
        Returns:
            Self: The newly constructed node (an instance of `cls`)
        Raises:
            InvalidPathNodeError: If ProcessingPath.to_processing_path raises a ValueError or an
                                  InvalidProcessingPathError for the provided inputs

        """
        try:
            node_path = ProcessingPath.to_processing_path(path, **path_kwargs)
        except (ValueError, InvalidProcessingPathError) as e:
            raise InvalidPathNodeError("Could not construct a path from the inputs") from e
        else:
            # Node construction stays outside the try body so its own errors are not re-wrapped
            return cls(node_path, value)



[docs]
    def update(self, **attributes: Union[ProcessingPath, Any]) -> PathNode:
        """Return a new PathNode carrying the current attributes overridden by `attributes`.

        The PathNode dataclass is frozen, so nothing is modified in place. The entries of the instance
        `__dict__` are merged with the keyword arguments (keyword arguments win on conflicts) and the
        merged mapping is used to initialize a new PathNode.

        Args:
            **attributes: Keyword arguments naming the PathNode attributes to replace, each paired
                          with its updated value. Attributes that are not named keep their current value.

        Returns:
            PathNode: A new node with the updated attributes

        """
        merged_attributes = {**self.__dict__, **attributes}
        return PathNode(**merged_attributes)


    @property
    def path_keys(self) -> ProcessingPath:
        """The node's path with the components that originate from list indexes removed.

        Delegates to `ProcessingPath.remove_indices` on the node's path, so only the keys of the
        original path are retained.

        Returns:
            ProcessingPath: A ProcessingPath instance associated with all dictionary keys

        """
        keys_only = self.path.remove_indices()
        return keys_only

    @property
    def path_group(self) -> ProcessingPath:
        """The grouping form of the node's path, obtained from `ProcessingPath.group`.

        The path is returned with its last element omitted if that element is numeric, and the remaining
        integers are replaced with a placeholder (i). This is useful later when paths need to be grouped
        into lists or sets in order to consolidate record fields.

        Returns:
            ProcessingPath: A ProcessingPath instance with the last numeric component removed and indices replaced.

        """
        grouped_path = self.path.group()
        return grouped_path

    @property
    def record_index(self) -> int:
        """The record number of this node, read from the `record_index` attribute of its path.

        The first element of the node's path is used to determine which record of a list of dictionaries
        the path originates from, assuming the path comes from a paginated structure.

        Returns:
            int: Value denoting the record that the path originates from

        Raises:
            PathIndexingError: if the first element of the path is not a numerical index

        """
        index_of_record = self.path.record_index
        return index_of_record


[docs]
    @classmethod
    def is_valid_node(cls, node: PathNode) -> bool:
        """Confirm that `node` is a PathNode whose path is a ProcessingPath.

        A failed check raises instead of returning False, so this method only ever returns True.

        Args:
            node (PathNode): The object to validate

        Returns:
            bool: True when both checks pass

        Raises:
           InvalidPathNodeError: If the current node is not a PathNode or if its path is not a valid ProcessingPath

        """
        if isinstance(node, PathNode):
            if isinstance(node.path, ProcessingPath):
                return True

            # The object is a PathNode, but its path has the wrong type
            raise InvalidPathNodeError(
                f"The current path of the validated node is not a ProcessingPath: expected ProcessingPath, received {type(node.path)}"
            )

        raise InvalidPathNodeError(
            f"The current object is not a PathNode: expected 'PathNode', received {type(node)}"
        )


    def __hash__(self) -> int:
        """Hash the node based on the hash of its path.

        Only the path contributes to the hash; the value is not included, so nodes that share a path hash
        identically even when their values differ. This creates a unique identifier for dictionary hashing
        assuming paths are not duplicated.

        Returns:
            int: hash of the current path node

        """
        # Use the built-in hash() instead of calling the path's __hash__ dunder directly
        return hash(self.path)

    def __lt__(self, other: PathNode) -> bool:
        """Compare two nodes by applying `<` to their paths; node values are not compared.

        Args:
            other (PathNode): The node whose path is compared against the current node's path.

        Returns:
            bool: The result of `self.path < other.path` (documented as True if self is a subset of the
                  other path and has a different depth, otherwise False).

        """
        own_path = self.path
        compared_path = other.path
        return own_path < compared_path

    def __le__(self, other: PathNode) -> bool:
        """Check whether the current node's path is `<` the other node's path, or the two nodes are equal.

        The equality fallback uses `self == other`, i.e. node equality rather than a path-only comparison.

        Args:
            other (PathNode): The node to compare against.

        Returns:
            bool: True if `self.path < other.path` or if `self == other`, otherwise False.

        """
        strictly_below = self.path < other.path
        return strictly_below or self == other

    def __gt__(self, other: PathNode) -> bool:
        """Compare two nodes by applying `>` to their paths; node values are not compared.

        Args:
            other (PathNode): The node whose path is compared against the current node's path.

        Returns:
            bool: The result of `self.path > other.path` (documented as True if self strictly contains
                  the other path, otherwise False).

        """
        own_path = self.path
        compared_path = other.path
        return own_path > compared_path

    def __ge__(self, other: PathNode) -> bool:
        """Check whether the current node's path is `>` the other node's path, or the two nodes are equal.

        The equality fallback uses `self == other`, i.e. node equality rather than a path-only comparison.

        Args:
            other (PathNode): The node to compare against.

        Returns:
            bool: True if `self.path > other.path` or if `self == other`, otherwise False.

        """
        strictly_above = self.path > other.path
        return strictly_above or self == other

    def __eq__(self, other: object) -> bool:
        """Check equality with another object.

        Only another PathNode can compare equal: both the paths and the values of the two nodes must match.
        Any object that is not a PathNode compares unequal.

        Args:
            other (object): The object to compare with.

        Returns:
            bool: True if the objects are equal, False otherwise.

        """
        if not isinstance(other, PathNode):
            return False
        return self.path == other.path and self.value == other.value


[docs]
    def copy(self) -> PathNode:
        """Return a copy of the current node by delegating to `__copy__`."""
        duplicate = self.__copy__()
        return duplicate


    def __copy__(self) -> PathNode:
        """Create a new PathNode that reuses the same path object and holds a shallow copy of the value."""
        copied_value = copy.copy(self.value)
        return PathNode(path=self.path, value=copied_value)

    def __deepcopy__(self, memo) -> PathNode:
        """Create a new PathNode that reuses the same path object and holds a deep copy of the value.

        Args:
            memo: The memo object handed over by `copy.deepcopy`; it is forwarded when copying the value.

        """
        copied_value = copy.deepcopy(self.value, memo)
        return PathNode(path=self.path, value=copied_value)


# Names exported by `from ... import *`: only the PathNode class
__all__ = ["PathNode"]