# Source code for scholar_flux.utils.paths.path_nodes

#  /utils/paths/path_nodes.py
"""The scholar_flux.utils.paths.path_nodes module implements the basic PathNode data class necessary to represent a
terminal path-value combination within a nested JSON structure.

This data structure forms the basis of path processing that scholar_flux uses to process, filter, and flatten JSON data
sets.

"""
from __future__ import annotations
from typing import Union
import logging
import copy
from typing import Any, ClassVar
from dataclasses import dataclass
from typing_extensions import Self
from scholar_flux.utils.paths.processing_path import ProcessingPath
from scholar_flux.exceptions.path_exceptions import (
    InvalidProcessingPathError,
    InvalidPathNodeError,
)

# Configure logging
logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class PathNode:
    """A frozen dataclass that wraps a path/terminal-value pair found in a nested JSON structure.

    A PathNode couples a value of any type with the ProcessingPath instance that indicates where that terminal value
    was found. This class simplifies the process of manipulating and flattening data structures originating from JSON
    data.

    Because the dataclass is frozen, "modifying" helpers such as `update` and `copy` always return a new node that is
    built through `type(self)`, so subclasses of PathNode keep their own type.

    Attributes:
        path (ProcessingPath): The terminal path where the value was located.
        value (Any): The value to associate with the current path.
        DEFAULT_DELIMITER (ClassVar[str]): Class-level delimiter taken from `ProcessingPath.DEFAULT_DELIMITER`.

    """

    path: ProcessingPath
    value: Any
    DEFAULT_DELIMITER: ClassVar[str] = ProcessingPath.DEFAULT_DELIMITER

    def __post_init__(self) -> None:
        """Validate the path that was passed to the PathNode on initialization.

        Raises:
            InvalidPathNodeError: If the value passed as a path is not a ProcessingPath instance.

        """
        if not isinstance(self.path, ProcessingPath):
            raise InvalidPathNodeError(
                f"Error creating PathNode: expected a ProcessingPath for path, received {type(self.path)}"
            )

    @classmethod
    def to_path_node(
        cls,
        path: Union[ProcessingPath, str, int, list[str], list[int], list[str | int]],
        value: Any,
        **path_kwargs,
    ) -> Self:
        """Create a path node from the components used to build a path and the value to assign to the node.

        Args:
            path (Union[ProcessingPath, str, int, list[str], list[int], list[str | int]]):
                The path to be assigned to the node. The input is always handed to
                `ProcessingPath.to_processing_path`, which is expected to build a path when one was not provided.
            value (Any): The value to associate with the new node.
            **path_kwargs: Additional keyword arguments forwarded to `ProcessingPath.to_processing_path`.

        Returns:
            Self: The newly constructed node (an instance of `cls`).

        Raises:
            InvalidPathNodeError: If the values provided cannot be used to create a new node.

        """
        try:
            processing_path = ProcessingPath.to_processing_path(path, **path_kwargs)
        except (ValueError, InvalidProcessingPathError) as e:
            # Surface path-construction failures under the node-level exception type, keeping the original cause.
            raise InvalidPathNodeError("Could not construct a path from the inputs") from e
        return cls(processing_path, value)

    def update(self, **attributes: Union[ProcessingPath, Any]) -> Self:
        """Return a new node whose attributes are those of the current node overridden by `attributes`.

        The original node is frozen and is never modified. The current instance attributes are merged with the
        provided overrides and used to initialize a new instance of the same class as `self`, which means that
        `__post_init__` validation runs again on the result.

        Args:
            **attributes: Keyword arguments indicating the attributes of the node to replace. Attributes that are
                not provided keep their current value. Each key should be a valid field name (`path` or `value`
                for PathNode); unknown names are rejected by the constructor with a TypeError.

        Returns:
            Self: A new node with the updated attributes.

        """
        # The dict union already produces a fresh dictionary, so the instance `__dict__` is left untouched.
        parameter_dict = self.__dict__ | attributes
        # Build through type(self) rather than PathNode so that subclass instances are not downgraded.
        return type(self)(**parameter_dict)

    @property
    def path_keys(self) -> ProcessingPath:
        """Retrieve the keys of the path, ignoring indexes generated by lists.

        Delegates to `ProcessingPath.remove_indices` on the node's path.

        Returns:
            ProcessingPath: A ProcessingPath instance associated with all dictionary keys.

        """
        return self.path.remove_indices()

    @property
    def path_group(self) -> ProcessingPath:
        """Retrieve the grouping form of the path, omitting the last element if it is numeric.

        Delegates to `ProcessingPath.group` on the node's path. The remaining integers are replaced with a
        placeholder (i), which is later useful when paths need to be grouped into lists or sets in order to
        consolidate record fields.

        Returns:
            ProcessingPath: A ProcessingPath instance with the last numeric component removed and indices replaced.

        """
        return self.path.group()

    @property
    def record_index(self) -> int:
        """Extract the record number from the first element of the node's path.

        Delegates to `ProcessingPath.record_index` and assumes that the path originates from a paginated structure,
        i.e. a list of dictionaries.

        Returns:
            int: Value denoting the record that the path originates from.

        Raises:
            PathIndexingError: If the first element of the path is not a numerical index.

        """
        return self.path.record_index

    @classmethod
    def is_valid_node(cls, node: Any) -> bool:
        """Validate that the input is a PathNode holding a ProcessingPath.

        This method never returns False: invalid input is reported by raising.

        Args:
            node (Any): The object to validate.

        Returns:
            bool: True when `node` is a PathNode whose path is a ProcessingPath.

        Raises:
            InvalidPathNodeError: If `node` is not a PathNode or if its path is not a valid ProcessingPath.

        """
        if not isinstance(node, PathNode):
            raise InvalidPathNodeError(
                f"The current object is not a PathNode: expected 'PathNode', received {type(node)}"
            )

        # Defensive re-check: the frozen dataclass validates in __post_init__, but that can be bypassed
        # (e.g. by object.__setattr__ or by building an instance without running __init__).
        if not isinstance(node.path, ProcessingPath):
            raise InvalidPathNodeError(
                "The current path of the validated node is not a ProcessingPath: "
                f"expected ProcessingPath, received {type(node.path)}"
            )
        return True

    def __hash__(self) -> int:
        """Hash the node based on the hash of its path.

        The value is intentionally excluded so that nodes holding unhashable values (lists, dictionaries) remain
        hashable. Nodes that compare equal share a path and therefore share a hash.

        Returns:
            int: Hash of the current node's path.

        """
        return hash(self.path)

    def __lt__(self, other: PathNode) -> bool:
        """Check whether the path of the current node orders strictly before the path of another node.

        The comparison is delegated to the `<` operator of ProcessingPath; node values are not considered.

        Args:
            other (PathNode): The node to compare against.

        Returns:
            bool: The result of `self.path < other.path`.

        """
        return self.path < other.path

    def __le__(self, other: PathNode) -> bool:
        """Check whether the current node orders before, or is equal to, another node.

        Note that the equality part is node equality (same path and same value), so two nodes with equal paths
        but different values are neither `<=` nor `>=` to each other.

        Args:
            other (PathNode): The node to compare against.

        Returns:
            bool: True if `self.path < other.path` or the two nodes are equal, otherwise False.

        """
        return self.path < other.path or self == other

    def __gt__(self, other: PathNode) -> bool:
        """Check whether the path of the current node orders strictly after the path of another node.

        The comparison is delegated to the `>` operator of ProcessingPath; node values are not considered.

        Args:
            other (PathNode): The node to compare against.

        Returns:
            bool: The result of `self.path > other.path`.

        """
        return self.path > other.path

    def __ge__(self, other: PathNode) -> bool:
        """Check whether the current node orders after, or is equal to, another node.

        Note that the equality part is node equality (same path and same value), so two nodes with equal paths
        but different values are neither `<=` nor `>=` to each other.

        Args:
            other (PathNode): The node to compare against.

        Returns:
            bool: True if `self.path > other.path` or the two nodes are equal, otherwise False.

        """
        return self.path > other.path or self == other

    def __eq__(self, other: object) -> bool:
        """Check equality with another object.

        Args:
            other (object): The object to compare with.

        Returns:
            bool: True only if `other` is a PathNode with an equal path and an equal value, False otherwise.

        """
        return isinstance(other, PathNode) and self.path == other.path and self.value == other.value

    def copy(self) -> Self:
        """Return a shallow copy of the current node (see `__copy__`)."""
        return self.__copy__()

    def __copy__(self) -> Self:
        """Return a new node of the same class that shares the path and holds a shallow copy of the value."""
        return self.update(value=copy.copy(self.value))

    def __deepcopy__(self, memo) -> Self:
        """Return a new node of the same class that shares the path and holds a deep copy of the value.

        Args:
            memo: The memo dictionary supplied by `copy.deepcopy`, forwarded when copying the value.

        """
        return self.update(value=copy.deepcopy(self.value, memo))


__all__ = ["PathNode"]