# /utils/paths/path_nodes.py
"""The scholar_flux.utils.paths.path_nodes module implements the basic PathNode data class necessary to represent a
terminal path-value combination within a nested JSON structure.
This data structure forms the basis of path processing that scholar_flux uses to process, filter, and flatten JSON data
sets.
"""
from __future__ import annotations
from typing import Union
import logging
import copy
from typing import Any, ClassVar
from dataclasses import dataclass
from typing_extensions import Self
from scholar_flux.utils.paths.processing_path import ProcessingPath
from scholar_flux.exceptions.path_exceptions import (
InvalidProcessingPathError,
InvalidPathNodeError,
)
# Module-level logger, named after this module's import path via __name__
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class PathNode:
    """A frozen dataclass that wraps a path/terminal-value pair found in a nested JSON structure.

    The PathNode consists of a value of any type and a ProcessingPath instance that indicates where the
    terminal value was found. This class simplifies the process of manipulating and flattening data
    structures originating from JSON data.

    Attributes:
        path (ProcessingPath): The terminal path where the value was located.
        value (Any): The value to associate with the current path.
        DEFAULT_DELIMITER (ClassVar[str]): Class-level delimiter copied from ProcessingPath.DEFAULT_DELIMITER.
    """

    path: ProcessingPath
    value: Any
    DEFAULT_DELIMITER: ClassVar[str] = ProcessingPath.DEFAULT_DELIMITER

    def __post_init__(self) -> None:
        """Validate the path that was passed to the PathNode once the dataclass fields have been set.

        Raises:
            InvalidPathNodeError: If the value passed as `path` is not a ProcessingPath instance.
        """
        if not isinstance(self.path, ProcessingPath):
            raise InvalidPathNodeError(
                f"Error creating PathNode: expected a ProcessingPath for path, received {type(self.path)}"
            )

    @classmethod
    def to_path_node(
        cls, path: Union[ProcessingPath, str, int, list[str], list[int], list[str | int]], value: Any, **path_kwargs
    ) -> Self:
        """Create a path node from the components used to build a path plus the value to assign to the node.

        Args:
            path (Union[ProcessingPath, str, int, list[str], list[int], list[str | int]]):
                The path to be assigned to the node. The input is always passed through
                ProcessingPath.to_processing_path, which is expected to create a path when one was not provided.
            value (Any): The value to associate with the new node.
            **path_kwargs: Additional keyword arguments to be used in the creation of a path.
                These are forwarded to ProcessingPath.to_processing_path.

        Returns:
            Self: The newly constructed node (an instance of the class the method was called on).

        Raises:
            InvalidPathNodeError: If the values provided cannot be used to create a new node.
        """
        try:
            processing_path = ProcessingPath.to_processing_path(path, **path_kwargs)
        except (ValueError, InvalidProcessingPathError) as e:
            # Surface path-construction failures as a node-level error while keeping the original cause
            raise InvalidPathNodeError("Could not construct a path from the inputs") from e
        return cls(processing_path, value)

    def update(self, **attributes: Union[ProcessingPath, Any]) -> Self:
        """Update the parameters of a PathNode by creating a new node instance.

        The dataclass is frozen, so the current node is never modified. Instead, the instance's
        attribute dictionary is merged with the provided overrides and used to initialize a new node
        of the same class as the current one.

        Args:
            **attributes: Keyword arguments indicating the attributes of the node to update.
                Each key should be a valid field name (e.g. `path` or `value`) and each value the
                corresponding replacement. Fields that are not provided keep their current value.

        Returns:
            Self: A new node with the updated attributes.

        Raises:
            InvalidPathNodeError: If the resulting `path` is not a ProcessingPath (raised by `__post_init__`).
            TypeError: If a key does not correspond to a constructor argument.
        """
        # The dict union already produces a new mapping, so the instance's own __dict__ is left untouched
        merged = vars(self) | attributes
        # Build from the runtime class so that subclasses (and their additional fields) are preserved
        return type(self)(**merged)

    @property
    def path_keys(self) -> ProcessingPath:
        """Retrieve the node's path without the keys that originate from list indexes.

        Delegates to `ProcessingPath.remove_indices`.

        Returns:
            ProcessingPath: A ProcessingPath instance associated with all dictionary keys.
        """
        return self.path.remove_indices()

    @property
    def path_group(self) -> ProcessingPath:
        """Retrieve the grouped form of the node's path.

        Delegates to `ProcessingPath.group`, which is documented to omit the last element if it is numeric
        and to replace the remaining integers with a placeholder (i). This is useful when paths need to be
        grouped into lists or sets in order to consolidate record fields.

        Returns:
            ProcessingPath: A ProcessingPath instance with the last numeric component removed and indices replaced.
        """
        return self.path.group()

    @property
    def record_index(self) -> int:
        """Extract the record number that the node's path originates from.

        Delegates to `ProcessingPath.record_index`, which uses the first element of the path and assumes that
        the path originates from a list of dictionaries (e.g. a paginated structure).

        Returns:
            int: Value denoting the record that the path originates from.

        Raises:
            PathIndexingError: If the first element of the path is not a numerical index
                (raised by the underlying ProcessingPath).
        """
        return self.path.record_index

    @classmethod
    def is_valid_node(cls, node: PathNode) -> bool:
        """Validate that the input is a PathNode whose path is a ProcessingPath.

        Args:
            node (PathNode): The object to validate.

        Returns:
            bool: Always True when validation succeeds; invalid input raises instead of returning False.

        Raises:
            InvalidPathNodeError: If the current node is not a PathNode or if its path is not a valid ProcessingPath.
        """
        if not isinstance(node, PathNode):
            raise InvalidPathNodeError(
                f"The current object is not a PathNode: expected 'PathNode', received {type(node)}"
            )

        if not isinstance(node.path, ProcessingPath):
            raise InvalidPathNodeError(
                "The current path of the validated node is not a ProcessingPath: "
                f"expected ProcessingPath, received {type(node.path)}"
            )

        return True

    def __hash__(self) -> int:
        """Hash the node based on its path only.

        The value is deliberately ignored (it may be unhashable). Nodes that compare equal share a path and
        therefore a hash; the hash is a unique identifier only as long as paths are not duplicated.

        Returns:
            int: Hash of the current node's path.
        """
        return hash(self.path)

    def __lt__(self, other: PathNode) -> bool:
        """Compare two nodes by path using the `<` operator of ProcessingPath (values are ignored).

        Args:
            other (PathNode): The node to compare against.

        Returns:
            bool: The result of `self.path < other.path`.
        """
        return self.path < other.path

    def __le__(self, other: PathNode) -> bool:
        """Check whether the current node's path is less than the other's path or the nodes are equal.

        Args:
            other (PathNode): The node to compare against.

        Returns:
            bool: True if `self.path < other.path` or if both nodes are equal (same path and same value).
        """
        return self.path < other.path or self == other

    def __gt__(self, other: PathNode) -> bool:
        """Compare two nodes by path using the `>` operator of ProcessingPath (values are ignored).

        Args:
            other (PathNode): The node to compare against.

        Returns:
            bool: The result of `self.path > other.path`.
        """
        return self.path > other.path

    def __ge__(self, other: PathNode) -> bool:
        """Check whether the current node's path is greater than the other's path or the nodes are equal.

        Args:
            other (PathNode): The node to compare against.

        Returns:
            bool: True if `self.path > other.path` or if both nodes are equal (same path and same value).
        """
        return self.path > other.path or self == other

    def __eq__(self, other: object) -> bool:
        """Check equality with another object.

        Only PathNode instances can compare equal: both the path and the value must match.

        Args:
            other (object): The object to compare with.

        Returns:
            bool: True if `other` is a PathNode with an equal path and an equal value, False otherwise.
        """
        return isinstance(other, PathNode) and self.path == other.path and self.value == other.value

    def copy(self) -> PathNode:
        """Return a shallow copy of the current node (see `__copy__`)."""
        return self.__copy__()

    def __copy__(self) -> PathNode:
        """Create a new node that reuses the current path and holds a shallow copy of the value."""
        return PathNode(path=self.path, value=copy.copy(self.value))

    def __deepcopy__(self, memo) -> PathNode:
        """Create a new node that reuses the current path and holds a deep copy of the value.

        Args:
            memo: The memo dictionary supplied by `copy.deepcopy`, forwarded when copying the value.
        """
        return PathNode(path=self.path, value=copy.deepcopy(self.value, memo))
# Names exported by `from <this module> import *`; PathNode is the only public name listed
__all__ = ["PathNode"]