# Source code for scholar_flux.api.models.search_results

# /api/models/search_results.py
"""The scholar_flux.api.models.search_results module defines the SearchResult and SearchResultList implementations that
aid in the retrieval of multi-page and multi-coordinated searches.

These implementations allow increased organization for the API output of multiple searches
by defining the provider, page, query, and response result retrieved from multi-page searches
from the SearchCoordinator and multi-provider/page searches using the MultiSearchCoordinator.

Classes:
    SearchResult:
        Pydantic Base class that stores the search result as well as the query, provider name, and page.
    SearchResultList:
        Inherits from a basic list to constrain the output to a list of SearchResults while providing
        data preparation convenience functions for downstream frameworks.

"""
from __future__ import annotations
from scholar_flux.api.models import ProcessedResponse, ErrorResponse
from scholar_flux.utils.response_protocol import ResponseProtocol
from typing import Optional, Any, MutableSequence, Iterable
from requests import Response
from pydantic import BaseModel
import logging


logger = logging.getLogger(__name__)



[docs]
class SearchResult(BaseModel):
    """Container that ties a single API response to the query, provider, and page that produced it.

    Instances are created while iterating over ranges of pages, queries, and providers, so that each
    response result stays linked to the request that retrieved it. The class is a pydantic model, so the
    declared fields are validated automatically on construction.

    Args:
        query (str): The query used to retrieve records and response metadata
        provider_name (str): The name of the provider where data is being retrieved
        page (int): The page number associated with the request for data
        response_result (Optional[ProcessedResponse | ErrorResponse]):
            The response result containing the specifics of the data retrieved from the response
            or the error messages recorded if the request is not successful.

    For convenience, the attributes of the `response_result` are exposed as read-only properties of
    the SearchResult, including: `response`, `parsed_response`, `processed_records`, etc.

    """

    query: str
    provider_name: str
    page: int
    response_result: Optional[ProcessedResponse | ErrorResponse] = None

    def __bool__(self) -> bool:
        """Returns True only when the stored result is a ProcessedResponse (False for ErrorResponses/None)."""
        result = self.response_result
        return isinstance(result, ProcessedResponse)

    def __len__(self) -> int:
        """Returns the length reported by the ProcessedResponse, or 0 when there is none.

        An ErrorResponse or a missing result (None) always yields 0, indicating that no records were
        processed successfully.

        """
        result = self.response_result
        if not isinstance(result, ProcessedResponse):
            return 0
        return len(result)

    @property
    def response(self) -> Optional[Response | ResponseProtocol]:
        """The original or reconstructed response (or response-like object) held by the result.

        The value is None when no result is stored or when the result's own `validate_response()` check
        does not pass.

        """
        result = self.response_result
        # `validate_response()` is only consulted once a result is known to exist.
        if result is None or not result.validate_response():
            return None
        return result.response

    @property
    def parsed_response(self) -> Optional[Any]:
        """The parsed response content (e.g. JSON, XML, or YAML) taken from the response result.

        The value is None when the response result is falsy, which covers a missing result and is the
        documented outcome for an ErrorResponse.

        """
        result = self.response_result
        if not result:
            return None
        return result.parsed_response

    @property
    def extracted_records(self) -> Optional[list[Any]]:
        """The individual records extracted from the parsed response, as stored on the response result.

        The value is None when the response result is falsy, which covers a missing result and is the
        documented outcome for an ErrorResponse.

        """
        result = self.response_result
        if not result:
            return None
        return result.extracted_records

    @property
    def metadata(self) -> Optional[Any]:
        """The response metadata extracted from the parsed response, as stored on the response result.

        The value is None when the response result is falsy, which covers a missing result and is the
        documented outcome for an ErrorResponse.

        """
        result = self.response_result
        if not result:
            return None
        return result.metadata

    @property
    def processed_records(self) -> Optional[list[dict[Any, Any]]]:
        """The records produced by the final processing step, as stored on the response result.

        The value is None when the response result is falsy, which covers a missing result and is the
        documented outcome for an error response.

        """
        result = self.response_result
        if not result:
            return None
        return result.processed_records

    @property
    def data(self) -> Optional[list[dict[Any, Any]]]:
        """The `data` attribute of the response result, an alias for its processed records.

        The value is None when the response result is falsy, which covers a missing result and is the
        documented outcome for an error response.

        """
        result = self.response_result
        if not result:
            return None
        return result.data

    @property
    def cache_key(self) -> Optional[str]:
        """The cache key recorded on the response result, if available.

        This cache key is used when storing and retrieving data from response processing cache storage.
        The value is None when the response result is falsy.

        """
        result = self.response_result
        if not result:
            return None
        return result.cache_key

    @property
    def error(self) -> Optional[str]:
        """The error name/category recorded on an ErrorResponse; None for any other kind of result."""
        result = self.response_result
        if isinstance(result, ErrorResponse):
            return result.error
        return None

    @property
    def message(self) -> Optional[str]:
        """The message explaining why an error occurred, taken from an ErrorResponse; None otherwise."""
        result = self.response_result
        if isinstance(result, ErrorResponse):
            return result.message
        return None

    @property
    def created_at(self) -> Optional[str]:
        """The creation time recorded on the ErrorResponse or ProcessedResponse, if one is stored."""
        result = self.response_result
        if isinstance(result, (ErrorResponse, ProcessedResponse)):
            return result.created_at
        return None

    def __eq__(self, other: Any) -> bool:
        """Compares two search results field by field.

        The other object must be an instance of this object's class; if it is, both models are dumped
        into dictionaries with `model_dump()` and the dictionaries are compared.

        Args:
            other (Any): An object to compare against the current search result

        Returns:
            bool: True if the class matches and all dumped components are equal, False otherwise.

        """
        if isinstance(other, self.__class__):
            return self.model_dump() == other.model_dump()
        return False




[docs]
class SearchResultList(list[SearchResult]):
    """A helper class used to store the results of multiple SearchResult instances for enhanced type safety. This class
    inherits from a list and extends its functionality to tailor it to APIResponses received from
    SearchCoordinators and MultiSearchCoordinators.

    Methods:
        - SearchResultList.append: Basic `list.append` implementation extended to accept only SearchResults
        - SearchResultList.extend: Basic `list.extend` implementation extended to accept only iterables of SearchResults
        - SearchResultList.filter: Returns a new list without the NonResponses and ErrorResponses
        - SearchResultList.join: Combines all records from ProcessedResponses into a list of dictionary-based records

    Note: Attempts to add objects other than SearchResults through `append`, `extend`, or item/slice
    assignment will raise a TypeError. The constructor and the remaining inherited list mutators
    (e.g. `insert`, `+=`) are not overridden here and therefore do not perform this check.

    """

    @classmethod
    def _validated_items(cls, items: Any) -> list[SearchResult]:
        """Materializes an iterable into a list and verifies that every element is a SearchResult.

        The iterable is copied into a list *before* validation so that one-shot iterators (generators,
        `map`/`filter` objects, ...) are consumed exactly once; the returned list is what callers insert.

        Args:
            items (Any): The object expected to be an iterable of SearchResults

        Returns:
            list[SearchResult]: A new list holding the validated elements in their original order

        Raises:
            TypeError: If `items` is not iterable or if any element is not a SearchResult

        """
        if not isinstance(items, Iterable):
            raise TypeError(f"Expected an iterable of SearchResults, received an object type {type(items)}")
        materialized = list(items)
        if not all(isinstance(element, SearchResult) for element in materialized):
            raise TypeError(f"Expected an iterable of SearchResults, received an object type {type(items)}")
        return materialized

    def __setitem__(self, index, item):
        """Overwrites the default __setitem__ method to ensure that only SearchResult objects can be assigned to
        the custom list.

        Args:
            index (int | slice):
                The numeric index of the element to replace, or a slice of the list to replace
            item (SearchResult | Iterable[SearchResult]):
                For an integer index, the SearchResult containing the API response data, the provider name,
                and page associated with the response. For a slice, an iterable of such SearchResults.

        Raises:
            TypeError: If the assigned value is not a SearchResult (or, for slices, not an iterable of them)

        """
        if isinstance(index, slice):
            # Slice assignment takes an iterable on the right-hand side; validate each element instead of
            # rejecting the container itself.
            super().__setitem__(index, self._validated_items(item))
            return
        if not isinstance(item, SearchResult):
            raise TypeError(f"Expected a SearchResult, received an item of type {type(item)}")
        super().__setitem__(index, item)

    def append(self, item: SearchResult):
        """Overwrites the default `list.append` method to ensure that only SearchResult objects can be appended
        to the custom list.

        Args:
            item (SearchResult):
                The response result containing the API response data, the provider name, and page associated with
                the response.

        Raises:
            TypeError: If `item` is not a SearchResult

        """
        if not isinstance(item, SearchResult):
            raise TypeError(f"Expected a SearchResult, received an item of type {type(item)}")
        super().append(item)

    def extend(self, other: SearchResultList | MutableSequence[SearchResult] | Iterable[SearchResult]):
        """Overwrites the default `list.extend` method to ensure that only an iterable of SearchResult objects
        can be added to the SearchResultList.

        One-shot iterables such as generators are supported: the elements are materialized once, validated,
        and then added, so validation never exhausts the input before it is inserted.

        Args:
            other (Iterable[SearchResult]): An iterable/sequence of response results containing the API response
            data, the provider name, and page associated with the response

        Raises:
            TypeError: If `other` is not iterable or contains anything other than SearchResults. Nothing is
                added to the list in that case.

        """
        if isinstance(other, SearchResultList):
            # Another SearchResultList is accepted as-is, without re-checking its elements.
            super().extend(other)
            return
        super().extend(self._validated_items(other))

    def join(self) -> list[dict[str, Any]]:
        """Helper method for joining all successfully processed API responses into a single list of dictionaries that
        can be loaded into a pandas or polars dataframe.

        Note that this method will only load processed responses that contain records that were also successfully
        extracted and processed: results without data contribute nothing, and falsy records (e.g. empty
        dictionaries) are skipped. Each remaining record is merged with the `provider_name` and `page_number`
        of the SearchResult it came from; these two keys take precedence over same-named keys in the record.

        Returns:
            list[dict[str, Any]]: A single list containing all records retrieved from each page

        """
        return [self._resolve_record(record, item) for item in self for record in self._get_records(item) if record]

    @classmethod
    def _get_records(cls, item: SearchResult) -> list[dict[str, Any]]:
        """Extracts a list of records (dictionaries) from a SearchResult.

        Returns an empty list when `item` is not a SearchResult, when it has no response result, or when the
        response result's `data` is empty or None.

        """
        records = (
            None if not isinstance(item, SearchResult) or item.response_result is None else item.response_result.data
        )

        return records or []

    @classmethod
    def _resolve_record(cls, record: Optional[dict], item: SearchResult) -> dict[str, Any]:
        """Formats the current record and adds the provider_name and page number to a copy of the record.

        A missing record (None) is treated as an empty dictionary. The input record is not modified: the
        dictionary union creates a new dictionary.

        """
        record_dict = record or {}
        return record_dict | {"provider_name": item.provider_name, "page_number": item.page}

    def filter(self) -> SearchResultList:
        """Helper method that retains only elements from the original response that indicate successful processing.

        Returns:
            SearchResultList: A new list containing only the elements whose `response_result` is a
            ProcessedResponse. The current list is left unchanged.

        """
        return SearchResultList(item for item in self if isinstance(item.response_result, ProcessedResponse))




__all__ = ["SearchResult", "SearchResultList"]