# Source code for scholar_flux.api.models.reconstructed_response

# /api/models/reconstructed_response.py
"""The scholar_flux.api.models.reconstructed_response module implements a basic ReconstructedResponse data structure.

The ReconstructedResponse class was designed to be request-client agnostic to improve flexibility in the request
clients that can be used to retrieve data from APIs and load response data from cache.

The ReconstructedResponse is a minimal implementation of a response-like
object that can transform response classes from `requests`, `httpx`, and
`asyncio` into a singular representation of the same response.

"""
from __future__ import annotations
from typing import Optional, Dict, List, Any, MutableMapping, Mapping
from dataclasses import dataclass, asdict, fields
from scholar_flux.api.validators import validate_url
from scholar_flux.exceptions import InvalidResponseReconstructionException, InvalidResponseStructureException
import requests
from scholar_flux.utils.response_protocol import ResponseProtocol
from http.client import responses
from json import JSONDecodeError
import json
import logging

# Module-level logger named after this module (via __name__); used for the warnings emitted in this file.
logger = logging.getLogger(__name__)


@dataclass
class ReconstructedResponse:
    """Minimal, client-agnostic representation of an HTTP response.

    The class retains the most relevant fields when reconstructing responses from different
    sources such as `requests` and `httpx` (if chosen). Its primary purpose in scholar_flux is to
    create a minimal representation of a response when a ProcessedResponse has to be constructed
    without an actual response, and to verify content fields.

    In applications such as retrieving cached data from a
    `scholar_flux.data_storage.DataCacheManager`, if an original or cached response is not
    available, then a ReconstructedResponse is created from the cached response fields when
    available.

    Args:
        status_code (int): The integer code indicating the status of the response
        reason (str): Indicates the reasoning associated with the status of the response
        headers (MutableMapping[str, str]): Indicates metadata associated with the response
            (e.g. Content-Type, etc.)
        content (bytes): The content within the response
        url (Any): The URL from which the response was received

    Note:
        The `ReconstructedResponse.build` factory method is recommended in cases when one property
        may contain the needed fields but may need to be processed and prepared first before being
        used. Examples include instances where one has text or json data instead of content, a
        reason_phrase field instead of reason, etc.

    Example:
        >>> from scholar_flux.api.models import ReconstructedResponse
        # build a response using a factory method that infers fields from existing ones when not directly specified
        >>> response = ReconstructedResponse.build(status_code = 200, content = b"success", url = "https://google.com")
        # check whether the current class follows a ResponseProtocol and contains valid fields
        >>> assert response.is_response()
        # OUTPUT: True
        >>> response.validate() # raises an error if invalid
        >>> response.raise_for_status() # no error for 200 status codes
        >>> assert response.reason == 'OK' == response.status # inferred from the status_code attribute

    """

    status_code: int
    reason: str
    headers: MutableMapping[str, str]
    content: bytes
    url: Any

    @classmethod
    def build(cls, response: Optional[Any] = None, **kwargs) -> ReconstructedResponse:
        """Build a ReconstructedResponse from a response(-like) object and/or keyword arguments.

        An existing ReconstructedResponse is returned unchanged. A mapping is merged with
        `kwargs`. For any other non-None object, its instance attributes and every
        ReconstructedResponse field reachable through `getattr` are merged with `kwargs`.
        In every merge, explicitly passed `kwargs` win over values taken from `response`.

        Args:
            response (Optional[Any]): A response or response-like object of unknown type or None
            **kwargs: The underlying components needed to construct a new response. Ideally this
                set of key-value pairs is specific to the fields expected by the
                ReconstructedResponse.

        Returns:
            ReconstructedResponse: The given instance (when `response` already is one) or a newly
            created instance.

        """
        if isinstance(response, ReconstructedResponse):
            return response

        if response is not None:
            if isinstance(response, Mapping):
                # `Mapping` covers plain dictionaries and every other (mutable) mapping type
                kwargs = dict(response) | kwargs
            else:
                # objects defining `__slots__` have no `__dict__`: fall back to an empty state
                instance_state = getattr(response, "__dict__", None) or {}
                # extract properties that are not serialized in the instance dictionary
                field_values = {
                    name: getattr(response, name)
                    for name in ReconstructedResponse.fields()
                    if hasattr(response, name)
                }
                kwargs = dict(instance_state) | field_values | kwargs

        return ReconstructedResponse.from_keywords(**kwargs)

    @classmethod
    def fields(cls) -> list:
        """Return the names of all dataclass fields of the ReconstructedResponse class.

        Returns:
            list[str]: A list containing the name of each attribute in the ReconstructedResponse.

        """
        return [field.name for field in fields(ReconstructedResponse)]

    def asdict(self) -> dict[str, Any]:
        """Convert the ReconstructedResponse into a dictionary of field names and their values."""
        return asdict(self)

    @classmethod
    def from_keywords(cls, **kwargs) -> ReconstructedResponse:
        """Create a ReconstructedResponse from keyword arguments.

        Keywords include the default attributes of the ReconstructedResponse, or values that can
        be inferred and processed from other keywords.

        Args:
            status_code (int): The integer code indicating the status of the response
            reason (str): Indicates the reasoning associated with the status of the response
            headers (MutableMapping[str, str]): Indicates metadata associated with the response
                (e.g. Content-Type)
            content (bytes): The content within the response
            url (Any): The URL from which the response was received

        Some fields can be both provided directly or inferred from other similarly common fields:

            - content: ['content', '_content', 'text', 'json']
            - headers: ['headers', '_headers']
            - reason:  ['reason', 'status', 'reason_phrase', 'status_code']

        Returns:
            ReconstructedResponse: A newly reconstructed response from the given keyword components

        Raises:
            InvalidResponseReconstructionException: If the dataclass constructor raises a
                TypeError, e.g. because a required field is still missing after normalization.

        """
        status_code = cls._normalize_status_code(**kwargs)
        if status_code is not None:
            kwargs["status_code"] = status_code

        kwargs["headers"] = cls._normalize_headers(**kwargs)

        if url := cls._normalize_url(**kwargs):
            kwargs["url"] = url

        # the reason is resolved after the status code so that it can be inferred from it
        kwargs["reason"] = cls._normalize_reason(**kwargs)
        kwargs["content"] = cls._resolve_content_sources(**kwargs)

        # drop every helper keyword (text, json, _content, ...) that is not a dataclass field
        field_names = {field.name for field in fields(cls)}
        constructor_arguments = {name: value for name, value in kwargs.items() if name in field_names}

        try:
            return ReconstructedResponse(**constructor_arguments)
        except TypeError as e:
            raise InvalidResponseReconstructionException(
                f"Missing the core required fields needed to create a ReconstructedResponse: {e}"
            ) from e

    @classmethod
    def _normalize_status_code(cls, **kwargs) -> Optional[int]:
        """Extract a status code from the `status_code` or `status` keyword.

        Some status fields may actually contain a numeric code - this method accounts for these
        scenarios and returns None if a code isn't available.

        Args:
            **kwargs: A set of keyword arguments to extract a status code from the `status_code`
                or `status` parameters

        Returns:
            Optional[int]: The truthy `status_code` value as given, otherwise `status` converted
            with `int` when it is an integer or a numeric string, otherwise None

        """
        status_code = kwargs.get("status_code")
        if status_code:
            return status_code

        status = kwargs.get("status")
        if isinstance(status, int) or (isinstance(status, str) and status.isnumeric()):
            return int(status)
        return None

    @classmethod
    def _normalize_reason(cls, **kwargs) -> Optional[str]:
        """Extract the reason associated with the status of a response.

        This method accounts for several scenarios:

            1) `status` may actually hold the status code and not an actual reason
            2) either `status` or `reason` is provided and not the other
            3) the reason needs to be inferred from the status code instead

        Args:
            **kwargs: The parameters to extract a reason from. Checked in order: `reason`,
                a non-numeric string `status`, `reason_phrase`, and finally `status_code`, which
                is looked up in the `responses` mapping from the standard `http.client` module

        Returns:
            Optional[str]: A string explaining the status code and reason behind it, otherwise None

        """
        status = kwargs.get("status")
        # a numeric `status` is a status code, not a reason phrase
        status_reason = status if isinstance(status, str) and not status.isnumeric() else None
        return (
            kwargs.get("reason")
            or status_reason
            or kwargs.get("reason_phrase")
            or responses.get(kwargs.get("status_code") or -1)
        )

    @classmethod
    def _normalize_url(cls, **kwargs) -> Optional[str]:
        """Extract the URL as a string if available.

        If the URL is a non-string value, it is converted with `str`.

        Args:
            **kwargs: A set of keyword arguments containing the `url` parameter

        Returns:
            Optional[str]: A string-formatted URL, or None when no `url` was provided

        """
        url = kwargs.get("url")
        if url is None:
            return None
        return url if isinstance(url, str) else str(url)

    @classmethod
    def _normalize_headers(cls, **kwargs) -> MutableMapping:
        """Extract the headers and convert non-dictionary mappings into a dictionary.

        The headers are taken from either the `headers` field or the `_headers` field if either
        is provided, with preference to `headers`.

        Args:
            **kwargs: The keyword arguments to extract the headers from. Includes `headers` and
                `_headers`

        Returns:
            MutableMapping: The headers associated with the response or an empty mapping

        """
        # the trailing `or {}` keeps the "empty mapping" promise when `_headers` is explicitly None
        headers = kwargs.get("headers") or kwargs.get("_headers") or {}
        if isinstance(headers, Mapping) and not isinstance(headers, dict):
            headers = dict(headers)
        return headers

    @classmethod
    def _resolve_content_sources(cls, **kwargs) -> Optional[bytes]:
        """Resolve the content field from several, possibly competing, keyword arguments.

        This method searches for the following keys: 1) content, 2) _content, 3) json, 4) text.

        If multiple fields are provided, the longest string/bytes value is preferred, as it
        contains the most information available. This is especially important when processing
        structured data formats (e.g., JSON, XML, YAML). When several candidates have the same
        length, the one listed first in the order above wins.

        If an empty content field is provided along with a populated json list/dictionary, the
        json data will be dumped, encoded, and used in the content field as a bytes object.
        Otherwise, fields with empty strings and bytes are treated as data, if provided, and
        preferred over `None`.

        Args:
            **kwargs: The keyword arguments to extract the content from. Includes `content`,
                `_content`, `json`, and `text` fields.

        Returns:
            Optional[bytes]: The parsed bytes object containing the expected content

        """
        raw_text = kwargs.get("text")
        text = raw_text if isinstance(raw_text, (str, bytes)) else None

        # dump json content if provided as a dictionary or list
        raw_json = kwargs.get("json")
        json_data = json.dumps(raw_json) if isinstance(raw_json, (dict, list)) else None

        candidates = [
            source
            for source in (kwargs.get("content"), kwargs.get("_content"), json_data, text)
            if source is not None
        ]
        if not candidates:
            return None

        # `max` returns the first of several equally long candidates, preserving the priority order
        richest = max(candidates, key=lambda value: len(value) if isinstance(value, (str, bytes)) else -1)

        # encode the content if it is not already encoded
        return richest.encode("utf-8") if isinstance(richest, str) else richest

    @property
    def status(self) -> Optional[str]:
        """Human-readable description of the response status.

        Returns:
            Optional[str]: The `reason` when set, otherwise the phrase registered for the status
            code in `http.client.responses`. None whenever `status_code` is falsy.

        """
        if not self.status_code:
            return None
        return self.reason or responses.get(self.status_code)

    @property
    def text(self) -> Optional[str]:
        """The bytes content decoded into a string.

        Returns:
            Optional[str]: The decoded text from the content of the response, or None when the
            content is not a bytes object

        """
        return self.content.decode() if isinstance(self.content, bytes) else None

    def json(self) -> Optional[List[Any] | Dict[str, Any]]:
        """Return the JSON-decoded body of the response, if available.

        Returns:
            Optional[List[Any] | Dict[str, Any]]: The decoded content, or None (after logging a
            warning) when the content is not a bytes object or cannot be decoded as JSON.

        """
        if not isinstance(self.content, bytes):
            logger.warning("The current response object does not contain jsonable content")
            return None

        try:
            return json.loads(self.content)
        # UnicodeDecodeError is raised by json.loads when the bytes cannot be decoded to text
        except (JSONDecodeError, UnicodeDecodeError, AttributeError, TypeError):
            logger.warning("The current ReconstructedResponse object " "does not have a valid json format.")
        return None

    @classmethod
    def _identify_invalid_fields(
        cls, response: requests.Response | ReconstructedResponse | ResponseProtocol
    ) -> dict[str, Any]:
        """Identify the invalid fields within a response.

        Every core field of the response is checked. If any invalid fields exist, the method
        returns a dictionary of each field and its corresponding value.

        Args:
            response (requests.Response | ReconstructedResponse | ResponseProtocol):
                A response or response-like object to identify invalid values within

        Returns:
            dict[str, Any]: A dictionary containing each invalid field as a key and its assigned
            value. Empty when every field is valid.

        Raises:
            InvalidResponseStructureException: If `response` is neither a `requests.Response` nor
                an instance of `ResponseProtocol`.

        """
        if not (isinstance(response, requests.Response) or isinstance(response, ResponseProtocol)):  # noqa SIM101
            raise InvalidResponseStructureException(
                f"The current class of type {type(response)} is not a response or response-like object."
            )

        # will hold the full list of all invalid fields and respective values
        invalid_fields: Dict[str, Any] = {}

        # in classes such as httpx, reason might instead be reason_phrase for instance:
        reason = getattr(response, "reason", None) or getattr(response, "reason_phrase", None)

        if not (isinstance(response.status_code, int) and 100 <= response.status_code < 600):
            invalid_fields["status_code"] = response.status_code

        # the URL has to be a string for which `validate_url` returns a truthy result
        if not (isinstance(response.url, str) and validate_url(response.url)):
            invalid_fields["url"] = response.url

        if not isinstance(reason, str):
            invalid_fields["reason"] = reason

        if not isinstance(response.content, bytes):
            invalid_fields["content"] = response.content

        # only the header names (keys) are checked for being strings
        if not (isinstance(response.headers, Mapping) and all(isinstance(field, str) for field in response.headers)):
            invalid_fields["headers"] = response.headers

        return invalid_fields

    def is_response(self) -> bool:
        """Validate the fields that indicate a response has been minimally recreated successfully.

        The fields that are validated include:

            1) status codes (should be an integer)
            2) URLs (should be a valid url)
            3) reasons (should originate from a reason attribute or be inferred from the status code)
            4) content (should be a bytes field or encoded from a string text field)
            5) headers (should be a dictionary with string fields and preferably a content type)

        A warning listing the invalid fields is logged when validation fails. For fields other
        than `status_code` and `url`, only the type of the invalid value is logged.

        Returns:
            bool: Indicates whether the current reconstructed response minimally recreates a
            response object.

        """
        invalid_fields = self._identify_invalid_fields(self)
        invalid_summary = {
            field: value if field in ("status_code", "url") else type(value)
            for field, value in invalid_fields.items()
        }

        if invalid_summary:
            logger.warning(f"The following fields contain invalid values: {invalid_summary}")

        return not invalid_summary

    def validate(self) -> None:
        """Raise an error if the recreated response does not contain the properties expected of a response.

        If the response validation is successful, nothing is raised and nothing is returned.

        Raises:
            InvalidResponseReconstructionException: If at least one field is determined to be
                invalid and unexpected of a true response object.

        """
        if invalid_fields := self._identify_invalid_fields(self):
            raise InvalidResponseReconstructionException(
                "The ReconstructedResponse was not created successfully: Missing valid values for critical "
                f"fields to validate the response. The following fields are invalid: {invalid_fields}"
            )

    @property
    def ok(self) -> bool:
        """Indicates whether the current response indicates a successful request (200 <= status_code < 400).

        Accounts for status codes that are not integers, which are never considered successful.

        Returns:
            bool: True if the status code is an integer value within the range of 200 and 399,
            False otherwise

        """
        return isinstance(self.status_code, int) and 200 <= self.status_code < 400

    def __eq__(self, other: Any) -> bool:
        """Return True when `other` is a ReconstructedResponse with equal field values."""
        return isinstance(other, ReconstructedResponse) and asdict(self) == asdict(other)

    def raise_for_status(self) -> None:
        """Imitate `raise_for_status` of the requests and httpx response types.

        As scholar_flux processes data that is generally only sent when status codes are within
        the 200s (or exactly 200 [ok]), an error is raised when encountering a value outside of
        this range.

        Raises:
            requests.RequestException: If the structure of the ReconstructedResponse is invalid
                (the underlying InvalidResponseReconstructionException is chained as the cause),
                or if the status code is not within the range of 200-299.

        """
        try:
            self.validate()
        except InvalidResponseReconstructionException as e:
            raise requests.RequestException(
                "Could not verify from the ReconstructedResponse to determine whether the "
                f"original request was successful: {e}"
            ) from e

        if not 200 <= self.status_code < 300:
            raise requests.RequestException(
                "Expected a 200 (ok) status_code for the ReconstructedResponse. Received: "
                f"{self.status_code} ({self.reason or self.status})"
            )
# Public API of this module: only ReconstructedResponse is exported by a star-import.
__all__ = ["ReconstructedResponse"]