Source code for scholar_flux.api.models.response_metadata_map
# /api/models/response_metadata_map.py
"""The scholar_flux.api.models.response_metadata_map module implements the ResponseMetadataMap for field resolution."""
from pydantic import BaseModel
from typing import Optional, Any, Mapping
from scholar_flux.utils.record_types import MetadataType
from scholar_flux.utils import coerce_int, get_nested_data, PathUtils
from math import ceil
[docs]
class ResponseMetadataMap(BaseModel):
    """Maps API-specific response metadata field names to common names.

    This class enables extraction of metadata from API responses, primarily used for pagination decisions in
    multi-page searches. It extracts and processes metadata fields from metadata dictionaries and supports nested
    path traversal: a field name that contains the path delimiter (e.g. ``"statistics.totalHits"``) is resolved
    against nested mappings when it is not present as a top-level key.

    Args:
        total_query_hits:
            Field name containing the total number of results for a query (used to determine if more pages exist)
        records_per_page:
            Field name indicating the number of records on the current page

    Example:
        >>> from scholar_flux.api.models.response_metadata_map import ResponseMetadataMap
        >>> metadata_map = ResponseMetadataMap(total_query_hits="totalHits", records_per_page="limit")
        >>> metadata = {"totalHits": 318942, "limit": 10}
        >>> total = metadata_map.calculate_query_hits(metadata)
        >>> print(total)  # 318942
        >>> # Used for pagination decisions
        >>> current_page = 1
        >>> records_per_page = metadata_map.calculate_records_per_page(metadata)
        >>> has_more = total > (current_page * records_per_page)
    """

    total_query_hits: Optional[str] = None
    records_per_page: Optional[str] = None

    @classmethod
    def _extract_key(cls, metadata: dict[str, Any], key: str) -> Any:
        """Helper method for reliably extracting the raw value stored under a key or nested path.

        Args:
            metadata: The mapping to search. Anything that is not a `Mapping` yields None.
            key: A top-level key or a delimiter-separated path. Empty or non-string keys yield None.

        Returns:
            Any: The raw, uncoerced value if found, otherwise None.
        """
        if not isinstance(metadata, Mapping) or not (key and isinstance(key, str)):
            return None

        # An exact top-level match wins, even when the key itself contains the path delimiter
        if key in metadata:
            return metadata[key]

        # Only attempt nested retrieval when the key looks like a path
        if PathUtils.DELIMITER in key:
            return get_nested_data(metadata, key, verbose=False)

        return None

    def calculate_query_hits(self, metadata: MetadataType) -> Optional[int]:
        """Extract and convert total query hits from response metadata.

        Args:
            metadata (MetadataType): A mapping containing response metadata typically from ProcessedResponse.metadata

        Returns:
            Optional[int]: Total number of query hits as an integer if available and convertible, otherwise None

        Example:
            >>> from scholar_flux.api.models.response_metadata_map import ResponseMetadataMap
            >>> metadata_map = ResponseMetadataMap(total_query_hits="totalHits")
            >>> metadata = {"totalHits": "1500", "results": [...]}
            >>> total = metadata_map.calculate_query_hits(metadata)
            >>> print(total)  # 1500 (converted from string)
        """
        # An unset field name becomes "", which `_extract_key` rejects and resolves to None
        key = self.total_query_hits or ""
        return coerce_int(self._extract_key(metadata, key))

    def calculate_records_per_page(self, metadata: MetadataType) -> Optional[int]:
        """Extract and convert the total number of records on the current page from response metadata.

        Args:
            metadata (MetadataType):
                A mapping containing response metadata (typically from ProcessedResponse.metadata)

        Returns:
            Optional[int]:
                Total number of records on the current page as an integer if available and convertible, otherwise None

        Example:
            >>> from scholar_flux.api.models.response_metadata_map import ResponseMetadataMap
            >>> metadata_map = ResponseMetadataMap(records_per_page="pageSize")
            >>> metadata = {"pageSize": "20", "results": [...]}
            >>> total = metadata_map.calculate_records_per_page(metadata)
            >>> print(total)  # 20 (converted from string)
        """
        # An unset field name becomes "", which `_extract_key` rejects and resolves to None
        key = self.records_per_page or ""
        return coerce_int(self._extract_key(metadata, key))

    def process_metadata(self, metadata: MetadataType) -> MetadataType:
        """Helper method for processing metadata after mapping relevant fields using the metadata schema.

        Args:
            metadata (MetadataType):
                A mapping containing response metadata (typically from ProcessedResponse.metadata)

        Returns:
            metadata (MetadataType):
                A dictionary with the keys `total_query_hits` and `records_per_page`. Each value is the result of
                the corresponding `calculate_*` method and is None when the field could not be resolved.

        Example:
            >>> from scholar_flux.api.models.response_metadata_map import ResponseMetadataMap
            >>> metadata_map = ResponseMetadataMap(total_query_hits="totalHits", records_per_page="pageSize")
            >>> metadata = {"totalHits": "1500", "pageSize": "20", "results": [...]}
            >>> metadata_map.process_metadata(metadata)
            # OUTPUT: {"total_query_hits": 1500, "records_per_page": 20}
        """
        return {
            "total_query_hits": self.calculate_query_hits(metadata),
            "records_per_page": self.calculate_records_per_page(metadata),
        }

    def calculate_pages_remaining(
        self,
        page: int,
        total_query_hits: Optional[int] = None,
        records_per_page: Optional[int] = None,
        metadata: Optional[MetadataType] = None,
    ) -> Optional[int]:
        """Calculate the number of pages yet to be queried using either metadata or direct integer fields.

        Args:
            page (int):
                The page number that was most recently retrieved
            total_query_hits (Optional[int]):
                Total number of record hits associated with a given query. If not specified, this is parsed
                from the metadata. An explicit value of 0 is honored and is not replaced by the metadata value.
            records_per_page (Optional[int]):
                Total number of records on the current page. If not specified (or not positive), this is parsed
                from the metadata.
            metadata (Optional[MetadataType]):
                A mapping containing response metadata (typically from ProcessedResponse.metadata)

        Returns:
            Optional[int]:
                The total number of pages that remain given the values `total_query_hits` and `records_per_page`,
                or None when either value is unavailable or when `records_per_page` is not a positive number.

        Example:
            >>> from scholar_flux.api.models.response_metadata_map import ResponseMetadataMap
            >>> metadata_map = ResponseMetadataMap(
            ...     total_query_hits="statistics.totalHits", records_per_page="metadata.pageSize"
            ... )
            >>> metadata = {"statistics": {"totalHits": "1500"}, "metadata": {"pageSize": "20"}}
            >>> total = metadata_map.calculate_pages_remaining(page=74, metadata=metadata)
            >>> print(total)  # 1 (ceil(1500 / 20) = 75 pages in total, 74 already retrieved)
        """
        # A page size of zero or less cannot be used as a divisor, so it is treated like a missing value
        if records_per_page is None or records_per_page <= 0:
            records_per_page = self.calculate_records_per_page(metadata or {})

        # Zero hits is a legitimate explicit value: compare against None instead of relying on truthiness
        if total_query_hits is None:
            total_query_hits = self.calculate_query_hits(metadata or {})

        if total_query_hits is None or records_per_page is None or page is None:
            return None

        # Guards the division below against a zero/negative page size resolved from the metadata
        if records_per_page <= 0:
            return None

        return self._calculate_pages_remaining(
            page, total_query_hits=total_query_hits, records_per_page=records_per_page
        )

    @classmethod
    def _calculate_pages_remaining(cls, page: int, total_query_hits: int, records_per_page: int) -> int:
        """Calculates the total number of pages that remain given the total number of hits and records per page.

        Args:
            page (int):
                The page number that was most recently retrieved
            total_query_hits (int):
                Total number of record hits associated with a given query
            records_per_page (int):
                Total number of records on the current page as an integer. Must be non-zero.

        Returns:
            int: The total number of pages that remain given the values `total_query_hits` and `records_per_page`

        Raises:
            ZeroDivisionError: If `records_per_page` is 0.
        """
        calculated_page_max = ceil(total_query_hits / records_per_page)
        # The current page can exceed the calculated maximum (the original note attributes this to variability in
        # the Core API record retrieval count), so the result is clamped at zero instead of going negative
        under_record_limit = calculated_page_max - page
        return max(0, under_record_limit)

    def __call__(self, *args: Any, **kwargs: Any) -> Optional[MetadataType]:
        """Helper method that enables the current map to be used as a callable to map and process response metadata.

        The call forwards all positional and keyword arguments to `process_metadata` and returns its result: a
        dictionary containing the `total_query_hits` and `records_per_page` fields.
        """
        return self.process_metadata(*args, **kwargs)
# Names exported by a wildcard import (`from ... import *`) of this module.
__all__ = ["ResponseMetadataMap"]