# /api/models/search_inputs.py
"""The scholar_flux.api.models.search_inputs module implements the PageListInput RootModel for multi-page searches.
The PageListInput model is designed to validate and prepare lists and iterables of page numbers for multi-page retrieval
using the `SearchCoordinator.search_pages` method.
"""
from typing import Sequence, Mapping, Iterable
from typing_extensions import Self
from pydantic import RootModel, field_validator
from math import ceil
import logging
from scholar_flux.exceptions.api_exceptions import APIParameterException
# Module-level logger used to report invalid page inputs and coerced parameters.
logger = logging.getLogger(__name__)
class PageListInput(RootModel[Sequence[int]]):
    """Helper class for processing page information in a predictable manner.

    The PageListInput class accepts a single integer, a single numeric string, or a non-mapping iterable
    (list, set, range, generator, ...) of integers and numeric strings. A single value is wrapped in a one-item
    list, while an iterable is de-duplicated and sorted in ascending order. An empty iterable is accepted and
    produces an empty (falsy) page list.

    Args:
        root (Sequence[int]): The page number(s) to validate.

    Examples:
        >>> from scholar_flux.api.models import PageListInput
        >>> PageListInput(5)
        PageListInput(5)
        >>> PageListInput(range(5))
        PageListInput(0, 1, 2, 3, 4)
        >>> PageListInput([3, "1", 3])
        PageListInput(1, 3)

    """

    @field_validator("root", mode="before")
    @classmethod
    def page_validation(cls, v: str | int | Iterable[int | str]) -> Sequence[int]:
        """Processes the page input to ensure that a list of integers is returned if the received page list is in a
        valid format.

        Args:
            v (str | int | Iterable[int | str]): A page or iterable of pages to be formatted as a list of pages.

        Returns:
            Sequence[int]:
                A one-item list for a single page, otherwise the unique page numbers sorted in ascending order.

        Raises:
            ValidationError: Internally raised via pydantic if a ValueError is encountered
                             (if the input is not exclusively a page or list of page numbers)

        """
        # A string is treated as one page value, never as an iterable of characters.
        if isinstance(v, (str, int)):
            return [cls.process_page(v)]

        # Mappings are iterable but iterating them would yield keys, so they are rejected below.
        if isinstance(v, (Sequence, Iterable)) and not isinstance(v, Mapping):
            return sorted({cls.process_page(page) for page in v})

        err_msg = f"Expected a list, set, or generator containing page numbers. Received: '{type(v)}'"
        logger.error(err_msg)
        raise ValueError(err_msg)

    @classmethod
    def process_page(cls, page_value: str | int) -> int:
        """Helper method for ensuring that each value in the sequence is a numeric string or whole number.

        Negative integers are returned unchanged by this method; range checks are not performed here. Strings
        are only converted when they consist entirely of decimal digits, so signed strings such as "-1" and
        strings with surrounding whitespace are rejected.

        Args:
            page_value (str | int): The value to be converted if it is not already an integer

        Returns:
            int: A validated integer if the page can be converted to an integer and is not a float

        Raises:
            ValueError: When the value is not an integer or numeric string to be converted to an integer

        """
        # `str.isdecimal` (unlike `str.isnumeric`) only matches characters that `int()` can parse, so values
        # such as "²" or "½" fall through to the uniform, logged error below instead of failing inside `int()`.
        if isinstance(page_value, str) and page_value.isdecimal():
            page_value = int(page_value)

        if not isinstance(page_value, int):
            err_msg = f"Expected a provided page value to be a number. Received: '{page_value}'"
            logger.error(err_msg)
            raise ValueError(err_msg)

        return page_value

    @classmethod
    def from_record_count(cls, min_records: int, records_per_page: int, page_offset: int = 0) -> Self:
        """Helper method for calculating the total number of pages required to retrieve at least `min_records` records.

        Args:
            min_records (int):
                The total number of records to retrieve sequentially. Must be a non-negative integer.
            records_per_page (int):
                The total number of records that are retrieved per page. A falsy value (such as 0) or a negative
                value results in an empty page list.
            page_offset (int):
                The total number of pages to skip before beginning record retrieval (0 by default). When the provided
                value is not a non-negative integer, this parameter is coerced to 0 and a warning is triggered.

        Returns:
            PageListInput:
                The calculated page range used to retrieve at least `min_records` records given `records_per_page`.
                Pages are numbered starting from `1 + page_offset`.

        Raises:
            APIParameterException: If `min_records` is not an integer or is negative.

        Examples:
            >>> from scholar_flux.api.models import PageListInput
            >>> PageListInput.from_record_count(20, 10, 0)
            PageListInput(1, 2)
            >>> PageListInput.from_record_count(20, 10, 2)
            PageListInput(3, 4)
            >>> PageListInput.from_record_count(15, 10, 1)
            PageListInput(2, 3)
            # triggers a warning for page_offset (non-integers are coerced to 0):
            >>> PageListInput.from_record_count(20, 10, None)
            PageListInput(1, 2)
            >>> PageListInput.from_record_count(0, 10, 0)
            PageListInput()

        Note:
            This method expects a positive integer for `min_records` from which to calculate the page range required to
            retrieve at least `min_records`. Specifying 0 for `min_records` will result in an empty list of pages that
            essentially functions as a no-op search returning an empty list from `SearchCoordinator.search_records`.

        """
        if not isinstance(min_records, int) or min_records < 0:
            raise APIParameterException(
                f"Expected `min_records` to be a positive integer, but received value '{min_records}'"
            )

        if not isinstance(page_offset, int) or page_offset < 0:
            logger.warning(
                f"Expected a valid, non-negative integer for `page_offset`, but received '{page_offset}'. Defaulting "
                "to 0 instead..."
            )
            page_offset = 0

        # Pages are 1-indexed, so an offset of 0 starts at page 1.
        page_start = 1 + page_offset

        # The falsy guard avoids division by zero; `max` clamps the negative count a negative page size produces.
        total_pages = max(0, ceil(min_records / records_per_page)) if records_per_page else 0

        return cls(range(page_start, page_start + total_pages))

    @property
    def page_numbers(self) -> Sequence[int]:
        """Returns the sequence of validated page numbers as a list."""
        return list(self.root)

    def __repr__(self) -> str:
        """Provides a simple string representation of the current page list input."""
        class_name = type(self).__name__
        vals = ", ".join(str(page) for page in self.page_numbers)
        return f"{class_name}({vals})"

    def __bool__(self) -> bool:
        """Helper method that returns False if `PageListInput.page_numbers` is empty and True otherwise."""
        return bool(self.root)
# Names exported by `from ... import *`; PageListInput is the only public name in this module.
__all__ = ["PageListInput"]