Source code for scholar_flux.api.validators

# /api/validators.py
"""The scholar_flux.api.validators module implements methods that are used within the validation of scholar_flux API
configurations to ensure that valid and invalid inputs are received as such.

Functions:

    validate_email:
        Used to verify whether an email matches the expected pattern
    validate_and_validate_and_process_email:
        Attempts to masks valid emails and raises an error on invalid input
    validate_url:
        Used to verify whether an url is a valid string
    normalize_url:
        Uses regular expressions to format the URL in a consistent format for string comparisons
    validate_and_process_url:
        validates URLs to ensure that it matches the expected format and normalizes the URL for later use

"""
import re
from urllib.parse import urlparse
from typing import Optional
from scholar_flux.security.utils import SecretUtils
from pydantic import SecretStr
import logging

logger = logging.getLogger(__name__)


[docs] def validate_email(email: str) -> bool: """Uses regex to determine whether the provided value is an email. Args: email (str): The email string to validate Returns: True if the email is valid, and False Otherwise """ regex = r"^[a-zA-Z0-9._%+-]+(%40|@)[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$" if isinstance(email, str) and re.match(regex, email): return True logger.warning(f"The value, '{email}' is not a valid email") return False
[docs] def validate_and_process_email(email: Optional[SecretStr | str]) -> Optional[SecretStr]: """If a string value is provided, determine whether the email is valid. This function first uses the validate_email function for the validation of the email. If the value is not an email, this implementation will raise an Error Args: email (Optional[str]): an email to validate if non-missing Returns: True if the email is valid or is not provided, and False Otherwise Raises: ValueError: If the current value is not an email """ if email is None: return None email_string = SecretUtils.unmask_secret(email) if not validate_email(email_string): raise ValueError(f"The provided email is invalid, received {email_string}") return SecretUtils.mask_secret(email)
[docs] def validate_url(url: str) -> bool: """Uses urlparse to determine whether the provided value is an url. Args: url (str): The url string to validate Returns: True if the url is valid, and False Otherwise """ try: result = urlparse(url) if result.scheme not in ("http", "https"): raise ValueError(f"Only http/https protocols are allowed. Received scheme: '{result.scheme}'") if not result.netloc: raise ValueError( f"Expected a domain in the URL after the http/https protocol. Only the scheme was received: {url}" ) return True except (ValueError, AttributeError) as e: logger.warning(f"The value, '{url}' is not a valid URL: {e}") return False
[docs] def normalize_url(url: str, normalize_https: bool = True) -> str: """Helper class to aid in comparisons of string urls. Normalizes a URL for consistent comparisons by converting to https:// and stripping right-most forward slashes ('/'). Args: url (str): The url to normalize into a consistent structure for later comparison normalize_https (bool): indicates whether to normalize the http identifier on the URL. This is True by default. Returns: str: The normalized url """ url = url.rstrip("/") if normalize_https: url = "https://" + re.sub(r"^https?://(www\.)?", "", url, flags=re.IGNORECASE) return url
[docs] def validate_and_process_url(url: Optional[str]) -> Optional[str]: """If a string value is provided, determine whether the url is valid. This function first uses the validate_url function for the validation of the url. Args: url (Optional[str]): an url to validate if non-missing Returns: True if the url is valid or is not provided, and False Otherwise """ if url is None: return None if not validate_url(url): raise ValueError( f"The provided URL '{url}' is invalid. " "It must include a scheme (e.g., 'http://' or 'https://') " "and a domain name." ) return normalize_url(url)
__all__ = [ "validate_email", "validate_and_process_email", "validate_url", "normalize_url", "validate_and_process_url", ]