# Copyright (C) 2020-2022  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from collections import Counter
from typing import Any, Dict, Iterable, List, Optional, TypeVar
from typing_extensions import Protocol, TypedDict
from swh.core.api import remote_api_endpoint
from swh.core.api.classes import PagedResult as CorePagedResult
# Type variable standing for the kind of item carried by a page of results
# (e.g. ``PagedResult[OriginDict]`` as returned by ``origin_search``).
TResult = TypeVar("TResult")
# Alias of the swh.core paged-result class with ``str`` fixed as its second
# type argument -- presumably the page-token type, cf. the
# ``page_token: Optional[str]`` argument of ``origin_search``
# (NOTE(review): confirm against swh.core.api.classes.PagedResult).
PagedResult = CorePagedResult[TResult, str]
# Field names that may be listed in the ``sort_by`` argument of
# ``SearchInterface.origin_search``. Per that method's docstring, a leading
# "-" on a field name requests descending order, otherwise ascending.
SORT_BY_OPTIONS = [
    "nb_visits",
    "last_visit_date",
    "last_eventful_visit_date",
    "last_revision_date",
    "last_release_date",
    "date_created",
    "date_modified",
    "date_published",
]
class MinimalOriginDict(TypedDict):
    """Mandatory keys of an :class:`OriginDict`."""

    # URL of the origin; the only key every origin document must carry.
    url: str
class OriginDict(MinimalOriginDict, total=False):
    """Argument passed to :meth:`SearchInterface.origin_update`.

    The keys declared here are optional (``total=False``); ``url``, inherited
    from :class:`MinimalOriginDict`, remains mandatory.
    """

    # Visit types associated with the origin (e.g. git, svn, pypi).
    visit_types: List[str]
    # NOTE(review): presumably whether the origin has at least one visit;
    # confirm against the search backends.
    has_visits: bool
class SearchInterface(Protocol):
    """Structural interface of a search backend.

    Every method is declared with :func:`remote_api_endpoint`, which attaches
    the endpoint path given as its argument. Method bodies are intentionally
    empty (``...``): this class only describes the API.
    """

    @remote_api_endpoint("check")
    def check(self):
        """Dedicated method to execute some specific check per implementation."""
        ...

    @remote_api_endpoint("flush")
    def flush(self) -> None:
        """Blocks until all previous calls to ``_update()`` methods (such as
        :meth:`origin_update`) are completely applied.
        """
        ...

    @remote_api_endpoint("origin/update")
    def origin_update(self, documents: Iterable[OriginDict]) -> None:
        """Persist documents to the search backend.

        Args:
            documents: origin documents to persist; each one must at least
                carry the ``url`` key
        """
        ...

    @remote_api_endpoint("origin/search")
    def origin_search(
        self,
        *,
        query: str = "",
        url_pattern: Optional[str] = None,
        metadata_pattern: Optional[str] = None,
        with_visit: bool = False,
        visit_types: Optional[List[str]] = None,
        min_nb_visits: int = 0,
        min_last_visit_date: str = "",
        min_last_eventful_visit_date: str = "",
        min_last_revision_date: str = "",
        min_last_release_date: str = "",
        min_date_created: str = "",
        min_date_modified: str = "",
        min_date_published: str = "",
        programming_languages: Optional[List[str]] = None,
        licenses: Optional[List[str]] = None,
        keywords: Optional[List[str]] = None,
        fork_weight: Optional[float] = 0.5,
        sort_by: Optional[List[str]] = None,
        page_token: Optional[str] = None,
        limit: int = 50,
    ) -> PagedResult[OriginDict]:
        """Searches for origins matching the `url_pattern`.

        All arguments are keyword-only.

        Args:
            query: Find origins according to the query written as per the
                swh-search query language syntax, if empty return all origins
            url_pattern: Part of the URL to search for, if empty and no filter
                parameters used return all origins
            metadata_pattern: Keywords to look for (across all the fields of
                "jsonld")
            with_visit: Whether origins with no visits are to be filtered out
            visit_types: Only origins having any of the provided visit types
                (e.g. git, svn, pypi) will be returned
            min_nb_visits: Filter origins that have number of visits >=
                the provided value
            min_last_visit_date: Filter origins that have
                last_visit_date on or after the provided date (ISO format)
            min_last_eventful_visit_date: Filter origins that have
                last_eventful_visit_date (eventful = snapshot_id changed)
                on or after the provided date (ISO format)
            min_last_revision_date: Filter origins that have
                last_revision_date on or after the provided date (ISO format)
            min_last_release_date: Filter origins that have
                last_release_date on or after the provided date (ISO format)
            min_date_created: Filter origins that have date_created
                from ``jsonld`` on or after the provided date
            min_date_modified: Filter origins that have date_modified
                from ``jsonld`` on or after the provided date
            min_date_published: Filter origins that have date_published
                from ``jsonld`` on or after the provided date
            programming_languages: Filter origins with programming languages
                present in the given list (based on intrinsic_metadata)
            licenses: Filter origins with licenses present in the given list
                (based on intrinsic_metadata)
            keywords: Filter origins having description/keywords
                (extracted from intrinsic_metadata) that match given values
            fork_weight: Multiplicative factor to apply to all origins known
                to be forks (<1 penalizes them, >1 boosts them)
            sort_by: Sort results based on a list of fields mentioned in
                SORT_BY_OPTIONS (nb_visits, last_visit_date,
                last_eventful_visit_date, last_revision_date,
                last_release_date, date_created, date_modified,
                date_published).
                Return results in descending order if "-" is present at the
                beginning otherwise in ascending order.
            page_token: Opaque value used for pagination
            limit: number of results to return

        Returns:
            PagedResult of origin dicts matching the search criteria. If
            next_page_token is None, there is no more data to retrieve.
        """
        ...

    @remote_api_endpoint("origin/get")
    def origin_get(self, url: str) -> Optional[Dict[str, Any]]:
        """Returns the full document associated to the given origin URL.

        Args:
            url: URL of the origin to look up

        Returns:
            The document as a dict, or ``None``. NOTE(review): ``None`` is
            presumably returned for an unknown origin; confirm against the
            backends.
        """
        ...

    @remote_api_endpoint("origin/delete")
    def origin_delete(self, url: str) -> bool:
        """Remove the documents associated with the given origin URL.

        Args:
            url: URL of the origin whose documents are to be removed

        Returns:
            True if the document was removed, False if it could not be found.
        """
        ...

    @remote_api_endpoint("visit_types_count")
    def visit_types_count(self) -> Counter:
        """Returns origin counts per visit type (git, hg, svn, ...)."""
        ...