Source code for swh.lister
# Copyright (C) 2018-2025  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from importlib.metadata import PackageNotFoundError, entry_points, version
import logging
logger = logging.getLogger(__name__)
try:
    __version__ = version("swh-lister")
except PackageNotFoundError:
    __version__ = "devel"
USER_AGENT_TEMPLATE = (
    f"Software Heritage %s lister v{__version__}"
    " (+https://www.softwareheritage.org/contact)"
)
LISTERS = {
    entry_point.name.split(".", 1)[1]: entry_point
    for entry_point in entry_points().select(group="swh.workers")
    if entry_point.name.split(".", 1)[0] == "lister"
}
SUPPORTED_LISTERS = list(LISTERS)
TARBALL_EXTENSIONS = [
    "crate",
    "gem",
    "jar",
    "love",  # zip
    "zip",
    "tar",
    "gz",
    "tgz",
    "tbz",
    "bz2",
    "bzip2",
    "lzma",
    "lz",
    "txz",
    "xz",
    "z",
    "Z",
    "7z",
    "oxt",  # zip
    "pak",  # zip
    "war",  # zip
    "whl",  # zip
    "vsix",  # zip
    "VSIXPackage",  # zip
    "zst",
]
"""Tarball recognition pattern"""
[docs]
def get_lister(lister_name, db_url=None, **conf):
    """Instantiate a lister given its name.
    Args:
        lister_name (str): Lister's name
        conf (dict): Configuration dict (lister db cnx, policy, priority...)
    Returns:
        Tuple (instantiated lister, drop_tables function, init schema function,
        insert minimum data function)
    """
    if lister_name not in LISTERS:
        raise ValueError(
            "Invalid lister %s: only supported listers are %s"
            % (lister_name, SUPPORTED_LISTERS)
        )
    if db_url:
        conf["lister"] = {"cls": "postgresql", "db": db_url}
    registry_entry = LISTERS[lister_name].load()()
    lister_cls = registry_entry["lister"]
    from swh.lister import pattern
    if issubclass(lister_cls, pattern.Lister):
        return lister_cls.from_config(**conf)
    else:
        # Old-style lister
        return lister_cls(override_config=conf)