# Source code for swh.storage.algos.directory
# Copyright (C) 2022-2023  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Iterable, List, Optional, Tuple
from swh.core.api.classes import stream_results_optional
from swh.model.model import Directory, DirectoryEntry, Sha1Git
from swh.storage.interface import StorageInterface
[docs]
def directory_get(
    storage: StorageInterface, directory_id: Sha1Git
) -> Optional[Directory]:
    """Rebuild a :class:`Directory` from what the storage holds for an id.

    The entries are streamed through ``storage.directory_get_entries`` and the
    raw manifest is looked up with ``storage.directory_get_raw_manifest``.

    Args:
        storage: the storage instance to query
        directory_id: identifier of the directory to load

    Returns:
        The reassembled directory, or ``None`` when the entry stream for
        ``directory_id`` comes back as ``None``.
    """
    streamed = stream_results_optional(
        storage.directory_get_entries, directory_id=directory_id
    )
    if streamed is not None:
        # Drain the entry stream first, then look up the raw manifest.
        all_entries = tuple(streamed)
        manifests = storage.directory_get_raw_manifest([directory_id])
        return Directory(
            id=directory_id,
            entries=all_entries,
            raw_manifest=manifests[directory_id],
        )
    return None
[docs]
def directory_get_many(
    storage: StorageInterface, directory_ids: List[Sha1Git]
) -> Iterable[Optional[Directory]]:
    """Same as :func:`directory_get`, but fetches directories slightly more
    effectively by batching requests to ``directory_get_raw_manifest``.

    Raw manifests for all ids are requested in a single call; entries are then
    streamed one directory at a time.

    Args:
        storage: the storage instance
        directory_ids: the directories' identifiers

    Yields:
        One item per element of ``directory_ids``, in the same order: the
        directory if it could be properly put back together, or ``None`` if
        its id is absent from the raw manifests returned by the storage.

    Raises:
        AssertionError: if an id was present in the raw manifests but its
            entries come back as ``None`` when they are fetched.
    """
    raw_manifests = storage.directory_get_raw_manifest(directory_ids)
    for directory_id in directory_ids:
        if directory_id not in raw_manifests:
            yield None
            continue

        entries = stream_results_optional(
            storage.directory_get_entries,
            directory_id=directory_id,
        )
        # Compare against None explicitly (as directory_get does) instead of
        # relying on truthiness, so that an existing directory with no entries
        # can never be mistaken for a missing one. Raising explicitly also
        # keeps the check active under ``python -O``, where ``assert``
        # statements are stripped.
        if entries is None:
            raise AssertionError(
                f"Directory {directory_id.hex()} stopped existing"
            )
        yield Directory(
            id=directory_id,
            entries=tuple(entries),
            raw_manifest=raw_manifests[directory_id],
        )
[docs]
def directory_get_many_with_possibly_duplicated_entries(
    storage: StorageInterface, directory_ids: List[Sha1Git]
) -> Iterable[Optional[Tuple[bool, Directory]]]:
    """Same as :func:`directory_get_many`, but does not error on directories whose
    entries may contain duplicated names.

    See :meth:`swh.model.model.Directory.from_possibly_duplicated_entries`.

    Args:
        storage: the storage instance
        directory_ids: the directories' identifiers

    Yields:
        One item per element of ``directory_ids``, in the same order: either
        ``None`` if the id is absent from the raw manifests returned by the
        storage, or ``(is_corrupt, directory)`` where ``is_corrupt`` is True
        iff some entry names were indeed duplicated.

    Raises:
        AssertionError: if an id was present in the raw manifests but its
            entries come back as ``None`` when they are fetched.
    """
    raw_manifests = storage.directory_get_raw_manifest(directory_ids)
    for directory_id in directory_ids:
        if directory_id not in raw_manifests:
            yield None
            continue

        entries = stream_results_optional(
            storage.directory_get_entries,
            directory_id=directory_id,
        )
        # Compare against None explicitly instead of relying on truthiness, so
        # that an existing directory with no entries can never be mistaken for
        # a missing one. Raising explicitly also keeps the check active under
        # ``python -O``, where ``assert`` statements are stripped.
        if entries is None:
            raise AssertionError(
                f"Directory {directory_id.hex()} stopped existing"
            )
        yield Directory.from_possibly_duplicated_entries(
            id=directory_id,
            entries=tuple(entries),
            raw_manifest=raw_manifests[directory_id],
        )