Source code for swh.export.exporter
# Copyright (C) 2020  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import contextlib
import pathlib
from types import TracebackType
from typing import Any, Dict, Optional, Type
import uuid
from swh.model.model import ModelObjectType
[docs]
class Exporter:
    """
    Base class for all the exporters.
    Each export can have multiple exporters, so we can read the journal a single
    time, then export the objects we read in different formats without having to
    re-read them every time.
    Override this class with the behavior for an export in a specific export
    format. You have to overwrite process_object() to make it write to the
    appropriate export files.
    You can also put setup and teardown logic in __enter__ and __exit__, and it
    will be called automatically.
    """
    def __init__(
        self,
        config: Dict[str, Any],
        export_path,
        sensitive_export_path,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        self.config: Dict[str, Any] = config
        self.export_path = pathlib.Path(export_path)
        if sensitive_export_path is not None:
            self.sensitive_export_path = pathlib.Path(sensitive_export_path)
        else:
            self.sensitive_export_path = (
                pathlib.Path(export_path).parent / f"{export_path}-sensitive"
            )
        self.exit_stack = contextlib.ExitStack()
    def __enter__(self) -> "Exporter":
        self.export_path.mkdir(exist_ok=True, parents=True)
        self.sensitive_export_path.mkdir(exist_ok=True, parents=True)
        self.exit_stack.__enter__()
        return self
    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_value: Optional[BaseException],
        traceback: Optional[TracebackType],
    ) -> Optional[bool]:
        return self.exit_stack.__exit__(exc_type, exc_value, traceback)
[docs]
    def process_object(self, object_type: ModelObjectType, obj: Dict[str, Any]) -> None:
        """
        Process a SWH object to export.
        Override this with your custom exporter.
        """
        raise NotImplementedError 
[docs]
    def get_unique_file_id(self) -> str:
        """
        Return a unique random file id for the current process.
        If config['test_unique_file_id'] is set, it will be used instead.
        """
        return str(self.config.get("test_unique_file_id", uuid.uuid4())) 
 
[docs]
class ExporterDispatch(Exporter):
    """
    Like Exporter, but dispatches each object type to a different function
    (e.g you can override `process_origin(self, object)` to process origins.)
    """
[docs]
    def process_object(self, object_type: ModelObjectType, obj: Dict[str, Any]) -> None:
        method_name = "process_" + object_type.name.lower()
        if hasattr(self, method_name):
            getattr(self, method_name)(obj)