Source code for swh.deposit.api.private.deposit_list
# Copyright (C) 2018-2022  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Any, Dict
from xml.etree import ElementTree
from django.conf import settings
from django.core.paginator import Paginator
from django.db.models import CharField, Q, TextField
from django.http import JsonResponse
from rest_framework.decorators import (
    api_view,
    authentication_classes,
    permission_classes,
)
from rest_framework.generics import ListAPIView
from rest_framework.permissions import AllowAny
from rest_framework.request import Request
import sentry_sdk
from swh.deposit.api.private import APIPrivateView
from swh.deposit.api.utils import DefaultPagination, DepositSerializer
from swh.deposit.models import Deposit
from swh.deposit.utils import parse_swh_deposit_origin, parse_swh_metadata_provenance
from swh.model.swhids import QualifiedSWHID
def _enrich_deposit_with_metadata(deposit: Deposit) -> Deposit:
    deposit_requests = deposit.depositrequest_set.filter(type="metadata")
    deposit_requests = deposit_requests.order_by("-id")
    # enrich deposit with raw metadata when we have some
    if deposit_requests and len(deposit_requests) > 0:
        raw_meta = deposit_requests[0].raw_metadata
        if raw_meta:
            deposit.set_raw_metadata(raw_meta)
    return deposit
[docs]
class APIList(ListAPIView, APIPrivateView):
    """Deposit request class to list the deposit's status per page.
    HTTP verbs supported: GET
    """
    serializer_class = DepositSerializer
    pagination_class = DefaultPagination
[docs]
    def paginate_queryset(self, queryset):
        """Return a single page of results. This enriches the queryset results with
        metadata if any.
        """
        page_result = self.paginator.paginate_queryset(
            queryset, self.request, view=self
        )
        deposits = []
        for deposit in page_result:
            _enrich_deposit_with_metadata(deposit)
            deposits.append(deposit)
        return deposits 
[docs]
    def get_queryset(self):
        """Retrieve queryset of deposits (with some optional filtering)."""
        params = self.request.query_params
        exclude_like = params.get("exclude")
        username = params.get("username")
        if username:
            deposits_qs = Deposit.objects.select_related("client").filter(
                client__username=username
            )
        else:
            deposits_qs = Deposit.objects.all()
        if exclude_like:
            # sql injection: A priori, nothing to worry about, django does it for
            # queryset
            # https://docs.djangoproject.com/en/3.0/topics/security/#sql-injection-protection  # noqa
            deposits_qs = deposits_qs.exclude(external_id__startswith=exclude_like)
        return deposits_qs.order_by("id") 
 
def _deposit_search_query(search_value: str) -> Q:
    fields = [f for f in Deposit._meta.fields if isinstance(f, (CharField, TextField))]
    queries = [Q(**{f.name + "__icontains": search_value}) for f in fields]
    search_query = Q()
    for query in queries:
        search_query = search_query | query
    return search_query
[docs]
@api_view()
@authentication_classes([])
@permission_classes([AllowAny])
def deposit_list_datatables(request: Request) -> JsonResponse:
    """Special API view to list and filter deposits, produced responses are intended
    to be consumed by datatables js framework used in deposits admin Web UI."""
    table_data: Dict[str, Any] = {}
    table_data["draw"] = int(request.GET.get("draw", 1))
    try:
        username = request.GET.get("username")
        if username:
            deposits = Deposit.objects.select_related("client").filter(
                client__username=username
            )
        else:
            deposits = Deposit.objects.all()
        deposits_count = deposits.count()
        search_value = request.GET.get("search[value]")
        if search_value:
            deposits = deposits.filter(_deposit_search_query(search_value))
        exclude_pattern = request.GET.get("excludePattern")
        if exclude_pattern:
            deposits = deposits.exclude(_deposit_search_query(exclude_pattern))
        column_order = request.GET.get("order[0][column]")
        field_order = request.GET.get("columns[%s][name]" % column_order, "id")
        order_dir = request.GET.get("order[0][dir]", "desc")
        if order_dir == "desc":
            field_order = "-" + field_order
        deposits = deposits.order_by(field_order)
        length = int(request.GET.get("length", 10))
        page = int(request.GET.get("start", 0)) // length + 1
        paginator = Paginator(deposits, length)
        data = [
            DepositSerializer(_enrich_deposit_with_metadata(d)).data
            for d in paginator.page(page).object_list
        ]
        table_data["recordsTotal"] = deposits_count
        table_data["recordsFiltered"] = deposits.count()
        data_list = []
        for d in data:
            data_dict = {
                "id": d["id"],
                "type": d["type"],
                "external_id": d["external_id"],
                "raw_metadata": d["raw_metadata"],
                "reception_date": d["reception_date"],
                "status": d["status"],
                "status_detail": d["status_detail"],
                "swhid": d["swhid"],
                "swhid_context": d["swhid_context"],
            }
            provenance = None
            raw_metadata = d["raw_metadata"]
            # for meta deposit, the uri should be the url provenance
            if raw_metadata and d["type"] == "meta":  # metadata provenance
                provenance = parse_swh_metadata_provenance(
                    ElementTree.fromstring(raw_metadata)
                )
            # For code deposits the uri is the origin
            # First, trying to determine it out of the raw metadata associated with the
            # deposit
            elif raw_metadata and d["type"] == "code":
                create_origin_url, add_to_origin_url = parse_swh_deposit_origin(
                    ElementTree.fromstring(raw_metadata)
                )
                provenance = create_origin_url or add_to_origin_url
            # For code deposits, if not provided, use the origin_url
            if not provenance and d["type"] == "code":
                if d["origin_url"]:
                    provenance = d["origin_url"]
                # If still not found, fallback using the swhid context
                if not provenance and d["swhid_context"]:
                    swhid = QualifiedSWHID.from_string(d["swhid_context"])
                    provenance = swhid.origin
            data_dict["uri"] = provenance  # could be None
            data_list.append(data_dict)
        table_data["data"] = data_list
    except Exception as exc:
        sentry_sdk.capture_exception(exc)
        table_data["error"] = (
            "An error occurred while retrieving the list of deposits !"
        )
        if settings.DEBUG:
            table_data["error"] += "\n" + str(exc)
    return JsonResponse(table_data)