# Source code for swh.search.utils
# Copyright (C) 2021  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import codecs
from datetime import datetime
from typing import Optional
import iso8601
[docs]
def get_expansion(field, sep=None):
    """Return the key path under which a metadata field is stored.

    Each supported field name maps to a three-element path: the
    ``"jsonld"`` root key, a schema.org property URI, and the leaf key
    (``"@id"`` or ``"@value"``).

    Args:
        field: one of ``licenses``, ``programming_languages``, ``keywords``,
            ``descriptions``, ``date_created``, ``date_modified`` or
            ``date_published``.
        sep: optional separator. When truthy, the path elements are joined
            with it and a single string is returned.

    Returns:
        The path as a list of strings, or as one ``sep``-joined string when
        ``sep`` is truthy.

    Raises:
        KeyError: if ``field`` is not one of the supported names.
    """
    # The table is rebuilt on every call, so the returned list is never
    # shared between callers.
    expansions = {
        "licenses": ["jsonld", "http://schema.org/license", "@id"],
        "programming_languages": ["jsonld", "http://schema.org/programmingLanguage", "@value"],
        "keywords": ["jsonld", "http://schema.org/keywords", "@value"],
        "descriptions": ["jsonld", "http://schema.org/description", "@value"],
        "date_created": ["jsonld", "http://schema.org/dateCreated", "@value"],
        "date_modified": ["jsonld", "http://schema.org/dateModified", "@value"],
        "date_published": ["jsonld", "http://schema.org/datePublished", "@value"],
    }

    if not sep:
        return expansions[field]
    return sep.join(expansions[field])
[docs]
# Built once at import time: maps each escapable character to its
# backslash-prefixed form.
_ESCAPE_TABLE = str.maketrans({"'": r"\'", '"': r"\"", "\\": r"\\"})


def escape(obj):
    r"""Make the object directly injectable into the query language.

    For strings, puts a \ before each ', " and \ character and wraps the
    result in double quotes.
    For lists, applies the same transformation to each element (recursively),
    joins the results with ", " and wraps them in square brackets, giving a
    string-like representation of the list.

    Subclasses of ``str`` and ``list`` are accepted as well.

    Args:
        obj: a string, or a (possibly nested) list of strings.

    Returns:
        The escaped representation, as a string.

    Raises:
        TypeError: if ``obj``, or any element of a list, is neither a string
            nor a list.

    >>> print(escape("foo ' bar"))
    "foo \' bar"
    >>> print(escape([r"foo ' bar", r"bar \\\' baz", r'foo " baz']))
    ["foo \' bar", "bar \\\\\\\' baz", "foo \" baz"]
    """
    if isinstance(obj, list):
        return "[" + ", ".join(escape(item) for item in obj) + "]"
    if isinstance(obj, str):
        return '"' + obj.translate(_ESCAPE_TABLE) + '"'
    # TypeError is a subclass of Exception, so callers catching the previous
    # bare Exception keep working.
    raise TypeError(f"Unexpected item type {type(obj)}")
[docs]
def unescape(string):
    r"""Resolve backslash escape sequences in ``string``.

    The text is encoded to bytes, run through ``codecs.escape_decode`` and
    decoded back to ``str``.

    >>> unescape(r'''foo " bar''') == r'''foo " bar'''
    True
    >>> unescape(r'''foo \" bar''') == r'''foo " bar'''
    True
    >>> unescape(r'''foo \\" bar''') == r'''foo \" bar'''
    True
    >>> unescape(r'''foo \\\" bar''') == r'''foo \" bar'''
    True
    >>> unescape(r'''foo \\\\" bar''') == r'''foo \\" bar'''
    True
    >>> unescape(r'''café \" foo''') == r'''café " foo'''
    True
    """
    raw = string.encode()
    # escape_decode returns a tuple; only its first item (the decoded bytes)
    # is needed.
    decoded = codecs.escape_decode(raw)[0]
    return decoded.decode()