Source code for idutils.detectors

# -*- coding: utf-8 -*-
#
# This file is part of IDUtils
# Copyright (C) 2024 CERN.
#
# IDUtils is free software; you can redistribute it and/or modify
# it under the terms of the Revised BSD License; see LICENSE file for
# more details.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.

"""Functions for detecting the persistent identifier."""

from . import validators
from .proxies import custom_schemes_registry
from .schemes import IDUTILS_PID_SCHEMES as _IDUTILS_PID_SCHEMES
from .schemes import IDUTILS_SCHEME_FILTER as _IDUTILS_SCHEME_FILTER

# Re-export the scheme table that is imported from ``.schemes`` under a
# private alias, binding the very same object to a public name of this module.
# ``detect_identifier_schemes`` below reads it through this name.
# NOTE(review): presumably kept so that code importing the table from
# ``idutils.detectors`` keeps working -- confirm against callers.
IDUTILS_PID_SCHEMES = _IDUTILS_PID_SCHEMES
"""Definition of scheme name and associated test function.

Order of list is important, as identifier scheme detection will test in the
order given by this list."""


# Same re-export pattern for the filter table that is applied after detection
# to drop schemes made redundant by the presence of another scheme.
IDUTILS_SCHEME_FILTER = _IDUTILS_SCHEME_FILTER
"""(present_scheme, [list of schemes to remove if present_scheme found])."""


def detect_identifier_schemes(val):
    """Detect persistent identifier scheme for a given value.

    Every ``(scheme, validator)`` pair of ``IDUTILS_PID_SCHEMES``, followed by
    the validators of the custom scheme registry, is tried in order and the
    names of all matching schemes are collected. Known ambiguities between
    schemes are then resolved by dropping the less specific candidates.

    .. note:: Some schemes like PMID are very generic.

    :param val: The identifier value to test. String methods (``lower``,
        ``startswith``) are called on it while resolving ambiguities.
    :returns: List of the names of the matching schemes, in detection order.
        The list is empty when no validator accepted the value.
    """
    scheme_validators = IDUTILS_PID_SCHEMES + custom_schemes_registry().pick_scheme_key(
        "validator"
    )
    schemes = [scheme for scheme, test in scheme_validators if test(val)]

    # GNDs and ISBNs numbers can clash, in which case ISBN is only dropped
    # when the value is clearly a GND, i.e. carries the explicit prefix.
    if (
        "gnd" in schemes
        and "isbn" in schemes
        and val.lower().startswith("gnd:")
    ):
        schemes = [scheme for scheme in schemes if scheme != "isbn"]

    # A VIAF URL also validates as a plain URL and possibly as a handle; when
    # the value starts with one of the known VIAF URL prefixes it is a VIAF.
    # The prefixes are tested in a single ``startswith`` call and the generic
    # schemes are filtered out, so several matching prefixes cannot trigger a
    # second ``list.remove`` of an element that is already gone (ValueError).
    if "viaf" in schemes and val.startswith(tuple(validators.viaf_urls)):
        schemes = [scheme for scheme in schemes if scheme not in ("url", "handle")]

    # Declarative filters: when ``present`` was detected, every scheme listed
    # alongside it is dropped. Filters are applied in order, each one seeing
    # the result of the previous one.
    scheme_filter = IDUTILS_SCHEME_FILTER + custom_schemes_registry().pick_scheme_key(
        "filter"
    )
    for present, redundant in scheme_filter:
        if present in schemes:
            schemes = [scheme for scheme in schemes if scheme not in redundant]

    # The handle scheme is dropped when a more specific reading exists:
    # a URL that does not point to the hdl.handle.net resolver, or a value
    # that was also detected as an ARK or an arXiv identifier.
    if "handle" in schemes and (
        (
            "url" in schemes
            and not val.startswith(
                ("http://hdl.handle.net/", "https://hdl.handle.net/")
            )
        )
        or "ark" in schemes
        or "arxiv" in schemes
    ):
        schemes = [scheme for scheme in schemes if scheme != "handle"]

    return schemes