Source code for api.proquest.identifier

import logging
import re

from core.model.identifier import Identifier, IdentifierParser


class ProQuestIdentifierParser(IdentifierParser):
    """Parser for ProQuest Doc IDs.

    Recognizes identifier strings that start with
    ``urn:proquest.com/document-id/`` followed by one or more digits.
    """

    # The dot in "proquest.com" is escaped so that it matches only a literal
    # "." and not an arbitrary character.
    PROQUEST_ID_REGEX = re.compile(r"urn:proquest\.com/document-id/(\d+)")

    def __init__(self):
        """Create a parser that logs through this module's logger."""
        self._logger = logging.getLogger(__name__)

    def parse(self, identifier_string):
        """Parse a string containing an identifier, extract it and determine its type.

        The pattern is matched from the beginning of the string only; any
        characters following the digits of the document ID are ignored.

        :param identifier_string: String containing an identifier
        :type identifier_string: str

        :return: 2-tuple containing the identifier's type and identifier itself
            or None if the string contains an incorrect identifier
        :rtype: Optional[Tuple[str, str]]
        """
        # Lazy %-style arguments: the message is only rendered when DEBUG
        # logging is actually enabled.
        self._logger.debug(
            'Started parsing identifier string "%s"', identifier_string
        )

        match = self.PROQUEST_ID_REGEX.match(identifier_string)

        if match is None:
            self._logger.debug(
                'Finished parsing identifier string "%s". '
                "It does not contain a ProQuest Doc ID",
                identifier_string,
            )
            return None

        document_id = match.group(1)
        result = Identifier.PROQUEST_ID, document_id

        # Log the original input string (not the extracted ID) so this message
        # lines up with the "Started parsing" entry above.
        self._logger.debug(
            'Finished parsing identifier string "%s". Result: %s',
            identifier_string,
            result,
        )

        return result