Source code for core.model.identifier

# encoding: utf-8
# Identifier, Equivalency
import logging
import random
from urllib.parse import quote, unquote
from abc import ABCMeta, abstractmethod
from collections import defaultdict
from functools import total_ordering
import isbnlib
from sqlalchemy import (
    Boolean,
    Column,
    Float,
    ForeignKey,
    Integer,
    String,
    UniqueConstraint,
    func,
)
from sqlalchemy.orm import joinedload, relationship
from sqlalchemy.orm.exc import MultipleResultsFound, NoResultFound
from sqlalchemy.orm.session import Session
from sqlalchemy.sql import select
from sqlalchemy.sql.expression import and_, or_

from .classification import Classification, Subject
from .constants import IdentifierConstants, LinkRelations
from .coverage import CoverageRecord
from .datasource import DataSource
from .licensing import LicensePoolDeliveryMechanism, RightsStatus
from .measurement import Measurement
from . import Base, PresentationCalculationPolicy, create, get_one, get_one_or_create
from ..util.summary import SummaryEvaluator
from ..util.datetime_helpers import utc_now


[docs]class IdentifierParser(metaclass=ABCMeta): """Interface for identifier parsers."""
[docs] @abstractmethod def parse(self, identifier_string): """Parse a string containing an identifier, extract it and determine its type. :param identifier_string: String containing an identifier :type identifier_string: str :return: 2-tuple containing the identifier's type and identifier itself or None if the string contains an incorrect identifier :rtype: Optional[Tuple[str, str]] """ raise NotImplementedError()
[docs]@total_ordering class Identifier(Base, IdentifierConstants): """A way of uniquely referring to a particular edition. """ __tablename__ = 'identifiers' id = Column(Integer, primary_key=True) type = Column(String(64), index=True) identifier = Column(String, index=True) equivalencies = relationship( "Equivalency", primaryjoin=("Identifier.id==Equivalency.input_id"), backref="input_identifiers", cascade="all, delete-orphan" ) inbound_equivalencies = relationship( "Equivalency", primaryjoin=("Identifier.id==Equivalency.output_id"), backref="output_identifiers", cascade="all, delete-orphan" ) # One Identifier may have many associated CoverageRecords. coverage_records = relationship("CoverageRecord", backref="identifier") def __repr__(self): records = self.primarily_identifies if records and records[0].title: title = ' prim_ed=%d ("%s")' % (records[0].id, records[0].title) else: title = "" return "%s/%s ID=%s%s" % (self.type, self.identifier, self.id, title) # One Identifier may serve as the primary identifier for # several Editions. primarily_identifies = relationship( "Edition", backref="primary_identifier" ) # One Identifier may serve as the identifier for many # LicensePools, through different Collections. licensed_through = relationship( "LicensePool", backref="identifier", lazy='joined', ) # One Identifier may have many Links. links = relationship( "Hyperlink", backref="identifier" ) # One Identifier may be the subject of many Measurements. measurements = relationship( "Measurement", backref="identifier" ) # One Identifier may participate in many Classifications. classifications = relationship( "Classification", backref="identifier" ) # One identifier may participate in many Annotations. annotations = relationship( "Annotation", backref="identifier" ) # One Identifier can have have many LicensePoolDeliveryMechanisms. delivery_mechanisms = relationship( "LicensePoolDeliveryMechanism", backref="identifier", foreign_keys=lambda: [LicensePoolDeliveryMechanism.identifier_id] ) # Type + identifier is unique. __table_args__ = ( UniqueConstraint('type', 'identifier'), )
[docs] @classmethod def from_asin(cls, _db, asin, autocreate=True): """Turn an ASIN-like string into an Identifier. If the string is an ISBN10 or ISBN13, the Identifier will be of type ISBN and the value will be the equivalent ISBN13. Otherwise the Identifier will be of type ASIN and the value will be the value of `asin`. """ asin = asin.strip().replace("-", "") if isbnlib.is_isbn10(asin): asin = isbnlib.to_isbn13(asin) if isbnlib.is_isbn13(asin): type = cls.ISBN else: type = cls.ASIN return cls.for_foreign_id(_db, type, asin, autocreate)
[docs] @classmethod def for_foreign_id(cls, _db, foreign_identifier_type, foreign_id, autocreate=True): """Turn a foreign ID into an Identifier.""" foreign_identifier_type, foreign_id = cls.prepare_foreign_type_and_identifier( foreign_identifier_type, foreign_id ) if not foreign_identifier_type or not foreign_id: return None if autocreate: m = get_one_or_create else: m = get_one result = m(_db, cls, type=foreign_identifier_type, identifier=foreign_id) if isinstance(result, tuple): return result else: return result, False
[docs] @classmethod def prepare_foreign_type_and_identifier(cls, foreign_type, foreign_identifier): if not foreign_type or not foreign_identifier: return (None, None) # Turn a deprecated identifier type (e.g. "3M ID" into the # current type (e.g. "Bibliotheca ID"). foreign_type = cls.DEPRECATED_NAMES.get(foreign_type, foreign_type) if foreign_type in (Identifier.OVERDRIVE_ID, Identifier.BIBLIOTHECA_ID): foreign_identifier = foreign_identifier.lower() if not cls.valid_as_foreign_identifier(foreign_type, foreign_identifier): raise ValueError('"%s" is not a valid %s.' % ( foreign_identifier, foreign_type )) return (foreign_type, foreign_identifier)
[docs] @classmethod def valid_as_foreign_identifier(cls, type, id): """Return True if the given `id` can be an Identifier of the given `type`. This is not a complete implementation; we will add to it as necessary. In general we err on the side of allowing IDs that look invalid (e.g. all Overdrive IDs look like UUIDs, but we currently don't enforce that). We only reject an ID out of hand if it will cause problems with a third-party API. """ forbidden_characters = '' if type == Identifier.BIBLIOTHECA_ID: # IDs are joined with commas and provided as a URL path # element. Embedded commas or slashes will confuse the # Bibliotheca API. forbidden_characters = ',/' elif type == Identifier.AXIS_360_ID: # IDs are joined with commas during a lookup. Embedded # commas will confuse the Axis 360 API. forbidden_characters = ',' if any(x in id for x in forbidden_characters): return False return True
@property def urn(self): identifier_text = quote(self.identifier) if self.type == Identifier.ISBN: return self.ISBN_URN_SCHEME_PREFIX + identifier_text elif self.type == Identifier.URI: return self.identifier elif self.type == Identifier.GUTENBERG_ID: return self.GUTENBERG_URN_SCHEME_PREFIX + identifier_text else: identifier_type = quote(self.type) return self.URN_SCHEME_PREFIX + "%s/%s" % ( identifier_type, identifier_text) @property def work(self): """Find the Work, if any, associated with this Identifier. Although one Identifier may be associated with multiple LicensePools, all of them must share a Work. """ for lp in self.licensed_through: if lp.work: return lp.work
[docs] class UnresolvableIdentifierException(Exception): # Raised when an identifier that can't be resolved into a LicensePool # is provided in a context that requires a resolvable identifier pass
[docs] @classmethod def type_and_identifier_for_urn(cls, identifier_string): if not identifier_string: return None, None m = cls.GUTENBERG_URN_SCHEME_RE.match(identifier_string) if m: type = Identifier.GUTENBERG_ID identifier_string = m.groups()[0] elif identifier_string.startswith("http:") or identifier_string.startswith("https:"): type = Identifier.URI elif identifier_string.startswith(Identifier.URN_SCHEME_PREFIX): identifier_string = identifier_string[len(Identifier.URN_SCHEME_PREFIX):] type, identifier_string = list(map( unquote, identifier_string.split("/", 1))) elif identifier_string.startswith(Identifier.ISBN_URN_SCHEME_PREFIX): type = Identifier.ISBN identifier_string = identifier_string[len(Identifier.ISBN_URN_SCHEME_PREFIX):] identifier_string = unquote(identifier_string) # Make sure this is a valid ISBN, and convert it to an ISBN-13. if not (isbnlib.is_isbn10(identifier_string) or isbnlib.is_isbn13(identifier_string)): raise ValueError("%s is not a valid ISBN." % identifier_string) if isbnlib.is_isbn10(identifier_string): identifier_string = isbnlib.to_isbn13(identifier_string) elif identifier_string.startswith(Identifier.OTHER_URN_SCHEME_PREFIX): type = Identifier.URI else: raise ValueError( "Could not turn %s into a recognized identifier." % identifier_string) return (type, identifier_string)
[docs] @classmethod def parse_urns(cls, _db, identifier_strings, autocreate=True, allowed_types=None): """Converts a batch of URNs into Identifier objects. :param _db: A database connection :param identifier_strings: A list of strings, each a URN identifying some identifier. :param autocreate: Create an Identifier for a URN if none presently exists. :param allowed_types: If this is a list of Identifier types, only identifiers of those types may be looked up. All other identifier types will be treated as though they did not exist. :return: A 2-tuple (identifiers, failures). `identifiers` is a list of Identifiers. `failures` is a list of URNs that did not become Identifiers. """ if allowed_types is not None: allowed_types = set(allowed_types) failures = list() identifier_details = dict() for urn in identifier_strings: type = identifier = None try: (type, identifier) = cls.prepare_foreign_type_and_identifier( *cls.type_and_identifier_for_urn(urn) ) if (type and identifier and (allowed_types is None or type in allowed_types)): identifier_details[urn] = (type, identifier) else: failures.append(urn) except ValueError as e: failures.append(urn) identifiers_by_urn = dict() def find_existing_identifiers(identifier_details): if not identifier_details: return and_clauses = list() for type, identifier in identifier_details: and_clauses.append( and_(cls.type==type, cls.identifier==identifier) ) identifiers = _db.query(cls).filter(or_(*and_clauses)).all() for identifier in identifiers: identifiers_by_urn[identifier.urn] = identifier # Find identifiers that are already in the database. find_existing_identifiers(list(identifier_details.values())) # Remove the existing identifiers from the identifier_details list, # regardless of whether the provided URN was accurate. existing_details = [(i.type, i.identifier) for i in list(identifiers_by_urn.values())] identifier_details = { k: v for k, v in list(identifier_details.items()) if v not in existing_details and k not in list(identifiers_by_urn.keys()) } if not autocreate: # Don't make new identifiers. Send back unfound urns as failures. failures.extend(list(identifier_details.keys())) return identifiers_by_urn, failures # Find any identifier details that don't correspond to an existing # identifier. Try to create them. new_identifiers = list() new_identifiers_details = set([]) for urn, details in list(identifier_details.items()): if details in new_identifiers_details: # For some reason, this identifier is here twice. # Don't try to insert it twice. continue new_identifiers.append(dict(type=details[0], identifier=details[1])) new_identifiers_details.add(details) # Insert new identifiers into the database, then add them to the # results. if new_identifiers: _db.bulk_insert_mappings(cls, new_identifiers) _db.commit() find_existing_identifiers(list(identifier_details.values())) return identifiers_by_urn, failures
@classmethod def _parse_urn(cls, _db, identifier_string, identifier_type, must_support_license_pools=False): """Parse identifier string. :param _db: Database session :type _db: sqlalchemy.orm.session.Session :param identifier_string: Identifier itself :type identifier_string: str :param identifier_type: Identifier's type :type identifier_type: str :param must_support_license_pools: Boolean value indicating whether there should be a DataSource that provides licenses for books identified by the given identifier :type must_support_license_pools: bool :return: 2-tuple containing Identifier object and a boolean value indicating whether it's new :rtype: Tuple[core.model.identifier.Identifier, bool] """ if must_support_license_pools: try: _ = DataSource.license_source_for(_db, identifier_type) except NoResultFound: raise Identifier.UnresolvableIdentifierException() except MultipleResultsFound: # This is fine. pass return cls.for_foreign_id(_db, identifier_type, identifier_string)
[docs] @classmethod def parse_urn(cls, _db, identifier_string, must_support_license_pools=False): """Parse identifier string. :param _db: Database session :type _db: sqlalchemy.orm.session.Session :param identifier_string: String containing an identifier :type identifier_string: str :param must_support_license_pools: Boolean value indicating whether there should be a DataSource that provides licenses for books identified by the given identifier :type must_support_license_pools: bool :return: 2-tuple containing Identifier object and a boolean value indicating whether it's new :rtype: Tuple[core.model.identifier.Identifier, bool] """ identifier_type, identifier_string = cls.type_and_identifier_for_urn(identifier_string) return cls._parse_urn(_db, identifier_string, identifier_type, must_support_license_pools)
[docs] @classmethod def parse(cls, _db, identifier_string, parser, must_support_license_pools=False): """Parse identifier string. :param _db: Database session :type _db: sqlalchemy.orm.session.Session :param identifier_string: String containing an identifier :type identifier_string: str :param parser: Identifier parser :type parser: IdentifierParser :param must_support_license_pools: Boolean value indicating whether there should be a DataSource that provides licenses for books identified by the given identifier :type must_support_license_pools: bool :return: 2-tuple containing Identifier object and a boolean value indicating whether it's new :rtype: Tuple[core.model.identifier.Identifier, bool] """ identifier_type, identifier_string = parser.parse(identifier_string) return cls._parse_urn(_db, identifier_string, identifier_type, must_support_license_pools)
[docs] def equivalent_to(self, data_source, identifier, strength): """Make one Identifier equivalent to another. `data_source` is the DataSource that believes the two identifiers are equivalent. """ _db = Session.object_session(self) if self == identifier: # That an identifier is equivalent to itself is tautological. # Do nothing. return None eq, new = get_one_or_create( _db, Equivalency, data_source=data_source, input=self, output=identifier, on_multiple='interchangeable' ) eq.strength=strength if new: logging.info( "Identifier equivalency: %r==%r p=%.2f", self, identifier, strength ) return eq
[docs] @classmethod def recursively_equivalent_identifier_ids_query( cls, identifier_id_column, policy=None): """Get a SQL statement that will return all Identifier IDs equivalent to a given ID at the given confidence threshold. `identifier_id_column` can be a single Identifier ID, or a column like `Edition.primary_identifier_id` if the query will be used as a subquery. This uses the function defined in files/recursive_equivalents.sql. """ fn = cls._recursively_equivalent_identifier_ids_query( identifier_id_column, policy ) return select([fn])
@classmethod def _recursively_equivalent_identifier_ids_query( cls, identifier_id_column, policy=None ): policy = policy or PresentationCalculationPolicy() levels = policy.equivalent_identifier_levels threshold = policy.equivalent_identifier_threshold cutoff = policy.equivalent_identifier_cutoff return func.fn_recursive_equivalents( identifier_id_column, levels, threshold, cutoff )
[docs] @classmethod def recursively_equivalent_identifier_ids( cls, _db, identifier_ids, policy=None): """All Identifier IDs equivalent to the given set of Identifier IDs at the given confidence threshold. This uses the function defined in files/recursive_equivalents.sql. Four levels is enough to go from a Gutenberg text to an ISBN. Gutenberg ID -> OCLC Work IS -> OCLC Number -> ISBN Returns a dictionary mapping each ID in the original to a list of equivalent IDs. :param policy: A PresentationCalculationPolicy that explains how you've chosen to make the tradeoff between performance, data quality, and sheer number of equivalent identifiers. """ fn = cls._recursively_equivalent_identifier_ids_query( Identifier.id, policy ) query = select([Identifier.id, fn], Identifier.id.in_(identifier_ids)) results = _db.execute(query) equivalents = defaultdict(list) for r in results: original = r[0] equivalent = r[1] equivalents[original].append(equivalent) return equivalents
[docs] def equivalent_identifier_ids(self, policy=None): _db = Session.object_session(self) return Identifier.recursively_equivalent_identifier_ids( _db, [self.id], policy )
[docs] def licensed_through_collection(self, collection): """Find the LicensePool, if any, for this Identifier in the given Collection. :return: At most one LicensePool. """ for lp in self.licensed_through: if lp.collection == collection: return lp
[docs] def add_measurement(self, data_source, quantity_measured, value, weight=1, taken_at=None): """Associate a new Measurement with this Identifier.""" _db = Session.object_session(self) logging.debug( "MEASUREMENT: %s on %s/%s: %s == %s (wt=%d)", data_source.name, self.type, self.identifier, quantity_measured, value, weight) now = utc_now() taken_at = taken_at or now # Is there an existing most recent measurement? most_recent = get_one( _db, Measurement, identifier=self, data_source=data_source, quantity_measured=quantity_measured, is_most_recent=True, on_multiple='interchangeable' ) if most_recent and most_recent.value == value and taken_at == now: # The value hasn't changed since last time. Just update # the timestamp of the existing measurement. self.taken_at = taken_at if most_recent and most_recent.taken_at < taken_at: most_recent.is_most_recent = False return create( _db, Measurement, identifier=self, data_source=data_source, quantity_measured=quantity_measured, taken_at=taken_at, value=value, weight=weight, is_most_recent=True)[0]
[docs] def classify(self, data_source, subject_type, subject_identifier, subject_name=None, weight=1): """Classify this Identifier under a Subject. :param type: Classification scheme; one of the constants from Subject. :param subject_identifier: Internal ID of the subject according to that classification scheme. :param value: Human-readable description of the subject, if different from the ID. :param weight: How confident the data source is in classifying a book under this subject. The meaning of this number depends entirely on the source of the information. """ _db = Session.object_session(self) # Turn the subject type and identifier into a Subject. classifications = [] subject, is_new = Subject.lookup( _db, subject_type, subject_identifier, subject_name, ) logging.debug( "CLASSIFICATION: %s on %s/%s: %s %s/%s (wt=%d)", data_source.name, self.type, self.identifier, subject.type, subject.identifier, subject.name, weight ) # Use a Classification to connect the Identifier to the # Subject. try: classification, is_new = get_one_or_create( _db, Classification, identifier=self, subject=subject, data_source=data_source) except MultipleResultsFound as e: # TODO: This is a hack. all_classifications = _db.query(Classification).filter( Classification.identifier==self, Classification.subject==subject, Classification.data_source==data_source) all_classifications = all_classifications.all() classification = all_classifications[0] for i in all_classifications[1:]: _db.delete(i) classification.weight = weight return classification
[docs] @classmethod def resources_for_identifier_ids(self, _db, identifier_ids, rel=None, data_source=None): from .resource import Hyperlink, Resource resources = _db.query(Resource).join(Resource.links).filter( Hyperlink.identifier_id.in_(identifier_ids)) if data_source: if isinstance(data_source, DataSource): data_source = [data_source] resources = resources.filter(Hyperlink.data_source_id.in_([d.id for d in data_source])) if rel: if isinstance(rel, list): resources = resources.filter(Hyperlink.rel.in_(rel)) else: resources = resources.filter(Hyperlink.rel==rel) resources = resources.options(joinedload('representation')) return resources
[docs] @classmethod def classifications_for_identifier_ids(self, _db, identifier_ids): classifications = _db.query(Classification).filter( Classification.identifier_id.in_(identifier_ids)) return classifications.options(joinedload('subject'))
[docs] @classmethod def best_cover_for(cls, _db, identifier_ids, rel=None): # Find all image resources associated with any of # these identifiers. from .resource import Hyperlink, Resource rel = rel or Hyperlink.IMAGE images = cls.resources_for_identifier_ids( _db, identifier_ids, rel) images = images.join(Resource.representation) images = images.all() champions = Resource.best_covers_among(images) if not champions: champion = None elif len(champions) == 1: [champion] = champions else: champion = random.choice(champions) return champion, images
[docs] @classmethod def evaluate_summary_quality(cls, _db, identifier_ids, privileged_data_sources=None): """Evaluate the summaries for the given group of Identifier IDs. This is an automatic evaluation based solely on the content of the summaries. It will be combined with human-entered ratings to form an overall quality score. We need to evaluate summaries from a set of Identifiers (typically those associated with a single work) because we need to see which noun phrases are most frequently used to describe the underlying work. :param privileged_data_sources: If present, a summary from one of these data source will be instantly chosen, short-circuiting the decision process. Data sources are in order of priority. :return: The single highest-rated summary Resource. """ evaluator = SummaryEvaluator() if privileged_data_sources and len(privileged_data_sources) > 0: privileged_data_source = privileged_data_sources[0] else: privileged_data_source = None # Find all rel="description" resources associated with any of # these records. rels = [LinkRelations.DESCRIPTION, LinkRelations.SHORT_DESCRIPTION] descriptions = cls.resources_for_identifier_ids( _db, identifier_ids, rels, privileged_data_source).all() champion = None # Add each resource's content to the evaluator's corpus. for r in descriptions: if r.representation and r.representation.content: evaluator.add(r.representation.content) evaluator.ready() # Then have the evaluator rank each resource. for r in descriptions: if r.representation and r.representation.content: content = r.representation.content quality = evaluator.score(content) r.set_estimated_quality(quality) if not champion or r.quality > champion.quality: champion = r if privileged_data_source and not champion: # We could not find any descriptions from the privileged # data source. Try relaxing that restriction. return cls.evaluate_summary_quality(_db, identifier_ids, privileged_data_sources[1:]) return champion, descriptions
[docs] @classmethod def missing_coverage_from( cls, _db, identifier_types, coverage_data_source, operation=None, count_as_covered=None, count_as_missing_before=None, identifiers=None, collection=None ): """Find identifiers of the given types which have no CoverageRecord from `coverage_data_source`. :param count_as_covered: Identifiers will be counted as covered if their CoverageRecords have a status in this list. :param identifiers: Restrict search to a specific set of identifier objects. """ if collection: collection_id = collection.id else: collection_id = None data_source_id = None if coverage_data_source: data_source_id = coverage_data_source.id clause = and_(Identifier.id==CoverageRecord.identifier_id, CoverageRecord.data_source_id==data_source_id, CoverageRecord.operation==operation, CoverageRecord.collection_id==collection_id ) qu = _db.query(Identifier).outerjoin(CoverageRecord, clause) if identifier_types: qu = qu.filter(Identifier.type.in_(identifier_types)) missing = CoverageRecord.not_covered( count_as_covered, count_as_missing_before ) qu = qu.filter(missing) if identifiers: qu = qu.filter(Identifier.id.in_([x.id for x in identifiers])) return qu
[docs] def opds_entry(self): """Create an OPDS entry using only resources directly associated with this Identifier. This makes it possible to create an OPDS entry even when there is no Edition. Currently the only things in this OPDS entry will be description, cover image, and popularity. NOTE: The timestamp doesn't take into consideration when the description was added. Rather than fixing this it's probably better to get rid of this hack and create real Works where we would be using this method. """ id = self.urn cover_image = None description = None most_recent_update = None timestamps = [] for link in self.links: resource = link.resource if link.rel == LinkRelations.IMAGE: if not cover_image or ( not cover_image.representation.thumbnails and resource.representation.thumbnails): cover_image = resource if cover_image.representation: # This is technically redundant because # minimal_opds_entry will redo this work, # but just to be safe. mirrored_at = cover_image.representation.mirrored_at if mirrored_at: timestamps.append(mirrored_at) elif link.rel == LinkRelations.DESCRIPTION: if not description or resource.quality > description.quality: description = resource if self.coverage_records: timestamps.extend([ c.timestamp for c in self.coverage_records if c.timestamp ]) if timestamps: most_recent_update = max(timestamps) quality = Measurement.overall_quality(self.measurements) from ..opds import AcquisitionFeed return AcquisitionFeed.minimal_opds_entry( identifier=self, cover=cover_image, description=description, quality=quality, most_recent_update=most_recent_update )
def __eq__(self, other): """Equality implementation for total_ordering.""" # We don't want an Identifier to be == an IdentifierData # with the same data. if other is None or not isinstance(other, Identifier): return False return (self.type, self.identifier) == (other.type, other.identifier) def __hash__(self): return hash((self.type, self.identifier)) def __lt__(self, other): """Comparison implementation for total_ordering.""" if other is None or not isinstance(other, Identifier): return False return (self.type, self.identifier) < (other.type, other.identifier)
[docs]class Equivalency(Base): """An assertion that two Identifiers identify the same work. This assertion comes with a 'strength' which represents how confident the data source is in the assertion. """ __tablename__ = 'equivalents' # 'input' is the ID that was used as input to the datasource. # 'output' is the output id = Column(Integer, primary_key=True) input_id = Column(Integer, ForeignKey('identifiers.id'), index=True) input = relationship("Identifier", foreign_keys=input_id) output_id = Column(Integer, ForeignKey('identifiers.id'), index=True) output = relationship("Identifier", foreign_keys=output_id) # Who says? data_source_id = Column(Integer, ForeignKey('datasources.id'), index=True) # How many distinct votes went into this assertion? This will let # us scale the change to the strength when additional votes come # in. votes = Column(Integer, default=1) # How strong is this assertion (-1..1)? A negative number is an # assertion that the two Identifiers do *not* identify the # same work. strength = Column(Float, index=True) # Should this equivalency actually be used in calculations? This # is not manipulated directly, but it gives us the ability to use # manual intervention to defuse large chunks of problematic code # without actually deleting the data. enabled = Column(Boolean, default=True, index=True) def __repr__(self): r = "[%s ->\n %s\n source=%s strength=%.2f votes=%d)]" % ( repr(self.input).decode("utf8"), repr(self.output).decode("utf8"), self.data_source.name, self.strength, self.votes ) return r
[docs] @classmethod def for_identifiers(self, _db, identifiers, exclude_ids=None): """Find all Equivalencies for the given Identifiers.""" if not identifiers: return [] if isinstance(identifiers, list) and isinstance(identifiers[0], Identifier): identifiers = [x.id for x in identifiers] q = _db.query(Equivalency).distinct().filter( or_(Equivalency.input_id.in_(identifiers), Equivalency.output_id.in_(identifiers)) ) if exclude_ids: q = q.filter(~Equivalency.id.in_(exclude_ids)) return q