Source code for core.classifier

# encoding: utf-8

# If the genre classification does not match the fiction classification, throw
# away the genre classifications.
#
# E.g. "Investigations -- nonfiction" maps to Mystery, but Mystery
# conflicts with Nonfiction.

# SQL to find commonly used DDC classifications
# select count(editions.id) as c, subjects.identifier from editions join identifiers on workrecords.primary_identifier_id=workidentifiers.id join classifications on workidentifiers.id=classifications.work_identifier_id join subjects on classifications.subject_id=subjects.id where subjects.type = 'DDC' and not subjects.identifier like '8%' group by subjects.identifier order by c desc;

# SQL to find commonly used classifications not assigned to a genre
# select count(identifiers.id) as c, subjects.type, substr(subjects.identifier, 0, 20) as i, substr(subjects.name, 0, 20) as n from workidentifiers join classifications on workidentifiers.id=classifications.work_identifier_id join subjects on classifications.subject_id=subjects.id where subjects.genre_id is null and subjects.fiction is null group by subjects.type, i, n order by c desc;

import logging
import json
import os
import pkgutil
import re
from urllib.parse import urlparse
from collections import (
    Counter,
    defaultdict,
)

from sqlalchemy.orm.session import Session
from sqlalchemy.sql.expression import and_

base_dir = os.path.split(__file__)[0]
resource_dir = os.path.join(base_dir, "..", "resources")

NO_VALUE = "NONE"
NO_NUMBER = -1

[docs]class ClassifierConstants(object):
    DDC = "DDC"
    LCC = "LCC"
    LCSH = "LCSH"
    FAST = "FAST"
    OVERDRIVE = "Overdrive"
    RBDIGITAL = "RBdigital"
    BISAC = "BISAC"
    BIC = "BIC"
    TAG = "tag"   # Folksonomic tags.

    # Appeal controlled vocabulary developed by NYPL
    NYPL_APPEAL = "NYPL Appeal"

    GRADE_LEVEL = "Grade level" # "1-2", "Grade 4", "Kindergarten", etc.
    AGE_RANGE = "schema:typicalAgeRange" # "0-2", etc.
    AXIS_360_AUDIENCE = "Axis 360 Audience"
    RBDIGITAL_AUDIENCE = "RBdigital Audience"

    # We know this says something about the audience but we're not sure what.
    # Could be any of the values from GRADE_LEVEL or AGE_RANGE, plus
    # "YA", "Adult", etc.
    FREEFORM_AUDIENCE = "schema:audience"

    GUTENBERG_BOOKSHELF = "gutenberg:bookshelf"
    TOPIC = "schema:Topic"
    PLACE = "schema:Place"
    PERSON = "schema:Person"
    ORGANIZATION = "schema:Organization"
    LEXILE_SCORE = "Lexile"
    ATOS_SCORE = "ATOS"
    INTEREST_LEVEL = "Interest Level"

    AUDIENCE_ADULT = "Adult"
    AUDIENCE_ADULTS_ONLY = "Adults Only"
    AUDIENCE_YOUNG_ADULT = "Young Adult"
    AUDIENCE_CHILDREN = "Children"
    AUDIENCE_ALL_AGES = "All Ages"
    AUDIENCE_RESEARCH = "Research"

    # A book for a child younger than 14 is a children's book.
    # A book for a child 14 or older is a young adult book.
    YOUNG_ADULT_AGE_CUTOFF = 14

    ADULT_AGE_CUTOFF = 18

    # "All ages" actually means "all ages with reading fluency".
    ALL_AGES_AGE_CUTOFF = 8

    AUDIENCES_YOUNG_CHILDREN = [AUDIENCE_CHILDREN, AUDIENCE_ALL_AGES]
    AUDIENCES_JUVENILE = AUDIENCES_YOUNG_CHILDREN + [AUDIENCE_YOUNG_ADULT]
    AUDIENCES_ADULT = [AUDIENCE_ADULT, AUDIENCE_ADULTS_ONLY, AUDIENCE_ALL_AGES]
    AUDIENCES = set([AUDIENCE_ADULT, AUDIENCE_ADULTS_ONLY, AUDIENCE_YOUNG_ADULT,
                     AUDIENCE_CHILDREN, AUDIENCE_ALL_AGES, AUDIENCE_RESEARCH])

    SIMPLIFIED_GENRE = "http://librarysimplified.org/terms/genres/Simplified/"
    SIMPLIFIED_FICTION_STATUS = "http://librarysimplified.org/terms/fiction/"

[docs]class Classifier(ClassifierConstants):
    """Turn an external classification into an internal genre, an
    audience, an age level, and a fiction status.
    """

    AUDIENCES_NO_RESEARCH = [
        x for x in ClassifierConstants.AUDIENCES if x != ClassifierConstants.AUDIENCE_RESEARCH
    ]

    classifiers = dict()

[docs]    @classmethod
    def range_tuple(cls, lower, upper):
        """Turn a pair of ages into a tuple that represents an age range.
        This may be turned into an inclusive postgres NumericRange later,
        but this code should not depend on postgres.
        """
        # Just in case the upper and lower ranges are mixed up,
        # and no prior code caught this, un-mix them.
        if lower and upper and lower > upper:
            lower, upper = upper, lower
        return (lower, upper)

[docs]    @classmethod
    def lookup(cls, scheme):
        """Look up a classifier for a classification scheme."""
        return cls.classifiers.get(scheme, None)

[docs]    @classmethod
    def name_for(cls, identifier):
        """Look up a human-readable name for the given identifier."""
        return None

[docs]    @classmethod
    def classify(cls, subject):
        """Try to determine genre, audience, target age, and fiction status
        for the given Subject.
        """
        identifier, name = cls.scrub_identifier_and_name(
            subject.identifier, subject.name
        )
        fiction = cls.is_fiction(identifier, name)
        audience = cls.audience(identifier, name)

        target_age = cls.target_age(identifier, name)
        if target_age == cls.range_tuple(None, None):
            target_age = cls.default_target_age_for_audience(audience)

        return (cls.genre(identifier, name, fiction, audience),
                audience,
                target_age,
                fiction,
                )

[docs]    @classmethod
    def scrub_identifier_and_name(cls, identifier, name):
        """Prepare identifier and name from within a call to classify()."""
        identifier = cls.scrub_identifier(identifier)
        if isinstance(identifier, tuple):
            # scrub_identifier returned a canonical value for name as
            # well. Use it in preference to any name associated with
            # the subject.
            identifier, name = identifier
        elif not name:
            name = identifier
        name = cls.scrub_name(name)
        return identifier, name

[docs]    @classmethod
    def scrub_identifier(cls, identifier):
        """Prepare an identifier from within a call to classify().

        This may involve data normalization, conversion to lowercase,
        etc.
        """
        if identifier is None:
            return None
        return Lowercased(identifier)

[docs]    @classmethod
    def scrub_name(cls, name):
        """Prepare a name from within a call to classify()."""
        if name is None:
            return None
        return Lowercased(name)


[docs]    @classmethod
    def genre(cls, identifier, name, fiction=None, audience=None):
        """Is this identifier associated with a particular Genre?"""
        return None

[docs]    @classmethod
    def genre_match(cls, query):
        """Does this query string match a particular Genre, and which part
        of the query matches?"""
        return None, None

[docs]    @classmethod
    def is_fiction(cls, identifier, name):
        """Is this identifier+name particularly indicative of fiction?
        How about nonfiction?
        """
        if "nonfiction" in name:
            return False
        if "fiction" in name:
            return True
        return None

[docs]    @classmethod
    def audience(cls, identifier, name):
        """What does this identifier+name say about the audience for
        this book?
        """
        if 'juvenile' in name:
            return cls.AUDIENCE_CHILDREN
        elif 'young adult' in name or "YA" in name.original:
            return cls.AUDIENCE_YOUNG_ADULT
        return None

[docs]    @classmethod
    def audience_match(cls, query):
        """Does this query string match a particular Audience, and which
        part of the query matches?"""
        return (None, None)

[docs]    @classmethod
    def target_age(cls, identifier, name):
        """For children's books, what does this identifier+name say
        about the target age for this book?
        """
        return cls.range_tuple(None, None)

[docs]    @classmethod
    def default_target_age_for_audience(cls, audience):
        """The default target age for a given audience.

        We don't know what age range a children's book is appropriate
        for, but we can make a decent guess for a YA book, for an
        'Adult' book it's pretty clear, and for an 'Adults Only' book
        it's very clear.
        """
        if audience == Classifier.AUDIENCE_YOUNG_ADULT:
            return cls.range_tuple(14, 17)
        elif audience in (
                Classifier.AUDIENCE_ADULT, Classifier.AUDIENCE_ADULTS_ONLY
        ):
            return cls.range_tuple(18, None)
        return cls.range_tuple(None, None)

[docs]    @classmethod
    def default_audience_for_target_age(cls, range):
        if range is None:
            return None
        lower = range[0]
        upper = range[1]
        if not lower and not upper:
            # You could interpret this as 'all ages' but it's more
            # likely the data is simply missing.
            return None
        if not lower:
            if upper >= cls.ADULT_AGE_CUTOFF:
                # e.g. "up to 20 years", though this doesn't
                # really make sense.
                #
                # The 'all ages' interpretation is more plausible here
                # but it's still more likely that this is simply a
                # book for grown-ups and no lower bound was provided.
                return cls.AUDIENCE_ADULT
            elif upper > cls.YOUNG_ADULT_AGE_CUTOFF:
                # e.g. "up to 15 years"
                return cls.AUDIENCE_YOUNG_ADULT
            else:
                # e.g. "up to 14 years"
                return cls.AUDIENCE_CHILDREN

        # At this point we can assume that lower is not None.
        if lower >= 18:
            return cls.AUDIENCE_ADULT
        elif lower >= cls.YOUNG_ADULT_AGE_CUTOFF:
            return cls.AUDIENCE_YOUNG_ADULT
        elif lower <= cls.ALL_AGES_AGE_CUTOFF and (
            upper is not None and upper >= cls.ADULT_AGE_CUTOFF
        ):
            # e.g. "for children ages 7-77". The 'all ages' reading
            # is here the most plausible.
            return cls.AUDIENCE_ALL_AGES
        elif lower >= 12 and (not upper or upper >= cls.YOUNG_ADULT_AGE_CUTOFF):
            # Although we treat "Young Adult" as starting at 14, many
            # outside sources treat it as starting at 12. As such we
            # treat "12 and up" or "12-14" as an indicator of a Young
            # Adult audience, with a target age that overlaps what we
            # consider a Children audience.
            return cls.AUDIENCE_YOUNG_ADULT
        else:
            return cls.AUDIENCE_CHILDREN

[docs]    @classmethod
    def and_up(cls, young, keyword):
        """Encapsulates the logic of what "[x] and up" actually means.

        Given the lower end of an age range, tries to determine the
        upper end of the range.
        """
        if young is None:
            return None
        if not any(
                [keyword.endswith(x) for x in
                 ("and up", "and up.", "+", "+.")
             ]
        ):
            return None

        if young >= 18:
            old = young
        elif young >= 12:
            # "12 and up", "14 and up", etc.  are
            # generally intended to cover the entire
            # YA span.
            old = 17
        elif young >= 8:
            # "8 and up" means something like "8-12"
            old = young + 4
        else:
            # Whereas "3 and up" really means more
            # like "3 to 5".
            old = young + 2
        return old

class GradeLevelClassifier(Classifier):
    # How old a kid is when they start grade N in the US.
    american_grade_to_age = {
        # Preschool: 3-4 years
        'preschool' : 3,
        'pre-school' : 3,
        'p' : 3,
        'pk' : 4,

        # Easy readers
        'kindergarten' : 5,
        'k' : 5,
        '0' : 5,
        'first' : 6,
        '1' : 6,
        'second' : 7,
        '2' : 7,

        # Chapter Books
        'third' : 8,
        '3' : 8,
        'fourth' : 9,
        '4' : 9,
        'fifth' : 10,
        '5' : 10,
        'sixth' : 11,
        '6' : 11,
        '7' : 12,
        '8' : 13,

        # YA
        '9' : 14,
        '10' : 15,
        '11' : 16,
        '12': 17,
    }

    # Regular expressions that match common ways of expressing grade
    # levels.
    grade_res = [
        re.compile(x, re.I) for x in [
            "grades? ([kp0-9]+) to ([kp0-9]+)?",
            "grades? ([kp0-9]+) ?-? ?([kp0-9]+)?",
            "gr\.? ([kp0-9]+) ?-? ?([kp0-9]+)?",
            "grades?: ([kp0-9]+) to ([kp0-9]+)",
            "grades?: ([kp0-9]+) ?-? ?([kp0-9]+)?",
            "gr\.? ([kp0-9]+)",
            "([0-9]+)[tnsr][hdt] grade",
            "([a-z]+) grade",
            r'\b(kindergarten|preschool)\b',
        ]
    ]

    generic_grade_res = [
        re.compile(r"([kp0-9]+) ?- ?([0-9]+)", re.I),
        re.compile(r"([kp0-9]+) ?to ?([0-9]+)", re.I),
        re.compile(r"^([0-9]+)\b", re.I),
        re.compile(r"^([kp])\b", re.I),
    ]

    @classmethod
    def audience(cls, identifier, name, require_explicit_age_marker=False):
        target_age = cls.target_age(identifier, name, require_explicit_age_marker)
        return cls.default_audience_for_target_age(target_age)


    @classmethod
    def target_age(cls, identifier, name, require_explicit_grade_marker=False):

        if (identifier and "education" in identifier) or (name and 'education' in name):
            # This is a book about teaching, e.g. fifth grade.
            return cls.range_tuple(None, None)

        if (identifier and 'grader' in identifier) or (name and 'grader' in name):
            # This is a book about, e.g. fifth graders.
            return cls.range_tuple(None, None)

        if require_explicit_grade_marker:
            res = cls.grade_res
        else:
            res = cls.grade_res + cls.generic_grade_res

        for r in res:
            for k in identifier, name:
                if not k:
                    continue
                m = r.search(k)
                if m:
                    gr = m.groups()
                    if len(gr) == 1:
                        young = gr[0]
                        old = None
                    else:
                        young, old = gr

                    # Strip leading zeros
                    if young and young.lstrip('0'):
                        young = young.lstrip("0")
                    if old and old.lstrip('0'):
                        old = old.lstrip("0")

                    young = cls.american_grade_to_age.get(young)
                    old = cls.american_grade_to_age.get(old)

                    if not young and not old:
                        return cls.range_tuple(None, None)

                    if young:
                        young = int(young)
                    if old:
                        old = int(old)
                    if old is None:
                        old = cls.and_up(young, k)
                    if old is None and young is not None:
                        old = young
                    if young is None and old is not None:
                        young = old
                    if old and young and  old < young:
                        young, old = old, young
                    return cls.range_tuple(young, old)
        return cls.range_tuple(None, None)

    @classmethod
    def target_age_match(cls, query):
        target_age = None
        grade_words = None
        target_age = cls.target_age(None, query, require_explicit_grade_marker=True)
        if target_age:
            for r in cls.grade_res:
                match = r.search(query)
                if match:
                    grade_words = match.group()
                    break
        return (target_age, grade_words)

class InterestLevelClassifier(Classifier):

    @classmethod
    def audience(cls, identifier, name):
        if identifier in ('lg', 'mg+', 'mg'):
            return cls.AUDIENCE_CHILDREN
        elif identifier == 'ug':
            return cls.AUDIENCE_YOUNG_ADULT
        else:
            return None

    @classmethod
    def target_age(cls, identifier, name):
        if identifier == 'lg':
            return cls.range_tuple(5,8)
        if identifier in ('mg+', 'mg'):
            return cls.range_tuple(9,13)
        if identifier == 'ug':
            return cls.range_tuple(14,17)
        return None


class AgeClassifier(Classifier):
    # Regular expressions that match common ways of expressing ages.
    age_res = [
        re.compile(x, re.I) for x in [
            "age ([0-9]+) ?-? ?([0-9]+)?",
            "age: ([0-9]+) ?-? ?([0-9]+)?",
            "age: ([0-9]+) to ([0-9]+)",
            "ages ([0-9]+) ?- ?([0-9]+)",
            "([0-9]+) ?- ?([0-9]+) years?",
            "([0-9]+) years?",
            "ages ([0-9]+)+",
            "([0-9]+) and up",
            "([0-9]+) years? and up",
        ]
    ]

    generic_age_res = [
        re.compile("([0-9]+) ?- ?([0-9]+)", re.I),
        re.compile(r"^([0-9]+)\b", re.I),
    ]

    baby_re = re.compile("^baby ?- ?([0-9]+) year", re.I)

    @classmethod
    def audience(cls, identifier, name, require_explicit_age_marker=False):

        target_age = cls.target_age(identifier, name, require_explicit_age_marker)
        return cls.default_audience_for_target_age(target_age)

    @classmethod
    def target_age(cls, identifier, name, require_explicit_age_marker=False):
        if require_explicit_age_marker:
            res = cls.age_res
        else:
            res = cls.age_res + cls.generic_age_res
        if identifier:
            match = cls.baby_re.search(identifier)
            if match:
                # This is for babies.
                upper_bound = int(match.groups()[0])
                return cls.range_tuple(0, upper_bound)

        for r in res:
            for k in identifier, name:
                if not k:
                    continue
                m = r.search(k)
                if m:
                    groups = m.groups()
                    young = old = None
                    if groups:
                        young = int(groups[0])
                        if len(groups) > 1 and groups[1] != None:
                            old = int(groups[1])
                    if old is None:
                        old = cls.and_up(young, k)
                    if old is None and young is not None:
                        old = young
                    if young is None and old is not None:
                        young = old
                    if old > 99:
                        # This is not an age at all.
                        old = None
                    if young > 99:
                        # This is not an age at all.
                        young = None
                    if young > old:
                        young, old = old, young
                    return cls.range_tuple(young, old)

        return cls.range_tuple(None, None)

    @classmethod
    def target_age_match(cls, query):
        target_age = None
        age_words = None
        target_age = cls.target_age(None, query, require_explicit_age_marker=True)
        if target_age:
            for r in cls.age_res:
                match = r.search(query)
                if match:
                    age_words = match.group()
                    break
        return (target_age, age_words)


# This is the large-scale structure of our classification system.
#
# If the name of a genre is a string, it's the name of the genre
# and there are no subgenres.
#
# If the name of a genre is a dictionary, the 'name' argument is the
# name of the genre, and the 'subgenres' argument is the list of the
# subgenres.

COMICS_AND_GRAPHIC_NOVELS = "Comics & Graphic Novels"

fiction_genres = [
    "Adventure",
    "Classics",
    COMICS_AND_GRAPHIC_NOVELS,
    "Drama",
    dict(name="Erotica", audiences=Classifier.AUDIENCE_ADULTS_ONLY),
    dict(name="Fantasy", subgenres=[
        "Epic Fantasy",
        "Historical Fantasy",
        "Urban Fantasy",
    ]),
    "Folklore",
    "Historical Fiction",
    dict(name="Horror", subgenres=[
        "Gothic Horror",
        "Ghost Stories",
        "Vampires",
        "Werewolves",
        "Occult Horror",
    ]),
    "Humorous Fiction",
    "Literary Fiction",
    "LGBTQ Fiction",
    dict(name="Mystery", subgenres=[
        "Crime & Detective Stories",
        "Hard-Boiled Mystery",
        "Police Procedural",
        "Cozy Mystery",
        "Historical Mystery",
        "Paranormal Mystery",
        "Women Detectives",
    ]),
    "Poetry",
    "Religious Fiction",
    dict(name="Romance", subgenres=[
        "Contemporary Romance",
        "Gothic Romance",
        "Historical Romance",
        "Paranormal Romance",
        "Western Romance",
        "Romantic Suspense",
    ]),
    dict(name="Science Fiction", subgenres=[
        "Dystopian SF",
        "Space Opera",
        "Cyberpunk",
        "Military SF",
        "Alternative History",
        "Steampunk",
        "Romantic SF",
        "Media Tie-in SF",
    ]),
    "Short Stories",
    dict(name="Suspense/Thriller",
        subgenres=[
            "Historical Thriller",
            "Espionage",
            "Supernatural Thriller",
            "Medical Thriller",
            "Political Thriller",
            "Psychological Thriller",
            "Technothriller",
            "Legal Thriller",
            "Military Thriller",
        ],
    ),
    "Urban Fiction",
    "Westerns",
    "Women's Fiction",
]

nonfiction_genres = [
    dict(name="Art & Design", subgenres=[
        "Architecture",
        "Art",
        "Art Criticism & Theory",
        "Art History",
        "Design",
        "Fashion",
        "Photography",
    ]),
    "Biography & Memoir",
    "Education",
    dict(name="Personal Finance & Business", subgenres=[
        "Business",
        "Economics",
        "Management & Leadership",
        "Personal Finance & Investing",
        "Real Estate",
    ]),
    dict(name="Parenting & Family", subgenres=[
        "Family & Relationships",
        "Parenting",
    ]),
    dict(name="Food & Health", subgenres=[
        "Bartending & Cocktails",
        "Cooking",
        "Health & Diet",
        "Vegetarian & Vegan",
    ]),
    dict(name="History", subgenres=[
        "African History",
        "Ancient History",
        "Asian History",
        "Civil War History",
        "European History",
        "Latin American History",
        "Medieval History",
        "Middle East History",
        "Military History",
        "Modern History",
        "Renaissance & Early Modern History",
        "United States History",
        "World History",
    ]),
    dict(name="Hobbies & Home", subgenres=[
        "Antiques & Collectibles",
        "Crafts & Hobbies",
        "Gardening",
        "Games",
        "House & Home",
        "Pets",
    ]),
    "Humorous Nonfiction",
    dict(name="Entertainment", subgenres=[
        "Film & TV",
        "Music",
        "Performing Arts",
    ]),
    "Life Strategies",
    "Literary Criticism",
    "Periodicals",
    "Philosophy",
    "Political Science",
    dict(name="Reference & Study Aids", subgenres=[
        "Dictionaries",
        "Foreign Language Study",
        "Law",
        "Study Aids",
    ]),
    dict(name="Religion & Spirituality", subgenres=[
        "Body, Mind & Spirit",
        "Buddhism",
        "Christianity",
        "Hinduism",
        "Islam",
        "Judaism",
    ]),
    dict(name="Science & Technology", subgenres=[
        "Computers",
        "Mathematics",
        "Medical",
        "Nature",
        "Psychology",
        "Science",
        "Social Sciences",
        "Technology",
    ]),
    "Self-Help",
    "Sports",
    "Travel",
    "True Crime",
]


[docs]class GenreData(object):
    def __init__(self, name, is_fiction, parent=None, audience_restriction=None):
        self.name = name
        self.parent = parent
        self.is_fiction = is_fiction
        self.subgenres = []
        if isinstance(audience_restriction, str):
            audience_restriction = [audience_restriction]
        self.audience_restriction = audience_restriction

    def __repr__(self):
        return "<GenreData: %s>" % self.name

    @property
    def self_and_subgenres(self):
        yield self
        for child in self.all_subgenres:
            yield child

    @property
    def all_subgenres(self):
        for child in self.subgenres:
            for subgenre in child.self_and_subgenres:
                yield subgenre

    @property
    def parents(self):
        parents = []
        p = self.parent
        while p:
            parents.append(p)
            p = p.parent
        return reversed(parents)

[docs]    def has_subgenre(self, subgenre):
        for s in self.subgenres:
            if s == subgenre or s.has_subgenre(subgenre):
                return True
        return False

    @property
    def variable_name(self):
        return self.name.replace("-", "_").replace(", & ", "_").replace(", ", "_").replace(" & ", "_").replace(" ", "_").replace("/", "_").replace("'", "")

[docs]    @classmethod
    def populate(cls, namespace, genres, fiction_source, nonfiction_source):
        """Create a GenreData object for every genre and subgenre in the given
        list of fiction and nonfiction genres.
        """
        for source, default_fiction in (
                (fiction_source, True),
                (nonfiction_source, False)):
            for item in source:
                subgenres = []
                audience_restriction = None
                name = item
                fiction = default_fiction
                if isinstance(item, dict):
                    name = item['name']
                    subgenres = item.get('subgenres', [])
                    audience_restriction = item.get('audience_restriction')
                    fiction = item.get('fiction', default_fiction)

                cls.add_genre(
                    namespace, genres, name, subgenres, fiction,
                    None, audience_restriction)

[docs]    @classmethod
    def add_genre(cls, namespace, genres, name, subgenres, fiction,
                  parent, audience_restriction):
        """Create a GenreData object. Add it to a dictionary and a namespace.
        """
        if isinstance(name, tuple):
            name, default_fiction = name
        default_fiction = None
        default_audience = None
        if parent:
            default_fiction = parent.is_fiction
            default_audience = parent.audience_restriction
        if isinstance(name, dict):
            data = name
            subgenres = data.get('subgenres', [])
            name = data['name']
            fiction = data.get('fiction', default_fiction)
            audience_restriction = data.get('audience', default_audience)
        if name in genres:
            raise ValueError("Duplicate genre name! %s" % name)

        # Create the GenreData object.
        genre_data = GenreData(name, fiction, parent, audience_restriction)
        if parent:
            parent.subgenres.append(genre_data)

        # Add the genre to the given dictionary, keyed on name.
        genres[genre_data.name] = genre_data

        # Convert the name to a Python-safe variable name,
        # and add it to the given namespace.
        namespace[genre_data.variable_name] = genre_data

        # Do the same for subgenres.
        for sub in subgenres:
            cls.add_genre(namespace, genres, sub, [], fiction,
                          genre_data, audience_restriction)

genres = dict()
GenreData.populate(globals(), genres, fiction_genres, nonfiction_genres)

[docs]class Lowercased(str):
    """A lowercased string that remembers its original value."""
    def __new__(cls, value):
        if isinstance(value, Lowercased):
            # Nothing to do.
            return value
        if not isinstance(value, str):
            value = str(value)
        new_value = value.lower()
        if new_value.endswith('.'):
            new_value = new_value[:-1]
        o = super(Lowercased, cls).__new__(cls, new_value)
        o.original = value
        return o

[docs]    @classmethod
    def scrub_identifier(cls, identifier):
        if not identifier:
            return identifier

[docs]class AgeOrGradeClassifier(Classifier):

[docs]    @classmethod
    def audience(cls, identifier, name):
        audience = AgeClassifier.audience(identifier, name)
        if audience == None:
            audience = GradeLevelClassifier.audience(identifier, name)
        return audience

[docs]    @classmethod
    def target_age(cls, identifier, name):
        """This tag might contain a grade level, an age in years, or nothing.
        We will try both a grade level and an age in years, but we
        will require that the tag indicate what's being measured. A
        tag like "9-12" will not match anything because we don't know if it's
        age 9-12 or grade 9-12.
        """
        age = AgeClassifier.target_age(identifier, name, True)
        if age == cls.range_tuple(None, None):
            age = GradeLevelClassifier.target_age(identifier, name, True)
        return age

[docs]class FreeformAudienceClassifier(AgeOrGradeClassifier):
    # NOTE: In practice, subjects like "books for all ages" tend to be
    # more like advertising slogans than reliable indicators of an
    # ALL_AGES audience. So the only subject of this type we handle is
    # the literal string "all ages", as it would appear, e.g., in the
    # output of the metadata wrangler.

[docs]    @classmethod
    def audience(cls, identifier, name):
        if identifier in ('children', 'pre-adolescent', 'beginning reader'):
            return cls.AUDIENCE_CHILDREN
        elif identifier in ('young adult', 'ya', 'teenagers', 'adolescent',
                            'early adolescents'):
            return cls.AUDIENCE_YOUNG_ADULT
        elif identifier == 'adult':
            return cls.AUDIENCE_ADULT
        elif identifier == 'adults only':
            return cls.AUDIENCE_ADULTS_ONLY
        elif identifier == 'all ages':
            return cls.AUDIENCE_ALL_AGES
        elif identifier == 'research':
            return cls.AUDIENCE_RESEARCH
        return AgeOrGradeClassifier.audience(identifier, name)

[docs]    @classmethod
    def target_age(cls, identifier, name):
        if identifier == 'beginning reader':
            return cls.range_tuple(5,8)
        if identifier == 'pre-adolescent':
            return cls.range_tuple(9, 12)
        if identifier == 'early adolescents':
            return cls.range_tuple(13, 15)
        if identifier == 'all ages':
            return cls.range_tuple(
                cls.ALL_AGES_AGE_CUTOFF, None
            )
        strict_age = AgeClassifier.target_age(identifier, name, True)
        if strict_age[0] or strict_age[1]:
            return strict_age

        strict_grade = GradeLevelClassifier.target_age(identifier, name, True)
        if strict_grade[0] or strict_grade[1]:
            return strict_grade

        # Default to assuming it's an unmarked age.
        return AgeClassifier.target_age(identifier, name, False)


[docs]class WorkClassifier(object):
    """Boil down a bunch of Classification objects into a few values."""

    # TODO: This needs a lot of additions.
    genre_publishers = {
        "Harlequin" : Romance,
        "Pocket Books/Star Trek" : Media_Tie_in_SF,
        "Kensington" : Urban_Fiction,
        "Fodor's Travel Publications" : Travel,
        "Marvel Entertainment, LLC" : Comics_Graphic_Novels,
    }

    genre_imprints = {
        "Harlequin Intrigue" : Romantic_Suspense,
        "Love Inspired Suspense" : Romantic_Suspense,
        "Harlequin Historical" : Historical_Romance,
        "Harlequin Historical Undone" : Historical_Romance,
        "Frommers" : Travel,
        "LucasBooks": Media_Tie_in_SF,
    }

    audience_imprints = {
        "Harlequin Teen" : Classifier.AUDIENCE_YOUNG_ADULT,
        "HarperTeen" : Classifier.AUDIENCE_YOUNG_ADULT,
        "Open Road Media Teen & Tween" : Classifier.AUDIENCE_YOUNG_ADULT,
        "Rosen Young Adult" : Classifier.AUDIENCE_YOUNG_ADULT,
    }

    not_adult_publishers = set([
        "Scholastic Inc.",
        "Random House Children's Books",
        "Little, Brown Books for Young Readers",
        "Penguin Young Readers Group",
        "Hachette Children's Books",
        "Nickelodeon Publishing",
    ])

    not_adult_imprints = set([
        "Scholastic",
        "Scholastic Paperbacks",
        "Random House Books for Young Readers",
        "HMH Books for Young Readers",
        "Knopf Books for Young Readers",
        "Delacorte Books for Young Readers",
        "Open Road Media Young Readers",
        "Macmillan Young Listeners",
        "Bloomsbury Childrens",
        "NYR Children's Collection",
        "Bloomsbury USA Childrens",
        "National Geographic Children's Books",
    ])

    fiction_imprints = set(["Del Rey"])
    nonfiction_imprints = set(["Harlequin Nonfiction"])

    nonfiction_publishers = set(["Wiley"])
    fiction_publishers = set([])

    def __init__(self, work, test_session=None, debug=False):
        self._db = Session.object_session(work)
        if test_session:
            self._db = test_session
        self.work = work
        self.fiction_weights = Counter()
        self.audience_weights = Counter()
        self.target_age_lower_weights = Counter()
        self.target_age_upper_weights = Counter()
        self.genre_weights = Counter()
        self.direct_from_license_source = set()
        self.prepared = False
        self.debug = debug
        self.classifications = []
        self.seen_classifications = set()
        self.log = logging.getLogger("Classifier (workid=%d)" % self.work.id)
        self.using_staff_genres = False
        self.using_staff_fiction_status = False
        self.using_staff_audience = False
        self.using_staff_target_age = False

        # Keep track of whether we've seen one of Overdrive's generic
        # "Juvenile" classifications, as well as its more specific
        # subsets like "Picture Books" and "Beginning Readers"
        self.overdrive_juvenile_generic = False
        self.overdrive_juvenile_with_target_age = False

[docs]    def add(self, classification):
        """Prepare a single Classification for consideration."""
        try:
            from ..model import DataSource, Subject
        except ValueError:
            from model import DataSource, Subject

        # We only consider a given classification once from a given
        # data source.
        key = (classification.subject, classification.data_source)
        if key in self.seen_classifications:
            return
        self.seen_classifications.add(key)
        if self.debug:
            self.classifications.append(classification)

        # Make sure the Subject is ready to be used in calculations.
        if not classification.subject.checked: # or self.debug
            classification.subject.assign_to_genre()

        if classification.comes_from_license_source:
            self.direct_from_license_source.add(classification)
        else:
            if classification.subject.describes_format:
                # TODO: This is a bit of a hack.
                #
                # Only accept a classification having to do with
                # format (e.g. 'comic books') if that classification
                # comes direct from the license source. Otherwise it's
                # really easy for a graphic adaptation of a novel to
                # get mixed up with the original novel, whereupon the
                # original book is classified as a graphic novel.
                return

        # Put the weight of the classification behind various
        # considerations.
        weight = classification.scaled_weight
        subject = classification.subject
        from_staff = classification.data_source.name == DataSource.LIBRARY_STAFF

        # if classification is genre or NONE from staff, ignore all non-staff genres
        is_genre = subject.genre != None
        is_none = (from_staff and subject.type == Subject.SIMPLIFIED_GENRE and subject.identifier == SimplifiedGenreClassifier.NONE)
        if is_genre or is_none:
            if not from_staff and self.using_staff_genres:
                return
            if from_staff and not self.using_staff_genres:
                # first encounter with staff genre, so throw out existing genre weights
                self.using_staff_genres = True
                self.genre_weights = Counter()
            if is_genre:
                self.weigh_genre(subject.genre, weight)

        # if staff classification is fiction or nonfiction, ignore all other fictions
        if not self.using_staff_fiction_status:
            if from_staff and subject.type == Subject.SIMPLIFIED_FICTION_STATUS:
                # encountering first staff fiction status,
                # so throw out existing fiction weights
                self.using_staff_fiction_status = True
                self.fiction_weights = Counter()
            self.fiction_weights[subject.fiction] += weight

        # if staff classification is about audience, ignore all other audience classifications
        if not self.using_staff_audience:
            if from_staff and subject.type == Subject.FREEFORM_AUDIENCE:
                self.using_staff_audience = True
                self.audience_weights = Counter()
                self.audience_weights[subject.audience] += weight
            else:
                if classification.generic_juvenile_audience:
                    # We have a generic 'juvenile' classification. The
                    # audience might say 'Children' or it might say 'Young
                    # Adult' but we don't actually know which it is.
                    #
                    # We're going to split the difference, with a slight
                    # preference for YA, to bias against showing
                    # age-inappropriate material to children. To
                    # counterbalance the fact that we're splitting up the
                    # weight this way, we're also going to treat this
                    # classification as evidence _against_ an 'adult'
                    # classification.
                    self.audience_weights[Classifier.AUDIENCE_YOUNG_ADULT] += (weight * 0.6)
                    self.audience_weights[Classifier.AUDIENCE_CHILDREN] += (weight * 0.4)
                    for audience in Classifier.AUDIENCES_ADULT:
                        if audience != Classifier.AUDIENCE_ALL_AGES:
                            # 'All Ages' is considered an adult audience,
                            # but a generic 'juvenile' classification
                            # is not evidence against it.
                            self.audience_weights[audience] -= weight * 0.5
                else:
                    self.audience_weights[subject.audience] += weight

        if not self.using_staff_target_age:
            if from_staff and subject.type == Subject.AGE_RANGE:
                self.using_staff_target_age = True
                self.target_age_lower_weights = Counter()
                self.target_age_upper_weights = Counter()
            if subject.target_age:
                # Figure out how reliable this classification really is as
                # an indicator of a target age.
                scaled_weight = classification.weight_as_indicator_of_target_age
                target_min = subject.target_age.lower
                target_max = subject.target_age.upper
                if target_min is not None:
                    if not subject.target_age.lower_inc:
                        target_min += 1
                    self.target_age_lower_weights[target_min] += scaled_weight
                if target_max is not None:
                    if not subject.target_age.upper_inc:
                        target_max -= 1
                    self.target_age_upper_weights[target_max] += scaled_weight

        if not self.using_staff_audience and not self.using_staff_target_age:
            if subject.type=='Overdrive' and subject.audience==Classifier.AUDIENCE_CHILDREN:
                if subject.target_age and (
                        subject.target_age.lower or subject.target_age.upper
                ):
                    # This is a juvenile classification like "Picture
                    # Books" which implies a target age.
                    self.overdrive_juvenile_with_target_age = classification
                else:
                    # This is a generic juvenile classification like
                    # "Juvenile Fiction".
                    self.overdrive_juvenile_generic = classification

[docs]    def weigh_metadata(self):
        """Modify the weights according to the given Work's metadata.

        Use work metadata to simulate classifications.

        This is basic stuff, like: Harlequin tends to publish
        romances.
        """
        if self.work.title and ('Star Trek:' in self.work.title
            or 'Star Wars:' in self.work.title
            or ('Jedi' in self.work.title
                and self.work.imprint=='Del Rey')
        ):
            self.weigh_genre(Media_Tie_in_SF, 100)

        publisher = self.work.publisher
        imprint = self.work.imprint
        if (imprint in self.nonfiction_imprints
            or publisher in self.nonfiction_publishers):
            self.fiction_weights[False] = 100
        elif (imprint in self.fiction_imprints
              or publisher in self.fiction_publishers):
            self.fiction_weights[True] = 100

        if imprint in self.genre_imprints:
            self.weigh_genre(self.genre_imprints[imprint], 100)
        elif publisher in self.genre_publishers:
            self.weigh_genre(self.genre_publishers[publisher], 100)

        if imprint in self.audience_imprints:
            self.audience_weights[self.audience_imprints[imprint]] += 100
        elif (publisher in self.not_adult_publishers
              or imprint in self.not_adult_imprints):
            for audience in [Classifier.AUDIENCE_ADULT,
                             Classifier.AUDIENCE_ADULTS_ONLY]:
                self.audience_weights[audience] -= 100

[docs]    def prepare_to_classify(self):
        """Called the first time classify() is called. Does miscellaneous
        one-time prep work that requires all data to be in place.
        """
        self.weigh_metadata()

        explicitly_indicated_audiences = (
            Classifier.AUDIENCE_CHILDREN,
            Classifier.AUDIENCE_YOUNG_ADULT,
            Classifier.AUDIENCE_ADULTS_ONLY)
        audiences_from_license_source = set(
            [classification.subject.audience
             for classification in self.direct_from_license_source]
        )
        if (self.direct_from_license_source
            and not self.using_staff_audience
            and not any(
                audience in explicitly_indicated_audiences
                for audience in audiences_from_license_source
        )):
            # If this was erotica, or a book for children or young
            # adults, the distributor would have given some indication
            # of that fact. In the absense of any such indication, we
            # can assume very strongly that this is a regular old book
            # for adults.
            #
            # 3M is terrible at distinguishing between childrens'
            # books and YA books, but books for adults can be
            # distinguished by their _lack_ of childrens/YA
            # classifications.
            self.audience_weights[Classifier.AUDIENCE_ADULT] += 500

        if (self.overdrive_juvenile_generic
            and not self.overdrive_juvenile_with_target_age):
            # This book is classified under 'Juvenile Fiction' but not
            # under 'Picture Books' or 'Beginning Readers'. The
            # implicit target age here is 9-12 (the portion of
            # Overdrive's 'juvenile' age range not covered by 'Picture
            # Books' or 'Beginning Readers'.
            weight = self.overdrive_juvenile_generic.weight_as_indicator_of_target_age
            self.target_age_lower_weights[9] += weight
            self.target_age_upper_weights[12] += weight

        self.prepared = True

[docs]    def classify(self, default_fiction=None, default_audience=None):
        # Do a little prep work.
        if not self.prepared:
            self.prepare_to_classify()

        if self.debug:
            for c in self.classifications:
                self.log.debug(
                    "%d %r (via %s)", c.weight, c.subject, c.data_source.name
                )

        # Actually figure out the classifications
        fiction = self.fiction(default_fiction=default_fiction)
        genres = self.genres(fiction)
        audience = self.audience(genres, default_audience=default_audience)
        target_age = self.target_age(audience)
        if self.debug:
            self.log.debug("Fiction weights:")
            for k, v in self.fiction_weights.most_common():
                self.log.debug(" %s: %s", v, k)
            self.log.debug("Genre weights:")
            for k, v in self.genre_weights.most_common():
                self.log.debug(" %s: %s", v, k)
            self.log.debug("Audience weights:")
            for k, v in self.audience_weights.most_common():
                self.log.debug(" %s: %s", v, k)
        return genres, fiction, audience, target_age

[docs]    def fiction(self, default_fiction=None):
        """Is it more likely this is a fiction or nonfiction book?"""
        if not self.fiction_weights:
            # We have absolutely no idea one way or the other, and it
            # would be irresponsible to guess.
            return default_fiction
        is_fiction = default_fiction
        if self.fiction_weights[True] > self.fiction_weights[False]:
            is_fiction = True
        elif self.fiction_weights[False] > 0:
            is_fiction = False
        return is_fiction

[docs]    def audience(self, genres=[], default_audience=None):
        """What's the most likely audience for this book?
        :param default_audience: To avoid embarassing situations we will
        classify works as being intended for adults absent convincing
        evidence to the contrary. In some situations (like the metadata
        wrangler), it's better to state that we have no information, so
        default_audience can be set to None.
        """

        # If we determined that Erotica was a significant enough
        # component of the classification to count as a genre, the
        # audience will always be 'Adults Only', even if the audience
        # weights would indicate something else.
        if Erotica in genres:
            return Classifier.AUDIENCE_ADULTS_ONLY

        w = self.audience_weights
        if not self.audience_weights:
            # We have absolutely no idea, and it would be
            # irresponsible to guess.
            return default_audience

        children_weight = w.get(Classifier.AUDIENCE_CHILDREN, 0)
        ya_weight = w.get(Classifier.AUDIENCE_YOUNG_ADULT, 0)
        adult_weight = w.get(Classifier.AUDIENCE_ADULT, 0)
        adults_only_weight = w.get(Classifier.AUDIENCE_ADULTS_ONLY, 0)
        all_ages_weight = w.get(Classifier.AUDIENCE_ALL_AGES, 0)
        research_weight = w.get(Classifier.AUDIENCE_RESEARCH, 0)

        total_adult_weight = adult_weight + adults_only_weight
        total_weight = sum(w.values())

        audience = default_audience

        # A book will be classified as a young adult or childrens'
        # book when the weight of that audience is more than twice the
        # combined weight of the 'adult' and 'adults only' audiences.
        # If that combined weight is zero, then any amount of evidence
        # is sufficient.
        threshold = total_adult_weight * 2

        # If both the 'children' weight and the 'YA' weight pass the
        # threshold, we go with the one that weighs more.
        # If the 'children' weight passes the threshold on its own
        # we go with 'children'.
        total_juvenile_weight = children_weight + ya_weight
        if (research_weight > (total_adult_weight + all_ages_weight) and
            research_weight > (total_juvenile_weight + all_ages_weight) and
            research_weight > threshold):
            audience = Classifier.AUDIENCE_RESEARCH
        elif (all_ages_weight > total_adult_weight and
            all_ages_weight > total_juvenile_weight):
            audience = Classifier.AUDIENCE_ALL_AGES
        elif children_weight > threshold and children_weight > ya_weight:
            audience = Classifier.AUDIENCE_CHILDREN
        elif ya_weight > threshold:
            audience = Classifier.AUDIENCE_YOUNG_ADULT
        elif total_juvenile_weight > threshold:
            # Neither weight passes the threshold on its own, but
            # combined they do pass the threshold. Go with
            # 'Young Adult' to be safe.
            audience = Classifier.AUDIENCE_YOUNG_ADULT
        elif total_adult_weight > 0:
            audience = Classifier.AUDIENCE_ADULT

        # If the 'adults only' weight is more than 1/4 of the total adult
        # weight, classify as 'adults only' to be safe.
        #
        # TODO: This has not been calibrated.
        if (audience==Classifier.AUDIENCE_ADULT
            and adults_only_weight > total_adult_weight/4):
            audience = Classifier.AUDIENCE_ADULTS_ONLY

        return audience

[docs]    @classmethod
    def top_tier_values(self, counter):
        """Given a Counter mapping values to their frequency of occurance,
        return all values that are as common as the most common value.
        """
        top_frequency = None
        top_tier = set()
        for age, freq in counter.most_common():
            if not top_frequency:
                top_frequency = freq
            if freq != top_frequency:
                # We've run out of candidates
                break
            else:
                # This candidate occurs with the maximum frequency.
                top_tier.add(age)
        return top_tier

[docs]    def target_age(self, audience):
        """Derive a target age from the gathered data."""
        if audience not in (
                Classifier.AUDIENCE_CHILDREN, Classifier.AUDIENCE_YOUNG_ADULT
        ):
            # This is not a children's or YA book. Assertions about
            # target age are irrelevant and the default value rules.
            return Classifier.default_target_age_for_audience(audience)

        # Only consider the most reliable classifications.

        # Try to reach consensus on the lower and upper bounds of the
        # age range.
        if self.debug:
            if self.target_age_lower_weights:
                self.log.debug("Possible target age minima:")
                for k, v in self.target_age_lower_weights.most_common():
                    self.log.debug(" %s: %s", v, k)
            if self.target_age_upper_weights:
                self.log.debug("Possible target age maxima:")
                for k, v in self.target_age_upper_weights.most_common():
                    self.log.debug(" %s: %s", v, k)

        target_age_min = None
        target_age_max = None
        if self.target_age_lower_weights:
            # Find the youngest age in the top tier of values.
            candidates = self.top_tier_values(self.target_age_lower_weights)
            target_age_min = min(candidates)

        if self.target_age_upper_weights:
            # Find the oldest age in the top tier of values.
            candidates = self.top_tier_values(self.target_age_upper_weights)
            target_age_max = max(candidates)

        if not target_age_min and not target_age_max:
            # We found no opinions about target age. Use the default.
            return Classifier.default_target_age_for_audience(audience)

        if target_age_min is None:
            target_age_min = target_age_max

        if target_age_max is None:
            target_age_max = target_age_min

        # Err on the side of setting the minimum age too high.
        if target_age_min > target_age_max:
            target_age_max = target_age_min
        return Classifier.range_tuple(target_age_min, target_age_max)

[docs]    def genres(self, fiction, cutoff=0.15):
        """Consolidate genres and apply a low-pass filter."""
        # Remove any genres whose fiction status is inconsistent with the
        # (independently determined) fiction status of the book.
        #
        # It doesn't matter if a book is classified as 'science
        # fiction' 100 times; if we know it's nonfiction, it can't be
        # science fiction. (It's probably a history of science fiction
        # or something.)
        genres = dict(self.genre_weights)
        if not genres:
            # We have absolutely no idea, and it would be
            # irresponsible to guess.
            return {}

        for genre in list(genres.keys()):
            # If we have a fiction determination, that lets us eliminate
            # possible genres that conflict with that determination.
            #
            # TODO: If we don't have a fiction determination, the
            # genres we end up with may help us make one.
            if fiction is not None and (genre.default_fiction != fiction):
                del genres[genre]

        # Consolidate parent genres into their heaviest subgenre.
        genres = self.consolidate_genre_weights(genres)
        total_weight = float(sum(genres.values()))

        # Strip out the stragglers.
        for g, score in list(genres.items()):
            affinity = score / total_weight
            if affinity < cutoff:
                total_weight -= score
                del genres[g]
        return genres

[docs]    def weigh_genre(self, genre_data, weight):
        """A helper method that ensure we always use database Genre
        objects, not GenreData objects, when weighting genres.
        """
        try:
            from ..model import Genre
        except ValueError:
            from model import Genre
        genre, ignore = Genre.lookup(self._db, genre_data.name)
        self.genre_weights[genre] += weight

[docs]    @classmethod
    def consolidate_genre_weights(
            cls, weights, subgenre_swallows_parent_at=0.03
    ):
        """If a genre and its subgenres both show up, examine the subgenre
        with the highest weight. If its weight exceeds a certain
        proportion of the weight of the parent genre, assign the
        parent's weight to the subgenre and remove the parent.
        """
        #print("Before consolidation:")
        #for genre, weight in weights.items():
        #    print("", genre, weight)

        # Convert Genre objects to GenreData.
        consolidated = Counter()
        for genre, weight in list(weights.items()):
            if not isinstance(genre, GenreData):
                genre = genres[genre.name]
            consolidated[genre] += weight

        heaviest_child = dict()
        for genre, weight in list(consolidated.items()):
            for parent in genre.parents:
                if parent in consolidated:
                    if ((not parent in heaviest_child)
                        or weight > heaviest_child[parent][1]):
                        heaviest_child[parent] = (genre, weight)
        #print("Heaviest child:")
        #for parent, (genre, weight) in heaviest_child.items():
        #    print("", parent, genre, weight)
        made_it = False
        while not made_it:
            for parent, (child, weight) in sorted(
                heaviest_child.items(),
                key=lambda genre: genre[1][1], reverse=True
            ):
                parent_weight = consolidated.get(parent, 0)
                if weight > (subgenre_swallows_parent_at * parent_weight):
                    consolidated[child] += parent_weight
                    del consolidated[parent]
                    changed = False
                    for parent in parent.parents:
                        if parent in heaviest_child:
                            heaviest_child[parent] = (child, consolidated[child])
                            changed = True
                    if changed:
                        # We changed the dict, so we need to restart
                        # the iteration.
                        break
            # We made it all the way through the dict without changing it.
            made_it = True
        #print("Final heaviest child:")
        #for parent, (genre, weight) in heaviest_child.items():
        #    print("", parent, genre, weight)
        #print("After consolidation:")
        #for genre, weight in consolidated.items():
        #    print("", genre, weight)
        return consolidated

# Make a dictionary of classification schemes to classifiers.

Classifier.classifiers[Classifier.FREEFORM_AUDIENCE] = FreeformAudienceClassifier
Classifier.classifiers[Classifier.AXIS_360_AUDIENCE] = AgeOrGradeClassifier

# Finally, import classifiers described in submodules.
from .age import (
    GradeLevelClassifier,
    InterestLevelClassifier,
    AgeClassifier,
)
from .bisac import BISACClassifier
from .rbdigital import (
    RBDigitalAudienceClassifier,
    RBDigitalSubjectClassifier,
)
from .ddc import DeweyDecimalClassifier
from .lcc import LCCClassifier
from .gutenberg import GutenbergBookshelfClassifier
from .bic import BICClassifier
from .simplified import (
    SimplifiedFictionClassifier,
    SimplifiedGenreClassifier,
)
from .overdrive import OverdriveClassifier
from .keyword import (
    KeywordBasedClassifier,
    LCSHClassifier,
    FASTClassifier,
    TAGClassifier,
    Eg,
)
Source code for core.classifier

Library Simplified Circulation Manager

Navigation

Related Topics