Source code for core.classifier.age

import re

from . import Classifier

class GradeLevelClassifier(Classifier):
    """Derive a target age range from a US school grade level.

    Grade levels are recognised in free text (e.g. "Grades 3-5",
    "gr. k-2", "5th grade", "kindergarten") and translated into ages
    through ``american_grade_to_age``.
    """

    # How old a kid is when they start grade N in the US.
    # All keys are lowercase; captured text is lowercased before lookup.
    american_grade_to_age = {
        # Preschool: 3-4 years
        'preschool': 3,
        'pre-school': 3,
        'p': 3,
        'pk': 4,

        # Easy readers
        'kindergarten': 5,
        'k': 5,
        '0': 5,
        'first': 6,
        '1': 6,
        'second': 7,
        '2': 7,

        # Chapter Books
        'third': 8,
        '3': 8,
        'fourth': 9,
        '4': 9,
        'fifth': 10,
        '5': 10,
        'sixth': 11,
        '6': 11,
        '7': 12,
        'seventh': 12,
        '8': 13,
        'eighth': 13,

        # YA
        '9': 14,
        'ninth': 14,
        '10': 15,
        'tenth': 15,
        '11': 16,
        # Must agree with '11' above (was previously 17).
        'eleventh': 16,
        '12': 17,
        'twelfth': 17,
    }

    # Regular expressions that match common ways of expressing grade
    # levels. Order matters: the first pattern that matches wins, so the
    # more specific "N to M" forms are listed before the looser ones.
    # Raw strings are used so that ``\.`` and ``\b`` reach the regex
    # engine without relying on Python's invalid-escape fallback.
    grade_res = [
        re.compile(x, re.I) for x in [
            r"grades? ([kp0-9]+) to ([kp0-9]+)?",
            r"grades? ([kp0-9]+) ?-? ?([kp0-9]+)?",
            r"gr\.? ([kp0-9]+) ?-? ?([kp0-9]+)?",
            r"grades?: ([kp0-9]+) to ([kp0-9]+)",
            r"grades?: ([kp0-9]+) ?-? ?([kp0-9]+)?",
            r"gr\.? ([kp0-9]+)",
            r"([0-9]+)[tnsr][hdt] grade",
            r"([a-z]+) grade",
            r'\b(kindergarten|preschool)\b',
        ]
    ]

    # Looser patterns with no explicit "grade" marker. They are only
    # consulted when the caller does not require an explicit marker.
    generic_grade_res = [
        re.compile(r"([kp0-9]+) ?- ?([0-9]+)", re.I),
        re.compile(r"([kp0-9]+) ?to ?([0-9]+)", re.I),
        re.compile(r"^([0-9]+)\b", re.I),
        re.compile(r"^([kp])\b", re.I),
    ]

    @classmethod
    def audience(cls, identifier, name, require_explicit_age_marker=False):
        """Return the audience implied by the grade level in the input.

        The target age is computed by ``target_age`` and handed to
        ``default_audience_for_target_age`` (inherited from Classifier).

        :param identifier: Subject identifier text, or a falsy value.
        :param name: Subject name text, or a falsy value.
        :param require_explicit_age_marker: Passed through to
            ``target_age`` as ``require_explicit_grade_marker``.
        """
        target_age = cls.target_age(
            identifier, name, require_explicit_age_marker
        )
        return cls.default_audience_for_target_age(target_age)

    @classmethod
    def _grade_to_age(cls, grade):
        """Map one captured grade token to a starting age, or None.

        The token is lowercased (the regexes are case-insensitive but
        the lookup table is not) and leading zeros are stripped unless
        that would leave nothing, so "03" becomes "3" but "0" stays "0".
        """
        if not grade:
            return None
        grade = grade.lower()
        # Strip leading zeros
        grade = grade.lstrip('0') or grade
        return cls.american_grade_to_age.get(grade)

    @classmethod
    def target_age(cls, identifier, name, require_explicit_grade_marker=False):
        """Extract a target age range from a grade-level description.

        :param identifier: Subject identifier text, or a falsy value.
        :param name: Subject name text, or a falsy value.
        :param require_explicit_grade_marker: When true, only the
            patterns in ``grade_res`` are tried; otherwise the looser
            ``generic_grade_res`` patterns are tried as well.
        :return: ``cls.range_tuple(young, old)``, or
            ``cls.range_tuple(None, None)`` when no usable grade level
            is found.
        """
        for text in (identifier, name):
            if not text:
                continue
            lowered = text.lower()
            # "education": this is a book about teaching, e.g. fifth grade.
            # "grader": this is a book about, e.g. fifth graders.
            if 'education' in lowered or 'grader' in lowered:
                return cls.range_tuple(None, None)

        patterns = cls.grade_res
        if not require_explicit_grade_marker:
            patterns = patterns + cls.generic_grade_res

        for pattern in patterns:
            for text in (identifier, name):
                if not text:
                    continue
                match = pattern.search(text)
                if not match:
                    continue

                groups = match.groups()
                if len(groups) == 1:
                    young, old = groups[0], None
                else:
                    young, old = groups

                young = cls._grade_to_age(young)
                old = cls._grade_to_age(old)

                if not young and not old:
                    # The first matching pattern captured something
                    # that is not a known grade; give up rather than
                    # trying the remaining patterns.
                    return cls.range_tuple(None, None)

                if old is None:
                    # NOTE(review): and_up is defined on Classifier;
                    # presumably it supplies an upper bound for
                    # "N and up" phrasing -- confirm.
                    old = cls.and_up(young, text)
                if old is None and young is not None:
                    old = young
                if young is None and old is not None:
                    young = old
                if old and young and old < young:
                    young, old = old, young
                return cls.range_tuple(young, old)

        return cls.range_tuple(None, None)

    @classmethod
    def target_age_match(cls, query):
        """Find a grade-level expression in a search query.

        :param query: Free-text query string.
        :return: A 2-tuple ``(target_age, grade_words)`` where
            ``target_age`` is the result of ``target_age`` with an
            explicit grade marker required, and ``grade_words`` is the
            text matched by the first ``grade_res`` pattern that hits
            (or None).
        """
        grade_words = None
        target_age = cls.target_age(
            None, query, require_explicit_grade_marker=True
        )
        # Guard on ``query`` so an empty/None query never reaches
        # ``re.search``.
        if target_age and query:
            for pattern in cls.grade_res:
                match = pattern.search(query)
                if match:
                    grade_words = match.group()
                    break
        return (target_age, grade_words)
class InterestLevelClassifier(Classifier):
    """Classify by interest-level code: 'lg', 'mg', 'mg+' or 'ug'."""

    @classmethod
    def audience(cls, identifier, name):
        """Return the audience constant for an interest-level code.

        'lg', 'mg+' and 'mg' give ``AUDIENCE_CHILDREN``; 'ug' gives
        ``AUDIENCE_YOUNG_ADULT``; anything else gives None. ``name``
        is not consulted.
        """
        if identifier in ('lg', 'mg+', 'mg'):
            return cls.AUDIENCE_CHILDREN
        if identifier == 'ug':
            return cls.AUDIENCE_YOUNG_ADULT
        return None

    @classmethod
    def target_age(cls, identifier, name):
        """Return the age range for an interest-level code.

        'lg' is ages 5-8, 'mg+' and 'mg' are 9-13, 'ug' is 14-17.
        Unrecognised codes give None. ``name`` is not consulted.
        """
        age_bounds_by_code = (
            (('lg',), 5, 8),
            (('mg+', 'mg'), 9, 13),
            (('ug',), 14, 17),
        )
        for codes, youngest, oldest in age_bounds_by_code:
            if identifier in codes:
                return cls.range_tuple(youngest, oldest)
        return None
class AgeClassifier(Classifier):
    """Derive a target age range from an explicit age description.

    Ages are recognised in free text such as "Ages 4-8", "age: 5 to 8",
    "10 years and up" or "baby-3 years".
    """

    # Regular expressions that match common ways of expressing ages.
    # Order matters: the first pattern that matches wins.
    age_res = [
        re.compile(x, re.I) for x in [
            r"age ([0-9]+) ?-? ?([0-9]+)?",
            # "age: N to M" has to come before the looser "age: N[-M]"
            # form; the latter matches every string the former does, so
            # in the other order the upper bound would never be seen.
            r"age: ([0-9]+) to ([0-9]+)",
            r"age: ([0-9]+) ?-? ?([0-9]+)?",
            r"ages ([0-9]+) ?- ?([0-9]+)",
            r"([0-9]+) ?- ?([0-9]+) years?",
            r"([0-9]+) years?",
            r"ages ([0-9]+)+",
            r"([0-9]+) and up",
            r"([0-9]+) years? and up",
        ]
    ]

    # Looser patterns with no explicit "age" marker. They are only
    # consulted when the caller does not require an explicit marker.
    generic_age_res = [
        re.compile(r"([0-9]+) ?- ?([0-9]+)", re.I),
        re.compile(r"^([0-9]+)\b", re.I),
    ]

    # "baby-N year(s)" at the start of an identifier.
    baby_re = re.compile(r"^baby ?- ?([0-9]+) year", re.I)

    @classmethod
    def audience(cls, identifier, name, require_explicit_age_marker=False):
        """Return the audience implied by the age range in the input.

        The target age is computed by ``target_age`` and handed to
        ``default_audience_for_target_age`` (inherited from Classifier).

        :param identifier: Subject identifier text, or a falsy value.
        :param name: Subject name text, or a falsy value.
        :param require_explicit_age_marker: Passed through to
            ``target_age``.
        """
        target_age = cls.target_age(
            identifier, name, require_explicit_age_marker
        )
        return cls.default_audience_for_target_age(target_age)

    @classmethod
    def target_age(cls, identifier, name, require_explicit_age_marker=False):
        """Extract a target age range from an age description.

        :param identifier: Subject identifier text, or a falsy value.
        :param name: Subject name text, or a falsy value.
        :param require_explicit_age_marker: When true, only the
            patterns in ``age_res`` are tried; otherwise the looser
            ``generic_age_res`` patterns are tried as well.
        :return: ``cls.range_tuple(young, old)``. Either bound is None
            when it exceeds 99; both are None when nothing matches.
        """
        patterns = cls.age_res
        if not require_explicit_age_marker:
            patterns = patterns + cls.generic_age_res

        if identifier:
            baby = cls.baby_re.search(identifier)
            if baby:
                # This is for babies.
                return cls.range_tuple(0, int(baby.group(1)))

        for pattern in patterns:
            for text in (identifier, name):
                if not text:
                    continue
                match = pattern.search(text)
                if not match:
                    continue

                groups = match.groups()
                young = old = None
                if groups and groups[0] is not None:
                    young = int(groups[0])
                if len(groups) > 1 and groups[1] is not None:
                    old = int(groups[1])

                if old is None:
                    # NOTE(review): and_up is defined on Classifier;
                    # presumably it supplies an upper bound for
                    # "N and up" phrasing -- confirm.
                    old = cls.and_up(young, text)
                if old is None and young is not None:
                    old = young
                if young is None and old is not None:
                    young = old

                # Anything above 99 is not an age at all. The None
                # checks keep these comparisons from raising on
                # Python 3 if a bound is still unset.
                if old is not None and old > 99:
                    old = None
                if young is not None and young > 99:
                    young = None

                if young is not None and old is not None and young > old:
                    young, old = old, young
                return cls.range_tuple(young, old)

        return cls.range_tuple(None, None)

    @classmethod
    def target_age_match(cls, query):
        """Find an age expression in a search query.

        :param query: Free-text query string.
        :return: A 2-tuple ``(target_age, age_words)`` where
            ``target_age`` is the result of ``target_age`` with an
            explicit age marker required, and ``age_words`` is the text
            matched by the first ``age_res`` pattern that hits (or
            None).
        """
        age_words = None
        target_age = cls.target_age(
            None, query, require_explicit_age_marker=True
        )
        # Guard on ``query`` so an empty/None query never reaches
        # ``re.search``.
        if target_age and query:
            for pattern in cls.age_res:
                match = pattern.search(query)
                if match:
                    age_words = match.group()
                    break
        return (target_age, age_words)
# Store each classifier class from this module in the
# Classifier.classifiers mapping, keyed by the Classifier constant for
# the kind of subject it handles.
Classifier.classifiers[Classifier.AGE_RANGE] = AgeClassifier
Classifier.classifiers[Classifier.GRADE_LEVEL] = GradeLevelClassifier
Classifier.classifiers[Classifier.INTEREST_LEVEL] = InterestLevelClassifier