Source code for core.classifier.lcc

from . import *

[docs]class LCCClassifier(Classifier): TOP_LEVEL = re.compile("^([A-Z]{1,2})") FICTION = set(["PN", "PQ", "PR", "PS", "PT", "PZ"]) JUVENILE = set(["PZ"]) GENRES = { # Unclassified/complicated stuff. # "America": E11-E143 # Ancient_History: D51-D90 # Angling: SH401-SH691 # Civil_War_History: E456-E655 # Geography: leftovers of G # Islam: BP1-BP253 # Latin_American_History: F1201-F3799 # Medieval History: D111-D203 # Military_History: D25-D27 # Modern_History: ??? # Renaissance_History: D219-D234 (1435-1648, so roughly) # Sports: GV557-1198.995 # TODO: E and F are actually "the Americas". # United_States_History is E151-E909, F1-F975 but not E456-E655 African_History : ["DT"], Ancient_History : ["DE"], Architecture : ["NA"], Art_Criticism_Theory : ["BH"], Asian_History : ["DS", "DU"], Biography_Memoir : ["CT"], Business : ["HC", "HF", "HJ"], Christianity : ["BR", "BS", "BT", "BV", "BX"], Cooking : ["TX"], Crafts_Hobbies : ["TT"], Economics : ["HB"], Education : ["L"], European_History : ["DA", "DAW", "DB", "DD", "DF", "DG", "DH", "DJ", "DK", "DL", "DP", "DQ", "DR"], Folklore : ["GR"], Games : ["GV"], Islam : ["BP"], Judaism : ["BM"], Literary_Criticism : ["Z"], Mathematics : ["QA", "HA", "GA"], Medical: ["QM", "R"], Military_History: ["U", "V"], Music: ["M"], Parenting_Family : ["HQ"], Periodicals : ["AP", "AN"], Philosophy : ["BC", "BD", "BJ"], Photography: ["TR"], Political_Science : ["J", "HX"], Psychology : ["BF"], Reference_Study_Aids : ["AE", "AG", "AI"], Religion_Spirituality : ["BL", "BQ"], Science : ["QB", "QC", "QD", "QE", "QH", "QK", "QL", "QR", "CC", "GB", "GC", "QP"], Social_Sciences : ["HD", "HE", "HF", "HM", "HN", "HS", "HT", "HV", "GN", "GF", "GT"], Sports: ["SK"], World_History : ["CB"], } LEFTOVERS = dict( B=Philosophy, T=Technology, Q=Science, S=Science, H=Social_Sciences, D=History, N=Art, L=Education, E=United_States_History, F=United_States_History, BP=Religion_Spirituality, ) NAMES = json.load(open(os.path.join(resource_dir, "lcc_one_level.json")))
[docs] @classmethod def scrub_identifier(cls, identifier): if not identifier: return identifier return identifier.upper()
[docs] @classmethod def name_for(cls, identifier): return cls.NAMES.get(identifier, None)
[docs] @classmethod def is_fiction(cls, identifier, name): if identifier == 'P': return True if not identifier.startswith('P'): return False for i in cls.FICTION: if identifier.startswith(i): return True return False
[docs] @classmethod def genre(cls, identifier, name, fiction=None, audience=None): for genre, strings in list(cls.GENRES.items()): for s in strings: if identifier.startswith(s): return genre for prefix, genre in list(cls.LEFTOVERS.items()): if identifier.startswith(prefix): return genre return None
[docs] @classmethod def audience(cls, identifier, name): if identifier.startswith("PZ"): return cls.AUDIENCE_CHILDREN # Everything else is _supposedly_ for adults, but we don't # trust that assumption. return None
Classifier.classifiers[Classifier.LCC] = LCCClassifier