# encoding: utf-8
"""Miscellaneous utilities"""
import re
import string
from collections import Counter
import api.flask_sqlalchemy_session
import sqlalchemy
from money import Money
from sqlalchemy import distinct, select
from sqlalchemy.sql.functions import func
# For backwards compatibility, import items that were moved to
# languages.py
from .languages import LanguageCodes, LookupTable
def batch(iterable, size=1):
    """Yield consecutive slices of `iterable`, each at most `size` items long.

    :param iterable: A sized, sliceable sequence (it must support ``len()``
        and slice indexing).
    :param size: Maximum number of items per slice.
    :return: A generator over the slices, in order; the last slice may be
        shorter than `size`.
    """
    total = len(iterable)
    yield from (
        iterable[begin:min(begin + size, total)]
        for begin in range(0, total, size)
    )
def fast_query_count(query):
    """Count the rows a query would return without wrapping it in a subquery.

    A ``SELECT count(...)`` is issued against the first FROM element of the
    query's selectable. If the query is DISTINCT ON a list/tuple of columns,
    the count is taken over the distinct concatenation of those columns.

    :param query: Query object exposing ``selectable``, ``session`` and
        ``_limit_clause``.
    :return: The counted number of rows, capped at the query's LIMIT value
        when a LIMIT is set and is smaller than the count.
    """
    selectable = query.selectable
    source_table = selectable.froms[0]
    distinct_on = selectable._distinct_on

    if selectable._distinct and isinstance(distinct_on, (list, tuple)):
        # When using distinct to select from the db, the distinct
        # columns need to be incorporated into the count itself.
        count_expression = func.count(distinct(func.concat(*distinct_on)))
    else:
        count_expression = func.count()

    count_statement = (
        select()
        .with_only_columns([count_expression])
        .select_from(source_table)
        .order_by(None)
    )
    total = query.session.execute(count_statement).scalar()

    # A LIMIT on the original query caps how many rows it can return.
    limit_clause = query._limit_clause
    if limit_clause is not None and limit_clause.value < total:
        return limit_clause.value
    return total
def slugify(text, length_limit=None):
    """Take a string and turn it into a slug.

    The text is lowercased, the punctuation characters ``. ! @ # ' $ , ? ( )``
    are removed, ``&`` becomes ``and``, and spaces become single hyphens.

    :param text: The string to convert.
    :param length_limit: Optional maximum length of the slug; the slug is
        simply truncated to this many characters. A falsy value means no
        limit.
    :return: The slug.

    :Example:

    >>> slugify('Some (???) Title Somewhere')
    'some-title-somewhere'
    >>> slugify('Sly & the Family Stone')
    'sly-and-the-family-stone'
    >>> slugify('Happy birthday!', length_limit=4)
    'happ'
    """
    # Raw strings keep the regex free of invalid string escapes such as
    # "\(" which modern Python versions warn about.
    slug = re.sub(r"[.!@#'$,?()]", '', text.lower())
    slug = slug.replace('&', ' and ')
    slug = re.sub(r' {2,}', ' ', slug)
    slug = slug.replace(' ', '-')
    # Collapse any run of hyphens (e.g. from " - " in the input) into one.
    slug = re.sub(r'-{2,}', '-', slug)
    if length_limit:
        slug = slug[:length_limit]
    return slug
class TitleProcessor(object):
    """Helpers for deriving alternate forms of a title."""

    # Leading articles (each including its trailing space) that are moved
    # to the end of a title when building its sort form.
    title_stopwords = ['The ', 'A ', 'An ']

    @classmethod
    def sort_title_for(cls, title):
        """Return the sortable form of `title`.

        If the title starts with one of `title_stopwords`, that word is
        moved to the end after a comma ("The Hobbit" -> "Hobbit, The").
        Only the first matching stopword is applied. Falsy titles and titles
        with no leading stopword are returned unchanged.

        :param title: The title to transform.
        :return: The sort title.
        """
        if title:
            leading = next(
                (word for word in cls.title_stopwords if title.startswith(word)),
                None,
            )
            if leading is not None:
                return title[len(leading):] + ", " + leading.strip()
        return title
class Bigrams(object):
    """Frequency profile of the two-letter sequences (bigrams) in some text.

    ``bigrams`` holds the raw counts; ``proportional`` holds, for every
    bigram making up at least 0.1% of the total, its share of the total.
    """

    # Matches strings made up solely of lowercase ASCII letters.
    all_letters = re.compile("^[a-z]+$")

    def __init__(self, bigrams):
        """Build the proportional profile from raw counts.

        :param bigrams: A ``Counter`` mapping bigram -> number of occurrences.
        """
        self.bigrams = bigrams
        self.proportional = Counter()
        total = float(sum(bigrams.values()))
        for bigram, quantity in self.bigrams.most_common():
            proportion = quantity / total
            if proportion < 0.001:
                # most_common() yields counts in descending order, so every
                # remaining bigram is below the cutoff as well.
                break
            self.proportional[bigram] = proportion

    def difference_from(self, other_bigrams):
        """Measure how far this profile is from another one.

        :param other_bigrams: Another ``Bigrams`` object.
        :return: The sum, over every bigram present in either profile, of
            the absolute difference between the two proportions (a bigram
            missing from one profile counts as proportion 0 there).
        """
        total_difference = 0
        other_proportional = other_bigrams.proportional
        for bigram, proportion in self.proportional.items():
            other_proportion = other_proportional.get(bigram, 0)
            total_difference += abs(other_proportion - proportion)
        # Bigrams only the other profile has contribute their full proportion.
        for bigram, proportion in other_proportional.items():
            if bigram not in self.proportional:
                total_difference += proportion
        return total_difference

    @classmethod
    def from_text_files(cls, paths):
        """Build a profile from the combined contents of several text files.

        :param paths: Iterable of file paths; each file is opened in text
            mode with the platform default encoding.
        :return: A ``Bigrams`` object.
        """
        bigrams = Counter()
        for path in paths:
            # Use a context manager so each file handle is closed promptly.
            with open(path) as text_file:
                cls.process_data(text_file.read(), bigrams)
        return Bigrams(bigrams)

    @classmethod
    def from_string(cls, string):
        """Build a profile from a single string.

        :param string: The text to analyze.
        :return: A ``Bigrams`` object.
        """
        bigrams = Counter()
        cls.process_data(string, bigrams)
        return Bigrams(bigrams)

    @classmethod
    def process_data(cls, data, bigrams):
        """Add the bigrams found in `data` to the `bigrams` counter in place.

        Every pair of adjacent characters is considered; a pair is counted
        only if both characters match `all_letters` (lowercase a-z), so
        pairs containing whitespace, punctuation, digits or uppercase
        letters are skipped.

        :param data: The text to scan.
        :param bigrams: A ``Counter`` to update.
        """
        for i in range(0, len(data) - 1):
            bigram = data[i:i + 2].strip()
            if len(bigram) == 2 and cls.all_letters.match(bigram):
                bigrams[bigram.lower()] += 1
# Precomputed table mapping lowercase two-letter bigrams to floating-point
# frequencies.
# NOTE(review): presumably each value is the bigram's share of all bigrams
# in an English reference text; every value is above 0.001, which is
# consistent with the cutoff applied in Bigrams.__init__. The source corpus
# is not recorded here -- confirm before regenerating.
english_bigram_frequencies = {
    "ab": 0.0021712725750437792,
    "ac": 0.005213707466347486,
    "ad": 0.004761174757224308,
    "ag": 0.002362898803662714,
    "ai": 0.004243783939953184,
    "ak": 0.0016317710390858545,
    "al": 0.009420640208489336,
    "am": 0.0022184421082422864,
    "an": 0.019261384072027876,
    "ap": 0.001748220824169669,
    "ar": 0.010173878691752996,
    "as": 0.009223117788220589,
    "at": 0.01276525492184598,
    "au": 0.0010539442574041427,
    "av": 0.0018941515675025501,
    "ay": 0.0026193831404295966,
    "ba": 0.001463729577066173,
    "be": 0.005828385445840531,
    "bl": 0.002477874540834075,
    "bo": 0.0026577083861533835,
    "br": 0.0010568923532290493,
    "bu": 0.002104940418983379,
    "by": 0.0013163247858208383,
    "ca": 0.004967541464967778,
    "ce": 0.006574253689541925,
    "ch": 0.004742012134362416,
    "ci": 0.002072511364909405,
    "ck": 0.0032694382698215223,
    "cl": 0.0010966916468652897,
    "co": 0.007743173684117428,
    "cr": 0.0021771687666935925,
    "ct": 0.0035804623793491783,
    "cu": 0.0016922070034964417,
    "da": 0.0015005807748775066,
    "de": 0.006702495857925366,
    "di": 0.003999091986485929,
    "do": 0.0031824694429867747,
    "ds": 0.0014740479124533464,
    "ea": 0.007121125465062116,
    "ec": 0.005023555285641005,
    "ed": 0.012741670155246725,
    "ee": 0.0045415416182687605,
    "ef": 0.0013325393128578251,
    "eg": 0.001064262592791316,
    "ei": 0.0018351896510044163,
    "el": 0.00604359644105872,
    "em": 0.003947500309550061,
    "en": 0.01328117169120465,
    "ep": 0.0017762277345062824,
    "er": 0.02339461441854706,
    "es": 0.010507013519967454,
    "et": 0.004740538086449962,
    "ev": 0.0024631340617095416,
    "ew": 0.001260310965147611,
    "ex": 0.0015035288707024132,
    "ey": 0.002464608109621995,
    "fa": 0.0013045324025212116,
    "fe": 0.002000283017199191,
    "ff": 0.0015919717454496141,
    "fi": 0.0024395492951102883,
    "fo": 0.004199562502579583,
    "fr": 0.0018233972677047896,
    "ft": 0.0012278819110736374,
    "fu": 0.0010524702094916894,
    "ga": 0.001748220824169669,
    "ge": 0.003719022883119793,
    "gh": 0.0031839434908992282,
    "gi": 0.0014755219603657997,
    "go": 0.0016332450869983078,
    "gr": 0.002012075400498818,
    "ha": 0.010978708851952524,
    "he": 0.03081791970566211,
    "hi": 0.007993761829234497,
    "ho": 0.00562496683392197,
    "hr": 0.0012308300068985443,
    "ht": 0.002037134215010525,
    "ia": 0.0017334803450451354,
    "ib": 0.001068684736528676,
    "ic": 0.007367291466441825,
    "id": 0.004277687041939611,
    "ie": 0.0031692030117746947,
    "if": 0.001578705314237534,
    "ig": 0.0029687324956810396,
    "il": 0.004321908479313212,
    "im": 0.0030615975141656004,
    "in": 0.02371595686346189,
    "io": 0.005216655562172393,
    "ir": 0.003085182280764854,
    "is": 0.008928308205729919,
    "it": 0.011687725897842583,
    "iv": 0.0021801168625184995,
    "ke": 0.004137652490256543,
    "ki": 0.0015521724518133737,
    "ks": 0.0010657366407037694,
    "la": 0.003962240788674595,
    "ld": 0.0029628363040312264,
    "le": 0.009557726664347498,
    "li": 0.006910336613581288,
    "ll": 0.007724011061255535,
    "lo": 0.0036350021521099523,
    "ls": 0.0010274113949799825,
    "ly": 0.0051871746039233255,
    "ma": 0.004444254456046839,
    "me": 0.00833868904074858,
    "mi": 0.0025766357509684496,
    "mo": 0.0027977429378364515,
    "mp": 0.0029304072499572527,
    "mu": 0.0011689199945755036,
    "my": 0.0012558888214102512,
    "na": 0.0025810578947058093,
    "nc": 0.002830171991910425,
    "nd": 0.014385233577632207,
    "ne": 0.007968703014722791,
    "ng": 0.01172162899982901,
    "ni": 0.003599625002211072,
    "no": 0.004734641894800148,
    "ns": 0.003651216679146939,
    "nt": 0.008982847978490693,
    "ny": 0.0015698610267628138,
    "oc": 0.0017187398659206019,
    "od": 0.0017718055907689223,
    "of": 0.009453069262563311,
    "oi": 0.0010229892512426224,
    "ok": 0.0014946845832276932,
    "ol": 0.0031087670473641076,
    "om": 0.006596364408228725,
    "on": 0.015450970218335977,
    "oo": 0.0033667254320434432,
    "op": 0.0026650786257156503,
    "or": 0.012544147734977978,
    "os": 0.0024248088159857547,
    "ot": 0.004404455162410599,
    "ou": 0.012628168465987818,
    "ov": 0.0014814181520156132,
    "ow": 0.004357285629212092,
    "pa": 0.0024248088159857547,
    "pe": 0.004336648958437745,
    "ph": 0.001596393889186974,
    "pi": 0.0014047676605680392,
    "pl": 0.0026621305298907437,
    "po": 0.0027137222068266105,
    "pp": 0.0012897919233966781,
    "pr": 0.003148566341000348,
    "pu": 0.002514725738645409,
    "qu": 0.0011851345216124904,
    "ra": 0.00642242675455923,
    "rc": 0.001068684736528676,
    "rd": 0.0022656116414407935,
    "re": 0.016922070034964418,
    "rg": 0.0016332450869983078,
    "ri": 0.00681010135553446,
    "rk": 0.0017261101054828686,
    "rl": 0.001046574017841876,
    "rm": 0.0016317710390858545,
    "rn": 0.001328117169120465,
    "ro": 0.007760862259066869,
    "rr": 0.0010878473593905697,
    "rs": 0.004743486182274869,
    "rt": 0.0029893691664553863,
    "ru": 0.0013177988337332916,
    "ry": 0.002701929823526984,
    "sa": 0.0028758674771964788,
    "sc": 0.0012691552526223311,
    "se": 0.008092523039368872,
    "sh": 0.0038148359974292606,
    "si": 0.004310116096013585,
    "so": 0.0036910159727831793,
    "sp": 0.0017718055907689223,
    "ss": 0.0034404278276661104,
    "st": 0.01015324202097865,
    "su": 0.0018631965613410298,
    "ta": 0.004728745703150335,
    "te": 0.013699801298341401,
    "th": 0.031493033649565745,
    "ti": 0.009472231885425203,
    "tl": 0.0016745184285470015,
    "to": 0.010639677832088254,
    "tr": 0.0039519224532874216,
    "ts": 0.0027933207940990913,
    "tt": 0.0022449749706664464,
    "tu": 0.0017644353512066555,
    "ty": 0.0020164975442361777,
    "ub": 0.0011114321259898233,
    "uc": 0.0013590721752819853,
    "ue": 0.0012529407255853443,
    "ug": 0.0015904976975371608,
    "ui": 0.0010952175989528364,
    "ul": 0.0035052859358140577,
    "um": 0.0012323040548109976,
    "un": 0.004690420457426548,
    "up": 0.0018956256154150034,
    "ur": 0.0044309880248347595,
    "us": 0.004758226661399402,
    "ut": 0.007224308818933851,
    "ve": 0.008085152799806605,
    "vi": 0.0024498676304974616,
    "wa": 0.00562938897765933,
    "we": 0.0034581164026155505,
    "wh": 0.0031721511075996013,
    "wi": 0.0038531612431530475,
    "wn": 0.0013413836003325452,
    "wo": 0.0029937913101927465,
    "ye": 0.0010288854428924358,
    "yo": 0.002843438423122505,
    "ys": 0.0013649683669317988
}
# Reference profile: start from an empty Bigrams object, then install the
# precomputed table directly as its `proportional` mapping instead of
# deriving it from raw counts.
english_bigrams = Bigrams(Counter())
english_bigrams.proportional = Counter(english_bigram_frequencies)
class MoneyUtility(object):
    """Helpers for working with ``Money`` values."""

    # Currency assumed when the input carries no currency marker.
    DEFAULT_CURRENCY = 'USD'

    @classmethod
    def parse(cls, amount):
        """Attempt to turn a string into a Money object.

        :param amount: The amount to parse. Falsy values (``None``, ``''``,
            ``0``) are treated as ``'0'``; anything else is converted with
            ``str()``. A leading ``$`` is stripped and selects USD.
        :return: A ``Money`` object in USD when the amount starts with
            ``$``, otherwise in `DEFAULT_CURRENCY`.
        """
        text = str(amount) if amount else '0'
        if text.startswith('$'):
            return Money(text[1:], 'USD')
        return Money(text, cls.DEFAULT_CURRENCY)
def is_session(value):
    """Tell whether `value` is a SQLAlchemy session.

    Both plain SQLAlchemy ``Session`` objects and ``flask_scoped_session``
    objects are accepted.

    :param value: Value to check
    :type value: Any

    :return: True if the value is an instance of one of the session types,
        False otherwise
    :rtype: bool
    """
    session_types = (
        sqlalchemy.orm.session.Session,
        api.flask_sqlalchemy_session.flask_scoped_session,
    )
    return isinstance(value, session_types)
def first_or_default(collection, default=None):
    """Return the first element of `collection`, falling back to `default`.

    The default is returned when the collection is empty and also when its
    first element is ``None``.

    :param collection: Collection
    :type collection: Iterable

    :param default: Default value
    :type default: Any

    :return: The first element, or `default`
    """
    for head in collection:
        # Only the first element is ever inspected.
        return default if head is None else head
    return default
def chunks(lst, chunk_size, start_index=0):
    """Yield successive slices of `lst`, each at most `chunk_size` items long.

    :param lst: A sized, sliceable sequence.
    :param chunk_size: Maximum number of items per chunk.
    :param start_index: Index of the first item to include; items before it
        are skipped.
    :return: A generator over the chunks, in order.
    """
    end = len(lst)
    yield from (
        lst[offset:offset + chunk_size]
        for offset in range(start_index, end, chunk_size)
    )