From aefd8de78c9336129575e3c09d247b8a8270d962 Mon Sep 17 00:00:00 2001 From: Samuel Clay Date: Thu, 3 Jan 2013 18:15:00 -0800 Subject: [PATCH] Backporting collections.Counter to python2.6 --- apps/rss_feeds/models.py | 2 +- utils/feed_functions.py | 194 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 194 insertions(+), 2 deletions(-) diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py index a037241ee..8de3f6814 100644 --- a/apps/rss_feeds/models.py +++ b/apps/rss_feeds/models.py @@ -8,7 +8,7 @@ import mongoengine as mongo import zlib import hashlib import redis -from collections import Counter +from utils.feed_functions import Counter from collections import defaultdict from operator import itemgetter # from nltk.collocations import TrigramCollocationFinder, BigramCollocationFinder, TrigramAssocMeasures, BigramAssocMeasures diff --git a/utils/feed_functions.py b/utils/feed_functions.py index 3e617a70a..e787e80d7 100644 --- a/utils/feed_functions.py +++ b/utils/feed_functions.py @@ -212,4 +212,196 @@ def mail_feed_error_to_admin(feed, e, local_vars=None, subject=None): pprint.pformat(local_vars) ) # print message - mail_admins(subject, message) \ No newline at end of file + mail_admins(subject, message) + +## {{{ http://code.activestate.com/recipes/576611/ (r11) +from operator import itemgetter +from heapq import nlargest +from itertools import repeat, ifilter + +class Counter(dict): + '''Dict subclass for counting hashable objects. Sometimes called a bag + or multiset. Elements are stored as dictionary keys and their counts + are stored as dictionary values. + + >>> Counter('zyzygy') + Counter({'y': 3, 'z': 2, 'g': 1}) + + ''' + + def __init__(self, iterable=None, **kwds): + '''Create a new, empty Counter object. And if given, count elements + from an input iterable. Or, initialize the count from another mapping + of elements to their counts. + + >>> c = Counter() # a new, empty counter + >>> c = Counter('gallahad') # a new counter from an iterable + >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping + >>> c = Counter(a=4, b=2) # a new counter from keyword args + + ''' + self.update(iterable, **kwds) + + def __missing__(self, key): + return 0 + + def most_common(self, n=None): + '''List the n most common elements and their counts from the most + common to the least. If n is None, then list all element counts. + + >>> Counter('abracadabra').most_common(3) + [('a', 5), ('r', 2), ('b', 2)] + + ''' + if n is None: + return sorted(self.iteritems(), key=itemgetter(1), reverse=True) + return nlargest(n, self.iteritems(), key=itemgetter(1)) + + def elements(self): + '''Iterator over elements repeating each as many times as its count. + + >>> c = Counter('ABCABC') + >>> sorted(c.elements()) + ['A', 'A', 'B', 'B', 'C', 'C'] + + If an element's count has been set to zero or is a negative number, + elements() will ignore it. + + ''' + for elem, count in self.iteritems(): + for _ in repeat(None, count): + yield elem + + # Override dict methods where the meaning changes for Counter objects. + + @classmethod + def fromkeys(cls, iterable, v=None): + raise NotImplementedError( + 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') + + def update(self, iterable=None, **kwds): + '''Like dict.update() but add counts instead of replacing them. + + Source can be an iterable, a dictionary, or another Counter instance. + + >>> c = Counter('which') + >>> c.update('witch') # add elements from another iterable + >>> d = Counter('watch') + >>> c.update(d) # add elements from another counter + >>> c['h'] # four 'h' in which, witch, and watch + 4 + + ''' + if iterable is not None: + if hasattr(iterable, 'iteritems'): + if self: + self_get = self.get + for elem, count in iterable.iteritems(): + self[elem] = self_get(elem, 0) + count + else: + dict.update(self, iterable) # fast path when counter is empty + else: + self_get = self.get + for elem in iterable: + self[elem] = self_get(elem, 0) + 1 + if kwds: + self.update(kwds) + + def copy(self): + 'Like dict.copy() but returns a Counter instance instead of a dict.' + return Counter(self) + + def __delitem__(self, elem): + 'Like dict.__delitem__() but does not raise KeyError for missing values.' + if elem in self: + dict.__delitem__(self, elem) + + def __repr__(self): + if not self: + return '%s()' % self.__class__.__name__ + items = ', '.join(map('%r: %r'.__mod__, self.most_common())) + return '%s({%s})' % (self.__class__.__name__, items) + + # Multiset-style mathematical operations discussed in: + # Knuth TAOCP Volume II section 4.6.3 exercise 19 + # and at http://en.wikipedia.org/wiki/Multiset + # + # Outputs guaranteed to only include positive counts. + # + # To strip negative and zero counts, add-in an empty counter: + # c += Counter() + + def __add__(self, other): + '''Add counts from two counters. + + >>> Counter('abbb') + Counter('bcc') + Counter({'b': 4, 'c': 2, 'a': 1}) + + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem in set(self) | set(other): + newcount = self[elem] + other[elem] + if newcount > 0: + result[elem] = newcount + return result + + def __sub__(self, other): + ''' Subtract count, but keep only results with positive counts. + + >>> Counter('abbbc') - Counter('bccd') + Counter({'b': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem in set(self) | set(other): + newcount = self[elem] - other[elem] + if newcount > 0: + result[elem] = newcount + return result + + def __or__(self, other): + '''Union is the maximum of value in either of the input counters. + + >>> Counter('abbb') | Counter('bcc') + Counter({'b': 3, 'c': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + _max = max + result = Counter() + for elem in set(self) | set(other): + newcount = _max(self[elem], other[elem]) + if newcount > 0: + result[elem] = newcount + return result + + def __and__(self, other): + ''' Intersection is the minimum of corresponding counts. + + >>> Counter('abbb') & Counter('bcc') + Counter({'b': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + _min = min + result = Counter() + if len(self) < len(other): + self, other = other, self + for elem in ifilter(self.__contains__, other): + newcount = _min(self[elem], other[elem]) + if newcount > 0: + result[elem] = newcount + return result + + +if __name__ == '__main__': + import doctest + print doctest.testmod() +## end of http://code.activestate.com/recipes/576611/ }}}