mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-04-13 09:42:01 +00:00
Adding overlap
This commit is contained in:
parent
ad58ad26a3
commit
b398bed406
1 changed file with 25 additions and 1 deletion
|
@ -1,7 +1,7 @@
|
||||||
import argparse
|
import argparse
|
||||||
import csv
|
import csv
|
||||||
import math
|
import math
|
||||||
from collections import defaultdict
|
from collections import Counter, defaultdict
|
||||||
|
|
||||||
|
|
||||||
def calculate_statistics(ratings):
|
def calculate_statistics(ratings):
|
||||||
|
@ -24,6 +24,7 @@ def calculate_statistics(ratings):
|
||||||
def process_file(path):
|
def process_file(path):
|
||||||
user_feeds = defaultdict(set) # Stores feeds rated by each user
|
user_feeds = defaultdict(set) # Stores feeds rated by each user
|
||||||
feed_users = defaultdict(set) # Stores users who have rated each feed
|
feed_users = defaultdict(set) # Stores users who have rated each feed
|
||||||
|
feed_ratings = defaultdict(list) # Stores ratings for each feed
|
||||||
ratings = []
|
ratings = []
|
||||||
|
|
||||||
with open(path, newline="") as csvfile:
|
with open(path, newline="") as csvfile:
|
||||||
|
@ -32,12 +33,27 @@ def process_file(path):
|
||||||
user, feed, rating = row
|
user, feed, rating = row
|
||||||
user_feeds[user].add(feed)
|
user_feeds[user].add(feed)
|
||||||
feed_users[feed].add(user)
|
feed_users[feed].add(user)
|
||||||
|
feed_ratings[feed].append(float(rating))
|
||||||
ratings.append(float(rating))
|
ratings.append(float(rating))
|
||||||
|
|
||||||
|
# Calculating average ratings for each feed
|
||||||
|
avg_feed_rating = {feed: sum(rates) / len(rates) for feed, rates in feed_ratings.items()}
|
||||||
|
|
||||||
|
# Finding feed with the highest average rating
|
||||||
|
highest_rated_feed = max(avg_feed_rating, key=avg_feed_rating.get)
|
||||||
|
|
||||||
# Overlap statistics
|
# Overlap statistics
|
||||||
avg_user_overlap = sum(len(feeds) for feeds in user_feeds.values()) / len(user_feeds)
|
avg_user_overlap = sum(len(feeds) for feeds in user_feeds.values()) / len(user_feeds)
|
||||||
avg_feed_overlap = sum(len(users) for users in feed_users.values()) / len(feed_users)
|
avg_feed_overlap = sum(len(users) for users in feed_users.values()) / len(feed_users)
|
||||||
|
|
||||||
|
# Finding feeds with greatest user overlap
|
||||||
|
overlap_count = Counter()
|
||||||
|
for feed in feed_users:
|
||||||
|
for user in feed_users[feed]:
|
||||||
|
overlap_count.update(user_feeds[user])
|
||||||
|
|
||||||
|
most_overlapped_feeds = overlap_count.most_common(5)
|
||||||
|
|
||||||
unique_users = len(user_feeds)
|
unique_users = len(user_feeds)
|
||||||
unique_feeds = len(feed_users)
|
unique_feeds = len(feed_users)
|
||||||
min_rating, max_rating, avg_rating, std_dev, median = calculate_statistics(ratings)
|
min_rating, max_rating, avg_rating, std_dev, median = calculate_statistics(ratings)
|
||||||
|
@ -52,6 +68,9 @@ def process_file(path):
|
||||||
avg_rating,
|
avg_rating,
|
||||||
std_dev,
|
std_dev,
|
||||||
median,
|
median,
|
||||||
|
highest_rated_feed,
|
||||||
|
avg_feed_rating[highest_rated_feed],
|
||||||
|
most_overlapped_feeds,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -72,6 +91,9 @@ def main():
|
||||||
avg_rating,
|
avg_rating,
|
||||||
std_dev,
|
std_dev,
|
||||||
median,
|
median,
|
||||||
|
highest_rated_feed,
|
||||||
|
highest_rating,
|
||||||
|
most_overlapped_feeds,
|
||||||
) = process_file(path)
|
) = process_file(path)
|
||||||
print(f"Unique Users: {unique_users}")
|
print(f"Unique Users: {unique_users}")
|
||||||
print(f"Unique Feeds: {unique_feeds}")
|
print(f"Unique Feeds: {unique_feeds}")
|
||||||
|
@ -80,6 +102,8 @@ def main():
|
||||||
print(
|
print(
|
||||||
f"Rating Stats - Min: {min_rating}, Max: {max_rating}, Average: {avg_rating:.2f}, Std Dev: {std_dev:.2f}, Median: {median}"
|
f"Rating Stats - Min: {min_rating}, Max: {max_rating}, Average: {avg_rating:.2f}, Std Dev: {std_dev:.2f}, Median: {median}"
|
||||||
)
|
)
|
||||||
|
print(f"Highest Rated Feed: {highest_rated_feed} with Average Rating: {highest_rating:.2f}")
|
||||||
|
print("Feeds with Greatest User Overlap: ", most_overlapped_feeds)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
Loading…
Add table
Reference in a new issue