Adding overlap

This commit is contained in:
Samuel Clay 2023-12-05 09:09:00 -05:00
parent ad58ad26a3
commit b398bed406

View file

@@ -1,7 +1,7 @@
import argparse import argparse
import csv import csv
import math import math
from collections import defaultdict from collections import Counter, defaultdict
def calculate_statistics(ratings): def calculate_statistics(ratings):
@@ -24,6 +24,7 @@ def calculate_statistics(ratings):
def process_file(path): def process_file(path):
user_feeds = defaultdict(set) # Stores feeds rated by each user user_feeds = defaultdict(set) # Stores feeds rated by each user
feed_users = defaultdict(set) # Stores users who have rated each feed feed_users = defaultdict(set) # Stores users who have rated each feed
feed_ratings = defaultdict(list) # Stores ratings for each feed
ratings = [] ratings = []
with open(path, newline="") as csvfile: with open(path, newline="") as csvfile:
@@ -32,12 +33,27 @@ def process_file(path):
user, feed, rating = row user, feed, rating = row
user_feeds[user].add(feed) user_feeds[user].add(feed)
feed_users[feed].add(user) feed_users[feed].add(user)
feed_ratings[feed].append(float(rating))
ratings.append(float(rating)) ratings.append(float(rating))
# Calculating average ratings for each feed
avg_feed_rating = {feed: sum(rates) / len(rates) for feed, rates in feed_ratings.items()}
# Finding feed with the highest average rating
highest_rated_feed = max(avg_feed_rating, key=avg_feed_rating.get)
# Overlap statistics # Overlap statistics
avg_user_overlap = sum(len(feeds) for feeds in user_feeds.values()) / len(user_feeds) avg_user_overlap = sum(len(feeds) for feeds in user_feeds.values()) / len(user_feeds)
avg_feed_overlap = sum(len(users) for users in feed_users.values()) / len(feed_users) avg_feed_overlap = sum(len(users) for users in feed_users.values()) / len(feed_users)
# Finding feeds with greatest user overlap
overlap_count = Counter()
for feed in feed_users:
for user in feed_users[feed]:
overlap_count.update(user_feeds[user])
most_overlapped_feeds = overlap_count.most_common(5)
unique_users = len(user_feeds) unique_users = len(user_feeds)
unique_feeds = len(feed_users) unique_feeds = len(feed_users)
min_rating, max_rating, avg_rating, std_dev, median = calculate_statistics(ratings) min_rating, max_rating, avg_rating, std_dev, median = calculate_statistics(ratings)
@@ -52,6 +68,9 @@ def process_file(path):
avg_rating, avg_rating,
std_dev, std_dev,
median, median,
highest_rated_feed,
avg_feed_rating[highest_rated_feed],
most_overlapped_feeds,
) )
@@ -72,6 +91,9 @@ def main():
avg_rating, avg_rating,
std_dev, std_dev,
median, median,
highest_rated_feed,
highest_rating,
most_overlapped_feeds,
) = process_file(path) ) = process_file(path)
print(f"Unique Users: {unique_users}") print(f"Unique Users: {unique_users}")
print(f"Unique Feeds: {unique_feeds}") print(f"Unique Feeds: {unique_feeds}")
@@ -80,6 +102,8 @@ def main():
print( print(
f"Rating Stats - Min: {min_rating}, Max: {max_rating}, Average: {avg_rating:.2f}, Std Dev: {std_dev:.2f}, Median: {median}" f"Rating Stats - Min: {min_rating}, Max: {max_rating}, Average: {avg_rating:.2f}, Std Dev: {std_dev:.2f}, Median: {median}"
) )
print(f"Highest Rated Feed: {highest_rated_feed} with Average Rating: {highest_rating:.2f}")
print("Feeds with Greatest User Overlap: ", most_overlapped_feeds)
if __name__ == "__main__": if __name__ == "__main__":