Untitled

mail@pastecode.io avatar
unknown
python
2 years ago
5.2 kB
1
Indexable
Never
def display_list(list2D):
  '''input: 2-D list (a list whose items are the rows)
     return: None
     Writes every row of the list to the terminal, one row per line.
     An empty list produces the message "List is empty" instead
  '''
  if list2D == []:
    print("List is empty")
    return

  for entry in list2D:
    print(entry)

def display_dict(adict):
  '''input: dictionary
     return: None
     Writes one "key: value" line per entry to the terminal, visiting the
     keys in sorted order. An empty dictionary produces the message
     "Dictionary is empty" instead
  '''
  if adict == {}:
    print("Dictionary is empty")
    return

  for name in sorted(adict):
    # !s forces str() on both parts, as plain string concatenation would need
    print(f"{name!s}: {adict[name]!s}")
    
def _read_csv_rows(filename, converters):
  '''input: string (filename) and dict mapping a column index to a
            conversion function such as int or float
     return: list of rows, each row being a list of column values
     Reads a CSV file, skips its header row and converts the listed columns.
     A problem opening or parsing the file is printed and the rows read
     before the problem are returned (an empty list if nothing was read)
  '''
  import csv  # imported here because the file has no top-level import section

  rows = []

  try:
    # newline="" lets the csv module deal with line endings itself
    with open(filename, "r", newline="") as source:
      reader = csv.reader(source)
      next(reader, None)  # discard the header row (no-op for an empty file)

      for row in reader:
        # csv.reader yields [] for a completely blank line; skip it
        if not row:
          continue

        for index, convert in converters.items():
          row[index] = convert(row[index])

        rows.append(row)

  # OSError: missing or unreadable file
  # ValueError / IndexError: a row with a bad or missing column
  # csv.Error: input the csv parser itself rejects
  except (OSError, ValueError, IndexError, csv.Error) as e:
    print(e)

  return rows

def load_data(movies, ratings, tags):
  '''input: strings (filenames of the movies, ratings and tags CSV files)
     return: three 2-D lists (movie rows, rating rows, tag rows)
     Reads the records of each file, except its header row, into its own
     2-D list. Fields are parsed with the csv module, so a quoted field may
     itself contain commas (e.g. "American President, The (1995)").
     Converted columns:
       movies  - column 0 to int
       ratings - columns 0, 1 and 3 to int, column 2 to float
       tags    - columns 0, 1 and 3 to int
     A file that cannot be read does not raise: the error is printed and
     the list for that file holds only the rows read before the error
  '''
  movie_db = _read_csv_rows(movies, {0: int})
  ratings_db = _read_csv_rows(ratings, {0: int, 1: int, 2: float, 3: int})
  tags_db = _read_csv_rows(tags, {0: int, 1: int, 3: int})

  return movie_db, ratings_db, tags_db
  
def get_genre_dict(db):
  '''input: list (2-D list of movie rows as produced by load_data())
     return: dict
     Builds a dictionary whose keys are lower-cased genre names and whose
     values are lists of the ids of all movies belonging to that genre
  '''
  ids_by_genre = {}

  for record in db:
    movie_id = record[0]

    # column 2 packs all genres of a movie into one "|"-separated string
    for name in record[2].lower().split("|"):
      # setdefault starts a fresh list the first time a genre shows up,
      # otherwise it hands back the list already stored for it
      ids_by_genre.setdefault(name, []).append(movie_id)

  return ids_by_genre
  
def get_tag_dict(db):
  '''input: list (2-D list of tag rows as produced by load_data())
     return: dict
     Builds a dictionary whose keys are lower-cased tags and whose values
     are lists of the ids of all movies that were given that tag
  '''
  ids_by_tag = {}

  for record in db:
    movie_id = record[1]
    label = record[2].lower()

    # first sighting of a tag creates its list, later ones extend it
    ids_by_tag.setdefault(label, []).append(movie_id)

  return ids_by_tag
  
def get_avg_ratings(db):
  '''input: list (2-D list of rating rows as produced by load_data())
     return: dict
     Builds a dictionary whose keys are movie ids and whose values are the
     average of all ratings given to that movie, rounded to 2 decimals
  '''
  scores_by_movie = {}

  # first pass: collect every rating under the movie it belongs to
  for record in db:
    movie_id = record[1]
    score = record[2]
    scores_by_movie.setdefault(movie_id, []).append(score)

  # second pass: collapse each list of ratings into its rounded mean
  return {
    movie_id: round(sum(scores) / len(scores), 2)
    for movie_id, scores in scores_by_movie.items()
  }
  
def get_movie_dict(movie_db, ratings):
  '''input: list (2-D list of movie rows as produced by load_data()) and
            dict (movie id -> average rating, as from get_avg_ratings())
     return: dict
     Builds a dictionary whose keys are movie ids and whose values are lists
     of the form [title, release year, average rating].
     The year is taken from a trailing "(YYYY)" in the title column and is
     removed from the title. When the title has no such suffix the whole
     title is kept and the year is None. When a movie has no entry in
     ratings its average rating is None
  '''
  import re  # imported here because the file has no top-level import section

  # a title followed by a parenthesised 4-digit year, e.g. "Toy Story (1995)"
  title_with_year = re.compile(r"^(.*?)\s*\((\d{4})\)$")

  movies = {}

  for movie in movie_db:
    movie_id = movie[0]
    raw_title = movie[1].strip()

    found = title_with_year.match(raw_title)
    if found:
      movie_title = found.group(1)
      movie_year = int(found.group(2))
    else:
      # no recognisable year: keep the title intact rather than slicing
      # characters off it and failing (or inventing a year) in int()
      movie_title = raw_title
      movie_year = None

    # .get() so that a movie nobody rated does not raise KeyError
    avg_rating = ratings.get(movie_id)

    movies[movie_id] = [movie_title, movie_year, avg_rating]

  return movies

def main():
  '''
    Exercise each of the other functions in turn
  '''
  # display_list(): prints one row per line
  display_list([[1, 2], [3, 4], [5], [6, 7]])

  # display_dict(): prints the entries ordered by key
  display_dict({2: 6, 7: 1, 4: 3, 5: "zero"})

  # load_data(): read the three sample CSV files into 2-D lists
  # (slices of these lists can be passed to display_list() to inspect them)
  movie_rows, rating_rows, tag_rows = load_data(
    "movies_20.csv", "ratings_20.csv", "tags_20.csv"
  )

  # The dictionaries built below are not printed; hand any of them to
  # display_dict() to inspect the result.

  # get_genre_dict(): genre -> movie ids
  genre_index = get_genre_dict(movie_rows)

  # get_tag_dict(): tag -> movie ids
  tag_index = get_tag_dict(tag_rows)

  # get_avg_ratings(): movie id -> average rating
  average_by_movie = get_avg_ratings(rating_rows)

  # get_movie_dict(): movie id -> [title, year, average rating]
  movie_index = get_movie_dict(movie_rows, average_by_movie)
    
# Run main() only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
  main()