Movie Recommendation System Using a Content-Based Approach
This post focuses on implementing a content-based movie recommendation system.
- Creating Required Environment for the Project
- Description of the dataset
- App_Logger Class
- Helper Class
- Preprocessing Dataframe
- Embedding Class
Creating Required Environment for the Project
-
Create conda environment :
conda create -n movie python=3.7 -y
-
Activate conda environment :
conda activate movie
-
Install required dependencies for this project :
-
pip install pandas
-
pip install nltk
-
pip install scikit-learn
-
ast is part of the Python standard library, so it does not need to be installed with pip.
-
Importing Required Modules
import ast #for converting str to list
import nltk
import pandas as pd
import pickle
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from preprocess.preprocessing import Process
from datetime import datetime
Description of the dataset
- You can get the dataset from here.
movie dataframe
- This dataframe contains all the information about the movies, such as budget, revenue, runtime, etc.
credits dataframe
- This dataframe contains the movie_id, title, cast and crew of each movie.
# Load the movie metadata and the credits table from local CSV files.
movie = pd.read_csv('data/movies.csv')
credits = pd.read_csv('data/credits.csv')
# Take three randomly sampled rows of each frame for a quick look;
# the results are not stored anywhere.
movie.sample(3)
credits.sample(3)
class App_Logger:
    """Minimal logger that writes timestamped lines to a caller-supplied file object."""

    def __init__(self):
        pass

    def log(self, file_object, log_message):
        """Write one timestamped line to ``file_object`` and flush it.

        The line is ``<date>/<HH:MM:SS>``, two tab characters, the message and
        a trailing newline.

        Args:
            file_object: Writable text file-like object (must provide
                ``write`` and ``flush``).
            log_message: Message to record. Any object is accepted and is
                rendered with ``str()``, so an exception instance can be
                passed directly.
        """
        # The timestamp parts are kept on the instance, as before, in case
        # callers read them after logging.
        self.now = datetime.now()
        self.date = self.now.date()
        self.current_time = self.now.strftime("%H:%M:%S")
        # An f-string (rather than ``+``) avoids a TypeError for non-str messages.
        file_object.write(f"{self.date}/{self.current_time}\t\t{log_message}\n")
        # Flush immediately: callers log right before re-raising and may never
        # close the file, so buffered text could otherwise be lost.
        file_object.flush()
class Helper:
    """Parsing and text-cleaning helpers used by the preprocessing pipeline.

    The ``convert*`` / ``fetch_director`` methods take the *string* form of a
    list of dicts (parsed with ``ast.literal_eval``) and return a list of the
    ``'name'`` values they select.
    """

    def __init__(self):
        self.stemmer = PorterStemmer()

    def convert(self, text):
        """Return the ``'name'`` of every entry in the stringified list ``text``."""
        return [entry['name'] for entry in ast.literal_eval(text)]

    def merge1(self, df1, df2):
        """Return ``df1`` merged with ``df2`` on the ``'title'`` column."""
        return df1.merge(df2, on='title')

    def convert_cast(self, text):
        """Return the ``'name'`` of the first three entries in ``text``.

        Only the first three entries are inspected; shorter lists yield
        fewer names.
        """
        return [entry['name'] for entry in ast.literal_eval(text)[:3]]

    def fetch_director(self, text):
        """Return ``[name]`` of the first entry whose ``'job'`` is ``'Director'``.

        Returns an empty list when no entry has that job.
        """
        for member in ast.literal_eval(text):
            if member['job'] == 'Director':
                return [member['name']]
        return []

    def remove_space(self, L):
        """Return a copy of ``L`` with all spaces removed from each string.

        Example: ``'Anna Kendrick'`` becomes ``'AnnaKendrick'``.
        """
        return [item.replace(" ", "") for item in L]

    def stems(self, text):
        """Stem every whitespace-separated word of ``text`` and re-join with spaces."""
        return " ".join(self.stemmer.stem(word) for word in text.split())
class Process:
    """Build the ``movie_id`` / ``title`` / ``tags`` frame used for recommendations.

    Every public step calls the previous one, so ``drop_columns()`` runs the
    whole pipeline: merge -> select columns -> drop incomplete rows -> parse
    the list-valued columns -> strip spaces -> build ``tags`` -> keep the
    final columns. A failure in any step is appended to
    ``Logs/preprocessing_log.txt`` and the exception is re-raised.
    """

    def __init__(self):
        self.helper = Helper()
        self.movie = pd.read_csv('data/movies.csv')
        self.credits = pd.read_csv('data/credits.csv')
        # Initializing the logger object
        self.file_object = open("Logs/preprocessing_log.txt", 'a+')
        self.log_writer = App_Logger()

    def close(self):
        """Close the log file opened by ``__init__``."""
        self.file_object.close()

    def merge_dataframe(self):
        """Merge the movie and credits frames on ``title`` and return the result."""
        try:
            self.first_dataframe = self.helper.merge1(self.movie, self.credits)
            return self.first_dataframe
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the merge_dataframe function!! Error:: %s' % ex)
            raise

    def select_columns(self):
        """Return the merged frame restricted to the columns used for recommendation."""
        try:
            self.merged_dataframe = self.merge_dataframe()
            # Keeping important columns for recommendation
            self.merged_dataframe = self.merged_dataframe[['movie_id','title','overview','genres','keywords','cast','crew']]
            return self.merged_dataframe
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the select_columns function. Error:: %s' % ex)
            raise

    def drop_outliers(self):
        """Return the selected columns with every row containing a missing value removed.

        The rows are dropped from the *selected* frame, not from the full
        merged frame: the latter would discard the column selection and
        remove rows for NaNs in columns that are not used at all. The index
        is reset so row labels match row positions afterwards.
        """
        try:
            self.selected_dataframe = self.select_columns()
            # The original author's analysis found a few rows with a missing
            # overview; those are the rows removed here.
            self.selected_dataframe = self.selected_dataframe.dropna().reset_index(drop=True)
            return self.selected_dataframe
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the drop_outliers function!! Error:: %s' % ex)
            raise

    def convert_to_list(self):
        """Parse the stringified ``genres``/``keywords``/``cast``/``crew`` columns into lists of names."""
        try:
            self.cleaned_dataframe = self.drop_outliers()
            # Converting genres column to list of words
            self.cleaned_dataframe['genres'] = self.cleaned_dataframe['genres'].apply(self.helper.convert)
            # Converting keywords column to list of words
            self.cleaned_dataframe['keywords'] = self.cleaned_dataframe['keywords'].apply(self.helper.convert)
            # Converting cast column to list of words (top three cast members)
            self.cleaned_dataframe['cast'] = self.cleaned_dataframe['cast'].apply(self.helper.convert_cast)
            # Fetching director name from crew column.
            self.cleaned_dataframe['crew'] = self.cleaned_dataframe['crew'].apply(self.helper.fetch_director)
            return self.cleaned_dataframe
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the convert_to_list function!! Error:: %s' % ex)
            raise

    def remove_space(self):
        """Strip spaces inside every name of the list-valued columns."""
        try:
            self.converted_to_list_dataframe = self.convert_to_list()
            # Removing space from cast, crew, genres and keywords column.
            for column in ('cast', 'crew', 'genres', 'keywords'):
                self.converted_to_list_dataframe[column] = self.converted_to_list_dataframe[column].apply(self.helper.remove_space)
            return self.converted_to_list_dataframe
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the remove_space function Error:: %s' % ex)
            raise

    @staticmethod
    def _build_tags(frame):
        """Return one space-separated tag string per row of ``frame``.

        For each row the words of ``overview`` are followed by the entries of
        the ``genres``, ``keywords``, ``cast`` and ``crew`` lists. The result
        is a Series aligned with ``frame.index``.
        """
        tags = [
            " ".join(overview.split() + list(genres) + list(keywords) + list(cast) + list(crew))
            for overview, genres, keywords, cast, crew in zip(
                frame['overview'], frame['genres'], frame['keywords'],
                frame['cast'], frame['crew'],
            )
        ]
        return pd.Series(tags, index=frame.index, dtype=object)

    def concat_all(self):
        """Add a per-row ``tags`` column combining overview, genres, keywords, cast and crew.

        The tags are built row by row. Calling ``str()`` on whole columns
        would instead produce a single string that is broadcast to every
        row, giving all movies identical tags.
        """
        try:
            self.space_removed_dataframe = self.remove_space()
            # Concatenate all text features of each row into its own tag string.
            # NOTE(review): Helper.stems is never applied to the tags -- confirm
            # whether stemming was intended here.
            self.space_removed_dataframe['tags'] = self._build_tags(self.space_removed_dataframe)
            return self.space_removed_dataframe
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the concat_all function. Error:: %s' % ex)
            raise

    def drop_columns(self):
        """Run the whole pipeline and return only ``movie_id``, ``title`` and ``tags``."""
        try:
            self.new_dataframe = self.concat_all()
            # Selecting only required columns and creating a new dataframe.
            self.new_dataframe = self.new_dataframe[['movie_id','title','tags']]
            return self.new_dataframe
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the drop_columns function. Error:: %s' % ex)
            raise
# Run the full preprocessing pipeline: drop_columns() calls every earlier
# step of Process in turn and returns the movie_id/title/tags frame.
process = Process()
dataframe = process.drop_columns()
# Sample five random rows of the result for a quick look; not stored.
dataframe.sample(5)
class Embedding:
    """Vectorise movie tags and recommend movies by cosine similarity.

    ``__init__`` runs the preprocessing pipeline (``Process().drop_columns()``)
    and stores the resulting frame in ``new_df``. Failures in any method are
    appended to ``Logs/embedding_log.txt`` and re-raised.
    """

    def __init__(self):
        # Initializing the logger object
        self.file_object = open("Logs/embedding_log.txt", 'a+')
        self.log_writer = App_Logger()
        self.cv = CountVectorizer(max_features=5000,stop_words='english')
        self.new_df = Process().drop_columns()
        # Filled by cosine_similarity(); reused by recommend() and save_model().
        self.similarity = None

    def vectorize(self):
        """Fit the count vectoriser on ``new_df['tags']`` and return the dense matrix."""
        try:
            self.vector = self.cv.fit_transform(self.new_df['tags']).toarray()
            return self.vector
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the vectorize function. Error:: %s' % ex)
            raise

    def cosine_similarity(self):
        """Compute, store in ``self.similarity`` and return the pairwise similarity matrix."""
        try:
            self.vectorize_array = self.vectorize()
            self.similarity = cosine_similarity(self.vectorize_array)
            return self.similarity
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the cosine_similarity function. Error:: %s' % ex)
            raise

    def _cached_similarity(self):
        """Return the similarity matrix, computing it only if it is not stored yet."""
        if getattr(self, 'similarity', None) is None:
            self.cosine_similarity()
        return self.similarity

    def recommend(self, movie, top_n=5):
        """Print the titles of the ``top_n`` movies most similar to ``movie``.

        Args:
            movie: Exact title to look up in ``new_df['title']``; the first
                matching row is used.
            top_n: Number of titles to print (default 5).

        Raises:
            IndexError: If no row has the given title.
        """
        try:
            similarity = self._cached_similarity()
            # Use the row *position*, not the index label: the similarity
            # matrix is positional, and labels differ from positions whenever
            # the frame's index is not 0..n-1.
            matches = (self.new_df['title'] == movie).to_numpy().nonzero()[0]
            if len(matches) == 0:
                raise IndexError('Movie %r not found in the dataset' % (movie,))
            position = int(matches[0])
            ranked = sorted(enumerate(similarity[position]), reverse=True, key=lambda pair: pair[1])
            # Exclude the queried movie explicitly instead of assuming it is
            # ranked first (a tie could place another movie ahead of it).
            neighbours = [pos for pos, _ in ranked if pos != position][:top_n]
            for pos in neighbours:
                print(self.new_df['title'].iloc[pos])
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the recommend function. Error:: %s' % ex)
            raise

    def save_model(self):
        """Pickle ``new_df`` and the similarity matrix into ``trained_model/``."""
        try:
            similarity = self._cached_similarity()
            # ``with`` closes each file even if pickling fails.
            with open('trained_model/movie_list.pkl','wb') as movie_file:
                pickle.dump(self.new_df, movie_file)
            with open('trained_model/similarity.pkl','wb') as similarity_file:
                pickle.dump(similarity, similarity_file)
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the save_model function. Error:: %s' % ex)
            raise
# Constructing Embedding runs the preprocessing pipeline
# (Process().drop_columns()) inside __init__.
embed = Embedding()
# Prints the titles recommended for 'Spider-Man 2'.
embed.recommend('Spider-Man 2')