Skip to content

Commit 56c311a

Browse files
author
Kool-Cool
committed
initial Commit
0 parents  commit 56c311a

File tree

11,690 files changed

+2283733
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

11,690 files changed

+2283733
-0
lines changed

__pycache__/app.cpython-310.pyc

773 Bytes
Binary file not shown.

__pycache__/model.cpython-310.pyc

2.86 KB
Binary file not shown.

app.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from flask import Flask , render_template , request
2+
from model import get_recommendations ,cosine_sim2 ,df2
3+
4+
# WSGI application object; the route decorators below register views on it.
app = Flask(__name__)


@app.route("/")
def home():
    """Render the landing page.

    The full ``title`` column of ``df2`` is handed to the template as
    ``movie_title``; the template serialises it to JSON to feed the
    autocomplete widget.
    """
    return render_template("home.html", movie_title=df2["title"])
9+
10+
11+
@app.route('/recommendations', methods=['POST'])
def recommendations():
    """Return the rendered recommendation list for the posted movie title.

    Reads the ``title`` form field, looks up similar movies with
    ``get_recommendations`` (using the ``cosine_sim2`` matrix) and renders
    ``recommendations.html``.

    Responses:
        200 -- recommendations rendered.
        400 -- the ``title`` field is missing or blank.
        404 -- the title is not in the dataset; the template is still
               rendered, with an empty recommendation list.
    """
    # Get the movie title entered by the user
    title = request.form.get('title', '')
    if not title.strip():
        return 'Please enter a movie title.', 400

    # Get the recommendations using the get_recommendations function.
    # An unknown title surfaces as KeyError from the title -> row lookup;
    # turn that into a clean 404 instead of an unhandled 500.
    try:
        recommended_movies = get_recommendations(title, cosine_sim2)
    except KeyError:
        return render_template('recommendations.html', title=title, recommendations=[]), 404

    # Pass the recommendations to the template for displaying
    return render_template('recommendations.html', title=title, recommendations=recommended_movies)
21+
22+
23+
if __name__ == "__main__":
    import os

    # The Werkzeug interactive debugger lets anyone who can reach the server
    # execute arbitrary Python, so it must be opted into explicitly
    # (e.g. `FLASK_DEBUG=1 python app.py`) rather than hard-coded on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true", "yes"}
    app.run(debug=debug_enabled)

data/tmdb_5000_credits.csv

Lines changed: 4804 additions & 0 deletions
Large diffs are not rendered by default.

data/tmdb_5000_movies.csv

Lines changed: 4804 additions & 0 deletions
Large diffs are not rendered by default.

model.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
2+
import pandas as pd
3+
import numpy as np
4+
import sklearn
5+
import pickle
6+
7+
8+
# https://www.kaggle.com/datasets/tmdb/tmdb-movie-metadata
9+
10+
# od.download("https://www.kaggle.com/datasets/tmdb/tmdb-movie-metadata")
11+
12+
# Load the credits and movies tables. The paths are relative, so they are
# resolved against the current working directory.
df1 = pd.read_csv('data/tmdb_5000_credits.csv')
df2 = pd.read_csv('data/tmdb_5000_movies.csv')

# print(df1.info())
# print(df2.info())

# Rename the credits columns positionally so the join key is called 'id'.
# NOTE: 'tittle' is intentionally NOT spelled 'title'. The rest of this module
# reads df2['title'] after the merge; if the credits table also carried a
# 'title' column, pandas would suffix the overlapping names (title_x/title_y)
# and that lookup would fail.
df1.columns = ['id', 'tittle', 'cast', 'crew']
df2 = df2.merge(df1, on='id')
20+
21+
# Import TfidfVectorizer from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer

# Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

# Replace NaN with an empty string
df2['overview'] = df2['overview'].fillna('')

# Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(df2['overview'])

# Output the shape of tfidf_matrix
# print(tfidf_matrix.shape)

# Import linear_kernel
from sklearn.metrics.pairwise import linear_kernel

# Compute the similarity matrix as a plain dot product of the TF-IDF rows.
# NOTE(review): this equals cosine similarity only if the rows are
# L2-normalised, which is TfidfVectorizer's documented default -- confirm.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Construct a reverse map from movie title to row position.
# Duplicates must be removed on the *index* (the titles): Series.drop_duplicates()
# compares the values, which are unique row numbers, so it would remove nothing
# and a repeated title would make `indices[title]` return a Series.
indices = pd.Series(df2.index, index=df2['title'])
indices = indices[~indices.index.duplicated(keep='first')]
43+
44+
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Args:
        title: Movie title to look up in the module-level ``indices`` map.
        cosine_sim: Square similarity matrix indexed by row position in
            ``df2``. Defaults to the overview-based matrix that exists when
            this function is defined.
        top_n: Maximum number of titles to return (default 10).

    Returns:
        A slice of ``df2['title']`` holding the ``top_n`` most similar
        movies, best match first.

    Raises:
        KeyError: If ``title`` is not present in ``indices``.
    """
    # Get the index of the movie that matches the title
    idx = indices[title]

    # A title that appears more than once yields a Series of row positions;
    # use the first occurrence so the matrix lookup below stays one row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Rank every movie by its pairwise similarity to the query movie
    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Drop the query movie by position rather than assuming it sorts first:
    # ties (e.g. an identical or all-zero feature vector) can put another
    # movie ahead of it.
    movie_indices = [i for i, _ in ranked if i != idx][:top_n]

    # Return the most similar movies
    return df2['title'].iloc[movie_indices]
63+
64+
# print(get_recommendations('Wall Street: Money Never Sleeps'))
65+
66+
67+
68+
69+
# Parse the stringified features into their corresponding python objects.
# literal_eval only evaluates Python literals, so it is safe to run on CSV text.
from ast import literal_eval

features = ['cast', 'crew', 'keywords', 'genres']
for feature in features:
    df2[feature] = df2[feature].apply(literal_eval)
75+
76+
77+
def get_director(x):
    """Return the name of the first crew member whose job is 'Director'.

    Args:
        x: List of crew dicts, each expected to carry 'job' and 'name' keys.

    Returns:
        The director's name, or ``np.nan`` when no director is listed or
        when ``x`` is not a list (e.g. a missing value).
    """
    if not isinstance(x, list):
        return np.nan
    for member in x:
        # .get() so a crew entry without a 'job' key is skipped, not fatal
        if member.get('job') == 'Director':
            return member['name']
    return np.nan
83+
84+
85+
def get_list(x, limit=3):
    """Return the first ``limit`` 'name' values from a list of dicts.

    Args:
        x: List of dicts that each carry a 'name' key.
        limit: Maximum number of names to keep (default 3).

    Returns:
        At most ``limit`` names in their original order -- the whole list when
        it is shorter than ``limit``. An empty list is returned for
        missing/malformed (non-list) input.
    """
    if isinstance(x, list):
        # Slicing already copes with lists shorter than `limit`
        return [entry['name'] for entry in x][:limit]

    # Return empty list in case of missing/malformed data
    return []
96+
97+
# Define new director, cast, genres and keywords features that are in a suitable form.
df2['director'] = df2['crew'].apply(get_director)

# Reduce each list-of-dicts feature to (at most) its first three names
features = ['cast', 'keywords', 'genres']
for feature in features:
    df2[feature] = df2[feature].apply(get_list)
103+
104+
105+
def clean_data(x):
    """Lower-case a name (or list of names) and remove the spaces.

    Removing spaces fuses multi-word names into one token
    (e.g. 'Tom Hanks' -> 'tomhanks').

    Args:
        x: A list of strings, a single string, or anything else (e.g. NaN
            for a missing director).

    Returns:
        A list of cleaned strings for list input, a cleaned string for
        string input, and '' for any other value.
    """
    if isinstance(x, list):
        return [item.replace(" ", "").lower() for item in x]
    # Check if director exists. If not, return empty string
    if isinstance(x, str):
        return x.replace(" ", "").lower()
    return ''
115+
116+
117+
# Apply clean_data function to your features.
features = ['cast', 'keywords', 'director', 'genres']

for feature in features:
    df2[feature] = df2[feature].apply(clean_data)
122+
123+
124+
def create_soup(x):
    """Join a row's keywords, cast, director and genres into one string.

    Args:
        x: Mapping (e.g. a DataFrame row) with list-valued 'keywords', 'cast'
            and 'genres' entries and a string-valued 'director' entry.

    Returns:
        The four fields separated by single spaces, in the order
        keywords, cast, director, genres.
    """
    keywords = ' '.join(x['keywords'])
    cast = ' '.join(x['cast'])
    genres = ' '.join(x['genres'])
    return keywords + ' ' + cast + ' ' + x['director'] + ' ' + genres
126+
# Build the combined metadata string for every movie
df2['soup'] = df2.apply(create_soup, axis=1)

# Import CountVectorizer and create the count matrix
from sklearn.feature_extraction.text import CountVectorizer

count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(df2['soup'])

# Compute the Cosine Similarity matrix based on the count_matrix
from sklearn.metrics.pairwise import cosine_similarity

cosine_sim2 = cosine_similarity(count_matrix, count_matrix)

# Reset index of our main DataFrame and construct reverse mapping as before.
# Keep only the first row for each title so that `indices[title]` is always a
# single row position, never a Series.
df2 = df2.reset_index()
indices = pd.Series(df2.index, index=df2['title'])
indices = indices[~indices.index.duplicated(keep='first')]
145+
146+
# print(get_recommendations('The Avengers', cosine_sim2))
147+
148+

requirements.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
flask
2+
pandas
3+
numpy
4+
opendatasets
5+
scikit-learn

templates/home.html

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <!-- A document may have only one <title>; the stray "Autocomplete Example" title was dropped. -->
  <title>Movie Recommendation</title>
  <link rel="stylesheet" href="https://code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">
  <script src="https://code.jquery.com/jquery-1.12.4.js"></script>
  <script src="https://code.jquery.com/ui/1.12.1/jquery-ui.js"></script>
  <script>
    // Wire the title input to a jQuery UI autocomplete fed by the movie titles
    // passed in from the view.
    $(function() {
      var availableTags = {{ movie_title.tolist()|tojson }};
      $("#tags").autocomplete({
        source: availableTags,
        response: function(event, ui) {
          if (ui.content.length === 0) {
            $("#message").text("No results found");
          } else {
            $("#message").empty();
          }
        }
      });
    });

    // POST the chosen title and show the server-rendered recommendations.
    function submitForm() {
      var selectedTag = $("#tags").val();

      $.ajax({
        type: "POST",
        url: "/recommendations",
        data: { title: selectedTag },
        success: function(response) {
          // Update the recommendation section with the response
          $("#recommendationSection").html(response);
        },
        error: function() {
          // Without this handler a failed request leaves the page unchanged.
          // .text() is used so the user-typed title is never parsed as HTML.
          $("#recommendationSection").text(
            "Could not get recommendations for \"" + selectedTag + "\"."
          );
        }
      });
    }
  </script>
</head>

<body>
  <div class="ui-widget">
    <label for="tags">Tags: </label>
    <input id="tags">
    <div id="message"></div>
  </div>

  <br>
  <!-- type="button": there is no enclosing form, the click handler does the work -->
  <button type="button" onclick="submitForm()">Submit</button>

  <div id="recommendationSection"></div>
</body>
</html>

templates/recommendations.html

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <title>Movie Recommendations</title>
</head>
<body>
  <h1>Movie Recommendations for "{{ title }}"</h1>
  <ul>
    {% for movie in recommendations %}
    <li>{{ movie }}</li>
    {% else %}
    {# Rendered only when the recommendations sequence is empty #}
    <li>No recommendations found.</li>
    {% endfor %}
  </ul>
</body>
</html>
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pip

0 commit comments

Comments
 (0)