Kool-Cool
diff --git a/‎__pycache__/app.cpython-310.pyc
773 Bytes b/‎__pycache__/app.cpython-310.pyc
773 Bytes
diff --git a/‎__pycache__/model.cpython-310.pyc
2.86 KB b/‎__pycache__/model.cpython-310.pyc
2.86 KB
diff --git a/‎app.py
Lines changed: 24 additions & 0 deletions b/‎app.py
Lines changed: 24 additions & 0 deletions
diff --git a/‎data/tmdb_5000_credits.csv
Lines changed: 4804 additions & 0 deletions b/‎data/tmdb_5000_credits.csv
Lines changed: 4804 additions & 0 deletions
diff --git a/‎data/tmdb_5000_movies.csv
Lines changed: 4804 additions & 0 deletions b/‎data/tmdb_5000_movies.csv
Lines changed: 4804 additions & 0 deletions
diff --git a/‎model.py
Lines changed: 148 additions & 0 deletions b/‎model.py
Lines changed: 148 additions & 0 deletions
diff --git a/‎requirements.txt
Lines changed: 5 additions & 0 deletions b/‎requirements.txt
Lines changed: 5 additions & 0 deletions
diff --git a/‎templates/home.html
Lines changed: 53 additions & 0 deletions b/‎templates/home.html
Lines changed: 53 additions & 0 deletions
diff --git a/‎templates/recommendations.html
Lines changed: 14 additions & 0 deletions b/‎templates/recommendations.html
Lines changed: 14 additions & 0 deletions
diff --git a/‎venv/Lib/site-packages/Flask-2.3.2.dist-info/INSTALLER
Lines changed: 1 addition & 0 deletions b/‎venv/Lib/site-packages/Flask-2.3.2.dist-info/INSTALLER
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1,24 @@
+from flask import Flask , render_template , request 
+from model import get_recommendations ,cosine_sim2 ,df2
+
+# Application object; the route decorators below register their views on it.
+app = Flask(import_name=__name__)
+
+@app.route("/")
+def home():
+    """Render the landing page, handing it every movie title for autocomplete."""
+    all_titles = df2["title"]
+    return render_template("home.html", movie_title=all_titles)
+    
+
+@app.route('/recommendations', methods=['POST'])
+def recommendations():
+    """Render the recommendation list for the movie title posted in the form.
+
+    Reads the ``title`` form field, looks up similar movies with
+    ``get_recommendations`` using the ``cosine_sim2`` matrix, and renders
+    ``recommendations.html``.
+
+    Returns:
+        The rendered template. When the title is not a known movie, the
+        template is rendered with an empty list and a 404 status instead of
+        letting the lookup error surface as a 500.
+    """
+    # Get the movie title entered by the user (a missing field is rejected by Flask)
+    title = request.form['title']
+
+    try:
+        recommended_movies = get_recommendations(title, cosine_sim2)
+    except KeyError:
+        # The title lookup failed: free-text input that matches no movie.
+        # The template escapes ``title``, so echoing it back is safe.
+        return render_template('recommendations.html', title=title, recommendations=[]), 404
+
+    # Pass the recommendations to the template for displaying
+    return render_template('recommendations.html', title=title, recommendations=recommended_movies)
+
+
+# Development entry point: only runs when the file is executed directly.
+# NOTE(review): debug mode is on; confirm this path is never used to serve production traffic.
+if __name__ == "__main__":
+    app.run(debug=True)
@@ -0,0 +1,148 @@
+
+import pandas as pd
+import numpy as np
+import sklearn
+import pickle
+
+
+# https://www.kaggle.com/datasets/tmdb/tmdb-movie-metadata
+
+# od.download("https://www.kaggle.com/datasets/tmdb/tmdb-movie-metadata")
+
+# Load the credits table and the movie metadata table from the local data folder.
+df1 = pd.read_csv('data/tmdb_5000_credits.csv')
+df2 = pd.read_csv('data/tmdb_5000_movies.csv')
+
+# print(df1.info())
+# print(df2.info())
+
+# Rename the credits columns positionally so the join key is called 'id'.
+# NOTE(review): 'tittle' looks like a typo, but it presumably keeps the merge from
+# producing title_x/title_y, which would break the later df2['title'] lookups.
+# Confirm before "fixing" the spelling.
+df1.columns = ['id', 'tittle', 'cast', 'crew']
+df2 = pd.merge(df2, df1, on='id')
+
+# TF-IDF vectorizer used to turn each plot overview into a weighted term vector
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+# Ignore English stop words such as 'the' and 'a' when building the vocabulary
+tfidf = TfidfVectorizer(stop_words='english')
+
+# A missing overview becomes an empty string so every row can be vectorized
+df2['overview'] = df2['overview'].fillna('')
+
+# Fit the vocabulary and build the TF-IDF matrix in one step
+tfidf_matrix = tfidf.fit_transform(df2['overview'])
+
+# Inspect the matrix dimensions when debugging
+# print(tfidf_matrix.shape)
+from sklearn.metrics.pairwise import linear_kernel
+
+# Pairwise similarity of every overview against every other overview
+cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
+
+# Reverse map from movie title to row label.
+# NOTE(review): drop_duplicates() compares the Series values (the row labels), so
+# repeated titles presumably remain in the index - verify.
+indices = pd.Series(df2.index, index=df2['title']).drop_duplicates()
+
+# Function that takes in movie title as input and outputs most similar movies
+def get_recommendations(title, cosine_sim=cosine_sim):
+    """Return the titles of the 10 movies most similar to ``title``.
+
+    Args:
+        title: Movie title to look up in the module-level ``indices`` map.
+        cosine_sim: Square similarity matrix whose row/column positions line
+            up with the rows of ``df2``.
+
+    Returns:
+        A slice of ``df2['title']`` holding up to 10 titles, most similar first.
+
+    Raises:
+        KeyError: If ``title`` is not present in ``indices``.
+    """
+    # Get the index of the movie that matches the title
+    idx = indices[title]
+
+    # When several movies share the title the lookup yields a Series of
+    # positions instead of a scalar; use the first match so the row selection
+    # below stays one-dimensional.
+    if isinstance(idx, pd.Series):
+        idx = idx.iloc[0]
+
+    # Get the pairwise similarity scores of all movies with that movie
+    sim_scores = list(enumerate(cosine_sim[idx]))
+
+    # Drop the query movie by position. Assuming it always sorts first is
+    # wrong when another movie ties with it or its own vector is all zeros.
+    sim_scores = [pair for pair in sim_scores if pair[0] != idx]
+
+    # Sort the movies based on the similarity scores
+    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
+
+    # Keep the 10 most similar movies
+    sim_scores = sim_scores[:10]
+
+    # Get the movie indices
+    movie_indices = [i[0] for i in sim_scores]
+
+    # Return the top 10 most similar movies
+    return df2['title'].iloc[movie_indices]
+
+# print(get_recommendations('Wall Street: Money Never Sleeps'))
+
+
+
+
+# These columns are stored as stringified Python literals; turn them back into Python objects
+from ast import literal_eval
+
+features = ['cast', 'crew', 'keywords', 'genres']
+for feature in features:
+    parsed_column = df2[feature].apply(literal_eval)
+    df2[feature] = parsed_column
+
+
+# Get the director's name from the crew feature. If director is not listed, return NaN
+def get_director(x):
+    """Return the name of the first crew entry whose job is 'Director'.
+
+    ``x`` is an iterable of crew dicts carrying 'job' and 'name' keys.
+    NaN is returned when no entry has the 'Director' job.
+    """
+    directors = (member['name'] for member in x if member['job'] == 'Director')
+    return next(directors, np.nan)
+
+
+# Returns at most the first 3 names of a list of dicts.
+def get_list(x):
+    """Return the 'name' of up to the first three entries of ``x``.
+
+    Every entry's 'name' is read before the result is trimmed to three.
+    Anything that is not a list (missing or malformed data) gives an empty list.
+    """
+    if not isinstance(x, list):
+        return []
+
+    # Slicing already copes with lists shorter than three elements
+    return [entry['name'] for entry in x][:3]
+
+# Add a 'director' column taken from the crew entries
+director_column = df2['crew'].apply(get_director)
+df2['director'] = director_column
+
+# Reduce cast, keywords and genres to short lists of names
+features = ['cast', 'keywords', 'genres']
+for feature in features:
+    trimmed_column = df2[feature].apply(get_list)
+    df2[feature] = trimmed_column
+
+
+# Function to convert all strings to lower case and strip names of spaces
+def clean_data(x):
+    """Lower-case ``x`` and remove its spaces.
+
+    A list is cleaned element by element, a string is cleaned directly, and
+    any other value (for example a missing director) becomes an empty string.
+    """
+    if isinstance(x, list):
+        return [item.replace(" ", "").lower() for item in x]
+    if isinstance(x, str):
+        return x.replace(" ", "").lower()
+    # Neither a list nor a string: treat the value as missing
+    return ''
+
+
+# Normalise the text of every column that feeds the metadata soup
+features = ['cast', 'keywords', 'director', 'genres']
+
+for feature in features:
+    cleaned_column = df2[feature].apply(clean_data)
+    df2[feature] = cleaned_column
+
+
+def create_soup(x):
+    """Join a row's keywords, cast, director and genres into one space-separated string."""
+    parts = [
+        ' '.join(x['keywords']),
+        ' '.join(x['cast']),
+        x['director'],
+        ' '.join(x['genres']),
+    ]
+    return ' '.join(parts)
+# Build the soup string row by row
+df2['soup'] = df2.apply(create_soup, axis='columns')
+
+
+
+# Raw term counts over the metadata soup, again ignoring English stop words
+from sklearn.feature_extraction.text import CountVectorizer
+
+count = CountVectorizer(stop_words='english')
+count_matrix = count.fit_transform(df2['soup'])
+
+# Cosine similarity between every pair of soup count vectors
+from sklearn.metrics.pairwise import cosine_similarity
+
+cosine_sim2 = cosine_similarity(count_matrix, count_matrix)
+
+
+# Renumber the rows of the main DataFrame, then rebuild the title -> row lookup.
+# This replaces the `indices` map built earlier in the module.
+df2 = df2.reset_index(drop=False)
+indices = pd.Series(data=df2.index, index=df2['title'])
+
+# print(get_recommendations('The Avengers', cosine_sim2))
+
+
@@ -0,0 +1,5 @@
+flask
+pandas
+numpy
+opendatasets
+scikit-learn
@@ -0,0 +1,53 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <!-- A document may have only one <title>; the stray "Autocomplete Example" one was removed. -->
+  <title>Movie Recommendation</title>
+  <!-- Load the stylesheet over https like the scripts, so it also resolves when the page is not served over http(s). -->
+  <link rel="stylesheet" href="https://code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">
+  <script src="https://code.jquery.com/jquery-1.12.4.js"></script>
+  <script src="https://code.jquery.com/ui/1.12.1/jquery-ui.js"></script>
+  <script>
+    // Attach jQuery UI autocomplete to the input, fed by the titles rendered by the server.
+    $(function() {
+      var availableTags = {{ movie_title.tolist()|tojson }};
+      $("#tags").autocomplete({
+        source: availableTags,
+        response: function(event, ui) {
+          // Tell the user when nothing matches what they typed.
+          if (ui.content.length === 0) {
+            $("#message").text("No results found");
+          } else {
+            $("#message").empty();
+          }
+        }
+      });
+    });
+
+    // POST the entered title to /recommendations and show the returned markup.
+    function submitForm() {
+      var selectedTag = $("#tags").val();
+
+      $.ajax({
+        type: "POST",
+        url: "/recommendations",
+        data: { title: selectedTag },
+        success: function(response) {
+          // Update the recommendation section with the response
+          $("#message").empty();
+          $("#recommendationSection").html(response);
+        },
+        error: function() {
+          // Without this handler a failed request leaves the page silently unchanged.
+          $("#recommendationSection").empty();
+          $("#message").text("Could not load recommendations for that title");
+        }
+      });
+    }
+  </script>
+</head>
+
+<body>
+  <div class="ui-widget">
+    <label for="tags">Tags: </label>
+    <input id="tags">
+    <div id="message"></div>
+  </div>
+
+  <br>
+  <button type="submit" onclick="submitForm()">Submit</button>
+
+  <div id="recommendationSection"></div>
+</body>
+</html>
@@ -0,0 +1,14 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="utf-8">
+    <title>Movie Recommendations</title>
+</head>
+<body>
+    {# `title` and every `movie` are escaped by the template engine before output. #}
+    <h1>Movie Recommendations for "{{ title }}"</h1>
+    <ul>
+        {% for movie in recommendations %}
+        <li>{{ movie }}</li>
+        {% else %}
+        {# Rendered only when `recommendations` is empty. #}
+        <li>No recommendations found</li>
+        {% endfor %}
+    </ul>
+</body>
+</html>
@@ -0,0 +1 @@
+pip
-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +flask
 +pandas
 +numpy
 +opendatasets
 +scikit-learn