diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..63bb312 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ + +### JupyterNotebooks ### +.ipynb_checkpoints +*/.ipynb_checkpoints/* + +# IPython +profile_default/ +ipython_config.py + +# Ignore csv files +# DataSets/* + +# Remove previous ipynb_checkpoints +# git rm -r .ipynb_checkpoints/ diff --git a/.ipynb_checkpoints/Data Preparation-checkpoint.ipynb b/.ipynb_checkpoints/Data Preparation-checkpoint.ipynb deleted file mode 100644 index 0a8fc05..0000000 --- a/.ipynb_checkpoints/Data Preparation-checkpoint.ipynb +++ /dev/null @@ -1,3863 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0bede181-17d5-4591-8fd1-5c1ea6763460", - "metadata": { - "tags": [] - }, - "source": [ - "# Data Preparation:\n", - "\n", - "Dataset from Kaggle : **\"MyAnimeList\"** by *Azathoth* \n", - "Source: https://www.kaggle.com/datasets/azathoth42/myanimelist/data (requires login)\n", - "\n", - "---\n", - "\n", - "### Essential Libraries\n", - "\n", - "Let us begin by importing the essential Python Libraries.\n", - "\n", - "> NumPy : Library for Numeric Computations in Python \n", - "> Pandas : Library for Data Acquisition and Preparation \n", - "> Matplotlib : Low-level library for Data Visualization \n", - "> Seaborn : Higher-level library for Data Visualization " - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "d8486f6e-2454-43d1-be41-b8361d8a9675", - "metadata": {}, - "outputs": [], - "source": [ - "# Basic Libraries\n", - "import numpy as np\n", - "import pandas as pd\n", - "import seaborn as sb\n", - "import matplotlib.pyplot as plt # we only need pyplot\n", - "sb.set() # set the default Seaborn style for graphics" - ] - }, - { - "cell_type": "markdown", - "id": "70e6c3f6-53c8-4814-9b3f-14f8693ea516", - "metadata": { - "jp-MarkdownHeadingCollapsed": true, - "tags": [] - }, - "source": [ - "---\n", - "\n", - "### Import the Dataset (UserList)\n", - "\n", - "The dataset is in CSV 
format; hence we use the `read_csv` function from Pandas. \n", - "Immediately after importing, take a quick look at the data using the `head` function." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "c677c3ee-5f8f-4503-967d-4332e1a0fa36", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
usernameuser_iduser_watchinguser_completeduser_onholduser_droppeduser_plantowatchuser_days_spent_watchinggenderlocationbirth_dateaccess_rankjoin_datelast_onlinestats_mean_scorestats_rewatchedstats_episodes
0karthiga225515334910055.31FemaleChennai, India1990-04-29NaN2013-03-032014-02-04 01:32:007.430.03391.0
1RedvelvetDaisuki189760661396390206118.07FemaleManila1995-01-01NaN2012-12-131900-05-13 02:47:006.7880.07094.0
2Damonashu373264519527255983.70MaleDetroit,Michigan1991-08-01NaN2008-02-131900-03-24 12:48:006.156.04936.0
3bskai228342254142511167.16MaleNayarit, Mexico1990-12-14NaN2009-08-312014-05-12 16:35:008.271.010081.0
4shuzzable234778136721622535.48NaNNaNNaNNaN2013-03-252015-09-09 21:54:009.067.02154.0
\n", - "
" - ], - "text/plain": [ - " username user_id user_watching user_completed user_onhold \\\n", - "0 karthiga 2255153 3 49 1 \n", - "1 RedvelvetDaisuki 1897606 61 396 39 \n", - "2 Damonashu 37326 45 195 27 \n", - "3 bskai 228342 25 414 2 \n", - "4 shuzzable 2347781 36 72 16 \n", - "\n", - " user_dropped user_plantowatch user_days_spent_watching gender \\\n", - "0 0 0 55.31 Female \n", - "1 0 206 118.07 Female \n", - "2 25 59 83.70 Male \n", - "3 5 11 167.16 Male \n", - "4 2 25 35.48 NaN \n", - "\n", - " location birth_date access_rank join_date last_online \\\n", - "0 Chennai, India 1990-04-29 NaN 2013-03-03 2014-02-04 01:32:00 \n", - "1 Manila 1995-01-01 NaN 2012-12-13 1900-05-13 02:47:00 \n", - "2 Detroit,Michigan 1991-08-01 NaN 2008-02-13 1900-03-24 12:48:00 \n", - "3 Nayarit, Mexico 1990-12-14 NaN 2009-08-31 2014-05-12 16:35:00 \n", - "4 NaN NaN NaN 2013-03-25 2015-09-09 21:54:00 \n", - "\n", - " stats_mean_score stats_rewatched stats_episodes \n", - "0 7.43 0.0 3391.0 \n", - "1 6.78 80.0 7094.0 \n", - "2 6.15 6.0 4936.0 \n", - "3 8.27 1.0 10081.0 \n", - "4 9.06 7.0 2154.0 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "userlist = pd.read_csv('UserList.csv')\n", - "userlist.head()" - ] - }, - { - "cell_type": "markdown", - "id": "b40fe8d8-2b08-41fc-bc32-61566cce8976", - "metadata": { - "tags": [] - }, - "source": [ - "Description of the dataset, as available on Kaggle, is as follows.\n", - "\n", - "\n", - "> **username** : user name\n", - "> **user_id** : ID for each user \n", - "> **user_watching** : how many anime currently the user is watching \n", - "> **user_completed** : how many anime watched by the user \n", - "> **user_onhold** : how many anime is watching halfway \n", - "> **user_dropped** : how many anime the user remove from his list \n", - "> **user_plantowatch** : how many anime the user added to his watch list \n", - "> **user_days_spent_watching** : How much time the user spend on 
watching anime \n", - "> **gender** : user gender \n", - "> **location** : where the user is from \n", - "> **birth_date** : user's date of birth \n", - "> **access_rank** : undocumented (empty for every row in this dataset)\n", - "> **join_date** : when the user joined the community \n", - "> **last_online** : when the user was last seen \n", - "> **stats_mean_score** : average score the user gives to anime \n", - "> **stats_rewatched** : how many episodes the user has rewatched\n", - "> **stats_episodes** : how many episodes the user has completed\n", - "---\n", - "\n", - "Check the vital statistics of the dataset using the `type` function and the `shape` attribute." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "5f724fbe-04ca-4a41-bc73-75e7642da727", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data type : \n", - "Data dims : (302675, 17)\n" - ] - } - ], - "source": [ - "print(\"Data type : \", type(userlist))\n", - "print(\"Data dims : \", userlist.shape)" - ] - }, - { - "cell_type": "markdown", - "id": "4e6062d4-9a57-436a-a088-0a83c1ae20b7", - "metadata": {}, - "source": [ - "Check the variables (and their types) in the dataset using the `info` method."
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "a9606662-0f99-4e0b-82a8-e6b377b556d0", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 302675 entries, 0 to 302674\n", - "Data columns (total 17 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 username 302674 non-null object \n", - " 1 user_id 302675 non-null int64 \n", - " 2 user_watching 302675 non-null int64 \n", - " 3 user_completed 302675 non-null int64 \n", - " 4 user_onhold 302675 non-null int64 \n", - " 5 user_dropped 302675 non-null int64 \n", - " 6 user_plantowatch 302675 non-null int64 \n", - " 7 user_days_spent_watching 302675 non-null float64\n", - " 8 gender 217800 non-null object \n", - " 9 location 156773 non-null object \n", - " 10 birth_date 168749 non-null object \n", - " 11 access_rank 0 non-null float64\n", - " 12 join_date 302546 non-null object \n", - " 13 last_online 302546 non-null object \n", - " 14 stats_mean_score 302546 non-null float64\n", - " 15 stats_rewatched 302546 non-null float64\n", - " 16 stats_episodes 302546 non-null float64\n", - "dtypes: float64(5), int64(6), object(6)\n", - "memory usage: 39.3+ MB\n" - ] - } - ], - "source": [ - "userlist.info()" - ] - }, - { - "cell_type": "markdown", - "id": "9551ce66-7302-432d-abbe-5258857cf2a5", - "metadata": { - "tags": [] - }, - "source": [ - "---\n", - "\n", - "### Import the Dataset (AnimeList)\n", - "\n", - "The dataset is in CSV format; hence we use the `read_csv` function from Pandas. \n", - "Immediately after importing, take a quick look at the data using the `head` function." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "012c1bc0-c7fe-4e5e-b698-23d5f45f9ca2", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
anime_idtitletitle_englishtitle_japanesetitle_synonymsimage_urltypesourceepisodesstatus...backgroundpremieredbroadcastrelatedproducerlicensorstudiogenreopening_themeending_theme
011013Inu x Boku SSInu X Boku Secret Service妖狐×僕SSYouko x Boku SShttps://myanimelist.cdn-dena.com/images/anime/...TVManga12Finished Airing...Inu x Boku SS was licensed by Sentai Filmworks...Winter 2012Fridays at Unknown{'Adaptation': [{'mal_id': 17207, 'type': 'man...Aniplex, Square Enix, Mainichi Broadcasting Sy...Sentai FilmworksDavid ProductionComedy, Supernatural, Romance, Shounen['\"Nirvana\" by MUCC']['#1: \"Nirvana\" by MUCC (eps 1, 11-12)', '#2: ...
12104Seto no HanayomeMy Bride is a Mermaid瀬戸の花嫁The Inland Sea Bridehttps://myanimelist.cdn-dena.com/images/anime/...TVManga26Finished Airing...NaNSpring 2007Unknown{'Adaptation': [{'mal_id': 759, 'type': 'manga...TV Tokyo, AIC, Square Enix, SotsuFunimationGonzoComedy, Parody, Romance, School, Shounen['\"Romantic summer\" by SUN&LUNAR']['#1: \"Ashita e no Hikari (明日への光)\" by Asuka Hi...
25262Shugo Chara!! DokiShugo Chara!! Dokiしゅごキャラ!!どきっShugo Chara Ninenme, Shugo Chara! Second Yearhttps://myanimelist.cdn-dena.com/images/anime/...TVManga51Finished Airing...NaNFall 2008Unknown{'Adaptation': [{'mal_id': 101, 'type': 'manga...TV Tokyo, SotsuNaNSatelightComedy, Magic, School, Shoujo['#1: \"Minna no Tamago (みんなのたまご)\" by Shugo Cha...['#1: \"Rottara Rottara (ロッタラ ロッタラ)\" by Buono! ...
3721Princess TutuPrincess TutuプリンセスチュチュNaNhttps://myanimelist.cdn-dena.com/images/anime/...TVOriginal38Finished Airing...Princess Tutu aired in two parts. The first pa...Summer 2002Fridays at Unknown{'Adaptation': [{'mal_id': 1581, 'type': 'mang...Memory-Tech, GANSIS, Marvelous AQLADV FilmsHal Film MakerComedy, Drama, Magic, Romance, Fantasy['\"Morning Grace\" by Ritsuko Okazaki']['\"Watashi No Ai Wa Chiisaikeredo\" by Ritsuko ...
412365Bakuman. 3rd SeasonBakuman.バクマン。Bakuman Season 3https://myanimelist.cdn-dena.com/images/anime/...TVManga25Finished Airing...NaNFall 2012Unknown{'Adaptation': [{'mal_id': 9711, 'type': 'mang...NHK, ShueishaNaNJ.C.StaffComedy, Drama, Romance, Shounen['#1: \"Moshimo no Hanashi (もしもの話)\" by nano.RIP...['#1: \"Pride on Everyday\" by Sphere (eps 1-13)...
\n", - "

5 rows × 31 columns

\n", - "
" - ], - "text/plain": [ - " anime_id title title_english title_japanese \\\n", - "0 11013 Inu x Boku SS Inu X Boku Secret Service 妖狐×僕SS \n", - "1 2104 Seto no Hanayome My Bride is a Mermaid 瀬戸の花嫁 \n", - "2 5262 Shugo Chara!! Doki Shugo Chara!! Doki しゅごキャラ!!どきっ \n", - "3 721 Princess Tutu Princess Tutu プリンセスチュチュ \n", - "4 12365 Bakuman. 3rd Season Bakuman. バクマン。 \n", - "\n", - " title_synonyms \\\n", - "0 Youko x Boku SS \n", - "1 The Inland Sea Bride \n", - "2 Shugo Chara Ninenme, Shugo Chara! Second Year \n", - "3 NaN \n", - "4 Bakuman Season 3 \n", - "\n", - " image_url type source episodes \\\n", - "0 https://myanimelist.cdn-dena.com/images/anime/... TV Manga 12 \n", - "1 https://myanimelist.cdn-dena.com/images/anime/... TV Manga 26 \n", - "2 https://myanimelist.cdn-dena.com/images/anime/... TV Manga 51 \n", - "3 https://myanimelist.cdn-dena.com/images/anime/... TV Original 38 \n", - "4 https://myanimelist.cdn-dena.com/images/anime/... TV Manga 25 \n", - "\n", - " status ... background \\\n", - "0 Finished Airing ... Inu x Boku SS was licensed by Sentai Filmworks... \n", - "1 Finished Airing ... NaN \n", - "2 Finished Airing ... NaN \n", - "3 Finished Airing ... Princess Tutu aired in two parts. The first pa... \n", - "4 Finished Airing ... NaN \n", - "\n", - " premiered broadcast \\\n", - "0 Winter 2012 Fridays at Unknown \n", - "1 Spring 2007 Unknown \n", - "2 Fall 2008 Unknown \n", - "3 Summer 2002 Fridays at Unknown \n", - "4 Fall 2012 Unknown \n", - "\n", - " related \\\n", - "0 {'Adaptation': [{'mal_id': 17207, 'type': 'man... \n", - "1 {'Adaptation': [{'mal_id': 759, 'type': 'manga... \n", - "2 {'Adaptation': [{'mal_id': 101, 'type': 'manga... \n", - "3 {'Adaptation': [{'mal_id': 1581, 'type': 'mang... \n", - "4 {'Adaptation': [{'mal_id': 9711, 'type': 'mang... \n", - "\n", - " producer licensor \\\n", - "0 Aniplex, Square Enix, Mainichi Broadcasting Sy... 
Sentai Filmworks \n", - "1 TV Tokyo, AIC, Square Enix, Sotsu Funimation \n", - "2 TV Tokyo, Sotsu NaN \n", - "3 Memory-Tech, GANSIS, Marvelous AQL ADV Films \n", - "4 NHK, Shueisha NaN \n", - "\n", - " studio genre \\\n", - "0 David Production Comedy, Supernatural, Romance, Shounen \n", - "1 Gonzo Comedy, Parody, Romance, School, Shounen \n", - "2 Satelight Comedy, Magic, School, Shoujo \n", - "3 Hal Film Maker Comedy, Drama, Magic, Romance, Fantasy \n", - "4 J.C.Staff Comedy, Drama, Romance, Shounen \n", - "\n", - " opening_theme \\\n", - "0 ['\"Nirvana\" by MUCC'] \n", - "1 ['\"Romantic summer\" by SUN&LUNAR'] \n", - "2 ['#1: \"Minna no Tamago (みんなのたまご)\" by Shugo Cha... \n", - "3 ['\"Morning Grace\" by Ritsuko Okazaki'] \n", - "4 ['#1: \"Moshimo no Hanashi (もしもの話)\" by nano.RIP... \n", - "\n", - " ending_theme \n", - "0 ['#1: \"Nirvana\" by MUCC (eps 1, 11-12)', '#2: ... \n", - "1 ['#1: \"Ashita e no Hikari (明日への光)\" by Asuka Hi... \n", - "2 ['#1: \"Rottara Rottara (ロッタラ ロッタラ)\" by Buono! ... \n", - "3 ['\"Watashi No Ai Wa Chiisaikeredo\" by Ritsuko ... \n", - "4 ['#1: \"Pride on Everyday\" by Sphere (eps 1-13)... 
\n", - "\n", - "[5 rows x 31 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "animelist = pd.read_csv('AnimeList.csv')\n", - "animelist.head()\n" - ] - }, - { - "cell_type": "markdown", - "id": "7d76b827-dbdf-46fc-b715-ab9b6f277271", - "metadata": { - "tags": [] - }, - "source": [ - "Description of the dataset, as available on Kaggle, is as follows.\n", - "\n", - "\n", - "> **anime_id** : ID for each anime show \n", - "> **title** : Anime title \n", - "> **title_english** : Anime title in English \n", - "> **title_japanese** : Anime title in Japanese \n", - "> **image_url** : Front poster \n", - "> **type** : Anime types (TV, Movie, etc) \n", - "> **source** : Anime source (Manga, Original) \n", - "> **episodes** : How many episodes \n", - "> **status** : Current status (airing, finished airing) \n", - "> **airing** : Is it currently airing \n", - "> **aired_string** : Start date and finished date \n", - "> **aired** : Start date and finished date as a dict-style string (`from` / `to`) \n", - "> **duration** : How long is the anime (episode or movie) \n", - "> **rating** : Anime rating (pg13, NC16, M18, R21) \n", - "> **score** : Overall score of the anime (out of 10) \n", - "> **scored_by** : How many users gave a score to the anime \n", - "> **rank** : Rank based on the score of the anime \n", - "> **popularity** : Rank based on how many people watch the anime \n", - "> **members** : How many people watch the anime \n", - "> **favorites** : How many people favorite the anime \n", - "> **background** : Background of the anime \n", - "> **premiered** : Which season the anime came out \n", - "> **broadcast** : Which day it is broadcast \n", - "> **related** : Whether there are any sequels or prequels \n", - "> **producer** : Which companies produced the anime \n", - "> **licensor** : Which company licensed the anime \n", - "> **studio** : which studio animated the anime \n", - "> **genre** : what are the genres in the anime \n", - "> **opening_theme** : 
opening song \n", - "> **ending_theme** : ending song \n", - "---\n", - "\n", - "Check the vital statistics of the dataset using the `type` function and the `shape` attribute." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "553d943b-a42b-41ab-b03c-871f4a870312", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data type : \n", - "Data dims : (14478, 31)\n" - ] - } - ], - "source": [ - "print(\"Data type : \", type(animelist))\n", - "print(\"Data dims : \", animelist.shape)" - ] - }, - { - "cell_type": "markdown", - "id": "4c968766-8cef-425e-af09-e56f57d4b683", - "metadata": {}, - "source": [ - "Check the variables (and their types) in the dataset using the `info` method." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "aa025f54-4b39-458a-8f33-6f834492ff8c", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 14478 entries, 0 to 14477\n", - "Data columns (total 31 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 anime_id 14478 non-null int64 \n", - " 1 title 14478 non-null object \n", - " 2 title_english 5724 non-null object \n", - " 3 title_japanese 14443 non-null object \n", - " 4 title_synonyms 8937 non-null object \n", - " 5 image_url 14382 non-null object \n", - " 6 type 14478 non-null object \n", - " 7 source 14478 non-null object \n", - " 8 episodes 14478 non-null int64 \n", - " 9 status 14478 non-null object \n", - " 10 airing 14478 non-null bool \n", - " 11 aired_string 14478 non-null object \n", - " 12 aired 14478 non-null object \n", - " 13 duration 14478 non-null object \n", - " 14 rating 13934 non-null object \n", - " 15 score 14478 non-null float64\n", - " 16 scored_by 14478 non-null int64 \n", - " 17 rank 12904 non-null float64\n", - " 18 popularity 14478 non-null int64 \n", - " 19 members 14478 non-null int64 \n", - " 
20 favorites 14478 non-null int64 \n", - " 21 background 1057 non-null object \n", - " 22 premiered 4096 non-null object \n", - " 23 broadcast 4271 non-null object \n", - " 24 related 14478 non-null object \n", - " 25 producer 8288 non-null object \n", - " 26 licensor 3373 non-null object \n", - " 27 studio 8544 non-null object \n", - " 28 genre 14414 non-null object \n", - " 29 opening_theme 14478 non-null object \n", - " 30 ending_theme 14478 non-null object \n", - "dtypes: bool(1), float64(2), int64(6), object(22)\n", - "memory usage: 3.3+ MB\n" - ] - } - ], - "source": [ - "animelist.info()\n" - ] - }, - { - "cell_type": "markdown", - "id": "6aeb2dc1-39e4-4075-aa3a-cef5767bec69", - "metadata": { - "tags": [] - }, - "source": [ - "## Clean Data (AnimeList)" - ] - }, - { - "cell_type": "markdown", - "id": "6b06f41b-24e9-4674-b638-c1cd9b328f5f", - "metadata": {}, - "source": [ - "How might we (action) for (target audiences) in order to (outcome, what is the result we would like to see)\n", - "\n", - "e.g. \n", - "How might we recommend the top 20 anime shows for anime beginners?\n", - "How might we recommend the top 10 anime shows in the winter season for anime users?" - ] - }, - { - "cell_type": "markdown", - "id": "2fa0c77b-5a83-47b7-9abc-1ab69c698e0c", - "metadata": { - "tags": [] - }, - "source": [ - "### Gathering information \n", - "---\n", - "\n", - "> Describe numeric \n", - "> Describe object \n", - "> Display columns " - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "9817e6bc-0a70-431c-8567-c53f73c7d8e3", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
anime_idepisodesscorescored_byrankpopularitymembersfavorites
count14478.00000014478.00000014478.0000001.447800e+0412904.00000014478.0000001.447800e+0414478.000000
mean17377.22986611.3083996.1424821.146003e+046439.0654067220.2595662.296640e+04311.649606
std13165.31501143.4434511.4639814.310519e+043720.2276084170.0805647.498136e+042615.554211
min1.0000000.0000000.0000000.000000e+000.0000000.0000000.000000e+000.000000
25%4389.2500001.0000005.5500004.600000e+013216.2500003611.5000002.450000e+020.000000
50%15135.0000001.0000006.3700005.010000e+026441.5000007225.5000001.679500e+032.000000
75%31146.50000012.0000007.0600003.941500e+039664.00000010827.7500001.037900e+0423.000000
max37916.0000001818.00000010.0000001.009477e+0612919.00000014487.0000001.456378e+06106895.000000
\n", - "
" - ], - "text/plain": [ - " anime_id episodes score scored_by rank \\\n", - "count 14478.000000 14478.000000 14478.000000 1.447800e+04 12904.000000 \n", - "mean 17377.229866 11.308399 6.142482 1.146003e+04 6439.065406 \n", - "std 13165.315011 43.443451 1.463981 4.310519e+04 3720.227608 \n", - "min 1.000000 0.000000 0.000000 0.000000e+00 0.000000 \n", - "25% 4389.250000 1.000000 5.550000 4.600000e+01 3216.250000 \n", - "50% 15135.000000 1.000000 6.370000 5.010000e+02 6441.500000 \n", - "75% 31146.500000 12.000000 7.060000 3.941500e+03 9664.000000 \n", - "max 37916.000000 1818.000000 10.000000 1.009477e+06 12919.000000 \n", - "\n", - " popularity members favorites \n", - "count 14478.000000 1.447800e+04 14478.000000 \n", - "mean 7220.259566 2.296640e+04 311.649606 \n", - "std 4170.080564 7.498136e+04 2615.554211 \n", - "min 0.000000 0.000000e+00 0.000000 \n", - "25% 3611.500000 2.450000e+02 0.000000 \n", - "50% 7225.500000 1.679500e+03 2.000000 \n", - "75% 10827.750000 1.037900e+04 23.000000 \n", - "max 14487.000000 1.456378e+06 106895.000000 " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "## for numeric data\n", - "animelist.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "572df98e-95fe-4148-adcf-f2f264bcfa21", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titletitle_englishtitle_japanesetitle_synonymsimage_urltypesourcestatusaired_stringaired...backgroundpremieredbroadcastrelatedproducerlicensorstudiogenreopening_themeending_theme
count144785724144438937143821447814478144781447814478...10574096427114478828833738544144141447814478
unique144775606137018575143827163100269649...103822144194203221193778454443285458
topHinamatsuriCyborg 009ゲゲゲの鬼太郎Minna no Utahttps://myanimelist.cdn-dena.com/images/anime/...TVUnknownFinished AiringNot available{'from': None, 'to': None}...Includes claymation short which was shown befo...Spring 2017Unknown[]NHKFunimationToei AnimationHentai[][]
freq246189142714210137912231691...5802241451542772672586897848807
\n", - "

4 rows × 22 columns

\n", - "
" - ], - "text/plain": [ - " title title_english title_japanese title_synonyms \\\n", - "count 14478 5724 14443 8937 \n", - "unique 14477 5606 13701 8575 \n", - "top Hinamatsuri Cyborg 009 ゲゲゲの鬼太郎 Minna no Uta \n", - "freq 2 4 6 189 \n", - "\n", - " image_url type source \\\n", - "count 14382 14478 14478 \n", - "unique 14382 7 16 \n", - "top https://myanimelist.cdn-dena.com/images/anime/... TV Unknown \n", - "freq 1 4271 4210 \n", - "\n", - " status aired_string aired ... \\\n", - "count 14478 14478 14478 ... \n", - "unique 3 10026 9649 ... \n", - "top Finished Airing Not available {'from': None, 'to': None} ... \n", - "freq 13791 223 1691 ... \n", - "\n", - " background premiered \\\n", - "count 1057 4096 \n", - "unique 1038 221 \n", - "top Includes claymation short which was shown befo... Spring 2017 \n", - "freq 5 80 \n", - "\n", - " broadcast related producer licensor studio genre \\\n", - "count 4271 14478 8288 3373 8544 14414 \n", - "unique 441 9420 3221 193 778 4544 \n", - "top Unknown [] NHK Funimation Toei Animation Hentai \n", - "freq 2241 4515 427 726 725 868 \n", - "\n", - " opening_theme ending_theme \n", - "count 14478 14478 \n", - "unique 4328 5458 \n", - "top [] [] \n", - "freq 9784 8807 \n", - "\n", - "[4 rows x 22 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "## for data that is object\n", - "animelist.describe(include=object)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "121330ee-d633-4996-90be-0e8ae2956abd", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['anime_id', 'title', 'title_english', 'title_japanese',\n", - " 'title_synonyms', 'image_url', 'type', 'source', 'episodes', 'status',\n", - " 'airing', 'aired_string', 'aired', 'duration', 'rating', 'score',\n", - " 'scored_by', 'rank', 'popularity', 'members', 'favorites', 'background',\n", - " 'premiered', 'broadcast', 'related', 'producer', 
'licensor', 'studio',\n", - " 'genre', 'opening_theme', 'ending_theme'],\n", - " dtype='object')" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "## what are the columns involved in the dataset\n", - "animelist.columns" - ] - }, - { - "cell_type": "markdown", - "id": "fb2ab278-8890-4394-be44-25cfb2a42055", - "metadata": { - "tags": [] - }, - "source": [ - "---\n", - "### Premiered \n", - "\n", - "> Convert null values to a binary indicator (1 = premiere missing, 0 = premiere present)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "c21ea1b8-02fe-4d1f-bff7-80346b4b6b21", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "## Creates a new column \"isPremiered\" that contains 1 for rows where the \"premiered\" column is null and 0 for rows where it is not null. \n", - "## This new column acts as a binary indicator of whether an anime has a premiere date; note the polarity: 1 means the premiere date is MISSING and 0 means it is present (the opposite of what the column name suggests).\n", - "animelist[\"isPremiered\"] = animelist[\"premiered\"].isnull().astype(int)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "29af9569-8be7-456e-9268-a64320a0e205", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 14478 entries, 0 to 14477\n", - "Series name: isPremiered\n", - "Non-Null Count Dtype\n", - "-------------- -----\n", - "14478 non-null int32\n", - "dtypes: int32(1)\n", - "memory usage: 56.7 KB\n" - ] - } - ], - "source": [ - "animelist.isPremiered.info()" - ] - }, - { - "cell_type": "markdown", - "id": "252f25bc-9dec-48e2-887e-5dfa77648716", - "metadata": { - "tags": [] - }, - "source": [ - "### Studio filtering\n", - "Filter the studios that are less popular (<40) and combine them into one \"SmallStudio\" category" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "4f148b2d-cc2d-41f0-a638-5a91c19f0c40", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "studio\n", - "unknown 
5934\n", - "Toei Animation 725\n", - "Sunrise 447\n", - "J.C.Staff 314\n", - "Madhouse 311\n", - " ... \n", - "Studio Junio, Annapuru 1\n", - "Tokyo Media Connections 1\n", - "Gainax, Tatsunoko Production 1\n", - "Fanworks, Imagineer 1\n", - "33 Collective 1\n", - "Name: count, Length: 779, dtype: int64" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "## calculate all the value from each studio\n", - "\n", - "## studio that is empty, replace with unknown\n", - "animelist[\"studio\"] = animelist[\"studio\"].fillna(\"unknown\")\n", - "studio_counts = animelist.studio.value_counts()\n", - "studio_counts" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "36ed0794-7ddb-4e31-a793-d8aaa680c01a", - "metadata": { - "scrolled": true, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "['Eiken',\n", - " 'Group TAC',\n", - " 'TNK',\n", - " 'Artland',\n", - " 'SynergySP',\n", - " '8bit',\n", - " 'Wit Studio',\n", - " 'Actas',\n", - " 'Manglobe',\n", - " 'Haoliners Animation League',\n", - " 'Ajia-Do',\n", - " 'MAPPA',\n", - " 'Studio Comet',\n", - " 'White Fox',\n", - " 'Mushi Production',\n", - " 'Studio Gokumi',\n", - " 'Hal Film Maker',\n", - " 'Tezuka Productions',\n", - " 'A.C.G.T.',\n", - " 'Asahi Production',\n", - " 'TYO Animations',\n", - " 'Gathering',\n", - " 'Tokyo Movie Shinsha',\n", - " 'Daume',\n", - " 'Kinema Citrus',\n", - " 'Polygon Pictures',\n", - " 'Nomad',\n", - " 'AIC A.S.T.A.',\n", - " 'T-Rex',\n", - " 'LIDENFILMS',\n", - " 'Magic Bus',\n", - " 'Studio Jam',\n", - " 'Bee Train',\n", - " 'GoHands',\n", - " 'Production IMS',\n", - " 'Trigger',\n", - " 'David Production',\n", - " 'Bandai Namco Pictures',\n", - " 'Telecom Animation Film',\n", - " 'Seven Arcs',\n", - " 'Office Takeout',\n", - " 'Asread',\n", - " 'Studio Fantasia',\n", - " 'Studio PuYUKAI',\n", - " 'RG Animation Studios',\n", - " 'dwarf',\n", - " 'AIC Plus+',\n", - " 'Seven Arcs Pictures',\n", 
- " 'Fanworks',\n", - " 'APPP',\n", - " 'Hoods Entertainment',\n", - " 'AT-2',\n", - " 'Sparkly Key Animation Studio',\n", - " 'Production I.G, Xebec',\n", - " 'Millepensee',\n", - " 'Y.O.U.C',\n", - " 'Shuka',\n", - " 'Flavors Soft',\n", - " 'Creators in Pack',\n", - " 'Animate Film',\n", - " 'ILCA',\n", - " 'Studio 9 MAiami',\n", - " 'Triangle Staff',\n", - " 'Radix',\n", - " 'Collaboration Works',\n", - " 'Suzuki Mirano',\n", - " 'CoMix Wave Films',\n", - " 'The Answer Studio',\n", - " 'Kanaban Graphics',\n", - " 'SANZIGEN',\n", - " 'EMT²',\n", - " 'Yumeta Company',\n", - " 'SANZIGEN, LIDENFILMS',\n", - " 'Passione',\n", - " 'Khara',\n", - " 'NAZ',\n", - " 'Bridge',\n", - " 'AIC Build',\n", - " 'Signal. MD',\n", - " 'Shanghai Foch Film Culture Investment',\n", - " 'Lay-duce',\n", - " 'Pie in The Sky',\n", - " 'Pierrot Plus',\n", - " 'G.CMay Animation & Film',\n", - " 'Shirogumi',\n", - " 'E&G Films',\n", - " 'G-Lam',\n", - " 'Pastel',\n", - " 'Studio 3Hz',\n", - " 'C-Station',\n", - " 'Studio Colorido',\n", - " 'Studio Eromatick',\n", - " 'Onionskin',\n", - " 'Phoenix Entertainment',\n", - " 'Silver Link., Connect',\n", - " 'Knack Productions',\n", - " 'M.S.C',\n", - " 'Digital Frontier',\n", - " 'Minakata Laboratory',\n", - " 'Fukushima Gainax',\n", - " 'Tokyo Kids',\n", - " 'Next Media Animation',\n", - " 'Xebec, OLM',\n", - " 'Yaoyorozu',\n", - " 'Trans Arts',\n", - " 'Project No.9',\n", - " 'Daewon Media',\n", - " 'Ashi Production',\n", - " 'feel., Zexcs',\n", - " 'Marza Animation Planet',\n", - " 'Pine Jam',\n", - " 'Studio 1st',\n", - " 'AIC Spirits',\n", - " 'Tatsunoko Production, Dongwoo A&E',\n", - " 'Palm Studio',\n", - " 'Graphinica',\n", - " 'Gakken Eigakyoku',\n", - " 'Telescreen BV',\n", - " 'Zero-G',\n", - " 'Gainax, Shaft',\n", - " 'BreakBottle',\n", - " 'PrimeTime',\n", - " 'Vega Entertainment',\n", - " 'Shogakukan Music & Digital Entertainment',\n", - " 'Shanghai Animation Film Studio',\n", - " 'Blue Cat',\n", - " 'Encourage Films',\n", - " 
'Opera House',\n", - " 'Studio Rikka',\n", - " 'WAO World',\n", - " 'Pops Inc.',\n", - " 'C2C',\n", - " 'Satelight, A-1 Pictures',\n", - " 'AIC Classic',\n", - " 'pH Studio',\n", - " 'Ishikawa Pro',\n", - " 'Zero-G Room',\n", - " 'Usagi Ou',\n", - " 'AIC, Artmic',\n", - " 'Kamikaze Douga',\n", - " 'TYPHOON GRAPHICS',\n", - " 'Hoods Drifters Studio',\n", - " 'Office Take Off',\n", - " 'TROYCA',\n", - " 'Sunrise, Toei Animation',\n", - " 'Rising Force',\n", - " 'Shimogumi',\n", - " 'Shochiku Animation Institute',\n", - " '10Gauge',\n", - " 'Queen Bee',\n", - " 'AIC, Artmic, Animate Film',\n", - " 'Jinnis Animation Studios',\n", - " 'Milky Cartoon',\n", - " 'Imagin, Studio Live',\n", - " 'Egg',\n", - " 'Studio Deen, DAX Production',\n", - " 'Craftar',\n", - " 'Geno Studio',\n", - " 'Science SARU',\n", - " 'Artmic',\n", - " 'Millepensee, GEMBA',\n", - " 'L²Studio',\n", - " 'Planet',\n", - " 'Oh! Production',\n", - " 'Production I.G, Madhouse',\n", - " 'Xebec Zwei',\n", - " 'Production Reed, Asahi Production',\n", - " 'Studio Gokumi, AXsiZ',\n", - " 'OLM Digital',\n", - " 'Studio Gallop, Studio Deen',\n", - " 'Charaction',\n", - " 'Xebec, Issen',\n", - " 'Telecom Animation Film, Graphinica',\n", - " 'Ordet',\n", - " 'Studio Animal',\n", - " 'W-Toon Studio',\n", - " 'Nexus',\n", - " 'Artland, TNK',\n", - " 'Ascension',\n", - " 'Picture Magic',\n", - " 'Production I.G, M.S.C',\n", - " 'Chaos Project',\n", - " 'Gathering, Lesprit',\n", - " 'JCF',\n", - " 'TAKI Corporation',\n", - " 'Studio Egg',\n", - " 'Studio Pierrot, Pierrot Plus',\n", - " 'Studio Junio',\n", - " '2:10 Animation',\n", - " 'Madhouse, TMS Entertainment',\n", - " 'ChuChu',\n", - " 'Studio Matrix',\n", - " 'Vasoon Animation',\n", - " 'Topcraft',\n", - " 'Dongwoo A&E',\n", - " 'Trinet Entertainment, Picture Magic',\n", - " 'Steve N' Steven',\n", - " 'Studio A-CAT',\n", - " 'Ordet, LIDENFILMS',\n", - " 'Toei Animation, Studio World',\n", - " 'Yamamura Animation, Inc.',\n", - " 'Brain's Base, Marvy Jack',\n", 
- " 'Lilix',\n", - " 'Tsuchida Productions',\n", - " 'Gonzo, Asread',\n", - " 'Mili Pictures',\n", - " 'Bouncy',\n", - " 'A-1 Pictures, Ordet',\n", - " 'Madhouse, MAPPA',\n", - " 'Anime Antenna Iinkai',\n", - " 'Platinum Vision',\n", - " 'Majin',\n", - " 'A-1 Pictures, TROYCA',\n", - " 'Image House',\n", - " 'Life Work',\n", - " 'W-Toon Studio, DMM.futureworks',\n", - " 'Venet',\n", - " 'CG Year',\n", - " 'Oddjob',\n", - " 'HS Pictures Studio',\n", - " 'Agent 21',\n", - " 'NUT',\n", - " 'Creators Dot Com',\n", - " 'Studio Chizu',\n", - " 'OLM Digital, Signal. MD',\n", - " 'Sugar Boy, Blue Cat',\n", - " 'Remic',\n", - " 'Kenji Studio',\n", - " 'J.C.Staff, A.C.G.T.',\n", - " 'Gainax, Production I.G',\n", - " 'Barnum Studio, Project No.9',\n", - " 'Production I.G, Production Reed',\n", - " 'Tatsunoko Production, SynergySP',\n", - " 'Studio Sign',\n", - " 'A-Real',\n", - " 'Pollyanna Graphics',\n", - " 'Bones, Kinema Citrus',\n", - " 'Echoes',\n", - " 'Kyotoma',\n", - " 'Square Enix',\n", - " 'Tomovies',\n", - " 'Madhouse, Toei Animation',\n", - " 'Satelight, 8bit',\n", - " 'Satelight, ixtl',\n", - " 'Natural High',\n", - " 'Hotline',\n", - " 'Team YokkyuFuman',\n", - " 'An DerCen',\n", - " 'ACC Production',\n", - " 'Sunrise, Studio Deen',\n", - " 'Studio Blanc',\n", - " 'TUBA',\n", - " 'Jumondo',\n", - " 'Shirogumi, Encourage Films',\n", - " 'Oxybot',\n", - " 'Tomason',\n", - " 'Sanrio',\n", - " 'Sotsu',\n", - " 'Studio Fantasia, Animate Film',\n", - " 'Eiken, Studio Live',\n", - " 'Puzzle Animation Studio Limited',\n", - " 'Joker Films',\n", - " 'MMDGP',\n", - " 'Karaku',\n", - " 'Qualia Animation',\n", - " 'Trinet Entertainment',\n", - " 'Schoolzone',\n", - " 'Tatsunoko Production, Ordet',\n", - " 'domerica',\n", - " 'Thundray',\n", - " 'Toei Animation, Production Reed',\n", - " 'Kyoto Animation, Animation Do',\n", - " 'Calf Studio',\n", - " 'D & D Pictures',\n", - " 'Hoods Entertainment, teamKG',\n", - " 'Telecom Animation Film, Shirogumi',\n", - " 'DR Movie',\n", 
- " 'Kyoto Animation, Tatsunoko Production',\n", - " 'Triple X',\n", - " 'Actas, Studio 3Hz',\n", - " 'Studio Ghibli, Studio Hibari',\n", - " 'Sunrise, Bandai Namco Pictures',\n", - " 'Arms, TNK',\n", - " 'Himajin Planning',\n", - " 'Pink Pineapple',\n", - " 'Ordet, Millepensee',\n", - " 'Tatsunoko Production, Studio World',\n", - " 'Light Chaser Animation Studios',\n", - " 'Actas, SynergySP',\n", - " 'Digital Media Lab',\n", - " 'Toei Video',\n", - " 'Orange',\n", - " 'Madhouse, Studio 4°C',\n", - " 'drop',\n", - " 'Gainax, feel.',\n", - " 'Production I.G, Xebec, OLM',\n", - " 'WAO World, MooGoo',\n", - " 'KOO-KI',\n", - " 'Circle Tribute',\n", - " 'SELFISH',\n", - " 'LandQ studios',\n", - " 'Gainax, J.C.Staff',\n", - " 'Imagin',\n", - " 'Studio Binzo',\n", - " 'Haoliners Animation League, Pb Animation Co. Ltd.',\n", - " 'Sting Ray',\n", - " 'Gainax, Asahi Production',\n", - " 'Studio Rikka, Purple Cow Studio Japan',\n", - " 'TMS Entertainment, Telecom Animation Film',\n", - " 'Radix, Marine Entertainment',\n", - " 'Moss Design Unit',\n", - " 'Production I.G, Trans Arts',\n", - " 'KAGAYA Studio',\n", - " 'Haoliners Animation League, Studio LAN',\n", - " 'Kokusai Eigasha',\n", - " 'GARDEN LODGE',\n", - " 'Kinema Citrus, EMT²',\n", - " 'Tezuka Productions, MAPPA',\n", - " 'Studio Comet, KeyEast, REALTHING',\n", - " 'Anpro',\n", - " 'Madhouse, Imagin',\n", - " 'Tengu Kobo',\n", - " 'Studio Zero',\n", - " 'Satelight, C2C',\n", - " 'Kinema Citrus, Orange',\n", - " 'J.C.Staff, Artland',\n", - " 'Visual 80',\n", - " 'Studio Live',\n", - " 'Plum',\n", - " 'J.C.Staff, Egg Firm',\n", - " 'Three-d',\n", - " 'MAPPA, Studio VOLN',\n", - " 'Kazami Gakuen Koushiki Douga-bu',\n", - " 'Gonzo, Production I.G',\n", - " 'Robot Communications',\n", - " 'Studio! 
Cucuri',\n", - " 'Madhouse, Telecom Animation Film',\n", - " 'M2',\n", - " 'PPM',\n", - " 'Artland, Tatsunoko Production',\n", - " 'A-1 Pictures, Bridge',\n", - " 'Studio Flag',\n", - " 'Studio VOLN',\n", - " 'Madhouse, Production Reed',\n", - " 'Neft Film',\n", - " 'production doA',\n", - " 'Wawayu Animation',\n", - " 'Production I.G, Zexcs',\n", - " 'Larx Entertainment',\n", - " 'TMS Entertainment, Jinnis Animation Studios',\n", - " 'Gonzo, AIC',\n", - " 'AIC Frontier',\n", - " 'Bridge, Husio Studio',\n", - " 'Studio Pierrot, Shin-Ei Animation',\n", - " 'Manglobe, Geno Studio',\n", - " 'Ekura Animal',\n", - " 'Production IMS, Orange',\n", - " 'Gonzo, Palm Studio',\n", - " 'SBS TV Production',\n", - " 'Studio Pierrot, D.A.S.T.',\n", - " 'CloverWorks',\n", - " 'Gainax, Magic Bus',\n", - " 'Media Bank',\n", - " 'Madhouse, Tatsunoko Production',\n", - " 'Studio Flad',\n", - " 'Production I.G, OLM, Signal. MD',\n", - " 'Bones, Production Reed',\n", - " 'Studio Kikan',\n", - " 'Karasfilms',\n", - " 'Studio Unicorn, Hiro Media',\n", - " 'OLM, Production Reed',\n", - " 'AIC Spirits, BeSTACK',\n", - " 'MAPPA, Lapin Track',\n", - " 'Xebec, Genco, Triangle Staff',\n", - " 'Studio Pierrot, AIC A.S.T.A.',\n", - " 'TMS Entertainment, Tezuka Productions',\n", - " 'Artland, Nippon Animation',\n", - " 'ixtl, LIDENFILMS',\n", - " 'Tomoyasu Murata Company',\n", - " 'Chippai',\n", - " 'Dynamo Pictures',\n", - " 'Gainax, Toei Animation',\n", - " 'Group TAC, View Works',\n", - " 'NHK',\n", - " 'Shinkuukan',\n", - " 'Daichi Doga, Dongyang Animation',\n", - " 'Odolttogi',\n", - " 'Nice Boat Animation',\n", - " 'EDGE',\n", - " 'Public & Basic, Ripple Film',\n", - " 'TNK, Production Reed',\n", - " 'Actas, TMS Entertainment',\n", - " 'REVOROOT',\n", - " 'DAX Production',\n", - " 'Bibury Animation Studios',\n", - " 'Arms, Studio Kikan',\n", - " 'Actas, Bee Media',\n", - " 'Studio Flag, Studio Bogey',\n", - " 'Kitty Films',\n", - " 'Production I.G, SANZIGEN',\n", - " 'Tama Production',\n", - 
" 'Lerche, 10Gauge',\n", - " 'Chaos Project, GANSIS',\n", - " 'Seven Arcs, Seven Arcs Pictures',\n", - " 'A-1 Pictures, CloverWorks',\n", - " 'Studio Bogey, Public & Basic',\n", - " 'Artland, Madhouse',\n", - " 'AIC Spirits, Asread',\n", - " 'Barnum Studio, Project No.9, Studio Blanc',\n", - " 'Ajia-Do, Group TAC',\n", - " 'Tatsunoko Production, 10Gauge',\n", - " 'Heewon Entertainment',\n", - " 'AIC, Studio Hakk',\n", - " 'Studio Deen, Wit Studio',\n", - " 'Ajia-Do, Studio Deen',\n", - " 'Big Bang',\n", - " 'Madhouse, DLE',\n", - " 'Anime R, Aubec',\n", - " 'Fuji TV',\n", - " 'Madhouse, Tezuka Productions',\n", - " 'Tsukimidou',\n", - " 'Sparky Animation',\n", - " 'Artland, Magic Bus',\n", - " 'Sunrise, Nakamura Production',\n", - " 'Plum, Magic Bus',\n", - " 'Group TAC, Amuse',\n", - " 'J.C.Staff, Animate Film',\n", - " 'Production I.G, Animate Film',\n", - " 'Darts',\n", - " 'Production I.G, DLE',\n", - " 'Production I.G, NUT, REVOROOT',\n", - " 'Production I.G, Polygon Pictures',\n", - " 'Studio Meditation With a Pencil',\n", - " 'Panda Factory, Studio PuYUKAI',\n", - " 'Bee Train, Production I.G, Madhouse, Studio 4°C',\n", - " 'Picograph',\n", - " 'Tatsunoko Production, SynergySP, Seven',\n", - " 'Romanov Films',\n", - " 'T.P.O',\n", - " 'Shueisha',\n", - " 'AXsiZ',\n", - " 'Madhouse, Studio Deen',\n", - " 'D.A.S.T.',\n", - " 'Khara, Trigger',\n", - " 'Tsuburaya Productions, Trigger',\n", - " 'TriF Studio',\n", - " 'Studio LAN',\n", - " 'G-angle',\n", - " 'Satelight, Production Reed',\n", - " 'Shaft, TNK',\n", - " 'AIC Spirits, Digital Frontier',\n", - " 'Studio Deen, Daume',\n", - " 'Studio Deen, Studio Hibari, Production Reed',\n", - " 'Production I.G, OLM',\n", - " 'Eiken, TYO Animations',\n", - " 'Zexcs, Studio A-CAT',\n", - " 'Asahi Production, Rising Force',\n", - " 'Aubec',\n", - " 'Studio Pierrot, Ajia-Do',\n", - " 'Front Line, Studio G-1Neo',\n", - " 'Studio Korumi',\n", - " 'Studio Gallop, TMS Entertainment',\n", - " 'Animate Film, Studio Kikan',\n", 
- " 'Yokohama Animation Lab',\n", - " 'Felix Film',\n", - " 'Sunrise, Studio Hibari',\n", - " 'AIC, APPP',\n", - " 'Toei Animation, TMS Entertainment',\n", - " 'Studio Anima',\n", - " 'View Works',\n", - " 'Kamikaze Douga, Nishiki Studio',\n", - " 'J.C.Staff, Nomad',\n", - " 'TMS Entertainment, Studio Comet',\n", - " 'Mook Animation',\n", - " 'OLM, Wit Studio',\n", - " 'Toei Animation, Studio Nue',\n", - " 'J.C.Staff, Tatsunoko Production',\n", - " 'Bee Media, Code',\n", - " 'Artland, Hoods Entertainment',\n", - " 'Nippon Animation, Production Reed',\n", - " 'AIC, Animate Film',\n", - " 'Gainax, Studio 4°C',\n", - " 'A-1 Pictures, Trigger, CloverWorks',\n", - " 'Tatsunoko Production, Production Reed, Asahi Production',\n", - " 'Panmedia, Meruhensha',\n", - " 'Actas, Brain's Base',\n", - " 'Dwango, LIDENFILMS',\n", - " 'J.C.Staff, Production I.G',\n", - " 'OLM, OLM Digital, Sprite Animation Studios',\n", - " 'Bee Media, 81 Produce',\n", - " 'AIC, Darts',\n", - " 'Bee Train, Xebec',\n", - " 'Sakura Create',\n", - " 'TMS Entertainment, 3xCube',\n", - " 'Ishimori Entertainment',\n", - " 'Studio Core',\n", - " 'Robot Communications, Imagica Image Works',\n", - " 'Xebec, AIC',\n", - " 'Kaeruotoko Shokai',\n", - " 'Idea Factory',\n", - " 'J.C.Staff, Production Reed',\n", - " 'KIZAWA Studio',\n", - " 'Doga Kobo, Orange',\n", - " 'BOOTLEG',\n", - " 'Office DCI',\n", - " 'Ripple Film',\n", - " 'Studio UGOKI',\n", - " 'Madhouse, Studio Fantasia',\n", - " 'Nippon Animation, Studio WHO',\n", - " 'PRA',\n", - " 'Tamura Shigeru Studio',\n", - " 'Animaruya',\n", - " 'Bones, Sanrio',\n", - " 'Trinet Entertainment, Studio Hibari',\n", - " 'AIC Spirits, Group TAC',\n", - " 'Mirai Film',\n", - " 'Studio Ponoc',\n", - " 'Namu Animation',\n", - " 'Shirogumi, Shin-Ei Animation',\n", - " 'Coastline Animation Studio',\n", - " 'Cygames',\n", - " 'Madhouse, Group TAC',\n", - " 'Oz',\n", - " 'Emon, Blade',\n", - " 'Milky Animation Label',\n", - " 'Xebec, Asread',\n", - " 'Studio Ghibli, 
Ajia-Do',\n", - " 'J.C.Staff, Studio Ghibli',\n", - " 'Kyoto Animation, Sunrise',\n", - " 'EKACHI EPILKA',\n", - " 'Imagineer',\n", - " 'Genco, Radix',\n", - " 'TMS Entertainment, DLE',\n", - " 'Studio Take Off',\n", - " 'Studio Pierrot, Arms',\n", - " 'G-Lam, Studio CA',\n", - " 'Sunrise, Production Reed',\n", - " 'Ajia-Do, TMS Entertainment',\n", - " 'TNK, Zero-G',\n", - " 'J.C.Staff, Kitty Films',\n", - " 'Sunrise, Kino Production',\n", - " 'Diomedea, Studio Blanc',\n", - " 'Triangle Staff, Studio Wombat',\n", - " 'Bones, Telecom Animation Film',\n", - " 'Satelight, Encourage Films',\n", - " 'Kyoto Animation, Production I.G, Shin-Ei Animation',\n", - " 'Studio World',\n", - " 'Studio Kyuuma, Studio Kikan, Azeta Pictures',\n", - " 'Tatsunoko Production, CoMix Wave Films, FOREST Hunting One',\n", - " 'Think Corporation',\n", - " 'ufotable, feel., Studio Flag',\n", - " 'Future Planet, Beijing Huihuang Animation Company',\n", - " 'Artland, AIC, Artmic',\n", - " 'Madhouse, Tokyo Movie Shinsha',\n", - " 'Studio Pierrot, Kyoto Animation',\n", - " 'Studio Comet, Zexcs',\n", - " 'Japan Taps',\n", - " 'Ginga Ya',\n", - " 'Brain's Base, Studio A-CAT',\n", - " 'Office AO',\n", - " 'Shirogumi, EMT²',\n", - " 'Tokyo Kids, Minami Machi Bugyousho',\n", - " 'Gathering, Asahi Production',\n", - " 'AIC, Remic',\n", - " 'Studio 4°C, Sunrise',\n", - " 'Ordet, SANZIGEN',\n", - " 'Green Bunny',\n", - " 'Ordet, Encourage Films',\n", - " 'GRIZZLY',\n", - " 'Twilight Studio',\n", - " 'Enoki Films, Dai Nippon Printing',\n", - " 'Steve N' Steven, Rockwell Eyes',\n", - " 'Studio Ghibli, Polygon Pictures',\n", - " 'Khara, CyberConnect2',\n", - " 'Panda Factory',\n", - " 'Plus Heads',\n", - " 'Studio Fantasia, Amber Film Works',\n", - " 'Animate Film, Visual 80',\n", - " 'Marvy Jack',\n", - " 'Anpro, teamKG',\n", - " 'Asahi Production, Shochiku Animation Institute',\n", - " 'Madhouse, Studio Deen, Magic Bus',\n", - " 'Studio Pierrot, Studio Gallop',\n", - " 'Minami Machi Bugyousho',\n", - " 
'Xebec, Kanaban Graphics',\n", - " 'Creators in Pack, Namu Animation',\n", - " 'Studio Pierrot, David Production',\n", - " 'Toei Animation, Tatsunoko Production',\n", - " 'OLM, P.A. Works',\n", - " 'Studio Moriken',\n", - " 'Studio OX',\n", - " 'Xebec, Production Reed',\n", - " 'Chiptune',\n", - " 'Shin-Ei Animation, DLE',\n", - " 'Sunrise, Telecom Animation Film',\n", - " 'Studio Deen, Studio Hibari',\n", - " 'Tatsunoko Production, Eiken',\n", - " 'C2C, Lay-duce',\n", - " 'Project No.9, Tri-Slash',\n", - " 'Group TAC, G&G Entertainment',\n", - " 'helo.inc',\n", - " 'LMD',\n", - " 'Toei Animation, Daewon Media',\n", - " 'J.C.Staff, Toei Animation',\n", - " 'Milky Cartoon, LMD',\n", - " 'Hal Film Maker, TYO Animations',\n", - " 'Studio Pierrot, Studio Hibari',\n", - " 'Madhouse, Nexus',\n", - " 'Asura Film',\n", - " 'Picona',\n", - " 'TMS Entertainment, TOCSIS',\n", - " 'Front Line',\n", - " 'Bee Media',\n", - " 'J.C.Staff, SANZIGEN',\n", - " 'Madhouse, TNK',\n", - " 'Will Palette',\n", - " 'Group TAC, Japan Vistec',\n", - " 'HeART-BIT',\n", - " 'TNK, Kinema Citrus',\n", - " 'CoMix Wave Films, Haoliners Animation League',\n", - " 'Tonko House',\n", - " 'Bee Train, Cookie Jar Entertainment',\n", - " 'SOEISHINSHA',\n", - " 'Xebec, Group TAC',\n", - " 'Ordet, Studio Moriken',\n", - " 'Rabbit Machine',\n", - " 'NHK Enterprises',\n", - " 'Studio 4°C, Shirogumi',\n", - " 'Beijing Rocen Digital',\n", - " 'Primastea',\n", - " 'J.C.Staff, Life Work',\n", - " 'Mook DLE',\n", - " 'Tele-Cartoon Japan',\n", - " 'Indeprox',\n", - " 'Studio Pierrot, Studio Deen',\n", - " 'Production I.G, Studio Deen',\n", - " 'Kinema Citrus, White Fox',\n", - " 'OLM, AIC A.S.T.A.',\n", - " 'Nippon Animation, Studio Gallop',\n", - " 'Silver Link., Nexus',\n", - " 'A-Line',\n", - " 'Picture Magic, Rikuentai',\n", - " 'Production I.G, Studio 4°C, Shaft',\n", - " 'Triangle Staff, Animate Film',\n", - " 'G&G Entertainment',\n", - " 'AIC Takarazuka',\n", - " 'Orange, Seven Arcs Pictures',\n", - " 
'Buemon',\n", - " 'Kazuki Production',\n", - " 'Gonzo, DandeLion Animation Studio LLC',\n", - " 'Blade',\n", - " 'Toei Animation, Bridge',\n", - " 'CUCURI, Digital Network Animation',\n", - " 'Studio Unicorn',\n", - " 'MooGoo',\n", - " 'Takara Tomy A.R.T.S',\n", - " 'OLM, Shin-Ei Animation',\n", - " 'Arcs Create',\n", - " 'Usagi Ou, Studio Nanahoshi',\n", - " 'Actas, Production Reed',\n", - " 'Azeta Pictures',\n", - " 'Sunrise, Ascension',\n", - " 'Studio Khronos',\n", - " 'Iyasakadou Film',\n", - " 'Studio Z5, Studio March',\n", - " 'Studio G-1Neo',\n", - " 'TOHO animation',\n", - " 'Artmic, Animate Film',\n", - " 'Madhouse, feel.',\n", - " 'Echo',\n", - " 'Radix, Chaos Project',\n", - " 'Bones, Sunrise',\n", - " 'Gainax, Studio Deen',\n", - " 'Orange, Studio 3Hz',\n", - " 'Gainax, Fukushima Gainax',\n", - " 'Madhouse, Studio Gokumi',\n", - " 'iDRAGONS Creative Studio',\n", - " 'Group TAC, Ginga Ya',\n", - " 'October Media',\n", - " 'Boyan Pictures',\n", - " 'Nippon Animation, Xebec',\n", - " 'Piso Studio',\n", - " 'B&T',\n", - " 'Bones, Production I.G, Studio 4°C, Toei Animation',\n", - " 'Gonzo, Satelight',\n", - " 'Ordet, W-Toon Studio',\n", - " 'Fifth Avenue',\n", - " 'Sunwoo Entertainment',\n", - " 'Studio Hibari, Production Reed',\n", - " 'Cyclone Graphics inc',\n", - " 'Automatic Flowers Studio',\n", - " 'AIC, Artmic, Darts',\n", - " 'TYO Animations, LIDENFILMS',\n", - " 'Gainax, Madhouse',\n", - " 'Hoods Entertainment, Production IMS',\n", - " 'pH Studio, D & D Pictures',\n", - " 'CoMix Wave Films, FOREST Hunting One',\n", - " 'Nippon Animation, SynergySP, Shirogumi',\n", - " 'Barnum Studio, Silver Link., Connect',\n", - " 'Gonzo, DLE',\n", - " 'Brain's Base, Platinum Vision',\n", - " 'Japan Vistec',\n", - " 'Studio Gallop, Studio Comet',\n", - " 'Ripromo',\n", - " 'Yamato Works',\n", - " 'Studio Zealot',\n", - " 'Strawberry Meets Pictures',\n", - " 'Kyotoma, Office Nobu',\n", - " 'Creators in Pack, Studio Lings',\n", - " 'Gonzo, Picture Magic',\n", - " 
# Studio values that appear on fewer than MIN_STUDIO_TITLES titles are treated
# as "minor"; the next cell merges them into a single bucket.
MIN_STUDIO_TITLES = 40
minor = studio_counts[studio_counts < MIN_STUDIO_TITLES].index.to_list()

# Show only the size and a short preview: echoing the whole list adds
# hundreds of output lines to the notebook.
len(minor), minor[:10]
## Combine the minor studios into one "SmallStudio" bucket.
## `isin` + `mask` is the vectorised equivalent of testing every row against
## the `minor` list inside a Python lambda, and gives the same labels.
animelist["studio"] = animelist["studio"].mask(animelist["studio"].isin(minor), "SmallStudio")
animelist["studio"].value_counts()
### Split aired date (from and to)
---
`aired` contains `{'from': 'yyyy-mm-dd', 'to': 'yyyy-mm-dd'}`.

Split it into:
- `aired_from` -> yyyy-mm-dd
- `aired_to` -> yyyy-mm-dd

Then:
- calculate the number of days for the episodes
- calculate how frequently it aired
# Split the 'aired' column into separate 'aired_from' and 'aired_to' columns.
# Each date is extracted with its own pattern: a single combined pattern only
# matches when BOTH values are quoted strings, so a row with just one quoted
# date would lose that date as well (both columns become NaN).
animelist['aired_from'] = animelist['aired'].str.extract(r"'from': '(.*?)'", expand=False)
animelist['aired_to'] = animelist['aired'].str.extract(r"'to': '(.*?)'", expand=False)

# Displaying the DataFrame with the new columns
print(animelist[['aired_from', 'aired_to']])
## Replace missing genre values (NaN) with the placeholder string 'NA'
animelist["genre"] = animelist["genre"].fillna("NA")
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AdventureCarsComedyDementiaDemonsDramaEcchiFantasyGameHarem...ShoujoShounenSlice of LifeSpaceSportsSuper PowerSupernaturalThrillerVampireYaoi
00000000000...0000000000
10000000000...0000000000
20000000000...0000000000
30000010100...0000000000
40000010000...0000000000
..................................................................
144730000000000...0000000000
144740000000000...0000000000
144751000000100...0000000000
144760000000000...0000000000
144770000000000...0000000000
\n", - "

14478 rows × 83 columns

\n", - "
" - ], - "text/plain": [ - " Adventure Cars Comedy Dementia Demons Drama Ecchi \\\n", - "0 0 0 0 0 0 0 0 \n", - "1 0 0 0 0 0 0 0 \n", - "2 0 0 0 0 0 0 0 \n", - "3 0 0 0 0 0 1 0 \n", - "4 0 0 0 0 0 1 0 \n", - "... ... ... ... ... ... ... ... \n", - "14473 0 0 0 0 0 0 0 \n", - "14474 0 0 0 0 0 0 0 \n", - "14475 1 0 0 0 0 0 0 \n", - "14476 0 0 0 0 0 0 0 \n", - "14477 0 0 0 0 0 0 0 \n", - "\n", - " Fantasy Game Harem ... Shoujo Shounen Slice of Life Space \\\n", - "0 0 0 0 ... 0 0 0 0 \n", - "1 0 0 0 ... 0 0 0 0 \n", - "2 0 0 0 ... 0 0 0 0 \n", - "3 1 0 0 ... 0 0 0 0 \n", - "4 0 0 0 ... 0 0 0 0 \n", - "... ... ... ... ... ... ... ... ... \n", - "14473 0 0 0 ... 0 0 0 0 \n", - "14474 0 0 0 ... 0 0 0 0 \n", - "14475 1 0 0 ... 0 0 0 0 \n", - "14476 0 0 0 ... 0 0 0 0 \n", - "14477 0 0 0 ... 0 0 0 0 \n", - "\n", - " Sports Super Power Supernatural Thriller Vampire Yaoi \n", - "0 0 0 0 0 0 0 \n", - "1 0 0 0 0 0 0 \n", - "2 0 0 0 0 0 0 \n", - "3 0 0 0 0 0 0 \n", - "4 0 0 0 0 0 0 \n", - "... ... ... ... ... ... ... 
## One-hot encode the genres: one 0/1 column per genre.
## The genre strings separate entries with a comma followed by a space.
## Splitting on ',' alone keeps that space, so every genre that is not first
## in its row gets a second column with a leading space (" Adventure" next to
## "Adventure"). Whitespace around the commas is removed before splitting so
## each genre maps to exactly one column.
genre_animelist = (
    animelist['genre']
    .str.strip()
    .str.replace(r'\s*,\s*', ',', regex=True)
    .str.get_dummies(sep=',')
)

# Guard against the duplicate-column problem coming back.
assert all(col == col.strip() for col in genre_animelist.columns), "genre column with stray whitespace"

genre_animelist
int64\n", - " 30 Shoujo Ai 14478 non-null int64\n", - " 31 Shounen 14478 non-null int64\n", - " 32 Shounen Ai 14478 non-null int64\n", - " 33 Slice of Life 14478 non-null int64\n", - " 34 Space 14478 non-null int64\n", - " 35 Sports 14478 non-null int64\n", - " 36 Super Power 14478 non-null int64\n", - " 37 Supernatural 14478 non-null int64\n", - " 38 Thriller 14478 non-null int64\n", - " 39 Vampire 14478 non-null int64\n", - " 40 Yaoi 14478 non-null int64\n", - " 41 Yuri 14478 non-null int64\n", - " 42 Action 14478 non-null int64\n", - " 43 Adventure 14478 non-null int64\n", - " 44 Cars 14478 non-null int64\n", - " 45 Comedy 14478 non-null int64\n", - " 46 Dementia 14478 non-null int64\n", - " 47 Demons 14478 non-null int64\n", - " 48 Drama 14478 non-null int64\n", - " 49 Ecchi 14478 non-null int64\n", - " 50 Fantasy 14478 non-null int64\n", - " 51 Game 14478 non-null int64\n", - " 52 Harem 14478 non-null int64\n", - " 53 Hentai 14478 non-null int64\n", - " 54 Historical 14478 non-null int64\n", - " 55 Horror 14478 non-null int64\n", - " 56 Josei 14478 non-null int64\n", - " 57 Kids 14478 non-null int64\n", - " 58 Magic 14478 non-null int64\n", - " 59 Martial Arts 14478 non-null int64\n", - " 60 Mecha 14478 non-null int64\n", - " 61 Military 14478 non-null int64\n", - " 62 Music 14478 non-null int64\n", - " 63 Mystery 14478 non-null int64\n", - " 64 NA 14478 non-null int64\n", - " 65 Parody 14478 non-null int64\n", - " 66 Police 14478 non-null int64\n", - " 67 Psychological 14478 non-null int64\n", - " 68 Romance 14478 non-null int64\n", - " 69 Samurai 14478 non-null int64\n", - " 70 School 14478 non-null int64\n", - " 71 Sci-Fi 14478 non-null int64\n", - " 72 Seinen 14478 non-null int64\n", - " 73 Shoujo 14478 non-null int64\n", - " 74 Shounen 14478 non-null int64\n", - " 75 Slice of Life 14478 non-null int64\n", - " 76 Space 14478 non-null int64\n", - " 77 Sports 14478 non-null int64\n", - " 78 Super Power 14478 non-null int64\n", - " 79 Supernatural 14478 
non-null int64\n", - " 80 Thriller 14478 non-null int64\n", - " 81 Vampire 14478 non-null int64\n", - " 82 Yaoi 14478 non-null int64\n", - "dtypes: int64(83)\n", - "memory usage: 9.2 MB\n" - ] - } - ], - "source": [ - "genre_animelist.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "0087eef3-cc59-46a2-86af-39ae8a00ef1b", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titletypesourceepisodesstatusairingaireddurationratingscore...ShoujoShounenSlice of LifeSpaceSportsSuper PowerSupernaturalThrillerVampireYaoi
0Inu x Boku SSTVManga12Finished AiringFalse{'from': '2012-01-13', 'to': '2012-03-30'}24 min. per ep.PG-13 - Teens 13 or older7.63...0000000000
1Seto no HanayomeTVManga26Finished AiringFalse{'from': '2007-04-02', 'to': '2007-10-01'}24 min. per ep.PG-13 - Teens 13 or older7.89...0000000000
2Shugo Chara!! DokiTVManga51Finished AiringFalse{'from': '2008-10-04', 'to': '2009-09-25'}24 min. per ep.PG - Children7.55...0000000000
3Princess TutuTVOriginal38Finished AiringFalse{'from': '2002-08-16', 'to': '2003-05-23'}16 min. per ep.PG-13 - Teens 13 or older8.21...0000000000
4Bakuman. 3rd SeasonTVManga25Finished AiringFalse{'from': '2012-10-06', 'to': '2013-03-30'}24 min. per ep.PG-13 - Teens 13 or older8.67...0000000000
\n", - "

5 rows × 104 columns

\n", - "
" - ], - "text/plain": [ - " title type source episodes status airing \\\n", - "0 Inu x Boku SS TV Manga 12 Finished Airing False \n", - "1 Seto no Hanayome TV Manga 26 Finished Airing False \n", - "2 Shugo Chara!! Doki TV Manga 51 Finished Airing False \n", - "3 Princess Tutu TV Original 38 Finished Airing False \n", - "4 Bakuman. 3rd Season TV Manga 25 Finished Airing False \n", - "\n", - " aired duration \\\n", - "0 {'from': '2012-01-13', 'to': '2012-03-30'} 24 min. per ep. \n", - "1 {'from': '2007-04-02', 'to': '2007-10-01'} 24 min. per ep. \n", - "2 {'from': '2008-10-04', 'to': '2009-09-25'} 24 min. per ep. \n", - "3 {'from': '2002-08-16', 'to': '2003-05-23'} 16 min. per ep. \n", - "4 {'from': '2012-10-06', 'to': '2013-03-30'} 24 min. per ep. \n", - "\n", - " rating score ... Shoujo Shounen Slice of Life \\\n", - "0 PG-13 - Teens 13 or older 7.63 ... 0 0 0 \n", - "1 PG-13 - Teens 13 or older 7.89 ... 0 0 0 \n", - "2 PG - Children 7.55 ... 0 0 0 \n", - "3 PG-13 - Teens 13 or older 8.21 ... 0 0 0 \n", - "4 PG-13 - Teens 13 or older 8.67 ... 0 0 0 \n", - "\n", - " Space Sports Super Power Supernatural Thriller Vampire Yaoi \n", - "0 0 0 0 0 0 0 0 \n", - "1 0 0 0 0 0 0 0 \n", - "2 0 0 0 0 0 0 0 \n", - "3 0 0 0 0 0 0 0 \n", - "4 0 0 0 0 0 0 0 \n", - "\n", - "[5 rows x 104 columns]" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "## combining the animelist data and genre data into animelist_df\n", - "animelist_df = pd.concat([animelist, genre_animelist], axis=1)\n", - "animelist_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "931de186-0984-4b46-af39-4b5659f3e44b", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titletypesourceepisodesstatusairingaireddurationratingscore...ShoujoShounenSlice of LifeSpaceSportsSuper PowerSupernaturalThrillerVampireYaoi
0Inu x Boku SSTVManga12Finished AiringFalse{'from': '2012-01-13', 'to': '2012-03-30'}24 min. per ep.PG-13 - Teens 13 or older7.63...0000000000
1Seto no HanayomeTVManga26Finished AiringFalse{'from': '2007-04-02', 'to': '2007-10-01'}24 min. per ep.PG-13 - Teens 13 or older7.89...0000000000
2Shugo Chara!! DokiTVManga51Finished AiringFalse{'from': '2008-10-04', 'to': '2009-09-25'}24 min. per ep.PG - Children7.55...0000000000
3Princess TutuTVOriginal38Finished AiringFalse{'from': '2002-08-16', 'to': '2003-05-23'}16 min. per ep.PG-13 - Teens 13 or older8.21...0000000000
4Bakuman. 3rd SeasonTVManga25Finished AiringFalse{'from': '2012-10-06', 'to': '2013-03-30'}24 min. per ep.PG-13 - Teens 13 or older8.67...0000000000
..................................................................
14473Gutchonpa Omoshiro HanashiTVUnknown5Finished AiringFalse{'from': '1987-11-05', 'to': '1988-11-04'}8 min. per ep.G - All Ages5.50...0000000000
14474Geba Geba Shou Time!OVAUnknown1Finished AiringFalse{'from': '1986-03-21', 'to': '1986-03-21'}25 min.G - All Ages4.60...0000000000
14475Godzilla: Hoshi wo Kuu MonoMovieOther1Not yet airedFalse{'from': None, 'to': None}UnknownR - 17+ (violence & profanity)0.00...0000000000
14476Nippon Mukashibanashi: Sannen NetarouOVAOther1Finished AiringFalse{'from': None, 'to': None}40 min.G - All Ages6.00...0000000000
14477Senjou no Valkyria SpecialSpecialUnknown1Finished AiringFalse{'from': '2010-04-07', 'to': '2010-04-07'}3 min.PG-13 - Teens 13 or older5.15...0000000000
\n", - "

14478 rows × 103 columns

\n", - "
" - ], - "text/plain": [ - " title type source episodes \\\n", - "0 Inu x Boku SS TV Manga 12 \n", - "1 Seto no Hanayome TV Manga 26 \n", - "2 Shugo Chara!! Doki TV Manga 51 \n", - "3 Princess Tutu TV Original 38 \n", - "4 Bakuman. 3rd Season TV Manga 25 \n", - "... ... ... ... ... \n", - "14473 Gutchonpa Omoshiro Hanashi TV Unknown 5 \n", - "14474 Geba Geba Shou Time! OVA Unknown 1 \n", - "14475 Godzilla: Hoshi wo Kuu Mono Movie Other 1 \n", - "14476 Nippon Mukashibanashi: Sannen Netarou OVA Other 1 \n", - "14477 Senjou no Valkyria Special Special Unknown 1 \n", - "\n", - " status airing aired \\\n", - "0 Finished Airing False {'from': '2012-01-13', 'to': '2012-03-30'} \n", - "1 Finished Airing False {'from': '2007-04-02', 'to': '2007-10-01'} \n", - "2 Finished Airing False {'from': '2008-10-04', 'to': '2009-09-25'} \n", - "3 Finished Airing False {'from': '2002-08-16', 'to': '2003-05-23'} \n", - "4 Finished Airing False {'from': '2012-10-06', 'to': '2013-03-30'} \n", - "... ... ... ... \n", - "14473 Finished Airing False {'from': '1987-11-05', 'to': '1988-11-04'} \n", - "14474 Finished Airing False {'from': '1986-03-21', 'to': '1986-03-21'} \n", - "14475 Not yet aired False {'from': None, 'to': None} \n", - "14476 Finished Airing False {'from': None, 'to': None} \n", - "14477 Finished Airing False {'from': '2010-04-07', 'to': '2010-04-07'} \n", - "\n", - " duration rating score ... Shoujo \\\n", - "0 24 min. per ep. PG-13 - Teens 13 or older 7.63 ... 0 \n", - "1 24 min. per ep. PG-13 - Teens 13 or older 7.89 ... 0 \n", - "2 24 min. per ep. PG - Children 7.55 ... 0 \n", - "3 16 min. per ep. PG-13 - Teens 13 or older 8.21 ... 0 \n", - "4 24 min. per ep. PG-13 - Teens 13 or older 8.67 ... 0 \n", - "... ... ... ... ... ... \n", - "14473 8 min. per ep. G - All Ages 5.50 ... 0 \n", - "14474 25 min. G - All Ages 4.60 ... 0 \n", - "14475 Unknown R - 17+ (violence & profanity) 0.00 ... 0 \n", - "14476 40 min. G - All Ages 6.00 ... 0 \n", - "14477 3 min. 
PG-13 - Teens 13 or older 5.15 ... 0 \n", - "\n", - " Shounen Slice of Life Space Sports Super Power Supernatural \\\n", - "0 0 0 0 0 0 0 \n", - "1 0 0 0 0 0 0 \n", - "2 0 0 0 0 0 0 \n", - "3 0 0 0 0 0 0 \n", - "4 0 0 0 0 0 0 \n", - "... ... ... ... ... ... ... \n", - "14473 0 0 0 0 0 0 \n", - "14474 0 0 0 0 0 0 \n", - "14475 0 0 0 0 0 0 \n", - "14476 0 0 0 0 0 0 \n", - "14477 0 0 0 0 0 0 \n", - "\n", - " Thriller Vampire Yaoi \n", - "0 0 0 0 \n", - "1 0 0 0 \n", - "2 0 0 0 \n", - "3 0 0 0 \n", - "4 0 0 0 \n", - "... ... ... ... \n", - "14473 0 0 0 \n", - "14474 0 0 0 \n", - "14475 0 0 0 \n", - "14476 0 0 0 \n", - "14477 0 0 0 \n", - "\n", - "[14478 rows x 103 columns]" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "## remove genre columns \n", - "animelist_df.drop(columns=[\"genre\"], inplace=True)\n", - "animelist_df" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "d0ef0721-d79a-4b14-a125-c04e1ac95ce3", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 14478 entries, 0 to 14477\n", - "Columns: 103 entries, title to Yaoi\n", - "dtypes: bool(1), float64(2), int32(1), int64(88), object(11)\n", - "memory usage: 11.2+ MB\n" - ] - } - ], - "source": [ - "animelist_df.info()" - ] - }, - { - "cell_type": "markdown", - "id": "98116925-f9c5-4902-8807-1ac623323955", - "metadata": { - "tags": [] - }, - "source": [ - "### Check value contain any NULL\n", - "---" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "fcf0cf61-2ef7-486f-b5ac-6f1f1d08df0e", - "metadata": { - "scrolled": true, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " title | has (0)\n", - " type | has (0)\n", - " source | has (0)\n", - " episodes | has (0)\n", - " status | has (0)\n", - " airing | has (0)\n", - " aired | has (0)\n", - " duration | has (0)\n", - " rating | 
has (544)\n", - " score | has (0)\n", - " scored_by | has (0)\n", - " rank | has (1574)\n", - " popularity | has (0)\n", - " members | has (0)\n", - " favorites | has (0)\n", - " related | has (0)\n", - " studio | has (0)\n", - " isPremiered | has (0)\n", - " aired_from | has (2191)\n", - " aired_to | has (2191)\n", - " Adventure | has (0)\n", - " Cars | has (0)\n", - " Comedy | has (0)\n", - " Dementia | has (0)\n", - " Demons | has (0)\n", - " Drama | has (0)\n", - " Ecchi | has (0)\n", - " Fantasy | has (0)\n", - " Game | has (0)\n", - " Harem | has (0)\n", - " Hentai | has (0)\n", - " Historical | has (0)\n", - " Horror | has (0)\n", - " Josei | has (0)\n", - " Kids | has (0)\n", - " Magic | has (0)\n", - " Martial Arts | has (0)\n", - " Mecha | has (0)\n", - " Military | has (0)\n", - " Music | has (0)\n", - " Mystery | has (0)\n", - " Parody | has (0)\n", - " Police | has (0)\n", - " Psychological | has (0)\n", - " Romance | has (0)\n", - " Samurai | has (0)\n", - " School | has (0)\n", - " Sci-Fi | has (0)\n", - " Seinen | has (0)\n", - " Shoujo | has (0)\n", - " Shoujo Ai | has (0)\n", - " Shounen | has (0)\n", - " Shounen Ai | has (0)\n", - " Slice of Life | has (0)\n", - " Space | has (0)\n", - " Sports | has (0)\n", - " Super Power | has (0)\n", - " Supernatural | has (0)\n", - " Thriller | has (0)\n", - " Vampire | has (0)\n", - " Yaoi | has (0)\n", - " Yuri | has (0)\n", - " Action | has (0)\n", - " Adventure | has (0)\n", - " Cars | has (0)\n", - " Comedy | has (0)\n", - " Dementia | has (0)\n", - " Demons | has (0)\n", - " Drama | has (0)\n", - " Ecchi | has (0)\n", - " Fantasy | has (0)\n", - " Game | has (0)\n", - " Harem | has (0)\n", - " Hentai | has (0)\n", - " Historical | has (0)\n", - " Horror | has (0)\n", - " Josei | has (0)\n", - " Kids | has (0)\n", - " Magic | has (0)\n", - " Martial Arts | has (0)\n", - " Mecha | has (0)\n", - " Military | has (0)\n", - " Music | has (0)\n", - " Mystery | has (0)\n", - " NA | has (0)\n", - " Parody | 
has (0)\n", - " Police | has (0)\n", - " Psychological | has (0)\n", - " Romance | has (0)\n", - " Samurai | has (0)\n", - " School | has (0)\n", - " Sci-Fi | has (0)\n", - " Seinen | has (0)\n", - " Shoujo | has (0)\n", - " Shounen | has (0)\n", - " Slice of Life | has (0)\n", - " Space | has (0)\n", - " Sports | has (0)\n", - " Super Power | has (0)\n", - " Supernatural | has (0)\n", - " Thriller | has (0)\n", - " Vampire | has (0)\n", - " Yaoi | has (0)\n" - ] - } - ], - "source": [ - "# let's make sure no null values\n", - "for col in animelist_df:\n", - " print(f\" {col} | has ({animelist_df[col].isnull().sum()})\")\n", - " \n", - " \n", - "## below table we can see:\n", - "## rating have 544 'Nan'\n", - "## rank have 1574 'Nan'\n", - "## aired_from have 2191 'Nan'\n", - "## aired-to have 2191 'Nan'" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "37ef5d53-7cfb-41e4-bb2c-c659ce5fd4ec", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "rating\n", - "PG-13 - Teens 13 or older 5020\n", - "G - All Ages 4541\n", - "PG - Children 1279\n", - "Rx - Hentai 1219\n", - "R - 17+ (violence & profanity) 997\n", - "R+ - Mild Nudity 878\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "## count the total for each rating\n", - "animelist_df.rating.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "32af0567-3583-49f5-90f7-d3a159d7a7bb", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "## ensure the rating is at least PG13\n", - "animelist_df['rating'].fillna(\"G - All Ages\",inplace=True)\n", - "\n", - "## convert the rank to the max rank (prevent skewness)\n", - "animelist_df['rank'].fillna(animelist_df['rank'].max(), inplace=True)\n", - "\n", - "## convert 'Nan' to None for aired dates.\n", - "animelist_df['aired_from'].fillna(\"Not aired\",inplace=True)\n", - 
"animelist_df['aired_to'].fillna(\"Not aired\",inplace=True)\n", - "\n", - "##find out whether aired time and primied have relation" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "3fc969d8-04c0-4242-9192-f68a40aa6339", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " title | has (0)\n", - " type | has (0)\n", - " source | has (0)\n", - " episodes | has (0)\n", - " status | has (0)\n", - " airing | has (0)\n", - " aired | has (0)\n", - " duration | has (0)\n", - " rating | has (0)\n", - " score | has (0)\n", - " scored_by | has (0)\n", - " rank | has (0)\n", - " popularity | has (0)\n", - " members | has (0)\n", - " favorites | has (0)\n", - " related | has (0)\n", - " studio | has (0)\n", - " isPremiered | has (0)\n", - " aired_from | has (0)\n", - " aired_to | has (0)\n", - " Adventure | has (0)\n", - " Cars | has (0)\n", - " Comedy | has (0)\n", - " Dementia | has (0)\n", - " Demons | has (0)\n", - " Drama | has (0)\n", - " Ecchi | has (0)\n", - " Fantasy | has (0)\n", - " Game | has (0)\n", - " Harem | has (0)\n", - " Hentai | has (0)\n", - " Historical | has (0)\n", - " Horror | has (0)\n", - " Josei | has (0)\n", - " Kids | has (0)\n", - " Magic | has (0)\n", - " Martial Arts | has (0)\n", - " Mecha | has (0)\n", - " Military | has (0)\n", - " Music | has (0)\n", - " Mystery | has (0)\n", - " Parody | has (0)\n", - " Police | has (0)\n", - " Psychological | has (0)\n", - " Romance | has (0)\n", - " Samurai | has (0)\n", - " School | has (0)\n", - " Sci-Fi | has (0)\n", - " Seinen | has (0)\n", - " Shoujo | has (0)\n", - " Shoujo Ai | has (0)\n", - " Shounen | has (0)\n", - " Shounen Ai | has (0)\n", - " Slice of Life | has (0)\n", - " Space | has (0)\n", - " Sports | has (0)\n", - " Super Power | has (0)\n", - " Supernatural | has (0)\n", - " Thriller | has (0)\n", - " Vampire | has (0)\n", - " Yaoi | has (0)\n", - " Yuri | has (0)\n", - " Action | has (0)\n", - " 
Adventure | has (0)\n", - " Cars | has (0)\n", - " Comedy | has (0)\n", - " Dementia | has (0)\n", - " Demons | has (0)\n", - " Drama | has (0)\n", - " Ecchi | has (0)\n", - " Fantasy | has (0)\n", - " Game | has (0)\n", - " Harem | has (0)\n", - " Hentai | has (0)\n", - " Historical | has (0)\n", - " Horror | has (0)\n", - " Josei | has (0)\n", - " Kids | has (0)\n", - " Magic | has (0)\n", - " Martial Arts | has (0)\n", - " Mecha | has (0)\n", - " Military | has (0)\n", - " Music | has (0)\n", - " Mystery | has (0)\n", - " NA | has (0)\n", - " Parody | has (0)\n", - " Police | has (0)\n", - " Psychological | has (0)\n", - " Romance | has (0)\n", - " Samurai | has (0)\n", - " School | has (0)\n", - " Sci-Fi | has (0)\n", - " Seinen | has (0)\n", - " Shoujo | has (0)\n", - " Shounen | has (0)\n", - " Slice of Life | has (0)\n", - " Space | has (0)\n", - " Sports | has (0)\n", - " Super Power | has (0)\n", - " Supernatural | has (0)\n", - " Thriller | has (0)\n", - " Vampire | has (0)\n", - " Yaoi | has (0)\n" - ] - } - ], - "source": [ - "# let's double confirmed there are no null values\n", - "for col in animelist_df:\n", - " print(f\" {col} | has ({animelist_df[col].isnull().sum()})\")\n", - " " - ] - }, - { - "cell_type": "markdown", - "id": "593b3e94-b089-4ff5-9be6-6b85fb06bed8", - "metadata": { - "tags": [] - }, - "source": [ - "### Convert to new csv file.\n", - "---\n" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "1545f7c0-4adf-4840-ab5e-8f093fd8e84b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "animelist_df.to_csv('out.csv', index=False) " - ] - }, - { - "cell_type": "markdown", - "id": "8f7a0093-684d-4db8-8146-3d45b077934f", - "metadata": {}, - "source": [ - "### if have other data need to be clean\n", - "---" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a9ba1c45-306d-44a4-876f-a9c33c1a42fe", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": 
{ - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb deleted file mode 100644 index 619386d..0000000 --- a/.ipynb_checkpoints/main-checkpoint.ipynb +++ /dev/null @@ -1,351 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from jikanpy import Jikan\n", - "jikan = Jikan()\n", - "\n", - "mushishi = jikan.anime(457)\n", - "mushishi_with_eps = jikan.anime(457, extension='episodes')\n", - "\n", - "search_result = jikan.search('anime', 'Mushishi', page=2)\n", - "\n", - "winter_2018_anime = jikan.seasons(year=2018, season='winter')\n", - "\n", - "current_season = jikan.seasons(extension='now')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "jikan.anime_episode_by_id(anime_id=1, episode_id=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "genres = jikan.genres(type='anime')" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
mal_idurlimagestrailerapprovedtitlestitletitle_englishtitle_japanesetitle_synonyms...seasonyearbroadcastproducerslicensorsstudiosgenresexplicit_genresthemesdemographics
021https://myanimelist.net/anime/21/One_Piece{'jpg': {'image_url': 'https://cdn.myanimelist...{'youtube_id': '-tviZNY6CSw', 'url': 'https://...True[{'type': 'Default', 'title': 'One Piece'}, {'...One PieceOne PieceONE PIECE[OP]...fall1999.0{'day': 'Sundays', 'time': '09:30', 'timezone'...[{'mal_id': 169, 'type': 'anime', 'name': 'Fuj...[{'mal_id': 102, 'type': 'anime', 'name': 'Fun...[{'mal_id': 18, 'type': 'anime', 'name': 'Toei...[{'mal_id': 1, 'type': 'anime', 'name': 'Actio...[][][{'mal_id': 27, 'type': 'anime', 'name': 'Shou...
133352https://myanimelist.net/anime/33352/Violet_Eve...{'jpg': {'image_url': 'https://cdn.myanimelist...{'youtube_id': 'g5xWqjFglsk', 'url': 'https://...True[{'type': 'Default', 'title': 'Violet Evergard...Violet EvergardenViolet Evergardenヴァイオレット・エヴァーガーデン[]...winter2018.0{'day': 'Thursdays', 'time': '00:00', 'timezon...[{'mal_id': 104, 'type': 'anime', 'name': 'Lan...[{'mal_id': 102, 'type': 'anime', 'name': 'Fun...[{'mal_id': 2, 'type': 'anime', 'name': 'Kyoto...[{'mal_id': 8, 'type': 'anime', 'name': 'Drama...[][][]
235849https://myanimelist.net/anime/35849/Darling_in...{'jpg': {'image_url': 'https://cdn.myanimelist...{'youtube_id': 'cJ6g_6Ud0s8', 'url': 'https://...True[{'type': 'Default', 'title': 'Darling in the ...Darling in the FranXXDARLING in the FRANXXダーリン・イン・ザ・フランキス[]...winter2018.0{'day': 'Saturdays', 'time': '23:30', 'timezon...[{'mal_id': 17, 'type': 'anime', 'name': 'Anip...[{'mal_id': 102, 'type': 'anime', 'name': 'Fun...[{'mal_id': 56, 'type': 'anime', 'name': 'A-1 ...[{'mal_id': 1, 'type': 'anime', 'name': 'Actio...[][{'mal_id': 18, 'type': 'anime', 'name': 'Mech...[]
335120https://myanimelist.net/anime/35120/Devilman__...{'jpg': {'image_url': 'https://cdn.myanimelist...{'youtube_id': 'ww06yGPM7Kc', 'url': 'https://...True[{'type': 'Default', 'title': 'Devilman: Cryba...Devilman: CrybabyDevilman: CrybabyDEVILMAN crybaby[]...NoneNaN{'day': None, 'time': None, 'timezone': None, ...[{'mal_id': 17, 'type': 'anime', 'name': 'Anip...[][{'mal_id': 1591, 'type': 'anime', 'name': 'Sc...[{'mal_id': 1, 'type': 'anime', 'name': 'Actio...[][{'mal_id': 58, 'type': 'anime', 'name': 'Gore...[{'mal_id': 27, 'type': 'anime', 'name': 'Shou...
434577https://myanimelist.net/anime/34577/Nanatsu_no...{'jpg': {'image_url': 'https://cdn.myanimelist...{'youtube_id': 'lgkv0Lqr-Iw', 'url': 'https://...True[{'type': 'Default', 'title': 'Nanatsu no Taiz...Nanatsu no Taizai: Imashime no FukkatsuThe Seven Deadly Sins: Revival of the Commandm...七つの大罪 戒めの復活[Seven Deadly Sins Season 2]...winter2018.0{'day': 'Saturdays', 'time': '06:30', 'timezon...[{'mal_id': 17, 'type': 'anime', 'name': 'Anip...[][{'mal_id': 56, 'type': 'anime', 'name': 'A-1 ...[{'mal_id': 1, 'type': 'anime', 'name': 'Actio...[][][{'mal_id': 27, 'type': 'anime', 'name': 'Shou...
\n", - "

5 rows × 36 columns

\n", - "
" - ], - "text/plain": [ - " mal_id url \\\n", - "0 21 https://myanimelist.net/anime/21/One_Piece \n", - "1 33352 https://myanimelist.net/anime/33352/Violet_Eve... \n", - "2 35849 https://myanimelist.net/anime/35849/Darling_in... \n", - "3 35120 https://myanimelist.net/anime/35120/Devilman__... \n", - "4 34577 https://myanimelist.net/anime/34577/Nanatsu_no... \n", - "\n", - " images \\\n", - "0 {'jpg': {'image_url': 'https://cdn.myanimelist... \n", - "1 {'jpg': {'image_url': 'https://cdn.myanimelist... \n", - "2 {'jpg': {'image_url': 'https://cdn.myanimelist... \n", - "3 {'jpg': {'image_url': 'https://cdn.myanimelist... \n", - "4 {'jpg': {'image_url': 'https://cdn.myanimelist... \n", - "\n", - " trailer approved \\\n", - "0 {'youtube_id': '-tviZNY6CSw', 'url': 'https://... True \n", - "1 {'youtube_id': 'g5xWqjFglsk', 'url': 'https://... True \n", - "2 {'youtube_id': 'cJ6g_6Ud0s8', 'url': 'https://... True \n", - "3 {'youtube_id': 'ww06yGPM7Kc', 'url': 'https://... True \n", - "4 {'youtube_id': 'lgkv0Lqr-Iw', 'url': 'https://... True \n", - "\n", - " titles \\\n", - "0 [{'type': 'Default', 'title': 'One Piece'}, {'... \n", - "1 [{'type': 'Default', 'title': 'Violet Evergard... \n", - "2 [{'type': 'Default', 'title': 'Darling in the ... \n", - "3 [{'type': 'Default', 'title': 'Devilman: Cryba... \n", - "4 [{'type': 'Default', 'title': 'Nanatsu no Taiz... \n", - "\n", - " title \\\n", - "0 One Piece \n", - "1 Violet Evergarden \n", - "2 Darling in the FranXX \n", - "3 Devilman: Crybaby \n", - "4 Nanatsu no Taizai: Imashime no Fukkatsu \n", - "\n", - " title_english title_japanese \\\n", - "0 One Piece ONE PIECE \n", - "1 Violet Evergarden ヴァイオレット・エヴァーガーデン \n", - "2 DARLING in the FRANXX ダーリン・イン・ザ・フランキス \n", - "3 Devilman: Crybaby DEVILMAN crybaby \n", - "4 The Seven Deadly Sins: Revival of the Commandm... 七つの大罪 戒めの復活 \n", - "\n", - " title_synonyms ... season year \\\n", - "0 [OP] ... fall 1999.0 \n", - "1 [] ... winter 2018.0 \n", - "2 [] ... 
winter 2018.0 \n", - "3 [] ... None NaN \n", - "4 [Seven Deadly Sins Season 2] ... winter 2018.0 \n", - "\n", - " broadcast \\\n", - "0 {'day': 'Sundays', 'time': '09:30', 'timezone'... \n", - "1 {'day': 'Thursdays', 'time': '00:00', 'timezon... \n", - "2 {'day': 'Saturdays', 'time': '23:30', 'timezon... \n", - "3 {'day': None, 'time': None, 'timezone': None, ... \n", - "4 {'day': 'Saturdays', 'time': '06:30', 'timezon... \n", - "\n", - " producers \\\n", - "0 [{'mal_id': 169, 'type': 'anime', 'name': 'Fuj... \n", - "1 [{'mal_id': 104, 'type': 'anime', 'name': 'Lan... \n", - "2 [{'mal_id': 17, 'type': 'anime', 'name': 'Anip... \n", - "3 [{'mal_id': 17, 'type': 'anime', 'name': 'Anip... \n", - "4 [{'mal_id': 17, 'type': 'anime', 'name': 'Anip... \n", - "\n", - " licensors \\\n", - "0 [{'mal_id': 102, 'type': 'anime', 'name': 'Fun... \n", - "1 [{'mal_id': 102, 'type': 'anime', 'name': 'Fun... \n", - "2 [{'mal_id': 102, 'type': 'anime', 'name': 'Fun... \n", - "3 [] \n", - "4 [] \n", - "\n", - " studios \\\n", - "0 [{'mal_id': 18, 'type': 'anime', 'name': 'Toei... \n", - "1 [{'mal_id': 2, 'type': 'anime', 'name': 'Kyoto... \n", - "2 [{'mal_id': 56, 'type': 'anime', 'name': 'A-1 ... \n", - "3 [{'mal_id': 1591, 'type': 'anime', 'name': 'Sc... \n", - "4 [{'mal_id': 56, 'type': 'anime', 'name': 'A-1 ... \n", - "\n", - " genres explicit_genres \\\n", - "0 [{'mal_id': 1, 'type': 'anime', 'name': 'Actio... [] \n", - "1 [{'mal_id': 8, 'type': 'anime', 'name': 'Drama... [] \n", - "2 [{'mal_id': 1, 'type': 'anime', 'name': 'Actio... [] \n", - "3 [{'mal_id': 1, 'type': 'anime', 'name': 'Actio... [] \n", - "4 [{'mal_id': 1, 'type': 'anime', 'name': 'Actio... [] \n", - "\n", - " themes \\\n", - "0 [] \n", - "1 [] \n", - "2 [{'mal_id': 18, 'type': 'anime', 'name': 'Mech... \n", - "3 [{'mal_id': 58, 'type': 'anime', 'name': 'Gore... \n", - "4 [] \n", - "\n", - " demographics \n", - "0 [{'mal_id': 27, 'type': 'anime', 'name': 'Shou... 
\n", - "1 [] \n", - "2 [] \n", - "3 [{'mal_id': 27, 'type': 'anime', 'name': 'Shou... \n", - "4 [{'mal_id': 27, 'type': 'anime', 'name': 'Shou... \n", - "\n", - "[5 rows x 36 columns]" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "winter_list = pd.DataFrame(winter_2018_anime.get(\"data\"))\n", - "winter_list.head()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Test", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Data Preparation.ipynb b/Data Preparation.ipynb index e010237..7576bee 100644 --- a/Data Preparation.ipynb +++ b/Data Preparation.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "d8486f6e-2454-43d1-be41-b8361d8a9675", "metadata": {}, "outputs": [], @@ -43,7 +43,6 @@ "cell_type": "markdown", "id": "70e6c3f6-53c8-4814-9b3f-14f8693ea516", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -57,194 +56,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "c677c3ee-5f8f-4503-967d-4332e1a0fa36", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
usernameuser_iduser_watchinguser_completeduser_onholduser_droppeduser_plantowatchuser_days_spent_watchinggenderlocationbirth_dateaccess_rankjoin_datelast_onlinestats_mean_scorestats_rewatchedstats_episodes
0karthiga225515334910055.31FemaleChennai, India1990-04-29NaN2013-03-032014-02-04 01:32:007.430.03391.0
1RedvelvetDaisuki189760661396390206118.07FemaleManila1995-01-01NaN2012-12-131900-05-13 02:47:006.7880.07094.0
2Damonashu373264519527255983.70MaleDetroit,Michigan1991-08-01NaN2008-02-131900-03-24 12:48:006.156.04936.0
3bskai228342254142511167.16MaleNayarit, Mexico1990-12-14NaN2009-08-312014-05-12 16:35:008.271.010081.0
4shuzzable234778136721622535.48NaNNaNNaNNaN2013-03-252015-09-09 21:54:009.067.02154.0
\n", - "
" - ], - "text/plain": [ - " username user_id user_watching user_completed user_onhold \\\n", - "0 karthiga 2255153 3 49 1 \n", - "1 RedvelvetDaisuki 1897606 61 396 39 \n", - "2 Damonashu 37326 45 195 27 \n", - "3 bskai 228342 25 414 2 \n", - "4 shuzzable 2347781 36 72 16 \n", - "\n", - " user_dropped user_plantowatch user_days_spent_watching gender \\\n", - "0 0 0 55.31 Female \n", - "1 0 206 118.07 Female \n", - "2 25 59 83.70 Male \n", - "3 5 11 167.16 Male \n", - "4 2 25 35.48 NaN \n", - "\n", - " location birth_date access_rank join_date last_online \\\n", - "0 Chennai, India 1990-04-29 NaN 2013-03-03 2014-02-04 01:32:00 \n", - "1 Manila 1995-01-01 NaN 2012-12-13 1900-05-13 02:47:00 \n", - "2 Detroit,Michigan 1991-08-01 NaN 2008-02-13 1900-03-24 12:48:00 \n", - "3 Nayarit, Mexico 1990-12-14 NaN 2009-08-31 2014-05-12 16:35:00 \n", - "4 NaN NaN NaN 2013-03-25 2015-09-09 21:54:00 \n", - "\n", - " stats_mean_score stats_rewatched stats_episodes \n", - "0 7.43 0.0 3391.0 \n", - "1 6.78 80.0 7094.0 \n", - "2 6.15 6.0 4936.0 \n", - "3 8.27 1.0 10081.0 \n", - "4 9.06 7.0 2154.0 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "userlist = pd.read_csv('UserList.csv')\n", + "userlist = pd.read_csv('DataSets/Raw Data/UserList.csv')\n", "userlist.head()" ] }, @@ -282,21 +101,12 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "5f724fbe-04ca-4a41-bc73-75e7642da727", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data type : \n", - "Data dims : (302675, 17)\n" - ] - } - ], + "outputs": [], "source": [ "print(\"Data type : \", type(userlist))\n", "print(\"Data dims : \", userlist.shape)" @@ -312,43 +122,12 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "a9606662-0f99-4e0b-82a8-e6b377b556d0", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "\n", - "RangeIndex: 302675 entries, 0 to 302674\n", - "Data columns (total 17 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 username 302674 non-null object \n", - " 1 user_id 302675 non-null int64 \n", - " 2 user_watching 302675 non-null int64 \n", - " 3 user_completed 302675 non-null int64 \n", - " 4 user_onhold 302675 non-null int64 \n", - " 5 user_dropped 302675 non-null int64 \n", - " 6 user_plantowatch 302675 non-null int64 \n", - " 7 user_days_spent_watching 302675 non-null float64\n", - " 8 gender 217800 non-null object \n", - " 9 location 156773 non-null object \n", - " 10 birth_date 168749 non-null object \n", - " 11 access_rank 0 non-null float64\n", - " 12 join_date 302546 non-null object \n", - " 13 last_online 302546 non-null object \n", - " 14 stats_mean_score 302546 non-null float64\n", - " 15 stats_rewatched 302546 non-null float64\n", - " 16 stats_episodes 302546 non-null float64\n", - "dtypes: float64(5), int64(6), object(6)\n", - "memory usage: 39.3+ MB\n" - ] - } - ], + "outputs": [], "source": [ "userlist.info()" ] @@ -370,263 +149,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "012c1bc0-c7fe-4e5e-b698-23d5f45f9ca2", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
anime_idtitletitle_englishtitle_japanesetitle_synonymsimage_urltypesourceepisodesstatus...backgroundpremieredbroadcastrelatedproducerlicensorstudiogenreopening_themeending_theme
011013Inu x Boku SSInu X Boku Secret Service妖狐×僕SSYouko x Boku SShttps://myanimelist.cdn-dena.com/images/anime/...TVManga12Finished Airing...Inu x Boku SS was licensed by Sentai Filmworks...Winter 2012Fridays at Unknown{'Adaptation': [{'mal_id': 17207, 'type': 'man...Aniplex, Square Enix, Mainichi Broadcasting Sy...Sentai FilmworksDavid ProductionComedy, Supernatural, Romance, Shounen['\"Nirvana\" by MUCC']['#1: \"Nirvana\" by MUCC (eps 1, 11-12)', '#2: ...
12104Seto no HanayomeMy Bride is a Mermaid瀬戸の花嫁The Inland Sea Bridehttps://myanimelist.cdn-dena.com/images/anime/...TVManga26Finished Airing...NaNSpring 2007Unknown{'Adaptation': [{'mal_id': 759, 'type': 'manga...TV Tokyo, AIC, Square Enix, SotsuFunimationGonzoComedy, Parody, Romance, School, Shounen['\"Romantic summer\" by SUN&LUNAR']['#1: \"Ashita e no Hikari (明日への光)\" by Asuka Hi...
25262Shugo Chara!! DokiShugo Chara!! Dokiしゅごキャラ!!どきっShugo Chara Ninenme, Shugo Chara! Second Yearhttps://myanimelist.cdn-dena.com/images/anime/...TVManga51Finished Airing...NaNFall 2008Unknown{'Adaptation': [{'mal_id': 101, 'type': 'manga...TV Tokyo, SotsuNaNSatelightComedy, Magic, School, Shoujo['#1: \"Minna no Tamago (みんなのたまご)\" by Shugo Cha...['#1: \"Rottara Rottara (ロッタラ ロッタラ)\" by Buono! ...
3721Princess TutuPrincess TutuプリンセスチュチュNaNhttps://myanimelist.cdn-dena.com/images/anime/...TVOriginal38Finished Airing...Princess Tutu aired in two parts. The first pa...Summer 2002Fridays at Unknown{'Adaptation': [{'mal_id': 1581, 'type': 'mang...Memory-Tech, GANSIS, Marvelous AQLADV FilmsHal Film MakerComedy, Drama, Magic, Romance, Fantasy['\"Morning Grace\" by Ritsuko Okazaki']['\"Watashi No Ai Wa Chiisaikeredo\" by Ritsuko ...
412365Bakuman. 3rd SeasonBakuman.バクマン。Bakuman Season 3https://myanimelist.cdn-dena.com/images/anime/...TVManga25Finished Airing...NaNFall 2012Unknown{'Adaptation': [{'mal_id': 9711, 'type': 'mang...NHK, ShueishaNaNJ.C.StaffComedy, Drama, Romance, Shounen['#1: \"Moshimo no Hanashi (もしもの話)\" by nano.RIP...['#1: \"Pride on Everyday\" by Sphere (eps 1-13)...
\n", - "

5 rows × 31 columns

\n", - "
" - ], - "text/plain": [ - " anime_id title title_english title_japanese \\\n", - "0 11013 Inu x Boku SS Inu X Boku Secret Service 妖狐×僕SS \n", - "1 2104 Seto no Hanayome My Bride is a Mermaid 瀬戸の花嫁 \n", - "2 5262 Shugo Chara!! Doki Shugo Chara!! Doki しゅごキャラ!!どきっ \n", - "3 721 Princess Tutu Princess Tutu プリンセスチュチュ \n", - "4 12365 Bakuman. 3rd Season Bakuman. バクマン。 \n", - "\n", - " title_synonyms \\\n", - "0 Youko x Boku SS \n", - "1 The Inland Sea Bride \n", - "2 Shugo Chara Ninenme, Shugo Chara! Second Year \n", - "3 NaN \n", - "4 Bakuman Season 3 \n", - "\n", - " image_url type source episodes \\\n", - "0 https://myanimelist.cdn-dena.com/images/anime/... TV Manga 12 \n", - "1 https://myanimelist.cdn-dena.com/images/anime/... TV Manga 26 \n", - "2 https://myanimelist.cdn-dena.com/images/anime/... TV Manga 51 \n", - "3 https://myanimelist.cdn-dena.com/images/anime/... TV Original 38 \n", - "4 https://myanimelist.cdn-dena.com/images/anime/... TV Manga 25 \n", - "\n", - " status ... background \\\n", - "0 Finished Airing ... Inu x Boku SS was licensed by Sentai Filmworks... \n", - "1 Finished Airing ... NaN \n", - "2 Finished Airing ... NaN \n", - "3 Finished Airing ... Princess Tutu aired in two parts. The first pa... \n", - "4 Finished Airing ... NaN \n", - "\n", - " premiered broadcast \\\n", - "0 Winter 2012 Fridays at Unknown \n", - "1 Spring 2007 Unknown \n", - "2 Fall 2008 Unknown \n", - "3 Summer 2002 Fridays at Unknown \n", - "4 Fall 2012 Unknown \n", - "\n", - " related \\\n", - "0 {'Adaptation': [{'mal_id': 17207, 'type': 'man... \n", - "1 {'Adaptation': [{'mal_id': 759, 'type': 'manga... \n", - "2 {'Adaptation': [{'mal_id': 101, 'type': 'manga... \n", - "3 {'Adaptation': [{'mal_id': 1581, 'type': 'mang... \n", - "4 {'Adaptation': [{'mal_id': 9711, 'type': 'mang... \n", - "\n", - " producer licensor \\\n", - "0 Aniplex, Square Enix, Mainichi Broadcasting Sy... 
Sentai Filmworks \n", - "1 TV Tokyo, AIC, Square Enix, Sotsu Funimation \n", - "2 TV Tokyo, Sotsu NaN \n", - "3 Memory-Tech, GANSIS, Marvelous AQL ADV Films \n", - "4 NHK, Shueisha NaN \n", - "\n", - " studio genre \\\n", - "0 David Production Comedy, Supernatural, Romance, Shounen \n", - "1 Gonzo Comedy, Parody, Romance, School, Shounen \n", - "2 Satelight Comedy, Magic, School, Shoujo \n", - "3 Hal Film Maker Comedy, Drama, Magic, Romance, Fantasy \n", - "4 J.C.Staff Comedy, Drama, Romance, Shounen \n", - "\n", - " opening_theme \\\n", - "0 ['\"Nirvana\" by MUCC'] \n", - "1 ['\"Romantic summer\" by SUN&LUNAR'] \n", - "2 ['#1: \"Minna no Tamago (みんなのたまご)\" by Shugo Cha... \n", - "3 ['\"Morning Grace\" by Ritsuko Okazaki'] \n", - "4 ['#1: \"Moshimo no Hanashi (もしもの話)\" by nano.RIP... \n", - "\n", - " ending_theme \n", - "0 ['#1: \"Nirvana\" by MUCC (eps 1, 11-12)', '#2: ... \n", - "1 ['#1: \"Ashita e no Hikari (明日への光)\" by Asuka Hi... \n", - "2 ['#1: \"Rottara Rottara (ロッタラ ロッタラ)\" by Buono! ... \n", - "3 ['\"Watashi No Ai Wa Chiisaikeredo\" by Ritsuko ... \n", - "4 ['#1: \"Pride on Everyday\" by Sphere (eps 1-13)... 
\n", - "\n", - "[5 rows x 31 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "animelist = pd.read_csv('AnimeList.csv')\n", + "animelist = pd.read_csv('DataSets/Raw Data/AnimeList.csv')\n", "animelist.head()\n" ] }, @@ -677,21 +207,12 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "553d943b-a42b-41ab-b03c-871f4a870312", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data type : \n", - "Data dims : (14478, 31)\n" - ] - } - ], + "outputs": [], "source": [ "print(\"Data type : \", type(animelist))\n", "print(\"Data dims : \", animelist.shape)" @@ -707,57 +228,12 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "aa025f54-4b39-458a-8f33-6f834492ff8c", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 14478 entries, 0 to 14477\n", - "Data columns (total 31 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 anime_id 14478 non-null int64 \n", - " 1 title 14478 non-null object \n", - " 2 title_english 5724 non-null object \n", - " 3 title_japanese 14443 non-null object \n", - " 4 title_synonyms 8937 non-null object \n", - " 5 image_url 14382 non-null object \n", - " 6 type 14478 non-null object \n", - " 7 source 14478 non-null object \n", - " 8 episodes 14478 non-null int64 \n", - " 9 status 14478 non-null object \n", - " 10 airing 14478 non-null bool \n", - " 11 aired_string 14478 non-null object \n", - " 12 aired 14478 non-null object \n", - " 13 duration 14478 non-null object \n", - " 14 rating 13934 non-null object \n", - " 15 score 14478 non-null float64\n", - " 16 scored_by 14478 non-null int64 \n", - " 17 rank 12904 non-null float64\n", - " 18 popularity 14478 non-null int64 \n", - " 19 members 14478 non-null int64 \n", - 
" 20 favorites 14478 non-null int64 \n", - " 21 background 1057 non-null object \n", - " 22 premiered 4096 non-null object \n", - " 23 broadcast 4271 non-null object \n", - " 24 related 14478 non-null object \n", - " 25 producer 8288 non-null object \n", - " 26 licensor 3373 non-null object \n", - " 27 studio 8544 non-null object \n", - " 28 genre 14414 non-null object \n", - " 29 opening_theme 14478 non-null object \n", - " 30 ending_theme 14478 non-null object \n", - "dtypes: bool(1), float64(2), int64(6), object(22)\n", - "memory usage: 3.3+ MB\n" - ] - } - ], + "outputs": [], "source": [ "animelist.info()\n" ] @@ -801,163 +277,12 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "9817e6bc-0a70-431c-8567-c53f73c7d8e3", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
anime_idepisodesscorescored_byrankpopularitymembersfavorites
count14478.00000014478.00000014478.0000001.447800e+0412904.00000014478.0000001.447800e+0414478.000000
mean17377.22986611.3083996.1424821.146003e+046439.0654067220.2595662.296640e+04311.649606
std13165.31501143.4434511.4639814.310519e+043720.2276084170.0805647.498136e+042615.554211
min1.0000000.0000000.0000000.000000e+000.0000000.0000000.000000e+000.000000
25%4389.2500001.0000005.5500004.600000e+013216.2500003611.5000002.450000e+020.000000
50%15135.0000001.0000006.3700005.010000e+026441.5000007225.5000001.679500e+032.000000
75%31146.50000012.0000007.0600003.941500e+039664.00000010827.7500001.037900e+0423.000000
max37916.0000001818.00000010.0000001.009477e+0612919.00000014487.0000001.456378e+06106895.000000
\n", - "
" - ], - "text/plain": [ - " anime_id episodes score scored_by rank \\\n", - "count 14478.000000 14478.000000 14478.000000 1.447800e+04 12904.000000 \n", - "mean 17377.229866 11.308399 6.142482 1.146003e+04 6439.065406 \n", - "std 13165.315011 43.443451 1.463981 4.310519e+04 3720.227608 \n", - "min 1.000000 0.000000 0.000000 0.000000e+00 0.000000 \n", - "25% 4389.250000 1.000000 5.550000 4.600000e+01 3216.250000 \n", - "50% 15135.000000 1.000000 6.370000 5.010000e+02 6441.500000 \n", - "75% 31146.500000 12.000000 7.060000 3.941500e+03 9664.000000 \n", - "max 37916.000000 1818.000000 10.000000 1.009477e+06 12919.000000 \n", - "\n", - " popularity members favorites \n", - "count 14478.000000 1.447800e+04 14478.000000 \n", - "mean 7220.259566 2.296640e+04 311.649606 \n", - "std 4170.080564 7.498136e+04 2615.554211 \n", - "min 0.000000 0.000000e+00 0.000000 \n", - "25% 3611.500000 2.450000e+02 0.000000 \n", - "50% 7225.500000 1.679500e+03 2.000000 \n", - "75% 10827.750000 1.037900e+04 23.000000 \n", - "max 14487.000000 1.456378e+06 106895.000000 " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "## for numeric data\n", "animelist.describe()" @@ -965,203 +290,12 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "572df98e-95fe-4148-adcf-f2f264bcfa21", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titletitle_englishtitle_japanesetitle_synonymsimage_urltypesourcestatusaired_stringaired...backgroundpremieredbroadcastrelatedproducerlicensorstudiogenreopening_themeending_theme
count144785724144438937143821447814478144781447814478...10574096427114478828833738544144141447814478
unique144775606137018575143827163100269649...103822144194203221193778454443285458
topHinamatsuriCyborg 009ゲゲゲの鬼太郎Minna no Utahttps://myanimelist.cdn-dena.com/images/anime/...TVUnknownFinished AiringNot available{'from': None, 'to': None}...Includes claymation short which was shown befo...Spring 2017Unknown[]NHKFunimationToei AnimationHentai[][]
freq246189142714210137912231691...5802241451542772672586897848807
\n", - "

4 rows × 22 columns

\n", - "
" - ], - "text/plain": [ - " title title_english title_japanese title_synonyms \\\n", - "count 14478 5724 14443 8937 \n", - "unique 14477 5606 13701 8575 \n", - "top Hinamatsuri Cyborg 009 ゲゲゲの鬼太郎 Minna no Uta \n", - "freq 2 4 6 189 \n", - "\n", - " image_url type source \\\n", - "count 14382 14478 14478 \n", - "unique 14382 7 16 \n", - "top https://myanimelist.cdn-dena.com/images/anime/... TV Unknown \n", - "freq 1 4271 4210 \n", - "\n", - " status aired_string aired ... \\\n", - "count 14478 14478 14478 ... \n", - "unique 3 10026 9649 ... \n", - "top Finished Airing Not available {'from': None, 'to': None} ... \n", - "freq 13791 223 1691 ... \n", - "\n", - " background premiered \\\n", - "count 1057 4096 \n", - "unique 1038 221 \n", - "top Includes claymation short which was shown befo... Spring 2017 \n", - "freq 5 80 \n", - "\n", - " broadcast related producer licensor studio genre \\\n", - "count 4271 14478 8288 3373 8544 14414 \n", - "unique 441 9420 3221 193 778 4544 \n", - "top Unknown [] NHK Funimation Toei Animation Hentai \n", - "freq 2241 4515 427 726 725 868 \n", - "\n", - " opening_theme ending_theme \n", - "count 14478 14478 \n", - "unique 4328 5458 \n", - "top [] [] \n", - "freq 9784 8807 \n", - "\n", - "[4 rows x 22 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "## for data that is object\n", "animelist.describe(include=object)" @@ -1169,29 +303,12 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "121330ee-d633-4996-90be-0e8ae2956abd", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['anime_id', 'title', 'title_english', 'title_japanese',\n", - " 'title_synonyms', 'image_url', 'type', 'source', 'episodes', 'status',\n", - " 'airing', 'aired_string', 'aired', 'duration', 'rating', 'score',\n", - " 'scored_by', 'rank', 'popularity', 'members', 'favorites', 'background',\n", - " 'premiered', 
'broadcast', 'related', 'producer', 'licensor', 'studio',\n", - " 'genre', 'opening_theme', 'ending_theme'],\n", - " dtype='object')" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "## what are the columns involved in the dataset\n", "animelist.columns" @@ -1212,7 +329,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "c21ea1b8-02fe-4d1f-bff7-80346b4b6b21", "metadata": { "tags": [] @@ -1226,27 +343,12 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "29af9569-8be7-456e-9268-a64320a0e205", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 14478 entries, 0 to 14477\n", - "Series name: isPremiered\n", - "Non-Null Count Dtype\n", - "-------------- -----\n", - "14478 non-null int32\n", - "dtypes: int32(1)\n", - "memory usage: 56.7 KB\n" - ] - } - ], + "outputs": [], "source": [ "animelist.isPremiered.info()" ] @@ -1264,35 +366,12 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "4f148b2d-cc2d-41f0-a638-5a91c19f0c40", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "studio\n", - "unknown 5934\n", - "Toei Animation 725\n", - "Sunrise 447\n", - "J.C.Staff 314\n", - "Madhouse 311\n", - " ... 
\n", - "Studio Junio, Annapuru 1\n", - "Tokyo Media Connections 1\n", - "Gainax, Tatsunoko Production 1\n", - "Fanworks, Imagineer 1\n", - "33 Collective 1\n", - "Name: count, Length: 779, dtype: int64" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "## calculate all the value from each studio\n", "\n", @@ -1304,761 +383,13 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "36ed0794-7ddb-4e31-a793-d8aaa680c01a", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "['Eiken',\n", - " 'Group TAC',\n", - " 'TNK',\n", - " 'Artland',\n", - " 'SynergySP',\n", - " '8bit',\n", - " 'Wit Studio',\n", - " 'Actas',\n", - " 'Manglobe',\n", - " 'Haoliners Animation League',\n", - " 'Ajia-Do',\n", - " 'MAPPA',\n", - " 'Studio Comet',\n", - " 'White Fox',\n", - " 'Mushi Production',\n", - " 'Studio Gokumi',\n", - " 'Hal Film Maker',\n", - " 'Tezuka Productions',\n", - " 'A.C.G.T.',\n", - " 'Asahi Production',\n", - " 'TYO Animations',\n", - " 'Gathering',\n", - " 'Tokyo Movie Shinsha',\n", - " 'Daume',\n", - " 'Kinema Citrus',\n", - " 'Polygon Pictures',\n", - " 'Nomad',\n", - " 'AIC A.S.T.A.',\n", - " 'T-Rex',\n", - " 'LIDENFILMS',\n", - " 'Magic Bus',\n", - " 'Studio Jam',\n", - " 'Bee Train',\n", - " 'GoHands',\n", - " 'Production IMS',\n", - " 'Trigger',\n", - " 'David Production',\n", - " 'Bandai Namco Pictures',\n", - " 'Telecom Animation Film',\n", - " 'Seven Arcs',\n", - " 'Office Takeout',\n", - " 'Asread',\n", - " 'Studio Fantasia',\n", - " 'Studio PuYUKAI',\n", - " 'RG Animation Studios',\n", - " 'dwarf',\n", - " 'AIC Plus+',\n", - " 'Seven Arcs Pictures',\n", - " 'Fanworks',\n", - " 'APPP',\n", - " 'Hoods Entertainment',\n", - " 'AT-2',\n", - " 'Sparkly Key Animation Studio',\n", - " 'Production I.G, Xebec',\n", - " 'Millepensee',\n", - " 'Y.O.U.C',\n", - " 'Shuka',\n", - " 'Flavors Soft',\n", - " 'Creators in 
Pack',\n", - " 'Animate Film',\n", - " 'ILCA',\n", - " 'Studio 9 MAiami',\n", - " 'Triangle Staff',\n", - " 'Radix',\n", - " 'Collaboration Works',\n", - " 'Suzuki Mirano',\n", - " 'CoMix Wave Films',\n", - " 'The Answer Studio',\n", - " 'Kanaban Graphics',\n", - " 'SANZIGEN',\n", - " 'EMT²',\n", - " 'Yumeta Company',\n", - " 'SANZIGEN, LIDENFILMS',\n", - " 'Passione',\n", - " 'Khara',\n", - " 'NAZ',\n", - " 'Bridge',\n", - " 'AIC Build',\n", - " 'Signal. MD',\n", - " 'Shanghai Foch Film Culture Investment',\n", - " 'Lay-duce',\n", - " 'Pie in The Sky',\n", - " 'Pierrot Plus',\n", - " 'G.CMay Animation & Film',\n", - " 'Shirogumi',\n", - " 'E&G Films',\n", - " 'G-Lam',\n", - " 'Pastel',\n", - " 'Studio 3Hz',\n", - " 'C-Station',\n", - " 'Studio Colorido',\n", - " 'Studio Eromatick',\n", - " 'Onionskin',\n", - " 'Phoenix Entertainment',\n", - " 'Silver Link., Connect',\n", - " 'Knack Productions',\n", - " 'M.S.C',\n", - " 'Digital Frontier',\n", - " 'Minakata Laboratory',\n", - " 'Fukushima Gainax',\n", - " 'Tokyo Kids',\n", - " 'Next Media Animation',\n", - " 'Xebec, OLM',\n", - " 'Yaoyorozu',\n", - " 'Trans Arts',\n", - " 'Project No.9',\n", - " 'Daewon Media',\n", - " 'Ashi Production',\n", - " 'feel., Zexcs',\n", - " 'Marza Animation Planet',\n", - " 'Pine Jam',\n", - " 'Studio 1st',\n", - " 'AIC Spirits',\n", - " 'Tatsunoko Production, Dongwoo A&E',\n", - " 'Palm Studio',\n", - " 'Graphinica',\n", - " 'Gakken Eigakyoku',\n", - " 'Telescreen BV',\n", - " 'Zero-G',\n", - " 'Gainax, Shaft',\n", - " 'BreakBottle',\n", - " 'PrimeTime',\n", - " 'Vega Entertainment',\n", - " 'Shogakukan Music & Digital Entertainment',\n", - " 'Shanghai Animation Film Studio',\n", - " 'Blue Cat',\n", - " 'Encourage Films',\n", - " 'Opera House',\n", - " 'Studio Rikka',\n", - " 'WAO World',\n", - " 'Pops Inc.',\n", - " 'C2C',\n", - " 'Satelight, A-1 Pictures',\n", - " 'AIC Classic',\n", - " 'pH Studio',\n", - " 'Ishikawa Pro',\n", - " 'Zero-G Room',\n", - " 'Usagi Ou',\n", - " 'AIC, 
Artmic',\n", - " 'Kamikaze Douga',\n", - " 'TYPHOON GRAPHICS',\n", - " 'Hoods Drifters Studio',\n", - " 'Office Take Off',\n", - " 'TROYCA',\n", - " 'Sunrise, Toei Animation',\n", - " 'Rising Force',\n", - " 'Shimogumi',\n", - " 'Shochiku Animation Institute',\n", - " '10Gauge',\n", - " 'Queen Bee',\n", - " 'AIC, Artmic, Animate Film',\n", - " 'Jinnis Animation Studios',\n", - " 'Milky Cartoon',\n", - " 'Imagin, Studio Live',\n", - " 'Egg',\n", - " 'Studio Deen, DAX Production',\n", - " 'Craftar',\n", - " 'Geno Studio',\n", - " 'Science SARU',\n", - " 'Artmic',\n", - " 'Millepensee, GEMBA',\n", - " 'L²Studio',\n", - " 'Planet',\n", - " 'Oh! Production',\n", - " 'Production I.G, Madhouse',\n", - " 'Xebec Zwei',\n", - " 'Production Reed, Asahi Production',\n", - " 'Studio Gokumi, AXsiZ',\n", - " 'OLM Digital',\n", - " 'Studio Gallop, Studio Deen',\n", - " 'Charaction',\n", - " 'Xebec, Issen',\n", - " 'Telecom Animation Film, Graphinica',\n", - " 'Ordet',\n", - " 'Studio Animal',\n", - " 'W-Toon Studio',\n", - " 'Nexus',\n", - " 'Artland, TNK',\n", - " 'Ascension',\n", - " 'Picture Magic',\n", - " 'Production I.G, M.S.C',\n", - " 'Chaos Project',\n", - " 'Gathering, Lesprit',\n", - " 'JCF',\n", - " 'TAKI Corporation',\n", - " 'Studio Egg',\n", - " 'Studio Pierrot, Pierrot Plus',\n", - " 'Studio Junio',\n", - " '2:10 Animation',\n", - " 'Madhouse, TMS Entertainment',\n", - " 'ChuChu',\n", - " 'Studio Matrix',\n", - " 'Vasoon Animation',\n", - " 'Topcraft',\n", - " 'Dongwoo A&E',\n", - " 'Trinet Entertainment, Picture Magic',\n", - " 'Steve N' Steven',\n", - " 'Studio A-CAT',\n", - " 'Ordet, LIDENFILMS',\n", - " 'Toei Animation, Studio World',\n", - " 'Yamamura Animation, Inc.',\n", - " 'Brain's Base, Marvy Jack',\n", - " 'Lilix',\n", - " 'Tsuchida Productions',\n", - " 'Gonzo, Asread',\n", - " 'Mili Pictures',\n", - " 'Bouncy',\n", - " 'A-1 Pictures, Ordet',\n", - " 'Madhouse, MAPPA',\n", - " 'Anime Antenna Iinkai',\n", - " 'Platinum Vision',\n", - " 'Majin',\n", - " 
'A-1 Pictures, TROYCA',\n", - " 'Image House',\n", - " 'Life Work',\n", - " 'W-Toon Studio, DMM.futureworks',\n", - " 'Venet',\n", - " 'CG Year',\n", - " 'Oddjob',\n", - " 'HS Pictures Studio',\n", - " 'Agent 21',\n", - " 'NUT',\n", - " 'Creators Dot Com',\n", - " 'Studio Chizu',\n", - " 'OLM Digital, Signal. MD',\n", - " 'Sugar Boy, Blue Cat',\n", - " 'Remic',\n", - " 'Kenji Studio',\n", - " 'J.C.Staff, A.C.G.T.',\n", - " 'Gainax, Production I.G',\n", - " 'Barnum Studio, Project No.9',\n", - " 'Production I.G, Production Reed',\n", - " 'Tatsunoko Production, SynergySP',\n", - " 'Studio Sign',\n", - " 'A-Real',\n", - " 'Pollyanna Graphics',\n", - " 'Bones, Kinema Citrus',\n", - " 'Echoes',\n", - " 'Kyotoma',\n", - " 'Square Enix',\n", - " 'Tomovies',\n", - " 'Madhouse, Toei Animation',\n", - " 'Satelight, 8bit',\n", - " 'Satelight, ixtl',\n", - " 'Natural High',\n", - " 'Hotline',\n", - " 'Team YokkyuFuman',\n", - " 'An DerCen',\n", - " 'ACC Production',\n", - " 'Sunrise, Studio Deen',\n", - " 'Studio Blanc',\n", - " 'TUBA',\n", - " 'Jumondo',\n", - " 'Shirogumi, Encourage Films',\n", - " 'Oxybot',\n", - " 'Tomason',\n", - " 'Sanrio',\n", - " 'Sotsu',\n", - " 'Studio Fantasia, Animate Film',\n", - " 'Eiken, Studio Live',\n", - " 'Puzzle Animation Studio Limited',\n", - " 'Joker Films',\n", - " 'MMDGP',\n", - " 'Karaku',\n", - " 'Qualia Animation',\n", - " 'Trinet Entertainment',\n", - " 'Schoolzone',\n", - " 'Tatsunoko Production, Ordet',\n", - " 'domerica',\n", - " 'Thundray',\n", - " 'Toei Animation, Production Reed',\n", - " 'Kyoto Animation, Animation Do',\n", - " 'Calf Studio',\n", - " 'D & D Pictures',\n", - " 'Hoods Entertainment, teamKG',\n", - " 'Telecom Animation Film, Shirogumi',\n", - " 'DR Movie',\n", - " 'Kyoto Animation, Tatsunoko Production',\n", - " 'Triple X',\n", - " 'Actas, Studio 3Hz',\n", - " 'Studio Ghibli, Studio Hibari',\n", - " 'Sunrise, Bandai Namco Pictures',\n", - " 'Arms, TNK',\n", - " 'Himajin Planning',\n", - " 'Pink Pineapple',\n", 
- " 'Ordet, Millepensee',\n", - " 'Tatsunoko Production, Studio World',\n", - " 'Light Chaser Animation Studios',\n", - " 'Actas, SynergySP',\n", - " 'Digital Media Lab',\n", - " 'Toei Video',\n", - " 'Orange',\n", - " 'Madhouse, Studio 4°C',\n", - " 'drop',\n", - " 'Gainax, feel.',\n", - " 'Production I.G, Xebec, OLM',\n", - " 'WAO World, MooGoo',\n", - " 'KOO-KI',\n", - " 'Circle Tribute',\n", - " 'SELFISH',\n", - " 'LandQ studios',\n", - " 'Gainax, J.C.Staff',\n", - " 'Imagin',\n", - " 'Studio Binzo',\n", - " 'Haoliners Animation League, Pb Animation Co. Ltd.',\n", - " 'Sting Ray',\n", - " 'Gainax, Asahi Production',\n", - " 'Studio Rikka, Purple Cow Studio Japan',\n", - " 'TMS Entertainment, Telecom Animation Film',\n", - " 'Radix, Marine Entertainment',\n", - " 'Moss Design Unit',\n", - " 'Production I.G, Trans Arts',\n", - " 'KAGAYA Studio',\n", - " 'Haoliners Animation League, Studio LAN',\n", - " 'Kokusai Eigasha',\n", - " 'GARDEN LODGE',\n", - " 'Kinema Citrus, EMT²',\n", - " 'Tezuka Productions, MAPPA',\n", - " 'Studio Comet, KeyEast, REALTHING',\n", - " 'Anpro',\n", - " 'Madhouse, Imagin',\n", - " 'Tengu Kobo',\n", - " 'Studio Zero',\n", - " 'Satelight, C2C',\n", - " 'Kinema Citrus, Orange',\n", - " 'J.C.Staff, Artland',\n", - " 'Visual 80',\n", - " 'Studio Live',\n", - " 'Plum',\n", - " 'J.C.Staff, Egg Firm',\n", - " 'Three-d',\n", - " 'MAPPA, Studio VOLN',\n", - " 'Kazami Gakuen Koushiki Douga-bu',\n", - " 'Gonzo, Production I.G',\n", - " 'Robot Communications',\n", - " 'Studio! 
Cucuri',\n", - " 'Madhouse, Telecom Animation Film',\n", - " 'M2',\n", - " 'PPM',\n", - " 'Artland, Tatsunoko Production',\n", - " 'A-1 Pictures, Bridge',\n", - " 'Studio Flag',\n", - " 'Studio VOLN',\n", - " 'Madhouse, Production Reed',\n", - " 'Neft Film',\n", - " 'production doA',\n", - " 'Wawayu Animation',\n", - " 'Production I.G, Zexcs',\n", - " 'Larx Entertainment',\n", - " 'TMS Entertainment, Jinnis Animation Studios',\n", - " 'Gonzo, AIC',\n", - " 'AIC Frontier',\n", - " 'Bridge, Husio Studio',\n", - " 'Studio Pierrot, Shin-Ei Animation',\n", - " 'Manglobe, Geno Studio',\n", - " 'Ekura Animal',\n", - " 'Production IMS, Orange',\n", - " 'Gonzo, Palm Studio',\n", - " 'SBS TV Production',\n", - " 'Studio Pierrot, D.A.S.T.',\n", - " 'CloverWorks',\n", - " 'Gainax, Magic Bus',\n", - " 'Media Bank',\n", - " 'Madhouse, Tatsunoko Production',\n", - " 'Studio Flad',\n", - " 'Production I.G, OLM, Signal. MD',\n", - " 'Bones, Production Reed',\n", - " 'Studio Kikan',\n", - " 'Karasfilms',\n", - " 'Studio Unicorn, Hiro Media',\n", - " 'OLM, Production Reed',\n", - " 'AIC Spirits, BeSTACK',\n", - " 'MAPPA, Lapin Track',\n", - " 'Xebec, Genco, Triangle Staff',\n", - " 'Studio Pierrot, AIC A.S.T.A.',\n", - " 'TMS Entertainment, Tezuka Productions',\n", - " 'Artland, Nippon Animation',\n", - " 'ixtl, LIDENFILMS',\n", - " 'Tomoyasu Murata Company',\n", - " 'Chippai',\n", - " 'Dynamo Pictures',\n", - " 'Gainax, Toei Animation',\n", - " 'Group TAC, View Works',\n", - " 'NHK',\n", - " 'Shinkuukan',\n", - " 'Daichi Doga, Dongyang Animation',\n", - " 'Odolttogi',\n", - " 'Nice Boat Animation',\n", - " 'EDGE',\n", - " 'Public & Basic, Ripple Film',\n", - " 'TNK, Production Reed',\n", - " 'Actas, TMS Entertainment',\n", - " 'REVOROOT',\n", - " 'DAX Production',\n", - " 'Bibury Animation Studios',\n", - " 'Arms, Studio Kikan',\n", - " 'Actas, Bee Media',\n", - " 'Studio Flag, Studio Bogey',\n", - " 'Kitty Films',\n", - " 'Production I.G, SANZIGEN',\n", - " 'Tama Production',\n", - 
" 'Lerche, 10Gauge',\n", - " 'Chaos Project, GANSIS',\n", - " 'Seven Arcs, Seven Arcs Pictures',\n", - " 'A-1 Pictures, CloverWorks',\n", - " 'Studio Bogey, Public & Basic',\n", - " 'Artland, Madhouse',\n", - " 'AIC Spirits, Asread',\n", - " 'Barnum Studio, Project No.9, Studio Blanc',\n", - " 'Ajia-Do, Group TAC',\n", - " 'Tatsunoko Production, 10Gauge',\n", - " 'Heewon Entertainment',\n", - " 'AIC, Studio Hakk',\n", - " 'Studio Deen, Wit Studio',\n", - " 'Ajia-Do, Studio Deen',\n", - " 'Big Bang',\n", - " 'Madhouse, DLE',\n", - " 'Anime R, Aubec',\n", - " 'Fuji TV',\n", - " 'Madhouse, Tezuka Productions',\n", - " 'Tsukimidou',\n", - " 'Sparky Animation',\n", - " 'Artland, Magic Bus',\n", - " 'Sunrise, Nakamura Production',\n", - " 'Plum, Magic Bus',\n", - " 'Group TAC, Amuse',\n", - " 'J.C.Staff, Animate Film',\n", - " 'Production I.G, Animate Film',\n", - " 'Darts',\n", - " 'Production I.G, DLE',\n", - " 'Production I.G, NUT, REVOROOT',\n", - " 'Production I.G, Polygon Pictures',\n", - " 'Studio Meditation With a Pencil',\n", - " 'Panda Factory, Studio PuYUKAI',\n", - " 'Bee Train, Production I.G, Madhouse, Studio 4°C',\n", - " 'Picograph',\n", - " 'Tatsunoko Production, SynergySP, Seven',\n", - " 'Romanov Films',\n", - " 'T.P.O',\n", - " 'Shueisha',\n", - " 'AXsiZ',\n", - " 'Madhouse, Studio Deen',\n", - " 'D.A.S.T.',\n", - " 'Khara, Trigger',\n", - " 'Tsuburaya Productions, Trigger',\n", - " 'TriF Studio',\n", - " 'Studio LAN',\n", - " 'G-angle',\n", - " 'Satelight, Production Reed',\n", - " 'Shaft, TNK',\n", - " 'AIC Spirits, Digital Frontier',\n", - " 'Studio Deen, Daume',\n", - " 'Studio Deen, Studio Hibari, Production Reed',\n", - " 'Production I.G, OLM',\n", - " 'Eiken, TYO Animations',\n", - " 'Zexcs, Studio A-CAT',\n", - " 'Asahi Production, Rising Force',\n", - " 'Aubec',\n", - " 'Studio Pierrot, Ajia-Do',\n", - " 'Front Line, Studio G-1Neo',\n", - " 'Studio Korumi',\n", - " 'Studio Gallop, TMS Entertainment',\n", - " 'Animate Film, Studio Kikan',\n", 
- " 'Yokohama Animation Lab',\n", - " 'Felix Film',\n", - " 'Sunrise, Studio Hibari',\n", - " 'AIC, APPP',\n", - " 'Toei Animation, TMS Entertainment',\n", - " 'Studio Anima',\n", - " 'View Works',\n", - " 'Kamikaze Douga, Nishiki Studio',\n", - " 'J.C.Staff, Nomad',\n", - " 'TMS Entertainment, Studio Comet',\n", - " 'Mook Animation',\n", - " 'OLM, Wit Studio',\n", - " 'Toei Animation, Studio Nue',\n", - " 'J.C.Staff, Tatsunoko Production',\n", - " 'Bee Media, Code',\n", - " 'Artland, Hoods Entertainment',\n", - " 'Nippon Animation, Production Reed',\n", - " 'AIC, Animate Film',\n", - " 'Gainax, Studio 4°C',\n", - " 'A-1 Pictures, Trigger, CloverWorks',\n", - " 'Tatsunoko Production, Production Reed, Asahi Production',\n", - " 'Panmedia, Meruhensha',\n", - " 'Actas, Brain's Base',\n", - " 'Dwango, LIDENFILMS',\n", - " 'J.C.Staff, Production I.G',\n", - " 'OLM, OLM Digital, Sprite Animation Studios',\n", - " 'Bee Media, 81 Produce',\n", - " 'AIC, Darts',\n", - " 'Bee Train, Xebec',\n", - " 'Sakura Create',\n", - " 'TMS Entertainment, 3xCube',\n", - " 'Ishimori Entertainment',\n", - " 'Studio Core',\n", - " 'Robot Communications, Imagica Image Works',\n", - " 'Xebec, AIC',\n", - " 'Kaeruotoko Shokai',\n", - " 'Idea Factory',\n", - " 'J.C.Staff, Production Reed',\n", - " 'KIZAWA Studio',\n", - " 'Doga Kobo, Orange',\n", - " 'BOOTLEG',\n", - " 'Office DCI',\n", - " 'Ripple Film',\n", - " 'Studio UGOKI',\n", - " 'Madhouse, Studio Fantasia',\n", - " 'Nippon Animation, Studio WHO',\n", - " 'PRA',\n", - " 'Tamura Shigeru Studio',\n", - " 'Animaruya',\n", - " 'Bones, Sanrio',\n", - " 'Trinet Entertainment, Studio Hibari',\n", - " 'AIC Spirits, Group TAC',\n", - " 'Mirai Film',\n", - " 'Studio Ponoc',\n", - " 'Namu Animation',\n", - " 'Shirogumi, Shin-Ei Animation',\n", - " 'Coastline Animation Studio',\n", - " 'Cygames',\n", - " 'Madhouse, Group TAC',\n", - " 'Oz',\n", - " 'Emon, Blade',\n", - " 'Milky Animation Label',\n", - " 'Xebec, Asread',\n", - " 'Studio Ghibli, 
Ajia-Do',\n", - " 'J.C.Staff, Studio Ghibli',\n", - " 'Kyoto Animation, Sunrise',\n", - " 'EKACHI EPILKA',\n", - " 'Imagineer',\n", - " 'Genco, Radix',\n", - " 'TMS Entertainment, DLE',\n", - " 'Studio Take Off',\n", - " 'Studio Pierrot, Arms',\n", - " 'G-Lam, Studio CA',\n", - " 'Sunrise, Production Reed',\n", - " 'Ajia-Do, TMS Entertainment',\n", - " 'TNK, Zero-G',\n", - " 'J.C.Staff, Kitty Films',\n", - " 'Sunrise, Kino Production',\n", - " 'Diomedea, Studio Blanc',\n", - " 'Triangle Staff, Studio Wombat',\n", - " 'Bones, Telecom Animation Film',\n", - " 'Satelight, Encourage Films',\n", - " 'Kyoto Animation, Production I.G, Shin-Ei Animation',\n", - " 'Studio World',\n", - " 'Studio Kyuuma, Studio Kikan, Azeta Pictures',\n", - " 'Tatsunoko Production, CoMix Wave Films, FOREST Hunting One',\n", - " 'Think Corporation',\n", - " 'ufotable, feel., Studio Flag',\n", - " 'Future Planet, Beijing Huihuang Animation Company',\n", - " 'Artland, AIC, Artmic',\n", - " 'Madhouse, Tokyo Movie Shinsha',\n", - " 'Studio Pierrot, Kyoto Animation',\n", - " 'Studio Comet, Zexcs',\n", - " 'Japan Taps',\n", - " 'Ginga Ya',\n", - " 'Brain's Base, Studio A-CAT',\n", - " 'Office AO',\n", - " 'Shirogumi, EMT²',\n", - " 'Tokyo Kids, Minami Machi Bugyousho',\n", - " 'Gathering, Asahi Production',\n", - " 'AIC, Remic',\n", - " 'Studio 4°C, Sunrise',\n", - " 'Ordet, SANZIGEN',\n", - " 'Green Bunny',\n", - " 'Ordet, Encourage Films',\n", - " 'GRIZZLY',\n", - " 'Twilight Studio',\n", - " 'Enoki Films, Dai Nippon Printing',\n", - " 'Steve N' Steven, Rockwell Eyes',\n", - " 'Studio Ghibli, Polygon Pictures',\n", - " 'Khara, CyberConnect2',\n", - " 'Panda Factory',\n", - " 'Plus Heads',\n", - " 'Studio Fantasia, Amber Film Works',\n", - " 'Animate Film, Visual 80',\n", - " 'Marvy Jack',\n", - " 'Anpro, teamKG',\n", - " 'Asahi Production, Shochiku Animation Institute',\n", - " 'Madhouse, Studio Deen, Magic Bus',\n", - " 'Studio Pierrot, Studio Gallop',\n", - " 'Minami Machi Bugyousho',\n", - " 
'Xebec, Kanaban Graphics',\n", - " 'Creators in Pack, Namu Animation',\n", - " 'Studio Pierrot, David Production',\n", - " 'Toei Animation, Tatsunoko Production',\n", - " 'OLM, P.A. Works',\n", - " 'Studio Moriken',\n", - " 'Studio OX',\n", - " 'Xebec, Production Reed',\n", - " 'Chiptune',\n", - " 'Shin-Ei Animation, DLE',\n", - " 'Sunrise, Telecom Animation Film',\n", - " 'Studio Deen, Studio Hibari',\n", - " 'Tatsunoko Production, Eiken',\n", - " 'C2C, Lay-duce',\n", - " 'Project No.9, Tri-Slash',\n", - " 'Group TAC, G&G Entertainment',\n", - " 'helo.inc',\n", - " 'LMD',\n", - " 'Toei Animation, Daewon Media',\n", - " 'J.C.Staff, Toei Animation',\n", - " 'Milky Cartoon, LMD',\n", - " 'Hal Film Maker, TYO Animations',\n", - " 'Studio Pierrot, Studio Hibari',\n", - " 'Madhouse, Nexus',\n", - " 'Asura Film',\n", - " 'Picona',\n", - " 'TMS Entertainment, TOCSIS',\n", - " 'Front Line',\n", - " 'Bee Media',\n", - " 'J.C.Staff, SANZIGEN',\n", - " 'Madhouse, TNK',\n", - " 'Will Palette',\n", - " 'Group TAC, Japan Vistec',\n", - " 'HeART-BIT',\n", - " 'TNK, Kinema Citrus',\n", - " 'CoMix Wave Films, Haoliners Animation League',\n", - " 'Tonko House',\n", - " 'Bee Train, Cookie Jar Entertainment',\n", - " 'SOEISHINSHA',\n", - " 'Xebec, Group TAC',\n", - " 'Ordet, Studio Moriken',\n", - " 'Rabbit Machine',\n", - " 'NHK Enterprises',\n", - " 'Studio 4°C, Shirogumi',\n", - " 'Beijing Rocen Digital',\n", - " 'Primastea',\n", - " 'J.C.Staff, Life Work',\n", - " 'Mook DLE',\n", - " 'Tele-Cartoon Japan',\n", - " 'Indeprox',\n", - " 'Studio Pierrot, Studio Deen',\n", - " 'Production I.G, Studio Deen',\n", - " 'Kinema Citrus, White Fox',\n", - " 'OLM, AIC A.S.T.A.',\n", - " 'Nippon Animation, Studio Gallop',\n", - " 'Silver Link., Nexus',\n", - " 'A-Line',\n", - " 'Picture Magic, Rikuentai',\n", - " 'Production I.G, Studio 4°C, Shaft',\n", - " 'Triangle Staff, Animate Film',\n", - " 'G&G Entertainment',\n", - " 'AIC Takarazuka',\n", - " 'Orange, Seven Arcs Pictures',\n", - " 
'Buemon',\n", - " 'Kazuki Production',\n", - " 'Gonzo, DandeLion Animation Studio LLC',\n", - " 'Blade',\n", - " 'Toei Animation, Bridge',\n", - " 'CUCURI, Digital Network Animation',\n", - " 'Studio Unicorn',\n", - " 'MooGoo',\n", - " 'Takara Tomy A.R.T.S',\n", - " 'OLM, Shin-Ei Animation',\n", - " 'Arcs Create',\n", - " 'Usagi Ou, Studio Nanahoshi',\n", - " 'Actas, Production Reed',\n", - " 'Azeta Pictures',\n", - " 'Sunrise, Ascension',\n", - " 'Studio Khronos',\n", - " 'Iyasakadou Film',\n", - " 'Studio Z5, Studio March',\n", - " 'Studio G-1Neo',\n", - " 'TOHO animation',\n", - " 'Artmic, Animate Film',\n", - " 'Madhouse, feel.',\n", - " 'Echo',\n", - " 'Radix, Chaos Project',\n", - " 'Bones, Sunrise',\n", - " 'Gainax, Studio Deen',\n", - " 'Orange, Studio 3Hz',\n", - " 'Gainax, Fukushima Gainax',\n", - " 'Madhouse, Studio Gokumi',\n", - " 'iDRAGONS Creative Studio',\n", - " 'Group TAC, Ginga Ya',\n", - " 'October Media',\n", - " 'Boyan Pictures',\n", - " 'Nippon Animation, Xebec',\n", - " 'Piso Studio',\n", - " 'B&T',\n", - " 'Bones, Production I.G, Studio 4°C, Toei Animation',\n", - " 'Gonzo, Satelight',\n", - " 'Ordet, W-Toon Studio',\n", - " 'Fifth Avenue',\n", - " 'Sunwoo Entertainment',\n", - " 'Studio Hibari, Production Reed',\n", - " 'Cyclone Graphics inc',\n", - " 'Automatic Flowers Studio',\n", - " 'AIC, Artmic, Darts',\n", - " 'TYO Animations, LIDENFILMS',\n", - " 'Gainax, Madhouse',\n", - " 'Hoods Entertainment, Production IMS',\n", - " 'pH Studio, D & D Pictures',\n", - " 'CoMix Wave Films, FOREST Hunting One',\n", - " 'Nippon Animation, SynergySP, Shirogumi',\n", - " 'Barnum Studio, Silver Link., Connect',\n", - " 'Gonzo, DLE',\n", - " 'Brain's Base, Platinum Vision',\n", - " 'Japan Vistec',\n", - " 'Studio Gallop, Studio Comet',\n", - " 'Ripromo',\n", - " 'Yamato Works',\n", - " 'Studio Zealot',\n", - " 'Strawberry Meets Pictures',\n", - " 'Kyotoma, Office Nobu',\n", - " 'Creators in Pack, Studio Lings',\n", - " 'Gonzo, Picture Magic',\n", - " 
'Asahi Production, Success Co.',\n", - " 'Nippon Animation, Shaft',\n", - " 'Genco, Triangle Staff',\n", - " 'Gonzo, Gainax, Production I.G, Madhouse, Studio 4°C, Satelight, CoMix Wave Films',\n", - " 'Mary Jane',\n", - " 'AIC, BeSTACK',\n", - " 'Project No.9, A-Real',\n", - " 'Studio Ppuri',\n", - " 'Studio Dadashow',\n", - " 'TMS Entertainment, Shin-Ei Animation',\n", - " 'Gonzo, ufotable',\n", - " 'Studio Fantasia, Rabbit Gate',\n", - " 'Sunrise, Dongwoo A&E',\n", - " 'Piko Studio',\n", - " 'Studio Wombat',\n", - " 'Kaname Productions',\n", - " 'Rockwell Eyes',\n", - " 'OLM, OLM Digital',\n", - " 'Madhouse, Satelight, Graphinica',\n", - " 'TMS Entertainment, Tokyo Kids, Minami Machi Bugyousho',\n", - " 'Sunrise, Bandai Visual',\n", - " 'Studio Bogey',\n", - " 'GEN Productions',\n", - " 'Studio Junio, Annapuru',\n", - " 'Tokyo Media Connections',\n", - " 'Gainax, Tatsunoko Production',\n", - " 'Fanworks, Imagineer',\n", - " '33 Collective']" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# group studio less than 40\n", "minor = studio_counts[studio_counts < 40].index.to_list()\n", @@ -2067,67 +398,13 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "222dcfbf-45f6-40c0-89c6-84a012fc6955", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "studio\n", - "unknown 5934\n", - "SmallStudio 3028\n", - "Toei Animation 725\n", - "Sunrise 447\n", - "J.C.Staff 314\n", - "Madhouse 311\n", - "Production I.G 251\n", - "TMS Entertainment 248\n", - "Studio Deen 241\n", - "Studio Pierrot 240\n", - "Nippon Animation 202\n", - "OLM 181\n", - "A-1 Pictures 174\n", - "Shin-Ei Animation 151\n", - "DLE 139\n", - "Tatsunoko Production 131\n", - "Shaft 111\n", - "Gonzo 109\n", - "Xebec 109\n", - "Bones 109\n", - "Kyoto Animation 103\n", - "AIC 98\n", - "Brain's Base 80\n", - "Silver Link. 
74\n", - "Satelight 71\n", - "Arms 69\n", - "Production Reed 64\n", - "Doga Kobo 63\n", - "Studio 4°C 59\n", - "Gainax 59\n", - "ufotable 58\n", - "Zexcs 57\n", - "Seven 54\n", - "feel. 53\n", - "Kachidoki Studio 50\n", - "Studio Ghibli 48\n", - "Studio Gallop 47\n", - "Diomedea 46\n", - "Studio Hibari 45\n", - "Lerche 42\n", - "PoRO 42\n", - "P.A. Works 41\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "## combine those minor studio to one \"SmallStudio\"\n", "animelist[\"studio\"] = animelist[\"studio\"].apply(lambda x : \"SmallStudio\" if x in minor else x)\n", @@ -2155,7 +432,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "6659bc9c-131b-4170-9c77-7255e999f5dc", "metadata": { "tags": [] @@ -2169,23 +446,12 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "b3efa3eb-0aec-4e3b-8aec-75245e675554", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "(14478, 19)" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "## after dropping the columns \n", "animelist.shape" @@ -2193,26 +459,12 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "cb3f140d-af48-4231-b90c-2d6d1c943c9a", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['title', 'type', 'source', 'episodes', 'status', 'airing', 'aired',\n", - " 'duration', 'rating', 'score', 'scored_by', 'rank', 'popularity',\n", - " 'members', 'favorites', 'related', 'studio', 'genre', 'isPremiered'],\n", - " dtype='object')" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "animelist.columns" ] @@ -2238,33 +490,12 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, 
"id": "8e8540f6-e1bb-4ea3-9eec-3e9226ec6285", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " aired_from aired_to\n", - "0 2012-01-13 2012-03-30\n", - "1 2007-04-02 2007-10-01\n", - "2 2008-10-04 2009-09-25\n", - "3 2002-08-16 2003-05-23\n", - "4 2012-10-06 2013-03-30\n", - "... ... ...\n", - "14473 1987-11-05 1988-11-04\n", - "14474 1986-03-21 1986-03-21\n", - "14475 NaN NaN\n", - "14476 NaN NaN\n", - "14477 2010-04-07 2010-04-07\n", - "\n", - "[14478 rows x 2 columns]\n" - ] - } - ], + "outputs": [], "source": [ "# Splitting the 'aired' column into 'from' and 'to' columns\n", "animelist[['aired_from', 'aired_to']] = animelist['aired'].str.extract(r\"'from': '(.*?)', 'to': '(.*?)'\")\n", @@ -2275,47 +506,12 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "889ad54e-ded1-4841-9949-09b118872b0f", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 14478 entries, 0 to 14477\n", - "Data columns (total 21 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 title 14478 non-null object \n", - " 1 type 14478 non-null object \n", - " 2 source 14478 non-null object \n", - " 3 episodes 14478 non-null int64 \n", - " 4 status 14478 non-null object \n", - " 5 airing 14478 non-null bool \n", - " 6 aired 14478 non-null object \n", - " 7 duration 14478 non-null object \n", - " 8 rating 13934 non-null object \n", - " 9 score 14478 non-null float64\n", - " 10 scored_by 14478 non-null int64 \n", - " 11 rank 12904 non-null float64\n", - " 12 popularity 14478 non-null int64 \n", - " 13 members 14478 non-null int64 \n", - " 14 favorites 14478 non-null int64 \n", - " 15 related 14478 non-null object \n", - " 16 studio 14478 non-null object \n", - " 17 genre 14414 non-null object \n", - " 18 isPremiered 14478 non-null int32 \n", - " 19 aired_from 12287 non-null object 
\n", - " 20 aired_to 12287 non-null object \n", - "dtypes: bool(1), float64(2), int32(1), int64(5), object(12)\n", - "memory usage: 2.2+ MB\n" - ] - } - ], + "outputs": [], "source": [ "animelist.info()" ] @@ -2333,7 +529,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "ded21918-e004-4518-b1a6-5a1f6e0a678e", "metadata": { "tags": [] @@ -2346,374 +542,12 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "8144eaa7-b876-4390-b6da-12d1256c02bf", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AdventureCarsComedyDementiaDemonsDramaEcchiFantasyGameHarem...ShoujoShounenSlice of LifeSpaceSportsSuper PowerSupernaturalThrillerVampireYaoi
00000000000...0000000000
10000000000...0000000000
20000000000...0000000000
30000010100...0000000000
40000010000...0000000000
..................................................................
144730000000000...0000000000
144740000000000...0000000000
144751000000100...0000000000
144760000000000...0000000000
144770000000000...0000000000
\n", - "

14478 rows × 83 columns

\n", - "
" - ], - "text/plain": [ - " Adventure Cars Comedy Dementia Demons Drama Ecchi \\\n", - "0 0 0 0 0 0 0 0 \n", - "1 0 0 0 0 0 0 0 \n", - "2 0 0 0 0 0 0 0 \n", - "3 0 0 0 0 0 1 0 \n", - "4 0 0 0 0 0 1 0 \n", - "... ... ... ... ... ... ... ... \n", - "14473 0 0 0 0 0 0 0 \n", - "14474 0 0 0 0 0 0 0 \n", - "14475 1 0 0 0 0 0 0 \n", - "14476 0 0 0 0 0 0 0 \n", - "14477 0 0 0 0 0 0 0 \n", - "\n", - " Fantasy Game Harem ... Shoujo Shounen Slice of Life Space \\\n", - "0 0 0 0 ... 0 0 0 0 \n", - "1 0 0 0 ... 0 0 0 0 \n", - "2 0 0 0 ... 0 0 0 0 \n", - "3 1 0 0 ... 0 0 0 0 \n", - "4 0 0 0 ... 0 0 0 0 \n", - "... ... ... ... ... ... ... ... ... \n", - "14473 0 0 0 ... 0 0 0 0 \n", - "14474 0 0 0 ... 0 0 0 0 \n", - "14475 1 0 0 ... 0 0 0 0 \n", - "14476 0 0 0 ... 0 0 0 0 \n", - "14477 0 0 0 ... 0 0 0 0 \n", - "\n", - " Sports Super Power Supernatural Thriller Vampire Yaoi \n", - "0 0 0 0 0 0 0 \n", - "1 0 0 0 0 0 0 \n", - "2 0 0 0 0 0 0 \n", - "3 0 0 0 0 0 0 \n", - "4 0 0 0 0 0 0 \n", - "... ... ... ... ... ... ... 
\n", - "14473 0 0 0 0 0 0 \n", - "14474 0 0 0 0 0 0 \n", - "14475 0 0 0 0 0 0 \n", - "14476 0 0 0 0 0 0 \n", - "14477 0 0 0 0 0 0 \n", - "\n", - "[14478 rows x 83 columns]" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "## split the genres by the parameter ','\n", "genre_animelist = animelist['genre'].str.get_dummies(sep=',')\n", @@ -2722,121 +556,233 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "d4491bb5-83cc-4675-8496-cf17b48f969e", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 14478 entries, 0 to 14477\n", - "Data columns (total 83 columns):\n", - " # Column Non-Null Count Dtype\n", - "--- ------ -------------- -----\n", - " 0 Adventure 14478 non-null int64\n", - " 1 Cars 14478 non-null int64\n", - " 2 Comedy 14478 non-null int64\n", - " 3 Dementia 14478 non-null int64\n", - " 4 Demons 14478 non-null int64\n", - " 5 Drama 14478 non-null int64\n", - " 6 Ecchi 14478 non-null int64\n", - " 7 Fantasy 14478 non-null int64\n", - " 8 Game 14478 non-null int64\n", - " 9 Harem 14478 non-null int64\n", - " 10 Hentai 14478 non-null int64\n", - " 11 Historical 14478 non-null int64\n", - " 12 Horror 14478 non-null int64\n", - " 13 Josei 14478 non-null int64\n", - " 14 Kids 14478 non-null int64\n", - " 15 Magic 14478 non-null int64\n", - " 16 Martial Arts 14478 non-null int64\n", - " 17 Mecha 14478 non-null int64\n", - " 18 Military 14478 non-null int64\n", - " 19 Music 14478 non-null int64\n", - " 20 Mystery 14478 non-null int64\n", - " 21 Parody 14478 non-null int64\n", - " 22 Police 14478 non-null int64\n", - " 23 Psychological 14478 non-null int64\n", - " 24 Romance 14478 non-null int64\n", - " 25 Samurai 14478 non-null int64\n", - " 26 School 14478 non-null int64\n", - " 27 Sci-Fi 14478 non-null int64\n", - " 28 Seinen 14478 non-null int64\n", - " 29 
Shoujo 14478 non-null int64\n", - " 30 Shoujo Ai 14478 non-null int64\n", - " 31 Shounen 14478 non-null int64\n", - " 32 Shounen Ai 14478 non-null int64\n", - " 33 Slice of Life 14478 non-null int64\n", - " 34 Space 14478 non-null int64\n", - " 35 Sports 14478 non-null int64\n", - " 36 Super Power 14478 non-null int64\n", - " 37 Supernatural 14478 non-null int64\n", - " 38 Thriller 14478 non-null int64\n", - " 39 Vampire 14478 non-null int64\n", - " 40 Yaoi 14478 non-null int64\n", - " 41 Yuri 14478 non-null int64\n", - " 42 Action 14478 non-null int64\n", - " 43 Adventure 14478 non-null int64\n", - " 44 Cars 14478 non-null int64\n", - " 45 Comedy 14478 non-null int64\n", - " 46 Dementia 14478 non-null int64\n", - " 47 Demons 14478 non-null int64\n", - " 48 Drama 14478 non-null int64\n", - " 49 Ecchi 14478 non-null int64\n", - " 50 Fantasy 14478 non-null int64\n", - " 51 Game 14478 non-null int64\n", - " 52 Harem 14478 non-null int64\n", - " 53 Hentai 14478 non-null int64\n", - " 54 Historical 14478 non-null int64\n", - " 55 Horror 14478 non-null int64\n", - " 56 Josei 14478 non-null int64\n", - " 57 Kids 14478 non-null int64\n", - " 58 Magic 14478 non-null int64\n", - " 59 Martial Arts 14478 non-null int64\n", - " 60 Mecha 14478 non-null int64\n", - " 61 Military 14478 non-null int64\n", - " 62 Music 14478 non-null int64\n", - " 63 Mystery 14478 non-null int64\n", - " 64 NA 14478 non-null int64\n", - " 65 Parody 14478 non-null int64\n", - " 66 Police 14478 non-null int64\n", - " 67 Psychological 14478 non-null int64\n", - " 68 Romance 14478 non-null int64\n", - " 69 Samurai 14478 non-null int64\n", - " 70 School 14478 non-null int64\n", - " 71 Sci-Fi 14478 non-null int64\n", - " 72 Seinen 14478 non-null int64\n", - " 73 Shoujo 14478 non-null int64\n", - " 74 Shounen 14478 non-null int64\n", - " 75 Slice of Life 14478 non-null int64\n", - " 76 Space 14478 non-null int64\n", - " 77 Sports 14478 non-null int64\n", - " 78 Super Power 14478 non-null int64\n", - " 79 
Supernatural 14478 non-null int64\n", - " 80 Thriller 14478 non-null int64\n", - " 81 Vampire 14478 non-null int64\n", - " 82 Yaoi 14478 non-null int64\n", - "dtypes: int64(83)\n", - "memory usage: 9.2 MB\n" - ] - } - ], + "outputs": [], "source": [ "genre_animelist.info()" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "0087eef3-cc59-46a2-86af-39ae8a00ef1b", "metadata": { "tags": [] }, + "outputs": [], + "source": [ + "## combining the animelist data and genre data into animelist_df\n", + "animelist_df = pd.concat([animelist, genre_animelist], axis=1)\n", + "animelist_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "931de186-0984-4b46-af39-4b5659f3e44b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "## remove genre columns \n", + "animelist_df.drop(columns=[\"genre\"], inplace=True)\n", + "animelist_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0ef0721-d79a-4b14-a125-c04e1ac95ce3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "animelist_df.info()" + ] + }, + { + "cell_type": "markdown", + "id": "98116925-f9c5-4902-8807-1ac623323955", + "metadata": { + "tags": [] + }, + "source": [ + "### Check value contain any NULL\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcf0cf61-2ef7-486f-b5ac-6f1f1d08df0e", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "# let's make sure no null values\n", + "for col in animelist_df:\n", + " print(f\" {col} | has ({animelist_df[col].isnull().sum()})\")\n", + " \n", + " \n", + "## below table we can see:\n", + "## rating have 544 'Nan'\n", + "## rank have 1574 'Nan'\n", + "## aired_from have 2191 'Nan'\n", + "## aired-to have 2191 'Nan'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37ef5d53-7cfb-41e4-bb2c-c659ce5fd4ec", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "## 
count the total for each rating\n", + "animelist_df.rating.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32af0567-3583-49f5-90f7-d3a159d7a7bb", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "## ensure the rating is at least PG13\n", + "animelist_df['rating'].fillna(\"G - All Ages\",inplace=True)\n", + "\n", + "## convert the rank to the max rank (prevent skewness)\n", + "animelist_df['rank'].fillna(animelist_df['rank'].max(), inplace=True)\n", + "\n", + "## convert 'Nan' to None for aired dates.\n", + "animelist_df['aired_from'].fillna(\"Not aired\",inplace=True)\n", + "animelist_df['aired_to'].fillna(\"Not aired\",inplace=True)\n", + "\n", + "##find out whether aired time and primied have relation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fc969d8-04c0-4242-9192-f68a40aa6339", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# let's double confirmed there are no null values\n", + "for col in animelist_df:\n", + " print(f\" {col} | has ({animelist_df[col].isnull().sum()})\")\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "593b3e94-b089-4ff5-9be6-6b85fb06bed8", + "metadata": { + "tags": [] + }, + "source": [ + "### Convert to new csv file.\n", + "---\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1545f7c0-4adf-4840-ab5e-8f093fd8e84b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#animelist_df.to_csv('outV2.csv', index=False) " + ] + }, + { + "cell_type": "markdown", + "id": "8f7a0093-684d-4db8-8146-3d45b077934f", + "metadata": {}, + "source": [ + "### if have other data need to be clean\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9ba1c45-306d-44a4-876f-a9c33c1a42fe", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "a835aa18-7be3-4ab2-b9e3-4884895247da", + "metadata": {}, + "source": [ + "## Dealing with 
Related Column" + ] + }, + { + "cell_type": "markdown", + "id": "d862db97-2ffd-449b-ab1a-a651adecef44", + "metadata": {}, + "source": [ + "### Exploring JSON Structure for each data unit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28b2c807-f858-4287-adca-b0865b8921c1", + "metadata": {}, + "outputs": [], + "source": [ + "related_cell0 = animelist_df[\"related\"][0]\n", + "related_cell_mod = related_cell0.replace(\"'\", \"\\\"\")\n", + "related_cell_mod" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d1f895f-2caf-4700-9f7c-11f94763f03c", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "related_dict = json.loads(related_cell_mod)\n", + "related_dict['Sequel'][0]['type']" + ] + }, + { + "cell_type": "markdown", + "id": "4bccf211-eb72-4aed-8f3e-9190830354e4", + "metadata": {}, + "source": [ + "### Testing with a sample df with 5 rows" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "id": "d5687044-12c9-4e45-8ab0-f124d4c33f44", + "metadata": {}, "outputs": [ { "data": { @@ -2860,204 +806,94 @@ " \n", " \n", " title\n", - " type\n", - " source\n", - " episodes\n", - " status\n", - " airing\n", - " aired\n", - " duration\n", - " rating\n", - " score\n", - " ...\n", - " Shoujo\n", - " Shounen\n", - " Slice of Life\n", - " Space\n", - " Sports\n", - " Super Power\n", - " Supernatural\n", - " Thriller\n", - " Vampire\n", - " Yaoi\n", + " related\n", " \n", " \n", " \n", " \n", - " 0\n", - " Inu x Boku SS\n", - " TV\n", - " Manga\n", - " 12\n", - " Finished Airing\n", - " False\n", - " {'from': '2012-01-13', 'to': '2012-03-30'}\n", - " 24 min. 
per ep.\n", - " PG-13 - Teens 13 or older\n", - " 7.63\n", - " ...\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", + " 10\n", + " Junjou Romantica 2\n", + " {'Adaptation': [{'mal_id': 765, 'type': 'manga...\n", " \n", " \n", - " 1\n", - " Seto no Hanayome\n", - " TV\n", - " Manga\n", - " 26\n", - " Finished Airing\n", - " False\n", - " {'from': '2007-04-02', 'to': '2007-10-01'}\n", - " 24 min. per ep.\n", - " PG-13 - Teens 13 or older\n", - " 7.89\n", - " ...\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", + " 11\n", + " Kaichou wa Maid-sama!\n", + " {'Adaptation': [{'mal_id': 2921, 'type': 'mang...\n", " \n", " \n", - " 2\n", - " Shugo Chara!! Doki\n", - " TV\n", - " Manga\n", - " 51\n", - " Finished Airing\n", - " False\n", - " {'from': '2008-10-04', 'to': '2009-09-25'}\n", - " 24 min. per ep.\n", - " PG - Children\n", - " 7.55\n", - " ...\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", + " 12\n", + " Sekaiichi Hatsukoi 2\n", + " {'Adaptation': [{'mal_id': 10309, 'type': 'man...\n", + " \n", + " \n", + " 13\n", + " Tonari no Kaibutsu-kun\n", + " {'Adaptation': [{'mal_id': 13702, 'type': 'man...\n", + " \n", + " \n", + " 14\n", + " Bleach\n", + " {'Adaptation': [{'mal_id': 12, 'type': 'manga'...\n", " \n", " \n", - " 3\n", - " Princess Tutu\n", - " TV\n", - " Original\n", - " 38\n", - " Finished Airing\n", - " False\n", - " {'from': '2002-08-16', 'to': '2003-05-23'}\n", - " 16 min. per ep.\n", - " PG-13 - Teens 13 or older\n", - " 8.21\n", - " ...\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", + " 15\n", + " Chobits\n", + " {'Adaptation': [{'mal_id': 107, 'type': 'manga...\n", " \n", " \n", - " 4\n", - " Bakuman. 
3rd Season\n", - " TV\n", - " Manga\n", - " 25\n", - " Finished Airing\n", - " False\n", - " {'from': '2012-10-06', 'to': '2013-03-30'}\n", - " 24 min. per ep.\n", - " PG-13 - Teens 13 or older\n", - " 8.67\n", - " ...\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", + " 16\n", + " Kimi ni Todoke\n", + " {'Adaptation': [{'mal_id': 3378, 'type': 'mang...\n", + " \n", + " \n", + " 17\n", + " Naruto: Shippuuden\n", + " {'Adaptation': [{'mal_id': 11, 'type': 'manga'...\n", + " \n", + " \n", + " 18\n", + " Ranma ½\n", + " {'Adaptation': [{'mal_id': 23, 'type': 'manga'...\n", + " \n", + " \n", + " 19\n", + " Toradora!\n", + " {'Adaptation': [{'mal_id': 7149, 'type': 'mang...\n", " \n", " \n", "\n", - "

5 rows × 104 columns

\n", "" ], "text/plain": [ - " title type source episodes status airing \\\n", - "0 Inu x Boku SS TV Manga 12 Finished Airing False \n", - "1 Seto no Hanayome TV Manga 26 Finished Airing False \n", - "2 Shugo Chara!! Doki TV Manga 51 Finished Airing False \n", - "3 Princess Tutu TV Original 38 Finished Airing False \n", - "4 Bakuman. 3rd Season TV Manga 25 Finished Airing False \n", - "\n", - " aired duration \\\n", - "0 {'from': '2012-01-13', 'to': '2012-03-30'} 24 min. per ep. \n", - "1 {'from': '2007-04-02', 'to': '2007-10-01'} 24 min. per ep. \n", - "2 {'from': '2008-10-04', 'to': '2009-09-25'} 24 min. per ep. \n", - "3 {'from': '2002-08-16', 'to': '2003-05-23'} 16 min. per ep. \n", - "4 {'from': '2012-10-06', 'to': '2013-03-30'} 24 min. per ep. \n", - "\n", - " rating score ... Shoujo Shounen Slice of Life \\\n", - "0 PG-13 - Teens 13 or older 7.63 ... 0 0 0 \n", - "1 PG-13 - Teens 13 or older 7.89 ... 0 0 0 \n", - "2 PG - Children 7.55 ... 0 0 0 \n", - "3 PG-13 - Teens 13 or older 8.21 ... 0 0 0 \n", - "4 PG-13 - Teens 13 or older 8.67 ... 0 0 0 \n", - "\n", - " Space Sports Super Power Supernatural Thriller Vampire Yaoi \n", - "0 0 0 0 0 0 0 0 \n", - "1 0 0 0 0 0 0 0 \n", - "2 0 0 0 0 0 0 0 \n", - "3 0 0 0 0 0 0 0 \n", - "4 0 0 0 0 0 0 0 \n", - "\n", - "[5 rows x 104 columns]" + " title related\n", + "10 Junjou Romantica 2 {'Adaptation': [{'mal_id': 765, 'type': 'manga...\n", + "11 Kaichou wa Maid-sama! 
{'Adaptation': [{'mal_id': 2921, 'type': 'mang...\n", + "12 Sekaiichi Hatsukoi 2 {'Adaptation': [{'mal_id': 10309, 'type': 'man...\n", + "13 Tonari no Kaibutsu-kun {'Adaptation': [{'mal_id': 13702, 'type': 'man...\n", + "14 Bleach {'Adaptation': [{'mal_id': 12, 'type': 'manga'...\n", + "15 Chobits {'Adaptation': [{'mal_id': 107, 'type': 'manga...\n", + "16 Kimi ni Todoke {'Adaptation': [{'mal_id': 3378, 'type': 'mang...\n", + "17 Naruto: Shippuuden {'Adaptation': [{'mal_id': 11, 'type': 'manga'...\n", + "18 Ranma ½ {'Adaptation': [{'mal_id': 23, 'type': 'manga'...\n", + "19 Toradora! {'Adaptation': [{'mal_id': 7149, 'type': 'mang..." ] }, - "execution_count": 24, + "execution_count": 144, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "## combining the animelist data and genre data into animelist_df\n", - "animelist_df = pd.concat([animelist, genre_animelist], axis=1)\n", - "animelist_df.head()" + "related_df = animelist_df[[\"title\",\"related\"]]\n", + "test_related = related_df[10:20]\n", + "test_related" ] }, { "cell_type": "code", - "execution_count": 25, - "id": "931de186-0984-4b46-af39-4b5659f3e44b", - "metadata": { - "tags": [] - }, + "execution_count": 167, + "id": "39b4fd5d-a3c0-4c04-beb8-94bf59eeb5d3", + "metadata": {}, "outputs": [ { "data": { @@ -3081,42 +917,26 @@ " \n", " \n", " title\n", - " type\n", - " source\n", - " episodes\n", - " status\n", - " airing\n", - " aired\n", - " duration\n", - " rating\n", - " score\n", - " ...\n", - " Shoujo\n", - " Shounen\n", - " Slice of Life\n", - " Space\n", - " Sports\n", - " Super Power\n", - " Supernatural\n", - " Thriller\n", - " Vampire\n", - " Yaoi\n", + " Adaptation\n", + " Sequel\n", + " Side story\n", + " Alternative version\n", + " Prequel\n", + " Summary\n", + " Other\n", + " Spin-off\n", + " Alternative setting\n", + " Character\n", + " Parent story\n", + " Full story\n", " \n", " \n", " \n", " \n", " 0\n", " Inu x Boku SS\n", - " TV\n", - " Manga\n", - " 12\n", - " Finished 
Airing\n", - " False\n", - " {'from': '2012-01-13', 'to': '2012-03-30'}\n", - " 24 min. per ep.\n", - " PG-13 - Teens 13 or older\n", - " 7.63\n", - " ...\n", + " 1\n", + " 1\n", " 0\n", " 0\n", " 0\n", @@ -3131,18 +951,10 @@ " \n", " 1\n", " Seto no Hanayome\n", - " TV\n", - " Manga\n", - " 26\n", - " Finished Airing\n", - " False\n", - " {'from': '2007-04-02', 'to': '2007-10-01'}\n", - " 24 min. per ep.\n", - " PG-13 - Teens 13 or older\n", - " 7.89\n", - " ...\n", - " 0\n", - " 0\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", " 0\n", " 0\n", " 0\n", @@ -3155,19 +967,11 @@ " \n", " 2\n", " Shugo Chara!! Doki\n", - " TV\n", - " Manga\n", - " 51\n", - " Finished Airing\n", - " False\n", - " {'from': '2008-10-04', 'to': '2009-09-25'}\n", - " 24 min. per ep.\n", - " PG - Children\n", - " 7.55\n", - " ...\n", - " 0\n", + " 1\n", + " 1\n", " 0\n", " 0\n", + " 1\n", " 0\n", " 0\n", " 0\n", @@ -3179,140 +983,12 @@ " \n", " 3\n", " Princess Tutu\n", - " TV\n", - " Original\n", - " 38\n", - " Finished Airing\n", - " False\n", - " {'from': '2002-08-16', 'to': '2003-05-23'}\n", - " 16 min. per ep.\n", - " PG-13 - Teens 13 or older\n", - " 8.21\n", - " ...\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " \n", - " \n", - " 4\n", - " Bakuman. 3rd Season\n", - " TV\n", - " Manga\n", - " 25\n", - " Finished Airing\n", - " False\n", - " {'from': '2012-10-06', 'to': '2013-03-30'}\n", - " 24 min. 
per ep.\n", - " PG-13 - Teens 13 or older\n", - " 8.67\n", - " ...\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 14473\n", - " Gutchonpa Omoshiro Hanashi\n", - " TV\n", - " Unknown\n", - " 5\n", - " Finished Airing\n", - " False\n", - " {'from': '1987-11-05', 'to': '1988-11-04'}\n", - " 8 min. per ep.\n", - " G - All Ages\n", - " 5.50\n", - " ...\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " \n", - " \n", - " 14474\n", - " Geba Geba Shou Time!\n", - " OVA\n", - " Unknown\n", " 1\n", - " Finished Airing\n", - " False\n", - " {'from': '1986-03-21', 'to': '1986-03-21'}\n", - " 25 min.\n", - " G - All Ages\n", - " 4.60\n", - " ...\n", - " 0\n", - " 0\n", " 0\n", " 0\n", " 0\n", " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " \n", - " \n", - " 14475\n", - " Godzilla: Hoshi wo Kuu Mono\n", - " Movie\n", - " Other\n", " 1\n", - " Not yet aired\n", - " False\n", - " {'from': None, 'to': None}\n", - " Unknown\n", - " R - 17+ (violence & profanity)\n", - " 0.00\n", - " ...\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", " 0\n", " 0\n", " 0\n", @@ -3321,47 +997,15 @@ " 0\n", " \n", " \n", - " 14476\n", - " Nippon Mukashibanashi: Sannen Netarou\n", - " OVA\n", - " Other\n", + " 4\n", + " Bakuman. 
3rd Season\n", " 1\n", - " Finished Airing\n", - " False\n", - " {'from': None, 'to': None}\n", - " 40 min.\n", - " G - All Ages\n", - " 6.00\n", - " ...\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", " 0\n", " 0\n", " 0\n", + " 2\n", " 0\n", - " \n", - " \n", - " 14477\n", - " Senjou no Valkyria Special\n", - " Special\n", - " Unknown\n", " 1\n", - " Finished Airing\n", - " False\n", - " {'from': '2010-04-07', 'to': '2010-04-07'}\n", - " 3 min.\n", - " PG-13 - Teens 13 or older\n", - " 5.15\n", - " ...\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", " 0\n", " 0\n", " 0\n", @@ -3370,470 +1014,124 @@ " \n", " \n", "\n", - "

14478 rows × 103 columns

\n", "" ], "text/plain": [ - " title type source episodes \\\n", - "0 Inu x Boku SS TV Manga 12 \n", - "1 Seto no Hanayome TV Manga 26 \n", - "2 Shugo Chara!! Doki TV Manga 51 \n", - "3 Princess Tutu TV Original 38 \n", - "4 Bakuman. 3rd Season TV Manga 25 \n", - "... ... ... ... ... \n", - "14473 Gutchonpa Omoshiro Hanashi TV Unknown 5 \n", - "14474 Geba Geba Shou Time! OVA Unknown 1 \n", - "14475 Godzilla: Hoshi wo Kuu Mono Movie Other 1 \n", - "14476 Nippon Mukashibanashi: Sannen Netarou OVA Other 1 \n", - "14477 Senjou no Valkyria Special Special Unknown 1 \n", - "\n", - " status airing aired \\\n", - "0 Finished Airing False {'from': '2012-01-13', 'to': '2012-03-30'} \n", - "1 Finished Airing False {'from': '2007-04-02', 'to': '2007-10-01'} \n", - "2 Finished Airing False {'from': '2008-10-04', 'to': '2009-09-25'} \n", - "3 Finished Airing False {'from': '2002-08-16', 'to': '2003-05-23'} \n", - "4 Finished Airing False {'from': '2012-10-06', 'to': '2013-03-30'} \n", - "... ... ... ... \n", - "14473 Finished Airing False {'from': '1987-11-05', 'to': '1988-11-04'} \n", - "14474 Finished Airing False {'from': '1986-03-21', 'to': '1986-03-21'} \n", - "14475 Not yet aired False {'from': None, 'to': None} \n", - "14476 Finished Airing False {'from': None, 'to': None} \n", - "14477 Finished Airing False {'from': '2010-04-07', 'to': '2010-04-07'} \n", - "\n", - " duration rating score ... Shoujo \\\n", - "0 24 min. per ep. PG-13 - Teens 13 or older 7.63 ... 0 \n", - "1 24 min. per ep. PG-13 - Teens 13 or older 7.89 ... 0 \n", - "2 24 min. per ep. PG - Children 7.55 ... 0 \n", - "3 16 min. per ep. PG-13 - Teens 13 or older 8.21 ... 0 \n", - "4 24 min. per ep. PG-13 - Teens 13 or older 8.67 ... 0 \n", - "... ... ... ... ... ... \n", - "14473 8 min. per ep. G - All Ages 5.50 ... 0 \n", - "14474 25 min. G - All Ages 4.60 ... 0 \n", - "14475 Unknown R - 17+ (violence & profanity) 0.00 ... 0 \n", - "14476 40 min. G - All Ages 6.00 ... 0 \n", - "14477 3 min. 
PG-13 - Teens 13 or older 5.15 ... 0 \n", + " title Adaptation Sequel Side story Alternative version \\\n", + "0 Inu x Boku SS 1 1 0 0 \n", + "1 Seto no Hanayome 1 1 1 1 \n", + "2 Shugo Chara!! Doki 1 1 0 0 \n", + "3 Princess Tutu 1 0 0 0 \n", + "4 Bakuman. 3rd Season 1 0 0 0 \n", "\n", - " Shounen Slice of Life Space Sports Super Power Supernatural \\\n", - "0 0 0 0 0 0 0 \n", - "1 0 0 0 0 0 0 \n", - "2 0 0 0 0 0 0 \n", - "3 0 0 0 0 0 0 \n", - "4 0 0 0 0 0 0 \n", - "... ... ... ... ... ... ... \n", - "14473 0 0 0 0 0 0 \n", - "14474 0 0 0 0 0 0 \n", - "14475 0 0 0 0 0 0 \n", - "14476 0 0 0 0 0 0 \n", - "14477 0 0 0 0 0 0 \n", + " Prequel Summary Other Spin-off Alternative setting Character \\\n", + "0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 \n", + "2 1 0 0 0 0 0 \n", + "3 0 1 0 0 0 0 \n", + "4 2 0 1 0 0 0 \n", "\n", - " Thriller Vampire Yaoi \n", - "0 0 0 0 \n", - "1 0 0 0 \n", - "2 0 0 0 \n", - "3 0 0 0 \n", - "4 0 0 0 \n", - "... ... ... ... \n", - "14473 0 0 0 \n", - "14474 0 0 0 \n", - "14475 0 0 0 \n", - "14476 0 0 0 \n", - "14477 0 0 0 \n", - "\n", - "[14478 rows x 103 columns]" + " Parent story Full story \n", + "0 0 0 \n", + "1 0 0 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 " ] }, - "execution_count": 25, + "execution_count": 167, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "## remove genre columns \n", - "animelist_df.drop(columns=[\"genre\"], inplace=True)\n", - "animelist_df" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "d0ef0721-d79a-4b14-a125-c04e1ac95ce3", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 14478 entries, 0 to 14477\n", - "Columns: 103 entries, title to Yaoi\n", - "dtypes: bool(1), float64(2), int32(1), int64(88), object(11)\n", - "memory usage: 11.2+ MB\n" - ] - } - ], - "source": [ - "animelist_df.info()" + "\n", + "related_dict = {}\n", + "related_dict['title']=[]\n", + "\n", + "related_row_dict_list = [];\n", 
+ "\n", + "for i, related_row in enumerate(related_df['related']):\n", + " # Original JSON data used single quotation, and double quotes inside values\n", + " # According to JSON guidelines, strings should use double quotes, we will convert double quotes inside to single quotes\n", + " related_row = related_row.replace(\"\\\"\", \"(temp_double_quotes)\")\n", + " related_row = related_row.replace(\"'\", \"\\\"\")\n", + " related_row = related_row.replace(\"(temp_double_quotes)\", \"'\")\n", + " \n", + " # Convert each row into its own dictionary\n", + " related_row_dict = json.loads(related_row)\n", + " related_row_dict_list.append(related_row_dict)\n", + "\n", + " # Fill in title list\n", + " related_dict['title'].append(related_df['title'].iloc[i])\n", + "\n", + " # Fill keys with all unique relations\n", + " for relation in related_row_dict:\n", + " if not relation in related_dict.keys():\n", + " related_dict[relation] = []\n", + "\n", + "for related_row_dict in related_row_dict_list:\n", + " for relation in related_dict:\n", + " if relation=='title': \n", + " continue\n", + " if relation in related_row_dict:\n", + " related_dict[relation].append(len(list(related_row_dict[relation])))\n", + " else:\n", + " related_dict[relation].append(0)\n", + "related_df_separated = pd.DataFrame.from_dict(related_dict)\n", + "related_df_separated.head()" ] }, { "cell_type": "markdown", - "id": "98116925-f9c5-4902-8807-1ac623323955", - "metadata": { - "tags": [] - }, - "source": [ - "### Check value contain any NULL\n", - "---" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "fcf0cf61-2ef7-486f-b5ac-6f1f1d08df0e", - "metadata": { - "scrolled": true, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " title | has (0)\n", - " type | has (0)\n", - " source | has (0)\n", - " episodes | has (0)\n", - " status | has (0)\n", - " airing | has (0)\n", - " aired | has (0)\n", - " duration | has (0)\n", - " rating | has 
(544)\n", - " score | has (0)\n", - " scored_by | has (0)\n", - " rank | has (1574)\n", - " popularity | has (0)\n", - " members | has (0)\n", - " favorites | has (0)\n", - " related | has (0)\n", - " studio | has (0)\n", - " isPremiered | has (0)\n", - " aired_from | has (2191)\n", - " aired_to | has (2191)\n", - " Adventure | has (0)\n", - " Cars | has (0)\n", - " Comedy | has (0)\n", - " Dementia | has (0)\n", - " Demons | has (0)\n", - " Drama | has (0)\n", - " Ecchi | has (0)\n", - " Fantasy | has (0)\n", - " Game | has (0)\n", - " Harem | has (0)\n", - " Hentai | has (0)\n", - " Historical | has (0)\n", - " Horror | has (0)\n", - " Josei | has (0)\n", - " Kids | has (0)\n", - " Magic | has (0)\n", - " Martial Arts | has (0)\n", - " Mecha | has (0)\n", - " Military | has (0)\n", - " Music | has (0)\n", - " Mystery | has (0)\n", - " Parody | has (0)\n", - " Police | has (0)\n", - " Psychological | has (0)\n", - " Romance | has (0)\n", - " Samurai | has (0)\n", - " School | has (0)\n", - " Sci-Fi | has (0)\n", - " Seinen | has (0)\n", - " Shoujo | has (0)\n", - " Shoujo Ai | has (0)\n", - " Shounen | has (0)\n", - " Shounen Ai | has (0)\n", - " Slice of Life | has (0)\n", - " Space | has (0)\n", - " Sports | has (0)\n", - " Super Power | has (0)\n", - " Supernatural | has (0)\n", - " Thriller | has (0)\n", - " Vampire | has (0)\n", - " Yaoi | has (0)\n", - " Yuri | has (0)\n", - " Action | has (0)\n", - " Adventure | has (0)\n", - " Cars | has (0)\n", - " Comedy | has (0)\n", - " Dementia | has (0)\n", - " Demons | has (0)\n", - " Drama | has (0)\n", - " Ecchi | has (0)\n", - " Fantasy | has (0)\n", - " Game | has (0)\n", - " Harem | has (0)\n", - " Hentai | has (0)\n", - " Historical | has (0)\n", - " Horror | has (0)\n", - " Josei | has (0)\n", - " Kids | has (0)\n", - " Magic | has (0)\n", - " Martial Arts | has (0)\n", - " Mecha | has (0)\n", - " Military | has (0)\n", - " Music | has (0)\n", - " Mystery | has (0)\n", - " NA | has (0)\n", - " Parody | has 
(0)\n", - " Police | has (0)\n", - " Psychological | has (0)\n", - " Romance | has (0)\n", - " Samurai | has (0)\n", - " School | has (0)\n", - " Sci-Fi | has (0)\n", - " Seinen | has (0)\n", - " Shoujo | has (0)\n", - " Shounen | has (0)\n", - " Slice of Life | has (0)\n", - " Space | has (0)\n", - " Sports | has (0)\n", - " Super Power | has (0)\n", - " Supernatural | has (0)\n", - " Thriller | has (0)\n", - " Vampire | has (0)\n", - " Yaoi | has (0)\n" - ] - } - ], + "id": "04a8ee6d-fc20-4bba-bf79-80c13ef5fde5", + "metadata": {}, "source": [ - "# let's make sure no null values\n", - "for col in animelist_df:\n", - " print(f\" {col} | has ({animelist_df[col].isnull().sum()})\")\n", - " \n", - " \n", - "## below table we can see:\n", - "## rating have 544 'Nan'\n", - "## rank have 1574 'Nan'\n", - "## aired_from have 2191 'Nan'\n", - "## aired-to have 2191 'Nan'" + "### Sum each column to find how many of each type of relation there are" ] }, { "cell_type": "code", - "execution_count": 28, - "id": "37ef5d53-7cfb-41e4-bb2c-c659ce5fd4ec", - "metadata": { - "tags": [] - }, + "execution_count": 168, + "id": "a8dd20b7-209e-4a7c-af35-b55cab43bad4", + "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "rating\n", - "PG-13 - Teens 13 or older 5020\n", - "G - All Ages 4541\n", - "PG - Children 1279\n", - "Rx - Hentai 1219\n", - "R - 17+ (violence & profanity) 997\n", - "R+ - Mild Nudity 878\n", - "Name: count, dtype: int64" + "{'Adaptation': 4758,\n", + " 'Sequel': 2550,\n", + " 'Side story': 1700,\n", + " 'Alternative version': 1631,\n", + " 'Prequel': 2535,\n", + " 'Summary': 422,\n", + " 'Other': 2996,\n", + " 'Spin-off': 573,\n", + " 'Alternative setting': 715,\n", + " 'Character': 371,\n", + " 'Parent story': 1923,\n", + " 'Full story': 437}" ] }, - "execution_count": 28, + "execution_count": 168, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "## count the total for each rating\n", - "animelist_df.rating.value_counts()" - ] - }, - { - 
"cell_type": "code", - "execution_count": 29, - "id": "32af0567-3583-49f5-90f7-d3a159d7a7bb", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "## ensure the rating is at least PG13\n", - "animelist_df['rating'].fillna(\"G - All Ages\",inplace=True)\n", - "\n", - "## convert the rank to the max rank (prevent skewness)\n", - "animelist_df['rank'].fillna(animelist_df['rank'].max(), inplace=True)\n", - "\n", - "## convert 'Nan' to None for aired dates.\n", - "animelist_df['aired_from'].fillna(\"Not aired\",inplace=True)\n", - "animelist_df['aired_to'].fillna(\"Not aired\",inplace=True)\n", - "\n", - "##find out whether aired time and primied have relation" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "3fc969d8-04c0-4242-9192-f68a40aa6339", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " title | has (0)\n", - " type | has (0)\n", - " source | has (0)\n", - " episodes | has (0)\n", - " status | has (0)\n", - " airing | has (0)\n", - " aired | has (0)\n", - " duration | has (0)\n", - " rating | has (0)\n", - " score | has (0)\n", - " scored_by | has (0)\n", - " rank | has (0)\n", - " popularity | has (0)\n", - " members | has (0)\n", - " favorites | has (0)\n", - " related | has (0)\n", - " studio | has (0)\n", - " isPremiered | has (0)\n", - " aired_from | has (0)\n", - " aired_to | has (0)\n", - " Adventure | has (0)\n", - " Cars | has (0)\n", - " Comedy | has (0)\n", - " Dementia | has (0)\n", - " Demons | has (0)\n", - " Drama | has (0)\n", - " Ecchi | has (0)\n", - " Fantasy | has (0)\n", - " Game | has (0)\n", - " Harem | has (0)\n", - " Hentai | has (0)\n", - " Historical | has (0)\n", - " Horror | has (0)\n", - " Josei | has (0)\n", - " Kids | has (0)\n", - " Magic | has (0)\n", - " Martial Arts | has (0)\n", - " Mecha | has (0)\n", - " Military | has (0)\n", - " Music | has (0)\n", - " Mystery | has (0)\n", - " Parody | has (0)\n", - " Police | has 
(0)\n", - " Psychological | has (0)\n", - " Romance | has (0)\n", - " Samurai | has (0)\n", - " School | has (0)\n", - " Sci-Fi | has (0)\n", - " Seinen | has (0)\n", - " Shoujo | has (0)\n", - " Shoujo Ai | has (0)\n", - " Shounen | has (0)\n", - " Shounen Ai | has (0)\n", - " Slice of Life | has (0)\n", - " Space | has (0)\n", - " Sports | has (0)\n", - " Super Power | has (0)\n", - " Supernatural | has (0)\n", - " Thriller | has (0)\n", - " Vampire | has (0)\n", - " Yaoi | has (0)\n", - " Yuri | has (0)\n", - " Action | has (0)\n", - " Adventure | has (0)\n", - " Cars | has (0)\n", - " Comedy | has (0)\n", - " Dementia | has (0)\n", - " Demons | has (0)\n", - " Drama | has (0)\n", - " Ecchi | has (0)\n", - " Fantasy | has (0)\n", - " Game | has (0)\n", - " Harem | has (0)\n", - " Hentai | has (0)\n", - " Historical | has (0)\n", - " Horror | has (0)\n", - " Josei | has (0)\n", - " Kids | has (0)\n", - " Magic | has (0)\n", - " Martial Arts | has (0)\n", - " Mecha | has (0)\n", - " Military | has (0)\n", - " Music | has (0)\n", - " Mystery | has (0)\n", - " NA | has (0)\n", - " Parody | has (0)\n", - " Police | has (0)\n", - " Psychological | has (0)\n", - " Romance | has (0)\n", - " Samurai | has (0)\n", - " School | has (0)\n", - " Sci-Fi | has (0)\n", - " Seinen | has (0)\n", - " Shoujo | has (0)\n", - " Shounen | has (0)\n", - " Slice of Life | has (0)\n", - " Space | has (0)\n", - " Sports | has (0)\n", - " Super Power | has (0)\n", - " Supernatural | has (0)\n", - " Thriller | has (0)\n", - " Vampire | has (0)\n", - " Yaoi | has (0)\n" - ] - } - ], - "source": [ - "# let's double confirmed there are no null values\n", - "for col in animelist_df:\n", - " print(f\" {col} | has ({animelist_df[col].isnull().sum()})\")\n", - " " - ] - }, - { - "cell_type": "markdown", - "id": "593b3e94-b089-4ff5-9be6-6b85fb06bed8", - "metadata": { - "tags": [] - }, - "source": [ - "### Convert to new csv file.\n", - "---\n" - ] - }, - { - "cell_type": "code", - 
"execution_count": 31, - "id": "1545f7c0-4adf-4840-ab5e-8f093fd8e84b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "animelist_df.to_csv('outV2.csv', index=False) " - ] - }, - { - "cell_type": "markdown", - "id": "8f7a0093-684d-4db8-8146-3d45b077934f", - "metadata": {}, - "source": [ - "### if have other data need to be clean\n", - "---" + "relation_counts = {}\n", + "for column in related_df_separated:\n", + " if column=='title':\n", + " continue\n", + " else:\n", + " relation_counts[column] = related_df_separated[column].sum()\n", + "relation_counts " ] }, { "cell_type": "code", "execution_count": null, - "id": "a9ba1c45-306d-44a4-876f-a9c33c1a42fe", + "id": "e1490496-775a-48f9-a248-71a10b3ed780", "metadata": {}, "outputs": [], "source": [] @@ -3855,7 +1153,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.8" } }, "nbformat": 4, diff --git a/CSV file/Cleaned data/outV1.csv b/DataSets/Cleaned data/outV1.csv similarity index 100% rename from CSV file/Cleaned data/outV1.csv rename to DataSets/Cleaned data/outV1.csv diff --git a/CSV file/AnimeList.csv b/DataSets/Raw Data/AnimeList.csv similarity index 100% rename from CSV file/AnimeList.csv rename to DataSets/Raw Data/AnimeList.csv diff --git a/CSV file/UserList.csv b/DataSets/Raw Data/UserList.csv similarity index 100% rename from CSV file/UserList.csv rename to DataSets/Raw Data/UserList.csv