Error in SVM: AttributeError: 'RandomizedSearchCV' object has no attribute 'best_estimator_' and NameError: name 'X_train_confirmed' is not defined #273

Open
DebjyotiSaha opened this issue Mar 20, 2020 · 4 comments

Comments

@DebjyotiSaha

import numpy as np
import pandas as pd
import matplotlib.colors as mcolors
import random
import math
import time
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error
import datetime
import operator
import matplotlib.pyplot as plt
plt.style.use('seaborn')

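# Load the confirmed, deaths, and recovered time-series CSVs (local copies)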
confirmed_cases= pd.read_csv("D:/Project/New Projects/COVID-19/time_series_covid-19_confirmed.csv")
deaths_reported= pd.read_csv("D:/Project/New Projects/COVID-19/time_series_covid-19_deaths.csv")
recover_reported= pd.read_csv("D:/Project/New Projects/COVID-19/time_series_covid-19_recovered.csv")

print(confirmed_cases.head())
print(deaths_reported.head())
print(recover_reported.head())

cols=confirmed_cases.keys()
print(cols)

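# Keep only the date columns (everything from the fifth column onward)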
confirmed= confirmed_cases.loc[:, cols[4]: cols[-1]]
deaths= deaths_reported.loc[:, cols[4]: cols[-1]]
recoveries= recover_reported.loc[:, cols[4]: cols[-1]]
print(confirmed)
print(deaths)
print(recoveries)

print(confirmed.head())

dates= confirmed.keys()
world_cases= []
total_deaths= []
mortality_rate= []
total_recovered= []

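# Sum each date's column across all rows to get global daily totals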
for i in dates:
    confirmed_sum= confirmed[i].sum()
    death_sum= deaths[i].sum()
    recovered_sum= recoveries[i].sum()
    world_cases.append(confirmed_sum)
    total_deaths.append(death_sum)
    mortality_rate.append(death_sum/confirmed_sum)
    total_recovered.append(recovered_sum)

print(confirmed_sum)
print(death_sum)
print(recovered_sum)
print(world_cases)

days_since_1_22= np.array([i for i in range(len(dates))]).reshape(-1,1)
world_cases= np.array(world_cases).reshape(-1,1)
total_deaths= np.array(total_deaths).reshape(-1,1)
total_recovered= np.array(total_recovered).reshape(-1,1)
print(days_since_1_22)
print(world_cases)
print(total_deaths)
print(total_recovered)

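# Day-index feature array, extended 10 days into the future for forecasting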
day_in_future=10
future_forecast=np.array([i for i in range(len(dates)+ day_in_future)]).reshape(-1,1)
adjusted_dates= future_forecast[:-10]
print("future_forecast", future_forecast)

latest_confirmed= confirmed_cases[dates[-1]]
latest_deaths= deaths_reported[dates[-1]]
latest_recoveries= recover_reported[dates[-1]]
print(latest_confirmed)
print(latest_deaths)
print(latest_recoveries)

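# Per-country confirmed totals for the most recent date in the data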
unique_countries= list(confirmed_cases["Country/Region"].unique())
print(unique_countries)

country_confirmed_cases= []
no_cases=[]
for i in unique_countries:
    cases= latest_confirmed[confirmed_cases["Country/Region"]==i].sum()
    if cases>0:
        country_confirmed_cases.append(cases)
    else:
        no_cases.append(i)

for i in no_cases:
    unique_countries.remove(i)

unique_countries= [k for k, v in sorted(zip(unique_countries, country_confirmed_cases), key= operator.itemgetter(1))]

for i in range(len(unique_countries)):
    country_confirmed_cases[i]= latest_confirmed[confirmed_cases["Country/Region"]==unique_countries[i]].sum()

print("Confirmed cases by Country/Region")
for i in range(len(unique_countries)):
    print(f'{unique_countries[i]}: {country_confirmed_cases[i]} cases')

unique_provinces= list(confirmed_cases["Province/State"].unique())
outliers= ["United Kingdom", "Denmark", "France"]
for i in outliers:
    unique_provinces.remove(i)

province_confirmed_cases=[]
no_cases=[]
for i in unique_provinces:
    cases= latest_confirmed[confirmed_cases["Province/State"]==i].sum()
    if cases>0:
        province_confirmed_cases.append(cases)
    else:
        no_cases.append(i)

for i in no_cases:
    unique_provinces.remove(i)

for i in range(len(unique_provinces)):
    print(f'{unique_provinces[i]}: {province_confirmed_cases[i]} cases')

nan_indices=[]
for i in range(len(unique_provinces)):
    if type(unique_provinces[i])==float:
        nan_indices.append(i)

unique_provinces= list(unique_provinces)
province_confirmed_cases= list(province_confirmed_cases)
for i in nan_indices:
    unique_provinces.pop(i)
    province_confirmed_cases.pop(i)

plt.figure(figsize=(32,32))
plt.barh(unique_countries, country_confirmed_cases)
plt.title("No. of COVID-19 confirmed cases in countries")
plt.xlabel("No. of COVID-19 confirmed cases")
plt.show()

china_confirmed=latest_confirmed[confirmed_cases["Country/Region"]=="China"].sum()
outside_mainland_china_confirmed= np.sum(country_confirmed_cases) - china_confirmed
plt.figure(figsize=(16,9))
plt.barh("Mainland China", china_confirmed)
plt.barh("Outside MC", outside_mainland_china_confirmed)
plt.title("Number of confirmed cases")
plt.show()

print("Outside MC {} cases:".format(outside_mainland_china_confirmed))
print("Mainland China: {} cases".format(china_confirmed))
print("Total: {} cases".format(china_confirmed+outside_mainland_china_confirmed))

visual_unique_countries=[]
visual_confirmed_cases=[]
others=np.sum(country_confirmed_cases[10:])
for i in range(len(country_confirmed_cases[:-10])):
    visual_unique_countries.append(unique_countries[i])
    visual_confirmed_cases.append(country_confirmed_cases[i])

visual_unique_countries.append("Others")
visual_confirmed_cases.append(others)
plt.figure(figsize=(32,18))
plt.barh(visual_unique_countries, visual_confirmed_cases)
plt.title("No. of confirmed covid-19 in countries/region", size=20)
plt.show()

c=random.choices(list(mcolors.CSS4_COLORS.values()),k=len(unique_countries))
plt.figure(figsize=(20,20))
plt.title("Covid19 confirmed")
plt.pie(visual_confirmed_cases, colors=c)
plt.legend(visual_unique_countries, loc="best")
plt.show()

c=random.choices(list(mcolors.CSS4_COLORS.values()),k=len(unique_countries))
plt.figure(figsize=(20,20))
plt.title("Covid19 confirmed")
plt.pie(visual_confirmed_cases[1:], colors=c)
plt.legend(visual_unique_countries[-1:], loc="best")
plt.show()

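# Hyperparameter grid and randomized search for an SVR regressor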
kernel=["poly", "sigmoid", "rbf"]
c=[0.01, 0.1, 1, 10]
gamma=[0.01, 0.1, 1]
epsilon=[0.01, 0.1, 1]
shrinking=[True,False]
svm_grid={"kernel":kernel, "C":c, "gamma":gamma, "epsilon":epsilon, "shrinking":shrinking}

svm=SVR()
svm_search=RandomizedSearchCV(svm, svm_grid, scoring="neg_mean_squared_error", cv=3, return_train_score=True, n_jobs=-1, n_iter=40, verbose=1)
svm_search.fit(X_train_confirmed, y_train_confirmed)

print(svm_search.best_estimator_)
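
The two errors in the title are connected: X_train_confirmed and y_train_confirmed are never defined anywhere above, so svm_search.fit raises the NameError, and because the fit never completes, svm_search.best_estimator_ is never set, which produces the AttributeError. A minimal sketch of the split step that appears to be missing (the test size and shuffle setting are assumptions, not taken from the original code):

# Assumed fix: create the train/test split before running the search.
# days_since_1_22 is the day index (features); world_cases holds the
# cumulative global confirmed counts (target).
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22, world_cases.ravel(), test_size=0.15, shuffle=False)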

@JonnoFTW
Collaborator

This looks like you're having an issue with sklearn. Please ask on Stack Overflow with formatted code, the full traceback, and the problem reduced to an MWE: https://stackoverflow.com/help/minimal-reproducible-example

@Abhilash28snist

Can you please define X and y in this program?
I have the datasets, so please specify X and y (the target).
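
Judging from the variable names in the code above, X is the day index (days_since_1_22, split into X_train_confirmed / X_test_confirmed) and y is the cumulative global confirmed count (world_cases). Once a split like the one sketched under the original code is in place and the fit succeeds, best_estimator_ becomes available (refit defaults to True in RandomizedSearchCV), and the tuned model can be evaluated and used to forecast. A rough sketch, with assumed variable names:

# Assumed usage once svm_search.fit has completed without error.
svm_confirmed = svm_search.best_estimator_            # best SVR found by the search
test_pred = svm_confirmed.predict(X_test_confirmed)   # held-out predictions
print('MAE:', mean_absolute_error(y_test_confirmed, test_pred))
print('MSE:', mean_squared_error(y_test_confirmed, test_pred))
svm_pred = svm_confirmed.predict(future_forecast)     # forecast over the extended day range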

@John-p-v1999

John-p-v1999 commented Jan 7, 2022 via email

@Abhilash28snist

Sorry, I can't understand your language.
Reply only if you know the answer.
