Packaging project
- Fixed bugs
- Restored deleted files
- Removed unnecessary files
- Added a console-visualized progress checklist (see the sketch below)
- Ran tests
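
For reference, a minimal sketch of the console checklist pattern this commit introduces in generate_data.py (step names are taken from the diff below; the time.sleep call is only a placeholder for the real work):

import time

steps = ['Update Facebook Data', 'Update JHU Data', 'Process and export data']
for step in steps:
    # Print an empty checkbox and keep the cursor at the start of the line.
    print('[ ] ' + step, end='\r', flush=True)
    time.sleep(0.5)  # placeholder for the actual preprocessing step
    # Overwrite the checkbox with a check mark once the step finishes.
    print('[' + u'\u2713' + ']')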
caominhduy committed Jun 23, 2020
1 parent 75f6f07 commit 57c288d
Showing 198 changed files with 650,533 additions and 1,093,029 deletions.
35 changes: 16 additions & 19 deletions __main__.py
@@ -1,42 +1,37 @@
"""
Run this command to use our project:
$python3 pandemic-central
or:
$cd pandemic-central
$python3 __main__.py
Please read README.md before using
"""

__author__ = 'Duy Cao, Joseph Galasso'
__copyright__ = '© Pandamic Central, 2020'
__license__ = 'MIT'
__version__ = '1.0.0'
__status__ = 'developing'
__status__ = 'released'
__url__ = 'https://github.com/solveforj/pandemic-central'

import os
import preprocess
import tf_predict
from colorama import init, Fore, Back, Style
#import train_scikit
#import generate_data
import generate_data
import train

path = os.getcwd() + ('/pandemic-central')
os.chdir(path)
init()

def main(state='on'):
print(Fore.BLACK + Back.WHITE + Style.BRIGHT + '''
print(Fore.BLUE + Back.WHITE + Style.BRIGHT + '''
__ __ ___ __ __ ___ ___ __
|__) /\ |\ | | \ |__ |\/| | / ` / ` |__ |\ | | |__) /\ |
| /~~\ | \| |__/ |___ | | | \__, \__, |___ | \| | | \ /~~\ |___
''')
print()
print(Style.DIM + 'An application of Machine Learning in predicting COVID-19\n')
while state == 'on':
menu = ['1. Preprocess data', '2. Train and predict with Scikit-learn',\
'3. Train and predict data using TensorFlow [EXPERIMENTAL]',\
menu = ['1. Preprocess data only',\
'2. Preprocess, train and predict with Scikit-learn',\
'3. Train and predict data using TensorFlow (EXPERIMENTAL)',\
'4. Exit']
print(Fore.BLACK + Back.WHITE + Style.NORMAL)
print('\nOPTIONS:\n')
@@ -47,14 +42,16 @@ def main(state='on'):
user_input = input('Enter option as its equivalent number: ')
if user_input == '1':
preprocess.main()
#generate_data.merge_data()
if user_input == '2':
#train_scikit.main()
print('')
if user_input == '3':
generate_data.merge_data(save_files=True, ag=True)
elif user_input == '2':
train.main()
elif user_input == '3':
tf_predict.main()
if user_input == '4':
elif user_input == '4':
state = 'off'
elif not user_input in ['1', '2', '3', '4']:
print(Fore.RED + '\nInvalid option! Please try again.')
print(Style.RESET_ALL)

if __name__ == '__main__':
main()
51 changes: 0 additions & 51 deletions data_analysis/corr_5000.csv

This file was deleted.

46 changes: 35 additions & 11 deletions generate_data.py
@@ -472,27 +472,38 @@ def preprocess_testing():

return merged_df

def merge_data(save_files = False, mode = "training"):
def merge_data(save_files = False, mode = "training", ag=False):
pd.options.mode.chained_assignment = None

census = preprocess_census(use_reduced=True)

disparities = preprocess_disparities()
health = merge_health_data()
smoking = preprocess_smoking_prevalence()

print("Updating Facebook Data - This will take a while")
other_mobility_path = get_latest_file('7-days-mobility')
google_apple_mobility = pd.read_csv(other_mobility_path)
google_apple_mobility = google_apple_mobility.rename(columns={'fips':'FIPS'})
saving_path = 'processed_data/merged/' + date.today().isoformat() + '.csv.gz'

print("[ ] Update Facebook Data", end='\r')
mobility = preprocess_facebook()
print('[' + u'\u2713' + ']\n')

print("Updating JHU Data - This will take a while")
print("[ ] Update JHU Data", end='\r')
cases = preprocess_JHU()
print('[' + u'\u2713' + ']\n')

print("Updating Rt Data - This will take a while")
print("[ ] Update Rt Data", end='\r')
rt = preprocess_Rt()
print('[' + u'\u2713' + ']\n')

print("Updating Testing Data - This will take a while")
print("[ ] Update Testing Data", end='\r')
testing = preprocess_testing()
print('[' + u'\u2713' + ']\n')

print("[ ] Process and export data", end='\r')

print("Processing and exporting data")
census['FIPS'] = census['FIPS'].astype(int)
disparities['FIPS'] = disparities['FIPS'].astype(int)
health['FIPS'] = health['FIPS'].astype(int)
@@ -512,11 +523,17 @@ def merge_data(save_files = False, mode = "training"):
merged_DF = pd.merge(left=merged_DF, right=disparities, how='left', on=['FIPS'], copy=False)
merged_DF = pd.merge(left=merged_DF, right = smoking, how='left', on=['region', 'Location'], copy=False)
merged_DF = pd.merge(left=merged_DF, right=census, how='left', on=['FIPS'], copy=False).sort_values(['FIPS', 'date']).reset_index(drop=True)
if ag: # do not delete
merged_DF = pd.merge(left=merged_DF, right=google_apple_mobility, how='left', on=['FIPS', 'date'], copy=False).sort_values(['FIPS', 'date']).reset_index(drop=True)

locations = merged_DF['Location']
merged_DF = merged_DF.drop('Location', axis=1)
merged_DF.insert(0, 'Location', locations)

if ag: # do not delete
apple_google_df = merged_DF.dropna() # do not delete
merged_DF = merged_DF.drop(['google_mobility_7d', 'apple_mobility_7d'], 1) # do not delete

columns = merged_DF.columns.tolist()
columns.remove('fb_stationary')
columns.remove('fb_movement_change')
Expand All @@ -533,16 +550,23 @@ def merge_data(save_files = False, mode = "training"):
training_no_mobility = cleaned_DF.drop(['fb_stationary', 'fb_movement_change'], axis=1)

if save_files == True:
unused_DF.to_csv(os.path.split(os.getcwd())[0] + "/unused_data.csv", index=False)
training_mobility.to_csv(os.path.split(os.getcwd())[0] + "/training_mobility.csv", index=False)
latest_mobility.to_csv(os.path.split(os.getcwd())[0] + "/latest_mobility.csv", index=False)
latest_no_mobility.to_csv(os.path.split(os.getcwd())[0] + "/latest_no_mobility.csv", index=False)
training_no_mobility.to_csv(os.path.split(os.getcwd())[0] + "/training_no_mobility.csv", index=False)
if ag:
apple_google_df.to_csv(saving_path, compression='gzip', index=False) # DO NOT DELETE
if not ag:
unused_DF.to_csv(os.path.split(os.getcwd())[0] + "/unused_data.csv", index=False)
training_mobility.to_csv(os.path.split(os.getcwd())[0] + "/training_mobility.csv", index=False)
latest_mobility.to_csv(os.path.split(os.getcwd())[0] + "/latest_mobility.csv", index=False)
latest_no_mobility.to_csv(os.path.split(os.getcwd())[0] + "/latest_no_mobility.csv", index=False)
training_no_mobility.to_csv(os.path.split(os.getcwd())[0] + "/training_no_mobility.csv", index=False)

print('[' + u'\u2713' + ']\n')

if mode == "training":
return training_mobility, training_no_mobility
if mode == "predictions":
return latest_mobility, latest_no_mobility

pd.options.mode.chained_assignment = 'warn' # return to default

if __name__ == '__main__':
merge_data(save_files=True)
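
For clarity, an illustrative call pattern for the updated merge_data signature, mirroring the call added in __main__.py above (assumes the working directory is the repository root so that the relative processed_data/merged/ path exists):

import generate_data

# Preprocess everything and export the merged Apple/Google mobility frame
# to processed_data/merged/<today>.csv.gz (the ag=True branch above).
generate_data.merge_data(save_files=True, ag=True)

# With the defaults (save_files=False, mode="training", ag=False) the
# function returns the two training frames instead of writing files.
training_mobility, training_no_mobility = generate_data.merge_data()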
Binary file removed models/sk-learn-model-rf-mobility.pkl
Binary file removed models/sk-learn-model-rf-no-mobility.pkl
