Packaging project
- Fixed bugs
- Restored deleted files
- Removed unnecessary files
- Added a console-visualized progress checklist (see the sketch below)
- Ran tests
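
For reference, a minimal sketch of the console checklist pattern this commit introduces in generate_data.py (step names are taken from the diff below; the time.sleep call is only a placeholder for the real work):

import time

steps = ['Update Facebook Data', 'Update JHU Data', 'Process and export data']
for step in steps:
    # Print an empty checkbox and keep the cursor at the start of the line.
    print('[ ] ' + step, end='\r', flush=True)
    time.sleep(0.5)  # placeholder for the actual preprocessing step
    # Overwrite the checkbox with a check mark once the step finishes.
    print('[' + u'\u2713' + ']')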
caominhduy committed Jun 23, 2020
1 parent 75f6f07 commit 57c288d
Showing 198 changed files with 650,533 additions and 1,093,029 deletions.
35 changes: 16 additions & 19 deletions __main__.py
@@ -1,42 +1,37 @@
"""
Run this command to use our project:
$python3 pandemic-central
or:
$cd pandemic-central
$python3 __main__.py
Please read README.md before using
"""

__author__ = 'Duy Cao, Joseph Galasso'
__copyright__ = '© Pandamic Central, 2020'
__license__ = 'MIT'
__version__ = '1.0.0'
__status__ = 'developing'
__status__ = 'released'
__url__ = 'https://github.com/solveforj/pandemic-central'

import os
import preprocess
import tf_predict
from colorama import init, Fore, Back, Style
#import train_scikit
#import generate_data
import generate_data
import train

path = os.getcwd() + ('/pandemic-central')
os.chdir(path)
init()

def main(state='on'):
print(Fore.BLACK + Back.WHITE + Style.BRIGHT + '''
print(Fore.BLUE + Back.WHITE + Style.BRIGHT + '''
__ __ ___ __ __ ___ ___ __
|__) /\ |\ | | \ |__ |\/| | / ` / ` |__ |\ | | |__) /\ |
| /~~\ | \| |__/ |___ | | | \__, \__, |___ | \| | | \ /~~\ |___
''')
print()
print(Style.DIM + 'An application of Machine Learning in predicting COVID-19\n')
while state == 'on':
menu = ['1. Preprocess data', '2. Train and predict with Scikit-learn',\
'3. Train and predict data using TensorFlow [EXPERIMENTAL]',\
menu = ['1. Preprocess data only',\
'2. Preprocess, train and predict with Scikit-learn',\
'3. Train and predict data using TensorFlow (EXPERIMENTAL)',\
'4. Exit']
print(Fore.BLACK + Back.WHITE + Style.NORMAL)
print('\nOPTIONS:\n')
@@ -47,14 +42,16 @@ def main(state='on'):
user_input = input('Enter option as its equivalent number: ')
if user_input == '1':
preprocess.main()
#generate_data.merge_data()
if user_input == '2':
#train_scikit.main()
print('')
if user_input == '3':
generate_data.merge_data(save_files=True, ag=True)
elif user_input == '2':
train.main()
elif user_input == '3':
tf_predict.main()
if user_input == '4':
elif user_input == '4':
state = 'off'
elif not user_input in ['1', '2', '3', '4']:
print(Fore.RED + '\nInvalid option! Please try again.')
print(Style.RESET_ALL)

if __name__ == '__main__':
main()
51 changes: 0 additions & 51 deletions data_analysis/corr_5000.csv

This file was deleted.

46 changes: 35 additions & 11 deletions generate_data.py
@@ -472,27 +472,38 @@ def preprocess_testing():

return merged_df

def merge_data(save_files = False, mode = "training"):
def merge_data(save_files = False, mode = "training", ag=False):
pd.options.mode.chained_assignment = None

census = preprocess_census(use_reduced=True)

disparities = preprocess_disparities()
health = merge_health_data()
smoking = preprocess_smoking_prevalence()

print("Updating Facebook Data - This will take a while")
other_mobility_path = get_latest_file('7-days-mobility')
google_apple_mobility = pd.read_csv(other_mobility_path)
google_apple_mobility = google_apple_mobility.rename(columns={'fips':'FIPS'})
saving_path = 'processed_data/merged/' + date.today().isoformat() + '.csv.gz'

print("[ ] Update Facebook Data", end='\r')
mobility = preprocess_facebook()
print('[' + u'\u2713' + ']\n')

print("Updating JHU Data - This will take a while")
print("[ ] Update JHU Data", end='\r')
cases = preprocess_JHU()
print('[' + u'\u2713' + ']\n')

print("Updating Rt Data - This will take a while")
print("[ ] Update Rt Data", end='\r')
rt = preprocess_Rt()
print('[' + u'\u2713' + ']\n')

print("Updating Testing Data - This will take a while")
print("[ ] Update Testing Data", end='\r')
testing = preprocess_testing()
print('[' + u'\u2713' + ']\n')

print("[ ] Process and export data", end='\r')

print("Processing and exporting data")
census['FIPS'] = census['FIPS'].astype(int)
disparities['FIPS'] = disparities['FIPS'].astype(int)
health['FIPS'] = health['FIPS'].astype(int)
@@ -512,11 +523,17 @@ def merge_data(save_files = False, mode = "training"):
merged_DF = pd.merge(left=merged_DF, right=disparities, how='left', on=['FIPS'], copy=False)
merged_DF = pd.merge(left=merged_DF, right = smoking, how='left', on=['region', 'Location'], copy=False)
merged_DF = pd.merge(left=merged_DF, right=census, how='left', on=['FIPS'], copy=False).sort_values(['FIPS', 'date']).reset_index(drop=True)
if ag: # do not delete
merged_DF = pd.merge(left=merged_DF, right=google_apple_mobility, how='left', on=['FIPS', 'date'], copy=False).sort_values(['FIPS', 'date']).reset_index(drop=True)

locations = merged_DF['Location']
merged_DF = merged_DF.drop('Location', axis=1)
merged_DF.insert(0, 'Location', locations)

if ag: # do not delete
apple_google_df = merged_DF.dropna() # do not delete
merged_DF = merged_DF.drop(['google_mobility_7d', 'apple_mobility_7d'], 1) # do not delete

columns = merged_DF.columns.tolist()
columns.remove('fb_stationary')
columns.remove('fb_movement_change')
Expand All @@ -533,16 +550,23 @@ def merge_data(save_files = False, mode = "training"):
training_no_mobility = cleaned_DF.drop(['fb_stationary', 'fb_movement_change'], axis=1)

if save_files == True:
unused_DF.to_csv(os.path.split(os.getcwd())[0] + "/unused_data.csv", index=False)
training_mobility.to_csv(os.path.split(os.getcwd())[0] + "/training_mobility.csv", index=False)
latest_mobility.to_csv(os.path.split(os.getcwd())[0] + "/latest_mobility.csv", index=False)
latest_no_mobility.to_csv(os.path.split(os.getcwd())[0] + "/latest_no_mobility.csv", index=False)
training_no_mobility.to_csv(os.path.split(os.getcwd())[0] + "/training_no_mobility.csv", index=False)
if ag:
apple_google_df.to_csv(saving_path, compression='gzip', index=False) # DO NOT DELETE
if not ag:
unused_DF.to_csv(os.path.split(os.getcwd())[0] + "/unused_data.csv", index=False)
training_mobility.to_csv(os.path.split(os.getcwd())[0] + "/training_mobility.csv", index=False)
latest_mobility.to_csv(os.path.split(os.getcwd())[0] + "/latest_mobility.csv", index=False)
latest_no_mobility.to_csv(os.path.split(os.getcwd())[0] + "/latest_no_mobility.csv", index=False)
training_no_mobility.to_csv(os.path.split(os.getcwd())[0] + "/training_no_mobility.csv", index=False)

print('[' + u'\u2713' + ']\n')

if mode == "training":
return training_mobility, training_no_mobility
if mode == "predictions":
return latest_mobility, latest_no_mobility

pd.options.mode.chained_assignment = 'warn' # return to default

if __name__ == '__main__':
merge_data(save_files=True)
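
For clarity, an illustrative call pattern for the updated merge_data signature, mirroring the call added in __main__.py above (assumes the working directory is the repository root so that the relative processed_data/merged/ path exists):

import generate_data

# Preprocess everything and export the merged Apple/Google mobility frame
# to processed_data/merged/<today>.csv.gz (the ag=True branch above).
generate_data.merge_data(save_files=True, ag=True)

# With the defaults (save_files=False, mode="training", ag=False) the
# function returns the two training frames instead of writing files.
training_mobility, training_no_mobility = generate_data.merge_data()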
Binary file removed models/sk-learn-model-rf-mobility.pkl
Binary file removed models/sk-learn-model-rf-no-mobility.pkl
