-
Notifications
You must be signed in to change notification settings - Fork 1
/
Update_allcsv.py
131 lines (105 loc) · 4.99 KB
/
Update_allcsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import xlsxwriter
import pandas as pd
import numpy as np
# --- Input files ------------------------------------------------------------
# QC lab table (CSV) and the master experiment sheet (Excel workbook).
QCLab_path = "/Users/alex/Downloads/QCLAB_AD_mice062921_clean.csv"
Concatweight_path = "/Users/alex/Downloads/MasterSheet_Experiments2021-4_9_3_21.xlsx"

# --- Output files -----------------------------------------------------------
# "rd" is the human-readable table; "R" is the table used as input for R.
# Both are built from the same data further down in this script.
all_csv_rd = "/Users/alex/Downloads/Mouse_experiments_readable.csv"
all_csv_R = "/Users/alex/Downloads/Mouse_experiments_R.csv"

# Load both inputs into data frames.
df_QC = pd.read_csv(QCLab_path)
df_ConWeight = pd.read_excel(Concatweight_path)

# Disabled preprocessing steps, kept for reference:
#df['Structure'] = df['Structure'].str.lower()
#df_ConWeight = df_ConWeight[df_ConWeight.Success == True]
# Weeks for which a glucose column ('Glucose_week<N>') is looked up in the
# master sheet.
glucose_weeks = [0, 6, 12, 20, 30]

# New glucose column names, in the same order as glucose_weeks.
glucose_weeks_list = [f'glucose_w{week}' for week in glucose_weeks]

# Old column name -> new column name, e.g. 'Glucose_week6' -> 'glucose_w6'.
glucose_weeks_dir = {
    f'Glucose_week{week}': f'glucose_w{week}' for week in glucose_weeks
}

# Rename map applied to the master sheet: identity columns first, then the
# glucose columns (further columns are added to this map later in the script).
rename_cols_dir = {'Animal_ID': 'animalid', 'Genotype': 'genotype', 'Sex': 'sex'}
#df_ConWeight.rename(columns=rename_cols_dir, inplace=True)
rename_cols_dir.update(glucose_weeks_dir)
# Index of the last week with a weight column; columns 'Weight_week0' through
# 'Weight_week<number_weeks_weight>' are looked up in the master sheet.
number_weeks_weight = 28

# New weight column names, week 0 .. number_weeks_weight inclusive.
weight_weeks_list = [f'weight_w{week}' for week in range(number_weeks_weight + 1)]

# Old column name -> new column name, e.g. 'Weight_week3' -> 'weight_w3'.
weight_weeks_dir = {
    f'Weight_week{week}': f'weight_w{week}'
    for week in range(number_weeks_weight + 1)
}

# Last week covered by either measurement series. Taking the larger of the
# two horizons ensures that neither the weight nor the glucose series is cut
# short when the per-week table is built from number_weeks.
number_weeks = max(number_weeks_weight, max(glucose_weeks))

# Add the weight columns to the shared rename map.
rename_cols_dir.update(weight_weeks_dir)
# Bring the master sheet onto the lower-case naming scheme, then keep only the
# identity columns plus the per-week glucose and weight columns.
df_ConWeight = df_ConWeight.rename(columns=rename_cols_dir)
allcolumns = ["animalid", "genotype", "sex", *glucose_weeks_list, *weight_weeks_list]
df_allcsv_human = df_ConWeight[allcolumns]

# Keep only QC rows whose DWI entry is present and is not the literal "Blank".
dwi_available = (df_QC['DWI'] != "Blank") & df_QC['DWI'].notna()
df_QC = df_QC[dwi_available]

# Disabled experiments, kept for reference:
#df_allcsv_human['DWI'] = ''
#df_allcsv_human['GRE'] = ''
#df_QC_2 = df_QC.copy()
#df_QC_2.replace(to_replace=r':0$', value='', regex=True)
#df_QC_2.replace(to_replace=r':1$', value='', regex=True)
#duplicate = df_QC_2[df_QC_2.duplicated()]

# One QC row per animal (the first occurrence is kept), reduced to the id and
# the DWI/GRE columns, with the id column renamed to match the master sheet.
#Duplicates are 191205-1, 191205-2, 191205-3, 191205-4
df_QC = (
    df_QC.drop_duplicates('Animal')[['Animal', 'DWI', 'GRE']]
    .rename(columns={'Animal': 'animalid'})
)

# Attach DWI/GRE to every master-sheet row by animal id and write the
# human-readable table.
qc_by_animal = df_QC.set_index('animalid')
df_allcsv_human = df_allcsv_human.join(qc_by_animal, on='animalid')
df_allcsv_human.to_csv(all_csv_rd)

# Rows without an animal id are dropped only after the readable CSV has been
# written; everything below works on identified animals only.
has_animal_id = df_allcsv_human['animalid'].notna()
df_allcsv_human = df_allcsv_human[has_animal_id]
# Build the long-format table for R: one row per (week, animal) with that
# week's weight and glucose plus the animal's constant attributes.

# Rows are looked up by animal id below, so ids must be unique.
if not df_allcsv_human["animalid"].is_unique:
    raise Exception("The subject names in this dataframe are not unique, cannot proceed with next step for R")

subject_names = df_allcsv_human['animalid'].unique().tolist()

# Flat list of column names (a nested list would create a MultiIndex header).
R_columns = ['week', 'animalid', 'genotype', 'sex', 'weight', 'glucose', 'DWI', "GRE"]

# Index by animal id once so each value is a direct label lookup instead of a
# boolean-mask scan of the whole frame.
subjects = df_allcsv_human.set_index('animalid')

rows = []
for week in range(number_weeks + 1):
    weight_col = f'weight_w{week}'
    glucose_col = f'glucose_w{week}'
    # Not every week has a weight/glucose column; missing ones become NaN.
    has_weight = weight_col in subjects.columns
    has_glucose = glucose_col in subjects.columns
    for subject_name in subject_names:
        weight_val = subjects.at[subject_name, weight_col] if has_weight else np.nan
        glucose_val = subjects.at[subject_name, glucose_col] if has_glucose else np.nan
        rows.append([
            week,
            subject_name,
            subjects.at[subject_name, 'genotype'],
            subjects.at[subject_name, 'sex'],
            weight_val,
            glucose_val,
            subjects.at[subject_name, 'DWI'],
            subjects.at[subject_name, 'GRE'],
        ])

# Create the frame in one step (row-by-row .loc enlargement copies the frame
# on every insert); column dtypes are inferred by pandas from the row values.
df_allcsv_R = pd.DataFrame(rows, columns=R_columns)
df_allcsv_R.to_csv(all_csv_R)
# Disabled scratch code kept inside a bare string literal: the string is never
# assigned or used, so nothing in it runs. It is also not valid Python as
# written (the second `newrow =` assignment has no right-hand side).
"""
#df_dict = {name: df_allcsv_human.loc[df_allcsv_human['customer name'] == name] for name in customerNames}
#df_dict['Name3']
animals = df_QC.AnimalID.unique()
for animal in animals:
newrow = df_ConWeight['animal-id'] == animal
newrow =
mydataframe = mydataframe.append(new_row, ignore_index=True)
glucose_weeks_dir = {'Glucose_week0':'glucose_week0', 'Glucose_week6':'glucose_week6',
'Glucose_week12':'glucose_week12', 'Glucose_week20':'glucose_week20',
'Glucose_Week30':'glucose_week30'}
"""
# Disabled snippet kept inside a bare string literal (never assigned or used,
# so it does not run). It applies the DWI "Blank"/NaN filters to df_QC and
# prints how many rows exist for each genotype/treatment combination.
"""
df_QC = df_QC[df_QC.DWI != "Blank"]
df_QC = df_QC[df_QC['DWI'].notna()]
genotypes = df_QC.Genotype.unique()
treatments = df_QC.Treatment.unique()
for genotype in genotypes:
for treatment in treatments:
number_gen = len(df_QC[(df_QC['Genotype'] == genotype) & (df_QC['Treatment'] == treatment)])
#if number_gen!=0:
print(f"There are {number_gen} with genotype {genotype} and treatment {treatment}")
"""