From 9cdbadfb64ecdc0c88bfdd3b5659ae3e93957591 Mon Sep 17 00:00:00 2001 From: npg2108 Date: Mon, 23 Apr 2018 14:54:52 -0400 Subject: [PATCH] First commit --- README.md | 34 + cohorts/__init__.py | 10 + cohorts/cohort.py | 1244 ++++++++++++++++++++++++++ cohorts/utils.py | 248 ++++++ setup.py | 10 + test.ipynb | 2040 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 3586 insertions(+) create mode 100644 README.md create mode 100644 cohorts/__init__.py create mode 100644 cohorts/cohort.py create mode 100644 cohorts/utils.py create mode 100644 setup.py create mode 100644 test.ipynb diff --git a/README.md b/README.md new file mode 100644 index 0000000..48fdf23 --- /dev/null +++ b/README.md @@ -0,0 +1,34 @@ +# `Cohorts` + +### A Python package for standardized and reproducible processing, analysis, and integration of clinical proteomics data + +Written by Nicholas Giangreco + +Copyright (c) 2018 by the Tatonetti Lab + +## Goal + +Discovery of disease biomarkers or risk factors requires high-throughput experimentation on samples collected from a cohort of patients. These clinical data are collected from multiple patient cohorts, either within one institution or across many institutions. + +This Python package aims to provide standardization in creating a data structure representing each clinical dataset, while allowing for seamless integration of multiple instances. + + +## Installation + +git clone ... +cd ... +pip install . +... + +### Prerequisites + +Python 3 + +## Contribute + +Please do! Contributions to both the cohort data structure and its functionality are needed.
+ +## License + + + diff --git a/cohorts/__init__.py b/cohorts/__init__.py new file mode 100644 index 0000000..8a91a85 --- /dev/null +++ b/cohorts/__init__.py @@ -0,0 +1,10 @@ +''' +Cohorts : Enabling standardized and reproducible clinical 'omics analysis +========================================================================= + +This packages allows handling and structuring datasets from patient cohorts. + + +''' + +from .cohort import Cohort \ No newline at end of file diff --git a/cohorts/cohort.py b/cohorts/cohort.py new file mode 100644 index 0000000..bf3d884 --- /dev/null +++ b/cohorts/cohort.py @@ -0,0 +1,1244 @@ +from .utils import * +import os +import numpy as np +import pandas as pd +import scipy.stats as sc +from sklearn.preprocessing import StandardScaler +import itertools as it + +class Cohort(object): + """ + Patient cohort object for patient proteomics data. + + Dataframes, variables, and functions facilitating the processing, analysis, and integration of the cohort data. 
+ + Parameters + ---------- + cohort: str + name of the patient cohort + + file_dir: str + directory where replicate dataframe and + sample group membership dataframe file names are located + + replicates_file: str + name of the replicates dataframe file + + A proteins x B replicates + tab delimited + + replicate = "SampleName" + "_Rep[0-9]" + + sample_groups_file: str + name of the sample group file + + N groups x M samples + + sample = "SampleName" + + data_dir: str + directory where extra data files are located + + uniprot_file: str + name of the uniprot database flat file located in data_dir + + Examples + -------- + >>>c = cohorts.Cohort(cohort='cohort_name', + file_dir="path/to/files/" + replicates_file="file_name", + sample_groups_file="sample_groups_file_name", + data_dir="path/to/data/dir/", + uniprot_file="uniprot_flat_file" + ) + >>>c.set_replicates_hq() + >>>c.set_trans_replicates_hq() + >>>c.set_samples_hq() + >>>c.set_trans_samples_hq() + + """ + + tests = [ + ( "t-test",sc.ttest_ind ), + ("Wilcoxon_RankSum_test",sc.ranksums) + ] + + + def __init__(self,cohort='cumc', + replicates_file=None,sample_groups_file=None, + uniprot_file=None, + file_dir="",data_dir="../../data/"): + + self.cwd = os.getcwd() + self.data_dir = data_dir + self.file_dir = file_dir + self.cohort = cohort + self.replicates_file = replicates_file + self.sample_groups_file = sample_groups_file + self.uniprot_file = uniprot_file + self.raw_samples = None + self.raw_replicates = None + self.replicates_hq = None + self.trans_replicates_hq = None + self.samples_hq = None + self.trans_samples_hq = None + self.sample_replicate_dictionary = None + self.samples = None + self.replicates = None + self.proteins = None + self.replicate_groups = None + self.sample_groups = None + self.tidy_replicate_groups = None + self.tidy_sample_groups = None + self.groups = None + self.ref = None + self.treat = None + self.set_replicates_file() + self.set_sample_groups_file() + self.set_raw_replicates() + 
self.set_replicates() + self.set_sample_replicate_dictionary() + self.set_samples() + self.set_sample_groups() + self.set_replicate_groups() + self.set_raw_samples() + self.set_tidy_sample_groups() + self.set_tidy_replicate_groups() + self.set_proteins() + self.set_groups() + self.set_ref() + self.set_treat() + + #extra parameters/data objects that can be + #attributed to an object instance + self.data = {} + self.params = {} + + + #SET FUNCTIONS + + def set_replicates_file(self): + """ + Setting the replicates file string. + Combination of the file directory string and the replicates file string + + Parameters + ---------- + None + + """ + + self.replicates_file = str(self.file_dir) + str(self.replicates_file) + + def set_sample_groups_file(self): + """ + Setting the replicate_groups string + Combination of the path string and the replicates file string + + Parameters + ---------- + None + + """ + self.sample_groups_file = str(self.file_dir) + str(self.sample_groups_file) + + def set_uniprot_file(self): + """ + Setting the path to the uniprot flat file + + Parameters + ---------- + None + + """ + self.uniprot_file = str(self.data_dir) + str(self.uniprot_file) + + def set_raw_replicates(self): + """ + Setting raw replicate dataframe from files given to object + + Parameters + ---------- + None + + """ + self.raw_replicates = pd.read_csv(self.replicates_file,delimiter="\t",index_col=0) + + def set_replicates(self): + """ + Setting replicate names + + Depends on : raw_replicates + + Parameters + ---------- + None + + """ + + self.replicates = self.raw_replicates.columns.tolist() + + def set_sample_replicate_dictionary(self): + """ + Setting dictionary of samples (keys) to replicates (values) + + Depends on : replicates + + Parameters + ---------- + None + + """ + + #get replicates + replicates = self.replicates + + #strip replicate indicator to get array of sample names for replicates + duplicate_samples = [y[0] for y in [x for x in 
np.chararray.split(replicates,"_")]] + + #set previous to series for easy unique sample name retrieval + samples = pd.Series(duplicate_samples).unique() + + #initialize dictionary + dictionary = {} + + #loop through unique samples + for key in samples: + #get indice of replicates for sample + replicate_inds = pd.Series(duplicate_samples).isin([key]) + + #get replicates for sample + values = np.array(replicates)[replicate_inds.tolist()] + + #set key:value pair (sample : replicates) in dictionary + dictionary[key] = values.tolist() + + self.sample_replicate_dictionary = dictionary + + def set_raw_samples(self,agg='mean'): + """ + Setting raw sample dataframe + + Take statistic of replicates with particular sample membership + + Depends on : raw_replicates and make_df_samples() + + Parameters + ---------- + + agg: {'mean','median', 'variance'} + statistic to apply to replicate values for sample membership + + """ + + df = self.raw_replicates.fillna(0) + + self.raw_samples = self.make_df_samples(df,agg=agg) + + def set_replicates_hq(self, + uniprot_annot=False, + annot_status='reviewed', + quant_least_reps_per_samps=False, + n_reps=1, + all_reps_quant=False, + threshold=88, + intrasamp_var=False, + higher=False): + """ + Setting proteins in raw dataframe to be reviewed by Uniprot + + Depends on : raw_replicates, proteins_in_n_replicates_per_samples, proteins_quant_all_reps, proteins_with_uniprot_annot, proteins_by_intrasample_variability + + Parameters + ---------- + + uniprot_annot {True,False} + Subset proteins by annot=ation status in uniprot database + annot_status {'reviewed','unreviewed'} + Annotation status in uniprot + quant_least_reps_per_samps {True,False} + Subset proteins by the number of replicates quantified per sample + n_reps + Number of replicates quantified per sample. 
See above + all_reps_quant {True,False} + Subset proteins by only those quantified in every replicate per sample + intrasamp_var {True,False} + Subset proteins by the amount of replicate variance per sample + threshold + Quantile to subset variance from. See above + higher {True,False} + Indicates whether to subset by proteins with a high annotation score from Uniprot + + """ + + #get raw data + df = self.raw_replicates.fillna(0) + + #subset to have proteins found in atleast sufficient_reps + #per sample for all samples + if quant_least_reps_per_samps: + prots = self.proteins_in_n_replicates_per_samples(df,n_reps=1) + df = df.loc[prots] + + #subset to have proteins found in atleast n_reps + #per sample for all samples + if all_reps_quant: + prots = self.proteins_quant_all_reps(df) + df = df.loc[prots] + + #subset to have proteins with high annotation score + if uniprot_annot: + #set uniprot file if asked for, for protein filtering + self.set_uniprot_file() + prots = self.proteins_with_uniprot_annot(df,status=annot_status) + df = df.loc[prots] + + # subset to have proteins with certain intrasample variability + if intrasamp_var: + + prots = self.proteins_by_intrasample_variability(df, + higher=higher, + threshold=threshold) + df = df.loc[prots] + + self.replicates_hq = df + + def set_trans_replicates_hq(self, + trans='None', + add_small=False, + stat_thresh=False, + threshold=95, + higher=False, + statistic='variance'): + """ + Transforming hq values with different functions, by default log1p. + + Depends on : replicates_hq, proteins_by_statistic_threshold + + Parameters + ---------- + + trans: numerical transformation. 
Default: scikitlearn's StandardScaler + Indicates how to transform the raw protein values + add_small + Add small value if any dataframe value is 0 - when log transforming + stat_threshold + Subsetting dataframe by a statistic threshold + threshold [0,100] + Quantile for statistic threshold + higher + Subset above or below statistic threshold + statistic {'mean','median','variance'} + Statistic to be applied + + """ + + #set hq dataframe; make sure there's no NaN values + df = self.replicates_hq + + #instantiate/declare/set up Standard scaler model from + #scikitlearn on data + scaler = StandardScaler() + scaler.fit(df) + + #list of name-function pairs + func = [('log1p', np.log1p), + ('log2', np.log2), + ('sklearn', scaler.transform), + ('rank_normalize', rank_normalize), + ('quantile_normalize', quantileNormalize), + ('None', pd.DataFrame.copy)] + + #get appropriate list index for function from trans string + m = [x for x in range(len(func)) if trans in func[x][0]][0] + + #add small epsilon value to all protein values + if add_small: + eps = 0.0001 + df = df.applymap(lambda x : x + eps) + + #apply function from appropriate function list index + data = pd.DataFrame(func[m][1](df), + index=df.index, + columns=df.columns + ) + + #subset to have proteins that meet a certain statistical threshold + if stat_thresh: + prots = self.proteins_by_statistic_threshold(data, + statistic=statistic, + threshold=threshold, + higher=higher) + data = data.loc[prots] + + self.trans_replicates_hq = data + + def set_samples_hq(self, + agg='mean', + uniprot_annot=False, + annot_status='reviewed'): + """ + Setting sample dataframe from processed replicate dataframe + + Depends on : trans_replicates_hq, make_df_samples + Parameters + ---------- + + agg: {'mean','median', 'variance'} + statistic to apply to replicate values for sample membership + uniprot_annot {True,False} + Subset proteins by annot=ation status in uniprot database + annot_status {'reviewed','unreviewed'} + Annotation 
status in uniprot + """ + + #get processed replicates + df = self.trans_replicates_hq + + #subset to have proteins with high annotation score + if uniprot_annot: + #set uniprot file if asked for, for protein filtering + self.set_uniprot_file() + prots = self.proteins_with_uniprot_annot(df,status=annot_status) + df = df.loc[prots] + + self.samples_hq = self.make_df_samples( df , agg = agg ) + + def set_trans_samples_hq(self, + trans='None', + add_small=False, + stat_thresh=False, + threshold=95, + higher=False, + statistic='variance'): + """ + Transforming hq values with different functions, by default log1p. + + Depends on : samples_hq, proteins_by_statistic_threshold + + Parameters + ---------- + + trans: numerical transformation. Default: scikitlearn's StandardScaler + Indicates how to transform the raw protein values + add_small + Add small value if any dataframe value is 0 - when log transforming + stat_threshold + Subsetting dataframe by a statistic threshold + threshold [0,100] + Quantile for statistic threshold + higher + Subset above or below statistic threshold + statistic {'mean','median','variance'} + Statistic to be applied + + """ + + #set hq dataframe; make sure there's no NaN values + data = self.samples_hq + + #instantiate/declare/set up Standard scaler model from + #scikitlearn on data + scaler = StandardScaler() + scaler.fit(data) + + #only do log (other than log1p) transformations if values are + #all nonzero + if len(np.where(data.as_matrix().ravel()==0)[0])>0: + add_small = True + print('There are zero values in the sample dataframe, a small number epsilon must be added to the protein values when there are zero values. 
Adding small epsilon...') + + #list of name-function pairs + func = [('log1p', np.log1p), + ('log2', np.log2), + ('sklearn', scaler.transform), + ('rank_normalize', rank_normalize), + ('quantile_normalize', quantileNormalize), + ('None', pd.DataFrame.copy)] + + #get appropriate list index for function from trans string + m = [x for x in range(len(func)) if trans in func[x][0]][0] + + #add small epsilon value to all protein values + if add_small: + eps = 0.0001 + data = data.applymap(lambda x : x + eps) + + #apply function from appropriate function list index + data = pd.DataFrame(func[m][1](data), + index=data.index, + columns=data.columns + ) + + #subset to have proteins that meet a certain statistical threshold + if stat_thresh: + prots = self.proteins_by_statistic_threshold(data, + statistic=statistic, + threshold=threshold, + higher=higher) + data = data.loc[prots] + + self.trans_samples_hq = data + + def set_samples(self): + """ + Setting patient names from raw dataframe + + Depends on : sample_replicate_dictionary + + Parameters + ---------- + None + + """ + self.samples = [x for x in self.sample_replicate_dictionary.keys()] + + def set_proteins(self): + """ + Setting protein ids from raw dataframe + + Depends on raw_replicates + + Parameters + ---------- + None + + """ + self.proteins = self.raw_replicates.index.values + + def set_sample_groups(self,file=None): + """ + Setting group membership of samples dataframe where 1 indicates membership and 0 no membership. + + Parameters + ---------- + file + file to read sample groups data. 
Defaults to parameters from cohorts instantiation + """ + + #if file is not None: + + self.sample_groups = pd.read_csv(self.sample_groups_file,delimiter="\t",index_col=0) + + def set_df_replicate_groups(self): + """ + Aggregating df_replicate_groups to derive sample group + membership + + Parameters + ---------- + None + + """ + + mats = {} + for samp in self.samples: + single = self.sample_groups.loc[:,samp] + reps = self.sample_replicate_dictionary[samp] + for r in reps: + mats[r] = single + + return pd.DataFrame.from_dict(mats) + + def set_replicate_groups(self,file=None): + """ + Setting group membership of replicate dataframe where 1 indicates membership and 0 no membership. + + Parameters + ---------- + file + file to read replicate groups data. Defaults to parameters from cohorts instantiation + + """ + + self.replicate_groups = self.set_df_replicate_groups() + + def set_groups(self): + """ + Setting groups from sample groups dataframe + + Parameters + ---------- + None + + """ + + self.groups = self.sample_groups.index.values + + def set_ref(self,ref='NL'): + """ + Setting reference group (hard coded to be normal patients or 'NL', but can change this after declaration) + + Parameters + ---------- + + ref: string + Reference group name + + + """ + + #get ref index + ind = np.where(self.groups==ref)[0] + + #make sure there's only one trt name in all groups + if len(ind)==1: + + self.ref = self.groups[ind] + + else: + + self.ref = np.asarray(self.groups[0],dtype='object') + + def set_treat(self,trt='PGD'): + """ + Setting treatment group + + Parameters + ---------- + + trt: string + Treatment group name + """ + + #get trt index + ind = np.where(self.groups==trt)[0] + + #make sure there's only one trt name in all groups + if len(ind)==1: + + self.treat = self.groups[ind] + else: + + self.treat = np.asarray(self.groups[1],dtype='object') + + def set_tidy_replicate_groups(self): + """ + Setting tidy group membership of replicates, where each replicate is the 
observation and group membership is an attribute (column) + + Parameters + ---------- + None + + """ + + #copy sample groups dataframe + df = self.replicate_groups.T.copy() + + #set Replicates as column in dataframe + df.loc[:,'Replicates'] = df.index + + #melt to tidy dataframe + melted = pd.melt(df, + id_vars=['Replicates'], + value_vars=df.columns.tolist()[0:len(df.columns)-1], + var_name='Groups' + ) + + #filter for group membership to samples + melted_filtered = melted.query('value != 0') + + #delete value {0,1} column-unnecessary since it now redundantly indicates membership + del melted_filtered['value'] + + #set Samples dtype as string + melted_filtered.loc[:,'Replicates'] = melted_filtered['Replicates'].astype(str) + + self.tidy_replicate_groups = melted_filtered + + def set_tidy_sample_groups(self): + """ + Setting tidy group membership of samples, where each sample is the observation and group membership is an attribute (column) + + Parameters + ---------- + None + + """ + + #copy sample groups dataframe + df = self.sample_groups.copy() + + #set Samples as column in dataframe + df.loc[:,'Samples'] = df.index + + #melt to tidy dataframe + melted = pd.melt(df, + id_vars=['Samples'], + value_vars=df.columns.tolist()[0:len(df.columns)-1], + var_name='Groups' + ) + + #filter for group membership to samples + melted_filtered = melted.query('value != 0') + + #delete value {0,1} column-unnecessary since it now redundantly indicates membership + del melted_filtered['value'] + + #set Samples dtype as string + melted_filtered.loc[:,'Samples'] = melted_filtered['Samples'].astype(str) + + self.tidy_sample_groups = melted_filtered + + def get_protein_annotations(self,string='hq'): + + ''' + Get Uniprot protein ontology data + + Parameters + ---------- + string : string + Indicate ontology data for either uniprot expert-curated or 'reviewed' proteins or all proteins from raw + + Output + ------- + tab : Ontology dataframe + ''' + + #get uniprot ontology + tab = 
get_uniprot_table() + + #subset ontology with proteins of interest + if string=='hq': + + inds = tab.index.isin(self.proteins) + + return tab[inds] + + if string=='all': + + return tab + + def get_uniprot_table(self): + """ + Upload uniprot database flat file + + Parameters + ---------- + None + + """ + + tab = pd.read_csv(self.data_dir+self.uniprot_file,delimiter="\t",index_col=0) + + return tab + + #ANALYSIS FUNCTIONS + + def manual_feature_extraction(self,df): + """ + Query protein quantification between reference and other groups + + Parameters + ---------- + df + dataframe to manually extract features + """ + + #load raw and sample group dataframes + df_samples = df + df_sample_groups = self.sample_groups + + #make rownames-presence/absence/mixed conditions of proteins amongst samples + val_grps = ('allq', 'allnotq', 'mixed') + tmp = list(it.product(val_grps,val_grps)) + rownames = [] + for i in tmp: + rownames.append("_".join(i)) + + #set column names-reference/indicator scenarios + x = self.groups != self.ref[0] + t = [ self.ref[0] + '/' + x for x in self.groups[x] ] + colnames = tuple(t) + + + #populate length and protein array dataframes + df_len = pd.DataFrame(index=rownames,columns=colnames) + df_arr = pd.DataFrame(index=rownames,columns=colnames) + + #populate each element with an empty array or list + for i, j in it.product(range(df_len.shape[0]),range(df_len.shape[1])): + df_len.iloc[i][j] = () + for i, j in it.product(range(df_arr.shape[0]),range(df_arr.shape[1])): + df_arr.iloc[i][j] = [] + + #make list of functions for assessing presence/absence/mixed protein status + singlefuncs = [ ( "allq", allq ) , + ("allnotq", allnotq ) , + ("mixed", mixed ) + ] + + #set reference group + ref_grp = self.ref[0] + + #loop through indicators to assess presense/absence/mixed proteins for each reference/indicator scenario and populate length/protein array dataframes + for i in self.groups[x]: + + #set reference/indicator scenario + gr1 = ref_grp + gr2 = i + 
colname = gr1+"/"+gr2 + print(colname) + + #make arrays of proteins for reference samples for each assessment condition + gr1_arr = [] + tmp = df_samples.T + arr1 = df_sample_groups.loc[gr1] == 1 + X = tmp[arr1.values].transpose() + for l in range(len(val_grps)): + tmp = singlefuncs[l][1](X)#slow + gr1_arr.append(tmp) + + #make arrays of proteins for reference samples for each assessment condition + gr2_arr = [] + tmp = df_samples.T + arr2 = df_sample_groups.loc[gr2] == 1 + X = tmp[arr2.values].transpose() + for k in range(len(val_grps)): + tmp = singlefuncs[k][1](X) + gr2_arr.append(tmp) + + #for each combination of gr1_arr and gr2_arr, do intersection and append to corresponding row in column of data frame. This gives proteins in each condition for each reference/indicator scenario + for m,n in it.product( range( len(gr1_arr) ) , range( len(gr2_arr) ) ): + rowname = singlefuncs[m][0]+"_"+singlefuncs[n][0] + df_len.loc[ rowname , colname ] = len ( np.intersect1d( gr1_arr[m], gr2_arr[n] ) ) + df_arr.loc[ rowname , colname ] = np.intersect1d( gr1_arr[m], gr2_arr[n] ) + + #dictionary of length and protein array dataframes + dictionary = { 'df_len' : df_len, 'df_arr' : df_arr} + + helper_dictionary = self.make_protein_substraction(dictionary) + + self.data['mfe'] = { 'main' : dictionary, 'helper' : helper_dictionary } + + def make_protein_substraction(self,dictionary=None): + ''' + Helper function only for manual_feature_extraction method to do set operations on protein results + + Right now supports difference of proteins + + Parameters + ---------- + dictionary + dictionary to fill in-fed from manual_feature_extraction + + Output + ------ + Dictionary of protein array length band array from difference ofv manually extracted features between ref and treat groups + ''' + #declare dictionary from manual feature extraction + mfe = dictionary + + #set comparisons made in mfe + comps_grps = mfe['df_len'].columns.tolist() + + #set protein feature comparisons from manual 
feature extraction + comps_prots = mfe['df_len'].index.tolist() + + #make combination of comparisons-resulting dataframe rows and columns + combs = list( + it.product( + comps_grps, + comps_grps + ) + ) + + #make dictionary of presence,absence, mixed. Values will be NxN dataframe labeled with combs above + protein_dict = dict.fromkeys( + comps_prots + ) + + #set dataframe to extract proteins from + df = mfe['df_arr'] + + #loop through dictionary keys which are the protein feature comparisons + for i in protein_dict.keys(): + + #make empty NxN dataframe to store protein lengths and arrays + empty_len = pd.DataFrame( + index = comps_grps, + columns = comps_grps + ) + + #make copied empty dataframe to store protein arrays + empty_diff = empty_len.copy() + + #loopp through dataframe index, columns and store length of difference and proteins from difference of sets + for j in comps_grps: + for k in comps_grps: + diff_arr = set(df.loc[i,j]) - set(df.loc[i,k]) + length = len(diff_arr) + empty_len.at[j,k] = length + empty_diff.at[j,k] = diff_arr + + #store dictionary of dataframes as value in dictionary + protein_dict[i] = { 'df_len' : empty_len, 'df_arr' : empty_diff } + + return protein_dict + + def hypothesis_testing(self,df,df_groups): + """ + Hypothesis testing of reference sample proteins versus treatment sample proteins. 
+ + Parameters + ---------- + df + dataframe of values to test + df_groups + group membership of samples/replicates in df to test + + Outputs + ------- + df + hypothesis test results + """ + + #get subset dataframes by reference and treatment groups + df1 = self.get_sub_df(df,df_groups,self.ref[0]) + df2 = self.get_sub_df(df,df_groups,self.treat[0]) + + #List of hypothesis test names and functions + tests = self.tests + + #set dataframe to fill + df = pd.DataFrame(columns=['Protein','Test','Pvalue','Statistic']) + + #for each hypothesis test name and function + for i, hyp in enumerate(tests): + + #set test name + test = hyp[0] + + #for each protein + for j in range(0,df1.shape[0]): + + #get protein location in dataframes + firstloc = df1.iloc[j] + secondloc = df2.iloc[j] + + #make sure the arrays are filled with floats + a = firstloc.values.astype(float) + b = secondloc.values.astype(float) + + #make sure the name of the protein is the same in each dataframe + if firstloc.name == secondloc.name: + + #set protein name + protein = firstloc.name + else: + break + #set statistic from test + stat = hyp[1](a,b)[0] + + #set pvalue from test + pval = hyp[1](a,b)[1] + + #put data in pandas series + row = pd.Series([protein,test,pval,stat],index=df.columns) + + #add row to dataframe + df = df.append(row,ignore_index=True) + + #set correct object membership of dataframe columns + df = df.astype(dtype= { + "Protein":"str", + "Test":"str", + "Pvalue":"float64", + "Statistic":'float64' + } + ) + + return df + + def get_sub_df(self,df,df_groups,grp): + ''' + Helper function in class to get patient-subset dataframe. 
+ + Subset larger dataframe by reference and treatment groups + + Used in hypothesis_testing() + + Parameters + ---------- + df + proteomics data dataframe given to hypothesis_testing + df_groups + groups dataframe given to hypothesis testing + grp str + group name for subsetting + + Output + ------ + subsetted dataset + ''' + + #get indices of replicates/samples in groups + inds = np.where(df_groups.loc[grp] == 1) + + #get + + return df.T.iloc[inds].T + + #PROTEIN SUBSET FUNCTIONS + + def proteins_with_uniprot_annot(self,df,status='reviewed'): + """ + Get proteins that have high annotation score by uniprot + + Parameters + ---------- + df + dataframe to subset + status {'reviewed','unreviewed'} + protein annotation score from uniprot curation + + Output + ------ + subsetted uniprot database + """ + + return self.uniprot_annot(df,status=status).index.ravel() + + def proteins_in_n_replicates_per_samples(self,df,n_reps=1): + """ + Subsetting dataset by proteins that are quantified in more atleast n replicates per sample for all samples + + Parameters + ---------- + df + dataframe to subset + n_reps + Least number of replicates quantified per sample + + Output + ------ + subsetted proteins + """ + samp_rep_dict = self.sample_replicate_dictionary + + atleast1_reps = [] + for samp in samp_rep_dict.keys(): + reps = samp_rep_dict[samp] + sub = df.T.loc[reps] + mask = sub.apply(lambda x : x > 0,axis=1) + #proteins quantified in more than 1 replicates of a sample + prot_bool = mask.sum(axis=0) > n_reps + [atleast1_reps.append(x) for x in prot_bool.index[prot_bool].tolist()] + + atleast1_rep_per_samp_prots = np.unique(atleast1_reps) + + return atleast1_rep_per_samp_prots + + def proteins_quant_all_reps(self,df): + """ + Helper function returning all proteins that + are quantified in every replicate. 
+ + Parameters + ---------- + df + dataframe to subset + + Output + ------ + subsetted proteins + """ + + #determine how many reps show quantification of the protein + num_reps_quant = df.apply(lambda x : sum(x > 0),axis=1) + + #is the num reps all of them? + all_reps_quant = num_reps_quant.apply( + lambda x : x == df.shape[1]) + + #get protein names + allquant_prots = df.index[all_reps_quant] + + return allquant_prots + + def proteins_by_statistic_threshold(self, + df, + statistic='mean', + higher=False, + threshold=80): + """ + Proteins that meet a certain statistical threshold + e.g. proteins that have low variance across the dataset + + outputs proteins above/below that threshold + + e.g. threshold=25 gives proteins that are below the 25th quantile + of the statistical function. Threshold=80 gives proteins that are + below the 80th quantile of the statistical function + + Parameters + ---------- + df + dataframe to subset + statistic: {'mean','median', 'variance'} + statistic to apply across values + higher {True,False} + Indicates whether to subset by proteins with a high annotation score from Uniprot + threshold [0,100] + Quantile for statistic threshold + + Output + ------ + subsetted proteins + + """ + #possible statistical functions to threshold by + funcs = { + 'mean' : np.mean, + 'median' : np.median, + 'variance' : np.var, + 'std' : np.std + } + + #collect proteins in dataset + prots = df.index + + #create series by applying statistic + series = df.apply(funcs[statistic],axis=1) + + #store dataset value of threshold + thresh = (series.describe( + percentiles=np.arange(0,1,.01) + ) + .loc['0%':] + .iloc[threshold] + ) + + #determine direction of threshold + #if greater than 50, greater than or equal to, + #otherwise, less than + if higher: + return prots[series.where(series > thresh).notnull()] + else: + return prots[series.where(series <= thresh).notnull()] + + def proteins_by_intrasample_variability(self, + df,higher=False,threshold=80): + """ + Output 
proteins below a certain variance threshold for + replicates within all samples + + Parameters + ---------- + df + dataframe to subset + higher {True,False} + Indicates whether to subset by proteins with a high annotation score from Uniprot + threshold [0,100] + Quantile for statistic threshold + + Output + ------ + subsetted proteins + + """ + + #transpose + df = df.T + + samp_rep_dict = self.sample_replicate_dictionary + + prots = pd.Index([]) + + for samp in samp_rep_dict.keys(): + reps = samp_rep_dict.get(samp) + sub = df.loc[np.asarray(reps)] + prots = prots.append( \ + self.proteins_by_statistic_threshold(df=sub.T, + higher=False, + statistic='variance', + threshold=threshold)) + + return prots.unique() + + #HELPER FUNCTIONS + def uniprot_annot(self, df,status='reviewed'): + """ + Subset dataset by proteins that have high annotation score by uniprot + + Parameters + ---------- + df + dataframe to subset + annot_status {'reviewed','unreviewed'} + Annotation status in uniprot + + Output + ------ + subsetted dataset + """ + + #get uniprot data + annot = self.get_protein_annotations('hq') + + #get reviewed uniprot data + annot_reviewed = annot[annot['Status'].str.match(status)] + + #getting all uniprot reviewed proteins + cprots = annot_reviewed.iloc[:,0].index + + #filter raw for reviewed proteins + raw_reviewed = df[df.index.isin(cprots)] + + #set new protein column + tmp = raw_reviewed.assign(Proteins=raw_reviewed.index.values) + + #group proteins because there might be duplicates + grouped = tmp.groupby('Proteins') + + #get one of the duplicated proteins + reviewed = grouped.max() + + return reviewed + + def make_df_samples(self,df,agg='mean'): + """ + From df_replicates, make df_samples by applying to the replicates for a sample the estimator indicated in the helper function + + Parameters + ---------- + df + replicates dataframe to aggregate + agg: {'mean','median', 'variance'} + statistic to apply to replicate values for sample membership + + Output + 
------ + subsetted dataset + """ + df_replicates = df + acc = df.index.tolist() + samples = self.samples + dictionary = self.sample_replicate_dictionary + + # initializing empty, desired dataframe of samples by proteins + df_samples = pd.DataFrame(columns=samples) + + #for each sample, apply the helper function to df_replicates sub dataframe to make df_samples + for sample in dictionary.keys(): + + #dataframe of replicates for a given sample + holder = df_replicates[dictionary[sample]] + + #determine sample protein value from replicate values + df = holder.agg(func=agg,axis=1) + + #put in df to larger df_samples + df_samples[sample] = df + + #add index-protein accessions + df_samples.index = acc + + return df_samples + + +if __name__=="__main__": + """ + Execute + """ + c = cohorts.Cohort('cumc') \ No newline at end of file diff --git a/cohorts/utils.py b/cohorts/utils.py new file mode 100644 index 0000000..662a880 --- /dev/null +++ b/cohorts/utils.py @@ -0,0 +1,248 @@ +import numpy as np +import pandas as pd +import seaborn as sns +import logging +from functools import reduce +import scipy.stats as sc + +def allq(X): + """ + Helper function used in manual_feature_extraction + + Parameters + ---------- + X + dataframe to subset + + Outputs + ------- + Subsetted dataframe with all proteins quantified + """ + applied = X.apply(lambda x: np.sum(x>0),axis=1) + bools = np.where(applied == len(X.columns)) + return X.iloc[bools].index.tolist() + +def allnotq(X): + """ + Helper function used in manual_feature_extraction + + Parameters + ---------- + X + dataframe to subset + + Outputs + ------- + Subsetted dataframe with no proteins quantified + """ + applied = X.apply(lambda x: np.sum(x),axis=1) + bools = np.where(applied == 0) + return X.iloc[bools].index.tolist() + +def mixed(X): + """ + Helper function used in manual_feature_extraction + + Parameters + ---------- + X + dataframe to subset + + Outputs + ------- + Subsetted dataframe with either 1 up to all proteins 
quantified + """ + applied = X.apply(lambda x: np.sum(x>0),axis=1) + condition1 = (applied < len(X.columns)) + condition2 = (applied > 0) + bools = np.where( condition1 & condition2 ) + return X.iloc[bools].index.tolist() + +def get_uniprot_table(file="/Users/npg2108/Research/Projects/exosome_pgf/data/uniprot-all_20171124.tab.gz"): + """ + Uploading uniprot database flat file + """ + tab = pd.read_csv(file,delimiter=" ",index_col=0) + + return tab + +def get_real(series, agg='mean'): + """ + Helper function to get array mean or return NaN for series of NaNs/floats. + Used to get sample value of protein for replicates in df_replicates + """ + + func = {'mean' : np.mean,'median': np.median,'variance' : np.var} + + # get logical list of whether the protein value is NaN + inds = [np.isnan(x) for x in series] + + #how many are NaN? + summ = np.sum(inds) + + #give NaN/float for protein value depending on number of NaNs in series + #returns NaN if all NaN values, and function value if atleasst one is non-NaN + if summ==len(inds): + return 0 + else: + #negativing-list of booleans for indices where non-NaN + reals = [not i for i in inds] + stat = func[agg](series[ reals ]) + return stat + +def rank_normalize(X): + """ + Helper function to normalize values in sample by normalized rank, so that protein values within a sample are [0,1] and indicate relative expression in that sample. Hopefully this will allow better comparisons between samples in a cohort. + """ + + df_ranked = X.apply(lambda x : sc.rankdata(x,method='dense')) + + df_ranked_normalized = df_ranked.apply(lambda x : (x / np.max(x))) + + return df_ranked_normalized + +def make_comparable(dfs,rank=True): + """ + Make list of cohort datasets (proteins x reps/samps) comparable i.e. make sure the + proteins are the same between the datasets and normalize. 
+ """ + + #get list of sets of proteins in each cohort dataset + if type(dfs) == dict: + prot_set_lst = [set(x.index) for i,x in dfs.items()] + dfs = [x for i,x in dfs.items()] + else: + prot_set_lst = [set(x.index) for x in dfs] + + #get protein intersection + com_prots = multiple_set_intersection(*prot_set_lst) + + #iterate through dfs and normalize and append to new list + dfs_new = [] + for df in dfs: + if rank: + dfs_new.append(rank_normalize(df.loc[com_prots])) + else: + dfs_new.append(df.loc[com_prots]) + + return dfs_new + +def multiple_set_intersection(*sets): + """ + Return multiple set intersection. + from https://stackoverflow.com/questions/2541752/best-way-to-find-the-intersection-of-multiple-sets + """ + try: + return set.intersection(*sets) + except TypeError: # this is Python < 2.6 or no arguments + pass + + try: a_set= sets[0] + except IndexError: # no arguments + return set() # return empty set + + return reduce(a_set.intersection, sets[1:]) + +def quantileNormalize(df_input): + """ + From https://github.com/ShawnLYU/Quantile_Normalize/blob/master/quantile_norm.py + Quantile normalization method of a dataset + """ + df = df_input.copy() + #compute rank + dic = {} + for col in df: + dic.update({col : sorted(df[col])}) + sorted_df = pd.DataFrame(dic) + rank = sorted_df.mean(axis = 1).tolist() + #sort + for col in df: + t = np.searchsorted(np.sort(df[col]), df[col]) + df[col] = [rank[i] for i in t] + return df + + #from here +#https://gist.github.com/mpschr/5db20df78c034654f030 + +def multi_df_join(df_dict): + """ + Takes a list of dataframes with the same primary key (id) and merges the columns, using + the dictionary keys as suffixes in case there are conflicts with the column names. 
+ Parameters + ---------- + df_dict: A dictionary where keys are strings (collision suffixes) and values are DataFrames + + Output + ------ + The merged data frames + """ + + # check if there are colliding column names + all_columns = [] + for df in df_dict.values(): + all_columns = all_columns + list(df.columns) + colliding_columns = set([x for x in all_columns if all_columns.count(x) > 1]) + + df = None + for suffix, input_df in df_dict.items(): + + # rename colliding columns + renamer_dict = {} + for col in list(input_df.columns): + if col in colliding_columns: + renamer_dict[col] = col + suffix + input_df.rename(columns=renamer_dict, inplace=True) + + # join columns + if df is None: + df = input_df + else: + df = df.join(input_df, how='outer') + logging.debug("Shape is currently: {}".format(df.shape)) + + return df + +def treat_ref_color_map(df,labels,groups,palette='hls'): + ''' + Create color dictionary for reference and treatment samples for use in plotting + + Parameters + ---------- + df + group labels from sample or replicate groups + labels + array of ref and treat group label + groups + label from dataframe in which we want to attribute colors to + Order by the labels + + Output + ------ + Samples to rgb pandas Series + ''' + + #create array of group names for correct sample membership + cond = [] * df.shape[0] + for g in groups: + for n in df.loc[g].ravel(): + if n == 1: + cond.append(g) + + #create a color palette with the same number of colors as unique groups-right now only 2 + network_pal = sns.color_palette(palette,n_colors=len(groups)) + + #create a dictionary where the key is the group and the values are the colors from the palette we just created + network_lut = dict(zip(groups, network_pal)) + + #convert array of group names to sample membership to a pandas series for mapping + networks = pd.Series(cond,index=labels) + + #map the colors to the series. 
Now we have a list of colors the same length as our dataframe, where unique values are mapped to the same color + network_colors = networks.map(network_lut) + + #change index class to string + network_colors.index = network_colors.index.astype('str') + + return network_colors + + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..af2d57a --- /dev/null +++ b/setup.py @@ -0,0 +1,10 @@ +from setuptools import setup + +setup(name='cohorts', + version='0.1a', + description='Proteomics exosome_pgf cohort structure', + author='Tatonetti Lab', + author_email='npg2108@cumc.columbia.edu', + license='MIT', + packages=['cohorts'], + zip_safe=False) diff --git a/test.ipynb b/test.ipynb new file mode 100644 index 0000000..20e0715 --- /dev/null +++ b/test.ipynb @@ -0,0 +1,2040 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "nbpresent": { + "id": "24799cf3-8a2c-41b4-98c0-2861feeaa846" + } + }, + "outputs": [], + "source": [ + "import sys\n", + "import cohorts\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing setters and attributes" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\tPatient cohort object for patient proteomics data.\n", + "\n", + "\tDataframes, variables, and functions facilitating the processing, analysis, and integration of the cohort data.\n", + "\n", + "\tParameters\n", + "\t----------\n", + "\tcohort: str\n", + "\t\tname of the patient cohort\n", + "\n", + "\tfile_dir: str\n", + "\t\tdirectory where replicate dataframe and \n", + "\t\tsample group membership dataframe file names are located\n", + "\n", + "\treplicates_file: str\n", + "\t\tname of the replicates dataframe file\n", + "\n", + "\t\tA proteins x B replicates\n", + "\t\ttab delimited\n", + "\n", + "\t\treplicate = \"SampleName\" + 
\"_Rep[0-9]\"\n", + "\n", + "\tsample_groups_file: str\n", + "\t\tname of the sample group file\n", + "\n", + "\t\tN groups x M samples\n", + "\n", + "\t\tsample = \"SampleName\"\n", + "\n", + "\tdata_dir: str\n", + "\t\tdirectory where extra data files are located\n", + "\t\t\n", + "\tuniprot_file: str\n", + "\t\tname of the uniprot database flat file located in data_dir\n", + "\n", + "\tExamples\n", + "\t--------\n", + "\t>>>c = cohorts.Cohort(cohort='cohort_name',\n", + "\t\tfile_dir=\"path/to/files/\"\n", + "\t\treplicates_file=\"file_name\",\n", + "\t\tsample_groups_file=\"sample_groups_file_name\",\n", + "\t\tdata_dir=\"path/to/data/dir/\",\n", + "\t\tuniprot_file=\"uniprot_flat_file\"\n", + "\t\t)\n", + "\t>>>c.set_replicates_hq()\n", + "\t>>>c.set_trans_replicates_hq()\n", + "\t>>>c.set_samples_hq()\n", + "\t>>>c.set_trans_samples_hq()\n", + "\n", + "\t\n" + ] + } + ], + "source": [ + "print(cohorts.Cohort.__doc__)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "nbpresent": { + "id": "7f9017a5-6455-4867-a8a2-c3842b3f9d9c" + }, + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/npg2108/Research/Projects/exosome_pgf/cohorts\n", + "../data/\n", + "cumc\n", + "['53688_rep1', '53688_rep2', '53688_rep3', '53688_rep4', '54109_rep1', '54109_rep2', '54109_rep3', '54109_rep4', '54314_rep1', '54314_rep2', '54314_rep3', '54314_rep4', '54793_rep1', '54793_rep2', '54793_rep3', '54793_rep4', '55175_rep1', '55175_rep2', '55175_rep3', '55175_rep4', '56191_rep1', '56191_rep2', '56191_rep3', '56191_rep4', '56603_rep1', '56603_rep2', '56603_rep3', '56603_rep4', '56767_rep1', '56767_rep2', '56767_rep3', '56767_rep4', '62128_rep1', '62128_rep2', '62128_rep3', '62128_rep4', '53190_rep1', '53190_rep2', '53190_rep3', '53190_rep4', '53488_rep1', '53488_rep2', '53488_rep3', '53488_rep4', '56118_rep1', '56118_rep2', '56118_rep3', '56118_rep4', '57268_rep1', '57268_rep2', '57268_rep3', 
'57268_rep4', '57267_rep1', '57267_rep2', '57267_rep3', '57267_rep4', '57270_rep1', '57270_rep2', '57270_rep3', '57270_rep4', '57269_rep1', '57269_rep2', '57269_rep3', '57269_rep4']\n", + "{'53688': ['53688_rep1', '53688_rep2', '53688_rep3', '53688_rep4'], '54109': ['54109_rep1', '54109_rep2', '54109_rep3', '54109_rep4'], '54314': ['54314_rep1', '54314_rep2', '54314_rep3', '54314_rep4'], '54793': ['54793_rep1', '54793_rep2', '54793_rep3', '54793_rep4'], '55175': ['55175_rep1', '55175_rep2', '55175_rep3', '55175_rep4'], '56191': ['56191_rep1', '56191_rep2', '56191_rep3', '56191_rep4'], '56603': ['56603_rep1', '56603_rep2', '56603_rep3', '56603_rep4'], '56767': ['56767_rep1', '56767_rep2', '56767_rep3', '56767_rep4'], '62128': ['62128_rep1', '62128_rep2', '62128_rep3', '62128_rep4'], '53190': ['53190_rep1', '53190_rep2', '53190_rep3', '53190_rep4'], '53488': ['53488_rep1', '53488_rep2', '53488_rep3', '53488_rep4'], '56118': ['56118_rep1', '56118_rep2', '56118_rep3', '56118_rep4'], '57268': ['57268_rep1', '57268_rep2', '57268_rep3', '57268_rep4'], '57267': ['57267_rep1', '57267_rep2', '57267_rep3', '57267_rep4'], '57270': ['57270_rep1', '57270_rep2', '57270_rep3', '57270_rep4'], '57269': ['57269_rep1', '57269_rep2', '57269_rep3', '57269_rep4']}\n", + "['53688', '54109', '54314', '54793', '55175', '56191', '56603', '56767', '62128', '53190', '53488', '56118', '57268', '57267', '57270', '57269']\n", + "['NL' 'PGD' 'RV' 'LV' 'vasoplegia']\n", + "1204\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda/envs/cohorts_test/lib/python3.6/site-packages/pandas/core/indexing.py:621: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + " self.obj[item_labels[indexer[info_axis]]] = value\n" + ] + 
}, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
53688541095431454793551755619156603567676212853190534885611857268572675727057269
P619810.000000.000000.000000.000000.000000.000000.00000.000000.607000.309750.384000.843250.360501.255500.404000.25225
E7EX291.218001.009751.059501.472751.889751.457501.29001.416500.954000.864250.793500.831001.636251.129000.725752.06750
P621910.000000.000000.000000.000000.000000.000000.00000.000000.239000.135500.100500.184250.269750.346500.599000.09350
Q994601.077500.280000.283750.316500.000000.000000.00000.000000.307750.951000.349750.321250.000000.000000.000000.00000
P522090.850250.480750.603000.921751.055000.413750.59950.882750.763250.804500.814001.075750.996750.811750.558752.21100
\n", + "
" + ], + "text/plain": [ + " 53688 54109 54314 54793 55175 56191 56603 56767 \\\n", + "P61981 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.0000 0.00000 \n", + "E7EX29 1.21800 1.00975 1.05950 1.47275 1.88975 1.45750 1.2900 1.41650 \n", + "P62191 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.0000 0.00000 \n", + "Q99460 1.07750 0.28000 0.28375 0.31650 0.00000 0.00000 0.0000 0.00000 \n", + "P52209 0.85025 0.48075 0.60300 0.92175 1.05500 0.41375 0.5995 0.88275 \n", + "\n", + " 62128 53190 53488 56118 57268 57267 57270 57269 \n", + "P61981 0.60700 0.30975 0.38400 0.84325 0.36050 1.25550 0.40400 0.25225 \n", + "E7EX29 0.95400 0.86425 0.79350 0.83100 1.63625 1.12900 0.72575 2.06750 \n", + "P62191 0.23900 0.13550 0.10050 0.18425 0.26975 0.34650 0.59900 0.09350 \n", + "Q99460 0.30775 0.95100 0.34975 0.32125 0.00000 0.00000 0.00000 0.00000 \n", + "P52209 0.76325 0.80450 0.81400 1.07575 0.99675 0.81175 0.55875 2.21100 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
53688_rep153688_rep253688_rep353688_rep454109_rep154109_rep254109_rep354109_rep454314_rep154314_rep2...57267_rep357267_rep457270_rep157270_rep257270_rep357270_rep457269_rep157269_rep257269_rep357269_rep4
P61981NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN0.7500.866NaNNaN0.4620.547NaNNaN
E7EX291.1421.3171.5560.8571.1501.0800.8041.0050.9301.235...1.5281.7310.7160.6260.7010.8601.6761.8571.5033.234
P62191NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaN2.396NaNNaNNaN0.374NaNNaN
Q99460NaNNaN4.310NaNNaNNaN1.120NaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
P522091.2781.1600.3350.6280.4610.4060.7280.3280.7870.731...0.9211.1440.7380.6370.3320.5281.8052.4652.6131.961
\n", + "

5 rows × 64 columns

\n", + "
" + ], + "text/plain": [ + " 53688_rep1 53688_rep2 53688_rep3 53688_rep4 54109_rep1 \\\n", + "P61981 NaN NaN NaN NaN NaN \n", + "E7EX29 1.142 1.317 1.556 0.857 1.150 \n", + "P62191 NaN NaN NaN NaN NaN \n", + "Q99460 NaN NaN 4.310 NaN NaN \n", + "P52209 1.278 1.160 0.335 0.628 0.461 \n", + "\n", + " 54109_rep2 54109_rep3 54109_rep4 54314_rep1 54314_rep2 \\\n", + "P61981 NaN NaN NaN NaN NaN \n", + "E7EX29 1.080 0.804 1.005 0.930 1.235 \n", + "P62191 NaN NaN NaN NaN NaN \n", + "Q99460 NaN 1.120 NaN NaN NaN \n", + "P52209 0.406 0.728 0.328 0.787 0.731 \n", + "\n", + " ... 57267_rep3 57267_rep4 57270_rep1 57270_rep2 \\\n", + "P61981 ... NaN NaN 0.750 0.866 \n", + "E7EX29 ... 1.528 1.731 0.716 0.626 \n", + "P62191 ... NaN NaN NaN 2.396 \n", + "Q99460 ... NaN NaN NaN NaN \n", + "P52209 ... 0.921 1.144 0.738 0.637 \n", + "\n", + " 57270_rep3 57270_rep4 57269_rep1 57269_rep2 57269_rep3 57269_rep4 \n", + "P61981 NaN NaN 0.462 0.547 NaN NaN \n", + "E7EX29 0.701 0.860 1.676 1.857 1.503 3.234 \n", + "P62191 NaN NaN NaN 0.374 NaN NaN \n", + "Q99460 NaN NaN NaN NaN NaN NaN \n", + "P52209 0.332 0.528 1.805 2.465 2.613 1.961 \n", + "\n", + "[5 rows x 64 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
53190_rep153190_rep253190_rep353190_rep453488_rep153488_rep253488_rep353488_rep453688_rep153688_rep2...57269_rep357269_rep457270_rep157270_rep257270_rep357270_rep462128_rep162128_rep262128_rep362128_rep4
NL0000000011...0000000000
PGD1111111100...1111111111
RV1111111100...1111111111
LV1111000000...1111111111
vasoplegia0000000000...0000000000
\n", + "

5 rows × 64 columns

\n", + "
" + ], + "text/plain": [ + " 53190_rep1 53190_rep2 53190_rep3 53190_rep4 53488_rep1 \\\n", + "NL 0 0 0 0 0 \n", + "PGD 1 1 1 1 1 \n", + "RV 1 1 1 1 1 \n", + "LV 1 1 1 1 0 \n", + "vasoplegia 0 0 0 0 0 \n", + "\n", + " 53488_rep2 53488_rep3 53488_rep4 53688_rep1 53688_rep2 \\\n", + "NL 0 0 0 1 1 \n", + "PGD 1 1 1 0 0 \n", + "RV 1 1 1 0 0 \n", + "LV 0 0 0 0 0 \n", + "vasoplegia 0 0 0 0 0 \n", + "\n", + " ... 57269_rep3 57269_rep4 57270_rep1 57270_rep2 \\\n", + "NL ... 0 0 0 0 \n", + "PGD ... 1 1 1 1 \n", + "RV ... 1 1 1 1 \n", + "LV ... 1 1 1 1 \n", + "vasoplegia ... 0 0 0 0 \n", + "\n", + " 57270_rep3 57270_rep4 62128_rep1 62128_rep2 62128_rep3 \\\n", + "NL 0 0 0 0 0 \n", + "PGD 1 1 1 1 1 \n", + "RV 1 1 1 1 1 \n", + "LV 1 1 1 1 1 \n", + "vasoplegia 0 0 0 0 0 \n", + "\n", + " 62128_rep4 \n", + "NL 0 \n", + "PGD 1 \n", + "RV 1 \n", + "LV 1 \n", + "vasoplegia 0 \n", + "\n", + "[5 rows x 64 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
53688541095431454793551755619156603567676212853190534885611857268572675727057269
NL1111111100000000
PGD0000000011111111
RV0000000011111111
LV0000000011000011
vasoplegia0000000000010000
\n", + "
" + ], + "text/plain": [ + " 53688 54109 54314 54793 55175 56191 56603 56767 62128 \\\n", + "NL 1 1 1 1 1 1 1 1 0 \n", + "PGD 0 0 0 0 0 0 0 0 1 \n", + "RV 0 0 0 0 0 0 0 0 1 \n", + "LV 0 0 0 0 0 0 0 0 1 \n", + "vasoplegia 0 0 0 0 0 0 0 0 0 \n", + "\n", + " 53190 53488 56118 57268 57267 57270 57269 \n", + "NL 0 0 0 0 0 0 0 \n", + "PGD 1 1 1 1 1 1 1 \n", + "RV 1 1 1 1 1 1 1 \n", + "LV 1 0 0 0 0 1 1 \n", + "vasoplegia 0 0 1 0 0 0 0 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SamplesGroups
0NL53688
5NL54109
10NL54314
15NL54793
20NL55175
\n", + "
" + ], + "text/plain": [ + " Samples Groups\n", + "0 NL 53688\n", + "5 NL 54109\n", + "10 NL 54314\n", + "15 NL 54793\n", + "20 NL 55175" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "top = \"~/Research/Projects/exosome_pgf/\"\n", + "file_dirs ={ 'cumc' : top+\"cumc/data/\", 'cedar' : top+\"cedarsinai/data/\", 'paris' : top+\"paris/data/\"}\n", + "replicates_files = { 'cumc' : \"df_replicates.tab\", 'cedar' : \"df_replicates.tab\", 'paris' : \"df_replicates.tab\"}\n", + "replicate_groups_files = { 'cumc' : \"df_replicate_groups.tab\", 'cedar' : \"df_replicate_groups.tab\", 'paris' : \"df_replicate_groups.tab\"}\n", + "sample_groups_files = { 'cumc' : \"df_sample_groups_v2.tab\", 'cedar' : \"df_sample_groups.tab\", 'paris' : \"df_sample_groups.tab\"}\n", + "cohort = 'cumc'\n", + "c = cohorts.Cohort(cohort=cohort,\n", + " data_dir=\"../data/\",file_dir=file_dirs[cohort],\n", + " replicates_file=replicates_files[cohort],\n", + " sample_groups_file=sample_groups_files[cohort],\n", + " uniprot_file=\"uniprot-all_20171124.tab.gz\")\n", + "print(c.cwd)\n", + "print(c.data_dir)\n", + "print(c.cohort)\n", + "print(c.replicates)\n", + "print(c.sample_replicate_dictionary) \n", + "print(c.samples)\n", + "print(c.groups)\n", + "print(len(c.proteins))\n", + "display(c.raw_samples.head())\n", + "display(c.raw_replicates.head())\n", + "display(c.replicate_groups.head())\n", + "display(c.sample_groups.head())\n", + "display(c.tidy_sample_groups.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## testing analysis functions" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NL/PGD\n", + "NL/RV\n", + "NL/LV\n", + "NL/vasoplegia\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NL/PGDNL/RVNL/LVNL/vasoplegia
allq_allq535535535546
allq_allnotq63636369
allq_mixed1717170
allnotq_allq69696969
allnotq_allnotq76767676
allnotq_mixed0000
mixed_allq15151563
mixed_allnotq00035
mixed_mixed8383830
\n", + "
" + ], + "text/plain": [ + " NL/PGD NL/RV NL/LV NL/vasoplegia\n", + "allq_allq 535 535 535 546\n", + "allq_allnotq 63 63 63 69\n", + "allq_mixed 17 17 17 0\n", + "allnotq_allq 69 69 69 69\n", + "allnotq_allnotq 76 76 76 76\n", + "allnotq_mixed 0 0 0 0\n", + "mixed_allq 15 15 15 63\n", + "mixed_allnotq 0 0 0 35\n", + "mixed_mixed 83 83 83 0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "c.set_ref('NL')\n", + "c.set_treat('PGD')\n", + "c.set_replicates_hq()\n", + "c.set_trans_replicates_hq()\n", + "c.set_samples_hq(uniprot_annot=True)\n", + "c.manual_feature_extraction(c.samples_hq)\n", + "display(c.data['mfe']['main']['df_len'])\n", + "#load raw and sample group dataframes" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NL/PGDNL/RVNL/LVNL/vasoplegia
NL/PGD0000
NL/RV0000
NL/LV0000
NL/vasoplegia1111110
\n", + "
" + ], + "text/plain": [ + " NL/PGD NL/RV NL/LV NL/vasoplegia\n", + "NL/PGD 0 0 0 0\n", + "NL/RV 0 0 0 0\n", + "NL/LV 0 0 0 0\n", + "NL/vasoplegia 11 11 11 0" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NL/PGDNL/RVNL/LVNL/vasoplegia
NL/PGD0000
NL/RV0000
NL/LV0000
NL/vasoplegia0000
\n", + "
" + ], + "text/plain": [ + " NL/PGD NL/RV NL/LV NL/vasoplegia\n", + "NL/PGD 0 0 0 0\n", + "NL/RV 0 0 0 0\n", + "NL/LV 0 0 0 0\n", + "NL/vasoplegia 0 0 0 0" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NL/PGDNL/RVNL/LVNL/vasoplegia
NL/PGD0000
NL/RV0000
NL/LV0000
NL/vasoplegia6660
\n", + "
" + ], + "text/plain": [ + " NL/PGD NL/RV NL/LV NL/vasoplegia\n", + "NL/PGD 0 0 0 0\n", + "NL/RV 0 0 0 0\n", + "NL/LV 0 0 0 0\n", + "NL/vasoplegia 6 6 6 0" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NL/PGDNL/RVNL/LVNL/vasoplegia
NL/PGD0000
NL/RV0000
NL/LV0000
NL/vasoplegia0000
\n", + "
" + ], + "text/plain": [ + " NL/PGD NL/RV NL/LV NL/vasoplegia\n", + "NL/PGD 0 0 0 0\n", + "NL/RV 0 0 0 0\n", + "NL/LV 0 0 0 0\n", + "NL/vasoplegia 0 0 0 0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(c.data['mfe']['helper']['allq_allq']['df_len'])\n", + "display(c.data['mfe']['helper']['allnotq_allnotq']['df_len'])\n", + "display(c.data['mfe']['helper']['allq_allnotq']['df_len'])\n", + "display(c.data['mfe']['helper']['allnotq_allq']['df_len'])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ProteinTestPvalueStatistic
0A0A075B6H9t-test0.1596261.485322
1A0A075B6I0t-test0.6413050.476162
2A0A075B6I1t-test0.315327-1.041432
3A0A075B6I4t-test0.0000515.744282
4A0A075B6I9t-test0.5059800.682629
5A0A075B6J1t-test0.6187970.508829
6A0A075B6J6t-test0.1947161.361986
7A0A075B6J9t-test0.9269960.093289
8A0A075B6K0t-test0.370359-0.925552
9A0A075B6K2t-test0.2957901.086082
10A0A075B6K4t-test0.144513-1.545552
11A0A075B6K5t-test0.292936-1.092785
12A0A075B6K6t-test0.101548-1.752511
13A0A075B6P5t-test0.3106751.051874
14A0A075B6Q5t-test0.916957-0.106166
15A0A075B6S4t-test0.5814290.564375
16A0A075B6S5t-test0.8900320.140803
17A0A075B6S6t-test0.1452871.542342
18A0A087WSX0t-test0.1386811.570236
19A0A087WSY4t-test0.6235040.501950
20A0A087WSY6t-test0.7707990.297038
21A0A087WSZ0t-test0.098795-1.768246
22A0A0A0MRZ9t-test0.215526-1.297221
23A0A0A0MS14t-test0.0042253.410355
24A0A0A0MT36t-test0.075822-1.917398
25A0A0B4J1U3t-test0.1619361.476540
26A0A0B4J1V0t-test0.7958920.263649
27A0A0B4J1V1t-test0.6353070.484812
28A0A0B4J1V2t-test0.1409851.560378
29A0A0B4J1V6t-test0.6917510.404787
...............
1686Q9UKW4Wilcoxon_RankSum_test0.1414821.470294
1687Q9UKY1Wilcoxon_RankSum_test1.0000000.000000
1688Q9UKY7Wilcoxon_RankSum_test0.000778-3.360672
1689Q9ULV4Wilcoxon_RankSum_test0.674424-0.420084
1690Q9UM47Wilcoxon_RankSum_test0.916359-0.105021
1691Q9UMZ2Wilcoxon_RankSum_test1.0000000.000000
1692Q9UN67Wilcoxon_RankSum_test0.833635-0.210042
1693Q9UNW1Wilcoxon_RankSum_test0.0928921.680336
1694Q9UQP3Wilcoxon_RankSum_test0.058707-1.890378
1695Q9Y289Wilcoxon_RankSum_test1.0000000.000000
1696Q9Y2H0Wilcoxon_RankSum_test0.000778-3.360672
1697Q9Y2W2Wilcoxon_RankSum_test1.0000000.000000
1698Q9Y345Wilcoxon_RankSum_test1.0000000.000000
1699Q9Y490Wilcoxon_RankSum_test0.344562-0.945189
1700Q9Y5C1Wilcoxon_RankSum_test0.0007783.360672
1701Q9Y5G5Wilcoxon_RankSum_test1.0000000.000000
1702Q9Y5G6Wilcoxon_RankSum_test1.0000000.000000
1703Q9Y5Q9Wilcoxon_RankSum_test0.400814-0.840168
1704Q9Y5S2Wilcoxon_RankSum_test0.008652-2.625525
1705Q9Y5Y7Wilcoxon_RankSum_test0.0117192.520504
1706Q9Y615Wilcoxon_RankSum_test0.8336350.210042
1707Q9Y6B7Wilcoxon_RankSum_test0.0007783.360672
1708Q9Y6D5Wilcoxon_RankSum_test0.4008140.840168
1709Q9Y6K1Wilcoxon_RankSum_test0.752714-0.315063
1710Q9Y6Q1Wilcoxon_RankSum_test1.0000000.000000
1711Q9Y6R7Wilcoxon_RankSum_test0.027423-2.205441
1712Q9Y6V0Wilcoxon_RankSum_test0.000778-3.360672
1713Q9Y6Y1Wilcoxon_RankSum_test1.0000000.000000
1714Q9Y6Y9Wilcoxon_RankSum_test0.7527140.315063
1715Q9Y6Z7Wilcoxon_RankSum_test0.2075781.260252
\n", + "

1716 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " Protein Test Pvalue Statistic\n", + "0 A0A075B6H9 t-test 0.159626 1.485322\n", + "1 A0A075B6I0 t-test 0.641305 0.476162\n", + "2 A0A075B6I1 t-test 0.315327 -1.041432\n", + "3 A0A075B6I4 t-test 0.000051 5.744282\n", + "4 A0A075B6I9 t-test 0.505980 0.682629\n", + "5 A0A075B6J1 t-test 0.618797 0.508829\n", + "6 A0A075B6J6 t-test 0.194716 1.361986\n", + "7 A0A075B6J9 t-test 0.926996 0.093289\n", + "8 A0A075B6K0 t-test 0.370359 -0.925552\n", + "9 A0A075B6K2 t-test 0.295790 1.086082\n", + "10 A0A075B6K4 t-test 0.144513 -1.545552\n", + "11 A0A075B6K5 t-test 0.292936 -1.092785\n", + "12 A0A075B6K6 t-test 0.101548 -1.752511\n", + "13 A0A075B6P5 t-test 0.310675 1.051874\n", + "14 A0A075B6Q5 t-test 0.916957 -0.106166\n", + "15 A0A075B6S4 t-test 0.581429 0.564375\n", + "16 A0A075B6S5 t-test 0.890032 0.140803\n", + "17 A0A075B6S6 t-test 0.145287 1.542342\n", + "18 A0A087WSX0 t-test 0.138681 1.570236\n", + "19 A0A087WSY4 t-test 0.623504 0.501950\n", + "20 A0A087WSY6 t-test 0.770799 0.297038\n", + "21 A0A087WSZ0 t-test 0.098795 -1.768246\n", + "22 A0A0A0MRZ9 t-test 0.215526 -1.297221\n", + "23 A0A0A0MS14 t-test 0.004225 3.410355\n", + "24 A0A0A0MT36 t-test 0.075822 -1.917398\n", + "25 A0A0B4J1U3 t-test 0.161936 1.476540\n", + "26 A0A0B4J1V0 t-test 0.795892 0.263649\n", + "27 A0A0B4J1V1 t-test 0.635307 0.484812\n", + "28 A0A0B4J1V2 t-test 0.140985 1.560378\n", + "29 A0A0B4J1V6 t-test 0.691751 0.404787\n", + "... ... ... ... 
...\n", + "1686 Q9UKW4 Wilcoxon_RankSum_test 0.141482 1.470294\n", + "1687 Q9UKY1 Wilcoxon_RankSum_test 1.000000 0.000000\n", + "1688 Q9UKY7 Wilcoxon_RankSum_test 0.000778 -3.360672\n", + "1689 Q9ULV4 Wilcoxon_RankSum_test 0.674424 -0.420084\n", + "1690 Q9UM47 Wilcoxon_RankSum_test 0.916359 -0.105021\n", + "1691 Q9UMZ2 Wilcoxon_RankSum_test 1.000000 0.000000\n", + "1692 Q9UN67 Wilcoxon_RankSum_test 0.833635 -0.210042\n", + "1693 Q9UNW1 Wilcoxon_RankSum_test 0.092892 1.680336\n", + "1694 Q9UQP3 Wilcoxon_RankSum_test 0.058707 -1.890378\n", + "1695 Q9Y289 Wilcoxon_RankSum_test 1.000000 0.000000\n", + "1696 Q9Y2H0 Wilcoxon_RankSum_test 0.000778 -3.360672\n", + "1697 Q9Y2W2 Wilcoxon_RankSum_test 1.000000 0.000000\n", + "1698 Q9Y345 Wilcoxon_RankSum_test 1.000000 0.000000\n", + "1699 Q9Y490 Wilcoxon_RankSum_test 0.344562 -0.945189\n", + "1700 Q9Y5C1 Wilcoxon_RankSum_test 0.000778 3.360672\n", + "1701 Q9Y5G5 Wilcoxon_RankSum_test 1.000000 0.000000\n", + "1702 Q9Y5G6 Wilcoxon_RankSum_test 1.000000 0.000000\n", + "1703 Q9Y5Q9 Wilcoxon_RankSum_test 0.400814 -0.840168\n", + "1704 Q9Y5S2 Wilcoxon_RankSum_test 0.008652 -2.625525\n", + "1705 Q9Y5Y7 Wilcoxon_RankSum_test 0.011719 2.520504\n", + "1706 Q9Y615 Wilcoxon_RankSum_test 0.833635 0.210042\n", + "1707 Q9Y6B7 Wilcoxon_RankSum_test 0.000778 3.360672\n", + "1708 Q9Y6D5 Wilcoxon_RankSum_test 0.400814 0.840168\n", + "1709 Q9Y6K1 Wilcoxon_RankSum_test 0.752714 -0.315063\n", + "1710 Q9Y6Q1 Wilcoxon_RankSum_test 1.000000 0.000000\n", + "1711 Q9Y6R7 Wilcoxon_RankSum_test 0.027423 -2.205441\n", + "1712 Q9Y6V0 Wilcoxon_RankSum_test 0.000778 -3.360672\n", + "1713 Q9Y6Y1 Wilcoxon_RankSum_test 1.000000 0.000000\n", + "1714 Q9Y6Y9 Wilcoxon_RankSum_test 0.752714 0.315063\n", + "1715 Q9Y6Z7 Wilcoxon_RankSum_test 0.207578 1.260252\n", + "\n", + "[1716 rows x 4 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ht = 
c.hypothesis_testing(df=c.samples_hq,df_groups=c.sample_groups)\n", + "ht" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## plotting with object data" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEWCAYAAACdaNcBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3Xd8XNd54P3fM30Gg0HvhWAHq8QiUlQvliy5SLZlW7YTJ06865T1JnHabnazdvzGeeNs3mTjxF47juMmuSi2ZVmy1StVKbF3gCBAEGWAGQwwAAbT5573jwEligJBkAQJcvB8P5/5YDD33HOfOwSfOXPuueeIMQallFKFxTbXASillJp9mtyVUqoAaXJXSqkCpMldKaUKkCZ3pZQqQJrclVKqAGlyV5cNEfkrEbl/lur6roh8afL59SLSNhv1Ttb3mIj85uTzT4nIS7NY96+JyJOzVZ8qXJrc1RmJyHUi8oqIjIrIsIi8LCJXzXVcs8UY86IxZvmZys30w8UYc6cx5nvnG5eItIiIERHHSXX/wBhz+/nWrQqf48xF1HwmIgHgl8DvAf8BuIDrgdRcxnUpEhEBxBhjzXUsSmnLXZ3JMgBjzI+MMTljTMIY86QxZi+AiCwWkWdFJCIiQyLyAxEpPbGziBwTkT8Tkb0iMiEi/y4iNZNdF+Mi8rSIlE2WPdFS/YyI9ItIUET+5HSBicjVk98ooiKyR0RumqbsOhHZOXnMBwDPSdtuEpHek37/byLSN1m2TURuFZE7gP8B3CsiMRHZM1n2eRH5GxF5GYgDiyZf+09vP7z8y+Q3n8Micusp78+7Tvr95G8HWyd/RiePueXUbh4RuUZE3pis+w0Rueakbc+LyF9PftMaF5EnRaRycptHRO6f/HeLTu5bc7r3T11+NLmrM2kHciLyPRG580QiPokAfwvUAyuAJuCvTilzD3Ab+Q+K9wOPkU+UleT/Bv/glPI3A0uB24H/fnLye/OgIg3Ar4AvAeXAnwI/E5GqKcq6gIeA+ybL/mQypncQkeXAZ4GrjDHFwLuBY8aYx4H/F3jAGOM3xlxx0m6fBD4DFAPdU1S7GeicPN8vAA+KSPlUxz/FDZM/SyeP+eopsZaTfw/+GagA/hH4lYhUnFTsE8BvAdXkv3X96eTrvwmUkP/3qgB+F0jMICZ1mdDkrqZljBkDrgMM8G9AWEQePtHKM8Z0GGOeMsakjDFh8gnmxlOq+RdjzKAxpg94EdhmjNlljEkBPwfWnVL+i8aYCWPMPuA7wMenCO3XgUeNMY8aYyxjzFPAduA9U5S9GnAC/2SMyRhjfgq8cZpTzgFuYKWIOI0xx4wxR6d5iwC+a4w5YIzJGmMyU2wPnXTsB4A24L1nqHMm3gscMcbcN3nsHwGHyX+AnvAdY0y7MSZBvlvtysnXM+ST+pLJb2Q7Jv+tVYHQ5K7OyBhzyBjzKWNMI7CafCv9nwBEpFpEfjzZjTEG3E++hXqywZOeJ6b43X9K+Z6TnndPHu9UC4CPTHYpREUkSv5DqG6KsvVAn3n7LHlTtbAxxnQAf0T+20do8tymOv7p4p3KVMc+U50zUc87z6MbaDjp94GTnsd5672+D3gC+PFkF9j/FhHnLMSkLhGa3NVZMcYcBr5LPslDvkvGAGuNMQHyLWo5z8M0nfS8Geifokw
PcJ8xpvSkR5Ex5stTlA0CDZMXPE+ud0rGmB8aY64j/wFigL87sel0u5yurklTHfvEOU0AvpO21Z5Fvf2TMZ6sGeg7w35Mfov4ojFmJXAN8D7gN860n7p8aHJX0xKRVhH5ExFpnPy9iXw3yWuTRYqBGPmLfg3An83CYf+XiPhEZBX5/uIHpihzP/B+EXm3iNgnLxDedCLOU7wKZIE/EBGHiHwI2DTVgUVkuYjcIiJuIEn+m0VucvMg0CIiZ/v/pnry2E4R+Qj5axOPTm7bDXxscttG4MMn7RcGLGDRaep9FFgmIp+YPK97gZXkRzdNS0RuFpE1ImIHxsh30+TOsJu6jGhyV2cyTv6C4DYRmSCf1PcDJ0axfBFYD4ySv7j34Cwc8wWgA3gG+P+MMe+4accY0wPcTf7CbJh8S/7PmOJv2hiTBj4EfAoYAe6dJk438GVgiHyXRvXkMSB/IRYgIiI7z+J8tpG/QDwE/A3wYWNMZHLb/wIWT8b1ReCHJ8Udnyz/8mTX09WnnFeEfIv7T4AI8OfA+4wxQzOIqRb4KfnEfoj8ez4rN4ipS4PoYh3qUiEiLUAX4DTGZOc2GqUub9pyV0qpAqTJXSmlCpB2yyilVAHSlrtSShWgOZs47I477jCPP/74XB1+Rp769oFpt9cvLWXV9Q3TllFKqVk2o/tI5qzlPjQ0k9FaSimlzoV2yyilVAHS5K6UUgVIk7tSShUgXYlpGvbh5LTbncfHePsEfEopdWnQlrtSShUgTe5KKVWANLkrpVQB0uSulFIFSC+oTiOUCE27PTmaYxkrLlI0Sik1c5rcpyHdHdNv7+pj5IE4Zfd+9CJFpJRSM6PdMkopVYA0uSulVAHS5K6UUgVIk/s0siYz7SNtJdgxuGOuw1RKqXfQC6rTcKTT0253peMQ6btI0Sil1Mxpcp+GMblpt1tWhtRA4iJFo5RSM6fdMkopVYA0uSulVAHS5K6UUgVI+9ynEbCNTLu92t7LIO6LFI1SSs2cttyVUqoAaXJXSqkCpN0y0+jxpKbdHnTHGLLDey9SPEopNVPacldKqQJ0xuQuIk0i8pyIHBKRAyLyh1OUERH5ZxHpEJG9IrL+woSrlFJqJmbSLZMF/sQYs1NEioEdIvKUMebgSWXuBJZOPjYDX5/8qZRSag6cMbkbY4JAcPL5uIgcAhqAk5P73cD3jTEGeE1ESkWkbnLfy1bWNv3bk8PFuN1/kaJRSqmZO6s+dxFpAdYB207Z1AD0nPR77+RrSiml5sCMk7uI+IGfAX9kjBk7dfMUu5gp6viMiGwXke3hcPjsIlVKKTVjM0ruIuIkn9h/YIx5cIoivUDTSb83Av2nFjLGfNMYs9EYs7Gqqupc4lVKKTUDMxktI8C/A4eMMf94mmIPA78xOWrmamD0cu9vV0qpy9lMRstcC3wS2Cciuydf+x9AM4Ax5hvAo8B7gA4gDvzW7IeqlFJqpmYyWuYlpu5TP7mMAf7LbAWllFLq/OgdqkopVYA0uSulVAHS5K6UUgVIk7tSShUgTe5KKVWANLkrpVQB0uSulFIFSJO7UkoVIE3uSilVgDS5K6VUAdLkrpRSBUiTu1JKFSBN7kopVYA0uSulVAHS5K6UUgVIk7tSShUgTe5KKVWANLkrpVQB0uSulFIFSJO7UkoVIE3uSilVgDS5K6VUAdLkrpRSBUiTu1JKFSBN7kopVYA0uSulVAHS5K6UUgVIk7tSShUgTe5KKVWANLkrpVQB0uSulFIFSJO7UkoVIE3uSilVgDS5K6VUAdLkrpRSBUiTu1JKFSBN7kopVYA0uSulVAFyzHUA6uL74bbjZyzzic3NFyESpdSFcsaWu4h8W0RCIrL/NNtvEpFREdk9+fj87IeplFLqbMyk5f5d4KvA96cp86Ix5n2zEpG6KBYf/8n0BezlsPG3Lk4wSqlZd8bkbozZKiItFz4UdTGVRJLTbk8yimfjRQpGKTXrZuuC6hYR2SMij4nIqtMVEpH
PiMh2EdkeDodn6dBKKaVONRsXVHcCC4wxMRF5D/AQsHSqgsaYbwLfBNi4caOZhWOrc5BqGyU3lpm2zKiVwHOR4lFKzb7zbrkbY8aMMbHJ548CThGpPO/IlFJKnbPzbrmLSC0waIwxIrKJ/AdG5LwjUxfe4aOn3ZT0OBh54D8ou/ejFzEgpdRsOWNyF5EfATcBlSLSC3wBcAIYY74BfBj4PRHJAgngY8YY7XJRSqk5NJPRMh8/w/avkh8qqZRS6hKhd6jOU2PZCC4rdtrtJm0ncBHjUUrNLp1bRimlCpAmd6WUKkCa3JVSqgBpcldKqQKkyV0ppQqQJnellCpAOhRyHipq249dEtjSp59fxuSyFzEipdRs0+SuCPvc73jNJkLvaIyh/qE3X/tkvU4ZpNTlQrtllFKqAGlyV0qpAqTJXSmlCpD2uc9zdt8w4nrnLDIiQmLiGNGRNwAoLbvqYoemlDoPmtznuf2OZh7xbKTdUc/SbJDfSD6DE2uuw1JKnSftlpnHupwVfL7k4+x1tFBrjdDhquMJ/zqMTZO7Upc7bbnPU0lj5x8qbyJgJfirxP3cUrodvyMBQNzrYs/IKkbnOEal1LnTlvs8db+1mn5HKZ+LPcza4qP47Alezqzne8n3kMDNisAR7CY912Eqpc6RttznoVEjPGNaeM/4IdYWjdPi7mer2cSLsomMx8k/pRv4ovNbrIs+SsexBSBCvMPGSPKdbXldY1WpS5O23OcZYxl6Jkr4/aiPO731LCl6g5CpozH153x8fBMNwUHsLj+/nLiGZls3zWMdcx2yUuocaMt9HjHG8MJ/tFOTclPnSxPwP0CRJHg8+Kcsbv8G/kAL11sWTznCtPlXELe2c13P4+xLDePJHcWVSUxRq7bclboUaXI/I5l8XP4jSHY/3cOB5/s44s6x2BtmtewmPFpGxaHduEMHSQcPIsCNfcU8f+1muidqWWbrwZlLzXXoSqmzpN0yZ7QReDewGvDOcSznLjaSZNvDnQwWCxs8FlHHS5QzyugrDmpCOzjWfDv7rr6FzFX34MLJ9S9s5Q3HFdixqIv1zHX4SqmzpMl9WqVALZACFgDXkm/FX362PdKFsQwHSFHlGGKl7Cba7SUz6mSkJkB3yw2Muq6l39ePd/NncWcyVB0OMzLhp260k6HxJMeGJt72UEpdujS5T2spkAWKAEO+5V4/pxGdi0hfjMOvBnEsD3CX3Um7o40VuXZCuwNkPHYGF1XiN9vJ2GqJ5tzEfWBbcjMtx7o5FFxIiRWjLDd05gMppS4Z2ud+GuHj4+Rb7QD9QDn5VvtioG+uwjqjAy++M7b9L/ThcNp4NR7jt22jjMke4sfd5BJ2xpcEwCb4rIOM27YQs11FKLGDRa33MNj/OlaXhbVQaEodJcWqOTgjpdS50Jb7aex8spt8a30A2AFsn9xSQj7RXx5S8QxDfTGqFgWoi6bpsAdZlW0ntDdAzuskUe4BQMjhsw6QkmYiqeNkSJJbsIGKyAgDvaXUJnoYs9Jve/TFJ9i+ffsZIlBKzQVN7lMwxtBzcJh8S7178tUR4Dj5hL94rkKbue5XoPsVgjv2g4HB3HHehSFk66G0f5hswk62rgTkrWsIg+Xj7Fno4YnW23m+eICalveTcTgY7CnHb4/jNvE5PCGl1NnQbpkpxEZSpOJZ8sMfT+5r7gVagGouh7fOGBgYdFNakqEj7mSBPcQS6WK00wdeO7mAhxNDPF9dtI7vX3PP5J4bediy+NK+FNULlrKwq41s0kapc4BB56I5Ox+l1Mxpy30Kg10nbrMf5u3j20eABPm3rfpih3XWRqJOkik7NTVJ6uIe2u1BVg8fYmLATbql+s1W+4G6pdx/9QdoDXbwJw+/yuceCrM4fJzPr/HQtvE27JbFUIefsuTAHJ+RUmqmNLlPoXt/ZPJZ/xRbe8h3zVz6o2aCg24cDouQO8sqUozbxgj0jABCtqUKgO6yOr5548cITPTR3PM1gv5X8Kf
sfPq5rSyNxvjKDVcTKq9gpK+YgDuKmMv/Zi6l5gNN7lPoa49OPhucaiv5vvhqDPaLF9RZSmeEoYiL2uoUwVE3w7Yhmq1exo95yVWXYnwukBzfvOEOspLEHv072iqHeGPRHkxxLxOBEv5o6yM4LXjxys0wYrDS4M8Nz/WpKaVm4NLvOL7IrJxFbDiJx+8kGUti95USiPQz7PVN3r4UAybIj31fAESnqW1uOI+P0dflxxih0QNDIx46nSFu7NtHNmEn3boFF538880+ustWs3rwu9zX1UayyUFnsw8WfBGA8WEPH+p9Pw9ffwv3PPsoYz1eKgOdpCanmLHcdhzZZxk52qmzQyp1idGW+ykivTGMgarmYopxsfaVx1m3bSstfZ24vGWTpYLku2YuzXHfxhj6og5KvDkiNkMrGUZsMSr6I4gdXHVudpfv5InGj+NNR/lPW9vZu7CezpYiKkMZvHsWEtr7IbylSa6r/CsGqmrpq65hpNtPwKdLeCh1OdCW+ymOTfa3V1Vnaf73J7BZBhoCLD7STtJXzEBZFUIIWAK0gnl5TuOdSnQsTTxtY1V9irYJO422IcqtYRL9TpxNFUQqf8yX6zeR8azkI6//nNKN/WRr0jz54vvYNbaKBY4RUqkr8RDhYyte5WbrBZ7eeC2/8eiDOJ0ZbGSx9E9HqUuattxP0dc+AkDs+R/iTqVwXreANxqLmaipYsXenZTYisiPmskBRTiomstwp9Q3kMBhM9QEcrhSTjrtIdb37CGXsuNpivKXNV7SgQ9RGh/jXaln2OFexn9//ov8LHE7KVwcsBp4xZ3hpeAH+G7HFt7jeICXrtyAABMDbjyESFlxJjITdI8fZ8fgjrk+ZaXUKbT5dYqRYBwwuA/sIFlcwp54gqGkh1iZg5sHDYHQUUYraxEiQBUe69Ia951J5RgcSlBfkmXUghYrywu2cWp7g2QdNr7fOk7YvZiYbyW/vudRtjc18519H2dBZpDfjD3F4kwQgJD3Tr7ta+aVng+w1tRS0homWlxMYCBOSWOMkXTpHJ+pUmo6mtxPYowhEUvjdSaoDIcYXrqISFJ4vvx6aiTDhlCM+q4jjDWuZCwZAqrxWkvnNOZT55Jpf2MQy4JxM8G+sJ0N9lFsVg4rBK76FD+qCFBu+yBDgNd+nO+0fZyluV4+PfIYLnJv1uNLDXBvbgW/qNzPv/Zew/vrnuP1lVdStmsr9UtHQJdXVeqSpt0yJ4mPpTEWFMeOIEC7y00m4+N/Dn2PRO1edjbW4Z+YwBmPAGEAHJQzGp5qhaKLzxjDaCiB3WnhcOTwGw9d9hBX9uwjl7KzbTkUW16GSzawtP8YP+x/D0ucx/ntyONEXFGCrvE3H2OOdtwIf53dis+WYseezexsXYFJCw5jgY53V+qSdsbkLiLfFpGQiOw/zXYRkX8WkQ4R2Ssi62c/zItjqGccgJL+fYyXlDFiT/KJ5pcpuzvKZ9a9wes3xsg6nJR2tyFmHMiX79oTnsOo84wxhI+Pk05ksUuaVF+YxpwhJKMs7u0Am+E7Kz1s7L2aIZeH0fYUAecYnw0+jJvsFDUOAgnG4iu5xf8Efekier1+AOJhFyXZKGRTEAtD5OhFPVel1JnNpFvmu8BXge+fZvud5Cc+XwpsBr4++fOy038kP8yvYrCN4eYmrmo6wJGaHJmgsLR7jL+s303fikbqD/USWrqO8WwHhivo2jPEle9qviAxTTWF7wnpZJahnhhDvTHGhhJk0/nWdDrtIe1uJTYO5RObOOaNU9f4ApZlp6PuOlyHo4ynfHwu8CMsa+o/AcFg6KE3vZJP8y3ifJDnBmvpqmvAN5CiZEGcJMUX5JyVUufvjMndGLNVRFqmKXI38H1jjAFeE5FSEakzxgRnKcaLJnx8DIyFNzlCvKQe0xThT8reGg3z+yNRNm6I4NznxpOIM2YfQmzr6D8SJTaSxF/mueAxGssQ6Z8geHSUSF8MDHj8Tiob/QweG8Ntz+F0xwi
l7fhcUZzjQnfdbRzjVm5pb+PbG2uxHR3n9oYXaN49TjgFKQtScT82u4XNnsPhSSM2A/SQYwmlY17ucvfzUrqK12tXsHB3L7XLR3BIHK8rjEMMfX0/oqHh4xf8/JVSMzMbfe4N5CdcOaF38rV3EJHPiMh2EdkeDs99V8apRsMJnLlxBEPxggP8XXEpvlQxE12fRcZa+dfSEkZdFpYdvOFuSL81Y2Tbtgs7qVY2Y9HbNsLrv+xi/wt9jA0laFpRzsb3LGDzXQsJVHkxFpT6s9gcOWq8TkJFnWw6+hW2vPZ5Ut59lKRW8+vb0jS7x7hxdBtHY4bxbH4yBbszizGQTblIjhWRSzvIT7VgY29iLQHHYe4yTvZ6W8AIthxgab+7Upeq2UjuUy0qaqYqaIz5pjFmozFmY1XVpTc+PD6Wxp0Kk/QV8URdiojdQSj4m9TW11DDnbhzTr7hd5NZnKMm2EOgaDlpM4DLY+fQK0HyX15mVyqR5eiuMK89dJSO7SGcbjsrr6tjywcXs3hdFf4yDyJCsGMUX4kLt9PQh41y+wgIFI2OU+IPkZI2flWfxJ8TPhoJEG5vpdIpLPBBvRdcRUk8gTjuwARiM6QnfGQnZ4EMppdSmzvEr+GjrbwZS4TUqBN3aqq+eqXUpWA2knsv0HTS741MPZ3iJc2yDNm0hX+8l0y5jceKvJSNrKDSXsmv7XyBuw/sIzD+btrcLg7Ueiken8AhDjLZ3WRSOUZDCQaOzt6t+ePDSbb+uJ3XHuqk59AwZbVFrLu9mfXvXkD1ggA221ufqbGRJOORJPVLShGBNG767GFqwsOYcRsTzYYJs4ADScPjCwew28Zxeu+hxNOA7ZSPZpvdwl08gd2ZIZuwk8vswzLNZOJBKrFRVzROb2kVibCLIjKzdr5Kqdk1G+PcHwY+KyI/Jn8hdfRy7G+PDuRXGSod7SPcNIERG8dH38W9YweA/NeT9x9O8cv1Hp5YNs76rYInchRxWBgn2B02Dr0apG7J22/ume6CKMCq69/egzUajrPz8W4OvzYABmoWBmheVY6v2HXaOoIdo4hNqFkYYM/hNJUON0HbCDe37QZguLyRX/mXgRHuWvlDpGcpdm4gaL+bCvMAaanFkHqrQgFHkYU13kNm4gWk+APsnXBwVekQN9lG2VG+jOauQUoXx3k9vgJ7yrAvWkqp5LupPllfOaP3XCl14cxkKOSPgFeB5SLSKyKfFpHfFZHfnSzyKNAJdAD/Bvz+BYv2Agoezc/u6I2H2NmQoSJjw11UT0N85M0ydmBRfwu7qpyMlTmojXRQ5a3HJmMEKj0c2R4iPnZud/eMhhM8/Z2D/OAL22jbNsiq6+r59S9tofXq2mkTezZjMdA5RlWzH6fbTgSosUcxAqVjYdylGV7hKkayRQQWp2kYNGTTLRTn9gOGIcdHsXC+o14RG86iOsAiE3+ZaLaCHB1cm62lq7kRLMFhz+WXe1JKXXJmMlpm2iEQk6Nk/susRTRHQsfzY9Z9iRBPNjupjdXQnDr+jnLruwLsbYa2hTk27BnHtbISY28jnarCylhs+8VRbv7kihkfNzmRYfujx9j3fC82u3DFLY1ceVszRSXuGe0/2DlKLmvRuDw/Y+WIzUnA3kNFNIZjzOBeleW+ok1Yfge3Nz9B32MbSGRtlJo0PrOXmGMDY45VHKn4BeOeBN6Mi/J4ETWxEmx2Fw5vI9n4MaxMBalUD0scm0nVTn4bGRcCthgTDv+Mz1cpdXHo9AOTogNxbFaajGeCqM+Gf2wD1wY731Eu4ylmdaScx5YPcdVOgzfVRiSRQdzraN3SyMFXgqy+qZGqpunHgFs5i772KC/95AjZjEXtohIWrq3E7XNwbO/QtPueYIyhrz1KcYWHQKUXgAqbjT7bMFcf3IcgHKuuIm7zY7UWc8VQB4OxW7E5Mphcht6SDroDQa4I3s2Yp4ntzY+9WXdVLMDGnoU0j9SQS5aRTcTpSmao8gu1tiGGAgGKwkm
WevrYXbT8LN5ppdTFoMl90ngkiScRJlhtUZaDbOXtOLsefmdBEW7e5uB/3wlJl1AxfIiobyPjmQ4CFUvw+Jy8+EA7d//hOuzOd/Z6GWMIdY/TtWeIZCxDeV0Ri9ZV4S87c0u9/8jbFwaZGE0RH0tTuyhA/5Eo0WyWenuUqBgqon24irP8vPRGpNrJ2vL9JLYuASDpSPLQsp20Vw7gT7lpGWllQ9/tXNHvJukM01MaYXd9N4+t2MP63hbWH72O7MSL9E/EsPxZSsTJwcZFVLdHqW0NAZrclbrU6NwykxLjKYrigxyqhtYJN7cuazp9YVs9y1JODrYY/J0JApULEA7Qe3iEaz+8hGDHKL/4yi6SsbdGkxjLEO4ZZ/tj3Rx6OYjdaWPtLY2svaVxRol9KtHBOHaH4C/P3zzVOTJBzB6hZDyBbzxHcWOC18xqEisq2BJ/naJeLxn/YX515c85Wh7ixq7l/M4bN9MY60PI4DTrKE57WRlq5N49W2gdrGdn4zGeX/kSOGpIJeykrX6aclUMNNVhZW3g0D8hpS5F2nIn35rOZnJ4E8N0LRZs8UY+srKOx09TfqS0nBsOwatLbaxvz+LyHCAzMUL/kU7u/L013PbplTz7vcP88Iuv4Qu4sNltjAxMkE1beIudtG6ppaYlgJw6DvEspOIZJqJpKhqK3hwW6YqN0GuLsPJoO2IgUl9Ctr6YIlecpbsn2FET58kl+ylN+vjovk1UxwOTtWWB/cAGYBHQid3YuLFzBYGUl9ebj1LUWsm6/TaGE4Osdiyh05+/Ca18aBx7hQ6JVOpSo8kdJke42PCkhumvEBy5tSyuKDpteWOzsWKfn4fvGcYAnshhqnybiCT30b3/OlqvriNQ4WXPMz0Ej46STeeoaPBT0einstH/tjHq52rg6CAiDjBBRvphzIImwuwXQ3moH4c3x8uBtfSvaOTazPM8ZU3w+rIuWkYqufvQejy5U0fI9JO/RWEZ+RuOMwjCur4WYq5h9jRvJxCtYvHoABuKNpLzGiaKvRSHE3hKLo1ZMZVSb9HkDoQnZ4P0JCNESy3cXHfGfUYDtaxIjdNZl8ZzyEHVreWEDxzg6I4grVfXUbuohNpFJWcc534u0oksqYQDb3Ea22SvyAEZZC0ZPKks1dEYxYsT9BU3k7YZRnseY39DhHX9C3jX0ZXYTtsbdwi4kfwSgocAEIRrj9mJepfw6qpOWl45xkYEY8/R3VxHoCOGY0PmpJnglVKXAk3uQLg7n9zTtmHqjJ2WpS1n3CdSXsV7Xt7DK0tcfOTFEY5V7AYLOne+QiK2Gq//lLHpO+87cyDrPzmjeIeDEyDg8+fH1KclzQKrj+M2O9UdPdgtC3sDPLl6KVWDX6DXFuHarmWsGWhm3DFdF8o4+flkFpK/dSF/Y5PD5Lil4w4euOJuM2JBAAAgAElEQVRfeWpFJx8aiFFufByrbmblgU4qkzEGdLy7UpcUvRrGZLIEwv5hypIlfHBF3Rn3SXk8BHrcDCywsAGDx3Msrmslk9hD++uDby/bNUom5uT1+MRpHztyY+wY3HHG9UiTExnGhpJ4i9LY7PmE+qj3ERYmmskC5QMRbF6Ll+urCY/dhy0T5s7DV7JmYKZTEreRvx932dte9acn2NL9HgYqUzxY/CRL0jUkAyUALBvqZSCsyV2pS4kmdyA6GMOejdNfnkLS9WyqDZx5J2C40k3rhMVIEbj25ihdNorJBdnzzK4LEueJBTnsDiFUepRO23Ge9zzHxk4Hvc4x+qJ+Vg11MrgszReaM4CT93ZspDl6NtMBxMn3uTcBJ3/7CLIidDMLhn38tH4rNbkKMh43aZ+DiqExeg9edjNOKFXQNLkD8aExvMn8xdSMoxWbbWZvS7jSzfUv59i9WFjZc5zDVf1U+GuJdD9DePKO19k0EU2RGM9Q0eAHm0XEEcIK7qHVdgUHsCjt68duDH+zuQjLvZRm34dpCJ/+wvDpdZKfbGHBSa8FEYTbOlt
JOtL8svIFcjaLUH0FqSEH1sAQlnbNKHXJ0OQOZBJpPMlhEqUWjup1M95voshObtxHrN7CmzZs23ElyzelsTJH2faLF2c1xmw6R6h7HJfXTkm1l3H7KIOjL9HaV0rE7+cFU8ptA09xpA62eC3CNf+TDXtC53i0GPll9lp4608kP/1vWXIxa/oDPFL+AilHjK6aejITDtbGOugdiJ33eSqlZse8T+7GGLLGiScZobgoR+ui1TPfWYTeKj9LojmSTrj5wFba63soLS3jyGs/Yzzy1hDBpG2IkC172seAZdEWPf1MyUd2hMimLWoWBnh57Ajt6ae5osPP6qoFfK2kjaWer9IylKGyJU6x/ybqEmGagpHzeGc6AQ9Qnz9V0sAQGanm1i4/TstBR8kRooH8vPxXjRxm27ZD9B46wN6nT3eHgFLqYpn3yT0xlsaIE3tmGIfTx7sW1pzV/j3Vfpa/bmf3clhzLMQPOz7I6i3jWNlBnvjmTwDoGpoglbWwLHPax3TCPeMMdo5RXl9Ev/8oe5L/yKYDfjI1I3xxZTsTtY/zwVdt5JzCpvoxvlP9YTbsPzrlKiozNwSMkR85c0KQjNRQlrNz68AajhR3M1AOlkNoHh6kO5S6IAuWKKXO3rwfCjnQNQZA3DmMpKtYXn12MxwOlXoRXzFWaQ53JsH1e59h171JWjcWc3j7g4ADK1FKTrLgcIIxlDJGuRWlhHw3RgYHI7ZSQplSusfePlQxF7MRf8OLFOd4tvIBBgZfZc1RP69uGOBIVRorl2NR2w1sPPocgdY4u6qXERcPazu7ZuHd6QbWYKhBGASC5GQNFl629FfzbK2Hg+VtjFUX4Q0P40pOEImVgE4SqdScm/fJ/cSFzzHPCCmrCaf97L7MGBGs9Vey8ZWXCVbAxq4jfO7w3/KVDX9KTWwZffse4qaFDsodXbSkx6kzIbwnL4xxQhZyCRs97gVEUpvoK76CYdNEYpcHy57lV8u+Rm4siOUxPHL9AG4LspGb8A5v5A9f+i6CoXbJGH9X83us62zDnc3mr4melz5gFXAl8ASQHxGTkWp8qSHuHrmJH1c+TmddJaU7x9mcO8CxULUmd6UuAfM+uQ8dzy/GEfFHSHpvPqc6rOuvwbX1Jfoai9i4Z4IP9X6db1X8Eb9zw/+huKWMPftLeb/3GCmbjzbbQvpt1QzZSplweDFisGeEaluUYvs4a1JHuGLwAQi/wHPWB+hvGKG9cjtZWwYqoTwhfHColJ8OfxZnzsMHM9uoGQ7jWJohV2xja9EGPrj/+Vl6dzLkL6SuxfA0Jy6qpqWGVG4Hdw99iJ+VPc1rTRnW7xBumdjN34SugUXvXPxDKXVxzfvkPtoTwpaDrH8MT+Oac6ukrg5ryWKWdA0R88KGPd10Bo7xS+df8L76r+Bf0MlOfOSyDgxRXPYIDfJW33TCgva4m7aEj39IlzCQKyIrFvAodkuoHnVSPVTC9WmhovwaPj96Iw5juMW1j5t3PIvdylG9PMYTFdfR0jmA3Viz8+YA+THv9cAShDYMI4w7yokxysLUGOuiK3l94V5+T2BJpI9IwMbPI7tYyx2zGINS6mzN++SeGo3hSaaxVcKaRTNfQelU1g3XUvrt7/OrZVfw3vY9LIo9wpHjZfyD45v8WnQvDdZR7BxjyJajz+kj5PDTRYKBXB8RM4CFwUaKBqfFFl+OJpud0hE3jiNFJMIVFC9ewIB/IV8dbUGM4ZPuozQ7XqC8N0p8k5PSogSPB66j4eDALF8mD5MfGrmO/N2rQSB/B++IdZz3jtzIG+X7GKqw0TQwgWthhqPDpaevTil1Ucz75J7JCsXJYXJeH1uazm5hZ8MGvNkiDr+WRHJLWe7xs3Q0SX9JEbe+OMGe37iPuzsf4aEFv8mu8rtIGTuSbcM98SquxBvYTJKcvZSU73bS3o04rXqSA0GK+55jsf91ShbEcLSMYkyQl46Xc3/7QgKOJH+25EEqql6j5itC1ufAvcRJp7O
BeJ+fImZ7rLkB9gKbMXjIJ/eVGNxEcz2syK6nYaKBXc3Hqd7r4HrvLg4Nn/uHpFJqdszr5G6MIWfz4koPM+GsomWaaX7PWJfdQWT5NSzb8yQ/WnkHHz30BL/9c+Gv7x3HEf8XFo9bBB12jAgey8Hy+CqunbiR5elWxm0WB53DvFwe4WBdIwcX/S6Pxe/lhtEOVgUP8OxwM29ElrO8pJPPtv4cIUbRt/24uhM8cvfN/HnuPv6v62P4Yhdq6t0DwDXkV1w6Mc1ALdFcL37jYfnIEg42Hef2nTZuSezmGa5iKJai0n9ui5Aopc7fvB7nHhtOYdm8ZCVCxjTiOMuRMqcaWnk9yZIaPtD1Cv937QdZcdziCz8qYnH3QtyJau6MOvnbwWFe7u7kO+FfsT7+b4STT1EU6+eeYT/fOFLJ41uT/OneXlzjwkOx5fxd521sjyzjNhd8ffQKhl75GOnvNVC+P8EPbr+bjfVtDNsCHIotnqV3ZSp9QJT8yJkTyb2OuDVC0kqwKF5HsCo/idjSwT5AePrg4NRVKaUuinndcg92jgL5Me459/mvA2rsDnqv+TCLH/86t4x2889XfIjf2f8If/kfnRyuXcTuwBU86fSxlwhbzCFa5TgL+TaHsgt5hiXEfdUMFzXSlqpjqB8cIpSVuBhYU8ojPgfdR9r5zEOPsKKrg0euu5Wd16/mSwe+zoO5m0kZG5DDRo7Zvo1IAMNBYDOQI39zU77ffdAKUmXKqM6tJFTyMp6BMcoXjPDEgQE+tmmmM1EqpWbbvE7uAx35peIyngiBxnPvJx5Lx996XlyOu/UaVh16CXdDgj+88Q/4QOcrXB06wKr+o2/bL0q+G6iaIaoZetu2tN1F1hfAeIsYfVRwJEepHh8h6XLx7fd+gt2LF/Mv+75MQly8lL0S2wVfLuNE10wrJ19UDeUGaHQupSHRQFe9jVXdhmvLdvNERzmxVBa/e17/iSk1Z+b1/7xQWzfgxeYdYs3SK2et3uOrbyLndLNo33P843AP22pX87VVdzPm8lFsT9NQNIHfl8vfAJWyuCLTxrsdW0mmXLwaWY81UYwnMYE9ESeXSpESO0VFbrYvW0+suJoR10p+t+9+Wp3dfD/zXhLGg5zUXpdZb7vD27tm+oClGJyErCArzQYEYbTMh/9QjOb0AdK5m3m+LcT71tZfgFiUUmcyr5P7RCiMWHXkvEk2Np95gY4ZE6Gv9RpGq1tYsO9Zbuh+g5u7Xn1bkZTLxf7lKzi2ZBF7i1YzkKriXu+vuKP8Re5P3cV92XswqSQ3JQ4hNV4QoS4Yo2fRChqjPdzleIFduWW8mjvHsflne0qc3DWzh/zlmhpCuUGKcOOwYMK7GNiD1R+ktCTLkwcGNbkrNUfmdXJPZXJ40lFiFQGayn3nXM+x9GlGhfgXsn/Lp7HHU1wVfIZxUjizGezZLKmcDfdwgg3PvUamzEf34ha+VfkJPsRjfMrzEFtye3jI/24SBKgMD+PK+kh7KmjMHecjzh8Rx8MPMnfCeU4PNhOGDW8+y89pcGKVpk3Yc2GiuWGKLR+jxU2kXHso6k+zbOEunt1nJ8QRqj/+0Qseo1Lq7eZ1creMC3cywoC3Drvt3JOk0xmevoBfSPpLGHa/s7skBGAM3lgSV3SEZx2bCZeUsrl0L39sfYuDyWW0uZbhcEeodwxwVWonI1LMN1L3MMG5fyCdm2Hy66pWTP4sAcKEckFqKWfEPUGy3M6yvhzPl75OrP8q/k+fkzXbjr9Zwyc260VWpS6GeZvcrZyFkWIk103G3Ti3wYiQ8ngZk/yMWy+kazg4vJJNvl2s8h7mCt/BN4tuk5V8x9yFmAvfYp/aAPnpCEaA/J2ooVyQZutKDjl6GfL7WNwxznC8H5ctRVvCwcXpOFJKnWzeJvfoYBzLHsCyRfBULTrnegShNjg0faFzmOM8nK3kV2O38ez4tdQ5Q6SMm1jOxxs1TeSMkyKS5xjx+Rogv/xeBqgEI4RyQdZ
bNwDQE2hkMYdo6s0gNS/SPngrljHYZK4+jJSan+btTUxHd3UCII4IrauvOa+6xh2+aR9jDh9Ju+vMFU0hYXx0plvoy9QxapXAnCfJISALOAEbdvwMW2EcYsdh7ESr6jACG49bZEq3EbOE3uH4GepUSs22edtyP7bnEFCFzT3MlYtbz6uutIxMX0BgVKaYw/0cNFjDIOB2pacvaM/OyvHeySJ/paAifxiKsbA46olQni0h7I+TLHGwuTvJNxnH4e7jQLCS5vOY2kEpdfbmbXIfHuwDqjDuOE1l5554SszcLAqdOsM3gaztQt7UdKLfPYudYgCOuCMsSlcQcg4T9nlpHsjgzYK3+nkO9i/kjlW1yJx/61Bq/pi33TLpZAaMRdpnx3YeI2XmpxD5FnwaO8UY4LArRI2Vn19mIFAFlvC+4w5SRQeIJKKExmfnm4tSambmbcvdZrlwZ6KM18/izUsXQV3wQs38eDYyQAQowY4HjJ0O9zClpggMhGpq4HAnH2yL8pNFfpyl2znQv5CagGeuA1dq3pi3LXeb5ceVjpAtb5jrUC5TA4ALwYaTKlK2LL3OcfymiLGKEhJ+O4F+odbuw1fxKgeD0bkOWKl5ZV4m99HwCEIZNjPMwmUb5zqcN5kZPC4dA28+c1ILQJtrmCarDHEWE/IWkRhycc1EAMsxwmB2N9H4GS4CK6VmzbxM7vu37yLnKMUpYdavP7dFsVWS/ERiFiVWOSuzXcQdB6izSrGJjYGyMowlfOxQP0Vix1X2KgeDY3MdtFLzxrxM7nv37wGx4bCP0FRZNtfhXMYGyP8JNQHQ4+qjxsrftdrTVIkBKo6PstpTjMPfzt7BjjmLVKn5ZkYXVEXkDuAr5GeN+pYx5sunbP8U8Pfk54IF+Kox5luzGOesGguF8APijl9Sw/PmaljluQsCrSBl2LM+ErYEMccYHuNmuLyE8SIXvpCLqxKlbDNjDPIM390rFE1eV/3Iso/MafRKFbIzttxFxA58DbgTWAl8XERWTlH0AWPMlZOPSzaxA+SS+d7rjM+a40gudzEgP3rHG69FHBm6PV005cpw2csIFflIRNxsHBikxVGEs2QH2w4dnb5KpdSsmEm3zCagwxjTaYxJAz8G7r6wYV1Y7owbm5VhvNI716EUgD4wBt9EfvK1Y+5u6q1yHDjorwyAgSUdx7kmYEPsKfYO69qqSl0MM0nuDUDPSb/3Tr52qntEZK+I/FREmqaqSEQ+IyLbRWR7OHyGaXIvkHQygSddgjsVoXzR6jmJobAEQYSK4WZWHhij6OhBaqwAAG1LysjZhMSAi9rxapzZCqL2vaTSl9a4H6UK0UyS+1Sd0qf+73wEaDHGrAWeBr43VUXGmG8aYzYaYzZWVVWdXaSzpLvrMA5TgSsbZv0175uTGArLMJg0mAowgmWS5DKDeC0X2eJyhvxeRoNeWoajrHF5sLlDvNjZc+ZqlVLnZSbJvZcTwyHyGoH+kwsYYyLGmBP3l/8bvLl0zyXntX2vYjkqcTHEgoXnvii2OtlxEDtm8s9kON1Oo1WOP1fJYMCNFbNxVfd+rq+IY3JedoXfmON4lSp8M0nubwBLRWShiLiAjwEPn1xARE6+h/8u4NDshTi7Oo61Ydm9OB3RS2qkzOXLBuyafL4OwcZw9ih1pgwXNvYvzq8WlQ7aKRopocosJm4/xMD46JxFrNR8cMbkbozJAp8FniCftP/DGHNARP4fEblrstgfiMgBEdkD/AHwqQsV8PmaGMnPLS6uS2GOlsIgtIGxgPyiJ2PZ41Rn8+Mdjy+sZMLlIDxYQuvgAO8qTwOGp4/uOn2FSqnzNqNx7saYR4FHT3nt8yc9/wvgL2Y3tAvDG89/nmX8eiv8bBGyGEaAcizsGNIk0l2Uun0UZ5sIFx/FG8xxxXA7x5bbsCVb6TI7SOfSuM5xEROl1PTm1R2q2UyG4kS+m8DTVDLH0RSadhDBcAUGCKcP0mxVUZEt4nCTA1vOMDHoxhn
20+pbjrFN8OODD8110EoVrHmV3Ds69+JPl+PMjLF0i46UmS1GANmW/0XWYgHhTBsNuVIEeO3KUjI2G8cGalg52M9tVT3kknX8+77vYBm9kUypC2FeJfft7S/jylXgyQ6x7Kpb5jqcgiJEgRRQjwFyJoUrHcFhbBSZRQwVuzHdhoUT/VSkuvCnrmM408vzPc/PbeBKFaj5ldx792PslXhMBJvDOdfhFKA+wEXKvpik3cVApp1Gq4KaVDVH6sGZzhEeCVDbl6a1zIeVLuerO/8NY/SmJqVm27xK7sFYiLS7HIdNF464MCa7ZrgOgIHMQRpz5bgxPLXJhyXQHmzkqqE2bit9ilz0Bo6M7mf74Pa5C1mpAjVvkrsxhtJRAbFhPONzHU6BOgLkgEUY7CTNGMXpOGKgiDWMFLnwdsSxWRbN4Sg31twE2QBf3fW1OY5bqcIzb5J7JNRH7UgxAEW12iVzIQgW+WmInMBSAMLp/dRb5dQn69i7wOBLpnktuZrVwX5ubdxHcuhGdoZ28Hrw9bkMXamCM2+S++4jr1E5UQPGovX2O+c6nAJ24uakzQAE0/tpyVXgFotnNpeSE2H0oIOKzBjV4Z9SYd2A05Tytd1f0753pWbRvEnuu3p24svW4k2HWbD51rkOp4AdIT+v3AIMRWRJ4U2NYDNCaWYDEb+dhs4wh9wtLD42wF0rsowP3sDO0E5e7n95roNXqmDMm+S+I9oFtlq81iBit891OAVLiJNfockG5BcfH8zspcEqpy5dwWsrs7hyFq8Fl9KUCLM+9jWs6GZcporPv/hl7n+tay7DV6pgzJvkHpsIk/JU47QPz3Uo88AeAAybMdgZzLbRmC3GIVnaVi4haxMat/fS7lvA6v52rikbJNH/LsKpbvaNPj3HsStVGOZFco9PjNEQAWOz4/TqSJkLbx/5UTM+DKsxlkUu3obbOCiPb6ajzlA/PMpj1tU0xof4WPnPiI+tJSBLeCF8HxOZibk+AaUue/Miue84/BKNI9UAVC1fOMfRFB7Dhrc9YAUQAiwgfydwX/oNluRq8ZPlhS1+bEDNc50c9TaxdvgIy32jxIPvZSIX5Wu7dWikUudrXiT3lzpfzo+UAVZ99ONzHM180UP+z6sEWEKGJOIeA8DhvJmRIljeN8DPvDfSFB/iM2U/ZWSkjhbnLfzg0A84GDk4l8ErddmbF8n9tehhfNlaPJlhSmrnZnm/+SdEfq6ZLIZbMUbYU7yDRqucEivArivAk81R8VQHbwRWcsvY69R4ohzv2oTX4eVzz32OB9oe4CftP5nrE1HqslTwyd0Yw4AJgb0GjxWa63DmEUN+hUYbQj2wkqgjiseZISdZYk03kXLAqt4BfmE2UpaJ8Rel9zM6XsJq/3von+jnxd4X5/gclLp8FXxy7+05Qv3QKHFvLS7HyFyHM88cBQyGOHArGBt7S3dQbQXI0MCeKy1KEmkWPN/Gryqv5Y6xN1ju6eVw25WsqVzD1t6t9IzrYtpKnYuCT+4vHXiKVX31WHYXlQsCcx3OPJMi3/fuBiopH9rIhGMCrytNVrKkWm4j7YA1PYO8cbyarDj4ovc79EZsLHK+lxJ3CQ8eeZCRpH4oK3W2Cj65vxrcSWO0BYC1H/nA3AYzL3VM/oxQGdqCK1nKntLtVFvFROy1DKyCkmSa5gPHeaDyRq5OHOIe74s8vSvAB5bcQywd44+f/2MyucycnoVSl5uCT+47s70EMgtwZscoX9Y41+HMQwnyNzWVYCRHXe+7SJkU0aJu0mQJLfowSResPxZi6MU4B/wL+W+2HzMxkuR4zwLuWnwX2we386VtX9K5Z5Q6CwWd3EeiYSQzQMrTQpHpRUTmOqR5SXgaTJqcPU5RvIG6wWsJ+rqoFRdH3XayVzTgzuVYczzE1pEmKnJj/JX/Pp7Y5WShfw3/ec1/5sEjD/L32/9eE7xSM1TQyf2p3Y+wvNdF3FdDsTc21+HMW0Ic4TFcmTKgh4qhTaw40IQ
1+BxFlpMXF9xCqsROc2QM274kz5Wu4J7sC9yQ286j213813X/lU+0/v/tnXmUFNW9xz+/rt5mn2EWmIFhRhw2RURA1OcGapRnVIjhRY0mQUM2n1tMXp7xZeHFd47xxMSTzROXaIwao0QNRknEKCqaCAwGZFNBGARmBpgBevburq7f+6MKHdpBWpihm+Z+zqnTt6pu3f7W7e5f/+pXdX/38zy89mF+Wv9TY+ANhhTIauP+3PuLGb29BoDafz8vzWqOdlaBvgNUAs0oM1CtIn/PatqtGJtOvABLYfKmbby7JI+G3ApuD/6WTes7eW1DC7dMuYXLR1/OQ2sf4pbFtxBLxNJ9QgZDRpO1xl1VWWNvZGh7jZvD/dwJ6ZZ0VCOA8DSwCygG2lA+j8bClHbuYGVVAe01YwnbysR1zbyxpYwCuvhJ+B6u/cNydnXGuPWUW7lx4o0s2LSAa56/hqaOpvSelMGQwWStcV/3bj0+uxWRWnLt7QTD/nRLOqpRAaQHeBSI4T4eGUG5AukIUNAT4YWJY9BgHk7IYvAbXSyXMqaxgqvjT3DFfS9A/e+Yc8Ic7jz7TtbvXs+sv8zihc0vpPW8DIZMJWuN+/w3n2L8RmgrqKUw3JZuOQYPIQL8HtfAF+N68jMI7RlKWOL8Y8pESiJRohVltD8fpCEvn5v8T1G1cwVXrR2MqnJB7QXMu3gewwqGcfPLN3P9S9fT2NGY1vMyGDKNrDXuC9v+xZSGEdiBPMZcfEa65RzVfDRrZA2wFNgDVAAtCBMJ7zqNzrIa1o0dS817jQTqprL8jUraQ35+Efw129/ewhV/W4XtKMMLh/PIhY9w86SbWdK0hJnzZ/LA6geIO+Z5eIMBstS4b9ryNq2BbZR1n4BonJFTx6ZbkuEjxIE3gDVACWCD5hDcfRrN1ZeyufoEKpYtYvSgL/FyZCpBfw8Ph25n06srOP+55bTEbAK+AFePu5r5M+ZzWuVp3LX8Li6dfykLGxaaJ2oMRz1ZGYh+YMmD1DUpu0smUORsMfH2jEWBjUAz6FiQKhyJI/EK3jv267SUraN63SKOPeHTrCkcznjfAzwSup3P/nMup7Z1MGdchGNz3Kdmzhh2BpeOvJS7lt/Ft175FuNKx3HTpJs4pfKUtJ6hwZAuss5zV1X+1raEae8OoydcyjFnGa898+kCWQ68hu3vRNSPSpSOolpWj7uWZZ1B1m89jq16PSOkiSeDcyle1cgvV5Ty1HqLmOO2cnb12Tx5yZPcdvpttPS0MGfhHL78/JdZ0rTEePKGo46sc2lf/OefidNCze7pbAk7nDTz5HRLMqTMboLxxcAQxBlNQgrJszeD+ml1RrCgaTgVuccyJed+ng5/n8vW/YDFQ4ezsivGtMZX6Fq2nNyTJwFwzbhrqN9ezz+2/YM5C+cwLH8YZw47k7riOj43+nPpPU2D4TCQdcb952se4dT1sKtoAoWynZyCYLolGT4xzSDNQBVd/pEE7TjHr7uHXcXjaBpyMs92/Yg8aye357zOg1sivNJax1PjZ/BW2yYuXLOTY8eU4rf8nFp5KpMHT2bFjhW8vu11Hnv7McpzyklogotHXEx+MD/dJ2owDBiSrsvVyZMna319fb+2uXT1Ir5cfyN3PDGCTcNv4IzPVHPiBSMPur1ff/2b/ajOcHAIQiU59mBGrnma4sh7rB19Di3DJkF8CIqF42/hpUAB6yotIscPorblPUbvqccu3kCiNETQCuH3+YlEIzR1NtEWayPgCzC+fDw3TryRE8tPxCdZF6E0ZC8pJcnKKuM+/cGZ5DZt5NJV19OdX841d1+CFTj4H60x7pmEUCj5VDbsoLJhMbFgLivHn8zQKmFX50m02CNwUDb6E6wrE9aMLyTH38WoDcvJ6aqnI7iRnSVRokEHCwvLZxF34ihKUbCI82vPZ3rtdE6qOImAFUj3yRoMH0dKxj1rwjJ/fPEhGmUDd7xex8bqY5k0reiQDLsh01DatJ22mhDO4Im
UrW3glGWLaM8fSmh0E2fWbGZT9N8I9JxDXXMR5zd3sD7XZmXNFDZOnkphtJ3abe9R2dRARXQbTmgPjXkR3g/vpj0nwrx35zHv3Xn4xEdlXiWjSkZRW1RLZV4lV4wxk6objjyywnNv2PYOMxbO5qJlXYzZcyM9+aVc8+uLD9m4G889c9kxOMDpm96ias12rM4e4oFcgkMDVNZsorV4DK/rRbS2TyCgFp2ibM6N825lkHeOySNWHKS4exfV2zcxuLmJ8h3NFEYaaazYzdayLlqLonSH3UdwBKGuqI7xFeMZVzaO40uPp66kjoDPePeGtHF0hGU6OyKc98R/UNLazNcWz2Br1TSmXTWK48449Ik5jJTNIcIAAA3qSURBVHHPXBorc9yCKmO3beC4hg3kNPZAAhAlWGBjFfl4f9BpbMg7mc5AHT4J4qhDh9NGlxWhLdxDW24UJ6RYPqXIjlAS3UVBVxuBnjaiiTY66WRPXpRtZTbbS5Q9eeCz/NQEhjKqsI7jKsYxofpkRpePIewPQ/2DBxY/+eoB7RtD1pP9YZl3Nq/iyhe/TdXORr6x6Cw21k6jdnisXwy7IbP54NstwtvDRvL2sJFY8TijWjYytGUHQ3a3oHtsBm9/g/LYUhzxESk6lpbSE9hdPJKO/GqG9Pig2yGvazt5ndvI74iQ39lMXkcj4eiuPn9BDhDJF5pKtvJ+eQMry//Oc2XCtlII4qeqy8cxUWVkzGGM41Dth9ygUDyoBIL5ECoA8UHBECga5i7hosPXcYajhpQ8dxGZDvwcsID7VfXHSftDuNmgJgGtwGWq2vBxbR6K575zVzPfWfhT1kQWcdHyEGN2zWRn2SQqSqPMum064uufGZeM535kEhH3EUcLmyp/E+PDa6lhJzmJdsDB0TBd9hi26iQ2aS07nDK6tRA/oQ/aiOIQEZsu4kS1Gx9tBO1d5EZbKOncQfXuHQxvbSIv1vPh++ZabCsVmgYlaC4Rmktge7HFnrw8AuRQFLcYFHcoiycYHLcpTcQp0RjhhCBOiKivgIiU0EIJrTKINl853VYZUV8JQX+QUNAiGAoQCgXJDQfJCwe4aEIlJfkhrAT0dNt0d8bp7ojT1Rajuz1OtCuOOorjAKq0tfbgD/oIBC33NWQRzPETyvUTzPETCFmMO8s4RxlO/4RlRMQC3gU+BWwFlgFXqOraXnWuBcar6tdF5HLgM6p62ce1e7DG/YEX7+NnW++loKubOx4uZe1x/436LOrGCudcfx6Wv/9uohrjnl0EJcbw0FaGBXZQFWql2NqByIff/55EKS32SFrsGrbb1ey2B9ORKCXqFNF7MLdgE/B1EpJ2QrQRtLvwxeNIzEaiNkRtJO4garM3k300ANGgjx6/EAsIcT844sPxWTg+C9dv8iP4QfwfKbv7LW+bBSRwiKPYKHHARiXuLsRB4qjEQOLgiyMkEFF8olgoIg6QQMVBAUVRcV8dQEVwRHF8kBCwfWD7hLgPEpbgiA8VCxU/Kn7E8eNzBBw/gbhFKAa53TYFXVF2FpfSUlSKQwDEvVchIiCKSAKI4/i6caSDmNWNZYUot4+hx9+FSgxLbALYoFF80kmOP4FPE+QEYvhVCMSC+HryCIQi5BQ2kmPbBEkQ0AQBdQhoAsuxCaiNz0lgq9Bj5ZBI+PBpmJgvSCyYT6BkCAkrF9vKQwN5EAijgTAayAF/AAn4USsB2kl+zmAK82sIBXwEfEJIYwTtHgI9rQQ7mrEsC607H0TotBNEonG2dETZFOkm2mXzqepBHFdVeLBf5X4Ly0wBNqjqRu9D+SMwA1jbq84MYK5X/hPwKxERHYCA/tSRp3NP46uM6sqnoGoZQwp3cs6tn6OwLKe/38qQZcQ0yIaeEWzoGUGkIx8Lm1L/bsr8uyj0dVBotROWCLnWm4z2LyEsUfySwHYCtDtltCcqaNdyuhKD6NF8olpAj1NAu78Cx/LjhC1U/SgWqnsNNqAOoO4vUl1TaqlrZH2aQBK
u0RHd+5rApzbi7H2N9lp396v4SVhBEr4gCSuIY+V75RAJK4RjZcjgvSAM7nKXgUJxb7WMDL/EucXP9VmnW4N0EyRGAAuHADZ+EgRIEBIvk2hz/2la69Rw4cdMFtZ51ohDMe4pkYrnPguYrqpzvPUvAKeo6nW96qz26mz11t/z6rQktfVV4Kve6mjgnf46kQGiDGg5YK3MwegdWIzegeVI0ptOrS2qOv1AlVLx3Pu6BEj+R0ilDqp6L3BvCu+ZEYhIvapOTreOVDF6Bxajd2A5kvQeCVpTCVBvBap7rQ8Dkqe9+aCOiPiBItwpdgwGg8GQBlIx7suAkSJyjIgEgcuBZ5LqPAN8ySvPAl4aiHi7wWAwGFLjgGEZVbVF5Drgedw7RA+o6hoR+RFQr6rPAL8FHhaRDbge++UDKfowcsSEkDyM3oHF6B1YjiS9Ga81bSNUDQaDwTBwmMxaBoPBkIUY424wGAxZiDHuuOkVROQdEdkgIrf0sT8kIo97+5eISO3hV7mPngPpnS0iO0VkhbfMSYdOT8sDIrLDGwvR134RkV945/KWiEw83BqT9BxI71QRifTq2x8cbo1JeqpFZJGIrBORNSJyYx91MqKPU9SaMf0rImERWSoiKz29/9tHnYyyDfugqkf1gnuT+D1gBBAEVgLHJdW5FviNV74ceDzD9c4GfpXuvvW0nAVMBFbvZ/+FwF9xx0qcCizJcL1TgWfT3a+99FQCE71yAW6qkOTvQ0b0cYpaM6Z/vf7K98oBYAlwalKdjLENyYvx3HulV1DVGLA3vUJvZgAPeeU/AeeKSP9kJ/vkpKI3Y1DVV/n4MQ8zgN+ryxtAsYhUHh51HyUFvRmFqjap6pteuR1YBwxNqpYRfZyi1ozB668ObzXgLclPoGSSbdgHY9zdL9eWXutb+egX7oM6qmoDEaD0sKj7KKnoBfisdwn+JxGp7mN/ppDq+WQSp3mX6n8VkePTLWYvXkjgJFwPszcZ18cfoxUyqH9FxBKRFcAO4AVV3W/fZoBt2Adj3PsxvcJhIhUtfwFqVXU88Hc+9CwykUzq21R4E6hR1ROBXwJ/TrMeAEQkH3gSuElV25J393FI2vr4AFozqn9VNaGqE3BH5k8RkXFJVTKqb3tjjPuRl17hgHpVtVVVo97qfbh59jOVVPo/Y1DVtr2X6qq6AAiISFk6NYlIANdYPqqqT/VRJWP6+EBaM7F/PS17gJeB5IRdmWQb9sEY9yMvvcIB9SbFUy/BjW1mKs8AX/Se6DgViKhqU7pF7Q8RGbI3pioiU3B/Q61p1CO4I8TXqerP9lMtI/o4Fa2Z1L8iUi4ixV45BzgPeDupWibZhn04oqfZ6w/0CEuvkKLeG0TkEsD29M5Ol14ReQz3CYgyEdkK/BD3xhSq+htgAe7THBuALiCtE4ymoHcW8A0RsYFu4PI0/5hPB74ArPJiwwC3AsMh4/o4Fa2Z1L+VwEPiTljkA55Q1Wcz1TYkY9IPGAwGQxZiwjIGg8GQhRjjbjAYDFmIMe4Gg8GQhRjjbjAYDFmIMe4Gg8GQhRjjbvjEiEjHgWvt99jrvAx62p+DU0TkBi/b4KP91Wavtm9Nsd6Cvc9FDwQiMldEvj1Q7RuyC2PcDYeb13EHg2zu53avBS5U1Ss/6YHe4J6P+y2kZNxV9UJvJKPBkHaMcTccNJ5R/ImIrBaRVSJymbfdJyJ3ezmwn/U82lkAqvovVW3oo625IvKwiLwkIutF5Cv7ec+bvfdbLSI3edt+g5sC+RkR+WZS/dkiMl9E/iZuDvwfettrPU//btx8JtUicoV3HqtF5A6v3o+BHHFziz/qbbtK3DzfK0TkHm+QCyLSICJlvdq+z+uDhd4Ix966irz6Pm89V0S2iEhARL4iIsvETZ71pIjk9tEPL4vIZK9cJiINXtnyPpNl4iaO+5q3vVJEXvU0rxaRM1P4iA1HMunOOWyWI28BOrzXzwIv4I6UHQy8jzuqbxbuqEgfMATYDcx
KaqMBKOu1Phc3N30OUIabaa8q6ZhJwCogD8gH1gAn9dVer2NmA024mfpygNXAZKAWcPDycwNVnv5y3JHbLwEze5+vVx6Lm5gt4K3fDXyxtwavbRuY4G1/AriqD23zgWle+TLgfq9c2qvO/wHX9+qjb3vll4HJXrkMaPDKXwW+55VDQD1wDPAt4H+87RZQkO7vkVkGdjGeu+FQOAN4TN3MeduBV4CTve3zVNVR1WZgUYrtzVfVblVt8Y6Z0sf7Pa2qneoml3oKSMUDfUHdZGrd3jFneNs3q5vfHE/3y6q6U93UrY/iTtyRzLm4fzLLvCH05+JeNSSzSVX3DrFfjmvwk3kc16iDN9GDVx4nIotFZBVwJfBJ0t6ej5tHZgVuOt1SYCRuTqKrRWQucIK6+dQNWcxRn1vGcEjsb1KCg52sIDkXRiqplw+l3c6DaFuAh1T1uweoF+1VTuBeNSTzDHC7iAzC/cN4ydv+O9yrhpUiMhs3100yNh+GVcNJ+q5X1ec/IlzkLODTuLlQfqKqvz/AORiOYIznbjgUXgUu8+K85bie7lLgNdzJQnwiMpi+jVNfzBB33spS75hlfbzfTC8+nQd8BlicQrufEpFBXtx7Ju5N3WSWAGd78WsLuAL3SgQgLm6qWoAXgVkiUgHgtVuT4vntg3f1sRT4Oe7UcglvVwHQ5L3n/m4QN/BhKudZvbY/j5t4K+DpGyUieZ7GHap6H26yq7TOVWsYeIznbjgUngZOw42VK/AdVW0WkSdxwxWrcefJXII7Qw0icgPwHdxY/FsiskBV907gvRR4DjdL4G2qmpyn/k0R+Z1XD9wY9b9S0Pka8DBQB/xBVeslaSJjVW0Ske/ihoMEWKCq873d93pa31TVK0Xke8BC72ZoHPhPDv7pn8eBeez7B/h93D7bjHuPoaCP4+4EnhCRL/Chxw9wP24I6E0REWAn7h/aVOC/RCQOdABfPEi9hiMEkxXSMCCISL6qdnhe+FLgdC/+vr/6c3FvXN7Zzzpm4954vK4/2zUYMh3juRsGimfFHdATxPXC92vYDQZD/2M8d4PBYMhCzA1Vg8FgyEKMcTcYDIYsxBh3g8FgyEKMcTcYDIYsxBh3g8FgyEL+H9lLsqeylfOSAAAAAElFTkSuQmCC\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pylab as plt\n", + "import seaborn as sns\n", + "df_samples = c.raw_samples.apply(lambda x: np.log1p(x))\n", + "for n in range(0,15,1):\n", + " sns.distplot(df_samples.iloc[:,n],label=df_samples.index.values[n])\n", + "plt.xlabel('log1p of protein values')\n", + "plt.title('Sample distributions')\n", + "sns.despine()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df_replicates = c.raw_replicates.fillna(0).apply(lambda x: np.log10(x+1))\n", + "for n in range(0,len(df_replicates.columns)-1,1):\n", 
+ " sns.distplot(df_replicates.iloc[:,n],label=df_replicates.index.values[n])\n", + "plt.xlabel('log1p of protein values')\n", + "plt.title('Replicate distributions')\n", + "sns.despine()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:cohorts_test]", + "language": "python", + "name": "conda-env-cohorts_test-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}