Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #60 from Mesnage-Org/metadata
Metadata
- Loading branch information
Showing
16 changed files
with
14,337 additions
and
14,134 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ __pycache__/ | |
# outputs | ||
*Cleaned.csv | ||
eg_output.csv | ||
results*.csv | ||
|
||
#IDE | ||
.vscode/ | ||
|
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# Data Dictionary | ||
|
||
*This document is a work in progress.* | ||
|
||
## Mass Lists | ||
|
||
Format: `CSV` (`.csv`) | ||
|
||
| Column | Description | Unit | | ||
|---|---|---| | ||
| Structure | Structure code | NA | | ||
| Monoisotopicmass | Monoisotopic mass | *TBD* | | ||
|
||
## FTRS Input Files | ||
|
||
Format: `.ftrs` | ||
|
||
## MaxQuant Input Files | ||
|
||
Format: `TSV` (`.txt`) | ||
|
||
## FTRS Output Files | ||
|
||
Format: `CSV` (`.csv`) | ||
|
||
The header of the first column holds [embedded metadata](#embedded-metadata) describing the provenance of the file. Subsequent columns are defined as follows: | ||
|
||
| Column | Description | Unit | | ||
|---|---|---| | ||
| ID | *TBD* | *TBD* | | ||
| xicStart | *TBD* | *TBD* | | ||
| xicEnd | *TBD* | *TBD* | | ||
| feature | *TBD* | *TBD* | | ||
| corrMax | *TBD* | *TBD* | | ||
| ionCount | *TBD* | *TBD* | | ||
| chargeOrder | *TBD* | *TBD* | | ||
| maxIsotopeCount | *TBD* | *TBD* | | ||
| rt | *TBD* | *TBD* | | ||
| mwMonoisotopic | *TBD* | *TBD* | | ||
| theo_mwMonoisotopic | *TBD* | *TBD* | | ||
| inferredStructure | *TBD* | *TBD* | | ||
| maxIntensity | *TBD* | *TBD* | | ||
|
||
## MaxQuant Output Files | ||
|
||
Format: `CSV` (`.csv`) | ||
|
||
The header of the first column holds [embedded metadata](#embedded-metadata) describing the provenance of the file. Subsequent columns are defined as follows: | ||
|
||
| Column | Description | Unit | | ||
|---|---|---| | ||
| ID | *TBD* | *TBD* | | ||
| rt | *TBD* | *TBD* | | ||
| rt_length | *TBD* | *TBD* | | ||
| mwMonoisotopic | *TBD* | *TBD* | | ||
| theo_mwMonoisotopic | *TBD* | *TBD* | | ||
| inferredStructure | *TBD* | *TBD* | | ||
| maxIntensity | *TBD* | *TBD* | | ||
|
||
## Embedded Metadata | ||
|
||
| Data | Description | | ||
|---|---| | ||
| file | Input data file | | ||
| masses_file | Mass list file | | ||
| modifications | List of modifications (*TBD*) | | ||
| ppm | Tolerance in parts per million (ppm) (*TBD*) | | ||
| rt_window | *TBD* | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,21 @@ | ||
import pgfinder.matching as matching | ||
import pgfinder.pgio as pgio | ||
import pgfinder.validation as validation | ||
|
||
csv_filepath = "data/masses/e_coli_monomer_masses.csv" | ||
mq_filepath = "data/maxquant_test_data.txt" | ||
csv_filepath = "data/masses/e_coli_monomer_masses.csv" | ||
|
||
raw_data = matching.maxquant_file_reader(mq_filepath) | ||
validation.validate_raw_data_df(raw_data) | ||
masses = pgio.ms_file_reader(mq_filepath) | ||
validation.validate_raw_data_df(masses) | ||
|
||
theo_masses = matching.theo_masses_reader(csv_filepath) | ||
theo_masses = pgio.theo_masses_reader(csv_filepath) | ||
validation.validate_theo_masses_df(theo_masses) | ||
|
||
mod_test = ['Sodium','Potassium','Anhydro','DeAc','Deacetyl_Anhydro','Nude','Decay','Amidation','Amidase','Double_Anh','multimers_Glyco'] | ||
validation.validate_enabled_mod_list(mod_test) | ||
|
||
results = matching.data_analysis(raw_data, theo_masses, 0.5, mod_test, 10) | ||
results = matching.data_analysis(masses, theo_masses, 0.5, mod_test, 10) | ||
|
||
print(results.attrs['metadata']) | ||
print(results) | ||
|
||
print(results) | ||
pgio.dataframe_to_csv_metadata(save_filepath='./', output_dataframe=results) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,21 @@ | ||
import pgfinder.matching as matching | ||
import pgfinder.pgio as pgio | ||
import pgfinder.validation as validation | ||
|
||
csv_filepath = "data/masses/e_coli_monomer_masses.csv" | ||
ftrs_filepath = "data/ftrs_test_data.ftrs" | ||
csv_filepath = "data/masses/e_coli_monomer_masses.csv" | ||
|
||
raw_data = matching.ftrs_reader(ftrs_filepath) | ||
validation.validate_raw_data_df(raw_data) | ||
masses = pgio.ms_file_reader(ftrs_filepath) | ||
validation.validate_raw_data_df(masses) | ||
|
||
theo_masses = matching.theo_masses_reader(csv_filepath) | ||
theo_masses = pgio.theo_masses_reader(csv_filepath) | ||
validation.validate_theo_masses_df(theo_masses) | ||
|
||
mod_test = ['Sodium','Potassium','Anhydro','DeAc','Deacetyl_Anhydro','Nude','Decay','Amidation','Amidase','Double_Anh','multimers_Glyco'] | ||
validation.validate_enabled_mod_list(mod_test) | ||
|
||
results = matching.data_analysis(raw_data, theo_masses, 0.5, mod_test, 10) | ||
results = matching.data_analysis(masses, theo_masses, 0.5, mod_test, 10) | ||
|
||
print(results.attrs['metadata']) | ||
print(results) | ||
|
||
print(results) | ||
pgio.dataframe_to_csv_metadata(save_filepath='./', output_dataframe=results) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,22 @@ | ||
import pgfinder.matching as matching | ||
import pgfinder.pgio as pgio | ||
import pgfinder.validation as validation | ||
|
||
# Set mass database and modifications | ||
csv_filepath = "data/masses/e_coli_monomer_masses.csv" | ||
theo_masses = matching.theo_masses_reader(csv_filepath) | ||
theo_masses = pgio.theo_masses_reader(csv_filepath) | ||
mod_test = ['Sodium','Potassium','Anhydro','DeAc','Deacetyl_Anhydro','Nude','Decay','Amidation','Amidase','Double_Anh','multimers_Glyco'] | ||
|
||
# Generate maxquant baseline | ||
mq_file_name = "data/maxquant_test_data.txt" | ||
raw_data_mq = matching.maxquant_file_reader(mq_file_name) | ||
results = matching.data_analysis(raw_data_mq, theo_masses, 0.5, mod_test, 10) | ||
results.to_csv("data/baseline_output_mq.csv") | ||
mq_filepath = "data/maxquant_test_data.txt" | ||
masses_mq = pgio.ms_file_reader(mq_filepath) | ||
validation.validate_raw_data_df(masses_mq) | ||
results = matching.data_analysis(masses_mq, theo_masses, 0.5, mod_test, 10) | ||
pgio.dataframe_to_csv_metadata(save_filepath='./data/', output_dataframe=results, filename='baseline_output_mq.csv') | ||
|
||
# Generate ftrs baseline | ||
ftrs_file_name = "data/ftrs_test_data.ftrs" | ||
raw_data_ftrs = matching.ftrs_reader(ftrs_file_name) | ||
results = matching.data_analysis(raw_data_ftrs, theo_masses, 0.5, mod_test, 10) | ||
results.to_csv("data/baseline_output_ftrs.csv") | ||
ftrs_filepath = "data/ftrs_test_data.ftrs" | ||
masses_ftrs = pgio.ms_file_reader(ftrs_filepath) | ||
validation.validate_raw_data_df(masses_ftrs) | ||
results = matching.data_analysis(masses_ftrs, theo_masses, 0.5, mod_test, 10) | ||
pgio.dataframe_to_csv_metadata(save_filepath='./data/', output_dataframe=results, filename='baseline_output_ftrs.csv') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.