Skip to content

Commit

Permalink
Features/filtering (#17)
Browse files Browse the repository at this point in the history
* Add missing setter methods

* Add include-scores option
  • Loading branch information
lukfor committed Dec 8, 2023
1 parent 1eba846 commit e856f31
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ public class ApplyScoreCommand implements Callable<Integer> {
@Option(names = { "--samples" }, description = "Include only samples from this file", required = false)
String includeSamplesFilename = null;

/**
 * Optional file listing the scores to include; {@code null} when the option is
 * not given. The value is handed to the task via
 * {@code setIncludeScoreFilename} in {@code call()}.
 */
@Option(names = { "--includeScores",
"--include-scores" }, description = "Include only scores from this file. Works only with score collections.", required = false)
String includeScoresFilename = null;

@Option(names = { "--report-json", "--info" }, description = "Write statistics to json file", required = false)
String reportJson = null;

Expand Down Expand Up @@ -202,6 +206,7 @@ public Integer call() throws Exception {
}
task.setIncludeVariantFilename(includeVariantFilename);
task.setIncludeSamplesFilename(includeSamplesFilename);
task.setIncludeScoreFilename(includeScoresFilename);
task.setOutput(taskPrefix + ".scores.txt");
task.setFixStrandFlips(fixStrandFlips);
task.setRemoveAmbiguous(removeAmbiguous);
Expand Down
14 changes: 14 additions & 0 deletions src/main/java/genepi/riskscore/commands/FilterMetaCommand.java
Original file line number Diff line number Diff line change
Expand Up @@ -70,5 +70,19 @@ private boolean accept(MetaFile.MetaScore score, String category, String populat
return true;
}

/**
 * Sets the {@code meta} field of this command.
 *
 * @param meta value to store (presumably the meta file name; confirm against
 *             the field's option declaration)
 */
public void setMeta(String meta) {
this.meta = meta;
}

/**
 * Sets the {@code category} field of this command.
 *
 * @param category value to store
 */
public void setCategory(String category) {
this.category = category;
}

/**
 * Sets the {@code population} field of this command.
 *
 * @param population value to store
 */
public void setPopulation(String population) {
this.population = population;
}

/**
 * Sets the {@code out} field of this command.
 *
 * @param out value to store (presumably the output file name; confirm against
 *            the field's option declaration)
 */
public void setOut(String out) {
this.out = out;
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package genepi.riskscore.io.scores;

import genepi.io.text.LineReader;
import genepi.riskscore.io.Chunk;
import genepi.riskscore.io.csv.CsvWithHeaderTableReader;
import genepi.riskscore.io.csv.TabixTableReader;
Expand All @@ -23,6 +24,8 @@ public class MergedRiskScoreCollection implements IRiskScoreCollection {

private String filename;

private String includeScoresFilename;

private boolean verbose = false;

private Map<String, MergedVariant> variantsIndex = new HashMap<String, MergedVariant>();
Expand Down Expand Up @@ -52,8 +55,9 @@ public class MergedRiskScoreCollection implements IRiskScoreCollection {
COLUMNS.add(COLUMN_OTHER_ALLELE);
}

public MergedRiskScoreCollection(String filename) {
/**
 * Creates a collection for the given merged score file.
 *
 * @param filename              name of the merged score file
 * @param includeScoresFilename file listing the scores to include; may be
 *                              {@code null}, in which case {@code buildIndex}
 *                              reads no include list
 */
public MergedRiskScoreCollection(String filename, String includeScoresFilename) {
this.filename = filename;
this.includeScoresFilename = includeScoresFilename;
}

@Override
Expand All @@ -74,6 +78,19 @@ public String getVersion() {
@Override
public void buildIndex(String chromosome, Chunk chunk, String dbsnp, String proxies) throws Exception {

// Names of the scores to keep, read from the optional include file. An empty
// list means that no filtering by score name is applied further down.
List<String> includedScores = new Vector<String>();

if (includeScoresFilename != null) {
    LineReader lineReader = new LineReader(includeScoresFilename);
    try {
        while (lineReader.next()) {
            // One score name per line; surrounding whitespace and blank lines are ignored.
            String score = lineReader.get().trim();
            if (!score.isEmpty()) {
                includedScores.add(score);
            }
        }
    } finally {
        // Release the reader even when reading fails part-way through.
        lineReader.close();
    }
}

String metaFilename = filename + META_EXTENSION;
File metaFile = new File(metaFilename);
if (!metaFile.exists()){
Expand All @@ -95,12 +112,25 @@ public void buildIndex(String chromosome, Chunk chunk, String dbsnp, String prox
TabixTableReader reader = new TabixTableReader(filename, chromosome, chunk.getStart(), chunk.getEnd());
String[] columns = reader.getColumns();

numberRiskScores = columns.length - COLUMNS.size();
Map<String, Integer> scoreToColumn = new HashMap<String, Integer>();
summaries = new RiskScoreSummary[numberRiskScores];
for (int i = 0; i < columns.length; i++){
String column = columns[i];
if (COLUMNS.contains(column)){
continue;
}
if (!includedScores.isEmpty() && !includedScores.contains(column)){
continue;
}
scoreToColumn.put(column, i);
}


numberRiskScores = scoreToColumn.size();
summaries = new RiskScoreSummary[numberRiskScores];
int index = 0;
for (String column: columns){
if (COLUMNS.contains(column)){
if (!scoreToColumn.containsKey(column)){
continue;
}
summaries[index] = new RiskScoreSummary(column);
Expand Down Expand Up @@ -129,7 +159,7 @@ public void buildIndex(String chromosome, Chunk chunk, String dbsnp, String prox
}
index = 0;
for (String column: columns){
if (COLUMNS.contains(column)){
if (!scoreToColumn.containsKey(column)){
continue;
}
if (reader.getString(column).equals("") || reader.getString(column).equals(".")){
Expand Down
8 changes: 7 additions & 1 deletion src/main/java/genepi/riskscore/tasks/ApplyScoreTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ public class ApplyScoreTask implements ITaskRunnable {

private String proxies;

private String includeScoreFilename = null;

private boolean fixStrandFlips = false;

private boolean removeAmbiguous = false;
Expand Down Expand Up @@ -114,6 +116,10 @@ public void setIncludeSamplesFilename(String includeSamplesFilename) {
this.includeSamplesFilename = includeSamplesFilename;
}

/**
 * Sets the file name that {@code run} passes to
 * {@code MergedRiskScoreCollection} as its include-scores file.
 *
 * @param includeScoreFilename include-scores file name; the field defaults to
 *                             {@code null}
 */
public void setIncludeScoreFilename(String includeScoreFilename) {
this.includeScoreFilename = includeScoreFilename;
}

/**
 * Sets the {@code genotypeFormat} field of this task.
 *
 * @param genotypeFormat value to store
 */
public void setGenotypeFormat(String genotypeFormat) {
this.genotypeFormat = genotypeFormat;
}
Expand Down Expand Up @@ -172,7 +178,7 @@ public void run(ITaskMonitor monitor) throws Exception {
//TODO: move to factory
if (riskScoreFilenames.length == 1 && new File(riskScoreFilenames[0]).exists() &&
RiskScoreFormatFactory.readHeader(riskScoreFilenames[0]).startsWith(MergedRiskScoreCollection.HEADER)) {
collection = new MergedRiskScoreCollection(riskScoreFilenames[0]);
collection = new MergedRiskScoreCollection(riskScoreFilenames[0], includeScoreFilename);
} else {
collection = new RiskScoreCollection(riskScoreFilenames, formats);
}
Expand Down

0 comments on commit e856f31

Please sign in to comment.