-
Notifications
You must be signed in to change notification settings - Fork 4
/
FASTAParser.java
72 lines (59 loc) · 1.53 KB
/
FASTAParser.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.HashMap;
import java.util.Map;
/**
 * Reads a multiple FASTA file containing DNA sequences in FASTA format
 * and returns the entries as a map from header to sequence.
 *
 * @author Alexander Herbig
 *
 */
public class FASTAParser
{
    /**
     * Reads a multiple FASTA file containing DNA sequences in FASTA format.
     * <p>
     * Everything before the first header line (a line starting with
     * {@code '>'}) is ignored, as are empty lines anywhere in the file.
     * The lines following a header are concatenated, without separators,
     * into the sequence of that entry. If the same header occurs more than
     * once, the last occurrence wins.
     *
     * @param filename path of the multiple FASTA file to read
     * @return a map from each header line (without the leading {@code '>'})
     *         to its concatenated sequence; empty if the file contains no
     *         header line
     * @throws Exception if the file cannot be opened or read
     */
    public static Map<String,String> parseDNA(String filename) throws Exception
    {
        Map<String,String> fastaEntries = new HashMap<String, String>();
        BufferedReader br = new BufferedReader(new FileReader(filename));
        try
        {
            String tmpLine = br.readLine();

            //jump to first header line; the length check must come first so
            //that charAt(0) is never evaluated on an empty line
            while(tmpLine!=null && (tmpLine.length()==0 || tmpLine.charAt(0)!='>'))
            {
                tmpLine = br.readLine();
            }

            //empty file or no header at all: there is nothing to parse
            if(tmpLine==null)
            {
                return fastaEntries;
            }

            String tmpID = tmpLine;
            StringBuilder tmpSeqString = new StringBuilder();

            tmpLine = br.readLine();
            while(tmpLine!=null)
            {
                if(tmpLine.length()!=0)
                {
                    if(tmpLine.charAt(0)=='>')
                    {
                        //a new header closes the entry collected so far
                        fastaEntries.put(toID(tmpID), tmpSeqString.toString());
                        tmpSeqString = new StringBuilder();
                        tmpID = tmpLine;
                    }
                    else
                    {
                        tmpSeqString.append(tmpLine);
                    }
                }
                tmpLine = br.readLine();
            }

            //store the last entry, which is not followed by another header
            fastaEntries.put(toID(tmpID), tmpSeqString.toString());
            return fastaEntries;
        }
        finally
        {
            //release the file handle even if reading failed
            br.close();
        }
    }

    /**
     * Converts a FASTA header line into the key used in the result map.
     *
     * @param fastaID the complete header line, including the leading {@code '>'}
     * @return the header line without its first character
     */
    private static String toID(String fastaID)
    {
        return(fastaID.substring(1));
    }
}