Skip to content
This repository has been archived by the owner on Jan 21, 2021. It is now read-only.

Commit

Permalink
Updated browse tag export to properly quote tsv/csv.
Browse files Browse the repository at this point in the history
  • Loading branch information
gijskant committed May 25, 2016
1 parent c6908f9 commit 28fe984
Show file tree
Hide file tree
Showing 8 changed files with 149 additions and 9 deletions.
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ dependencies {
compile group: 'com.google.guava', name: 'guava', version: GUAVA_VERSION
compile group: 'commons-cli', name: 'commons-cli', version: '1.2'
compile group: 'org.hibernate', name: 'hibernate-validator', version: HIBERNATE_VALIDATOR_VERSION
compile group: 'com.opencsv', name: 'opencsv', version: '3.7'

compile group: 'ch.qos.logback', name: 'logback-classic', version: LOGBACK_VERSION
compile group: 'org.slf4j', name: 'jcl-over-slf4j', version: SLF4J_VERSION
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package org.transmartproject.batch.batchartifacts

import com.opencsv.CSVWriter
import groovy.transform.CompileStatic
import org.springframework.batch.item.file.transform.ExtractorLineAggregator

/**
 * Writes lines in CSV format using the opencsv library.
 *
 * The separator, quote character, escape character and line ending are
 * configurable properties; they default to the corresponding
 * {@link CSVWriter} defaults. Set {@code lineEnd} to the empty string when
 * the caller appends its own line separator.
 */
@CompileStatic
class CsvLineAggregator<T> extends ExtractorLineAggregator<T> {

    /** Character written between fields. */
    char separator = CSVWriter.DEFAULT_SEPARATOR
    /** Character used to quote fields. */
    char quotechar = CSVWriter.DEFAULT_QUOTE_CHARACTER
    /** Character used to escape the quote/escape characters inside a field. */
    char escapechar = CSVWriter.DEFAULT_ESCAPE_CHARACTER
    /** String appended after the last field of every line. */
    String lineEnd = CSVWriter.DEFAULT_LINE_END

    /**
     * Formats the given fields as a single CSV line.
     *
     * @param fields the field values; each one is converted with
     *               {@code toString()}.
     * @return the CSV representation of the fields, terminated by
     *         {@code lineEnd}.
     */
    @Override
    protected String doAggregate(Object[] fields) {
        // Write straight into a character buffer. Going through a byte stream
        // would encode and decode the text with the platform default charset
        // and could corrupt characters that charset cannot represent.
        StringWriter buffer = new StringWriter()
        CSVWriter csvWriter = new CSVWriter(buffer, separator, quotechar, escapechar, lineEnd)
        try {
            String[] values = new String[fields.length]
            for (int i = 0; i < fields.length; i++) {
                values[i] = fields[i].toString()
            }
            // false: do not force quotes around every field.
            csvWriter.writeNext(values, false)
        } finally {
            // Closing the CSV writer is expected to flush pending output into the buffer.
            csvWriter.close()
        }
        buffer.toString()
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ class BrowseTagAssociationDatabaseReader implements ItemStreamReader<BrowseTagAs
-1 as tag_item_id,
'study_description' as code,
'Study description' as display_name,
0 as display_order,
1 as display_order,
cast (exp.description as varchar(4000)) as value,
cast (exp.description as varchar(4000)) as description
FROM $Tables.FM_FOLDER f
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ import groovy.util.logging.Slf4j
import org.springframework.batch.item.ItemWriter
import org.springframework.batch.item.file.FlatFileHeaderCallback
import org.springframework.batch.item.file.FlatFileItemWriter
import org.springframework.batch.item.file.transform.DelimitedLineAggregator
import org.springframework.batch.item.file.transform.FieldExtractor
import org.springframework.core.io.Resource
import org.transmartproject.batch.batchartifacts.CsvLineAggregator

import javax.annotation.PostConstruct

Expand All @@ -19,8 +19,9 @@ class BrowseTagTypeFlatFileWriter implements ItemWriter<BrowseTagValue> {
@Delegate
FlatFileItemWriter<BrowseTagType> delegate

DelimitedLineAggregator<Collection<String>> valuesAggregator = new DelimitedLineAggregator<>(
fieldExtractor: { Collection<String> s -> s as Object[] } as FieldExtractor
CsvLineAggregator<Collection<String>> valuesAggregator = new CsvLineAggregator<>(
lineEnd: '',
fieldExtractor: { Collection<String> s -> s as Object[] } as FieldExtractor
)

private final Resource resource
Expand All @@ -33,8 +34,9 @@ class BrowseTagTypeFlatFileWriter implements ItemWriter<BrowseTagValue> {
void init() {
delegate = new FlatFileItemWriter(
resource: resource,
lineAggregator: new DelimitedLineAggregator<BrowseTagType>(
delimiter: '\t',
lineAggregator: new CsvLineAggregator<BrowseTagType>(
separator: '\t',
lineEnd: '',
fieldExtractor: { BrowseTagType type ->
[ type.folderType.type,
type.displayName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ import org.springframework.batch.core.job.flow.JobExecutionDecider
import org.springframework.batch.core.step.tasklet.TaskletStep
import org.springframework.batch.item.file.FlatFileHeaderCallback
import org.springframework.batch.item.file.FlatFileItemWriter
import org.springframework.batch.item.file.transform.DelimitedLineAggregator
import org.springframework.batch.item.file.transform.FieldExtractor
import org.springframework.beans.factory.annotation.Value
import org.springframework.context.annotation.Bean
import org.springframework.context.annotation.ComponentScan
import org.springframework.context.annotation.Configuration
import org.springframework.core.io.FileSystemResource
import org.springframework.core.io.Resource
import org.transmartproject.batch.batchartifacts.CsvLineAggregator
import org.transmartproject.batch.beans.AbstractJobConfiguration

/**
Expand Down Expand Up @@ -129,8 +129,9 @@ class BrowseTagsExportJobConfiguration extends AbstractJobConfiguration {
Resource resource = browseTagsFileResource(null)
new FlatFileItemWriter(
resource: resource,
lineAggregator: new DelimitedLineAggregator<BrowseTagAssociation>(
delimiter: '\t',
lineAggregator: new CsvLineAggregator<BrowseTagAssociation>(
separator: '\t',
lineEnd: '',
fieldExtractor: { BrowseTagAssociation item ->
['\\',
item.value.type.displayName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ class TagsLoadJobTests implements JobRunningTestTrait {
hasEntry('tag_type', 'SYNONYMS'),
hasEntry(is('tags_idx'), isIntegerNumber(5)),
),
allOf(
hasEntry('path', '\\Public Studies\\GSE8581\\'),
hasEntry('tag', 'Text with tab characters (\t), and\nnew lines.'),
hasEntry('tag_type', 'FREE TEXT'),
hasEntry(is('tags_idx'), isIntegerNumber(6)),
),
)

def runJob = RunJob.createInstance(
Expand Down Expand Up @@ -112,6 +118,12 @@ class TagsLoadJobTests implements JobRunningTestTrait {
hasEntry('tag_type', 'SYNONYMS'),
hasEntry(is('tags_idx'), isIntegerNumber(5)),
),
allOf(
hasEntry('path', '\\Public Studies\\GSE8581\\'),
hasEntry('tag', 'Text with tab characters (\t), and\nnew lines.'),
hasEntry('tag_type', 'FREE TEXT'),
hasEntry(is('tags_idx'), isIntegerNumber(6)),
),
)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package org.transmartproject.batch.batchartifacts

import org.junit.Test
import org.springframework.batch.item.ExecutionContext
import org.springframework.batch.item.ItemStreamReader
import org.springframework.batch.item.file.FlatFileItemReader
import org.springframework.batch.item.file.mapping.DefaultLineMapper
import org.springframework.batch.item.file.mapping.FieldSetMapper
import org.springframework.batch.item.file.separator.DefaultRecordSeparatorPolicy
import org.springframework.batch.item.file.transform.DelimitedLineTokenizer
import org.springframework.batch.item.file.transform.FieldSet
import org.springframework.batch.item.file.transform.LineAggregator
import org.springframework.batch.item.file.transform.PassThroughFieldExtractor
import org.springframework.core.io.ByteArrayResource
import org.springframework.core.io.Resource

import static org.hamcrest.MatcherAssert.assertThat
import static org.hamcrest.Matchers.*

/**
 * Tests the {@link CsvLineAggregator}, to see if it writes lines such that
 * they can be read correctly by the {@link DelimitedLineTokenizer}.
 */
class CsvLineAggregatorTests {

    /**
     * Aggregates every item into a line and writes all lines to the writer.
     * No separator is inserted between the aggregated lines, so the
     * aggregator has to terminate each line itself.
     *
     * @param items the items to write.
     * @param lineAggregator converts an item into its textual line.
     * @param writer the destination of the aggregated lines.
     */
    static <T> void write(List<? extends T> items, LineAggregator<T> lineAggregator, Writer writer) {
        StringBuilder lines = new StringBuilder()
        for (T item : items) {
            lines.append(lineAggregator.aggregate(item))
        }
        writer.write(lines.toString())
    }

    /**
     * Opens the reader, collects all items until it returns {@code null}
     * and closes it again, also when reading fails.
     *
     * @param reader the reader to drain.
     * @return the items in the order in which they were read.
     */
    static <T> List<T> read(ItemStreamReader<T> reader) {
        List<T> result = []
        reader.open(new ExecutionContext())
        try {
            // Typed as T: the helper is generic and must not assume list items.
            T item
            while ((item = reader.read()) != null) {
                result << item
            }
        } finally {
            reader.close()
        }
        result
    }

    /** Header row plus rows containing a tab, a comma and a newline inside a field. */
    private static final List<List<String>> TEST_DATA = [
            [ 'Column 1', 'Column 2', 'Column 3', 'Column 4' ],
            [ 'A', 'Test', 'Test with tab (\t).', 'Test should pass.' ],
            [ 'B', 'Test', 'Test with comma (,) in the text.', 'Test should pass.' ],
            [ 'C', 'Test', 'Test with newline (\n) in it.', 'Test should pass.' ],
    ]

    /**
     * Round trip: writes the test data as tab-separated lines with the
     * {@link CsvLineAggregator} and checks that a {@link FlatFileItemReader}
     * using a {@link DelimitedLineTokenizer} reads back the same values.
     */
    @Test
    void testWriteCsvData() {
        // Writing test data to byte array
        ByteArrayOutputStream out = new ByteArrayOutputStream()
        out.withWriter { writer ->
            // lineEnd is left at its default so that every row ends its own line.
            def aggregator = new CsvLineAggregator<List<String>>(
                    separator: '\t',
                    fieldExtractor: new PassThroughFieldExtractor<List<String>>(),
            )
            write(TEST_DATA, aggregator, writer)
            writer.flush()
        }

        // Reading data from byte array
        def tokenizer = new DelimitedLineTokenizer(
                delimiter: '\t',
        )
        def lineMapper = new DefaultLineMapper(
                lineTokenizer: tokenizer,
                fieldSetMapper: { FieldSet fs ->
                    fs.values as List<String>
                } as FieldSetMapper<List<String>>,
        )

        Resource resource = new ByteArrayResource(out.toByteArray())
        def reader = new FlatFileItemReader<List<String>>(
                lineMapper: lineMapper,
                recordSeparatorPolicy: new DefaultRecordSeparatorPolicy(),
                resource: resource,
        )
        reader.afterPropertiesSet()
        List<List<String>> result = read(reader)

        assertThat result, equalTo(TEST_DATA)
    }

}
2 changes: 2 additions & 0 deletions studies/GSE8581/tags/tags_1.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
concept_key tag_title tag_description index
\ TITLE Human Chronic Obstructive Pulmonary Disorder Biomarker 2
\ ORGANISM Homo sapiens 3
\ FREE TEXT "Text with tab characters ( ), and
new lines." 6
\Endpoints\FEV1 NAME FEV1/FVC ratio 4
\Endpoints\FEV1 SYNONYMS Tiffeneau-Pinelli 5

0 comments on commit 28fe984

Please sign in to comment.