Skip to content
This repository has been archived by the owner on Jan 21, 2021. It is now read-only.

Commit

Permalink
Added export of browse tags.
Browse files Browse the repository at this point in the history
  • Loading branch information
gijskant committed Apr 13, 2016
1 parent d223c71 commit ffda9d1
Show file tree
Hide file tree
Showing 15 changed files with 873 additions and 0 deletions.
128 changes: 128 additions & 0 deletions scripts/browsetags_to_tagtypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#! /usr/bin/env python
# :noTabs=true:
# (c) Copyright (c) 2016 The Hyve
# (c) This file is distributed under the Apache Software License, Version 2.0
# (c) (https://www.apache.org/licenses/LICENSE-2.0.txt).
"""
browsetags_to_tagtypes.py
Brief: Converts concept codes from TranSMART, used in fixed vocabulary meta tags
in the 'browse' tab to tag types, to be used in the 'analyze' tab.
The input format (in.tsv) is the one written by the transmart-batch browse
tags export; the output format (out.tsv) is the one read by the
transmart-batch tag type import.
Usage: browsetags_to_tagtypes.py <in.tsv> [out.tsv]
Author: Gijs Kant <gijs@thehyve.nl>
"""
import csv
import os
import sys
import StringIO

class ConverterException(Exception):
    """Signals that the browse tags input could not be converted."""

class Converter:
    """Converts exported browse tags into tag type definitions.

    `read` collects the tags of STUDY folders from a browse tags export and
    `write` emits one tag type row per collected tag name.
    """

    # Number of tab-separated columns every input line must have:
    # folder type, code, name, value, print value.
    input_line_length = 5

    @staticmethod
    def read(tsv_in):
        """Collect the tag types of STUDY folders from a browse tags export.

        Args:
            tsv_in: iterable of tab-separated lines, e.g. an open file.

        Returns:
            A dict mapping each tag name to a dict with the keys 'name',
            'code' and 'values' (the print values in input order). Lines
            whose folder type is not 'STUDY' are ignored.

        Raises:
            ConverterException: if a line does not have exactly
                `input_line_length` columns.
        """
        types = {}
        reader = csv.reader(tsv_in, dialect="excel-tab")
        for line in reader:
            if len(line) != Converter.input_line_length:
                raise ConverterException(
                    'Invalid line length on line %d: %d (expected: %d).' % (
                        reader.line_num,
                        len(line),
                        Converter.input_line_length))
            # The fourth column (the raw value) is not used: only the print
            # value ends up in the tag type.
            folder_type, code, name, _, printvalue = line
            if folder_type != 'STUDY':
                continue
            t = types.get(name)
            if t is None:
                t = {'name': name, 'code': code, 'values': []}
                types[name] = t
            t['values'].append(printvalue)
        return types

    @staticmethod
    def _values_to_csv(values):
        """Encode a list of values as a single comma-separated CSV record."""
        # Imported locally so that the class works on Python 2 (where the csv
        # module needs a byte-string buffer) as well as on Python 3.
        try:
            from StringIO import StringIO
        except ImportError:
            from io import StringIO
        csv_out = StringIO()
        try:
            # No line terminator: the record is embedded in a single TSV cell.
            csv.writer(csv_out, lineterminator='').writerow(values)
            return csv_out.getvalue()
        finally:
            csv_out.close()

    @staticmethod
    def write(tsv_out, types):
        """Write the tag types as tab-separated rows, preceded by a header.

        Args:
            tsv_out: writable file-like object that receives the rows.
            types: dict as returned by `read`.

        Rows are written in order of tag name and numbered from 1 in the
        'index' column, so the output does not depend on dict ordering.
        """
        writer = csv.writer(tsv_out, dialect="excel-tab")
        writer.writerow([
            'node_type',
            'title',
            'solr_field_name',
            'value_type',
            'shown_if_empty',
            'values',
            'index'
        ])
        ordered = sorted(types.values(), key=lambda t: t['name'])
        for index, t in enumerate(ordered, 1):
            writer.writerow([
                'STUDY',
                t['name'],
                t['code'],
                'ANALYZED_STRING',
                'Y',
                Converter._values_to_csv(t['values']),
                index
            ])


def main():
    """Command line entry point: convert a browse tags export to tag types.

    Reads the file named by the first argument and writes the tag types to
    the file named by the optional second argument, or to standard output
    when it is omitted. Progress and error messages go to standard error so
    that they never mix with tag types written to standard output.

    Exits with status 1 when the input argument is missing or when the input
    cannot be converted.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s <in.tsv> [out.tsv]\n"
                         % os.path.basename(sys.argv[0]))
        # A usage error is a failure; report it through the exit status.
        sys.exit(1)

    in_file = sys.argv[1]
    out_file = sys.argv[2] if len(sys.argv) > 2 else None

    types = None
    sys.stderr.write("Reading browse tags from %s ...\n" % in_file)
    try:
        with open(in_file) as tsv_in:
            types = Converter.read(tsv_in)
    except ConverterException as e:
        # Keep the error off standard output, which may carry the result.
        sys.stderr.write("%s\n" % e)

    if types is None:
        sys.stderr.write("Reading browse tags failed.\n")
        sys.exit(1)

    if out_file is None:
        sys.stderr.write("Writing tag types to stdout ...\n")
        Converter.write(sys.stdout, types)
    else:
        # The with block closes the output file even if writing fails.
        with open(out_file, 'w') as tsv_out:
            sys.stderr.write("Writing tag types to %s ...\n" % out_file)
            Converter.write(tsv_out, types)

    sys.stderr.write("Done.\n")


# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package org.transmartproject.batch.browsetag

import groovy.transform.EqualsAndHashCode
import groovy.transform.ToString

/**
 * A folder to which browse tags can be attached.
 * Instances are populated by {@link BrowseTagAssociationDatabaseReader}
 * from the folder columns of its query.
 * Equality and hash code are based on id, level, name, type and parent
 * only; fullName and description are not taken into account.
 */
@ToString
@EqualsAndHashCode(includes = ['id', 'level', 'name', 'type', 'parent'])
class BrowseFolder {
    // Type of the folder.
    BrowseFolderType type
    // Value of the folder_id column.
    Long id
    // Value of the folder_level column.
    Integer level
    // Value of the folder_name column.
    String name
    // Value of the folder_full_name column.
    String fullName
    // Value of the folder's description column.
    String description
    // Name (not the full name) of the parent folder.
    String parent
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package org.transmartproject.batch.browsetag

import groovy.transform.EqualsAndHashCode
import groovy.transform.ToString

/**
 * Represents a node type with which tags can be associated.
 * This is called a 'tag template' in the database.
 * Equality and hash code are based on both type and displayName.
 */
@ToString
@EqualsAndHashCode(includes = ['type', 'displayName'])
class BrowseFolderType implements Serializable {

    private static final long serialVersionUID = 1L

    // Name of the folder type.
    String type
    // Human readable name of the type.
    // NOTE(review): BrowseTagAssociationDatabaseReader fills this from a
    // folder's description column -- confirm that this is intended.
    String displayName

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package org.transmartproject.batch.browsetag

import groovy.transform.EqualsAndHashCode
import groovy.transform.ToString

/**
 * Associates a browse folder with one tag value.
 * Equality and hash code are based on both the folder and the value.
 */
@ToString
@EqualsAndHashCode(includes = ['folder', 'value'])
class BrowseTagAssociation {
    // The folder the tag value is attached to.
    BrowseFolder folder
    // The tag value attached to the folder.
    BrowseTagValue value
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
package org.transmartproject.batch.browsetag

import groovy.util.logging.Slf4j
import org.springframework.batch.item.ItemStreamReader
import org.springframework.batch.item.database.JdbcCursorItemReader
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.jdbc.core.RowMapper
import org.transmartproject.batch.clinical.db.objects.Tables

import javax.annotation.PostConstruct
import javax.sql.DataSource
import java.sql.ResultSet
import java.sql.SQLException

/**
 * Reads the associations between browse folders and tag values from the
 * database, producing one {@link BrowseTagAssociation} per result row.
 *
 * Tag values are taken from two sources, combined with a UNION: associations
 * with object type 'AM_TAG_VALUE' and associations with object type
 * 'BIO_CONCEPT_CODE'. The actual reading is delegated to a
 * {@link JdbcCursorItemReader} that is configured in {@link #init()}.
 *
 * Folder, folder type and tag type objects are cached, so rows that refer to
 * the same folder or type share a single instance.
 */
@Slf4j
class BrowseTagAssociationDatabaseReader implements ItemStreamReader<BrowseTagAssociation> {

    @Delegate
    JdbcCursorItemReader<BrowseTagAssociation> delegate

    @Autowired
    DataSource dataSource

    // Folder types seen so far, keyed by folder type name.
    private final Map<String, BrowseFolderType> folderTypes = [:]

    // Tag types seen so far, keyed by tag item id.
    private final Map<Long, BrowseTagType> tagTypes = [:]

    // Folders seen so far, keyed by full folder name.
    private final Map<String, BrowseFolder> folders = [:]

    /**
     * Creates and initialises the cursor reader that all
     * {@link ItemStreamReader} calls are delegated to.
     */
    @PostConstruct
    void init() {
        JdbcCursorItemReader<BrowseTagAssociation> cursorReader = new JdbcCursorItemReader<>()
        cursorReader.driverSupportsAbsolute = true
        cursorReader.dataSource = dataSource
        cursorReader.sql = sql
        cursorReader.rowMapper = this.&mapRow as RowMapper<BrowseTagAssociation>
        delegate = cursorReader

        delegate.afterPropertiesSet()
    }

    /**
     * Builds the query that selects, for every tag association of a folder,
     * the folder, its parent, the tag item and the tag value.
     *
     * Notes on the tables involved:
     * Table Tables.BIO_CONCEPT_CODE:
     *   primary key: bio_concept_code_id
     *   unique: (code_type_name, bio_concept_code)
     *   index: code_type_name
     * Table Tables.AM_TAG_ITEM:
     *   primary key: (tag_template_id, tag_item_id)
     */
    private String getSql() {
        // The query text is kept flush left so that it stays exactly as is.
        return """
(SELECT
f.folder_id,
f.folder_name,
f.folder_full_name,
f.folder_level,
f.folder_type,
f.folder_tag,
f.parent_id,
f.description as folder_description,
fp.folder_name as parent_name,
fp.folder_full_name as parent_full_name,
fp.folder_type as parent_type,
ati.tag_template_id,
ati.tag_item_id,
ati.tag_item_uid,
ati.display_name,
ati.tag_item_type,
ati.tag_item_subtype,
ati.code_type_name,
ati.required,
tv.value as value,
tv.value as description
FROM $Tables.FM_FOLDER f
INNER JOIN $Tables.FM_DATA_UID fuid
ON f.folder_id = fuid.fm_data_id
INNER JOIN $Tables.AM_TAG_ASSOCIATION ata
ON fuid.unique_id = ata.subject_uid
INNER JOIN $Tables.AM_TAG_ITEM ati
ON ata.tag_item_id = ati.tag_item_id
INNER JOIN $Tables.AM_DATA_UID tuid
ON ata.object_uid = tuid.unique_id
INNER JOIN $Tables.AM_TAG_VALUE tv
ON tuid.am_data_id = tv.tag_value_id
LEFT OUTER JOIN $Tables.FM_FOLDER fp
ON f.parent_id = fp.folder_id
WHERE ata.object_type = 'AM_TAG_VALUE')
UNION
(SELECT
f.folder_id,
f.folder_name,
f.folder_full_name,
f.folder_level,
f.folder_type,
f.folder_tag,
f.parent_id,
f.description as folder_description,
fp.folder_name as parent_name,
fp.folder_full_name as parent_full_name,
fp.folder_type as parent_type,
ati.tag_template_id,
ati.tag_item_id,
ati.tag_item_uid,
ati.display_name,
ati.tag_item_type,
ati.tag_item_subtype,
ati.code_type_name,
ati.required,
bcc.bio_concept_code as value,
bcc.code_description as description
FROM $Tables.FM_FOLDER f
INNER JOIN $Tables.FM_DATA_UID fuid
ON f.folder_id = fuid.fm_data_id
INNER JOIN $Tables.AM_TAG_ASSOCIATION ata
ON fuid.unique_id = ata.subject_uid
INNER JOIN $Tables.AM_TAG_ITEM ati
ON ata.tag_item_id = ati.tag_item_id
INNER JOIN $Tables.BIO_CONCEPT_CODE bcc
ON ata.object_uid = concat(bcc.code_type_name, ':', bcc.bio_concept_code)
LEFT OUTER JOIN $Tables.FM_FOLDER fp
ON f.parent_id = fp.folder_id
WHERE ata.object_type = 'BIO_CONCEPT_CODE')
"""
    }

    /**
     * Returns the folder type of the current row, creating and caching it
     * the first time a folder type name is encountered.
     */
    private BrowseFolderType getFolderType(ResultSet rs) {
        String typeName = rs.getString('folder_type')
        if (!folderTypes.containsKey(typeName)) {
            // NOTE(review): the display name is taken from the description of
            // the first folder seen with this type -- confirm this is intended.
            folderTypes[typeName] = new BrowseFolderType(
                    type: typeName,
                    displayName: rs.getString('folder_description')
            )
        }
        folderTypes[typeName]
    }

    /**
     * Returns the tag type of the current row, creating and caching it the
     * first time a tag item id is encountered.
     */
    private BrowseTagType getTagType(ResultSet rs) {
        Long itemId = rs.getLong('tag_item_id')
        if (!tagTypes.containsKey(itemId)) {
            tagTypes[itemId] = new BrowseTagType(
                    code: rs.getString('code_type_name'),
                    folderType: getFolderType(rs),
                    type: rs.getString('tag_item_type'),
                    subType: rs.getString('tag_item_subtype'),
                    displayName: rs.getString('display_name'),
                    required: rs.getBoolean('required')
            )
        }
        tagTypes[itemId]
    }

    /**
     * Returns the folder of the current row, creating and caching it the
     * first time a full folder name is encountered.
     */
    private BrowseFolder getFolder(ResultSet rs) {
        String folderPath = rs.getString('folder_full_name')
        if (!folders.containsKey(folderPath)) {
            folders[folderPath] = new BrowseFolder(
                    fullName: folderPath,
                    id: rs.getLong('folder_id'),
                    name: rs.getString('folder_name'),
                    type: getFolderType(rs),
                    level: rs.getInt('folder_level'),
                    description: rs.getString('folder_description'),
                    parent: rs.getString('parent_name')
            )
        }
        folders[folderPath]
    }

    /**
     * Creates the tag value of the current row; values are not cached.
     */
    private BrowseTagValue getValue(ResultSet rs) {
        BrowseTagType tagType = getTagType(rs)
        new BrowseTagValue(
                type: tagType,
                value: rs.getString('value'),
                description: rs.getString('description')
        )
    }

    /**
     * Row mapper used by the delegate: combines the folder and the tag value
     * of the current row into an association.
     */
    @SuppressWarnings('UnusedPrivateMethodParameter')
    private BrowseTagAssociation mapRow(ResultSet rs, int rowNum) throws SQLException {
        BrowseFolder rowFolder = getFolder(rs)
        BrowseTagValue rowValue = getValue(rs)
        new BrowseTagAssociation(
                folder: rowFolder,
                value: rowValue
        )
    }

}

0 comments on commit ffda9d1

Please sign in to comment.