Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rq019 changes #197

Merged
merged 4 commits into from Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1,671 changes: 1,507 additions & 164 deletions projects/019 - Heald DARE/README.html

Large diffs are not rendered by default.

530 changes: 456 additions & 74 deletions projects/019 - Heald DARE/README.md

Large diffs are not rendered by default.

16,042 changes: 10,352 additions & 5,690 deletions projects/019 - Heald DARE/clinical-code-sets.csv

Large diffs are not rendered by default.

131 changes: 100 additions & 31 deletions projects/019 - Heald DARE/extraction-sql/bio-markers.sql

Large diffs are not rendered by default.

337 changes: 215 additions & 122 deletions projects/019 - Heald DARE/extraction-sql/cohort.sql

Large diffs are not rendered by default.

61 changes: 50 additions & 11 deletions projects/019 - Heald DARE/scripts/main.js
@@ -1,4 +1,10 @@
// This file contains the logic for extracting data when an RDE
// is ready to generate the data for a study. When the SQL is compiled
// this file is copied to the "scripts" directory of the project. In
// this way we can update a single js file (this one) rather than having
// a separate file in each project directory
const fs = require('fs');
const readline = require('readline');
const mssql = require('mssql');
const msRestNodeAuth = require('@azure/ms-rest-nodeauth');
const chalk = require('chalk');
Expand Down Expand Up @@ -463,7 +469,7 @@ async function getPatientPseudoIds() {
return new Promise((resolve) => {
const request = new mssql.Request();
request.stream = true;
request.query('SELECT PK_Patient_Link_ID FROM RLS.vw_Patient_Link;');
request.query('SELECT PK_Patient_Link_ID FROM SharedCare.Patient_Link;');

request.on('row', (row) => {
// Emitted for each row in a recordset
Expand All @@ -476,41 +482,74 @@ async function getPatientPseudoIds() {
${err}`);
});

request.on('done', () => {
request.on('done', async () => {
// Always emitted as the last one
if (!store.shouldOverwrite) {
log('Loading the existing mapping file...');
let maxPseudoId = 0;
fs.readFileSync(PSEUDO_ID_FILE, 'utf8')
.split('\n')
.forEach((x) => {
if (x.trim().length < 2) return;
const [fkid, pseudoId] = x.split(',');

const fileStream = fs.createReadStream(PSEUDO_ID_FILE);

const rl = readline.createInterface({
input: fileStream,
crlfDelay: Infinity, // treat \r\n as single line break
});

// Process one line at a time rather than reading the entire file
// in one go, as that was causing out-of-memory exceptions
for await (const line of rl) {
if (line.trim().length >= 2) {
const [fkid, pseudoId] = line.split(',');
store.pseudoLookup[fkid.trim()] = +pseudoId.trim();
maxPseudoId = Math.max(maxPseudoId, +pseudoId.trim());
});
}
}

log(`There are ${Object.keys(store.pseudoLookup).length} patient ids in the mapping file.`);
const newPatientIds = patientIds.filter((patientId) => !store.pseudoLookup[patientId]);
log(`There are ${newPatientIds.length} new patient ids from the database.`);
if (newPatientIds.length > 0) {
const newPatientIdRows = randomIdGenerator(maxPseudoId, newPatientIds);
log('Appending new patient ids to the lookup file...');
fs.writeFileSync(PSEUDO_ID_FILE, '\n' + newPatientIdRows.join('\n'), { flag: 'a' });
log(`New patient ids added to the pseudo id lookup file.`);
}
return resolve();
} else {
log(chalk.bold(`${patientIds.length} patient ids retrived.`));
const patientIdRows = randomIdGenerator(0, patientIds);
fs.writeFileSync(PSEUDO_ID_FILE, patientIdRows.join('\n'));
log(`Patient ids written to the pseudo id lookup file.`);
log('Writing new patient ids to the lookup file...');
const pseudoFileWriter = fs.createWriteStream(PSEUDO_ID_FILE);
pseudoFileWriter.on('error', function (err) {
throw err;
});
pseudoFileWriter.on('close', () => {
log(`Patient ids written to the pseudo id lookup file.`);
return resolve();
});
let i = 0;
function writeToFile() {
let ok = true;
while (ok && i < patientIdRows.length) {
ok = pseudoFileWriter.write(patientIdRows[i] + '\n');
i++;
}
if (i === patientIdRows.length) pseudoFileWriter.end();
else pseudoFileWriter.once('drain', writeToFile);
}
writeToFile();

// pseudoFileWriter.end();
// fs.writeFileSync(PSEUDO_ID_FILE, patientIdRows.join('\n'));
}
return resolve();
});
});
}

function randomIdGenerator(start = 0, ids) {
log('Randomly assigning ids...');
shuffleArray(ids);
log('Array shuffling complete');
return ids.map((id, i) => {
store.highestId = start + i + 1;
store.pseudoLookup[id] = store.highestId;
Expand Down
25 changes: 22 additions & 3 deletions projects/019 - Heald DARE/template-sql/bio-markers.template.sql
Expand Up @@ -11,6 +11,10 @@
-- - Removing the date restriction in order to get all possible trend data
-- - Adding blood pressure and triglycerides

-- Richard Williams - changes at 29th February 2024
-- PI requested:
-- - Longitudinal data for sglt2 inhibitors and metformin

-- Cohort is patients included in the DARE study. The below queries produce the data
-- that is required for each patient. However, a filter needs to be applied to only
-- provide this data for patients in the DARE study. Adrian Heald will provide GraphNet
Expand All @@ -20,9 +24,6 @@
-- We assume that a temporary table will exist as follows:
-- CREATE TABLE #DAREPatients (NhsNo NVARCHAR(30));

-- For each patient in the DARE cohort, this produces all biomarker readings
-- since 2018-01-01.

--Just want the output, not the messages
SET NOCOUNT ON;

Expand All @@ -45,6 +46,7 @@ INNER JOIN #DAREPatients dp ON dp.NhsNo = p.NhsNo;

--> CODESET bmi:2 hba1c:2 cholesterol:2 ldl-cholesterol:1 hdl-cholesterol:1 vitamin-d:1 testosterone:1 sex-hormone-binding-globulin:1 egfr:1
--> CODESET diastolic-blood-pressure:1 systolic-blood-pressure:1 triglycerides:1 urinary-albumin-creatinine-ratio:1
--> CODESET sglt2-inhibitors:1 metformin:1

-- First let's get all the measurements in one place to improve query speed later on
IF OBJECT_ID('tempdb..#biomarkerValues') IS NOT NULL DROP TABLE #biomarkerValues;
Expand Down Expand Up @@ -181,6 +183,23 @@ WHERE (
FK_Reference_Coding_ID IN (SELECT FK_Reference_Coding_ID FROM #VersionedCodeSets WHERE (Concept = 'urinary-albumin-creatinine-ratio' AND [Version] = 1))
);

-- Append to #biomarkers every row of #biomarkerValues whose SNOMED id or
-- coding id belongs to version 1 of the 'sglt2-inhibitors' code set.
-- NOTE(review): the output Label is singular ('sglt2-inhibitor') while the
-- code set Concept is plural ('sglt2-inhibitors') - confirm this is intended.
INSERT INTO #biomarkers
SELECT FK_Patient_Link_ID, 'sglt2-inhibitor' AS Label, EventDate, [Value], Units
FROM #biomarkerValues
WHERE (
FK_Reference_SnomedCT_ID IN (SELECT FK_Reference_SnomedCT_ID FROM #VersionedSnomedSets WHERE (Concept = 'sglt2-inhibitors' AND [Version] = 1)) OR
FK_Reference_Coding_ID IN (SELECT FK_Reference_Coding_ID FROM #VersionedCodeSets WHERE (Concept = 'sglt2-inhibitors' AND [Version] = 1))
);

-- Append to #biomarkers every row of #biomarkerValues whose SNOMED id or
-- coding id belongs to version 1 of the 'metformin' code set, labelled 'metformin'.
INSERT INTO #biomarkers
SELECT FK_Patient_Link_ID, 'metformin' AS Label, EventDate, [Value], Units
FROM #biomarkerValues
WHERE (
FK_Reference_SnomedCT_ID IN (SELECT FK_Reference_SnomedCT_ID FROM #VersionedSnomedSets WHERE (Concept = 'metformin' AND [Version] = 1)) OR
FK_Reference_Coding_ID IN (SELECT FK_Reference_Coding_ID FROM #VersionedCodeSets WHERE (Concept = 'metformin' AND [Version] = 1))
);


-- Final output
SELECT NhsNo, Label, EventDate, [Value], Units FROM #biomarkers b
INNER JOIN #NhsNoToLinkId n on n.FK_Patient_Link_ID = b.FK_Patient_Link_ID
Expand Down