Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rq019 changes #197

Merged
merged 4 commits into from Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1,523 changes: 1,433 additions & 90 deletions projects/019 - Heald DARE/README.html

Large diffs are not rendered by default.

470 changes: 426 additions & 44 deletions projects/019 - Heald DARE/README.md

Large diffs are not rendered by default.

5,038 changes: 4,850 additions & 188 deletions projects/019 - Heald DARE/clinical-code-sets.csv

Large diffs are not rendered by default.

79 changes: 48 additions & 31 deletions projects/019 - Heald DARE/extraction-sql/bio-markers.sql

Large diffs are not rendered by default.

337 changes: 215 additions & 122 deletions projects/019 - Heald DARE/extraction-sql/cohort.sql

Large diffs are not rendered by default.

263 changes: 263 additions & 0 deletions projects/019 - Heald DARE/extraction-sql/medications.sql

Large diffs are not rendered by default.

61 changes: 50 additions & 11 deletions projects/019 - Heald DARE/scripts/main.js
@@ -1,4 +1,10 @@
// This file contains the logic for extracting data when an RDE
// is ready to generate the data for a study. When the SQL is compiled
// this file is copied to the "scripts" directory of the project. In
// this way we can update a single js file (this one) rather than having
// a separate file in each project directory
const fs = require('fs');
const readline = require('readline');
const mssql = require('mssql');
const msRestNodeAuth = require('@azure/ms-rest-nodeauth');
const chalk = require('chalk');
Expand Down Expand Up @@ -463,7 +469,7 @@ async function getPatientPseudoIds() {
return new Promise((resolve) => {
const request = new mssql.Request();
request.stream = true;
request.query('SELECT PK_Patient_Link_ID FROM RLS.vw_Patient_Link;');
request.query('SELECT PK_Patient_Link_ID FROM SharedCare.Patient_Link;');

request.on('row', (row) => {
// Emitted for each row in a recordset
Expand All @@ -476,41 +482,74 @@ async function getPatientPseudoIds() {
${err}`);
});

request.on('done', () => {
request.on('done', async () => {
// Always emitted as the last one
if (!store.shouldOverwrite) {
log('Loading the existing mapping file...');
let maxPseudoId = 0;
fs.readFileSync(PSEUDO_ID_FILE, 'utf8')
.split('\n')
.forEach((x) => {
if (x.trim().length < 2) return;
const [fkid, pseudoId] = x.split(',');

const fileStream = fs.createReadStream(PSEUDO_ID_FILE);

const rl = readline.createInterface({
input: fileStream,
crlfDelay: Infinity, // treat \r\n as single line break
});

// Process one line at a time rather than reading entire file
// in one go as that was causing out of memory exceptions
for await (const line of rl) {
if (line.trim().length >= 2) {
const [fkid, pseudoId] = line.split(',');
store.pseudoLookup[fkid.trim()] = +pseudoId.trim();
maxPseudoId = Math.max(maxPseudoId, +pseudoId.trim());
});
}
}

log(`There are ${Object.keys(store.pseudoLookup).length} patient ids in the mapping file.`);
const newPatientIds = patientIds.filter((patientId) => !store.pseudoLookup[patientId]);
log(`There are ${newPatientIds.length} new patient ids from the database.`);
if (newPatientIds.length > 0) {
const newPatientIdRows = randomIdGenerator(maxPseudoId, newPatientIds);
log('Appending new patient ids to the lookup file...');
fs.writeFileSync(PSEUDO_ID_FILE, '\n' + newPatientIdRows.join('\n'), { flag: 'a' });
log(`New patient ids added to the pseudo id lookup file.`);
}
return resolve();
} else {
log(chalk.bold(`${patientIds.length} patient ids retrived.`));
const patientIdRows = randomIdGenerator(0, patientIds);
fs.writeFileSync(PSEUDO_ID_FILE, patientIdRows.join('\n'));
log(`Patient ids written to the pseudo id lookup file.`);
log('Writing new patient ids to the lookup file...');
const pseudoFileWriter = fs.createWriteStream(PSEUDO_ID_FILE);
pseudoFileWriter.on('error', function (err) {
throw err;
});
pseudoFileWriter.on('close', () => {
log(`Patient ids written to the pseudo id lookup file.`);
return resolve();
});
let i = 0;
function writeToFile() {
let ok = true;
while (ok && i < patientIdRows.length) {
ok = pseudoFileWriter.write(patientIdRows[i] + '\n');
i++;
}
if (i === patientIdRows.length) pseudoFileWriter.end();
else pseudoFileWriter.once('drain', writeToFile);
}
writeToFile();

// pseudoFileWriter.end();
// fs.writeFileSync(PSEUDO_ID_FILE, patientIdRows.join('\n'));
}
return resolve();
});
});
}

function randomIdGenerator(start = 0, ids) {
log('Randomly assigning ids...');
shuffleArray(ids);
log('Array shuffling complete');
return ids.map((id, i) => {
store.highestId = start + i + 1;
store.pseudoLookup[id] = store.highestId;
Expand Down
Expand Up @@ -20,9 +20,6 @@
-- We assume that a temporary table will exist as follows:
-- CREATE TABLE #DAREPatients (NhsNo NVARCHAR(30));

-- For each patient in the DARE cohort, this produces all biomarker readings
-- since 2018-01-01.

--Just want the output, not the messages
SET NOCOUNT ON;

Expand Down
67 changes: 67 additions & 0 deletions projects/019 - Heald DARE/template-sql/medications.template.sql
@@ -0,0 +1,67 @@
--┌─────────────────┐
--│ Medication file │
--└─────────────────┘

------------------------ RDE CHECK ---------------------
--------------------------------------------------------

-- Richard Williams - changes at 29th February 2024
-- PI requested:
--	- Longitudinal data for sglt2 inhibitors and metformin

-- Cohort is patients included in the DARE study. The below queries produce the data
-- that is required for each patient. However, a filter needs to be applied to only
-- provide this data for patients in the DARE study. Adrian Heald will provide GraphNet
-- with a list of NHS numbers, then they will execute the below but filtered to the list
-- of NHS numbers.

-- The list of NHS numbers is read from [dbo].[DARECohort] and loaded below into a
-- temporary table #DAREPatients with a single NhsNo column.

-- Every temporary table created by this script is dropped first if it already exists,
-- so the script can be re-run in the same session without "object already exists" errors.

--Just want the output, not the messages
SET NOCOUNT ON;

-- Create the #DAREPatients table from the DARECohort table.
-- Spaces are stripped from each NHS number and it is re-formatted as 'XXX XXX XXXX'
-- (presumably the format held in SharedCare.Patient.NhsNo - TODO confirm).
IF OBJECT_ID('tempdb..#DAREPatients') IS NOT NULL DROP TABLE #DAREPatients;
SELECT
	SUBSTRING(REPLACE(NHSNo, ' ', ''),1,3) + ' ' +
	SUBSTRING(REPLACE(NHSNo, ' ', ''),4,3) + ' ' +
	SUBSTRING(REPLACE(NHSNo, ' ', ''),7,4) AS NhsNo
INTO #DAREPatients
FROM [dbo].[DARECohort];

-- Get link ids of patients
IF OBJECT_ID('tempdb..#Patients') IS NOT NULL DROP TABLE #Patients;
SELECT DISTINCT p.FK_Patient_Link_ID INTO #Patients
FROM SharedCare.Patient p
INNER JOIN #DAREPatients dp ON dp.NhsNo = p.NhsNo;

-- Get lookup between nhs number and fk_patient_link_id
IF OBJECT_ID('tempdb..#NhsNoToLinkId') IS NOT NULL DROP TABLE #NhsNoToLinkId;
SELECT DISTINCT p.NhsNo, p.FK_Patient_Link_ID INTO #NhsNoToLinkId
FROM SharedCare.Patient p
INNER JOIN #DAREPatients dp ON dp.NhsNo = p.NhsNo;

-- NOTE(review): the directive below is presumably expanded by the SQL compile step
-- into the code that populates #AllCodes - confirm against the generated
-- extraction-sql/medications.sql.
--> CODESET sglt2-inhibitors:1 metformin:1

-- Create a table of medications for all the people in our cohort.
-- Just using SuppliedCode
IF OBJECT_ID('tempdb..#PatientMedicationData') IS NOT NULL DROP TABLE #PatientMedicationData;
SELECT
	FK_Patient_Link_ID,
	CAST(MedicationDate AS DATE) AS MedicationDate,
	GPPracticeCode,
	Dosage,
	Units,
	Quantity,
	SuppliedCode
INTO #PatientMedicationData
FROM [SharedCare].GP_Medications
WHERE SuppliedCode IN (SELECT [Code] FROM #AllCodes)
AND FK_Patient_Link_ID IN (SELECT FK_Patient_Link_ID FROM #Patients);
--31s

-- Add indexes for future speed increase
DROP INDEX IF EXISTS medicationData1 ON #PatientMedicationData;
CREATE INDEX medicationData1 ON #PatientMedicationData (SuppliedCode) INCLUDE (FK_Patient_Link_ID, MedicationDate);
--15s

-- Final output
-- One row per medication record, keyed by NHS number rather than link id.
-- NOTE(review): the join to #AllCodes is on Code only, so a code that appears in
-- #AllCodes under more than one concept/version would produce duplicate rows - confirm
-- that cannot happen for these code sets.
-- NOTE(review): Units is output as "Method" and Dosage as "DosageInstruction" - confirm
-- this is the mapping the PI expects.
SELECT
	n.NhsNo,
	m.MedicationDate,
	a.description AS Medication,
	m.Units AS Method,
	m.Dosage AS DosageInstruction,
	m.Quantity
FROM #PatientMedicationData m
LEFT OUTER JOIN #AllCodes a ON a.Code = m.SuppliedCode
INNER JOIN #NhsNoToLinkId n ON n.FK_Patient_Link_ID = m.FK_Patient_Link_ID
WHERE m.SuppliedCode IN (SELECT [Code] FROM #AllCodes WHERE [Concept] IN ('sglt2-inhibitors','metformin') AND [Version] = 1)
ORDER BY n.NhsNo, m.MedicationDate;