rw251 · rw251 · Mar 7, 2024 · Mar 7, 2024 · Mar 7, 2024 · Mar 7, 2024
diff --git a/projects/019 - Heald DARE/README.html b/projects/019 - Heald DARE/README.html
diff --git a/projects/019 - Heald DARE/README.md b/projects/019 - Heald DARE/README.md
diff --git a/projects/019 - Heald DARE/clinical-code-sets.csv b/projects/019 - Heald DARE/clinical-code-sets.csv
diff --git a/projects/019 - Heald DARE/extraction-sql/bio-markers.sql b/projects/019 - Heald DARE/extraction-sql/bio-markers.sql
diff --git a/projects/019 - Heald DARE/extraction-sql/cohort.sql b/projects/019 - Heald DARE/extraction-sql/cohort.sql
diff --git a/projects/019 - Heald DARE/scripts/main.js b/projects/019 - Heald DARE/scripts/main.js
@@ -1,4 +1,10 @@
+// This file contains the logic for extracting data when an RDE
+// is ready to generate the data for a study. When the SQL is compiled
+// this file is copied to the "scripts" directory of the project. In
+// this way we can update a single js file (this one) rather than having
+// to maintain a separate file in each project directory.
 const fs = require('fs');
+const readline = require('readline');
 const mssql = require('mssql');
 const msRestNodeAuth = require('@azure/ms-rest-nodeauth');
 const chalk = require('chalk');
@@ -463,7 +469,7 @@ async function getPatientPseudoIds() {
   return new Promise((resolve) => {
     const request = new mssql.Request();
     request.stream = true;
-    request.query('SELECT PK_Patient_Link_ID FROM RLS.vw_Patient_Link;');
+    request.query('SELECT PK_Patient_Link_ID FROM SharedCare.Patient_Link;');
 
     request.on('row', (row) => {
       // Emitted for each row in a recordset
@@ -476,41 +482,74 @@ async function getPatientPseudoIds() {
 ${err}`);
     });
 
-    request.on('done', () => {
+    request.on('done', async () => {
       // Always emitted as the last one
       if (!store.shouldOverwrite) {
         log('Loading the existing mapping file...');
         let maxPseudoId = 0;
-        fs.readFileSync(PSEUDO_ID_FILE, 'utf8')
-          .split('\n')
-          .forEach((x) => {
-            if (x.trim().length < 2) return;
-            const [fkid, pseudoId] = x.split(',');
+
+        const fileStream = fs.createReadStream(PSEUDO_ID_FILE);
+
+        const rl = readline.createInterface({
+          input: fileStream,
+          crlfDelay: Infinity, // treat \r\n as single line break
+        });
+
+        // Process one line at a time rather than reading the entire file
+        // in one go, as that was causing out-of-memory exceptions
+        for await (const line of rl) {
+          if (line.trim().length >= 2) {
+            const [fkid, pseudoId] = line.split(',');
             store.pseudoLookup[fkid.trim()] = +pseudoId.trim();
             maxPseudoId = Math.max(maxPseudoId, +pseudoId.trim());
-          });
+          }
+        }
+
         log(`There are ${Object.keys(store.pseudoLookup).length} patient ids in the mapping file.`);
         const newPatientIds = patientIds.filter((patientId) => !store.pseudoLookup[patientId]);
         log(`There are ${newPatientIds.length} new patient ids from the database.`);
         if (newPatientIds.length > 0) {
           const newPatientIdRows = randomIdGenerator(maxPseudoId, newPatientIds);
+          log('Appending new patient ids to the lookup file...');
           fs.writeFileSync(PSEUDO_ID_FILE, '\n' + newPatientIdRows.join('\n'), { flag: 'a' });
           log(`New patient ids added to the pseudo id lookup file.`);
         }
+        return resolve();
       } else {
         log(chalk.bold(`${patientIds.length} patient ids retrived.`));
         const patientIdRows = randomIdGenerator(0, patientIds);
-        fs.writeFileSync(PSEUDO_ID_FILE, patientIdRows.join('\n'));
-        log(`Patient ids written to the pseudo id lookup file.`);
+        log('Writing new patient ids to the lookup file...');
+        const pseudoFileWriter = fs.createWriteStream(PSEUDO_ID_FILE);
+        pseudoFileWriter.on('error', function (err) {
+          throw err;
+        });
+        pseudoFileWriter.on('close', () => {
+          log(`Patient ids written to the pseudo id lookup file.`);
+          return resolve();
+        });
+        let i = 0;
+        function writeToFile() {
+          let ok = true;
+          while (ok && i < patientIdRows.length) {
+            ok = pseudoFileWriter.write(patientIdRows[i] + '\n');
+            i++;
+          }
+          if (i === patientIdRows.length) pseudoFileWriter.end();
+          else pseudoFileWriter.once('drain', writeToFile);
+        }
+        writeToFile();
+
+        // pseudoFileWriter.end();
+        // fs.writeFileSync(PSEUDO_ID_FILE, patientIdRows.join('\n'));
       }
-      return resolve();
     });
   });
 }
 
 function randomIdGenerator(start = 0, ids) {
   log('Randomly assigning ids...');
   shuffleArray(ids);
+  log('Array shuffling complete');
   return ids.map((id, i) => {
     store.highestId = start + i + 1;
     store.pseudoLookup[id] = store.highestId;

diff --git a/projects/019 - Heald DARE/template-sql/bio-markers.template.sql b/projects/019 - Heald DARE/template-sql/bio-markers.template.sql
@@ -11,6 +11,10 @@
 --		- Removing the date restriction in order to get all possible trend data
 --		-	Adding blood pressure and triglycerides
 
+-- Richard Williams - changes at 29th February 2024
+-- PI requested:
+--		- Longitudinal data for SGLT2 inhibitors and metformin
+
 -- Cohort is patients included in the DARE study. The below queries produce the data
 -- that is required for each patient. However, a filter needs to be applied to only
 -- provide this data for patients in the DARE study. Adrian Heald will provide GraphNet
@@ -20,9 +24,6 @@
 -- We assume that a temporary table will exist as follows:
 -- CREATE TABLE #DAREPatients (NhsNo NVARCHAR(30));
 
--- For each patient in the DARE cohort, this produces all biomarker readings
--- since 2018-01-01.
-
 --Just want the output, not the messages
 SET NOCOUNT ON;
 
@@ -45,6 +46,7 @@ INNER JOIN #DAREPatients dp ON dp.NhsNo = p.NhsNo;
 
 --> CODESET bmi:2 hba1c:2 cholesterol:2 ldl-cholesterol:1 hdl-cholesterol:1 vitamin-d:1 testosterone:1 sex-hormone-binding-globulin:1 egfr:1
 --> CODESET diastolic-blood-pressure:1 systolic-blood-pressure:1 triglycerides:1 urinary-albumin-creatinine-ratio:1
+--> CODESET sglt2-inhibitors:1 metformin:1
 
 -- First let's get all the measurements in one place to improve query speed later on
 IF OBJECT_ID('tempdb..#biomarkerValues') IS NOT NULL DROP TABLE #biomarkerValues;
@@ -181,6 +183,23 @@ WHERE (
   FK_Reference_Coding_ID IN (SELECT FK_Reference_Coding_ID FROM #VersionedCodeSets WHERE (Concept = 'urinary-albumin-creatinine-ratio' AND [Version] = 1))
 );
 
+INSERT INTO #biomarkers
+SELECT FK_Patient_Link_ID, 'sglt2-inhibitor' AS Label, EventDate, [Value], Units
+FROM #biomarkerValues
+WHERE (
+	FK_Reference_SnomedCT_ID IN (SELECT FK_Reference_SnomedCT_ID FROM #VersionedSnomedSets WHERE (Concept = 'sglt2-inhibitors' AND [Version] = 1)) OR
+  FK_Reference_Coding_ID IN (SELECT FK_Reference_Coding_ID FROM #VersionedCodeSets WHERE (Concept = 'sglt2-inhibitors' AND [Version] = 1))
+);
+
+INSERT INTO #biomarkers
+SELECT FK_Patient_Link_ID, 'metformin' AS Label, EventDate, [Value], Units
+FROM #biomarkerValues
+WHERE (
+	FK_Reference_SnomedCT_ID IN (SELECT FK_Reference_SnomedCT_ID FROM #VersionedSnomedSets WHERE (Concept = 'metformin' AND [Version] = 1)) OR
+  FK_Reference_Coding_ID IN (SELECT FK_Reference_Coding_ID FROM #VersionedCodeSets WHERE (Concept = 'metformin' AND [Version] = 1))
+);
+
+
 -- Final output
 SELECT NhsNo, Label, EventDate, [Value], Units FROM #biomarkers b
 INNER JOIN #NhsNoToLinkId n on n.FK_Patient_Link_ID = b.FK_Patient_Link_ID