Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add function parseStream (#16)
* wip: parsing stream * wip: first example with generator * chore: allow to define CHUNK_SIZE in tests * chore: fix parseStream chunk size * wip: allow to join chunks * wip: try to slice the data * fix: throw error if no closing tag * feat: add parseStream * chore: test with node 18 * fix: only test parseStream with node 18 and greater
- Loading branch information
Showing
10 changed files
with
413 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -119,4 +119,6 @@ dist | |
|
||
lib | ||
lib-esm | ||
big.xml | ||
big.xml | ||
|
||
script/medline.xml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import { parseStream } from '../lib/index.js'; | ||
import { open } from 'fs/promises'; | ||
|
||
/* | ||
In order to test this script you should first build the package: `npm run prepack` | ||
And you also need a (big) file from medline called 'medline.xml' | ||
*/ | ||
|
||
/**
 * Streams `medline.xml` (resolved relative to this script) through
 * `parseStream` and logs every `PubmedArticle` entry followed by a running
 * zero-based counter.
 *
 * The file handle is closed when iteration finishes or fails, because
 * `readableWebStream()` does not close the underlying handle on its own.
 */
async function doAll() {
  const file = await open(new URL('medline.xml', import.meta.url), 'r');
  try {
    const stream = file.readableWebStream();
    let i = 0;
    for await (const entry of parseStream(stream, 'PubmedArticle')) {
      console.log(entry);
      console.log(i++);
    }
  } finally {
    // Always release the descriptor, even if parsing throws midway.
    await file.close();
  }
}
|
||
// Run the demo; report failures explicitly instead of leaving a floating promise.
doAll().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import { open } from 'fs/promises'; | ||
import { join } from 'path'; | ||
|
||
import { parseStream } from '../parseStream'; | ||
|
||
describe('parseStream', () => {
  it('simple case', async () => {
    // The body only runs on Node 18 and newer; on older runtimes the test
    // passes without asserting anything.
    // eslint-disable-next-line jest/no-if
    if (Number(process.versions.node.split('.')[0]) >= 18) {
      const file = await open(join(__dirname, 'assets/sample.xml'), 'r');
      try {
        // Re-chunk the file into tiny pieces so that tags are split across
        // chunk boundaries and the parser has to join them back together.
        const CHUNK_SIZE = 10;
        const transformStream = new TransformStream({
          start: function start() {}, // required.
          transform: async function transform(chunk, controller) {
            if (chunk === null) {
              controller.terminate();
              // Nothing may be enqueued once the stream is terminated.
              return;
            }
            const bytes = new Uint8Array(await chunk);
            for (let i = 0; i < bytes.length; i += CHUNK_SIZE) {
              controller.enqueue(bytes.slice(i, i + CHUNK_SIZE));
            }
          },
        });

        const results = [];
        //@ts-expect-error feature is too new
        const readableStream = file.readableWebStream();
        for await (const entry of parseStream(
          readableStream.pipeThrough(transformStream),
          'address',
        )) {
          results.push(entry);
        }
        expect(results).toMatchInlineSnapshot(`
          Array [
            Object {
              "buildingNo": 1,
              "city": "New York",
              "flatNo": 1,
              "street": "Park Ave",
            },
            Object {
              "buildingNo": 33,
              "city": "Boston",
              "flatNo": 24,
              "street": "Centre St",
            },
            Object {
              "buildingNo": 1,
              "city": "Moscow",
              "flatNo": 2,
              "street": "Kahovka",
            },
            Object {
              "buildingNo": 3,
              "city": "Tula",
              "flatNo": 78,
              "street": "Lenina",
            },
          ]
        `);
      } finally {
        // readableWebStream() leaves the handle open; close it explicitly.
        await file.close();
      }
    }
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
export * from './parse'; | ||
export * from './parseStream'; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import { | ||
defaultOptions, | ||
StreamParseOptions, | ||
} from './traversable/defaultOptions'; | ||
import { getTraversableGenerator } from './traversable/getTraversableGenerator'; | ||
import { traversableToJSON } from './traversableToJSON'; | ||
|
||
/**
 * Asynchronously iterates over a web `ReadableStream` containing XML and
 * yields one converted object per element produced for `lookupTagName`.
 *
 * @param readableStream - Stream handed to `getTraversableGenerator`.
 * @param lookupTagName - Tag name forwarded to `getTraversableGenerator`.
 * @param options - Caller overrides; merged on top of `defaultOptions`.
 * @returns An async generator yielding `traversableToJSON` results.
 */
export async function* parseStream(
  readableStream: ReadableStream,
  lookupTagName: string,
  options: StreamParseOptions = {},
) {
  // Caller-supplied values win over the library defaults.
  const resolvedOptions: StreamParseOptions = {
    ...defaultOptions,
    ...options,
  };

  const traversables = getTraversableGenerator(
    readableStream,
    lookupTagName,
    resolvedOptions,
  );

  for await (const traversable of traversables) {
    yield traversableToJSON(traversable, resolvedOptions);
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.