Review notes (free text ahead of the first `diff --git` header):
- The patch text had been collapsed onto a few long lines; it is re-flowed here to one
  diff line per row. Indentation and blank context lines are reconstructed and should
  be checked against the tree before applying.
- Generic type arguments that had been stripped (for example `Promise>`) are restored
  by inference - confirm against the tree.
- `index` hashes are carried over unchanged and no longer describe the post-image of
  the files whose added lines were edited.

diff --git a/deno.json b/deno.json
index 47207157..bae15e16 100644
--- a/deno.json
+++ b/deno.json
@@ -32,6 +32,7 @@
     "@cliffy/command": "jsr:@effigies/cliffy-command@1.0.0-dev.8",
     "@cliffy/table": "jsr:@effigies/cliffy-table@1.0.0-dev.5",
     "@hed/validator": "npm:hed-validator@4.0.1",
+    "@hyparquet": "npm:hyparquet@1.12.0",
     "@ignore": "npm:ignore@7.0.3",
     "@libs/xml": "jsr:@libs/xml@6.0.4",
     "@mango/nifti": "npm:@bids/nifti-reader-js@0.6.9",
diff --git a/src/files/browser.ts b/src/files/browser.ts
index a8a7ae7b..f8f70e17 100644
--- a/src/files/browser.ts
+++ b/src/files/browser.ts
@@ -29,6 +29,11 @@ export class BIDSFileBrowser implements BIDSFile {
     return this.#file.size
   }

+  /** Size in bytes; ArrayBuffer-style counterpart of `size`. */
+  get byteLength(): number {
+    return this.#file.size
+  }
+
   get stream(): ReadableStream<Uint8Array> {
     return this.#file.stream()
   }
@@ -44,6 +49,16 @@ export class BIDSFileBrowser implements BIDSFile {
   async readBytes(size: number, offset = 0): Promise<Uint8Array<ArrayBuffer>> {
     return new Uint8Array(await this.#file.slice(offset, size).arrayBuffer())
   }
+
+  /**
+   * Read the byte range [start, end) of the file.
+   *
+   * Uses the underlying file's own `slice` instead of going through `readBytes`,
+   * which takes a length rather than an end offset.
+   */
+  async slice(start: number, end: number): Promise<ArrayBuffer> {
+    return await this.#file.slice(start, end).arrayBuffer()
+  }
 }

 /**
diff --git a/src/files/deno.ts b/src/files/deno.ts
index 65cc82d3..72871635 100644
--- a/src/files/deno.ts
+++ b/src/files/deno.ts
@@ -45,6 +45,11 @@ export class BIDSFileDeno implements BIDSFile {
     return this.#fileInfo ? this.#fileInfo.size : -1
   }

+  /** Size in bytes, or -1 without file info; ArrayBuffer-style counterpart of `size`. */
+  get byteLength(): number {
+    return this.#fileInfo ? this.#fileInfo.size : -1
+  }
+
   get stream(): ReadableStream<Uint8Array> {
     const handle = this.#openHandle()
     return handle.readable
@@ -87,6 +92,19 @@ export class BIDSFileDeno implements BIDSFile {
     return buf.subarray(0, nbytes)
   }

+  /**
+   * Read the byte range [start, end) of the file.
+   *
+   * `readBytes` returns a `subarray` view, so its `.buffer` is the whole backing
+   * allocation, which is longer than the data whenever fewer bytes were read than
+   * requested. Copy out only the range the view covers.
+   */
+  async slice(start: number, end: number): Promise<ArrayBuffer> {
+    const bytes = await this.readBytes(end - start, start)
+    const from = bytes.byteOffset
+    return bytes.buffer.slice(from, from + bytes.byteLength)
+  }
+
   /**
    * Return a Deno file handle
    */
diff --git a/src/files/parquet.test.ts b/src/files/parquet.test.ts
new file mode 100644
index 00000000..d272e54e
--- /dev/null
+++ b/src/files/parquet.test.ts
@@ -0,0 +1,19 @@
+import { assertEquals } from '@std/assert'
+import { FileIgnoreRules } from './ignore.ts'
+import { BIDSFileDeno } from './deno.ts'
+
+import { loadParquet } from './parquet.ts'
+
+Deno.test('Test loading parquet file', async (t) => {
+  const ignore = new FileIgnoreRules([])
+  await t.step('Load participants.parquet', async () => {
+    const path = 'participants.parquet'
+    const root = './tests/data/'
+    const file = new BIDSFileDeno(root, path, ignore)
+    const participantsMap = await loadParquet(file)
+    const keys = Object.keys(participantsMap)
+    assertEquals(keys.length, 3)
+    // The assertions run for their side effect only, so iterate instead of mapping.
+    keys.forEach((key) => assertEquals(participantsMap.get(key)?.length, 16))
+  })
+})
diff --git a/src/files/parquet.ts b/src/files/parquet.ts
new file mode 100644
index 00000000..0c43a6c3
--- /dev/null
+++ b/src/files/parquet.ts
@@ -0,0 +1,35 @@
+import { parquetRead, type ParquetReadOptions } from '@hyparquet'
+import type { BIDSFile } from '../types/filetree.ts'
+import { ColumnsMap } from '../types/columns.ts'
+
+/**
+ * Load a parquet file into a ColumnsMap, converting every cell to a string.
+ *
+ * @param file File to read; passed to hyparquet as the `file` read option.
+ * @param maxRows Maximum number of rows to keep per column; a negative value
+ *   (the default) keeps every row.
+ * @returns The populated ColumnsMap, keyed by column name.
+ */
+export async function loadParquet(file: BIDSFile, maxRows: number = -1): Promise<ColumnsMap> {
+  const columnsMap = new ColumnsMap()
+  const limited = maxRows >= 0
+  const readOpts: ParquetReadOptions = {
+    file: file,
+    onChunk: (data) => {
+      // Widen before converting: columnData may be a typed array, and `.map` on a
+      // typed array writes its results back into a typed array, not into strings.
+      const raw: ArrayLike<unknown> = data.columnData
+      const values = Array.from(raw, (entry) => String(entry))
+      // Extend rather than overwrite, in case a column arrives in several chunks.
+      // NOTE(review): assumes the chunks of a column arrive in row order - confirm.
+      const stored = columnsMap[data.columnName]
+      const merged = Array.isArray(stored) ? [...stored, ...values] : values
+      // Trim here as well, in case a chunk carries rows past the requested bound.
+      columnsMap[data.columnName] = limited ? merged.slice(0, maxRows) : merged
+    },
+  }
+  // NOTE(review): `rowEnd` is taken to be an exclusive upper row bound - confirm.
+  if (limited) readOpts.rowEnd = maxRows
+  await parquetRead(readOpts)
+  return columnsMap
+}
diff --git a/src/types/filetree.ts b/src/types/filetree.ts
index d1e94f24..0e84ba9f 100644
--- a/src/types/filetree.ts
+++ b/src/types/filetree.ts
@@ -15,6 +15,10 @@ export interface BIDSFile {
   text: () => Promise<string>
   // Read a range of bytes
   readBytes: (size: number, offset?: number) => Promise<Uint8Array<ArrayBuffer>>
+  // Alternative to readBytes used for ArrayBuffer compatibility; reads [start, end)
+  slice: (start: number, end: number) => Promise<ArrayBuffer>
+  // Alternative to size used for ArrayBuffer compatibility
+  byteLength: number
   // Access the parent directory
   parent: FileTree
   // File has been viewed
diff --git a/tests/data/participants.parquet b/tests/data/participants.parquet
new file mode 100644
index 00000000..75a54019
Binary files /dev/null and b/tests/data/participants.parquet differ