Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add --prune option to prevent walking subtrees #124

Merged
merged 1 commit into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions src/files/deno.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ await requestReadPermission()
// Use this file for testing file behavior
const testUrl = import.meta.url
const testPath = fromFileUrl(testUrl)
const testDir = dirname(testPath)
const testDir = dirname(testPath) // $REPO/src/files
const testFilename = basename(testPath)
const repoRoot = dirname(dirname(dirname(testPath)))
const ignore = new FileIgnoreRules([])
const prune = new FileIgnoreRules(['derivatives'], false)

Deno.test('Deno implementation of BIDSFile', async (t) => {
await t.step('implements basic file properties', () => {
Expand Down Expand Up @@ -53,7 +55,7 @@ Deno.test('Deno implementation of BIDSFile', async (t) => {
'strips BOM characters when reading UTF-8 via .text()',
async () => {
// A BOM is invalid in JSON but is often emitted by certain tools, so .text() is expected to strip it transparently
const bomDir = join(testPath, '..', '..', 'tests')
const bomDir = join(repoRoot, 'src', 'tests')
const bomFilename = 'bom-utf8.json'
const file = new BIDSFileDeno(bomDir, bomFilename, ignore)
const text = await file.text()
Expand All @@ -75,4 +77,16 @@ Deno.test('Deno implementation of FileTree', async (t) => {
assert(testObj !== undefined)
assertEquals(testObj.path, `/${parent}/${testFilename}`)
})

await t.step('implements pruning', async () => {
const dsDir = join(repoRoot, 'tests', 'data', 'valid_dataset')
const derivFile =
'derivatives/fmriprep/sub-01/ses-01/func/sub-01_ses-01_task-rest_confounds.tsv.gz'

const fullTree = await readFileTree(dsDir)
assert(fullTree.get(derivFile))

const prunedTree = await readFileTree(dsDir, prune)
assert(!prunedTree.get(derivFile))
})
})
18 changes: 14 additions & 4 deletions src/files/deno.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,17 +117,22 @@ async function _readFileTree(
rootPath: string,
relativePath: string,
ignore: FileIgnoreRules,
prune: FileIgnoreRules,
parent?: FileTree,
): Promise<FileTree> {
await requestReadPermission()
const name = basename(relativePath)
const tree = new FileTree(relativePath, name, parent, ignore)

for await (const dirEntry of Deno.readDir(join(rootPath, relativePath))) {
const thisPath = posix.join(relativePath, dirEntry.name)
if (prune.test(thisPath)) {
continue
}
if (dirEntry.isFile || dirEntry.isSymlink) {
const file = new BIDSFileDeno(
rootPath,
posix.join(relativePath, dirEntry.name),
thisPath,
ignore,
)
file.parent = tree
Expand All @@ -136,8 +141,9 @@ async function _readFileTree(
if (dirEntry.isDirectory) {
const dirTree = await _readFileTree(
rootPath,
posix.join(relativePath, dirEntry.name),
thisPath,
ignore,
prune,
tree,
)
tree.directories.push(dirTree)
Expand All @@ -149,9 +155,13 @@ async function _readFileTree(
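/**
* Read in the target directory structure and return a FileTree.
*
* Entries whose paths match the optional `prune` rules are skipped while
* walking, so pruned directories are never descended into. When `prune` is
* omitted, an empty rule set is used.
*/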
export async function readFileTree(rootPath: string): Promise<FileTree> {
export async function readFileTree(
rootPath: string,
prune?: FileIgnoreRules,
): Promise<FileTree> {
prune ??= new FileIgnoreRules([], false)
const ignore = new FileIgnoreRules([])
const tree = await _readFileTree(rootPath, '/', ignore)
const tree = await _readFileTree(rootPath, '/', ignore, prune)
const bidsignore = tree.get('.bidsignore')
if (bidsignore) {
try {
Expand Down
9 changes: 7 additions & 2 deletions src/files/ignore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,15 @@ const defaultIgnores = [
export class FileIgnoreRules {
#ignore: Ignore

constructor(config: string[]) {
constructor(
config: string[],
addDefaults: boolean = true,
) {
// @ts-expect-error
this.#ignore = ignore()
this.#ignore.add(defaultIgnores)
if (addDefaults) {
this.#ignore.add(defaultIgnores)
}
this.#ignore.add(config)
}

Expand Down
6 changes: 5 additions & 1 deletion src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type { Config } from './setup/options.ts'
import * as colors from '@std/fmt/colors'
import { readFileTree } from './files/deno.ts'
import { fileListToTree } from './files/browser.ts'
import { FileIgnoreRules } from './files/ignore.ts'
import { resolve } from '@std/path'
import { validate } from './validators/bids.ts'
import { consoleFormat, resultToJSONStr } from './utils/output.ts'
Expand All @@ -21,7 +22,10 @@ export async function main(): Promise<ValidationResult> {
setupLogging(options.debug)

const absolutePath = resolve(options.datasetPath)
const tree = await readFileTree(absolutePath)
const prune = options.prune
? new FileIgnoreRules(['derivatives', 'sourcedata', 'code'], false)
: undefined
const tree = await readFileTree(absolutePath, prune)

const config = options.config ? JSON.parse(Deno.readTextFileSync(options.config)) as Config : {}

Expand Down
5 changes: 5 additions & 0 deletions src/setup/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ export type ValidatorOptions = {
recursive?: boolean
outfile?: string
blacklistModalities: string[]
prune?: boolean
}

const modalityType = new EnumType<string>(
Expand Down Expand Up @@ -72,6 +73,10 @@ export const validateCommand: Command<void, void, any, string[], void> = new Com
'-r, --recursive',
'Validate datasets found in derivatives directories in addition to root dataset',
)
.option(
'-p, --prune',
'Prune derivatives and sourcedata directories on load (disables -r and will underestimate dataset size)',
)
.option(
'-o, --outfile <file:string>',
'File to write validation results to.',
Expand Down
3 changes: 1 addition & 2 deletions src/tests/regression.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@ import { pathsToTree } from '../files/filetree.ts'
import { validate } from '../validators/bids.ts'
import type { BIDSFile } from '../types/filetree.ts'


Deno.test('Regression tests', async (t) => {
await t.step('Verify ignored files in scans.tsv do not trigger error', async () => {
const paths = [
'/dataset_description.json',
'/sub-01/anat/sub-01_T1w.nii.gz',
'/sub-01/anat/sub-01_CT.nii.gz', // unknown file
'/sub-01/anat/sub-01_CT.nii.gz', // unknown file
'/sub-01/sub-01_scans.tsv',
]
const ignore = ['*_CT.nii.gz']
Expand Down