Refactor handlePart due to issues with PassThrough streams
Our approach of using PassThrough streams created issues elsewhere. We therefore refactor to collect the first chunk of data manually and detect the file type from it.
StuAA78 committed Oct 17, 2024
1 parent 60e3781 commit a0be743
Showing 1 changed file with 67 additions and 31 deletions.
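
For context, the sketch below illustrates the pattern this commit adopts: read only the first chunk of a readable stream to detect its file type with fileTypeFromBuffer, while the same stream is consumed in full by another consumer. This is a minimal standalone sketch, not the repository's code: the file paths, the local file copy standing in for the Azure upload, and the firstChunk helper are placeholders for illustration.

import { createReadStream, createWriteStream } from 'fs'
import { pipeline } from 'stream/promises'
import { fileTypeFromBuffer } from 'file-type'

// Resolve with the first data chunk, or an empty buffer if the stream ends without
// emitting any data. Adding a 'data' listener does not stop other consumers (such as
// a pipe) from receiving the same chunks.
const firstChunk = (stream) => new Promise((resolve, reject) => {
  let settled = false
  const settle = (fn, value) => {
    if (!settled) {
      settled = true
      fn(value)
    }
  }
  stream.on('data', (chunk) => settle(resolve, chunk))
  stream.once('error', (err) => settle(reject, err))
  stream.once('end', () => settle(resolve, Buffer.alloc(0)))
})

const source = createReadStream('./example.pdf') // placeholder input file

// Start both consumers before awaiting anything so that no data is missed
const chunkPromise = firstChunk(source)
const copyPromise = pipeline(source, createWriteStream('./copy.pdf')) // stands in for the upload

const chunk = await chunkPromise
const detectedFileType = chunk.length > 0 ? await fileTypeFromBuffer(chunk) : undefined
console.log(detectedFileType?.mime) // e.g. 'application/pdf', or undefined if unrecognised

await copyPromise
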
98 changes: 67 additions & 31 deletions packages/webapp/src/utils/upload.js
@@ -1,6 +1,5 @@
import path from 'path'
import { fileTypeFromStream } from 'file-type'
import { PassThrough } from 'stream'
import { fileTypeFromBuffer } from 'file-type'
import { uploadStreamAndAwaitScan, deleteBlobFromContainers } from './azure-storage.js'
import multiparty from 'multiparty'
import constants from './constants.js'
@@ -62,48 +61,85 @@ const uploadFile = async (logger, request, config) => {
}

const handlePart = async (logger, part, config, uploadResult) => {
const fileSizeInBytes = part.byteCount
const fileSize = parseFloat(parseFloat(part.byteCount / 1024 / 1024).toFixed(config.fileValidationConfig?.maximumDecimalPlaces || 2))
const fileSizeInBytes = part.byteCount || 0
const filename = part.filename

// In order to detect the file type we create two passthrough streams to "clone" the original stream: one to detect
// the file type and one to upload the part. This is needed to prevent the file type check from consuming the entire
// stream immediately.
const fileTypePart = new PassThrough()
const uploadPart = new PassThrough()
part.pipe(fileTypePart)
part.pipe(uploadPart)
const detectedFileType = await fileTypeFromStream(fileTypePart)

// Delay throwing errors until the form is closed.
if (!filename) {
uploadResult.errorMessage = constants.uploadErrors.noFile
part.resume()
} else if (isXSSVulnerable(filename)) {
throw new Error(constants.uploadErrors.uploadFailure)
} else if (config.fileValidationConfig?.fileExt && !config.fileValidationConfig.fileExt.includes(path.extname(filename.toLowerCase()))) {
uploadResult.errorMessage = constants.uploadErrors.unsupportedFileExt
part.resume()
} else if (config.checkFileType && config.fileValidationConfig?.fileType && !config.fileValidationConfig.fileType.includes(detectedFileType.mime)) {
uploadResult.errorMessage = constants.uploadErrors.invalidFileType
return
}

if (isXSSVulnerable(filename)) {
uploadResult.errorMessage = constants.uploadErrors.uploadFailure
part.resume()
} else if (fileSize * 100 === 0) {
return
}

if (fileSizeInBytes === 0) {
uploadResult.errorMessage = constants.uploadErrors.emptyFile
part.resume()
} else if (fileSizeInBytes > config.fileValidationConfig.maxFileSize) {
uploadResult.errorMessage = constants.uploadErrors.maximumFileSizeExceeded
part.resume()
} else {
logger.info(`${new Date().toUTCString()} Uploading ${filename}`)
uploadResult.fileSize = fileSizeInBytes
uploadResult.filename = filename
uploadResult.fileType = detectedFileType.mime
return
}

// Capture the initial chunk of the stream for file type detection purposes while it continues to upload
const chunks = []
let initialChunk

// Asynchronously collect the initial chunk while allowing the stream to flow
const chunkPromise = new Promise((resolve, reject) => {
part.on('data', (chunk) => {
// Only the first chunk is collected as this should be enough for file type detection
if (chunks.length === 0) {
chunks.push(chunk)
resolve(Buffer.concat(chunks))
}
})

part.on('error', reject)
part.on('end', () => {
// Handle edge case where no data is present
if (chunks.length === 0) {
resolve(Buffer.concat(chunks))
}
})
})

try {
// Start file upload stream while we process the first chunk
const uploadConfig = JSON.parse(JSON.stringify(config))
uploadConfig.blobConfig.blobName = `${config.blobConfig.blobName}${filename}`
const tags = await uploadStreamAndAwaitScan(logger, uploadConfig, uploadPart)

// Upload stream happens while chunk is processed
const uploadPromise = uploadStreamAndAwaitScan(logger, uploadConfig, part)

// Await chunk processing to detect file type
initialChunk = await chunkPromise
const detectedFileType = await fileTypeFromBuffer(initialChunk)
const validFileType = config.checkFileType && config.fileValidationConfig?.fileType && !config.fileValidationConfig.fileType.includes(detectedFileType.mime)
if (!detectedFileType || validFileType) {
uploadResult.errorMessage = constants.uploadErrors.invalidFileType
part.resume()
return
}

const validFileExtension = config.fileValidationConfig?.fileExt && !config.fileValidationConfig.fileExt.includes(path.extname(filename.toLowerCase()))
if (validFileExtension) {
uploadResult.errorMessage = constants.uploadErrors.unsupportedFileExt
part.resume()
return
}

// Ensure file continues to be uploaded
const tags = await uploadPromise
uploadResult.tags = tags
uploadResult.config = uploadConfig
uploadResult.fileSize = fileSizeInBytes
uploadResult.filename = filename
uploadResult.fileType = detectedFileType.mime
} catch (err) {
logger.error(`Upload failed for ${filename}: ${err.message}`)
uploadResult.errorMessage = constants.uploadErrors.uploadFailure
}
}

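The diff above shows handlePart in isolation; for readers who want the surrounding context, the sketch below shows how a part handler of this kind is typically driven by multiparty. It is an illustration only, not the repository's uploadFile implementation, and req and handleUploadedPart are placeholder names.

import multiparty from 'multiparty'

// Parse a multipart request and hand each file part to an async handler, resolving
// with the accumulated result once the whole form has been read.
const parseUpload = (req, handleUploadedPart) => new Promise((resolve, reject) => {
  const uploadResult = {}
  const form = new multiparty.Form()

  form.on('part', (part) => {
    // Each part is a readable stream that also carries filename and byteCount metadata
    handleUploadedPart(part, uploadResult).catch(reject)
  })

  form.on('error', reject)
  // Validation errors recorded on uploadResult are surfaced only after the form closes
  form.on('close', () => resolve(uploadResult))

  form.parse(req)
})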
