Skip to content

Commit

Permalink
add fragmentation
Browse files Browse the repository at this point in the history
  • Loading branch information
DanielHindi committed Mar 18, 2016
1 parent 64eb1ef commit e0c98aa
Show file tree
Hide file tree
Showing 3 changed files with 233 additions and 45 deletions.
48 changes: 43 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@
Takes an Amazon S3 bucket folder and zips it to a:
* Stream
* Local File
* Local File Fragments (zip multiple files broken up by max number of files or size)
* S3 File (ie uploads the zip back to s3)
* S3 File Fragments (upload multiple zip files broken up by max number of files or size)

###2. Differential zipping
It also allows you to do *differential* zips. Youc an save the key of the last file you zipped and then zip files that have been uploaded after the last zip.
It also allows you to do *differential* zips. You can save the key of the last file you zipped and then zip files that have been uploaded after the last zip.

###3. Filter Files to zip
###3. Fragmented Zips
If a zip file has the potential of getting too big, you can provide limits to break up the compression into multiple zip files. You can limit based on file count or total size (pre-zip).

###4. Filter Files to zip
You can filter out files you don't want zipped based on any criteria you need


Expand Down Expand Up @@ -53,6 +58,22 @@ zipper.zipToFile ("myBucketFolderName",'keyOfLastFileIZipped', './myLocalFile.zi
```


### Zip fragments to the local file system with a filename pattern and a maximum file count
```
zipper.zipToFileFragments ('myBucketFolderName','keyOfLastFileIZipped', './myLocalFile.zip',maxNumberOfFilesPerZip, maxSizeInBytesPreZip, function(err,results){
if(err)
console.error(err);
else{
if(results.length > 0) {
var result = results[results.length - 1];
var lastFile = result.zippedFiles[result.zippedFiles.length - 1];
if (lastFile)
console.log('last key ', lastFile.Key); // next time start from here
}
}
});
```


### Zip to S3 file
```
Expand All @@ -68,6 +89,21 @@ zipper.zipToS3File ("myBucketFolderName",'keyOfLastFileIZipped', 'myS3File.zip',
});
```

### Zip fragments to S3
```
zipper.zipToS3FileFragments("11111111111111",'', 'test.zip',5,1024*1024,function(err, results){
if(err)
console.error(err);
else if(results.length > 0) {
var result = results[results.length - 1];
var lastFile = result.zippedFiles[result.zippedFiles.length - 1];
if (lastFile)
console.log('last key ', lastFile.Key); // next time start from here
}
});
```

##The Details
### `init`
Either from the constructor or from the `init(config)` function you can pass along the AWS config object
Expand Down Expand Up @@ -97,11 +133,13 @@ Override this function when you want to filter out certain files. The `file` par

### `getFiles: function(folderName,startKey,callback)`
Get a list of files in the bucket folder
* `foldeName` : the name of the folder in the bucket
* `folderName` : the name of the folder in the bucket
* `startKey`: optional. return files listed after this file key
* `callback(err,files)`: the function you want called when the list returns
* `callback(err,result)`: the function you want called when the list returns
* `err`: error object if it exists
* `files`: array of files found
* `result`:
* `files`: array of files found
    * `totalFilesScanned`: total number of files scanned, including files filtered out by the `filterOutFiles` function

### `streamZipDataTo: function (pipe,folderName, startKey, callback)`
If you want to get a stream to pipe raw data to. For example if you wanted to stream the zip file directly to an http response
Expand Down
226 changes: 188 additions & 38 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,52 +33,90 @@ S3Zipper.prototype = {
/// Default file filter: returns the file object unchanged, so every listed
/// file is zipped. Override this to exclude files — getFiles skips any file
/// for which this returns a falsy value.
,filterOutFiles: function(fileObj){
    return fileObj;
}
,getFiles: function(folderName,startKey,callback){
,getFiles: function(folderName,startKey,maxFileCount,maxFileSize,callback){

var bucketParams = {
Bucket: this.awsConfig.bucket, /* required */
Delimiter: "/",
Prefix: folderName + "/"
//MaxKeys: 50
};
if (startKey)
bucketParams.Marker = startKey;

if(typeof(maxFileCount) == "function" && typeof(callback) == "undefined"){
callback=maxFileCount;
maxFileCount = null;
}
else if(maxFileCount > 0)
bucketParams.MaxKeys = maxFileCount;

var t = this;
this.s3bucket.listObjects(bucketParams, function (err, data) {
if (err) {
callback(err, null);
} else {
var result = [];
var totalSizeOfPassedFiles=0;
var lastScannedFile;
for (var i = 0; i < data.Contents.length; i++) {

var passedFile = t.filterOutFiles(data.Contents[i]);
if(passedFile)
result.push(passedFile);

if(passedFile) {


if(maxFileSize && maxFileSize < passedFile.Size) {
console.warn('Single file size exceeds max allowed size', data.Contents[i].Size, '>', maxFileSize, passedFile);
if(result.length == 0){
console.warn('Will zip large file on its own', passedFile.Key);
result.push(passedFile);
totalSizeOfPassedFiles += passedFile.Size;
}
else
break;
}
else if(maxFileSize && totalSizeOfPassedFiles + data.Contents[i].Size > maxFileSize) {
console.log('Hit max size limit. Split fragment');
break;
}
else {
result.push(passedFile);
totalSizeOfPassedFiles += passedFile.Size;
}
}

lastScannedFile = data.Contents[i];
}
callback(null, result);
callback(null, {files:result,totalFilesScanned :data.Contents.length,lastScannedFile:lastScannedFile} );
}
});
}
,streamZipDataTo: function (pipe,folderName, startKey, callback) {
,streamZipDataTo: function (pipe,folderName, startKey,maxFileCount,maxFileSize, callback) {
if (!folderName) {
console.error('folderName required');
return null;
}


if(typeof(startKey) == "function" && !callback ) {
callback = startKey;
startKey=null;
maxFileCount = null;
}else if(typeof(maxFileCount) == "function" && typeof(callback) == "undefined"){
callback=maxFileCount;
maxFileCount = null;
}

var zip = new archiver.create('zip');
if(pipe) zip.pipe(pipe);

var t= this;

this.getFiles(folderName,startKey,function(err,files){
this.getFiles(folderName,startKey,maxFileCount,maxFileSize,function(err,clearedFiles){
if(err)
console.error(err);
else{
var files = clearedFiles.files;
async.map(files,function(f,callback){
t.s3bucket.getObject({Bucket: t.awsConfig.bucket,Key: f.Key },function(err,data){
if(err)
Expand All @@ -99,13 +137,42 @@ S3Zipper.prototype = {
}, function(err,results){
zip.finalize();
zip.manifest = results;
callback(err,zip);
callback(err,{
zip: zip,
zippedFiles: results,
totalFilesScanned:clearedFiles.totalFilesScanned,
lastScannedFile:clearedFiles.lastScannedFile
});

});
}
});

}
,uploadLocalFileToS3: function(localFileName, s3ZipFileName,callback){
console.log('uploading ',s3ZipFileName,'...');
var readStream = fs.createReadStream(localFileName);//tempFile

this.s3bucket.upload({
Bucket: this.awsConfig.bucket
, Key: s3ZipFileName
, ContentType: "application/zip"
, Body: readStream
})
.on('httpUploadProgress', function (e) {
console.log('upload progress', Math.round(e.loaded / e.total * 100, 2), '%');

})
.send(function (err, result) {
readStream.close();
if (err)
callback(err);
else {
console.log('upload completed.');
callback(null,result);
}
});
}
//all these timeouts exist because the zip streams aren't done writing when they say they are
,zipToS3File: function (s3FolderName,startKey,s3ZipFileName ,callback){
var t = this;
Expand All @@ -116,36 +183,16 @@ S3Zipper.prototype = {


this.zipToFile(s3FolderName,startKey,tempFile ,function(err,r){
console.log('uploading ',s3ZipFileName,'...');

if(r && r.manifest && r.manifest.length) {
var readStream = fs.createReadStream(tempFile);//tempFile

t.s3bucket.upload({
Bucket: t.awsConfig.bucket
, Key: s3ZipFileName
, ContentType: "application/zip"
, Body: readStream
})
.on('httpUploadProgress', function (e) {
console.log('upload progress', Math.round(e.loaded / e.total * 100, 0), '%');

})
.send(function (err, result) {
readStream.close();
if (err)
callback(err);
else {
console.log('zip upload completed.');

callback(null, {
zipFileETag: result.ETag,
zipFileLocation: result.Location,
zippedFiles: r.manifest
});
fs.unlink(tempFile);
}

if(r && r.zippedFiles && r.zippedFiles.length) {
t.uploadLocalFileToS3(tempFile,s3ZipFileName,function(err,result){
callback(null, {
zipFileETag: result.ETag,
zipFileLocation: result.Location,
zippedFiles: r.zippedFiles
});
fs.unlink(tempFile);
});
}
else {
console.log('no files zipped. nothing to upload');
Expand All @@ -159,17 +206,120 @@ S3Zipper.prototype = {
});


}
/// Zip a bucket folder into multiple zip fragments and upload each fragment
/// to S3 as it finishes being written locally.
/// NOTE(review): `callback` fires when local zipping completes; fragment
/// uploads may still be in flight at that point — confirm callers don't
/// depend on uploads being done. Upload errors never reach `callback`.
,zipToS3FileFragments: function (s3FolderName, startKey, s3ZipFileName, maxFileCount, maxFileSize, callback) {
    var t = this;
    // Local scratch file name for the first fragment
    var tempFile = '__' + Date.now() + '.zip';

    // If no path was given for the S3 key, place the zips inside the source folder
    if (s3ZipFileName.indexOf('/') < 0)
        s3ZipFileName = s3FolderName + "/" + s3ZipFileName;

    var count = 0; // suffix counter for uploaded fragment names (_0, _1, ...)
    this.zipToFileFragments(s3FolderName, startKey, tempFile, maxFileCount, maxFileSize, callback)
        .onFileZipped = function (fragFileName) {
            var s3fn = s3ZipFileName.replace(".zip", "_" + count + ".zip");
            count++;
            uploadFrag(s3fn, fragFileName);
        };

    // Upload one finished fragment, then delete the local copy on success.
    function uploadFrag(s3FragName, localFragName) {

        t.uploadLocalFileToS3(localFragName, s3FragName, function (err, result) {
            if (result) {
                console.log('remove temp file ', localFragName);
                fs.unlink(localFragName);
            }
            // NOTE(review): `err` is silently dropped here — the local
            // fragment is kept on disk but the caller is never notified.
        });
    }

}
,zipToFile: function (s3FolderName,startKey,zipFileName ,callback){
var fileStream = fs.createWriteStream(zipFileName);
this.streamZipDataTo(fileStream,s3FolderName,startKey,function(err,result){
this.streamZipDataTo(fileStream,s3FolderName,startKey,null,null,function(err,result){
setTimeout(function(){
callback(err,result);
fileStream.close();
},1000);
});
}
/// Zip a bucket folder into multiple local zip fragments, splitting on
/// maxFileCount (files per zip) and/or maxFileSize (pre-zip bytes).
/// Fragments are named zipFileName, then <name>_0.zip, <name>_1.zip, ...
/// Returns an `events` object: assign events.onFileZipped(fragFileName, result)
/// to be notified as each non-empty fragment file is written.
/// callback(errors|null, results) fires once scanning is exhausted.
,zipToFileFragments: function (s3FolderName, startKey, zipFileName, maxFileCount, maxFileSize, callback) {


    var events = {
        onFileZipped: function () { }
    };

    // Running report accumulated across all fragments
    var report = {
        results: []
        , errors: []
        , lastKey: null // key of the last file actually zipped
    };

    if (maxFileSize && maxFileSize < 1024)
        console.warn('Max File Size is really low. This may cause no files to be zipped, maxFileSize set to ', maxFileSize);

    if (zipFileName.indexOf(".zip") < 0)
        zipFileName += ".zip";

    var t = this;

    // Close the finished fragment's stream; delete empty fragments,
    // announce the rest via events.onFileZipped.
    function garbageCollector(fileStream, result, fragFileName) {

        setTimeout(function () {

            fileStream.close();
            if (result.zippedFiles.length == 0) /// its an empty zip file get rid of it

                fs.unlink(fragFileName);

            else
                events.onFileZipped(fragFileName, result);
        }, 1000); /// TODO: Zip needs a bit more time to finishing writing. I'm sure there is a better way
    }

    var counter = 0; // suffix for subsequent fragment file names
    function recursiveLoop(startKey, fragFileName, callback) {
        var fileStream = fs.createWriteStream(fragFileName);
        t.streamZipDataTo(fileStream, s3FolderName, startKey, maxFileCount, maxFileSize, function (err, result) {

            if (err)
                report.errors.push(err);
            else {
                if (result.zippedFiles.length > 0) {
                    report.results.push(result);
                    report.lastKey = result.zippedFiles[result.zippedFiles.length - 1].Key;
                }


                /// you may have not zipped anything but you scanned files and there may be more
                if (result.totalFilesScanned > 0)
                    recursiveLoop(result.lastScannedFile.Key, zipFileName.replace(".zip", "_" + counter + ".zip"), callback);
                else ///you're done time to go home
                    callback(err, result);

                counter++;
                /// clean up your trash you filthy animal
                garbageCollector(fileStream, result, fragFileName);

            }
            // NOTE(review): on the `err` branch the loop neither recurses nor
            // invokes callback, so the overall callback never fires on error.
            // Confirm whether that is intended.

        });
    }

    recursiveLoop(startKey, zipFileName, function () {

        if (report.errors.length > 0)
            callback(report.errors, report.results);
        else
            callback(null, report.results);

    });

    return events;

}
};

module.exports = S3Zipper;
Loading

0 comments on commit e0c98aa

Please sign in to comment.