-
Notifications
You must be signed in to change notification settings - Fork 120
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f07ac28
commit 6320d46
Showing
29 changed files
with
8,558 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Hadoop Filecrush | ||
Copyright 2010, 2011 m6d Media6degrees |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Hadoop filecrusher. | ||
|
||
Turn many small files into fewer, larger ones. In the same pass it can also convert text files to sequence files and apply other compression options. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
<groupId>com.m6d</groupId> | ||
<artifactId>filecrush</artifactId> | ||
<name>M6D App - Filecrush</name> | ||
<version>2.2.2-SNAPSHOT</version> | ||
<description>filecrush utility</description> | ||
<packaging>jar</packaging> | ||
<properties> | ||
<hadoop.version>0.20.2</hadoop.version> | ||
<commons-cli.version>1.2</commons-cli.version> | ||
<commons-logging.version>1.0.4</commons-logging.version> | ||
<commons-lang.version>2.3</commons-lang.version> | ||
<commons-httpclient.version>3.0.1</commons-httpclient.version> | ||
<log4j.version>1.2.13</log4j.version> | ||
<slf4j.version>1.6.1</slf4j.version> | ||
<plexus-utils.version>1.1</plexus-utils.version> | ||
<junit.version>4.8.2</junit.version> | ||
<mockito.version>1.8.5</mockito.version> | ||
<hamcrest.version>1.2</hamcrest.version> | ||
<easymock.version>3.0</easymock.version> | ||
<jetty.version>6.1.14</jetty.version> | ||
</properties> | ||
|
||
<build> | ||
|
||
<pluginManagement> | ||
<plugins> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-eclipse-plugin</artifactId> | ||
<version>2.5.1</version> | ||
<configuration> | ||
<projectNameTemplate>[artifactId]</projectNameTemplate> | ||
<wtpmanifest>true</wtpmanifest> | ||
<wtpapplicationxml>true</wtpapplicationxml> | ||
<wtpversion>1.5</wtpversion> | ||
<additionalBuildcommands> | ||
<buildcommand>org.eclipse.jdt.core.javabuilder</buildcommand> | ||
<buildcommand>org.maven.ide.eclipse.maven2Builder</buildcommand> | ||
</additionalBuildcommands> | ||
<additionalProjectnatures> | ||
<projectnature>org.eclipse.jdt.core.javanature</projectnature> | ||
<projectnature>org.maven.ide.eclipse.maven2Nature</projectnature> | ||
</additionalProjectnatures> | ||
</configuration> | ||
</plugin> | ||
</plugins> | ||
</pluginManagement> | ||
|
||
<plugins> | ||
|
||
<plugin> | ||
<artifactId>maven-compiler-plugin</artifactId> | ||
<configuration> | ||
<source>1.6</source> | ||
<target>1.6</target> | ||
</configuration> | ||
</plugin> | ||
|
||
<!--
  No extra <execution> is declared here: with <packaging>jar</packaging> the
  jar:jar goal is already bound to the package phase (execution id
  "default-jar"). Declaring a second execution of the same goal builds the
  same artifact twice, and maven-jar-plugin 3.x rejects that with
  "You have to use a classifier to attach supplemental artifacts".
-->
<plugin>
  <artifactId>maven-jar-plugin</artifactId>
</plugin>
</plugins> | ||
</build> | ||
|
||
|
||
<dependencies> | ||
<dependency> | ||
<groupId>org.apache.hadoop</groupId> | ||
<artifactId>hadoop-core</artifactId> | ||
<version>${hadoop.version}</version> | ||
<scope>provided</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>commons-logging</groupId> | ||
<artifactId>commons-logging</artifactId> | ||
<version>${commons-logging.version}</version> | ||
<scope>provided</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>log4j</groupId> | ||
<artifactId>log4j</artifactId> | ||
<version>${log4j.version}</version> | ||
<scope>provided</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>commons-httpclient</groupId> | ||
<artifactId>commons-httpclient</artifactId> | ||
<version>${commons-httpclient.version}</version> | ||
<scope>provided</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>commons-lang</groupId> | ||
<artifactId>commons-lang</artifactId> | ||
<version>${commons-lang.version}</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.codehaus.plexus</groupId> | ||
<artifactId>plexus-utils</artifactId> | ||
<version>${plexus-utils.version}</version> | ||
<scope>provided</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>commons-cli</groupId> | ||
<artifactId>commons-cli</artifactId> | ||
<version>${commons-cli.version}</version> | ||
<scope>provided</scope> | ||
</dependency> | ||
<!-- test dependencies --> | ||
<dependency> | ||
<groupId>org.mockito</groupId> | ||
<artifactId>mockito-all</artifactId> | ||
<version>${mockito.version}</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.hamcrest</groupId> | ||
<artifactId>hamcrest-core</artifactId> | ||
<version>${hamcrest.version}</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.hamcrest</groupId> | ||
<artifactId>hamcrest-library</artifactId> | ||
<version>${hamcrest.version}</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.easymock</groupId> | ||
<artifactId>easymock</artifactId> | ||
<version>${easymock.version}</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>junit</groupId> | ||
<artifactId>junit</artifactId> | ||
<version>${junit.version}</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.apache.hadoop</groupId> | ||
<artifactId>hadoop-test</artifactId> | ||
<version>${hadoop.version}</version> | ||
<scope>provided</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.slf4j</groupId> | ||
<artifactId>slf4j-api</artifactId> | ||
<version>${slf4j.version}</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.slf4j</groupId> | ||
<artifactId>slf4j-log4j12</artifactId> | ||
<version>${slf4j.version}</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<!-- Needed to run Hadoop cluster test cases --> | ||
<dependency> | ||
<groupId>org.mortbay.jetty</groupId> | ||
<artifactId>jetty</artifactId> | ||
<version>${jetty.version}</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<!-- Needed to run Hadoop cluster test cases --> | ||
<dependency> | ||
<groupId>org.mortbay.jetty</groupId> | ||
<artifactId>jetty-util</artifactId> | ||
<version>${jetty.version}</version> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
</dependencies> | ||
</project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
/* | ||
Copyright 2011 m6d.com | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
package com.m6d.filecrush.clean; | ||
|
||
import java.io.IOException; | ||
|
||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.conf.Configured; | ||
import org.apache.hadoop.fs.FileStatus; | ||
import org.apache.hadoop.fs.FileSystem; | ||
import org.apache.hadoop.fs.Path; | ||
import org.apache.hadoop.util.Tool; | ||
import org.apache.hadoop.util.ToolRunner; | ||
|
||
@SuppressWarnings("deprecation") | ||
public class Clean extends Configured implements Tool{ | ||
|
||
public static final String TARGET_DIR="clean.target.dir"; | ||
public static final String CUTTOFF_MILLIS="clean.cutoff.millis"; | ||
public static final String TARGET_EXPR="clean.target.expr"; | ||
public static final String WARN_MODE="clean.warn.mode"; | ||
|
||
protected FileSystem fs; | ||
protected Configuration conf; | ||
protected long cutoff; | ||
|
||
public Clean(){ | ||
super(); | ||
} | ||
|
||
public static void main(String[] args) throws Exception { | ||
Clean clean = new Clean(); | ||
int exitCode = ToolRunner.run(new Configuration(),clean, args); | ||
System.exit(exitCode); | ||
} | ||
|
||
@Override | ||
public int run(String[] args) throws Exception { | ||
conf = getConf(); | ||
|
||
try { | ||
fs=FileSystem.get(getConf()); | ||
} catch (IOException e) { | ||
throw new RuntimeException("Could not open filesystem"); | ||
} | ||
int pre = preFlightCheck(); | ||
if (pre!=0){ | ||
return pre; | ||
} | ||
|
||
if (conf.get(CUTTOFF_MILLIS)!=null){ | ||
long now=System.currentTimeMillis(); | ||
long targetAge= Long.parseLong(conf.get(CUTTOFF_MILLIS)); | ||
cutoff=now-targetAge; | ||
} | ||
|
||
return cleanup (new Path(conf.get(TARGET_DIR))); | ||
|
||
} | ||
|
||
public void warnOrDelete(Path p) throws IOException{ | ||
if (conf.getBoolean(WARN_MODE, false)){ | ||
System.out.println("DELETE "+p); | ||
} else { | ||
if ( p.equals( new Path(conf.get(TARGET_DIR)) )){ | ||
|
||
} else { | ||
fs.delete(p); | ||
} | ||
} | ||
} | ||
|
||
|
||
public int cleanup(Path p){ | ||
try { | ||
if (fs.isFile(p)){ | ||
if (conf.get(TARGET_EXPR)!=null){ | ||
if (p.getName().matches(conf.get(TARGET_EXPR))){ | ||
warnOrDelete(p); | ||
} | ||
} | ||
if (conf.get(CUTTOFF_MILLIS)!=null){ | ||
if (fs.getFileStatus(p).getModificationTime() < cutoff ){ | ||
warnOrDelete(p); | ||
} | ||
} | ||
} | ||
|
||
if (fs.isDirectory(p)){ | ||
for (FileStatus stat: fs.listStatus(p)){ | ||
cleanup( stat.getPath() ); | ||
} | ||
if (fs.listStatus(p).length == 0){ | ||
if (conf.get(TARGET_EXPR)!=null){ | ||
if (p.getName().matches(conf.get(TARGET_EXPR))){ | ||
warnOrDelete(p); | ||
} | ||
} | ||
if (conf.get(CUTTOFF_MILLIS)!=null){ | ||
if (fs.getFileStatus(p).getModificationTime() < cutoff ){ | ||
warnOrDelete(p); | ||
} | ||
} | ||
} | ||
} | ||
} catch (IOException e) { | ||
System.out.println("exception "+e); | ||
return 7; | ||
} | ||
return 0; | ||
} | ||
|
||
public int preFlightCheck(){ | ||
Configuration conf = getConf(); | ||
if (conf.get(TARGET_DIR) == null){ | ||
System.err.println("You must specify a target.dir"); | ||
return 1; | ||
} | ||
if (conf.get(TARGET_DIR).equals("/")){ | ||
System.err.println("Will not clean / !!!!!!"); | ||
return 2; | ||
} | ||
if ( fs.getHomeDirectory().equals( new Path(conf.get(TARGET_DIR)) ) ){ | ||
System.err.println("Will not clean home directory"); | ||
return 3; | ||
} | ||
if (conf.get(CUTTOFF_MILLIS)==null && conf.get(TARGET_EXPR)==null){ | ||
System.err.println("You must specify "+CUTTOFF_MILLIS+" or "+TARGET_EXPR); | ||
return 4; | ||
} | ||
if (!(conf.get(CUTTOFF_MILLIS)==null) && !(conf.get(TARGET_EXPR)==null)){ | ||
System.err.println("You can not specify "+CUTTOFF_MILLIS+" and "+TARGET_EXPR); | ||
return 9; | ||
} | ||
if (conf.get(CUTTOFF_MILLIS)!=null) { | ||
try { | ||
Long.parseLong(conf.get(CUTTOFF_MILLIS)); | ||
} catch (NumberFormatException ex){ | ||
System.err.println(CUTTOFF_MILLIS+" was specified as "+conf.get(CUTTOFF_MILLIS)+" this is not a long integer"); | ||
return 15; | ||
} | ||
} | ||
try { | ||
if (! fs.exists( new Path(conf.get(TARGET_DIR)))) { | ||
System.err.println(conf.get(TARGET_DIR)+" does not exist"); | ||
} | ||
} catch (IOException e) { | ||
System.err.println("IOEXCEPTION"+ e); | ||
return 6; | ||
} | ||
return 0; | ||
} | ||
|
||
} |
Oops, something went wrong.