NUTCH-3014 Standardize Job names (apache#789)
lewismc authored Nov 3, 2023
1 parent 792ed28 commit bbf0867
Showing 32 changed files with 74 additions and 117 deletions.
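
Every change below follows the same pattern: the NutchJob.getInstance(...) call (Nutch's thin wrapper around Hadoop's Job.getInstance) plus a separate setJobName(...) call is collapsed into the two-argument Job.getInstance(Configuration, String) factory, with the name standardized to the form "Nutch <Tool>: <target>" so all jobs show up consistently in the resource manager UI. A minimal standalone sketch of the before/after pattern, using the plain Hadoop API (the class name and crawldb path are illustrative, not taken from the patch):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobNamePatternSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String crawlDb = "crawl/crawldb"; // illustrative path

    // Before: job created without a name, name attached afterwards.
    Job before = Job.getInstance(conf);
    before.setJobName("crawldb " + crawlDb);

    // After: name passed to the factory, following the "Nutch <Tool>: <target>" convention.
    Job after = Job.getInstance(conf, "Nutch CrawlDb: " + crawlDb);

    System.out.println(before.getJobName()); // crawldb crawl/crawldb
    System.out.println(after.getJobName());  // Nutch CrawlDb: crawl/crawldb
  }
}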
3 changes: 1 addition & 2 deletions src/java/org/apache/nutch/crawl/CrawlDb.java
@@ -165,8 +165,7 @@ public static Job createJob(Configuration config, Path crawlDb)
 Path newCrawlDb = new Path(crawlDb, Integer.toString(new Random()
 .nextInt(Integer.MAX_VALUE)));

-Job job = NutchJob.getInstance(config);
-job.setJobName("crawldb " + crawlDb);
+Job job = Job.getInstance(config, "Nutch CrawlDb: " + crawlDb);

 Path current = new Path(crawlDb, CURRENT_NAME);
 if (current.getFileSystem(job.getConfiguration()).exists(current)) {
3 changes: 1 addition & 2 deletions src/java/org/apache/nutch/crawl/CrawlDbMerger.java
@@ -165,9 +165,8 @@ public static Job createMergeJob(Configuration conf, Path output,
 Path newCrawlDb = new Path(output,
 "merge-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

-Job job = NutchJob.getInstance(conf);
+Job job = Job.getInstance(conf, "Nutch CrawlDbMerger: " + output);
 conf = job.getConfiguration();
-job.setJobName("crawldb merge " + output);

 job.setInputFormatClass(SequenceFileInputFormat.class);

20 changes: 7 additions & 13 deletions src/java/org/apache/nutch/crawl/CrawlDbReader.java
@@ -564,9 +564,8 @@ private TreeMap<String, Writable> processStatJobHelper(String crawlDb,
 throws IOException, InterruptedException, ClassNotFoundException {
 Path tmpFolder = new Path(crawlDb, "stat_tmp" + System.currentTimeMillis());

-Job job = NutchJob.getInstance(config);
+Job job = Job.getInstance(config, "Nutch CrawlDbReader: " + crawlDb);
 config = job.getConfiguration();
-job.setJobName("stats " + crawlDb);
 config.setBoolean("db.reader.stats.sort", sort);

 FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
@@ -812,7 +811,7 @@ public CrawlDatum get(String crawlDb, String url, Configuration config)

 @Override
 protected int process(String line, StringBuilder output) throws Exception {
-Job job = NutchJob.getInstance(getConf());
+Job job = Job.getInstance(getConf(), "Nutch CrawlDbReader: process " + crawlDb);
 Configuration config = job.getConfiguration();
 readUrl(this.crawlDb, line, config, output);
 return 0;
@@ -839,8 +838,7 @@ public void processDumpJob(String crawlDb, String output,

 Path outFolder = new Path(output);

-Job job = NutchJob.getInstance(config);
-job.setJobName("dump " + crawlDb);
+Job job = Job.getInstance(config, "Nutch CrawlDbReader: dump " + crawlDb);
 Configuration jobConf = job.getConfiguration();

 FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
@@ -958,18 +956,15 @@ public void processTopNJob(String crawlDb, long topN, float min,
 String output, Configuration config)
 throws IOException, ClassNotFoundException, InterruptedException {

-if (LOG.isInfoEnabled()) {
-LOG.info("CrawlDb topN: starting (topN=" + topN + ", min=" + min + ")");
-LOG.info("CrawlDb db: {}", crawlDb);
-}
+LOG.info("CrawlDb topN: starting (topN=" + topN + ", min=" + min + ")");
+LOG.info("CrawlDb db: {}", crawlDb);

 Path outFolder = new Path(output);
 Path tempDir = new Path(
 config.get("mapreduce.cluster.temp.dir", ".") + "/readdb-topN-temp-"
 + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

-Job job = NutchJob.getInstance(config);
-job.setJobName("topN prepare " + crawlDb);
+Job job = Job.getInstance(config, "Nutch CrawlDbReader: topN prepare " + crawlDb);
 FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
 job.setInputFormatClass(SequenceFileInputFormat.class);

@@ -1000,8 +995,7 @@ public void processTopNJob(String crawlDb, long topN, float min,
 }

 LOG.info("CrawlDb topN: collecting topN scores.");
-job = NutchJob.getInstance(config);
-job.setJobName("topN collect " + crawlDb);
+job = Job.getInstance(config, "Nutch CrawlDbReader: topN collect " + crawlDb);
 job.getConfiguration().setLong("db.reader.topn", topN);

 FileInputFormat.addInputPath(job, tempDir);
3 changes: 1 addition & 2 deletions src/java/org/apache/nutch/crawl/DeduplicationJob.java
@@ -305,9 +305,8 @@ public int run(String[] args) throws IOException {
 Path tempDir = new Path(crawlDb, "dedup-temp-"
 + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

-Job job = NutchJob.getInstance(getConf());
+Job job = Job.getInstance(getConf(), "Nutch DeduplicationJob: " + crawlDb);
 Configuration conf = job.getConfiguration();
-job.setJobName("Deduplication on " + crawlDb);
 conf.set(DEDUPLICATION_GROUP_MODE, group);
 conf.set(DEDUPLICATION_COMPARE_ORDER, compareOrder);
 job.setJarByClass(DeduplicationJob.class);
13 changes: 5 additions & 8 deletions src/java/org/apache/nutch/crawl/Generator.java
@@ -388,7 +388,7 @@ private JexlContext createContext(HostDatum datum) {
 public void setup(Context context) throws IOException {
 conf = context.getConfiguration();
 mos = new MultipleOutputs<FloatWritable, SelectorEntry>(context);
-Job job = Job.getInstance(conf);
+Job job = Job.getInstance(conf, "Nutch Generator.SelectorReducer");
 limit = conf.getLong(GENERATOR_TOP_N, Long.MAX_VALUE)
 / job.getNumReduceTasks();
 maxNumSegments = conf.getInt(GENERATOR_MAX_NUM_SEGMENTS, 1);
@@ -695,7 +695,7 @@ public Path[] generate(Path dbDir, Path segments, int numLists, long topN,
 long curTime)
 throws IOException, InterruptedException, ClassNotFoundException {

-Job job = NutchJob.getInstance(getConf());
+Job job = Job.getInstance(getConf(), "Nutch Generator: generate from " + dbDir);
 Configuration conf = job.getConfiguration();
 boolean filter = conf.getBoolean(GENERATOR_FILTER, true);
 boolean normalise = conf.getBoolean(GENERATOR_NORMALISE, true);
@@ -839,8 +839,7 @@ public Path[] generate(Path dbDir, Path segments, int numLists, long topN,
 }

 // map to inverted subset due for fetch, sort by score
-Job job = NutchJob.getInstance(getConf());
-job.setJobName("generate: select from " + dbDir);
+Job job = Job.getInstance(getConf(), "Nutch Generator: generate from " + dbDir);
 Configuration conf = job.getConfiguration();
 if (numLists == -1) {
 /* for politeness create exactly one partition per fetch task */
@@ -942,8 +941,7 @@ public Path[] generate(Path dbDir, Path segments, int numLists, long topN,
 Path tempDir2 = new Path(dbDir,
 "generate-temp-" + java.util.UUID.randomUUID().toString());

-job = NutchJob.getInstance(getConf());
-job.setJobName("generate: updatedb " + dbDir);
+job = Job.getInstance(getConf(), "Nutch Generator: updatedb " + dbDir);
 job.getConfiguration().setLong(Nutch.GENERATE_TIME_KEY, generateTime);
 for (Path segmpaths : generatedSegments) {
 Path subGenDir = new Path(segmpaths, CrawlDatum.GENERATE_DIR_NAME);
@@ -1001,8 +999,7 @@ private Path partitionSegment(Path segmentsDir, Path inputDir, int numLists)

 LOG.info("Generator: segment: " + segment);

-Job job = NutchJob.getInstance(getConf());
-job.setJobName("generate: partition " + segment);
+Job job = Job.getInstance(getConf(), "Nutch Generator: partition segment " + segment);
 Configuration conf = job.getConfiguration();
 conf.setInt("partition.url.seed", RANDOM.nextInt());

2 changes: 1 addition & 1 deletion src/java/org/apache/nutch/crawl/Injector.java
@@ -404,7 +404,7 @@ public void inject(Path crawlDb, Path urlDir, boolean overwrite,
 Path lock = CrawlDb.lock(conf, crawlDb, false);

 // configure job
-Job job = Job.getInstance(conf, "inject " + urlDir);
+Job job = Job.getInstance(conf, "Nutch Injector: " + urlDir);
 job.setJarByClass(Injector.class);
 job.setMapperClass(InjectMapper.class);
 job.setReducerClass(InjectReducer.class);
3 changes: 1 addition & 2 deletions src/java/org/apache/nutch/crawl/LinkDb.java
@@ -270,9 +270,8 @@ private static Job createJob(Configuration config, Path linkDb,
 Path newLinkDb = new Path(linkDb,
 Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

-Job job = NutchJob.getInstance(config);
+Job job = Job.getInstance(config, "Nutch LinkDb: " + linkDb);
 Configuration conf = job.getConfiguration();
-job.setJobName("linkdb " + linkDb);

 job.setInputFormatClass(SequenceFileInputFormat.class);

3 changes: 1 addition & 2 deletions src/java/org/apache/nutch/crawl/LinkDbMerger.java
@@ -147,8 +147,7 @@ public static Job createMergeJob(Configuration config, Path linkDb,
 Path newLinkDb = new Path(linkDb,
 "merge-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

-Job job = NutchJob.getInstance(config);
-job.setJobName("linkdb merge " + linkDb);
+Job job = Job.getInstance(config, "Nutch LinkDbMerger: " + linkDb);

 Configuration conf = job.getConfiguration();
 job.setInputFormatClass(SequenceFileInputFormat.class);
3 changes: 1 addition & 2 deletions src/java/org/apache/nutch/crawl/LinkDbReader.java
@@ -159,8 +159,7 @@ public void processDumpJob(String linkdb, String output, String regex)

 Path outFolder = new Path(output);

-Job job = NutchJob.getInstance(getConf());
-job.setJobName("read " + linkdb);
+Job job = Job.getInstance(getConf(), "Nutch LinkDbReader: " + linkdb);
 job.setJarByClass(LinkDbReader.class);

 Configuration conf = job.getConfiguration();
2 changes: 1 addition & 1 deletion src/java/org/apache/nutch/fetcher/Fetcher.java
@@ -498,7 +498,7 @@ public void fetch(Path segment, int threads) throws IOException,
 totalOutlinksToFollow);
 }

-Job job = NutchJob.getInstance(getConf());
+Job job = Job.getInstance(getConf(), "Nutch Fetcher: " + segment.getName());
 job.setJobName("FetchData");
 Configuration conf = job.getConfiguration();

3 changes: 1 addition & 2 deletions src/java/org/apache/nutch/hostdb/ReadHostDb.java
@@ -181,8 +181,7 @@ private void readHostDb(Path hostDb, Path output, boolean dumpHomepages, boolean
 conf.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
 conf.set("mapreduce.output.textoutputformat.separator", "\t");

-Job job = Job.getInstance(conf);
-job.setJobName("ReadHostDb");
+Job job = Job.getInstance(conf, "Nutch ReadHostDb");
 job.setJarByClass(ReadHostDb.class);

 FileInputFormat.addInputPath(job, new Path(hostDb, "current"));
3 changes: 1 addition & 2 deletions src/java/org/apache/nutch/hostdb/UpdateHostDb.java
@@ -77,11 +77,10 @@ private void updateHostDb(Path hostDb, Path crawlDb, Path topHosts,
 stopWatch.start();
 LOG.info("UpdateHostDb: starting");

-Job job = NutchJob.getInstance(getConf());
+Job job = Job.getInstance(getConf(), "Nutch UpdateHostDb");
 Configuration conf = job.getConfiguration();
 boolean preserveBackup = conf.getBoolean("db.preserve.backup", true);
 job.setJarByClass(UpdateHostDb.class);
-job.setJobName("UpdateHostDb");

 FileSystem fs = hostDb.getFileSystem(conf);
 Path old = new Path(hostDb, "old");
4 changes: 1 addition & 3 deletions src/java/org/apache/nutch/indexer/CleaningJob.java
@@ -144,7 +144,7 @@ public void delete(String crawldb, boolean noCommit)
 stopWatch.start();
 LOG.info("CleaningJob: starting");

-Job job = NutchJob.getInstance(getConf());
+Job job = Job.getInstance(getConf(), "Nutch CleaningJob: " + crawldb);
 Configuration conf = job.getConfiguration();

 FileInputFormat.addInputPath(job, new Path(crawldb, CrawlDb.CURRENT_NAME));
@@ -157,8 +157,6 @@ public void delete(String crawldb, boolean noCommit)
 job.setReducerClass(DeleterReducer.class);
 job.setJarByClass(CleaningJob.class);

-job.setJobName("CleaningJob");
-
 // need to expicitely allow deletions
 conf.setBoolean(IndexerMapReduce.INDEXER_DELETE, true);

3 changes: 2 additions & 1 deletion src/java/org/apache/nutch/indexer/IndexingJob.java
@@ -108,7 +108,8 @@ public void index(Path crawlDb, Path linkDb, List<Path> segments,
 stopWatch.start();
 LOG.info("Indexer: starting");

-final Job job = NutchJob.getInstance(getConf());
+final Job job = Job.getInstance(getConf(),
+"Nutch IndexingJob: crawldb: " + crawlDb + " segment(s): " + segments);
 job.setJobName("Indexer");
 Configuration conf = job.getConfiguration();

3 changes: 1 addition & 2 deletions src/java/org/apache/nutch/parse/ParseSegment.java
@@ -232,8 +232,7 @@ public void parse(Path segment) throws IOException,
 LOG.info("ParseSegment: starting");
 LOG.info("ParseSegment: segment: {}", segment);

-Job job = NutchJob.getInstance(getConf());
-job.setJobName("parse " + segment);
+Job job = Job.getInstance(getConf(), "Nutch ParseSegment: " + segment);

 Configuration conf = job.getConfiguration();
 FileInputFormat.addInputPath(job, new Path(segment, Content.DIR_NAME));
6 changes: 2 additions & 4 deletions src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java
@@ -341,8 +341,7 @@ public void dumpLinks(Path webGraphDb) throws IOException,
 // run the inverter job
 Path tempInverted = new Path(webGraphDb, "inverted-"
 + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
-Job inverter = NutchJob.getInstance(conf);
-inverter.setJobName("LinkDumper: inverter");
+Job inverter = Job.getInstance(conf, "Nutch LinkDumper: invert " + webGraphDb);
 FileInputFormat.addInputPath(inverter, nodeDb);
 FileInputFormat.addInputPath(inverter, outlinkDb);
 inverter.setInputFormatClass(SequenceFileInputFormat.class);
@@ -372,8 +371,7 @@ public void dumpLinks(Path webGraphDb) throws IOException,
 }

 // run the merger job
-Job merger = NutchJob.getInstance(conf);
-merger.setJobName("LinkDumper: merger");
+Job merger = Job.getInstance(conf, "Nutch LinkDumper: merge " + tempInverted);
 FileInputFormat.addInputPath(merger, tempInverted);
 merger.setJarByClass(Merger.class);
 merger.setInputFormatClass(SequenceFileInputFormat.class);
15 changes: 6 additions & 9 deletions src/java/org/apache/nutch/scoring/webgraph/LinkRank.java
@@ -93,9 +93,8 @@ private int runCounter(FileSystem fs, Path webGraphDb) throws IOException,
 // configure the counter job
 Path numLinksPath = new Path(webGraphDb, NUM_NODES);
 Path nodeDb = new Path(webGraphDb, WebGraph.NODE_DIR);
-Job counter = NutchJob.getInstance(getConf());
+Job counter = Job.getInstance(getConf(), "Nutch LinkRank: counter " + webGraphDb);
 Configuration conf = counter.getConfiguration();
-counter.setJobName("LinkRank Counter");
 FileInputFormat.addInputPath(counter, nodeDb);
 FileOutputFormat.setOutputPath(counter, numLinksPath);
 counter.setInputFormatClass(SequenceFileInputFormat.class);
@@ -194,9 +193,8 @@ private void runInitializer(Path nodeDb, Path output) throws IOException,
 InterruptedException, ClassNotFoundException {

 // configure the initializer
-Job initializer = NutchJob.getInstance(getConf());
+Job initializer = Job.getInstance(getConf(), "Nutch LinkRank: initializer " + nodeDb);
 Configuration conf = initializer.getConfiguration();
-initializer.setJobName("LinkAnalysis Initializer");
 FileInputFormat.addInputPath(initializer, nodeDb);
 FileOutputFormat.setOutputPath(initializer, output);
 initializer.setJarByClass(Initializer.class);
@@ -245,9 +243,9 @@ private void runInverter(Path nodeDb, Path outlinkDb, Path output)
 throws IOException, InterruptedException, ClassNotFoundException {

 // configure the inverter
-Job inverter = NutchJob.getInstance(getConf());
+Job inverter = Job.getInstance(getConf(),
+"Nutch Linkrank: inverter nodedb: " + nodeDb + " outlinkdb: " + outlinkDb);
 Configuration conf = inverter.getConfiguration();
-inverter.setJobName("LinkAnalysis Inverter");
 FileInputFormat.addInputPath(inverter, nodeDb);
 FileInputFormat.addInputPath(inverter, outlinkDb);
 FileOutputFormat.setOutputPath(inverter, output);
@@ -305,11 +303,10 @@ private void runAnalysis(Path nodeDb, Path inverted, Path output,
 int iteration, int numIterations, float rankOne)
 throws IOException, InterruptedException, ClassNotFoundException {

-Job analyzer = NutchJob.getInstance(getConf());
+Job analyzer = Job.getInstance(getConf(),
+"Nutch LinkRank: analysis iteration" + (iteration + 1) + " of " + numIterations);
 Configuration conf = analyzer.getConfiguration();
 conf.set("link.analyze.iteration", String.valueOf(iteration + 1));
-analyzer.setJobName("LinkAnalysis Analyzer, iteration " + (iteration + 1)
-+ " of " + numIterations);
 FileInputFormat.addInputPath(analyzer, nodeDb);
 FileInputFormat.addInputPath(analyzer, inverted);
 FileOutputFormat.setOutputPath(analyzer, output);
3 changes: 1 addition & 2 deletions src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java
@@ -298,9 +298,8 @@ public void dumpNodes(Path webGraphDb, DumpType type, long topN, Path output,
 LOG.info("NodeDumper: starting");
 Path nodeDb = new Path(webGraphDb, WebGraph.NODE_DIR);

-Job dumper = NutchJob.getInstance(getConf());
+Job dumper = Job.getInstance(getConf(), "Nutch NodeDumper: " + webGraphDb);
 Configuration conf = dumper.getConfiguration();
-dumper.setJobName("NodeDumper: " + webGraphDb);
 FileInputFormat.addInputPath(dumper, nodeDb);
 dumper.setInputFormatClass(SequenceFileInputFormat.class);

3 changes: 1 addition & 2 deletions src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java
@@ -170,8 +170,7 @@ public void update(Path crawlDb, Path webGraphDb) throws IOException,
 .nextInt(Integer.MAX_VALUE)));

 // run the updater job outputting to the temp crawl database
-Job updater = NutchJob.getInstance(conf);
-updater.setJobName("Update CrawlDb from WebGraph");
+Job updater = Job.getInstance(conf, "Nutch ScoreUpdater: " + crawlDb);
 FileInputFormat.addInputPath(updater, crawlDbCurrent);
 FileInputFormat.addInputPath(updater, nodeDb);
 FileOutputFormat.setOutputPath(updater, newCrawlDb);
9 changes: 3 additions & 6 deletions src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
@@ -545,9 +545,8 @@ public void createWebGraph(Path webGraphDb, Path[] segments,

 Path tempOutlinkDb = new Path(outlinkDb + "-"
 + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
-Job outlinkJob = NutchJob.getInstance(getConf());
+Job outlinkJob = Job.getInstance(getConf(), "Nutch WebGraph: outlinkdb " + outlinkDb);
 Configuration outlinkJobConf = outlinkJob.getConfiguration();
-outlinkJob.setJobName("Outlinkdb: " + outlinkDb);

 boolean deleteGone = outlinkJobConf.getBoolean("link.delete.gone", false);
 boolean preserveBackup = outlinkJobConf.getBoolean("db.preserve.backup", true);
@@ -625,9 +624,8 @@ public void createWebGraph(Path webGraphDb, Path[] segments,
 Path tempInlinkDb = new Path(inlinkDb + "-"
 + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

-Job inlinkJob = NutchJob.getInstance(getConf());
+Job inlinkJob = Job.getInstance(getConf(), "Nutch WebGraph: inlinkdb " + inlinkDb);
 Configuration inlinkJobConf = inlinkJob.getConfiguration();
-inlinkJob.setJobName("Inlinkdb " + inlinkDb);
 LOG.info("InlinkDb: adding input: " + outlinkDb);
 FileInputFormat.addInputPath(inlinkJob, outlinkDb);
 inlinkJob.setInputFormatClass(SequenceFileInputFormat.class);
@@ -669,9 +667,8 @@ public void createWebGraph(Path webGraphDb, Path[] segments,
 Path tempNodeDb = new Path(nodeDb + "-"
 + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

-Job nodeJob = NutchJob.getInstance(getConf());
+Job nodeJob = Job.getInstance(getConf(), "Nutch WebGraph: nodedb " + nodeDb);
 Configuration nodeJobConf = nodeJob.getConfiguration();
-nodeJob.setJobName("NodeDb " + nodeDb);
 LOG.info("NodeDb: adding input: " + outlinkDb);
 LOG.info("NodeDb: adding input: " + inlinkDb);
 FileInputFormat.addInputPath(nodeJob, outlinkDb);
3 changes: 1 addition & 2 deletions src/java/org/apache/nutch/segment/SegmentMerger.java
@@ -625,9 +625,8 @@ public void merge(Path out, Path[] segs, boolean filter, boolean normalize,
 long slice) throws IOException, ClassNotFoundException, InterruptedException {
 String segmentName = Generator.generateSegmentName();
 LOG.info("Merging {} segments to {}/{}", segs.length, out, segmentName);
-Job job = NutchJob.getInstance(getConf());
+Job job = Job.getInstance(getConf(), "Nutch SegmentMerger: " + out + "/" + segmentName);
 Configuration conf = job.getConfiguration();
-job.setJobName("mergesegs " + out + "/" + segmentName);
 conf.setBoolean("segment.merger.filter", filter);
 conf.setBoolean("segment.merger.normalizer", normalize);
 conf.setLong("segment.merger.slice", slice);
3 changes: 1 addition & 2 deletions src/java/org/apache/nutch/segment/SegmentReader.java
@@ -200,8 +200,7 @@ public void dump(Path segment, Path output) throws IOException,

 LOG.info("SegmentReader: dump segment: {}", segment);

-Job job = NutchJob.getInstance(getConf());
-job.setJobName("read " + segment);
+Job job = Job.getInstance(getConf(), "Nutch SegmentReader: " + segment);
 Configuration conf = job.getConfiguration();

 if (ge)
