Skip to content

Commit

Permalink
heritrix: remove ExtractorChrome from crawl profile
Browse files (browse the repository at this point in the history)
  • Loading branch information
ato committed Nov 7, 2024
1 parent 4f8650d commit f2c8f34
Showing 1 changed file with 0 additions and 10 deletions.
10 changes: 0 additions & 10 deletions in common/resources/pandas/crawlconfig/crawler-beans.cxml
Original file line number | Diff line number | Diff line change
Expand Up @@ -283,14 +283,6 @@
</bean>
<bean id="extractorHttp" class="org.archive.modules.extractor.ExtractorHTTP">
</bean>
<bean id="extractorChromeTooManyHopsDecideRule" class="org.archive.modules.deciderules.TooManyHopsDecideRule">
</bean>
<bean id="extractorChrome" class="org.archive.modules.extractor.ExtractorChrome">
<property name="enabled" value="false" />
<property name="shouldProcessRule">
<ref bean="extractorChromeTooManyHopsDecideRule" />
</property>
</bean>
<bean id="extractorRobotsTxt" class="org.archive.modules.extractor.ExtractorRobotsTxt">
</bean>
<bean id="extractorSitemap" class="org.archive.modules.extractor.ExtractorSitemap">
Expand Down Expand Up @@ -331,8 +323,6 @@
<ref bean="fetchHttp"/>
<!-- ...extract outlinks from HTTP headers... -->
<ref bean="extractorHttp"/>
<!-- NLA: load in headless browser -->
<ref bean="extractorChrome"/>
<!-- ...extract sitemap urls from robots.txt... -->
<ref bean="extractorRobotsTxt"/>
<!-- ...extract links from sitemaps... -->
Expand Down

0 comments on commit f2c8f34

Please sign in to comment.