diff --git a/ache-tools/build.gradle b/ache-tools/build.gradle
deleted file mode 100644
index 516fc5b7f..000000000
--- a/ache-tools/build.gradle
+++ /dev/null
@@ -1,36 +0,0 @@
-plugins {
-    id 'java'
-    id 'application'
-}
-
-
-application {
-    mainClass = 'achecrawler.RunCliTool'
-    applicationDefaultJvmArgs = ["-Dname=ache-tools -XX:+HeapDumpOnOutOfMemoryError"]
-}
-
-dependencies {
-    // Sub-projects
-    implementation project(':ache')
-    implementation project(':crawler-commons')
-
-    // Dependencies
-    implementation libs.slf4j.api
-    implementation libs.logback.classic
-    implementation group: 'org.apache.commons', name: 'commons-compress', version: '1.22'
-    implementation group: 'commons-validator', name: 'commons-validator', version: '1.6'
-    implementation group: 'io.airlift', name: 'airline', version: '0.9'
-    implementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.14.2'
-    implementation group: 'com.fasterxml.jackson.dataformat', name: 'jackson-dataformat-cbor', version: '2.14.2'
-    implementation group: 'org.elasticsearch.client', name: 'elasticsearch-rest-client', version: '5.6.7'
-    implementation group: 'org.apache.kafka', name: 'kafka-clients', version: '3.4.0'
-    implementation group: 'org.apache.tika', name: 'tika-parsers', version: '1.28.4'
-    implementation group: 'org.jsoup', name: 'jsoup', version: '1.15.1'
-    implementation group: 'com.amazonaws', name: 'aws-java-sdk-s3', version: '1.12.+'
-
-    // Test framework dependencies
-    testImplementation libs.junit.api
-    testImplementation libs.junit.params
-    testRuntimeOnly libs.junit.engine
-    testImplementation libs.assertj.core
-}
\ No newline at end of file
diff --git a/ache-tools/build.gradle.kts b/ache-tools/build.gradle.kts
new file mode 100644
index 000000000..06cd74105
--- /dev/null
+++ b/ache-tools/build.gradle.kts
@@ -0,0 +1,36 @@
+plugins {
+    id("java")
+    id("application")
+}
+
+application {
+    mainClass = "achecrawler.RunCliTool"
+    // One list element per JVM flag: a single space-joined string would reach the JVM as one argument.
+    applicationDefaultJvmArgs = listOf("-Dname=ache-tools", "-XX:+HeapDumpOnOutOfMemoryError")
+}
+
+dependencies {
+    // Sub-projects
+    implementation(project(":ache"))
+    implementation(project(":crawler-commons"))
+
+    // Dependencies
+    implementation(libs.slf4j.api)
+    implementation(libs.logback.classic)
+    implementation(libs.commons.compress)
+    implementation(libs.commons.validator)
+    implementation(libs.airline)
+    implementation(libs.jackson.core.databind)
+    implementation(libs.jackson.dataformat.cbor)
+    implementation(libs.elasticsearch.rest.client)
+    implementation(libs.kafka.clients)
+    implementation(libs.tika.parsers)
+    implementation(libs.jsoup)
+    implementation(libs.aws.java.sdk.s3)
+
+    // Test framework dependencies
+    testImplementation(libs.junit.api)
+    testImplementation(libs.junit.params)
+    testRuntimeOnly(libs.junit.engine)
+    testImplementation(libs.assertj.core)
+}
diff --git a/crawler-commons/build.gradle b/crawler-commons/build.gradle
deleted file mode 100644
index 1545acf99..000000000
--- a/crawler-commons/build.gradle
+++ /dev/null
@@ -1,17 +0,0 @@
-plugins {
-    id 'java-library'
-}
-
-repositories {
-    mavenCentral()
-}
-
-dependencies {
-    api group: 'org.slf4j', name: 'slf4j-api', version: '1.7.36'
-    implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
-    implementation group: 'org.apache.httpcomponents', name: 'httpclient', version: '4.5.14'
-    implementation group: 'commons-io', name: 'commons-io', version: '2.11.0'
-
-    testImplementation group: 'junit', name: 'junit', version: '4.13.2'
-    testImplementation group: 'org.eclipse.jetty', name: 'jetty-server', version: '9.4.48.v20220622'
-}
diff --git a/crawler-commons/build.gradle.kts b/crawler-commons/build.gradle.kts
new file mode 100644
index 000000000..f37fb6f2f
--- /dev/null
+++ b/crawler-commons/build.gradle.kts
@@ -0,0 +1,17 @@
+plugins {
+    id("java-library")
+}
+
+repositories {
+    mavenCentral()
+}
+
+dependencies {
+    api(group = "org.slf4j", name = "slf4j-api", version = "1.7.36")
+    implementation(group = "org.apache.commons", name = "commons-lang3", version = "3.12.0")
+    implementation(group = "org.apache.httpcomponents", name = "httpclient", version = "4.5.14")
+    implementation(group = "commons-io", name = "commons-io", version = "2.11.0")
+
+    testImplementation(group = "junit", name = "junit", version = "4.13.2")
+    testImplementation(group = "org.eclipse.jetty", name = "jetty-server", version = "9.4.48.v20220622")
+}
diff --git a/settings.gradle.kts b/settings.gradle.kts
index a20123eb8..e181e1ba5 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -47,11 +47,14 @@ dependencyResolutionManagement {
             library("elasticsearch.rest.client", "org.elasticsearch.client:elasticsearch-rest-client:5.6.7")
             library("kafka.clients", "org.apache.kafka:kafka-clients:3.4.0")
             library("webarchive.commons", "org.netpreserve.commons:webarchive-commons:1.1.9")
+            // NOTE: "1.12.+" is a dynamic version; it resolves to the newest 1.12.x available at build time.
+            library("aws.java.sdk.s3", "com.amazonaws:aws-java-sdk-s3:1.12.+")
             // Data parsing and extraction
             library("boilerpipe", "com.syncthemall:boilerpipe:1.2.2")
             library("nekohtml", "net.sourceforge.nekohtml:nekohtml:1.9.22")
             library("jsoup", "org.jsoup:jsoup:1.15.1")
             library("lucene.analyzers.common", "org.apache.lucene:lucene-analyzers-common:8.11.1")
+            library("tika.parsers", "org.apache.tika:tika-parsers:1.28.4")
             // HTTP libraries
             library("okhttp", "com.squareup.okhttp3:okhttp:4.10.0")
             library("httpclient", "org.apache.httpcomponents:httpclient:4.5.14")