-
-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #19 from lawzava/general-maintenance
🎨 general maintenance
- Loading branch information
Showing
11 changed files
with
227 additions
and
781 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,4 +10,4 @@ jobs: | |
- name: golangci-lint | ||
uses: golangci/golangci-lint-action@v2 | ||
with: | ||
version: v1.41.1 | ||
version: v1.50.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,6 @@ builds: | |
- | ||
env: | ||
- CGO_ENABLED=0 | ||
main: ./main.go | ||
goos: | ||
- linux | ||
- darwin | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package main | ||
|
||
import ( | ||
"github.com/lawzava/emailscraper" | ||
) | ||
|
||
//nolint:gochecknoglobals // allow global var here | ||
var ( | ||
scraperParameters emailscraper.Config | ||
url string | ||
output string | ||
outputWithURL bool | ||
) | ||
|
||
//nolint:gochecknoinits // required by github.com/spf13/cobra | ||
func init() { | ||
rootCmd.PersistentFlags().StringVarP(&url, | ||
"website", "w", "https://lawzava.com", "Website to scrape") | ||
rootCmd.PersistentFlags().BoolVar(&scraperParameters.Recursively, | ||
"recursively", true, "Scrape website recursively") | ||
rootCmd.PersistentFlags().IntVarP(&scraperParameters.MaxDepth, | ||
"depth", "d", 3, "Max depth to follow when scraping recursively") //nolint:gomnd // allow default max depth | ||
rootCmd.PersistentFlags().BoolVar(&scraperParameters.Async, | ||
"async", true, "Scrape website pages asynchronously") | ||
rootCmd.PersistentFlags().BoolVar(&scraperParameters.Debug, | ||
"debug", false, "Print debug logs") | ||
rootCmd.PersistentFlags().BoolVar(&scraperParameters.FollowExternalLinks, | ||
"follow-external", false, "Follow external 3rd party links within website") | ||
rootCmd.PersistentFlags().BoolVar(&scraperParameters.EnableJavascript, | ||
"js", false, "Enables EnableJavascript execution await") | ||
rootCmd.PersistentFlags().IntVar(&scraperParameters.Timeout, | ||
"timeout", 0, "If > 0, specify a timeout (seconds) for js execution await") | ||
rootCmd.PersistentFlags().StringVar(&output, | ||
"output", outputPlain, "Output type to use (default 'plain', supported: 'csv', 'json')") | ||
rootCmd.PersistentFlags().BoolVar(&outputWithURL, | ||
"output-with-url", false, "Adds URL to output with each email") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,39 @@ | ||
module github.com/lawzava/scrape | ||
|
||
go 1.16 | ||
go 1.19 | ||
|
||
require ( | ||
github.com/antchfx/xmlquery v1.3.8 // indirect | ||
github.com/chromedp/cdproto v0.0.0-20211019232255-96776d03ee97 // indirect | ||
github.com/lawzava/emailscraper v1.1.3 | ||
github.com/spf13/cobra v1.2.1 | ||
golang.org/x/net v0.0.0-20211020060615-d418f374d309 // indirect | ||
golang.org/x/sys v0.0.0-20211020154033-fcb26fe61c20 // indirect | ||
github.com/lawzava/emailscraper v1.2.0 | ||
github.com/spf13/cobra v1.6.0 | ||
) | ||
|
||
require ( | ||
github.com/PuerkitoBio/goquery v1.8.0 // indirect | ||
github.com/andybalholm/cascadia v1.3.1 // indirect | ||
github.com/antchfx/htmlquery v1.2.5 // indirect | ||
github.com/antchfx/xmlquery v1.3.12 // indirect | ||
github.com/antchfx/xpath v1.2.1 // indirect | ||
github.com/chromedp/cdproto v0.0.0-20221011223153-490dc4d81f7c // indirect | ||
github.com/chromedp/chromedp v0.8.6 // indirect | ||
github.com/chromedp/sysutil v1.0.0 // indirect | ||
github.com/gobwas/glob v0.2.3 // indirect | ||
github.com/gobwas/httphead v0.1.0 // indirect | ||
github.com/gobwas/pool v0.2.1 // indirect | ||
github.com/gobwas/ws v1.1.0 // indirect | ||
github.com/gocolly/colly/v2 v2.1.0 // indirect | ||
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect | ||
github.com/golang/protobuf v1.5.2 // indirect | ||
github.com/inconshreveable/mousetrap v1.0.1 // indirect | ||
github.com/josharian/intern v1.0.0 // indirect | ||
github.com/kennygrant/sanitize v1.2.4 // indirect | ||
github.com/lawzava/go-tld v1.0.1 // indirect | ||
github.com/mailru/easyjson v0.7.7 // indirect | ||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect | ||
github.com/spf13/pflag v1.0.5 // indirect | ||
github.com/temoto/robotstxt v1.1.2 // indirect | ||
golang.org/x/net v0.0.0-20221014081412-f15817d10f9b // indirect | ||
golang.org/x/sys v0.0.0-20221013171732-95e765b1cc43 // indirect | ||
golang.org/x/text v0.3.8 // indirect | ||
google.golang.org/appengine v1.6.7 // indirect | ||
google.golang.org/protobuf v1.28.1 // indirect | ||
) |
Oops, something went wrong.