Skip to content

Commit

Permalink
[DVT-437] node crawler (#55)
Browse files Browse the repository at this point in the history
* DVT-437 add node crawl function

* create crawl command

* add crawl logic and enodes

* more crawl function changes

* attempting to connect to a node using devp2p to get node name information

* connecting to node with ethclient

* save updates to the crawler

* add rlpx to node crawler

* fix lint

* add filter and ping commands, refactor crawl and p2p

* update ping and crawl, remove filter

* fix lint

* add deadline to handshake

* fix shadow

---------

Co-authored-by: Jesse Lee <[email protected]>
  • Loading branch information
minhd-vu and gatsbyz authored Mar 31, 2023
1 parent 1c94d6b commit f1cd341
Show file tree
Hide file tree
Showing 13 changed files with 1,182 additions and 6 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
out/
.DS_Store
coverage.out
coverage.out

.vscode
*.json
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

INSTALL_DIR:=~/go/bin/
INSTALL_DIR:=~/go/bin
BIN_NAME:=polycli
BUILD_DIR:=./out

Expand Down
136 changes: 136 additions & 0 deletions cmd/p2p/crawl/crawl.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/*
Copyright © 2022 Polygon <[email protected]>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package crawl

import (
"net"
"time"

"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/p2p/discover"
"github.com/ethereum/go-ethereum/p2p/enode"
"github.com/rs/zerolog/log"
"github.com/spf13/cobra"

"github.com/maticnetwork/polygon-cli/p2p"
)

// crawlParams collects the user-supplied settings for the crawl command.
type crawlParams struct {
	// Bootnodes is the comma separated list of nodes used for bootstrapping
	// (bound to the --bootnodes flag).
	Bootnodes string
	// Timeout is the crawl time limit as a duration string such as "30m0s"
	// (bound to the --timeout flag and parsed with time.ParseDuration).
	Timeout string
	// Threads is how many parallel discoveries to attempt
	// (bound to the --parallel flag).
	Threads int
	// NetworkID is the network id to filter discovered nodes by
	// (bound to the --network-id flag).
	// NOTE(review): the crawl command in this file never reads this value — confirm it is consumed elsewhere.
	NetworkID int
	// NodesFile is the path of the nodes JSON file; it is taken from the
	// first positional argument, read at startup and overwritten with the
	// crawl output.
	NodesFile string
	// Database is the path handed to enode.OpenDB (bound to the --database flag).
	Database string
}

// inputCrawlParams holds the flag and argument values for the current
// invocation of the crawl command. It is filled in by the flag bindings
// registered in init and by the command's PreRun hook.
var inputCrawlParams crawlParams

// CrawlCmd represents the crawl command.
//
// It takes the path of a nodes JSON file as its first positional argument.
// The file is loaded as the initial node set, a discovery v4 listener is
// started using the configured bootnodes, the crawler runs until the
// configured timeout, and the resulting node set is written back to the
// same file.
var CrawlCmd = &cobra.Command{
	Use:   "crawl [nodes file]",
	Short: "Crawl a network",
	Long:  `This is a basic function to crawl a network.`,
	Args:  cobra.MinimumNArgs(1),
	PreRun: func(cmd *cobra.Command, args []string) {
		inputCrawlParams.NodesFile = args[0]
	},
	RunE: func(cmd *cobra.Command, args []string) error {
		// Validate the timeout before opening the database or any socket so
		// that a malformed flag value fails fast without side effects.
		timeout, err := time.ParseDuration(inputCrawlParams.Timeout)
		if err != nil {
			return err
		}

		inputSet, err := p2p.LoadNodesJSON(inputCrawlParams.NodesFile)
		if err != nil {
			return err
		}

		var cfg discover.Config
		// A throwaway key identifies this crawler for the session; a failure
		// to generate it must not be ignored, as a nil key cannot be used to
		// build the local node.
		cfg.PrivateKey, err = crypto.GenerateKey()
		if err != nil {
			return err
		}

		bn, err := p2p.ParseBootnodes(inputCrawlParams.Bootnodes)
		if err != nil {
			log.Error().Err(err).Msg("Unable to parse bootnodes")
			return err
		}
		cfg.Bootnodes = bn

		db, err := enode.OpenDB(inputCrawlParams.Database)
		if err != nil {
			return err
		}
		// Deferred first so it runs last: the discovery listener (closed by
		// the later defer) is shut down before the database it relies on.
		defer db.Close()

		ln := enode.NewLocalNode(db, cfg.PrivateKey)
		socket, err := listen(ln)
		if err != nil {
			return err
		}

		disc, err := discover.ListenV4(socket, ln, cfg)
		if err != nil {
			// The listener never took ownership of the socket, so release it
			// here; the close error is secondary to the listen failure.
			_ = socket.Close()
			return err
		}
		defer disc.Close()

		c := newCrawler(inputSet, disc, disc.RandomNodes())
		c.revalidateInterval = 10 * time.Minute

		log.Info().Msg("Starting crawl")

		output := c.run(timeout, inputCrawlParams.Threads)
		return p2p.WriteNodesJSON(inputCrawlParams.NodesFile, output)
	},
}

// init registers the persistent flags of the crawl command and binds them to
// inputCrawlParams. The bootnodes flag is marked as required; a failure to
// mark it is logged rather than treated as fatal.
func init() {
	flags := CrawlCmd.PersistentFlags()

	flags.StringVarP(&inputCrawlParams.Bootnodes, "bootnodes", "b", "", "Comma separated nodes used for bootstrapping. At least one bootnode is required, so other nodes in the network can discover each other.")
	flags.StringVarP(&inputCrawlParams.Timeout, "timeout", "t", "30m0s", "Time limit for the crawl.")
	flags.IntVarP(&inputCrawlParams.Threads, "parallel", "p", 16, "How many parallel discoveries to attempt.")
	flags.IntVarP(&inputCrawlParams.NetworkID, "network-id", "n", 0, "Filter discovered nodes by this network id.")
	flags.StringVarP(&inputCrawlParams.Database, "database", "d", "", "Node database for updating and storing client information.")

	err := CrawlCmd.MarkPersistentFlagRequired("bootnodes")
	if err != nil {
		log.Error().Err(err).Msg("Failed to mark bootnodes as required persistent flag")
	}
}

// listen opens a UDP (IPv4) socket on 0.0.0.0 with a system-chosen port and
// records the socket's address on ln as the fallback IP and fallback UDP
// port. When the bound IP is unspecified, 127.0.0.1 is recorded instead.
//
// It returns the open socket, or the error from net.ListenPacket.
func listen(ln *enode.LocalNode) (*net.UDPConn, error) {
	conn, err := net.ListenPacket("udp4", "0.0.0.0:0")
	if err != nil {
		return nil, err
	}

	// A "udp4" packet listener is backed by a *net.UDPConn with a
	// *net.UDPAddr local address.
	udpConn := conn.(*net.UDPConn)
	local := conn.LocalAddr().(*net.UDPAddr)

	// Advertise the bound IP, substituting loopback for the wildcard address.
	fallbackIP := local.IP
	if fallbackIP.IsUnspecified() {
		fallbackIP = net.IP{127, 0, 0, 1}
	}
	ln.SetFallbackIP(fallbackIP)
	ln.SetFallbackUDP(local.Port)

	return udpConn, nil
}
Loading

0 comments on commit f1cd341

Please sign in to comment.