cmd/devp2p: faster crawling + less verbose dns updates (#26697)
This improves the speed of DHT crawling by using concurrent requests. It also makes DNS updates less verbose: individual record changes are now logged at debug level, and each phase emits a single summary line at info level instead.
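For reference, a crawl with the new parallelism setting might be started like this (a sketch: the `--parallel` flag is added below with a default of 16, and the existing timeout flag is assumed to keep its `--timeout` name):

    devp2p discv4 crawl --parallel 32 --timeout 30m nodes.json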
parent ee530c0d5a
commit c155c8e179
--- a/cmd/devp2p/crawl.go
+++ b/cmd/devp2p/crawl.go
@@ -17,6 +17,8 @@
 package main
 
 import (
+	"sync"
+	"sync/atomic"
 	"time"
 
 	"github.com/ethereum/go-ethereum/log"
@@ -34,6 +36,7 @@ type crawler struct {
 
 	// settings
 	revalidateInterval time.Duration
+	mu                 sync.RWMutex
 }
 
 const (
@@ -67,7 +70,7 @@ func newCrawler(input nodeSet, disc resolver, iters ...enode.Iterator) *crawler
 	return c
 }
 
-func (c *crawler) run(timeout time.Duration) nodeSet {
+func (c *crawler) run(timeout time.Duration, nthreads int) nodeSet {
 	var (
 		timeoutTimer = time.NewTimer(timeout)
 		timeoutCh    <-chan time.Time
@@ -75,35 +78,51 @@ func (c *crawler) run(timeout time.Duration) nodeSet {
 		doneCh       = make(chan enode.Iterator, len(c.iters))
 		liveIters    = len(c.iters)
 	)
+	if nthreads < 1 {
+		nthreads = 1
+	}
 	defer timeoutTimer.Stop()
 	defer statusTicker.Stop()
 	for _, it := range c.iters {
 		go c.runIterator(doneCh, it)
 	}
-
 	var (
-		added   int
-		updated int
-		skipped int
-		recent  int
-		removed int
+		added   uint64
+		updated uint64
+		skipped uint64
+		recent  uint64
+		removed uint64
+		wg      sync.WaitGroup
 	)
+	wg.Add(nthreads)
+	for i := 0; i < nthreads; i++ {
+		go func() {
+			defer wg.Done()
+			for {
+				select {
+				case n := <-c.ch:
+					switch c.updateNode(n) {
+					case nodeSkipIncompat:
+						atomic.AddUint64(&skipped, 1)
+					case nodeSkipRecent:
+						atomic.AddUint64(&recent, 1)
+					case nodeRemoved:
+						atomic.AddUint64(&removed, 1)
+					case nodeAdded:
+						atomic.AddUint64(&added, 1)
+					default:
+						atomic.AddUint64(&updated, 1)
+					}
+				case <-c.closed:
+					return
+				}
+			}
+		}()
+	}
+
 loop:
 	for {
 		select {
-		case n := <-c.ch:
-			switch c.updateNode(n) {
-			case nodeSkipIncompat:
-				skipped++
-			case nodeSkipRecent:
-				recent++
-			case nodeRemoved:
-				removed++
-			case nodeAdded:
-				added++
-			default:
-				updated++
-			}
 		case it := <-doneCh:
 			if it == c.inputIter {
 				// Enable timeout when we're done revalidating the input nodes.
@@ -119,8 +138,11 @@ loop:
 			break loop
 		case <-statusTicker.C:
 			log.Info("Crawling in progress",
-				"added", added, "updated", updated, "removed", removed,
-				"ignored(recent)", recent, "ignored(incompatible)", skipped)
+				"added", atomic.LoadUint64(&added),
+				"updated", atomic.LoadUint64(&updated),
+				"removed", atomic.LoadUint64(&removed),
+				"ignored(recent)", atomic.LoadUint64(&recent),
+				"ignored(incompatible)", atomic.LoadUint64(&skipped))
 		}
 	}
 
@@ -131,6 +153,7 @@ loop:
 	for ; liveIters > 0; liveIters-- {
 		<-doneCh
 	}
+	wg.Wait()
 	return c.output
 }
 
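The hunks above restructure run() around a small worker pool: a fixed number of goroutines drain c.ch, tally outcomes with sync/atomic, and a sync.WaitGroup lets run() block until every worker has observed c.closed. Here is a minimal, self-contained sketch of that pattern, with hypothetical names (work, closed, handled) standing in for the crawler's fields:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	var (
		work    = make(chan int)      // stands in for c.ch
		closed  = make(chan struct{}) // stands in for c.closed
		handled uint64                // shared counter, so plain ++ becomes an atomic add
		wg      sync.WaitGroup
	)
	const nthreads = 4
	wg.Add(nthreads)
	for i := 0; i < nthreads; i++ {
		go func() {
			defer wg.Done()
			for {
				select {
				case n := <-work:
					_ = n * n // stand-in for the c.updateNode(n) call
					atomic.AddUint64(&handled, 1)
				case <-closed:
					return
				}
			}
		}()
	}
	for i := 0; i < 100; i++ {
		work <- i // unbuffered send: completes only once a worker picks it up
	}
	close(closed) // broadcast shutdown, like closing c.closed
	wg.Wait()     // same role as the wg.Wait() added before `return c.output`
	fmt.Println("handled:", atomic.LoadUint64(&handled)) // prints 100
}

Because each worker spends most of its time blocked waiting for a node record, running nthreads of these loops keeps that many discovery requests in flight at once, which is where the crawl speedup comes from.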
@@ -148,7 +171,9 @@ func (c *crawler) runIterator(done chan<- enode.Iterator, it enode.Iterator) {
 // updateNode updates the info about the given node, and returns a status
 // about what changed
 func (c *crawler) updateNode(n *enode.Node) int {
+	c.mu.RLock()
 	node, ok := c.output[n.ID()]
+	c.mu.RUnlock()
 
 	// Skip validation of recently-seen nodes.
 	if ok && time.Since(node.LastCheck) < c.revalidateInterval {
@@ -156,10 +181,9 @@ func (c *crawler) updateNode(n *enode.Node) int {
 	}
 
 	// Request the node record.
-	nn, err := c.disc.RequestENR(n)
-	node.LastCheck = truncNow()
 	status := nodeUpdated
-	if err != nil {
+	node.LastCheck = truncNow()
+	if nn, err := c.disc.RequestENR(n); err != nil {
 		if node.Score == 0 {
 			// Node doesn't implement EIP-868.
 			log.Debug("Skipping node", "id", n.ID())
@@ -176,8 +200,9 @@ func (c *crawler) updateNode(n *enode.Node) int {
 		}
 		node.LastResponse = node.LastCheck
 	}
-
 	// Store/update node in output set.
+	c.mu.Lock()
+	defer c.mu.Unlock()
 	if node.Score <= 0 {
 		log.Debug("Removing node", "id", n.ID())
 		delete(c.output, n.ID())
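With multiple workers calling updateNode concurrently, the shared c.output map needs the new mu sync.RWMutex: the lookup takes the read lock, and the final store/delete takes the write lock. A minimal sketch of that access pattern (store, get, put are illustrative names, not from the source):

package main

import (
	"fmt"
	"sync"
)

type store struct {
	mu sync.RWMutex
	m  map[string]int
}

// get takes the shared lock, so any number of workers can look up at once.
func (s *store) get(k string) (int, bool) {
	s.mu.RLock()
	defer s.mu.RUnlock()
	v, ok := s.m[k]
	return v, ok
}

// put takes the exclusive lock, serializing stores and deletes.
func (s *store) put(k string, v int) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if v <= 0 {
		delete(s.m, k) // mirrors the Score <= 0 removal above
		return
	}
	s.m[k] = v
}

func main() {
	s := &store{m: make(map[string]int)}
	s.put("a", 2)
	if v, ok := s.get("a"); ok {
		fmt.Println("a =", v)
	}
	s.put("a", 0)
	fmt.Println("len =", len(s.m)) // len = 0 after removal
}

Note that the read and the later write are separate critical sections, so two workers can still race to revalidate the same node; the worst case is a redundant ENR request, which the crawler tolerates.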
|  | |||||||
| @ -78,7 +78,7 @@ var ( | |||||||
| 		Name:   "crawl", | 		Name:   "crawl", | ||||||
| 		Usage:  "Updates a nodes.json file with random nodes found in the DHT", | 		Usage:  "Updates a nodes.json file with random nodes found in the DHT", | ||||||
| 		Action: discv4Crawl, | 		Action: discv4Crawl, | ||||||
| 		Flags:  flags.Merge(discoveryNodeFlags, []cli.Flag{crawlTimeoutFlag}), | 		Flags:  flags.Merge(discoveryNodeFlags, []cli.Flag{crawlTimeoutFlag, crawlParallelismFlag}), | ||||||
| 	} | 	} | ||||||
| 	discv4TestCommand = &cli.Command{ | 	discv4TestCommand = &cli.Command{ | ||||||
| 		Name:   "test", | 		Name:   "test", | ||||||
| @ -120,6 +120,11 @@ var ( | |||||||
| 		Usage: "Time limit for the crawl.", | 		Usage: "Time limit for the crawl.", | ||||||
| 		Value: 30 * time.Minute, | 		Value: 30 * time.Minute, | ||||||
| 	} | 	} | ||||||
|  | 	crawlParallelismFlag = &cli.IntFlag{ | ||||||
|  | 		Name:  "parallel", | ||||||
|  | 		Usage: "How many parallel discoveries to attempt.", | ||||||
|  | 		Value: 16, | ||||||
|  | 	} | ||||||
| 	remoteEnodeFlag = &cli.StringFlag{ | 	remoteEnodeFlag = &cli.StringFlag{ | ||||||
| 		Name:    "remote", | 		Name:    "remote", | ||||||
| 		Usage:   "Enode of the remote node under test", | 		Usage:   "Enode of the remote node under test", | ||||||
| @ -195,7 +200,7 @@ func discv4ResolveJSON(ctx *cli.Context) error { | |||||||
| 	defer disc.Close() | 	defer disc.Close() | ||||||
| 	c := newCrawler(inputSet, disc, enode.IterNodes(nodeargs)) | 	c := newCrawler(inputSet, disc, enode.IterNodes(nodeargs)) | ||||||
| 	c.revalidateInterval = 0 | 	c.revalidateInterval = 0 | ||||||
| 	output := c.run(0) | 	output := c.run(0, 1) | ||||||
| 	writeNodesJSON(nodesFile, output) | 	writeNodesJSON(nodesFile, output) | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
| @ -214,7 +219,7 @@ func discv4Crawl(ctx *cli.Context) error { | |||||||
| 	defer disc.Close() | 	defer disc.Close() | ||||||
| 	c := newCrawler(inputSet, disc, disc.RandomNodes()) | 	c := newCrawler(inputSet, disc, disc.RandomNodes()) | ||||||
| 	c.revalidateInterval = 10 * time.Minute | 	c.revalidateInterval = 10 * time.Minute | ||||||
| 	output := c.run(ctx.Duration(crawlTimeoutFlag.Name)) | 	output := c.run(ctx.Duration(crawlTimeoutFlag.Name), ctx.Int(crawlParallelismFlag.Name)) | ||||||
| 	writeNodesJSON(nodesFile, output) | 	writeNodesJSON(nodesFile, output) | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  | |||||||
| @ -110,7 +110,7 @@ func discv5Crawl(ctx *cli.Context) error { | |||||||
| 	defer disc.Close() | 	defer disc.Close() | ||||||
| 	c := newCrawler(inputSet, disc, disc.RandomNodes()) | 	c := newCrawler(inputSet, disc, disc.RandomNodes()) | ||||||
| 	c.revalidateInterval = 10 * time.Minute | 	c.revalidateInterval = 10 * time.Minute | ||||||
| 	output := c.run(ctx.Duration(crawlTimeoutFlag.Name)) | 	output := c.run(ctx.Duration(crawlTimeoutFlag.Name), ctx.Int(crawlParallelismFlag.Name)) | ||||||
| 	writeNodesJSON(nodesFile, output) | 	writeNodesJSON(nodesFile, output) | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  | |||||||
| @ -126,11 +126,16 @@ func (c *cloudflareClient) uploadRecords(name string, records map[string]string) | |||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	// Iterate over the new records and inject anything missing.
 | 	// Iterate over the new records and inject anything missing.
 | ||||||
|  | 	log.Info("Updating DNS entries") | ||||||
|  | 	created := 0 | ||||||
|  | 	updated := 0 | ||||||
|  | 	skipped := 0 | ||||||
| 	for path, val := range records { | 	for path, val := range records { | ||||||
| 		old, exists := existing[path] | 		old, exists := existing[path] | ||||||
| 		if !exists { | 		if !exists { | ||||||
| 			// Entry is unknown, push a new one to Cloudflare.
 | 			// Entry is unknown, push a new one to Cloudflare.
 | ||||||
| 			log.Info(fmt.Sprintf("Creating %s = %q", path, val)) | 			log.Debug(fmt.Sprintf("Creating %s = %q", path, val)) | ||||||
|  | 			created++ | ||||||
| 			ttl := rootTTL | 			ttl := rootTTL | ||||||
| 			if path != name { | 			if path != name { | ||||||
| 				ttl = treeNodeTTLCloudflare // Max TTL permitted by Cloudflare
 | 				ttl = treeNodeTTLCloudflare // Max TTL permitted by Cloudflare
 | ||||||
| @ -139,27 +144,33 @@ func (c *cloudflareClient) uploadRecords(name string, records map[string]string) | |||||||
| 			_, err = c.CreateDNSRecord(context.Background(), c.zoneID, record) | 			_, err = c.CreateDNSRecord(context.Background(), c.zoneID, record) | ||||||
| 		} else if old.Content != val { | 		} else if old.Content != val { | ||||||
| 			// Entry already exists, only change its content.
 | 			// Entry already exists, only change its content.
 | ||||||
| 			log.Info(fmt.Sprintf("Updating %s from %q to %q", path, old.Content, val)) | 			log.Debug(fmt.Sprintf("Updating %s from %q to %q", path, old.Content, val)) | ||||||
|  | 			updated++ | ||||||
| 			old.Content = val | 			old.Content = val | ||||||
| 			err = c.UpdateDNSRecord(context.Background(), c.zoneID, old.ID, old) | 			err = c.UpdateDNSRecord(context.Background(), c.zoneID, old.ID, old) | ||||||
| 		} else { | 		} else { | ||||||
|  | 			skipped++ | ||||||
| 			log.Debug(fmt.Sprintf("Skipping %s = %q", path, val)) | 			log.Debug(fmt.Sprintf("Skipping %s = %q", path, val)) | ||||||
| 		} | 		} | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return fmt.Errorf("failed to publish %s: %v", path, err) | 			return fmt.Errorf("failed to publish %s: %v", path, err) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 	log.Info("Updated DNS entries", "new", created, "updated", updated, "untouched", skipped) | ||||||
| 	// Iterate over the old records and delete anything stale.
 | 	// Iterate over the old records and delete anything stale.
 | ||||||
|  | 	deleted := 0 | ||||||
|  | 	log.Info("Deleting stale DNS entries") | ||||||
| 	for path, entry := range existing { | 	for path, entry := range existing { | ||||||
| 		if _, ok := records[path]; ok { | 		if _, ok := records[path]; ok { | ||||||
| 			continue | 			continue | ||||||
| 		} | 		} | ||||||
| 		// Stale entry, nuke it.
 | 		// Stale entry, nuke it.
 | ||||||
| 		log.Info(fmt.Sprintf("Deleting %s = %q", path, entry.Content)) | 		log.Debug(fmt.Sprintf("Deleting %s = %q", path, entry.Content)) | ||||||
|  | 		deleted++ | ||||||
| 		if err := c.DeleteDNSRecord(context.Background(), c.zoneID, entry.ID); err != nil { | 		if err := c.DeleteDNSRecord(context.Background(), c.zoneID, entry.ID); err != nil { | ||||||
| 			return fmt.Errorf("failed to delete %s: %v", path, err) | 			return fmt.Errorf("failed to delete %s: %v", path, err) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  | 	log.Info("Deleted stale DNS entries", "count", deleted) | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
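The pattern in both Cloudflare loops is the same: per-record messages drop to debug level, counters accumulate, and one structured info line summarizes each phase. A runnable sketch of that shape, assuming go-ethereum's log package (applyRecord and the action constants are hypothetical stand-ins for the create/update/skip branches):

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/log"
)

// action is a hypothetical outcome type for one record.
type action int

const (
	actionCreated action = iota
	actionUpdated
	actionSkipped
)

// applyRecord is a hypothetical stand-in for the Cloudflare API calls.
func applyRecord(path, val string) action {
	if len(val)%2 == 0 {
		return actionCreated
	}
	return actionSkipped
}

func main() {
	records := map[string]string{"a.example.org": "x", "b.example.org": "xy"}
	created, updated, skipped := 0, 0, 0
	for path, val := range records {
		switch applyRecord(path, val) {
		case actionCreated:
			log.Debug(fmt.Sprintf("Creating %s = %q", path, val)) // per-record detail: debug only
			created++
		case actionUpdated:
			updated++
		default:
			skipped++
		}
	}
	// One structured summary line replaces a flood of per-record info lines.
	log.Info("Updated DNS entries", "new", created, "updated", updated, "untouched", skipped)
}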
|  | |||||||
| @ -329,8 +329,9 @@ func (c *route53Client) collectRecords(name string) (map[string]recordSet, error | |||||||
| 	var req route53.ListResourceRecordSetsInput | 	var req route53.ListResourceRecordSetsInput | ||||||
| 	req.HostedZoneId = &c.zoneID | 	req.HostedZoneId = &c.zoneID | ||||||
| 	existing := make(map[string]recordSet) | 	existing := make(map[string]recordSet) | ||||||
|  | 	log.Info("Loading existing TXT records", "name", name, "zone", c.zoneID) | ||||||
| 	for page := 0; ; page++ { | 	for page := 0; ; page++ { | ||||||
| 		log.Info("Loading existing TXT records", "name", name, "zone", c.zoneID, "page", page) | 		log.Debug("Loading existing TXT records", "name", name, "zone", c.zoneID, "page", page) | ||||||
| 		resp, err := c.api.ListResourceRecordSets(context.TODO(), &req) | 		resp, err := c.api.ListResourceRecordSets(context.TODO(), &req) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return existing, err | 			return existing, err | ||||||
| @ -360,7 +361,7 @@ func (c *route53Client) collectRecords(name string) (map[string]recordSet, error | |||||||
| 		req.StartRecordName = resp.NextRecordName | 		req.StartRecordName = resp.NextRecordName | ||||||
| 		req.StartRecordType = resp.NextRecordType | 		req.StartRecordType = resp.NextRecordType | ||||||
| 	} | 	} | ||||||
| 
 | 	log.Info("Loaded existing TXT records", "name", name, "zone", c.zoneID, "records", len(existing)) | ||||||
| 	return existing, nil | 	return existing, nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||