Merge pull request #10405 from cortze/feat/upgrade-elastic-search-traces-transport

feat: tracer: upgrade elastic search transport for pubsub traces
This commit is contained in:
Andrew Jackson (Ajax) 2023-05-24 10:01:37 -05:00 committed by GitHub
commit aef2ab6315
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 189 additions and 70 deletions

View File

@ -394,6 +394,20 @@ func GossipSub(in GossipIn) (service *pubsub.PubSub, err error) {
transports = append(transports, jsonTransport) transports = append(transports, jsonTransport)
} }
tps := make([]string, 0) // range of topics that will be submited to the traces
addTopicToList := func(topicList []string, newTopic string) []string {
// check if the topic is already in the list
for _, tp := range topicList {
if tp == newTopic {
return topicList
}
}
// add it otherwise
return append(topicList, newTopic)
}
// tracer
if in.Cfg.ElasticSearchTracer != "" { if in.Cfg.ElasticSearchTracer != "" {
elasticSearchTransport, err := tracer.NewElasticSearchTransport( elasticSearchTransport, err := tracer.NewElasticSearchTransport(
in.Cfg.ElasticSearchTracer, in.Cfg.ElasticSearchTracer,
@ -403,7 +417,9 @@ func GossipSub(in GossipIn) (service *pubsub.PubSub, err error) {
return nil, err return nil, err
} }
transports = append(transports, elasticSearchTransport) transports = append(transports, elasticSearchTransport)
tps = addTopicToList(tps, build.BlocksTopic(in.Nn))
} }
lt := tracer.NewLotusTracer(transports, in.Host.ID(), in.Cfg.TracerSourceAuth) lt := tracer.NewLotusTracer(transports, in.Host.ID(), in.Cfg.TracerSourceAuth)
// tracer // tracer
@ -412,28 +428,25 @@ func GossipSub(in GossipIn) (service *pubsub.PubSub, err error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
pi, err := peer.AddrInfoFromP2pAddr(a) pi, err := peer.AddrInfoFromP2pAddr(a)
if err != nil { if err != nil {
return nil, err return nil, err
} }
tr, err := pubsub.NewRemoteTracer(context.TODO(), in.Host, *pi) tr, err := pubsub.NewRemoteTracer(context.TODO(), in.Host, *pi)
if err != nil { if err != nil {
return nil, err return nil, err
} }
pst := newPeerScoreTracker(lt, in.Sk) pst := newPeerScoreTracker(lt, in.Sk)
trw := newTracerWrapper(tr, lt, build.BlocksTopic(in.Nn)) trw := newTracerWrapper(tr, lt, tps...)
options = append(options, pubsub.WithEventTracer(trw)) options = append(options, pubsub.WithEventTracer(trw))
options = append(options, pubsub.WithPeerScoreInspect(pst.UpdatePeerScore, 10*time.Second)) options = append(options, pubsub.WithPeerScoreInspect(pst.UpdatePeerScore, 10*time.Second))
} else { } else {
// still instantiate a tracer for collecting metrics // still instantiate a tracer for collecting metrics
trw := newTracerWrapper(nil, lt)
options = append(options, pubsub.WithEventTracer(trw))
pst := newPeerScoreTracker(lt, in.Sk) pst := newPeerScoreTracker(lt, in.Sk)
trw := newTracerWrapper(nil, lt, tps...)
options = append(options, pubsub.WithEventTracer(trw))
options = append(options, pubsub.WithPeerScoreInspect(pst.UpdatePeerScore, 10*time.Second)) options = append(options, pubsub.WithPeerScoreInspect(pst.UpdatePeerScore, 10*time.Second))
} }
@ -457,7 +470,6 @@ func newTracerWrapper(
topicsMap[topic] = struct{}{} topicsMap[topic] = struct{}{}
} }
} }
return &tracerWrapper{lp2pTracer: lp2pTracer, lotusTracer: lotusTracer, topics: topicsMap} return &tracerWrapper{lp2pTracer: lp2pTracer, lotusTracer: lotusTracer, topics: topicsMap}
} }
@ -486,71 +498,164 @@ func (trw *tracerWrapper) Trace(evt *pubsub_pb.TraceEvent) {
if trw.lp2pTracer != nil { if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt) trw.lp2pTracer.Trace(evt)
} }
if trw.lotusTracer != nil { if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt) trw.lotusTracer.Trace(evt)
} }
} }
case pubsub_pb.TraceEvent_DELIVER_MESSAGE: case pubsub_pb.TraceEvent_DELIVER_MESSAGE:
stats.Record(context.TODO(), metrics.PubsubDeliverMessage.M(1)) stats.Record(context.TODO(), metrics.PubsubDeliverMessage.M(1))
if trw.traceMessage(evt.GetDeliverMessage().GetTopic()) {
if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt)
}
if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt)
}
}
case pubsub_pb.TraceEvent_REJECT_MESSAGE: case pubsub_pb.TraceEvent_REJECT_MESSAGE:
stats.Record(context.TODO(), metrics.PubsubRejectMessage.M(1)) stats.Record(context.TODO(), metrics.PubsubRejectMessage.M(1))
if trw.traceMessage(evt.GetRejectMessage().GetTopic()) { if trw.traceMessage(evt.GetRejectMessage().GetTopic()) {
if trw.lp2pTracer != nil { if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt) trw.lp2pTracer.Trace(evt)
} }
if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt)
}
}
case pubsub_pb.TraceEvent_DUPLICATE_MESSAGE:
stats.Record(context.TODO(), metrics.PubsubDuplicateMessage.M(1))
if trw.traceMessage(evt.GetDuplicateMessage().GetTopic()) {
if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt)
}
if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt)
}
}
case pubsub_pb.TraceEvent_JOIN:
if trw.traceMessage(evt.GetJoin().GetTopic()) {
if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt)
}
if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt)
}
}
case pubsub_pb.TraceEvent_LEAVE:
if trw.traceMessage(evt.GetLeave().GetTopic()) {
if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt)
}
if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt)
}
}
case pubsub_pb.TraceEvent_GRAFT:
if trw.traceMessage(evt.GetGraft().GetTopic()) {
if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt)
}
if trw.lotusTracer != nil { if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt) trw.lotusTracer.Trace(evt)
} }
} }
case pubsub_pb.TraceEvent_DUPLICATE_MESSAGE:
stats.Record(context.TODO(), metrics.PubsubDuplicateMessage.M(1))
case pubsub_pb.TraceEvent_JOIN:
if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt)
}
if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt)
}
case pubsub_pb.TraceEvent_LEAVE:
if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt)
}
if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt)
}
case pubsub_pb.TraceEvent_GRAFT:
if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt)
}
if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt)
}
case pubsub_pb.TraceEvent_PRUNE: case pubsub_pb.TraceEvent_PRUNE:
if trw.traceMessage(evt.GetPrune().GetTopic()) {
if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt)
}
if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt)
}
}
case pubsub_pb.TraceEvent_ADD_PEER:
if trw.lp2pTracer != nil { if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt) trw.lp2pTracer.Trace(evt)
} }
if trw.lotusTracer != nil { if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt) trw.lotusTracer.Trace(evt)
} }
case pubsub_pb.TraceEvent_REMOVE_PEER:
if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt)
}
if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt)
}
case pubsub_pb.TraceEvent_RECV_RPC: case pubsub_pb.TraceEvent_RECV_RPC:
stats.Record(context.TODO(), metrics.PubsubRecvRPC.M(1)) stats.Record(context.TODO(), metrics.PubsubRecvRPC.M(1))
// only track the RPC Calls from IWANT / IHAVE / BLOCK topic
controlRPC := evt.GetRecvRPC().GetMeta().GetControl()
ihave := controlRPC.GetIhave()
iwant := controlRPC.GetIwant()
msgsRPC := evt.GetRecvRPC().GetMeta().GetMessages()
// check if any of the messages we are sending belong to a trackable topic
var validTopic bool = false
for _, topic := range msgsRPC {
if trw.traceMessage(topic.GetTopic()) {
validTopic = true
break
}
}
// track if the Iwant / Ihave messages are from a valid Topic
var validIhave bool = false
for _, msgs := range ihave {
if trw.traceMessage(msgs.GetTopic()) {
validIhave = true
break
}
}
// check if we have any of iwant msgs (it doesn't classify per topic - just msg.ID)
validIwant := len(iwant) > 0
// trace the event if any of the flags was triggered
if validIhave || validIwant || validTopic {
if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt)
}
if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt)
}
}
case pubsub_pb.TraceEvent_SEND_RPC: case pubsub_pb.TraceEvent_SEND_RPC:
stats.Record(context.TODO(), metrics.PubsubSendRPC.M(1)) stats.Record(context.TODO(), metrics.PubsubSendRPC.M(1))
// only track the RPC Calls from IWANT / IHAVE / BLOCK topic
controlRPC := evt.GetSendRPC().GetMeta().GetControl()
ihave := controlRPC.GetIhave()
iwant := controlRPC.GetIwant()
msgsRPC := evt.GetSendRPC().GetMeta().GetMessages()
// check if any of the messages we are sending belong to a trackable topic
var validTopic bool = false
for _, topic := range msgsRPC {
if trw.traceMessage(topic.GetTopic()) {
validTopic = true
break
}
}
// track if the Iwant / Ihave messages are from a valid Topic
var validIhave bool = false
for _, msgs := range ihave {
if trw.traceMessage(msgs.GetTopic()) {
validIhave = true
break
}
}
// check if there was any of the Iwant msgs
validIwant := len(iwant) > 0
// trace the msgs if any of the flags was triggered
if validIhave || validIwant || validTopic {
if trw.lp2pTracer != nil {
trw.lp2pTracer.Trace(evt)
}
if trw.lotusTracer != nil {
trw.lotusTracer.Trace(evt)
}
}
case pubsub_pb.TraceEvent_DROP_RPC: case pubsub_pb.TraceEvent_DROP_RPC:
stats.Record(context.TODO(), metrics.PubsubDropRPC.M(1)) stats.Record(context.TODO(), metrics.PubsubDropRPC.M(1))
} }

View File

@ -1,18 +1,23 @@
package tracer package tracer
import ( import (
"bytes"
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"net/http"
"net/url" "net/url"
"strings" "time"
"github.com/elastic/go-elasticsearch/v7" "github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esapi" "github.com/elastic/go-elasticsearch/v7/esutil"
) )
const ( const (
ElasticSearchDefaultIndex = "lotus-pubsub" ElasticSearchDefaultIndex = "lotus-pubsub"
flushInterval = 10 * time.Second
flushBytes = 1024 * 1024 // MB
esWorkers = 2 // TODO: hardcoded
) )
func NewElasticSearchTransport(connectionString string, elasticsearchIndex string) (TracerTransport, error) { func NewElasticSearchTransport(connectionString string, elasticsearchIndex string) (TracerTransport, error) {
@ -28,12 +33,12 @@ func NewElasticSearchTransport(connectionString string, elasticsearchIndex strin
Addresses: []string{ Addresses: []string{
conUrl.Scheme + "://" + conUrl.Host, conUrl.Scheme + "://" + conUrl.Host,
}, },
Username: username, Username: username,
Password: password, Password: password,
Transport: &http.Transport{},
} }
es, err := elasticsearch.NewClient(cfg) es, err := elasticsearch.NewClient(cfg)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -45,14 +50,31 @@ func NewElasticSearchTransport(connectionString string, elasticsearchIndex strin
esIndex = ElasticSearchDefaultIndex esIndex = ElasticSearchDefaultIndex
} }
// Create the BulkIndexer to batch ES trace submission
bi, err := esutil.NewBulkIndexer(esutil.BulkIndexerConfig{
Index: esIndex,
Client: es,
NumWorkers: esWorkers,
FlushBytes: int(flushBytes),
FlushInterval: flushInterval,
OnError: func(ctx context.Context, err error) {
log.Errorf("Error persisting queries %s", err.Error())
},
})
if err != nil {
return nil, err
}
return &elasticSearchTransport{ return &elasticSearchTransport{
cl: es, cl: es,
bi: bi,
esIndex: esIndex, esIndex: esIndex,
}, nil }, nil
} }
type elasticSearchTransport struct { type elasticSearchTransport struct {
cl *elasticsearch.Client cl *elasticsearch.Client
bi esutil.BulkIndexer
esIndex string esIndex string
} }
@ -72,26 +94,18 @@ func (est *elasticSearchTransport) Transport(evt TracerTransportEvent) error {
return fmt.Errorf("error while marshaling event: %s", err) return fmt.Errorf("error while marshaling event: %s", err)
} }
req := esapi.IndexRequest{ return est.bi.Add(
Index: est.esIndex, context.Background(),
Body: strings.NewReader(string(jsonEvt)), esutil.BulkIndexerItem{
Refresh: "true", Action: "index",
} Body: bytes.NewReader(jsonEvt),
OnFailure: func(ctx context.Context, item esutil.BulkIndexerItem, res esutil.BulkIndexerResponseItem, err error) {
// Perform the request with the client. if err != nil {
res, err := req.Do(context.Background(), est.cl) log.Errorf("unable to submit trace - %s", err)
if err != nil { } else {
return err log.Errorf("unable to submit trace %s: %s", res.Error.Type, res.Error.Reason)
} }
},
err = res.Body.Close() },
if err != nil { )
return err
}
if res.IsError() {
return fmt.Errorf("[%s] Error indexing document ID=%s", res.Status(), req.DocumentID)
}
return nil
} }