Es数据汇总不准确的问题

Es数据汇总不准确的问题

问题展示

当匹配的数据超过 1 万条时,ES 查询返回的总数(汇总结果)与实际数量不一致。

Es数据汇总不准确的问题

参考方案

新版本中的hits.total匹配数说明

在7.0版发布之前,hits.total始终表示符合查询条件的文档的实际数量。从Elasticsearch 7.0版开始,如果匹配数大于10,000,默认不再精确计算hits.total,而是只统计到10,000为止。 这是为了避免为给定查询计算精确匹配文档数带来的不必要开销。 我们可以在请求中加上参数track_total_hits=true来强制计算精确的匹配数。

未设置track_total_hits且匹配数超过10,000时,返回的hits.total中value为10,000,relation为"gte",表示实际匹配数大于或等于10,000。

代码层实现

本次以go代码为例

封装的代码模块

package es

import (
    "context"
    "encoding/json"
    "fmt"
    "log"
    "os"
    "time"

    "collect/lib"

    "github.com/olivere/elastic/v7"
)

// dateFormat is the Go reference-time layout for a calendar date
// (year-month-day). Search formats QueryReq.EndTime with it to build the
// suffix of the index name.
const dateFormat = "2006-01-02"

// tracelog is a stateless logger that writes formatted messages to standard
// output.
//
// NOTE(review): nothing in the visible part of this file references tracelog;
// NewStore passes a *log.Logger to SetTraceLog instead. Confirm whether it is
// still needed.
type tracelog struct{}

// Printf formats v according to format and writes the result to os.Stdout.
// The write result is discarded.
func (tracelog) Printf(format string, v ...interface{}) {
    fmt.Fprintf(os.Stdout, format, v...)
}

// Store bundles an Elasticsearch client with the index name prefix that
// Search and Aggs use to build the name of the index they query.
type Store struct {
    // client issues the search requests.
    client       *elastic.Client
    // indexPartter is the index name prefix; Search and Aggs append "-" and a
    // date suffix to it. (The name is a misspelling of "pattern".)
    indexPartter string
}

// NewStore creates a Store whose client connects to config.Addrs using basic
// authentication (config.Username / config.Password) and whose index prefix
// is config.Index.
//
// Sniffing is disabled and the health check runs every 10 seconds. Client
// errors are logged to stderr with the "ELASTIC " prefix and trace output is
// logged to stdout.
//
// If the client cannot be created the process is terminated via log.Fatal.
func NewStore(config lib.StoreConfig) *Store {
    errLogger := log.New(os.Stderr, "ELASTIC ", log.LstdFlags)
    traceLogger := log.New(os.Stdout, "", log.LstdFlags)

    options := []elastic.ClientOptionFunc{
        elastic.SetURL(config.Addrs...),
        elastic.SetBasicAuth(config.Username, config.Password),
        elastic.SetSniff(false),
        elastic.SetHealthcheckInterval(10 * time.Second),
        elastic.SetErrorLog(errLogger),
        elastic.SetTraceLog(traceLogger),
    }

    client, err := elastic.NewClient(options...)
    if err != nil {
        log.Fatal(err)
    }

    store := &Store{client: client, indexPartter: config.Index}
    return store
}

// QueryReq carries the filters and paging values read by Store.Search.
// String filters that are empty are not added to the query.
type QueryReq struct {
    // AppName and BizName are exact-match (term) filters on the document
    // fields of the same name.
    AppName   string
    BizName   string
    // StartTime and EndTime bound the "TimeStamp" range; both ends are
    // exclusive (Gt / Lt). EndTime's date also selects the index searched.
    StartTime time.Time
    EndTime   time.Time
    // DateTime is not read by Search in the visible code.
    DateTime  string
    // Level, Keyword, Module, Method, TraceId and SpanId are exact-match
    // (term) filters on the document fields of the same name.
    Level     string
    Keyword   string
    Module    string
    Method    string
    TraceId   string
    SpanId    string
    // Message is applied as a match query on the "Message" field.
    Message   string
    // Page is passed to the search request's From option and PageSize to its
    // Size option.
    Page      int
    PageSize  int
}

// QueryRes is the result returned by Store.Search.
type QueryRes struct {
    // Data holds the hits of the requested page, each decoded from the hit's
    // JSON source.
    Data      []*lib.Data
    // TotalHits is the total hit count reported by Elasticsearch for the
    // query (Search requests it with track_total_hits enabled).
    TotalHits int64
}

// Search runs a filtered, paged query and returns the decoded hits together
// with the exact total number of matching documents.
//
// Only one index is searched: the store's index prefix followed by "-" and
// the date of q.EndTime. Non-empty AppName, BizName, Level, Keyword, Module,
// Method, TraceId and SpanId become term filters; Message becomes a match
// query; TimeStamp must lie strictly between q.StartTime and q.EndTime.
// Results are sorted by TimeStamp in descending order.
//
// A missing index is reported as an empty result, not as an error. Any other
// request error, or a hit whose source cannot be decoded, is returned as-is.
func (s *Store) Search(q QueryReq) (*QueryRes, error) {
    termFilters := []struct{ field, value string }{
        {"AppName", q.AppName},
        {"BizName", q.BizName},
        {"Level", q.Level},
        {"Keyword", q.Keyword},
        {"Module", q.Module},
        {"Method", q.Method},
        {"TraceId", q.TraceId},
        {"SpanId", q.SpanId},
    }

    // Room for every term filter plus the range and match clauses.
    querys := make([]elastic.Query, 0, len(termFilters)+2)
    for _, f := range termFilters {
        if f.value != "" {
            querys = append(querys, elastic.NewTermQuery(f.field, f.value))
        }
    }

    querys = append(querys, elastic.NewRangeQuery("TimeStamp").Lt(q.EndTime).Gt(q.StartTime))

    if q.Message != "" {
        querys = append(querys, elastic.NewMatchQuery("Message", q.Message))
    }

    index := s.indexPartter + "-" + q.EndTime.Format(dateFormat)

    // TrackTotalHits(true) asks Elasticsearch for the exact hit count instead
    // of the default count that stops at 10,000.
    //
    // NOTE(review): From takes a document offset, not a page number. q.Page is
    // forwarded unchanged here; confirm callers already pass an offset.
    res, err := s.client.Search(index).
        Query(elastic.NewBoolQuery().Must(querys...)).
        From(q.Page).
        Size(q.PageSize).
        TrackTotalHits(true).
        Sort("TimeStamp", false).
        Do(context.Background())
    if err != nil {
        // The index does not exist: treat as "no data".
        if elastic.IsNotFound(err) {
            return &QueryRes{}, nil
        }
        return nil, err
    }

    var data []*lib.Data
    // Hits is a pointer in the response struct; guard it so a response
    // without a hits section cannot cause a nil dereference.
    if res.Hits != nil {
        for _, h := range res.Hits.Hits {
            d := new(lib.Data)
            // A hit without a source keeps the zero value, as before.
            if h.Source != nil {
                if err := json.Unmarshal(h.Source, d); err != nil {
                    return nil, err
                }
            }
            data = append(data, d)
        }
    }

    // res.TotalHits() is the nil-safe accessor for Hits.TotalHits.Value.
    return &QueryRes{Data: data, TotalHits: res.TotalHits()}, nil
}

// AggsReq carries the filters read by Store.Aggs. String filters that are
// empty are not added to the query.
type AggsReq struct {
    // AppName and BizName are exact-match (term) filters on the document
    // fields of the same name.
    AppName  string
    BizName  string
    // DateTime is appended (after "-") to the store's index prefix to form
    // the name of the index that is aggregated.
    DateTime string
    // Level and Module are exact-match (term) filters on the document fields
    // of the same name.
    Level    string
    Module   string
    // Method is not read by Aggs in the visible code.
    Method   string
}

// Buctet is one bucket of the terms aggregation returned by Store.Aggs.
// (The name is a misspelling of "Bucket".)
type Buctet struct {
    // Key is the bucket key, i.e. a value of the aggregated field.
    Key      string
    // DocCount is the document count reported for the bucket.
    DocCount int64
}

// AggsRes is the result returned by Store.Aggs.
type AggsRes struct {
    // Buctets lists the aggregation buckets in the order Elasticsearch
    // returned them; it is nil when there are none.
    Buctets []*Buctet
}

// Aggs groups the documents of one index by their "Method" field and returns
// at most 20 buckets, ordered by descending document count.
//
// The index queried is the store's index prefix followed by "-" and
// q.DateTime. Non-empty AppName, BizName, Level and Module are applied as
// term filters. A missing index yields an empty result instead of an error.
// Buckets whose key is not a string are skipped.
func (s *Store) Aggs(q AggsReq) (*AggsRes, error) {
    termFilters := []struct{ field, value string }{
        {"AppName", q.AppName},
        {"BizName", q.BizName},
        {"Level", q.Level},
        {"Module", q.Module},
    }

    var filters []elastic.Query
    for _, f := range termFilters {
        if f.value == "" {
            continue
        }
        filters = append(filters, elastic.NewTermQuery(f.field, f.value))
    }

    // Keep only the top 20 values (the original note describes them as the
    // methods with the highest error rate).
    topMethods := elastic.NewTermsAggregation().Field("Method").OrderByCountDesc().Size(20)

    // Group by the Method field; Size(0) because only the aggregation is
    // needed, not the hits themselves.
    res, err := s.client.
        Search(s.indexPartter + "-" + q.DateTime).
        Query(elastic.NewBoolQuery().Must(filters...)).
        Aggregation("Method", topMethods).
        Size(0).
        Do(context.Background())
    switch {
    case err == nil:
        // Fall through to result handling below.
    case elastic.IsNotFound(err):
        // The index does not exist: treat as "no data".
        return &AggsRes{}, nil
    default:
        return nil, err
    }

    methodAgg, found := res.Aggregations.Terms("Method")
    if !found {
        return &AggsRes{}, nil
    }

    var buckets []*Buctet
    for _, b := range methodAgg.Buckets {
        name, isString := b.Key.(string)
        if !isString {
            continue
        }
        buckets = append(buckets, &Buctet{Key: name, DocCount: b.DocCount})
    }
    return &AggsRes{Buctets: buckets}, nil
}

修改后继续尝试

按照网上的说法修改之后,问题仍然存在

Es数据汇总不准确的问题

总结

有知道的欢迎评论

本作品采用《CC 协议》,转载必须注明作者和本文链接
good good study day day up
讨论数量: 0
(= ̄ω ̄=)··· 暂无内容!

讨论应以学习和精进为目的。请勿发布不友善或者负能量的内容,与人为善,比聪明更重要!