查找前N个存储桶的平均聚合

xv8emn3q  于 2022-10-06  发布在  ElasticSearch
关注(0)|答案(1)|浏览(116)

我正在尝试确定按“时间戳”(按降序)排序的前5个文档的“count”列的平均值。

索引数据示例:

{"index":{}}
{"title": "foo", "count": 1, "timestamp":"2022-09-15T09:46:43.958Z"}
{"index":{}}
{"title": "foo", "count": 2, "timestamp":"2022-09-14T09:46:43.958Z"}
{"index":{}}
{"title": "foo", "count": 3, "timestamp":"2022-09-16T09:46:43.958Z"}
{"index":{}}
{"title": "foo", "count": 4, "timestamp":"2022-09-13T09:46:43.958Z"}
{"index":{}}
{"title": "foo", "count": 5, "timestamp":"2022-09-12T09:46:43.958Z"}
{"index":{}}
{"title": "foo", "count": 6, "timestamp":"2022-09-11T09:46:43.958Z"}
{"index":{}}
{"title": "foo", "count": 7, "timestamp":"2022-09-19T09:46:43.958Z"}
{"index":{}}
{"title": "foo", "count": 8, "timestamp":"2022-09-18T09:46:43.958Z"}
{"index":{}}
{"title": "foo", "count": 9, "timestamp":"2022-09-10T09:46:43.958Z"}
{"index":{}}
{"title": "foo", "count": 10, "timestamp":"2022-09-17T09:46:43.958Z"}
{"index":{}}
{"title": "bar", "count": 20, "timestamp":"2022-09-10T09:46:43.958Z"}
{"index":{}}
{"title": "bar", "count": 21, "timestamp":"2022-09-11T09:46:43.958Z"}
{"index":{}}
{"title": "bar", "count": 22, "timestamp":"2022-09-12T09:46:43.958Z"}
{"index":{}}
{"title": "bar", "count": 23, "timestamp":"2022-09-13T09:46:43.958Z"}
{"index":{}}
{"title": "bar", "count": 24, "timestamp":"2022-09-14T09:46:43.958Z"}
{"index":{}}
{"title": "bar", "count": 25, "timestamp":"2022-09-15T09:46:43.958Z"}
{"index":{}}
{"title": "bar", "count": 26, "timestamp":"2022-09-17T09:46:43.958Z"}
{"index":{}}
{"title": "bar", "count": 27, "timestamp":"2022-09-18T09:46:43.958Z"}

搜索查询(使用 top_hits 聚合)

{
  "size": 0,
  "aggs": {
    "group_by_title": {
      "terms": {
        "field": "title.keyword",
        "size": 10
      },
      "aggs": {
        "take-N-tracks": {
          "top_hits": {
            "size": 5,
            "sort": [
              { "timestamp": "desc" }
            ],
            "_source": ["count", "title"]
          }
        }
      }
    }
  }
}

搜索结果:

{
    "took": 32,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 18,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "group_by_title": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "foo",
                    "doc_count": 10,
                    "take-N-tracks": {
                        "hits": {
                            "total": {
                                "value": 10,
                                "relation": "eq"
                            },
                            "max_score": null,
                            "hits": [
                                {
                                    "_index": "testidx",
                                    "_id": "-jKbjIMB37c_U4Ro89zA",
                                    "_score": null,
                                    "_source": {
                                        "title": "foo",
                                        "count": 7
                                    },
                                    "sort": [
                                        1663580803958
                                    ]
                                },
                                {
                                    "_index": "testidx",
                                    "_id": "-zKbjIMB37c_U4Ro89zA",
                                    "_score": null,
                                    "_source": {
                                        "title": "foo",
                                        "count": 8
                                    },
                                    "sort": [
                                        1663494403958
                                    ]
                                },
                                {
                                    "_index": "testidx",
                                    "_id": "_TKbjIMB37c_U4Ro89zA",
                                    "_score": null,
                                    "_source": {
                                        "title": "foo",
                                        "count": 10
                                    },
                                    "sort": [
                                        1663408003958
                                    ]
                                },
                                {
                                    "_index": "testidx",
                                    "_id": "9jKbjIMB37c_U4Ro89zA",
                                    "_score": null,
                                    "_source": {
                                        "title": "foo",
                                        "count": 3
                                    },
                                    "sort": [
                                        1663321603958
                                    ]
                                },
                                {
                                    "_index": "testidx",
                                    "_id": "9DKbjIMB37c_U4Ro89y7",
                                    "_score": null,
                                    "_source": {
                                        "title": "foo",
                                        "count": 1
                                    },
                                    "sort": [
                                        1663235203958
                                    ]
                                }
                            ]
                        }
                    }
                },
                {
                    "key": "bar",
                    "doc_count": 8,
                    "take-N-tracks": {
                        "hits": {
                            "total": {
                                "value": 8,
                                "relation": "eq"
                            },
                            "max_score": null,
                            "hits": [
                                {
                                    "_index": "testidx",
                                    "_id": "BTKbjIMB37c_U4Ro893A",
                                    "_score": null,
                                    "_source": {
                                        "title": "bar",
                                        "count": 27
                                    },
                                    "sort": [
                                        1663494403958
                                    ]
                                },
                                {
                                    "_index": "testidx",
                                    "_id": "BDKbjIMB37c_U4Ro893A",
                                    "_score": null,
                                    "_source": {
                                        "title": "bar",
                                        "count": 26
                                    },
                                    "sort": [
                                        1663408003958
                                    ]
                                },
                                {
                                    "_index": "testidx",
                                    "_id": "AzKbjIMB37c_U4Ro893A",
                                    "_score": null,
                                    "_source": {
                                        "title": "bar",
                                        "count": 25
                                    },
                                    "sort": [
                                        1663235203958
                                    ]
                                },
                                {
                                    "_index": "testidx",
                                    "_id": "AjKbjIMB37c_U4Ro893A",
                                    "_score": null,
                                    "_source": {
                                        "title": "bar",
                                        "count": 24
                                    },
                                    "sort": [
                                        1663148803958
                                    ]
                                },
                                {
                                    "_index": "testidx",
                                    "_id": "ATKbjIMB37c_U4Ro893A",
                                    "_score": null,
                                    "_source": {
                                        "title": "bar",
                                        "count": 23
                                    },
                                    "sort": [
                                        1663062403958
                                    ]
                                }
                            ]
                        }
                    }
                }
            ]
        }
    }
}

预期结果:

在 foo 桶下,平均值应等于 5.8,即 (7+8+10+3+1)/5 = 29/5;在 bar 桶下,平均值应等于 25.0,即 (27+26+25+24+23)/5 = 125/5。

我需要的是将指标聚合与 top_hits 聚合结合使用。我相信可以参照此 link 中的变通方法,基于 top_hits 聚合对存储桶进行排序。

但是,有没有办法对 top_hits 聚合返回的结果再做平均值(avg)聚合?

更新:

如果有人能提出不同于 @Jaspreet 所给脚本的替代方案,那就太好了。

pgx2nnw8

pgx2nnw81#

top_hits 聚合不支持子聚合。

您需要使用 scripted metric 聚合(scripted_metric),它基本上会遍历所有文档。我使用 TreeMap 数据结构来存储时间戳与 count 的组合,再通过遍历计算出平均值。

查询

{
  "size": 0,
  "aggs": {
    "group_by_title": {
      "terms": {
        "field": "title.keyword",
        "size": 10
      },
      "aggs": {
        "top_N_Documents": {
          "scripted_metric": {
            "init_script": "state.count_map = new TreeMap()",
            "map_script": "def date_as_millis = doc['timestamp'].getValue().toInstant().toEpochMilli(); state.count_map.put(String.valueOf(date_as_millis), doc['count'].value)",
            "combine_script": "return state",
            "reduce_script": "def merged = new TreeMap(); for (s in states) { for (e in s.count_map.entrySet()) { merged.put(Long.parseLong(e.getKey()), e.getValue()); } } def keys = merged.keySet().toArray(); int size = keys.length; int n = size < 5 ? size : 5; if (n == 0) { return null; } def sum = 0; for (int i = size - 1; i >= size - n; i--) { sum += merged.get(keys[i]); } return sum / (float) n;"
          }
        }
      }
    }
  }
}

结果

"aggregations" : {
    "group_by_title" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "foo",
          "doc_count" : 10,
          "top_N_Documents" : {
            "value" : 5.8
          }
        },
        {
          "key" : "bar",
          "doc_count" : 8,
          "top_N_Documents" : {
            "value" : 25.0
          }
        }
      ]
    }
  }

脚本速度很慢,所以如果您有性能问题,在客户端计算平均值可能也是个好主意。

相关问题