ElasticSearch中的聚合和排序

lzfw57am 于 2021-06-14 发布在 ElasticSearch

关注(0)|答案(2)|浏览(472)

我想在ElasticSearch中对查询的聚合结果进行排序
等价sqlquery:- select col1，col2，sum（col3）来自表group by col1，col2 order by sum（col3）desc；
我尝试了下面的查询，它返回的结果，但不是我所期望的排序顺序

{
  "from": 0,
  "size": 0,
  "_source": {
    "includes": [
      "col1",
      "col2",
      "SUM"
    ],
    "excludes": []
  },
  "stored_fields": [
    "col1",
    "col2"
  ],
  "aggregations": {
    "col1": {
      "terms": {
        "field": "col1",
        "size": 200,
        "min_doc_count": 1,
        "shard_min_doc_count": 0,
        "show_term_doc_count_error": false
      },
      "aggregations": {
        "col2": {
          "terms": {
            "field": "col2",
            "size": 10,
            "min_doc_count": 1,
            "shard_min_doc_count": 0,
            "show_term_doc_count_error": false
          },
          "aggregations": {
            "SUM_0": {
              "sum": {
                "field": "col3"
              }
            },
          "col3_bucket_sort": {
          "bucket_sort": {
            "sort": [
              { "SUM_0": { "order": "desc" } } 
            ],
            "size": 3                                
          }
        }

          }
        }
      }
    }
  }
}

抽样指标数据

{
            "_index": "my_index",
            "_type": "products",
            "_id": "OJfBSXUB0GzAt2o_zVdS",
            "_score": 1.0,
            "_source": {
                "product_name": "car",
                "product_type": "retail",
                "qty": 5
            }
        }
        {
            "_index": "my_index",
            "_type": "report",
            "_id": "OpfBSXUB0GzAt2o_zVfG1",
            "_score": 1.0,
            "_source": {
                "product_name": "bike",
                "product_type": "retail",
                "qty": 5
            }
        },

       {
            "_index": "my_index",
            "_type": "report",
            "_id": "OpfBSXUB0GzAt2o_zVfG",
            "_score": 1.0,
            "_source": {
                "product_name": "car",
                "product_type": "retail",
                "qty": 3
            }
        },
        {
            "_index": "my_index",
            "_type": "report",
            "_id": "OpfBSXUB0GzAt2o_zVfG2",
            "_score": 1.0,
            "_source": {
                "product_name": "bike",
                "product_type": "retail",
                "qty": 1
            }
        }

预期输出：-希望根据字段“产品名称”和“产品类型”聚合（分组）我的文档，并按总和（数量）排序
等价sqlquery:- select 产品名称、产品类型、按产品名称分组的产品表中的金额（数量）、按金额（数量）说明分组的产品类型订单；

{
    "took": 3,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 2,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "product_name": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "car",
                    "doc_count": 2,
                    "product_type": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": retail,
                                "doc_count": 2,
                                "SUM_0": {
                                    "value":8
                                }
                            }
                        ]
                    }
                },
                                {
                    "key": "bike",
                    "doc_count": 2,
                    "product_type": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": retail,
                                "doc_count": 2,
                                "SUM_0": {
                                    "value": 6
                                }
                            }
                        ]
                    }
                }

            ]
        }
    }
}

但我得到低于输出即aggreating文件成功，但排序是不工作的总和（数量）

{
    "took": 3,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 2,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "product_name": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "bike",
                    "doc_count": 2,
                    "product_type": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": retail,
                                "doc_count": 2,
                                "SUM_0": {
                                    "value": 6
                                }
                            }
                        ]
                    }
                },
                {
                    "key": "car",
                    "doc_count": 2,
                    "product_type": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": retail,
                                "doc_count": 2,
                                "SUM_0": {
                                    "value":8
                                }
                            }
                        ]
                    }
                }

            ]
        }
    }
}

elasticsearch sorting elasticsearch-aggregation

来源：https://stackoverflow.com/questions/64463138/aggregation-and-sorting-in-elastic-search

2条答案

按热度按时间

jei2mxaa1#

由于您是按col1、col2（即使用两项聚合）对数据进行分组的，因此，当您尝试使用bucket sort aggregation在sum aggregation的基础上对结果进行排序时，结果并不合适。
您需要使用max bucket aggregation，它是一个同级管道聚合，用同级聚合中指定度量的最大值标识bucket，并输出bucket的值和键。
然后应该对聚合的结果执行bucket排序聚合。
添加一个使用索引数据（与问题中使用的数据相同）、搜索查询和搜索结果的工作示例。
搜索查询：

{
  "size": 0,
  "aggs": {
    "agg1": {
      "terms": {
        "field": "product_name.keyword"
      },
      "aggs": {
        "agg2": {
          "terms": {
            "field": "product_type.keyword"
          },
          "aggregations": {
            "SUM_0": {
              "sum": {
                "field": "qty"
              }
            }
          }
        },
        "sum_max_bucket": {
          "max_bucket": {
            "buckets_path": "agg2>SUM_0"        <-- note this
          }
        },
        "sum_bucket_sort": {
          "bucket_sort": {
            "sort": {
              "sum_max_bucket": {
                "order": "desc"
              }
            }
          }
        }
      }
    }
  }
}

搜索结果：

"aggregations": {
    "agg1": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "car",
          "doc_count": 2,
          "agg2": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "retail",
                "doc_count": 2,
                "SUM_0": {
                  "value": 8.0          <-- note this
                }
              }
            ]
          },
          "sum_max_bucket": {
            "value": 8.0,
            "keys": [
              "retail"
            ]
          }
        },
        {
          "key": "bike",
          "doc_count": 2,
          "agg2": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "retail",
                "doc_count": 2,
                "SUM_0": {
                  "value": 6.0       <-- note this
                }
              }
            ]
          },
          "sum_max_bucket": {
            "value": 6.0,
            "keys": [
              "retail"
            ]
          }
        }
      ]
    }

赞(0）回复(0）举报 2021-06-14

7vhp5slm2#

参考es文档的摘录
与所有管道聚合一样，bucket排序聚合在所有其他非管道聚合之后执行。这意味着排序只适用于从父聚合返回的任何bucket。例如，如果父聚合是terms，并且其大小设置为10，则bucket\u sort将仅对返回的10个term bucket进行排序。
以上就是您的查询没有给出正确结果的原因。

赞(0）回复(0）举报 2021-06-14

我来回答

ElasticSearch中的聚合和排序

2条答案

相关问题

热门标签

最新问答