ElasticSearch中的聚合和排序

lzfw57am  于 2021-06-14  发布在  ElasticSearch
关注(0)|答案(2)|浏览(472)

我想在ElasticSearch中对查询的聚合结果进行排序
等价sqlquery:- select col1,col2,sum(col3)来自表group by col1,col2 order by sum(col3)desc;
我尝试了下面的查询,它返回的结果,但不是我所期望的排序顺序

{
  "from": 0,
  "size": 0,
  "_source": {
    "includes": [
      "col1",
      "col2",
      "SUM"
    ],
    "excludes": []
  },
  "stored_fields": [
    "col1",
    "col2"
  ],
  "aggregations": {
    "col1": {
      "terms": {
        "field": "col1",
        "size": 200,
        "min_doc_count": 1,
        "shard_min_doc_count": 0,
        "show_term_doc_count_error": false
      },
      "aggregations": {
        "col2": {
          "terms": {
            "field": "col2",
            "size": 10,
            "min_doc_count": 1,
            "shard_min_doc_count": 0,
            "show_term_doc_count_error": false
          },
          "aggregations": {
            "SUM_0": {
              "sum": {
                "field": "col3"
              }
            },
          "col3_bucket_sort": {
          "bucket_sort": {
            "sort": [
              { "SUM_0": { "order": "desc" } } 
            ],
            "size": 3                                
          }
        }

          }
        }
      }
    }
  }
}

抽样指标数据

{
            "_index": "my_index",
            "_type": "products",
            "_id": "OJfBSXUB0GzAt2o_zVdS",
            "_score": 1.0,
            "_source": {
                "product_name": "car",
                "product_type": "retail",
                "qty": 5
            }
        }
        {
            "_index": "my_index",
            "_type": "report",
            "_id": "OpfBSXUB0GzAt2o_zVfG1",
            "_score": 1.0,
            "_source": {
                "product_name": "bike",
                "product_type": "retail",
                "qty": 5
            }
        },

       {
            "_index": "my_index",
            "_type": "report",
            "_id": "OpfBSXUB0GzAt2o_zVfG",
            "_score": 1.0,
            "_source": {
                "product_name": "car",
                "product_type": "retail",
                "qty": 3
            }
        },
        {
            "_index": "my_index",
            "_type": "report",
            "_id": "OpfBSXUB0GzAt2o_zVfG2",
            "_score": 1.0,
            "_source": {
                "product_name": "bike",
                "product_type": "retail",
                "qty": 1
            }
        }

预期输出:-希望根据字段“产品名称”和“产品类型”聚合(分组)我的文档,并按总和(数量)排序
等价sqlquery:- select 产品名称、产品类型、按产品名称分组的产品表中的金额(数量)、按金额(数量)说明分组的产品类型订单;

{
    "took": 3,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 2,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "product_name": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "car",
                    "doc_count": 2,
                    "product_type": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": retail,
                                "doc_count": 2,
                                "SUM_0": {
                                    "value":8
                                }
                            }
                        ]
                    }
                },
                                {
                    "key": "bike",
                    "doc_count": 2,
                    "product_type": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": retail,
                                "doc_count": 2,
                                "SUM_0": {
                                    "value": 6
                                }
                            }
                        ]
                    }
                }

            ]
        }
    }
}

但我得到低于输出即aggreating文件成功,但排序是不工作的总和(数量)

{
    "took": 3,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 2,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "product_name": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "bike",
                    "doc_count": 2,
                    "product_type": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": retail,
                                "doc_count": 2,
                                "SUM_0": {
                                    "value": 6
                                }
                            }
                        ]
                    }
                },
                {
                    "key": "car",
                    "doc_count": 2,
                    "product_type": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": retail,
                                "doc_count": 2,
                                "SUM_0": {
                                    "value":8
                                }
                            }
                        ]
                    }
                }

            ]
        }
    }
}
jei2mxaa

jei2mxaa1#

由于您是按col1、col2(即使用两项聚合)对数据进行分组的,因此,当您尝试使用bucket sort aggregation在sum aggregation的基础上对结果进行排序时,结果并不合适。
您需要使用max bucket aggregation,它是一个同级管道聚合,用同级聚合中指定度量的最大值标识bucket,并输出bucket的值和键。
然后应该对聚合的结果执行bucket排序聚合。
添加一个使用索引数据(与问题中使用的数据相同)、搜索查询和搜索结果的工作示例。
搜索查询:

{
  "size": 0,
  "aggs": {
    "agg1": {
      "terms": {
        "field": "product_name.keyword"
      },
      "aggs": {
        "agg2": {
          "terms": {
            "field": "product_type.keyword"
          },
          "aggregations": {
            "SUM_0": {
              "sum": {
                "field": "qty"
              }
            }
          }
        },
        "sum_max_bucket": {
          "max_bucket": {
            "buckets_path": "agg2>SUM_0"        <-- note this
          }
        },
        "sum_bucket_sort": {
          "bucket_sort": {
            "sort": {
              "sum_max_bucket": {
                "order": "desc"
              }
            }
          }
        }
      }
    }
  }
}

搜索结果:

"aggregations": {
    "agg1": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "car",
          "doc_count": 2,
          "agg2": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "retail",
                "doc_count": 2,
                "SUM_0": {
                  "value": 8.0          <-- note this
                }
              }
            ]
          },
          "sum_max_bucket": {
            "value": 8.0,
            "keys": [
              "retail"
            ]
          }
        },
        {
          "key": "bike",
          "doc_count": 2,
          "agg2": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "retail",
                "doc_count": 2,
                "SUM_0": {
                  "value": 6.0       <-- note this
                }
              }
            ]
          },
          "sum_max_bucket": {
            "value": 6.0,
            "keys": [
              "retail"
            ]
          }
        }
      ]
    }
7vhp5slm

7vhp5slm2#

参考es文档的摘录
与所有管道聚合一样,bucket排序聚合在所有其他非管道聚合之后执行。这意味着排序只适用于从父聚合返回的任何bucket。例如,如果父聚合是terms,并且其大小设置为10,则bucket\u sort将仅对返回的10个term bucket进行排序。
以上就是您的查询没有给出正确结果的原因。

相关问题