Elasticsearch:将度量聚合与复合聚合一起使用

bogh5gae  于 2022-11-22  发布在  ElasticSearch
关注(0)|答案(1)|浏览(148)

下面是一个例子:

{
  "test5" : {
    "mappings" : {
      "dynamic" : "false",
      "properties" : {
        "messageType" : {
          "type" : "keyword"
        },
        "groupId" : {
          "type" : "keyword"
        },
        "payload" : {
          "type" : "nested",
          "include_in_root" : true,
          "properties" : {
            "request" : {
              "type" : "nested",
              "include_in_root" : true,
              "properties" : {
                "data" : {
                  "type" : "nested",
                  "include_in_root" : true,
                  "properties" : {
                    "chargingPeriods" : {
                      "type" : "nested",
                      "include_in_root" : true,
                      "properties" : {
                        "endDateTime" : {
                          "type" : "date"
                        },
                        "power" : {
                          "type" : "double"
                        },
                        "startDateTime" : {
                          "type" : "date"
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

第一个用例:我需要基于 payload.request.data.chargingPeriods.startDateTime 和 groupId 创建以2分钟为间隔的桶,并以 messageType 作为筛选条件。顺便说一句,chargingPeriods 是一个数组。
此查询适用于该用例:

GET test5/_search
{
  "size": 0,
  "aggs": {
    "my_buckets": {
      "composite": {
        "sources": [
          { "sessionId": { "terms": { "field": "groupId"} } },
          {
            "date" : {
              "date_histogram": {
                "field": "payload.request.data.chargingPeriods.startDateTime",
                "fixed_interval": "2m",
                "format": "MM/dd/yyyy - hh:mm:ss",
                "order": "asc"
              }
            }
          }
        ]
      }    
    }
  },
  "query": {
    "terms": {
      "messageType": [
        "test"
      ]
    }
  }
}

现在,我希望在返回的这些复合桶上执行度量聚合,我尝试了以下操作:

GET test5/_search
{
  "size": 0,
  "aggs": {
    "my_buckets": {
      "composite": {
        "sources": [
          { "sessionId": { "terms": { "field": "groupId"} } },
          {
            "date" : {
              "date_histogram": {
                "field": "payload.request.data.chargingPeriods.startDateTime",
                "fixed_interval": "2m",
                "format": "MM/dd/yyyy - hh:mm:ss",
                "order": "asc"
              }
            }
          }
        ]
      },
      "aggregations": {
        "metricAgg": {
          "max": {
            "field": "payload.request.data.chargingPeriods.power"
          }
        }
      }      
    }
  },
  "query": {
    "terms": {
      "messageType": [
        "test"
      ]
    }
  }
}

根据 ES 文档( https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-composite-aggregation.html ),度量聚合应该在每个复合桶内分别计算。但实际上,它并不是按复合桶计算的,而是对给定文档的 chargingPeriods 数组中所有的 power 字段计算了度量聚合。
如何创建索引:

PUT /test5
{
  "settings": {
    "number_of_shards": 1
  },
  "mappings" : {
      "dynamic" : "false",
      "properties" : {
        "groupId" : {
          "type" : "keyword"
        },
        "messageType" : {
          "type" : "keyword"
        },
        "payload" : {
          "type" : "nested",
          "include_in_root": true,
          "properties": {
            "request": {
              "type":"nested",
              "include_in_root":true,
              "properties": {
                "data": {
                  "type":"nested",
                  "include_in_root": true,
                  "properties": {
                    "chargingPeriods": {
                      "type": "nested",
                      "include_in_root": true,
                      "properties" : {
                        "endDateTime":{
                          "type": "date"
                        },
                        "power": {
                          "type": "double"
                        },
                        "startDateTime":{
                          "type": "date"
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
}

测试数据:

POST test5/_doc/testdocu1
{
  "groupId": "563",
  "messageType": "test",
  "payload": {
    "request": {
      "data": {
        "chargingPeriods": [
          {
            "endDateTime": "2022-10-13T17:42:25Z",
            "power": 9.62857,
            "startDateTime": "2022-10-13T17:41:55Z"
          },
          {
            "endDateTime": "2022-10-13T17:42:55Z",
            "power": 9.6491,
            "startDateTime": "2022-10-13T17:42:25Z"
          },
          {
            "endDateTime": "2022-10-13T17:43:25Z",
            "power": 9.6491,
            "startDateTime": "2022-10-13T17:42:55Z"
          },
          {
            "endDateTime": "2022-10-13T17:43:55Z",
            "power": 9.66963,
            "startDateTime": "2022-10-13T17:43:25Z"
          },
          {
            "endDateTime": "2022-10-13T17:44:25Z",
            "power": 9.67128,
            "startDateTime": "2022-10-13T17:43:55Z"
          },
          {
            "endDateTime": "2022-10-13T17:44:55Z",
            "power": 9.65079,
            "startDateTime": "2022-10-13T17:44:25Z"
          },
          {
            "endDateTime": "2022-10-13T17:45:25Z",
            "power": 9.66492,
            "startDateTime": "2022-10-13T17:44:55Z"
          },
          {
            "endDateTime": "2022-10-13T17:45:55Z",
            "power": 9.68544,
            "startDateTime": "2022-10-13T17:45:25Z"
          },
          {
            "endDateTime": "2022-10-13T17:46:25Z",
            "power": 9.68544,
            "startDateTime": "2022-10-13T17:45:55Z"
          },
          {
            "endDateTime": "2022-10-13T17:46:55Z",
            "power": 9.67434,
            "startDateTime": "2022-10-13T17:46:25Z"
          }
        ]
      }
    }
  }
}

我的输出:

"aggregations" : {
    "my_buckets" : {
      "after_key" : {
        "sessionId" : "563",
        "date" : "10/13/2022 - 05:46:00"
      },
      "buckets" : [
        {
          "key" : {
            "sessionId" : "563",
            "date" : "10/13/2022 - 05:40:00"
          },
          "doc_count" : 1,
          "metricAgg" : {
            "value" : 9.68544
          }
        },
        {
          "key" : {
            "sessionId" : "563",
            "date" : "10/13/2022 - 05:42:00"
          },
          "doc_count" : 4,
          "metricAgg" : {
            "value" : 9.68544
          }
        },
        {
          "key" : {
            "sessionId" : "563",
            "date" : "10/13/2022 - 05:44:00"
          },
          "doc_count" : 4,
          "metricAgg" : {
            "value" : 9.68544
          }
        },
        {
          "key" : {
            "sessionId" : "563",
            "date" : "10/13/2022 - 05:46:00"
          },
          "doc_count" : 1,
          "metricAgg" : {
            "value" : 9.68544
          }
        }
      ]
    }
  }

正如您所看到的,每个桶取的都是所有元素中 payload.request.data.chargingPeriods.power 的最大值,而忽略了复合桶的划分。

{
  "key" : {
    "sessionId" : "563",
    "date" : "10/13/2022 - 05:40:00"
  },
  "doc_count" : 1,
  "metricAgg" : {
    "value" : 9.68544
  }
},

metricAgg应该是9.62857

pb3s4cty

pb3s4cty1#

它并没有按照您期望的方式工作,因为您聚合的是设置了 include_in_root 的嵌套数据:所有嵌套数据都会被复制到根文档中,就好像它们没有嵌套一样,因此 startDateTime 和 power 之间的对应关系基本上丢失了。
另一个问题是,该复合聚合同时对嵌套数据(payload...)和非嵌套数据(groupId)进行聚合,这是行不通的。
但是,如果在数组的每个元素中添加groupId字段,则可以使查询按如下方式工作:

GET test5/_search
{
  "size": 0,
  "aggs": {
    "payload": {
      "nested": {
        "path": "payload"
      },
      "aggs": {
        "request": {
          "nested": {
            "path": "payload.request"
          },
          "aggs": {
            "data": {
              "nested": {
                "path": "payload.request.data"
              },
              "aggs": {
                "charging": {
                  "nested": {
                    "path": "payload.request.data.chargingPeriods"
                  },
                  "aggs": {
                    "my_buckets": {
                      "composite": {
                        "sources": [
                          {
                            "sessionId": {
                              "terms": {
                                "field": "payload.request.data.chargingPeriods.groupId"
                              }
                            }
                          },
                          {
                            "date": {
                              "date_histogram": {
                                "field": "payload.request.data.chargingPeriods.startDateTime",
                                "fixed_interval": "2m",
                                "format": "MM/dd/yyyy - hh:mm:ss",
                                "order": "asc"
                              }
                            }
                          }
                        ]
                      },
                      "aggregations": {
                        "metricAgg": {
                          "max": {
                            "field": "payload.request.data.chargingPeriods.power"
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "query": {
    "terms": {
      "messageType": [
        "test"
      ]
    }
  }
}

结果:

{
              "key" : {
                "sessionId" : "563",
                "date" : "10/13/2022 - 05:40:00"
              },
              "doc_count" : 1,
              "metricAgg" : {
                "value" : 9.62857
              }
            },

相关问题