elasticsearch OpenSearch:按文档聚合数据

w8ntj3qf  于 2023-06-21  发布在  ElasticSearch
关注(0)|答案(1)|浏览(122)

我想知道以下是否可能。我希望能够聚合文档中的嵌套数据,然后根据聚合的数据进行筛选。
假设我们有如下索引映射:

PUT warehouse/
{
  "mappings": {
  "properties": {
    "inventory": {
      "type": "nested",
      "properties": {
        "equipment": {
          "type": "keyword"
        },
        "price": {
          "type": "float"
        },
        "shopId": {
          "type": "keyword"
        }
      }
    },
    "profile": {
      "properties": {
        "name": {
          "type": "keyword"
        }
      }
    }
  }
  }
}

然后写入如下数据:

PUT warehouse/_doc/1
{
  "profile": {
    "name": "Place1"
  },
  "inventory": [
    {"equipment":"guitar", "price": 1000.00, "shopId":"1"},
    {"equipment":"guitar", "price": 200.00, "shopId":"2"},
    {"equipment":"guitar", "price": 1.0, "shopId":"4"}
  ]
}

等等
我需要按shopId进行过滤,比如shopId 1和shopId 2。然后按文档聚合数据,因此对于上面这个文档,shopId 1和shopId 2的吉他(guitar)平均价格为600。
然后我只想返回满足条件的文档及其聚合值,比如条件为shopId 1 + shopId 2且平均价格>130。
我已经让一部分聚合跑通了,但它是跨所有返回的文档进行聚合,而不是针对每个文档分别聚合。
下面的查询给出的是与搜索条件匹配的所有文档的总体平均价格:

GET warehouse/_search
{
  "query": {
    "nested": {
      "path": "inventory",
      "query": {
        "bool": {
              "should": [
                {
                  "term": {
                    "inventory.shopId": "1"
                  }
                },
                {
                  "term": {
                    "inventory.shopId": "2"
                  }
                }
              ]
            }
      }
    }
  },
  "aggs": {
    "inventory": {
      "nested": {
        "path": "inventory"
      },
      "aggs": {
        "priceAgg": {
          "filter": {
            "bool": {
              "should": [
                {
                  "term": {
                    "inventory.shopId": "1"
                  }
                },
                {
                  "term": {
                    "inventory.shopId": "2"
                  }
                }
              ]
            }
          },
          "aggs": {
            "avg_price": {
              "avg": {
                "field": "inventory.price"
              }
            }
          }
        }
      }
    }
  }
}

结果:

"aggregations" : {
    "inventory" : {
      "doc_count" : 9,
      "priceAgg" : {
        "doc_count" : 3,
        "avg_price" : {
          "value" : 2000.0
        }
      }
    }
}

但我需要的是每个文档中符合上述条件的平均价格。
较新版本的ElasticSearch提供了运行时映射(runtime mappings),但这在OpenSearch中不可用。
另一个可能的选择是重新设计搜索文档,但我仍然不知道如何生成和过滤每个shopId的组合值

vojdkbi0

vojdkbi01#

您可以在嵌套聚合(nested aggregation)中使用子聚合(sub-aggregation)。

PUT test_warehouse/
{
  "mappings": {
  "properties": {
    "inventory": {
      "type": "nested",
      "properties": {
        "equipment": {
          "type": "keyword"
        },
        "price": {
          "type": "float"
        },
        "shopId": {
          "type": "keyword"
        }
      }
    },
    "profile": {
      "properties": {
        "name": {
          "type": "keyword"
        }
      }
    }
  }
  }
}
PUT test_warehouse/_doc/1?refresh
{
  "profile": {
    "name": "Place1"
  },
  "inventory": [
    {"equipment":"guitar", "price": 1000.00, "shopId":"1"},
    {"equipment":"guitar", "price": 200.00, "shopId":"2"},
    {"equipment":"guitar", "price": 1.0, "shopId":"4"},
    {"equipment":"clarinet", "price": 355, "shopId":"2"}
  ]
}
GET test_warehouse/_search
{
  "size": 0,
  "aggs": {
    "inventory": {
      "nested": {
        "path": "inventory"
      },
      "aggs": {
        "priceAgg": {
          "filter": {
            "bool": {
              "should": [
                {
                  "term": {
                    "inventory.shopId": "1"
                  }
                },
                {
                  "term": {
                    "inventory.shopId": "2"
                  }
                }
              ]
            }
          },
          "aggs": {
            "NAME": {
              "terms": {
                "field": "inventory.equipment"
              },
              "aggs": {
                "NAME": {
                  "avg": {
                    "field": "inventory.price"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

输出:

{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  },
  "aggregations": {
    "inventory": {
      "doc_count": 4,
      "priceAgg": {
        "doc_count": 3,
        "NAME": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "guitar",
              "doc_count": 2,
              "NAME": {
                "value": 600
              }
            },
            {
              "key": "clarinet",
              "doc_count": 1,
              "NAME": {
                "value": 355
              }
            }
          ]
        }
      }
    }
  }
}

相关问题