Elasticsearch:对flattened(扁平化)字段中的对象数组执行“AND”查询

scyqe7ek  于 2023-10-17  发布在  ElasticSearch
关注(0)|答案(1)|浏览(102)

我试图对一个flattened字段中的多个子字段执行must查询,该字段包含一个对象数组。我希望所有条件都在数组的同一个元素上同时满足,但实际上只要数组中的任意元素分别满足各个条件,文档就会被匹配。
我有一组文档,如下所示:

[
{
    "_source": {
    "metadata": {
        "name": "banana",
        "seasonalities": [
            {
                "date": "2023-01-01",
                "country": "AA"
            },
            {
                "date": "2023-01-22",
                "country": "BB"
            }
        ]
    }
},
{
    "_source": {
        "metadata": {
            "name": "potato",
            "seasonalities": [
                {
                    "date": "2023-01-01",
                    "country": "AA"
                },
                {
                    "date": "2023-01-01",
                    "country": "BB"
                },
                {
                    "date": "2023-01-01",
                    "country": "CC"
                },
                {
                    "date": "2023-01-22",
                    "country": "DD"
                },
                {
                    "date": "2023-01-23",
                    "country": "EE"
                }
            ]
        }
}
]

下面的查询可以正常工作,并且只检索到一个文档:

{
    "query": {
        "bool": {
            "filter": [
                {
                    "bool": {
                        "must": [
                            {
                                "term": {
                                    "metadata.seasonalities.date": {"value":"2023-01-22"}
                                }
                            },
                            {
                                "term": {
                                    "metadata.name": {"value":"banana"}
                                }
                            }
                        ]
                    }
                }
            ]
        }
    }
}

这个更简单的查询也可以工作:

{
    "query": {
        "bool": {
            "must": [
                {
                    "term": {
                        "metadata.seasonalities.date": "2023-01-22"
                    }
                },
                {
                    "term": {
                        "metadata.name": "banana"
                    }
                }
            ]
        }
    }
}

这些查询都像预期的那样返回一个值,但当我尝试这样做时:

{
    "query": {
        "bool": {
            "must": [
                {
                    "term": {
                        "metadata.seasonalities.date": "2023-01-22"
                    }
                },
                {
                    "term": {
                        "metadata.seasonalities.country": "BB"
                    }
                }
            ]
        }
    }
}

我得到了两个文档,而不仅仅是第一个。
下面是索引的映射(mapping):

"mappings": {
            "properties": {
                "metadata": {
                    "type": "flattened"
                }
            }
        },

当我在实际索引上带explain运行搜索时,得到了如下结果(实际索引中还有其他字段和文档):

{
    "took": 7,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 943,
            "relation": "eq"
        },
...
            {
                "_source": {
                    "metadata": {
                        "name": "XXX",
                        "seasonalities": [
                            {
                                "date": "2023-01-22",
                                "country": "MM",
                            },
                            {
                                "date": "2023-03-01",
                                "country": "BB",
                            },
...
                "_explanation": {
                    "value": 9.256371,
                    "description": "sum of:",
                    "details": [
                        {
                            "value": 5.61615,
                            "description": "weight(metadata._keyed:seasonalities.date\u00002023-01-22 in 1) [PerFieldSimilarity], result of:",
                            "details": [
                                {
                                    "value": 5.61615,
                                    "description": "score(freq=1.0), computed as boost * idf * tf from:",
                                    "details": [
                                        {
                                            "value": 2.2,
                                            "description": "boost",
                                            "details": []
                                        },
                                        {
                                            "value": 3.3873937,
                                            "description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
                                            "details": [
                                                {
                                                    "value": 177,
                                                    "description": "n, number of documents containing term",
                                                    "details": []
                                                },
                                                {
                                                    "value": 5251,
                                                    "description": "N, total number of documents with field",
                                                    "details": []
                                                }
                                            ]
                                        },
                                        {
                                            "value": 0.75361645,
                                            "description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
                                            "details": [
                                                {
                                                    "value": 1,
                                                    "description": "freq, occurrences of term within document",
                                                    "details": []
                                                },
                                                {
                                                    "value": 1.2,
                                                    "description": "k1, term saturation parameter",
                                                    "details": []
                                                },
                                                {
                                                    "value": 0.75,
                                                    "description": "b, length normalization parameter",
                                                    "details": []
                                                },
                                                {
                                                    "value": 1,
                                                    "description": "dl, length of field",
                                                    "details": []
                                                },
                                                {
                                                    "value": 33.413826,
                                                    "description": "avgdl, average length of field",
                                                    "details": []
                                                }
                                            ]
                                        }
                                    ]
                                }
                            ]
                        },
                        {
                            "value": 3.640221,
                            "description": "weight(metadata._keyed:seasonalities.country\u0000BB in 1) [PerFieldSimilarity], result of:",
                            "details": [
                                {
                                    "value": 3.640221,
                                    "description": "score(freq=1.0), computed as boost * idf * tf from:",
                                    "details": [
                                        {
                                            "value": 2.2,
                                            "description": "boost",
                                            "details": []
                                        },
                                        {
                                            "value": 2.1956074,
                                            "description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
                                            "details": [
                                                {
                                                    "value": 584,
                                                    "description": "n, number of documents containing term",
                                                    "details": []
                                                },
                                                {
                                                    "value": 5251,
                                                    "description": "N, total number of documents with field",
                                                    "details": []
                                                }
                                            ]
                                        },
                                        {
                                            "value": 0.75361645,
                                            "description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
                                            "details": [
                                                {
                                                    "value": 1,
                                                    "description": "freq, occurrences of term within document",
                                                    "details": []
                                                },
                                                {
                                                    "value": 1.2,
                                                    "description": "k1, term saturation parameter",
                                                    "details": []
                                                },
                                                {
                                                    "value": 0.75,
                                                    "description": "b, length normalization parameter",
                                                    "details": []
                                                },
                                                {
                                                    "value": 1,
                                                    "description": "dl, length of field",
                                                    "details": []
                                                },
                                                {
                                                    "value": 33.413826,
                                                    "description": "avgdl, average length of field",
                                                    "details": []
                                                }
                                            ]
                                        }
                                    ]
                                }
                            ]
                        }
                    ]
                }
            },

我得到了943个结果,但explain的输出显示:对于2023-01-22,“n, number of documents containing term”为177;对于BB,“n, number of documents containing term”为584。我期望两者的交集中只有一两个文档,如何才能只得到这些文档?我尝试把查询中的term改为match,但没有效果。我使用的是ES 7.13.3。

k2fxgqgv

k2fxgqgv1#

当你需要在同一个对象(即数组中的同一个元素)上同时匹配两个字段时,需要使用nested字段类型和nested查询。
你的映射(mapping)应该如下所示:

{
  "mappings": {
    "properties": {
      "metadata": {
        "properties": {
          "name": {
            "type": "keyword"
          },
          "seasonalities": {
            "type": "nested",
            "properties": {
              "date": {
                "type": "date"
              },
              "country": {
                "type": "keyword"
              }
            }
          }
        }
      }
    }
  }
}

然后,第一个查询将像预期的那样仅返回第一个文档:

{
  "query": {
    "bool": {
      "filter": [
        {
          "nested": {
            "path": "metadata.seasonalities",
            "query": {
              "term": {
                "metadata.seasonalities.date": "2023-01-22"
              }
            }
          }
        },
        {
          "term": {
            "metadata.name": "banana"
          }
        }
      ]
    }
  }
}

当你对同一个seasonalities对象有两个约束条件时,可以这样写,这样同样只会返回第一个文档:

{
  "query": {
    "nested": {
      "path": "metadata.seasonalities",
      "query": {
        "bool": {
          "filter": [
            {
              "term": {
                "metadata.seasonalities.date": "2023-01-22"
              }
            },
            {
              "term": {
                "metadata.seasonalities.country": "BB"
              }
            }
          ]
        }
      }
    }
  }
}

相关问题