我有一个MongoDB聚合管道,我用C#写的。
$geoNear{
near: {
type: "Point",
coordinates: [-110.29665, 31.535699],
},
distanceField: "distance",
maxDistance: 100,
query: {
$and: [
{
IsResidential: true,
},
{
DaysSinceLastSale: {
$gt: 10,
},
},
],
},
spherical: true,
},
$lookup:
{
from: "tax_assessor",
let: {
propertyCity: "Chicago",
propertyState: "IL"
ownerName: "$OwnerName",
},
pipeline: [
{
$match: {
$expr: {
$and: [
{$eq: ["$PropertyCity", "$$propertyCity"]},
{$eq: ["$OwnerName", "$$ownerName"]},
{$eq: ["$PropertyState", "$$propertyState"]},
],
},
},
},
],
as: "NumberOfProperties",
},
$project:
{
_id: 0,
FullAddress: 1,
OwnerName: 1,
distance: 1,
YearBuilt: 1,
NumberOfProperties: {
$size: "$NumberOfProperties"
}
}
这里我需要的是类似于这个SQL的东西:
select res1.owner_name, res1.full_address, res1.distance, res1.year_built, count(res2.owner_name) as property_count from
(select * from properties where geolocation is <within a given range> and <some filters>) res1
left join
(select * from properties where city=<given city> and state=<given state>) res2
on res1.owner_name = res2.owner_name
group by res1.owner_name
order by res1.distance
我可以得到正确的结果,但这种聚合非常慢。
在检查执行计划时,我看到第一阶段- GeoNear使用了索引。但在第二阶段- lookup中,它没有使用任何索引。
"stages" : [
{
"$geoNearCursor" : {
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "tax_assessor",
"indexFilterSet" : false,
"parsedQuery" : {..},
"queryHash" : "4B38534E",
"planCacheKey" : "2328FDE9",
"winningPlan" : {
"stage" : "FETCH",
"filter" : {..},
"inputStage" : {
"stage" : "GEO_NEAR_2DSPHERE",
"keyPattern" : {
"PropertyGeoPoint" : "2dsphere",
"DaysSinceLastSale" : 1,
"IsResidential" : 1
},
"indexName" : "sta_geo_idx",
"indexVersion" : 2,
"inputStages" : [..]
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 2,
"executionTimeMillis" : 1911,
"totalKeysExamined" : 450,
"totalDocsExamined" : 552,
"executionStages" : {..},
}
},
"nReturned" : NumberLong(2),
"executionTimeMillisEstimate" : NumberLong(0)
},
{
"$lookup" : {
"from" : "tax_assessor",
"as" : "NumberOfProperties",
"let" : {
"propertyCity" : {
"$const" : "COLUMBUS"
},
"propertyState" : {
"$const" : "OH"
},
"ownerName" : "$OwnerName"
},
"pipeline" : [
{
"$match" : {
"$expr" : {
"$and" : [
{
"$eq" : [
"$PropertyCity",
"$$propertyCity"
]
},
{
"$eq" : [
"$PropertyState",
"$$propertyState"
]
},
{
"$eq" : [
"$OwnerName",
"$$ownerName"
]
}
]
}
}
}
]
},
"nReturned" : NumberLong(2),
"executionTimeMillisEstimate" : NumberLong(1910)
},
{
"$project" : {
"OwnerName" : true,
"FullAddress" : true,
"distance" : true,
"YearBuilt" : true,
"NumberOfProperties" : {
"$size" : [
"$NumberOfProperties"
]
},
"_id" : false
},
"nReturned" : NumberLong(2),
"executionTimeMillisEstimate" : NumberLong(1910)
},
{
"$sort" : {
"sortKey" : {
"Distance" : 1
}
},
"nReturned" : NumberLong(2),
"executionTimeMillisEstimate": NumberLong(1910)
}
]
基于上面的统计数据,$lookup是它不使用任何索引的原因。它是否通过重新排列阶段或应用适当的索引来优化结果?或者有更好的方法来获得NumberOfProperties而不使用$lookup。
1条答案
按热度按时间92dk7w1h1#
不妨试试这个: