I am trying to count the documents in a collection for a given filter, using countDocuments(). This takes up to 1 s per 100,000 documents, which is rather slow, and it does not take into account the load on the database when several users trigger such a count every few seconds to minutes.
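Roughly, the call looks like this (a simplified, hypothetical sketch; the actual filter is built from the invoice query object shown further below):

import com.mongodb.reactivestreams.client.MongoCollection;
import io.reactivex.rxjava3.core.Flowable;
import org.bson.Document;
import org.bson.conversions.Bson;

// Simplified sketch of the current counting call (reactive streams driver + RxJava 3);
// the real filter comes from the invoice query criteria further below.
long countInvoices(MongoCollection<Document> collection, Bson filter) {
    return Flowable.fromPublisher(collection.countDocuments(filter)).blockingFirst();
}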
Under the hood, countDocuments() is translated into an aggregate query:
db.collection.aggregate([
  {
    "$match": {
      "_id": {"$exists": true}
    }
  },
  {
    "$group": {
      "_id": 1,
      "n": {"$sum": 1}
    }
  }
])
Calling explain() on it produces:
{
explainVersion: '1',
stages: [
{
'$cursor': {
queryPlanner: {
namespace: 'collection',
indexFilterSet: false,
parsedQuery: { _id: { '$exists': true } },
queryHash: 'BA029CD5',
planCacheKey: '4D66BB31',
maxIndexedOrSolutionsReached: false,
maxIndexedAndSolutionsReached: false,
maxScansToExplodeReached: false,
winningPlan: {
stage: 'FETCH',
filter: { _id: { '$exists': true } },
inputStage: {
stage: 'IXSCAN',
keyPattern: { _id: 1 },
indexName: '_id_',
isMultiKey: false,
multiKeyPaths: { _id: [] },
isUnique: true,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: { _id: [ '[MinKey, MaxKey]' ] }
}
},
rejectedPlans: []
},
executionStats: {
executionSuccess: true,
nReturned: 614833,
executionTimeMillis: 6589,
totalKeysExamined: 614833,
totalDocsExamined: 614833,
executionStages: {
stage: 'FETCH',
filter: { _id: { '$exists': true } },
nReturned: 614833,
executionTimeMillisEstimate: 4219,
works: 614834,
advanced: 614833,
needTime: 0,
needYield: 0,
saveState: 668,
restoreState: 668,
isEOF: 1,
docsExamined: 614833,
alreadyHasObj: 0,
inputStage: {
stage: 'IXSCAN',
nReturned: 614833,
executionTimeMillisEstimate: 568,
works: 614834,
advanced: 614833,
needTime: 0,
needYield: 0,
saveState: 668,
restoreState: 668,
isEOF: 1,
keyPattern: { _id: 1 },
indexName: '_id_',
isMultiKey: false,
multiKeyPaths: { _id: [] },
isUnique: true,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: { _id: [ '[MinKey, MaxKey]' ] },
keysExamined: 614833,
seeks: 1,
dupsTested: 0,
dupsDropped: 0
}
},
allPlansExecution: []
}
},
nReturned: Long("614833"),
executionTimeMillisEstimate: Long("6510")
},
{
'$group': { _id: { '$const': 1 }, n: { '$sum': { '$const': 1 } } },
maxAccumulatorMemoryUsageBytes: { n: Long("80") },
totalOutputDataSizeBytes: Long("237"),
usedDisk: false,
spills: Long("0"),
nReturned: Long("1"),
executionTimeMillisEstimate: Long("6584")
}
],
command: {
aggregate: 'collection',
pipeline: [
{ '$match': { _id: { '$exists': true } } },
{ '$group': { _id: 1, n: { '$sum': 1 } } }
],
cursor: {},
'$db': 'db'
},
ok: 1,
'$clusterTime': {
clusterTime: Timestamp({ t: 1677593996, i: 1 }),
signature: {
hash: Binary(Buffer.from("0000000000000000000000000000000000000000", "hex"), 0),
keyId: Long("0")
}
},
operationTime: Timestamp({ t: 1677593996, i: 1 })
}
Note that the winning plan consists of two stages: a FETCH, fed by an IXSCAN.

A find() query that excludes the _id field and instead projects only onto an indexed field (here called origin) hits the IXSCAN alone:

db.collection.find({}, {"_id":0, "origin": 1}).count()

This computes the correct result, and a filter can be supplied as well (the explain below was taken with a filter on origin). .explain() yields:
{
explainVersion: '1',
queryPlanner: {
namespace: 'db.collection',
indexFilterSet: false,
parsedQuery: { origin: { '$eq': 'WF' } },
queryHash: '2428EDD1',
planCacheKey: '85C94249',
maxIndexedOrSolutionsReached: false,
maxIndexedAndSolutionsReached: false,
maxScansToExplodeReached: false,
winningPlan: {
stage: 'PROJECTION_COVERED',
transformBy: { _id: 0, origin: 1 },
inputStage: {
stage: 'IXSCAN',
keyPattern: { origin: 1, 'metadata.iv': -1 },
indexName: 'originAscending',
isMultiKey: false,
multiKeyPaths: { origin: [], 'metadata.iv': [] },
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: {
origin: [ '["WF", "WF"]' ],
'metadata.iv': [ '[MaxKey, MinKey]' ]
}
}
},
rejectedPlans: [
{
stage: 'PROJECTION_COVERED',
transformBy: { _id: 0, origin: 1 },
inputStage: {
stage: 'IXSCAN',
keyPattern: { origin: -1, 'metadata.iv': -1 },
indexName: 'originDescending',
isMultiKey: false,
multiKeyPaths: { origin: [], 'metadata.iv': [] },
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: {
origin: [ '["WF", "WF"]' ],
'metadata.iv': [ '[MaxKey, MinKey]' ]
}
}
}
]
},
command: {
find: 'collection',
filter: { origin: 'WF' },
projection: { _id: 0, origin: 1 },
'$db': 'db'
},
ok: 1,
'$clusterTime': {
clusterTime: Timestamp({ t: 1677593816, i: 1 }),
signature: {
hash: Binary(Buffer.from("0000000000000000000000000000000000000000", "hex"), 0),
keyId: Long("0")
}
},
operationTime: Timestamp({ t: 1677593816, i: 1 })
}
This uses only a single index scan (PROJECTION_COVERED on top of an IXSCAN, no FETCH) and executes much faster.

I tried to build this query with version 4.8.0 of the MongoDB Java driver by writing the following code (still work in progress):
private int aggregate(InvoiceQuery query) {
    // Build the filter from the invoice query and project onto the indexed "origin"
    // field only, hoping for a covered plan.
    var filter = new MongoInvoiceFilterCriteria(query.getFilter()).asBson();
    var projection = Projections.fields(Projections.excludeId(), Projections.include("origin"));
    // Explain the find instead of executing it, to inspect the winning plan.
    var findPublisher = this.collection.find(filter).projection(projection).explain();
    var result = Flowable.fromPublisher(findPublisher).blockingFirst();
    LOGGER.info("Aggregate " + result.toJson());
    return 0;
}
This produces the following result:
{
"explainVersion": "1",
"queryPlanner": {
"namespace": "db.collection",
"indexFilterSet": false,
"parsedQuery": {
"_id": {
"$exists": true
}
},
"queryHash": "88DBAD21",
"planCacheKey": "D9A2E277",
"maxIndexedOrSolutionsReached": false,
"maxIndexedAndSolutionsReached": false,
"maxScansToExplodeReached": false,
"winningPlan": {
"stage": "PROJECTION_SIMPLE",
"transformBy": {
"_id": 0,
"origin": 1
},
"inputStage": {
"stage": "FETCH",
"filter": {
"_id": {
"$exists": true
}
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"_id": 1
},
"indexName": "_id_",
"isMultiKey": false,
"multiKeyPaths": {
"_id": []
},
"isUnique": true,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"_id": [
"[MinKey, MaxKey]"
]
}
}
}
},
"rejectedPlans": []
},
"executionStats": {
"executionSuccess": true,
"nReturned": 9533,
"executionTimeMillis": 18,
"totalKeysExamined": 9533,
"totalDocsExamined": 9533,
"executionStages": {
"stage": "PROJECTION_SIMPLE",
"nReturned": 9533,
"executionTimeMillisEstimate": 4,
"works": 9534,
"advanced": 9533,
"needTime": 0,
"needYield": 0,
"saveState": 9,
"restoreState": 9,
"isEOF": 1,
"transformBy": {
"_id": 0,
"origin": 1
},
"inputStage": {
"stage": "FETCH",
"filter": {
"_id": {
"$exists": true
}
},
"nReturned": 9533,
"executionTimeMillisEstimate": 4,
"works": 9534,
"advanced": 9533,
"needTime": 0,
"needYield": 0,
"saveState": 9,
"restoreState": 9,
"isEOF": 1,
"docsExamined": 9533,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 9533,
"executionTimeMillisEstimate": 0,
"works": 9534,
"advanced": 9533,
"needTime": 0,
"needYield": 0,
"saveState": 9,
"restoreState": 9,
"isEOF": 1,
"keyPattern": {
"_id": 1
},
"indexName": "_id_",
"isMultiKey": false,
"multiKeyPaths": {
"_id": []
},
"isUnique": true,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"_id": [
"[MinKey, MaxKey]"
]
},
"keysExamined": 9533,
"seeks": 1,
"dupsTested": 0,
"dupsDropped": 0
}
}
}
},
"command": {
"find": "collection",
"filter": {
"_id": {
"$exists": true
}
},
"projection": {
"_id": 0,
"origin": 1
},
"$db": "db"
},
"ok": 1.0,
"$clusterTime": {
"clusterTime": {
"$timestamp": {
"t": 1677769219,
"i": 1
}
},
"signature": {
"hash": {
"$binary": {
"base64": "AAAAAAAAAAAAAAAAAAAAAAAAAAA=",
"subType": "00"
}
},
"keyId": 0
}
},
"operationTime": {
"$timestamp": {
"t": 1677769219,
"i": 1
}
}
}
So, for some reason, MongoDB executes a FETCH plus an IXSCAN stage, even though, as far as I can tell, the query should run the same stages as the mongosh version.
I have considered countDocuments(), estimatedDocumentCount(), and various aggregate() and find() queries, but I could not find any of them that performs as well as db.collection.find({}, {"_id":0, "origin": 1}).count() does directly in mongosh.

How can I improve the performance of counting documents in Java?
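For completeness, the estimatedDocumentCount() variant would look roughly like the sketch below; it is fast because it only reads collection metadata, but it cannot take a filter, so it does not cover the filtered counts I need:

import com.mongodb.reactivestreams.client.MongoCollection;
import io.reactivex.rxjava3.core.Flowable;
import org.bson.Document;

// Sketch only: metadata-based count of the whole collection; no filter can be applied here.
long estimatedTotal(MongoCollection<Document> collection) {
    return Flowable.fromPublisher(collection.estimatedDocumentCount()).blockingFirst();
}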
Edit, trying the answer suggested by @Noel:
// build the aggregation pipeline
List<Bson> pipeline = Arrays.asList(
        Aggregates.match(Filters.gte("_id", new MinKey())),
        Aggregates.group("$1", Accumulators.sum("n", 1)));
// explain the execution stats of the aggregation pipeline
var findPublisher = this.collection.aggregate(pipeline).explain();
It still contains a FETCH step:
{
"explainVersion": "2",
"queryPlanner": {
"namespace": "db.collection",
"indexFilterSet": false,
"parsedQuery": {
"_id": {
"$gte": {
"$minKey": 1
}
}
},
"queryHash": "D1046F5E",
"planCacheKey": "7E518BFB",
"optimizedPipeline": true,
"maxIndexedOrSolutionsReached": false,
"maxIndexedAndSolutionsReached": false,
"maxScansToExplodeReached": false,
"winningPlan": {
"queryPlan": {
"stage": "GROUP",
"planNodeId": 3,
"inputStage": {
"stage": "FETCH",
"planNodeId": 2,
"inputStage": {
"stage": "IXSCAN",
"planNodeId": 1,
"keyPattern": {
"_id": 1
},
"indexName": "_id_",
"isMultiKey": false,
"multiKeyPaths": {
"_id": []
},
"isUnique": true,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"_id": [
"[MinKey, MaxKey]"
]
}
}
}
},
"slotBasedPlan": {
}
},
"rejectedPlans": []
},
"executionStats": {
},
"command": {
"aggregate": "collection",
"pipeline": [
{
"$match": {
"_id": {
"$gte": {
"$minKey": 1
}
}
}
},
{
"$group": {
"_id": "$1",
"n": {
"$sum": 1
}
}
}
],
"cursor": {
"batchSize": 1
},
"$db": "db"
}
}
1 Answer
I think you should aim for COUNT_SCAN as the winning plan. Such a query uses the index on _id to do the counting.
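A minimal sketch of what an index-only count could look like with the reactive Java driver, assuming the originAscending index from the question (illustration only; "origin" and the index name are taken from the question's explain output):

import com.mongodb.client.model.CountOptions;
import com.mongodb.client.model.Filters;
import com.mongodb.reactivestreams.client.MongoCollection;
import io.reactivex.rxjava3.core.Flowable;
import org.bson.Document;

// Illustrative sketch: an equality match on an indexed field, without any $exists
// predicate, so the count should be answerable from the index keys alone (COUNT_SCAN).
long countByOrigin(MongoCollection<Document> collection, String origin) {
    var options = new CountOptions().hintString("originAscending");
    return Flowable.fromPublisher(
            collection.countDocuments(Filters.eq("origin", origin), options))
        .blockingFirst();
}

The important part is that the filter contains no $exists predicate; $exists is what appears to force the FETCH stage in the plans shown above.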