4

I have a total of 1644662 records in one of my collections, and I have created an index on one of its fields, named "document_type_id". When I run the query

db.getCollection('my_collection').find({"document_type_id": {"$in": ["7"]}}).count()

It returns a count of "753800" in 0.285 secs. And when I run the query

db.getCollection('my_collection').find({"document_type_id": {"$in": ["3"]}}).count()

It returns a count of "819438" in 0.365 secs. Both queries work perfectly. But the problem comes when I run a query that combines both values

db.getCollection('my_collection').find({"document_type_id": {"$in": ["3", "7"]}}).count()

This query takes 54 secs to return the count. The execution plan for the third query is as follows:

{
"queryPlanner" : {
    "plannerVersion" : 1,
    "namespace" : "test.my_collection",
    "indexFilterSet" : false,
    "parsedQuery" : {
        "document_type_id" : {
            "$in" : [ 
                "3", 
                "7"
            ]
        }
    },
    "winningPlan" : {
        "stage" : "COUNT",
        "inputStage" : {
            "stage" : "FETCH",
            "inputStage" : {
                "stage" : "IXSCAN",
                "keyPattern" : {
                    "document_type_id" : 1.0
                },
                "indexName" : "document_type_id_1",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "document_type_id" : []
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "forward",
                "indexBounds" : {
                    "document_type_id" : [ 
                        "[\"3\", \"3\"]", 
                        "[\"7\", \"7\"]"
                    ]
                }
            }
        }
    },
    "rejectedPlans" : []
},
"executionStats" : {
    "executionSuccess" : true,
    "nReturned" : 0,
    "executionTimeMillis" : 74166,
    "totalKeysExamined" : 1573238,
    "totalDocsExamined" : 1573238,
    "executionStages" : {
        "stage" : "COUNT",
        "nReturned" : 0,
        "executionTimeMillisEstimate" : 73504,
        "works" : 1573239,
        "advanced" : 0,
        "needTime" : 1573238,
        "needYield" : 0,
        "saveState" : 14007,
        "restoreState" : 14007,
        "isEOF" : 1,
        "invalidates" : 0,
        "nCounted" : 1573238,
        "nSkipped" : 0,
        "inputStage" : {
            "stage" : "FETCH",
            "nReturned" : 1573238,
            "executionTimeMillisEstimate" : 73253,
            "works" : 1573239,
            "advanced" : 1573238,
            "needTime" : 0,
            "needYield" : 0,
            "saveState" : 14007,
            "restoreState" : 14007,
            "isEOF" : 1,
            "invalidates" : 0,
            "docsExamined" : 1573238,
            "alreadyHasObj" : 0,
            "inputStage" : {
                "stage" : "IXSCAN",
                "nReturned" : 1573238,
                "executionTimeMillisEstimate" : 2729,
                "works" : 1573239,
                "advanced" : 1573238,
                "needTime" : 0,
                "needYield" : 0,
                "saveState" : 14007,
                "restoreState" : 14007,
                "isEOF" : 1,
                "invalidates" : 0,
                "keyPattern" : {
                    "document_type_id" : 1.0
                },
                "indexName" : "document_type_id_1",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "document_type_id" : []
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "forward",
                "indexBounds" : {
                    "document_type_id" : [ 
                        "[\"3\", \"3\"]", 
                        "[\"7\", \"7\"]"
                    ]
                },
                "keysExamined" : 1573238,
                "seeks" : 1,
                "dupsTested" : 0,
                "dupsDropped" : 0,
                "seenInvalidated" : 0
            }
        }
    }
},
"serverInfo" : {
    "host" : "nb-fara",
    "port" : 27017,
    "version" : "3.4.9",
    "gitVersion" : "876ebee8c7dd0e2d992f36a848ff4dc50ee6603e"
},
"ok" : 1.0

}

4
  • 1) Are the values of document_type_id numeric strings? 2) You have a FETCH stage in your explain output; what is the actual query that generated the explain? 3) What is your MongoDB deployment topology: standalone, replica set, or sharded cluster? Commented Jan 22, 2018 at 23:25
  • 1) yes 2)db.getCollection('my_collection').explain('executionStats').find({"document_type_id": {"$in": ["3", "7"]}}).count() 3) I'm testing on my local machine laptop. Commented Jan 23, 2018 at 3:13
  • 1) What is the size of your laptop's memory? 2) What is the average size of a document? Object.bsonsize(db.my_collection.findOne()) Commented Jan 24, 2018 at 5:57
  • 1) The laptop has 16GB RAM and a 1TB hard disk. 2) The size of a document is 339667 bytes, as returned by Object.bsonsize. Commented Jan 24, 2018 at 6:21

5 Answers 5

4
+50

You should use

db.collection.count({...})

instead of

db.collection.find({...}).count()

because count() can take a query directly as a parameter; see the db.collection.count() documentation for details.

So your query would be:

db.getCollection('my_collection').count({"document_type_id": {"$in": ["3", "7"]}})

Here is the explain of this query (finished in ~0.202s on my machine with MongoDB 3.6.2):

{
    "queryPlanner" : {
        "plannerVersion" : 1,
        "namespace" : "test.test",
        "indexFilterSet" : false,
        "parsedQuery" : {
            "document_type_id" : {
                "$in" : [
                    "3",
                    "7"
                ]
            }
        },
        "winningPlan" : {
            "stage" : "COUNT",
            "inputStage" : {
                "stage" : "IXSCAN",
                "keyPattern" : {
                    "document_type_id" : 1
                },
                "indexName" : "document_type_id_1",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "document_type_id" : [ ]
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "forward",
                "indexBounds" : {
                    "document_type_id" : [
                        "[\"3\", \"3\"]",
                        "[\"7\", \"7\"]"
                    ]
                }
            }
        },
        "rejectedPlans" : [ ]
    },
    "executionStats" : {
        "executionSuccess" : true,
        "nReturned" : 0,
        "executionTimeMillis" : 202,
        "totalKeysExamined" : 274112,
        "totalDocsExamined" : 0,
        "executionStages" : {
            "stage" : "COUNT",
            "nReturned" : 0,
            "executionTimeMillisEstimate" : 170,
            "works" : 274112,
            "advanced" : 0,
            "needTime" : 274111,
            "needYield" : 0,
            "saveState" : 2141,
            "restoreState" : 2141,
            "isEOF" : 1,
            "invalidates" : 0,
            "nCounted" : 274110,
            "nSkipped" : 0,
            "inputStage" : {
                "stage" : "IXSCAN",
                "nReturned" : 274110,
                "executionTimeMillisEstimate" : 140,
                "works" : 274112,
                "advanced" : 274110,
                "needTime" : 1,
                "needYield" : 0,
                "saveState" : 2141,
                "restoreState" : 2141,
                "isEOF" : 1,
                "invalidates" : 0,
                "keyPattern" : {
                    "document_type_id" : 1
                },
                "indexName" : "document_type_id_1",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "document_type_id" : [ ]
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "forward",
                "indexBounds" : {
                    "document_type_id" : [
                        "[\"3\", \"3\"]",
                        "[\"7\", \"7\"]"
                    ]
                },
                "keysExamined" : 274112,
                "seeks" : 2,
                "dupsTested" : 0,
                "dupsDropped" : 0,
                "seenInvalidated" : 0
            }
        }
    },
    "serverInfo" : {
        "host" : "xxxxxxx",
        "port" : 27017,
        "version" : "3.6.2",
        "gitVersion" : "489d177dbd0f0420a8ca04d39fd78d0a2c539420"
    },
    "ok" : 1
}
Sign up to request clarification or add additional context in comments.

8 Comments

Thank you, but I still got an execution time of 50 secs.
@fazalerabbi what version of mongodb are you using ?
Version is 3.4.9. You can also find in query explain.
What do you think, I need to update to version 3.6.2, or can you test this with 3.4.9?
@fazalerabbi just tested on Mongodb 3.4.2, query took ~0.5s... I guess there's something else going on here, but it's hard to know what exactly without the exact same data...
|
1
db.getCollection('my_collection').find({"document_type_id": {"$or": ["3", "7"]}}).count()

6 Comments

This $or will give you the same result and reduces the time to run the query
Thank you, But I am getting this error: 2018-01-20T12:50:31.673+0500 E QUERY [thread1] Error: count failed: { "ok" : 0, "errmsg" : "unknown operator: $or", "code" : 2, "codeName" : "BadValue" } :
That should be {"$or": [{"document_type_id": "3"}, {"document_type_id": "7"}]} - even if I really don't believe that that query is faster. Curious to see the results...
{"$or": [{"document_type_id": "3"}, {"document_type_id": "7"}]} Yes, this is the format; I just forgot it. I hope this performs better compared with $in.
It has same execution time (54 secs).
|
0

With db version v3.0.7, I get the output below:

{
    "queryPlanner" : {
        "plannerVersion" : 1,
        "namespace" : "test.test",
        "indexFilterSet" : false,
        "parsedQuery" : {
            "document_type_id" : {
                "$in" : [
                    "3",
                    "7"
                ]
            }
        },
        "winningPlan" : {
            "stage" : "COUNT",
            "inputStage" : {
                "stage" : "COLLSCAN",
                "filter" : {
                    "document_type_id" : {
                        "$in" : [
                            "3",
                            "7"
                        ]
                    }
                },
                "direction" : "forward"
            }
        },
        "rejectedPlans" : [ ]
    },
    "executionStats" : {
        "executionSuccess" : true,
        "nReturned" : 0,
        "executionTimeMillis" : 681,
        "totalKeysExamined" : 0,
        "totalDocsExamined" : 2000100,
        "executionStages" : {
            "stage" : "COUNT",
            "nReturned" : 0,
            "executionTimeMillisEstimate" : 630,
            "works" : 2000102,
            "advanced" : 0,
            "needTime" : 2000101,
            "needFetch" : 0,
            "saveState" : 15625,
            "restoreState" : 15625,
            "isEOF" : 1,
            "invalidates" : 0,
            "nCounted" : 400020,
            "nSkipped" : 0,
            "inputStage" : {
                "stage" : "COLLSCAN",
                "filter" : {
                    "document_type_id" : {
                        "$in" : [
                            "3",
                            "7"
                        ]
                    }
                },
                "nReturned" : 400020,
                "executionTimeMillisEstimate" : 600,
                "works" : 2000101,
                "advanced" : 400020,
                "needTime" : 1600081,
                "needFetch" : 0,
                "saveState" : 15625,
                "restoreState" : 15625,
                "isEOF" : 1,
                "invalidates" : 0,
                "direction" : "forward",
                "docsExamined" : 2000100
            }
        }
    },
    "serverInfo" : {
        "host" : "ptpll354",
        "port" : 5000,
        "version" : "3.0.7",
        "gitVersion" : "6ce7cbe8c6b899552dadd907604559806aa2e9bd"
    },
    "ok" : 1
}

1 Comment

Can you try it out on a fresh mongod instance?
0

It looks like MongoDB is using the multikey index, but is only using it to look up the min/max bounds because this is an $in query. You can read more here - https://docs.mongodb.com/manual/core/multikey-index-bounds/#intersect-bounds-for-multikey-index

Changing it to an $or should work:

db.my_collection.count({"$or": [{"document_type_id": "3"}, {"document_type_id": "7"}]})

1 Comment

document_type_id isn't a multikey field; each document has a single value, either "7" or "3".
0

Have you tried the aggregate framework?

db.my_collection.aggregate( [
   { $project : { "document_type_id" : 1 }},
   { $match : { "document_type_id": { "$in": ["3", "7"] } } },
   { $group : { _id : '$document_type_id', count: { $sum: 1} } }
])

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.