1

I am querying for stock tickers that match the tickers.symbol array:

db.getCollection('reads').explain("executionStats").aggregate([
      { $match: { 'tickers.symbol': { $in: ['APPL'] } } },
      {
        $project: {
          content: 0,
          htm: 0,
        },
      },
      {
        $group: {
          _id: '$url',
          root: { $first: '$$ROOT' },
        },
      },
      { $sort: { 'root.createdAt': -1 } },
      { $limit: 50 },
      { $replaceRoot: { newRoot: '$root' } },
    ])

/* 1 */
{
    "stages" : [ 
        {
            "$cursor" : {
                "queryPlanner" : {
                    "plannerVersion" : 1,
                    "namespace" : "briskreader.reads",
                    "indexFilterSet" : false,
                    "parsedQuery" : {
                        "tickers.symbol" : {
                            "$eq" : "APPL"
                        }
                    },
                    "queryHash" : "301E9992",
                    "planCacheKey" : "97D88657",
                    "winningPlan" : {
                        "stage" : "FETCH",
                        "inputStage" : {
                            "stage" : "IXSCAN",
                            "keyPattern" : {
                                "tickers.symbol" : 1.0
                            },
                            "indexName" : "tickers.symbol_1",
                            "isMultiKey" : true,
                            "multiKeyPaths" : {
                                "tickers.symbol" : [ 
                                    "tickers"
                                ]
                            },
                            "isUnique" : false,
                            "isSparse" : false,
                            "isPartial" : false,
                            "indexVersion" : 2,
                            "direction" : "forward",
                            "indexBounds" : {
                                "tickers.symbol" : [ 
                                    "[\"APPL\", \"APPL\"]"
                                ]
                            }
                        }
                    },
                    "rejectedPlans" : [ 
                        {
                            "stage" : "FETCH",
                            "inputStage" : {
                                "stage" : "IXSCAN",
                                "keyPattern" : {
                                    "tickers.symbol" : 1.0,
                                    "createdAt" : -1.0
                                },
                                "indexName" : "tickers.symbol_1_createdAt_-1",
                                "isMultiKey" : true,
                                "multiKeyPaths" : {
                                    "tickers.symbol" : [ 
                                        "tickers"
                                    ],
                                    "createdAt" : []
                                },
                                "isUnique" : false,
                                "isSparse" : false,
                                "isPartial" : false,
                                "indexVersion" : 2,
                                "direction" : "forward",
                                "indexBounds" : {
                                    "tickers.symbol" : [ 
                                        "[\"APPL\", \"APPL\"]"
                                    ],
                                    "createdAt" : [ 
                                        "[MaxKey, MinKey]"
                                    ]
                                }
                            }
                        }
                    ]
                },
                "executionStats" : {
                    "executionSuccess" : true,
                    "nReturned" : 0,
                    "executionTimeMillis" : 118,
                    "totalKeysExamined" : 0,
                    "totalDocsExamined" : 0,
                    "executionStages" : {
                        "stage" : "FETCH",
                        "nReturned" : 0,
                        "executionTimeMillisEstimate" : 64,
                        "works" : 2,
                        "advanced" : 0,
                        "needTime" : 0,
                        "needYield" : 0,
                        "saveState" : 3,
                        "restoreState" : 3,
                        "isEOF" : 1,
                        "docsExamined" : 0,
                        "alreadyHasObj" : 0,
                        "inputStage" : {
                            "stage" : "IXSCAN",
                            "nReturned" : 0,
                            "executionTimeMillisEstimate" : 64,
                            "works" : 1,
                            "advanced" : 0,
                            "needTime" : 0,
                            "needYield" : 0,
                            "saveState" : 3,
                            "restoreState" : 3,
                            "isEOF" : 1,
                            "keyPattern" : {
                                "tickers.symbol" : 1.0
                            },
                            "indexName" : "tickers.symbol_1",
                            "isMultiKey" : true,
                            "multiKeyPaths" : {
                                "tickers.symbol" : [ 
                                    "tickers"
                                ]
                            },
                            "isUnique" : false,
                            "isSparse" : false,
                            "isPartial" : false,
                            "indexVersion" : 2,
                            "direction" : "forward",
                            "indexBounds" : {
                                "tickers.symbol" : [ 
                                    "[\"APPL\", \"APPL\"]"
                                ]
                            },
                            "keysExamined" : 0,
                            "seeks" : 1,
                            "dupsTested" : 0,
                            "dupsDropped" : 0
                        }
                    }
                }
            },
            "nReturned" : NumberLong(0),
            "executionTimeMillisEstimate" : NumberLong(0)
        }, 
        {
            "$project" : {
                "content" : false,
                "htm" : false
            },
            "nReturned" : NumberLong(0),
            "executionTimeMillisEstimate" : NumberLong(0)
        }, 
        {
            "$group" : {
                "_id" : "$url",
                "root" : {
                    "$first" : "$$ROOT"
                }
            },
            "nReturned" : NumberLong(0),
            "executionTimeMillisEstimate" : NumberLong(0)
        }, 
        {
            "$sort" : {
                "sortKey" : {
                    "root.createdAt" : -1
                },
                "limit" : NumberLong(50)
            },
            "nReturned" : NumberLong(0),
            "executionTimeMillisEstimate" : NumberLong(0)
        }, 
        {
            "$replaceRoot" : {
                "newRoot" : "$root"
            },
            "nReturned" : NumberLong(0),
            "executionTimeMillisEstimate" : NumberLong(0)
        }
    ],
    "serverInfo" : {
        "host" : "4da40b419852",
        "port" : 27017,
        "version" : "4.4.3",
        "gitVersion" : "913d6b62acfbb344dde1b116f4161360acd8fd13"
    },
    "ok" : 1.0
}

Here is an example doc:

/* 2 */
{
    "_id" : ObjectId("60073943ef54cc001c61ee7e"),
    "summary" : {
        "topics" : [ 
            "brain", 
            "virus", 
            "mice", 
            "kumar", 
            "lungs", 
            "covid19", 
            "disease", 
            "symptoms", 
            "severe", 
            "illness"
        ],
        "sentiment" : -0.0384615384615385,
        "words" : 605,
        "difficulty" : 0.766666666666667,
        "minutes" : 5
    },
    "views" : 1,
    "title" : "Study finds COVID-19 attack on brain, not lungs, triggers severe disease in mice: The findings have implications for understanding the wide range in symptoms and severity of illness among humans who are infected by SARS-CoV-2",
    "byline" : null,
    "dir" : null,
    "length" : 3333,
    "excerpt" : "Researchers have found that infecting the nasal passages of mice with the virus that causes COVID-19 led to a rapid, escalating attack on the brain that triggered severe illness, even after the lungs were successfully clearing themselves of the virus.",
    "siteName" : "ScienceDaily",
    "tickers" : [ 
        {
            "_id" : ObjectId("6007393aef54cc001c61ee7a"),
            "name" : "Visa Inc.",
            "symbol" : "V",
            "exchange" : "nyse"
        }
    ],
    "cryptos" : [],
    "meta" : {
        "title" : "Study finds COVID-19 attack on brain, not lungs, triggers severe disease in mice: The findings have implications for understanding the wide range in symptoms and severity of illness among humans who are infected by SARS-CoV-2 -- ScienceDaily",
        "thumb" : "https://www.sciencedaily.com/images/scidaily-icon.png"
    },
    "url" : "https://www.sciencedaily.com/releases/2021/01/210119114456.htm",
    "host" : "www.sciencedaily.com",
    "createdAt" : ISODate("2021-01-19T19:55:47.433Z"),
    "updatedAt" : ISODate("2021-01-19T19:55:47.433Z"),
    "shortId" : "9RVS9YmfYeY",
    "__v" : 0
}

The query works with small collections (dev) but in prod with 100k docs it timesout after 300 seconds.

5
  • move sort and limit before project Commented Jan 19, 2021 at 20:14
  • Maybe instead of root: { $first: '$$ROOT' } try createdAt: { $first: '$createdAt' }. Why fetching the entire document when you need only createdAt? Then you can also skip the $project stage Commented Jan 20, 2021 at 7:30
  • Another note, { $first: '$$ROOT' } (or { $first: '$createdAt' }) is not determined unless you put a $sort before. Maybe { $min: '$createdAt' } would be better if you need to get the first, i.e. earliest createdAt. Commented Jan 20, 2021 at 7:33
  • Add option {allowDiskUse: true} Commented Jan 20, 2021 at 7:53
  • I did. still hangs Commented Jan 20, 2021 at 23:58

1 Answer 1

1
  • As long as there is an index on createdAt, moving sort before project will do.

  • move project after limit

db.getCollection('reads').explain("executionStats").aggregate([
   { $match: { 'tickers.symbol': {$in:['APPL']} } },
   { $sort: { 'createdAt': -1 } },  
      {
        $group: {
          _id: '$url',
          root: { $first: '$$ROOT' },
        },
      },
      { $limit: 50 }, 
      {
        $project: {
          "root.content": 0,
          "root.htm": 0,
        },
      },
      { $replaceRoot: { newRoot: '$root' } },
    ])

$in

As it is written { 'tickers.symbol': {$in:['APPL'] }} should be written { 'tickers.symbol': 'APPL' }. But for many values, keep the original syntax.

Sign up to request clarification or add additional context in comments.

10 Comments

This will not return 50 documents. Due to $group it may return only one.
Yes, i'll move it back. I think the main issue is sort was after group and project though. The index being used is the one in $match only. @WernfriedDomscheit
does that match take an array? I have an array of tickers.
yes @chovy find will iterate by default. If you mean an array on the query (not in the database), then no. But that's not what you wrote on the query.
you lost me. there will be an array of stock tickers passed in. you changed from ['APPL'] to 'APPL'
|

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.