1

I have the following Product Schema: (partial, using mongoose)

 attributes: [
      {
        set: {
          ref: 'AttributeSet',
          type: Schema.Types.ObjectId
        },
        items: [
          {
            attribute: {
              ref: 'Attributes',
              type: Schema.Types.ObjectId
            },
            values: [
              {
                ref: 'AttributeValues',
                type: Schema.Types.ObjectId
              }
            ]
          }
        ],
        _id: 0
      }
    ],

example document 1: (partial)

"attributes" : [
        {
            "set" : ObjectId("5ccc079c846de44116182890"),
            "items" : [
                {
                    "values" : [
                        ObjectId("5ccc0900846de441161828a1")
                    ],
                    "_id" : ObjectId("5dee638d72fa520f53d0d1c4"),
                    "attribute" : ObjectId("5ccc0900846de441161828a0")
                },
                {
                    "values" : [
                        ObjectId("5ccc0a51846de441161828cc")
                    ],
                    "_id" : ObjectId("5dee638d72fa520f53d0d1c3"),
                    "attribute" : ObjectId("5ccc0a51846de441161828cb")
                },
                {
                    "values" : [
                        ObjectId("5ccc0c7d846de44116182906")
                    ],
                    "_id" : ObjectId("5dee638d72fa520f53d0d1c2"),
                    "attribute" : ObjectId("5ccc0c7d846de44116182904")
                },
                {
                    "values" : [
                        ObjectId("5ccc0d64846de44116182911")
                    ],
                    "_id" : ObjectId("5dee638d72fa520f53d0d1c1"),
                    "attribute" : ObjectId("5ccc0d64846de4411618290f")
                },
                {
                    "values" : [
                        ObjectId("5ccc079f846de44116182892")
                    ],
                    "_id" : ObjectId("5def6acf66910405e07e1e9f"),
                    "attribute" : ObjectId("5ccc079f846de44116182891")
                }
            ]
        }
    ]

example document 2: (partial)

"attributes" : [
        {
            "set" : ObjectId("5ccc079c846de44116182890"), 
            "items" : [
                {
                    "values" : [
                        ObjectId("5ccc079f846de44116182892")
                    ], 
                    "_id" : ObjectId("5dee635c72fa520f53d0d1c0"), 
                    "attribute" : ObjectId("5ccc079f846de44116182891")
                }, 
                {
                    "values" : [
                        ObjectId("5ccc0900846de441161828a2")
                    ], 
                    "_id" : ObjectId("5dee635c72fa520f53d0d1bf"), 
                    "attribute" : ObjectId("5ccc0900846de441161828a0")
                }, 
                {
                    "values" : [
                        ObjectId("5ccc0ea4846de44116182941")
                    ], 
                    "_id" : ObjectId("5dee635c72fa520f53d0d1be"), 
                    "attribute" : ObjectId("5ccc0ea4846de44116182940")
                }, 
                {
                    "values" : [
                        ObjectId("5ccc08ba846de4411618289c")
                    ], 
                    "_id" : ObjectId("5def56c537e877042d5abeb5"), 
                    "attribute" : ObjectId("5ccc08ba846de4411618289a")
                }, 
                {
                    "values" : [
                        ObjectId("5ccc09ca846de441161828aa"), 
                        ObjectId("5ccc09ca846de441161828a9")
                    ], 
                    "_id" : ObjectId("5def56c537e877042d5abeb4"), 
                    "attribute" : ObjectId("5ccc09ca846de441161828a7")
                }
            ]
        }
    ], 

I want to aggregate and find all products that have attributes and then group the attributes in the output.

Pipeline:

db.getCollection("products").aggregate(
  [
    { $unwind: "$attributes" },
    {
      $group: {
        _id: "$attributes",
        attributes: { $first: "$attributes.items" }
      }
    },
    { $unwind: "$attributes" },
    {
      $lookup: {
        from: "attributes",
        let: { attribute: "$attributes.attribute" },
        pipeline: [
          {
            $match: {
              $expr: {
                $eq: ["$_id", "$$attribute"]
              }
            }
          },
          { $project: { display_name: 1, _id: 1 } }
        ],
        as: "attrs"
      }
    },
    {
      $lookup: {
        from: "attributevalues",
        let: { attribute: "$attributes.values" },
        pipeline: [
          {
            $match: {
              $expr: {
                $in: ["$_id", "$$attribute"]
              }
            }
          }
        ],
        as: "values"
      }
    },
    { $project: { attrs: 1, values: 1, _id: 0 } },
    {
      $group: { _id: "$attrs", items: { $push: "$values" }, total: { $sum: 1 } }
    }
  ],
  {
    allowDiskUse: true
  }
);

Pipeline output:

[{ 
    "_id" : [
        {
            "_id" : ObjectId("5ccc079f846de44116182891"), 
            "display_name" : "Caliber (cal.)"
        }
    ], 
    "items" : [
        [
            {
                "_id" : ObjectId("5ccc079f846de44116182892"), 
                "sort_order" : NumberInt(0),
                "label" : "12", 
                "attribute_id" : ObjectId("5ccc079f846de44116182891")
            }
        ], 
        [
            {
                "_id" : ObjectId("5ccc079f846de44116182892"), 
                "sort_order" : NumberInt(0),
                "label" : "12", 
                "attribute_id" : ObjectId("5ccc079f846de44116182891")
            }
        ]
    ], 
    "total" : 2.0
},
{
    "_id" : [
        {
            "_id" : ObjectId("5ccc0900846de441161828a0"),
            "display_name" : "Mechanism"
        }
    ],
    "items" : [
        [
            {
                "_id" : ObjectId("5ccc0900846de441161828a2"),
                "sort_order" : NumberInt(1),
                "label" : "Inaction",
                "attribute_id" : ObjectId("5ccc0900846de441161828a0")
            }
        ],
        [
            {
                "_id" : ObjectId("5ccc0900846de441161828a1"),
                "sort_order" : NumberInt(0),
                "label" : "Gas",
                "attribute_id" : ObjectId("5ccc0900846de441161828a0")
            }
        ]
    ],
    "total" : 2.0
}]

Problem is that e.x. in 1st element in the array, I have a duplicate inside the items array.

This is the desired output:

[{
    "_id" : [
        {
            "_id" : ObjectId("5ccc079f846de44116182891"),
            "display_name" : "Caliber (cal.)"
        }
    ],
    "items" : [
        [
            {
                "_id" : ObjectId("5ccc079f846de44116182892"),
                "sort_order" : NumberInt(0),
                "label" : "12",
                "attribute_id" : ObjectId("5ccc079f846de44116182891"),
                "total": 'current _id total, in this case it should be 2'
            }
        ],
        ...other items goes below, grouped as above
    ]
}]

6
  • Please post the sample documents which can produce above output Commented Dec 10, 2019 at 11:13
  • I've updated the answer and included document info from 2 products. Thanks Commented Dec 10, 2019 at 11:21
  • Unclear... Please try to explain with which field you want to $group by... because I can see you have multiple attributes fields. One is main and other is inside attiributes.items and one more inside attributes.items.values Commented Dec 10, 2019 at 11:43
  • attributes.items.values is the one that must be grouped. Sorry for being unclear, it's a little bit complex schema... Commented Dec 10, 2019 at 11:47
  • have you considered that maybe this is not the best schema/structure for this data since you need such complex query pattern? Or is this sort of aggregation the exception and normally the use of data is very straight forward queries? Commented Dec 12, 2019 at 15:29

1 Answer 1

2
+50

Add the solution to group the values by attribute and values and count the occurrences followed by look up and pushing all the values for the attribute with their count.

db.products.aggregate(
[
   {"$unwind":"$attributes"},
   {"$unwind":"$attributes.items"},
   {"$replaceRoot":{"newRoot":"$attributes.items"}},
   {"$unwind":"$values"},
   {"$group":{
      "_id":{"attribute":"$attribute","values":"$values"},
      "total":{"$sum":1}
   }},
   {"$lookup":{
      "from":"attributes",
      "let":{"attribute":"$_id.attribute"},
      "pipeline":[
        {"$match":{"$expr":{"$eq":["$_id","$$attribute"]}}},
        {"$project":{"display_name":1,"_id":1}}],
      "as":"attrs"
   }},
   {"$lookup":{
      "from":"attributevalues",
      "localField":"_id.values",
      "foreignField":"_id",
      "as":"values"
   }},
   {"$unwind":"$values"},
   {"$addFields":{"values.total":"$total"}},
   {"$group":{
     "_id":{"$arrayElemAt":["$attrs", 0]},
     "values":{"$push":"$values"}
   }}
])

Use the below aggregation query. Use $addToSet to keep unique values.

db.products.aggregate(
 [
  {"$unwind":"$attributes"},
  {"$unwind":"$attributes.items"},
  {"$replaceRoot":{"newRoot":"$attributes.items"}},
  {"$unwind":"$values"},
  {"$group":{
     "_id":"$attribute",
     "values":{"$addToSet":"$values"},
     "total":{"$sum":1}
  }},
  {"$lookup":{
     "from":"attributes",
     "let":{"attribute":"$_id"},
     "pipeline":[
       {"$match":{"$expr":{"$eq":["$_id","$$attribute"]}}},
       {"$project":{"display_name":1,"_id":1}}],
     "as":"attrs"
  }},
  {"$addFields":{"attrs":{"$arrayElemAt":["$attrs", 0]}},
  {"$lookup":{
     "from":"attributevalues",
     "localField":"values",
     "foreignField":"_id",
     "as":"values"
   }}
])

Old answer

You could use below aggregation query. I tried to clean up your current query and change to group only by values field.

Something like

db.products.aggregate(
[
  {"$unwind":"$attributes"},
  {"$unwind":"$attributes.items"},
  {"$replaceRoot":{"newRoot":"$attributes.items"}},
  {"$unwind":"$values"},
  {"$group":{
    "_id":"$values",
    "items":{"$first":"$$ROOT"},
    "total":{"$sum":1}
  }},
  {"$lookup":{
    "from":"attributes",
    "let":{"attribute":"$items.attribute"},
    "pipeline":[
      {"$match":{"$expr":{"$eq":["$_id","$$attribute"]}}},
      {"$project":{"display_name":1,"_id":1}}],
    "as":"attrs"
  }},
  {"$lookup":{
     "from":"attributevalues",
     "localField":"items.values",
     "foreignField":"_id",
     "as":"values"
  }},
  {"$unwind":"$values"},
  {"$group":{
    "_id":{"$arrayElemAt":["$attrs", 0]},
    "values":{"$push":"$values"},
    "total":{"$first":"$total"}
  }},
  {"$addFields":{"_id":0, "attribute":"$_id"}}
])
Sign up to request clarification or add additional context in comments.

14 Comments

thanks for replying, I've made the following modifications: In last $project $project: { attribute: { $arrayElemAt: ['$attribute', 0] }, values: 1, total: 1, _id: 0 } since attribute is always 1 (so no array needed) and added total in order to bring the number of products that have the values. One issue tho, I still get duplicate attribute as seen in this screenshot: prnt.sc/q9t7l8
You are welcome. So there is possibility of duplicates in couple of places. one is values and other one is attribute. I added $group on value to filter duplicate values. duplicate attributes are from another value document. how do you want me to filter this out ?
I can add $group by attributes and collect all the values. Would that work for you ?
There should be only one attribute that has values, e.x. {attribute: {display_name: 'caliber'}, values: [{label: 12}, {label:'32'}]
great! please rename $attrs to attrs in $addFields and everything will be perfect! :) Thanks again for your help.
|

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.