MongoDB aggregation framework – group with the sum of the nested document in arrays and count with filter

mongodb

MongoDB Aggregation Framework Query: $group, $project, $addFields with $reduce and sum.

Use Case: I have the multiple documents with nested documents array in the collection, need a result group by and the sum of each group item as cumulative volume. Also, have match parameter on year(date), if year matches then only that year document should group by and the sum of volume(nested document array) return.

Below are the documents in the collection:

{
    "_id": "1",
    "LSD": {
        "name": "TDL 05",
        "LSDNumber": "031"
    },
    "POD": [{           
            "Volume": 35.40,
            "VolUnit": "m3"
        },
        {           
            "Volume": 20.75,
            "VolUnit": "m3"
        },
        {
            "Volume": 15,
            "VolUnit": "m3"
        }
    ],
     "createdon": {
        "$date": "2014-08-02T18:49:17.000Z"
    }
},
{
    "_id": "2",
    "LSD": {
        "name": "Stock Watering",
        "LSDNumber": "01"
    },

    "POD": [{
            "Volume": 105,
            "VolUnit": "m3"
        },
        {
            "Volume": 70,
            "VolUnit": "m3"
        },
        {
            "Volume": 35,
            "VolUnit": "m3"
        }
    ],
     "createdon": {
        "$date": "2014-08-02T18:49:17.000Z"
    }
},
{
    "_id": "3",
    "LSD": {
        "name": "TDL 30 Stock Water",
        "LSDNumber": "030"
    },

    "POD": [{
        "Volume": 87,
        "VolUnit": "m3"
    }],
     "createdon": {
        "$date": "2019-08-02T18:49:17.000Z"
    }
},
{
    "_id": "4",
    "LSD": {
        "name": "TDL 30 Stock Water",
        "LSDNumber": "030"
    },
    "POD": [{
        "Volume": 25.12,
        "VolUnit": "m3"
    }],
     "createdon": {
        "$date": "2019-08-02T18:49:17.000Z"
    }
},
{
    "_id": "5",
    "LSD": {
        "name": "TDL 05",
        "LSDNumber": "031"
    },
    "POD": [
        {
            "Volume": 21,
            "VolUnit": "m3"
        }
    ],
     "createdon": {
        "$date": "2014-08-02T18:49:17.000Z"
    }
}

I have a query (C# Driver 2.0), group by "LSD.LSDNumber" and sum of "POD.Volume". No match parameter added here. This works fine.

Query:

{
    aggregate([{
        "$group": {
            "_id": "$LSD.LSDNumber",            
            "doc": {
                "$push": "$POD"
            },
            "data": {
                "$first": "$$ROOT"
            }
        }
    }, {
        "$addFields": {
            "LSDNumber": "$_id",            
            "GroupByDocCount": {
                "$size": "$doc"
            },
            "Cumulative": {
                "$reduce": {
                    "input": "$doc",
                    "initialValue": [],
                    "in": {
                        "$concatArrays": ["$$value", "$$this"]
                    }
                }
            }
        }
    }, {
        "$project": {
            "LSDNumber": 1,
            "GroupByDocCount": 1,           
            "CumulativeVol": {
                "$sum": "$Cumulative.Volume"
            }
        }
    }])
}

Below is the result.

{    
    "LSDNumber":"031",
    "GroupByDocCount": 2,
    "CumulativeVol": 92.15
},
{    
    "LSDNumber":"030",
    "GroupByDocCount": 2,
    "CumulativeVol": 112.12
},
{    
    "LSDNumber":"01",
    "GroupByDocCount": 1,
    "CumulativeVol": 210
}

However, I'd like to get the document match by year(on "createdon") date along with group by (LSD.LSDNumber) and the sum of volume (POD.Volume). For example, if the year is 2014 then bellow should be the result.

{    
    "LSDNumber":"031",
    "GroupByDocCount": 2,
    "CumulativeVol": 92.15,
    "Year": 2014
},
{    
    "LSDNumber":"01",
    "GroupByDocCount": 1,
    "CumulativeVol": 210,
    "Year": 2014
}

The query that I'm trying always returns nothing.

{
    aggregate([{
        "$project": {
            "LSDNumber": 1,
            "GroupByDocCount": 1,
            "CumulativeVol": {
                "$sum": "$Cumulative.Volume"
            },
            "year": {
                "$year": "$createdon"
            }
        }
    }, {
        "$match": {
            "year": 2014
        }
    }, {
        "$group": {
            "_id": "$LSD.LSDNumber",
            "year": {
                "$first": "$year"
            },
            "doc": {
                "$push": "$POD"
            },
            "data": {
                "$first": "$$ROOT"
            }
        }
    }, {
        "$addFields": {
            "LSDNumber": "$_id",
            "yearCreate": "$year",
            "GroupByDocCount": {
                "$size": "$doc"
            },
            "Cumulative": {
                "$reduce": {
                    "input": "$doc",
                    "initialValue": [],
                    "in": {
                        "$concatArrays": ["$$value", "$$this"]
                    }
                }
            }
        }
    }])
}

What's going wrong here. Any help would be appreciated!!

Best Answer

A bit late, but here's the answer. We just need to add one more project stage to the pipeline at the end-stage.

{
    aggregate([{
        "$project": {
            "LSDNumber": "$LSD.LSDNumber",
            "year": {
                "$year": "$createdon"
            },
            "PointOfDiversionVolumeDetails": 1
        }
    }, {
        "$match": {
            "year": 2014
        }
    }, {
        "$group": {
            "_id": "$LSDNumber",
            "doc": {
                "$push": "$PointOfDiversionVolumeDetails"
            }
        }
    }, {
        "$addFields": {
            "GroupByDocCount": {
                "$size": "$doc"
            },
            "Cumulative": {
                "$reduce": {
                    "input": "$doc",
                    "initialValue": [],
                    "in": {
                        "$concatArrays": ["$$value", "$$this"]
                    }
                }
            }
        }
    }, {
        "$project": {
            "CumulativeVol": {
                "$sum": "$Cumulative.Volume"
            },
            "LSDNumber": 1,
            "GroupByDocCount": 1
        }
    }, {
        "$sort": {
            "GroupByDocCount": -1
        }
    }])
}

Also, please take a look at alternative answer add Year variable in $addField pipeline.