天天看點

mongodb的group和aggregate問題記錄

1.group 只支援單機分組,不支援分片叢集(sharded cluster)分組,這是個大坑

2.aggregate 支援分片叢集;group 自 MongoDB 3.4 起已棄用、4.2 起已移除,新版本建議使用 aggregate

3.資料模板

{
    "_id": "2204000000429520201204000000000",
    "mp_id": "22040000004295",
    "create_time": ISODate("2020-12-04T16:10:03.627Z"),
    "data_day": NumberInt("20201204"),
    "data_time": ISODate("2021-03-11T01:29:55.095Z"),
    "gd_id": "125F58D3-B632-43B0-B6D8-1EA90977F33C",
    "mn": "20150724000002",
    "protocol": "0",
    "update_time": ISODate("2020-12-04T16:10:03.627Z"),
    "values": [
        {
            "cou_value": null,
            "flag": null,
            "avg_value": null,
            "mstatus": NumberInt("6"),
            "pollutant_code": "001",
            "min_value": null,
            "avg_zs": null,
            "estatus": NumberInt("0"),
            "astatus": NumberInt("0"),
            "outlet_standard": "20",
            "max_zs": null,
            "min_zs": null,
            "max_value": null,
            "sstatus": NumberInt("0")
        },
        {
            "cou_value": null,
            "flag": null,
            "avg_value": null,
            "mstatus": NumberInt("6"),
            "pollutant_code": "002",
            "min_value": null,
            "avg_zs": null,
            "estatus": NumberInt("0"),
            "astatus": NumberInt("0"),
            "outlet_standard": "80",
            "max_zs": null,
            "min_zs": null,
            "max_value": null,
            "sstatus": NumberInt("0")
        },
        {
            "cou_value": null,
            "flag": null,
            "avg_value": null,
            "mstatus": NumberInt("6"),
            "pollutant_code": "003",
            "min_value": null,
            "avg_zs": null,
            "estatus": NumberInt("0"),
            "astatus": NumberInt("0"),
            "outlet_standard": "250",
            "max_zs": null,
            "min_zs": null,
            "max_value": null,
            "sstatus": NumberInt("0")
        },
        {
            "cou_value": null,
            "flag": null,
            "avg_value": null,
            "mstatus": NumberInt("6"),
            "pollutant_code": "004",
            "min_value": null,
            "avg_zs": null,
            "estatus": NumberInt("0"),
            "astatus": NumberInt("0"),
            "outlet_standard": "80",
            "max_zs": null,
            "min_zs": null,
            "max_value": null,
            "sstatus": NumberInt("0")
        },
        {
            "cou_value": null,
            "flag": null,
            "avg_value": null,
            "mstatus": NumberInt("6"),
            "pollutant_code": "008",
            "min_value": null,
            "avg_zs": null,
            "estatus": NumberInt("0"),
            "astatus": NumberInt("0"),
            "outlet_standard": "50",
            "max_zs": null,
            "min_zs": null,
            "max_value": null,
            "sstatus": NumberInt("0")
        },
        {
            "pollutant_code": "B02",
            "cou_value": null,
            "min_value": null,
            "flag": null,
            "avg_value": null,
            "outlet_standard": null,
            "max_value": null
        }
    ]
}
           

4.group 分組(cond 是篩選條件;key 是分組的字段,1 表示 true,即按此字段分組;initial 是聚合字段的初始值;reduce 對每個分組內的資料逐筆處理,其中 item 代表目前處理的那一筆資料,out 是該分組的累計結果,包含要輸出的字段;item.values.length 代表子集 values 的元素個數):

// Query window applied to data_time; both bounds are inclusive.
var start_time = ISODate("2021-03-10T16:05:46.186+0800");
var end_time = ISODate("2021-03-30T16:06:01.666+0800");

// Legacy group(): counts, per mp_id, how many entries the `values` arrays of
// the matching documents hold in total.
// NOTE: group() does not work on sharded collections, was deprecated in
// MongoDB 3.4 and removed in 4.2 -- prefer the aggregation pipeline.
db.collection.group({
    // Only documents whose data_time lies inside [start_time, end_time].
    cond: {
        "data_time": {
            "$gte": start_time,
            "$lte": end_time
        }
    },
    // mp_id is the real grouping field. total and type are not fields of the
    // sample document; they are the accumulator fields seeded by `initial`.
    key: {
        "mp_id": 1,
        "total": 1,
        "type": 1
    },
    // Starting state of every group's result document.
    initial: {
        "total": 0,
        "type": "day_air"
    },
    // Called once per matching document; `acc` is that group's running result.
    reduce: function Reduce(doc, acc) {
        // Documents without a values array contribute nothing.
        if (doc.values == null) {
            return;
        }
        acc.total += doc.values.length;
    }
});
           

查詢結果如下

[
	{
		"mp_id" : "1101050031132015",
		"total" : 64,
		"type" : "day_air"
	},
	{
		"mp_id" : "13810000000645",
		"total" : 208,
		"type" : "day_air"
	},
	{
		"mp_id" : "13810000000665",
		"total" : 208,
		"type" : "day_air"
	},
	{
		"mp_id" : "138200000001115",
		"total" : 208,
		"type" : "day_air"
	},
	{
		"mp_id" : "14010600000115",
		"total" : 208,
		"type" : "day_air"
	}
]
           

5.aggregate 管道聚合方式($match 是篩選條件;$group 為分組條件,其中 _id 必填,可以為 null(表示不分組、全部聚合為一組);total 是自定義的聚合字段,這裡累加的是子集的 $size;"$values" 代表文檔中的 values 子集;$addFields 可以添加自定義字段;$out 會將結果輸出到指定的集合)。

// Query window applied to data_time; both bounds are inclusive.
var start_time = ISODate("2021-03-10T16:05:46.186+0800");
var end_time = ISODate("2021-03-30T16:06:01.666+0800");

// Aggregation pipeline: per mp_id, sum the number of entries in the `values`
// arrays of all documents inside the time window, tag each result with a
// constant type, and store the results in the "temp" collection.
db.collection.aggregate([
    // Stage 1: keep only documents whose data_time lies inside the window.
    {
        $match: {
            "data_time": {
                "$gte": start_time,
                "$lte": end_time
            }
        }
    },
    // Stage 2: one result document per mp_id (exposed as _id).
    {
        $group: {
            _id: "$mp_id",
            total: {
                $sum: {
                    // $size fails the whole pipeline when its argument is
                    // missing, null, or not an array. Count such documents
                    // as 0 instead of aborting the pipeline.
                    $cond: [
                        { $isArray: "$values" },
                        { $size: "$values" },
                        0
                    ]
                }
            }
        }
    },
    // Stage 3: attach the constant marker field to every result.
    {
        $addFields: {
            "type": "day_air"
        }
    },
    // Stage 4: $out must be the last stage. It REPLACES the whole "temp"
    // collection with the pipeline results if that collection already exists.
    {
        $out: "temp"
    }
]);
           

輸出結果為

(原文此處為結果截圖,擷取時遺失。依上述管道,結果會寫入 temp 集合,每筆文檔的形式為 { "_id" : <mp_id>, "total" : <values 元素總數>, "type" : "day_air" }。)