时间:2021-07-01 10:21:17 帮助过:4人阅读
#!/bin/sh
# retention.sh
# Run on a daily schedule in the early morning to compute retention rates.
# Requires the mongo client tools (mongo, mongoexport).

# Change this to the real working directory; everything below runs inside it.
# Abort if it is missing so log/CSV files are never written somewhere else.
cd /home/jinq/retention/ || exit 1

# Change this to the address of the mongod server.
MONGODB=192.168.8.9

# Run the statistics script; its output is appended to log.txt.
mongo "${MONGODB}" retention.js >> log.txt

echo Mongo export retention result...
# Export the result collection, ordered by date, to a temporary CSV file.
# The sort document must be wrapped in plain ASCII single quotes.
mongoexport -h "${MONGODB}" -d mydb -c retention.result \
    --sort '{"value.date" : 1}' \
    -f value.date,value.register,value.1,value.2,value.3,value.4,value.5,value.6,value.7,value.14,value.30 \
    --type=csv -o retention_tmp.csv

DATE=`date +%Y%m%d`
FILE=retention_${DATE}.csv

# Replace the CSV header row: write our own column titles, then append
# everything from the export except its first (header) line.
echo 日期,注册数,1日,2日,3日,4日,5日,6日,7日,14日,30日 > "${FILE}"
tail -n +2 retention_tmp.csv >> "${FILE}"

echo Done ${FILE}!
// Retention-rate statistics script (for the mongo shell).
// Reference document: 留存率统计.txt
// Usage:
//   mongo my.mongo.host retention.js

print(Date());
db = db.getSisterDB("mydb"); // equivalent of "use mydb"

var startDate = getStartDate();
var endDate = formatDate(new Date());
print("Calculating retention rate of [" + startDate + ", " + endDate + ")...");
// Dates are "YYYY-MM-DD" strings, so string comparison orders them correctly.
if (startDate < endDate) {
    insertDefaultResult(startDate);
    calcRegisterCount(startDate);
    calcRetention(startDate);
    print(Date());
    print("Done.");
} else {
    print("Do nothing.");
}

// Internal functions.

// Returns the first date that still has to be processed, as a string such as
// "2015-01-01". Earlier dates already have results and are not redone.
// This is the largest date in retention.result plus one day. On the very
// first run retention.result is empty, so the earliest date found in
// retention.register is used instead.
function getStartDate() {
    var lastResultDate = getLastResultDate();
    if (null == lastResultDate) {
        return getFirstRegisterDate();
    }
    // Add one day.
    return getNextDate(lastResultDate);
}

// Returns the earliest date stored in retention.register.
// Only documents whose date is greater than "2015-09-01" are considered; the
// filter is there to exclude documents whose date is null.
// Falls back to today's date when no document matches.
function getFirstRegisterDate() {
    var cursor = db.retention.register.find(
        {date : {$gt : "2015-09-01"}},
        {_id : 0, date : 1}
    ).sort({date : 1}).limit(1);
    if (cursor.hasNext()) {
        return cursor.next().date;
    }
    return formatDate(new Date());
}

// Returns the latest date present in retention.result, e.g. "2015-01-01",
// or null when the collection is empty.
// The _id of each result document is its date string.
function getLastResultDate() {
    var cursor = db.retention.result.find({}, {_id : 1}).sort({_id : -1}).limit(1);
    if (cursor.hasNext()) {
        return cursor.next()._id;
    }
    return null;
}

// Left-pads a number below 10 with a "0" (7 -> "07"); other values are
// returned unchanged.
function add0(m) {
    return m < 10 ? '0' + m : m;
}

// Formats a Date object, using its local-time fields, as "2015-01-02".
function formatDate(date) {
    var y = date.getFullYear();
    var m = date.getMonth() + 1; // 1..12
    var d = date.getDate();
    return y + '-' + add0(m) + '-' + add0(d);
}

// Returns the calendar day following dateStr: "2015-12-31" -> "2016-01-01".
// The date is built from its numeric components and advanced by one calendar
// day instead of adding 24 hours of milliseconds: on a 25-hour daylight-saving
// day "midnight + 24h" is still the same date, which made the loop in
// insertDefaultResult() spin forever. Noon is used as the time of day because
// it exists on every day in every time zone.
function getNextDate(dateStr) {
    var parts = dateStr.split("-");
    var year = Number(parts[0]);
    var monthIndex = Number(parts[1]) - 1; // Date months are 0-based
    var day = Number(parts[2]);
    // Day overflow (e.g. the 32nd) is normalised by Date into the next month.
    var nextDate = new Date(year, monthIndex, day + 1, 12, 0, 0);
    return formatDate(nextDate);
}

// Sanity checks executed on every run.
assert(getNextDate("2015-12-31") == "2016-01-01");
assert(getNextDate("2015-01-01") == "2015-01-02");
assert(getNextDate("2015-01-31") == "2015-02-01");

// Inserts a default (register = 0) result document for every date in
// [startDateStr, today).
// A day without any new registration produces no map-reduce output, so its
// row has to be created explicitly.
function insertDefaultResult(startDateStr) {
    var docs = [];
    var endDateStr = formatDate(new Date());
    for (var dateStr = startDateStr;
         dateStr < endDateStr;
         dateStr = getNextDate(dateStr)) {
        docs.push({_id : dateStr, value : {date : dateStr, register : 0}});
    }
    // Nothing to insert when the range is empty.
    if (docs.length > 0) {
        db.retention.result.insert(docs);
    }
}

// Reads the retention.register collection and counts the new registrations
// of each day; the count is stored in retention.result as value.register.
// startDate looks like "2015-01-01"; documents with
// startDate <= date < today are processed.
function calcRegisterCount(startDate) {
    // Emit one {register: 1} per registration document, keyed by its date.
    var mapFunction = function() {
        var key = this.date;
        var value = {date : key, register : 1};
        emit(key, value);
    };

    // Sum the register counters emitted for one date.
    var reduceFunction = function(key, values) {
        var reducedObject = {date : key, register : 0};
        values.forEach(function(value) {
            reducedObject.register += value.register;
        });
        return reducedObject;
    };

    var endDate = formatDate(new Date());
    db.retention.register.mapReduce(mapFunction, reduceFunction, {
        query : {date : {$gte : startDate, $lt : endDate}},
        // "merge" replaces the result documents that have the same key.
        out : {merge : "retention.result"}
    });
}

// Reads the retention.login collection, computes the retention rates and
// stores them in the retention.result collection.
// startDate looks like "2015-01-01"; login documents with
// startDate <= date < today are processed.
function calcRetention(startDate) {
    // Emit {"<days>_count": 1} keyed by the registration date, where <days>
    // is the number of days between registration and this login.
    var mapFunction = function() {
        var key = this.register_date;
        var registerDateObj = new Date(this.register_date + " 00:00:00");
        var loginDateObj = new Date(this.date + " 00:00:00");
        // Round to a whole number: across a daylight-saving change the two
        // local midnights are 23 or 25 hours apart, and a fractional value
        // would produce a field name that the reducer never counts.
        var days = Math.round(
            (loginDateObj - registerDateObj) / (24 * 3600 * 1000));
        var value = {date : key, register : 0};
        var field = days + "_count"; // like: 1_count
        value[field] = 1;
        emit(key, value);
    };

    // Sum register and the 1_count .. 60_count counters for one date.
    // Because the output mode is "reduce", the document already stored in
    // retention.result for the key is folded in as one of the values.
    var reduceFunction = function(key, values) {
        var reducedObject = {date : key, register : 0};
        for (var i = 1; i <= 60; i++) {
            reducedObject[i + "_count"] = 0;
        }

        values.forEach(function(value) {
            reducedObject.register += value.register;
            for (var i = 1; i <= 60; i++) {
                var field = i + "_count"; // like: 1_count
                var count = value[field];
                if (null != count) {
                    reducedObject[field] += count;
                }
            }
        });
        return reducedObject;
    };

    // Convert each N_count into a percentage of the registrations and store
    // it under the field "N". Dates without registrations are left as they
    // are, which also avoids a division by zero.
    var finalizeFunction = function(key, reducedVal) {
        if (0 == reducedVal.register) {
            return reducedVal;
        }
        for (var i = 1; i <= 60; i++) {
            var field = i + "_count"; // like: 1_count
            var count = reducedVal[field];
            reducedVal[String(i)] = count * 100 / reducedVal.register;
        }
        return reducedVal;
    };

    var endDate = formatDate(new Date());
    db.retention.login.mapReduce(mapFunction, reduceFunction, {
        query : {date : {$gte : startDate, $lt : endDate}},
        out : {reduce : "retention.result"},
        finalize : finalizeFunction
    });
}
版权声明:本文为博主原创文章,未经博主允许不得转载。
MongoDb 用 mapreduce 统计留存率
标签: