当前位置:Gxlcms > 数据库问题 > (16)mongodb mapReduce分布式统计示例遇到的一个未解问题,求平均值不对,希望哪位大神给指点一下

(16)mongodb mapReduce分布式统计示例遇到的一个未解问题,求平均值不对,希望哪位大神给指点一下

时间:2021-07-01 10:21:17 帮助过:8人阅读

"_id" : "-100:15", "value" : 9 } { "_id" : "-105:-40", "value" : 1 } { "_id" : "-105:10", "value" : 1 } { "_id" : "-105:15", "value" : 6 } { "_id" : "-105:5", "value" : 1 } { "_id" : "-10:-25", "value" : 1 } { "_id" : "-110:15", "value" : 4 } { "_id" : "-110:20", "value" : 1 } { "_id" : "-110:25", "value" : 1 } { "_id" : "-115:-30", "value" : 1 } { "_id" : "-115:-35", "value" : 2 } { "_id" : "-115:20", "value" : 1 } { "_id" : "-115:25", "value" : 4 } { "_id" : "-120:30", "value" : 1 } { "_id" : "-120:35", "value" : 3 } { "_id" : "-125:-60", "value" : 1 } { "_id" : "-125:35", "value" : 1 } { "_id" : "-125:40", "value" : 2 } { "_id" : "-130:40", "value" : 2 } { "_id" : "-130:45", "value" : 4 } Type "it" for more mongos> it { "_id" : "-135:50", "value" : 8 } { "_id" : "-135:55", "value" : 2 } { "_id" : "-140:55", "value" : 3 } { "_id" : "-145:55", "value" : 1 } { "_id" : "-145:60", "value" : 1 } { "_id" : "-150:55", "value" : 1 } { "_id" : "-150:65", "value" : 1 } { "_id" : "-155:55", "value" : 6 } { "_id" : "-155:60", "value" : 2 } { "_id" : "-15:-20", "value" : 1 } { "_id" : "-15:-5", "value" : 1 } { "_id" : "-15:-60", "value" : 1 } { "_id" : "-15:70", "value" : 2 } { "_id" : "-160:15", "value" : 1 } { "_id" : "-160:55", "value" : 2 } { "_id" : "-160:65", "value" : 1 } { "_id" : "-165:50", "value" : 2 } { "_id" : "-170:50", "value" : 5 } { "_id" : "-175:-20", "value" : 16 } { "_id" : "-175:-25", "value" : 13 } Type "it" for more mongos> it { "_id" : "-175:50", "value" : 10 } { "_id" : "-180:-15", "value" : 2 } { "_id" : "-180:-20", "value" : 26 } { "_id" : "-180:-25", "value" : 23 } { "_id" : "-180:-30", "value" : 7 } { "_id" : "-180:-35", "value" : 22 } { "_id" : "-180:50", "value" : 9 } { "_id" : "-20:-40", "value" : 1 } { "_id" : "-20:-45", "value" : 1 } { "_id" : "-20:-5", "value" : 1 } { "_id" : "-20:-60", "value" : 1 } { "_id" : "-20:-65", "value" : 1 } { "_id" : "-25:-5", "value" : 1 } { "_id" : "-25:-60", "value" : 3 } { "_id" : "-30:-60", "value" : 15 } { 
"_id" : "-30:-65", "value" : 1 } { "_id" : "-35:-60", "value" : 1 } { "_id" : "-35:50", "value" : 1 } { "_id" : "-35:55", "value" : 2 } { "_id" : "-45:10", "value" : 1 } Type "it" for more View Code

  为了验证数据的正确性,可以将数据导入mysql中,以下面这三条数据为例子说明 mapReduce 的统计是正确的

  { "_id" : "-100:15", "value" : 9 }  select * from dzsj w WHERE  w.jing>=-100 and w.jing<-95 and w.wei>=15 and w.wei<20

  { "_id" : "-115:25", "value" : 4 }  select * from dzsj w WHERE w.jing>=-115 and w.jing<-110 and w.wei>=25 and w.wei<30

  { "_id" : "-155:55", "value" : 6 }  select * from dzsj w WHERE w.jing>=-155 and w.jing<-150 and w.wei>=55 and w.wei<60

5、在上面的基础上统计平均震级,这次只统计经纬度不小于0的(即 jing>=0 且 wei>=0),这里出现了问题尚未解决。

  // Average magnitude (lev) per 5x5-degree block, restricted to documents whose
  // longitude (jing) and latitude (wei) are both non-negative.
  //
  // The earlier version emitted this.lev and reduced with Array.avg(values).
  // mapReduce may call reduce several times for the same key and feed the
  // output of one call back in as an input of the next (on a sharded cluster
  // each shard reduces first and the partial results are reduced again), so
  // reduce must be associative and idempotent. Averaging partial averages is
  // neither: ((a+b)/2 + c)/2 is not (a+b+c)/3. The result listing shown after
  // this snippet was produced by that earlier version, which is why it
  // disagrees with SQL AVG().
  //
  // Fix: carry {sum, count} through map and reduce, and divide exactly once
  // in finalize, which runs a single time per key after all reducing is done.
  var map = function(){
    if(this.jing<0 || this.wei<0){
      return;
    }
    // Snap each coordinate down to the lower edge of its 5-degree cell.
    var j = Math.floor(this.jing / 5) * 5;
    var w = Math.floor(this.wei / 5) * 5;
    var block = j + ':' + w;
    // Emitted value has the same shape as reduce's return value, so reduce
    // can consume its own output.
    emit(block, {sum: this.lev, count: 1});
  }

  var reduce = function(block,values){
    var acc = {sum: 0, count: 0};
    for (var i = 0; i < values.length; i++) {
      acc.sum += values[i].sum;
      acc.count += values[i].count;
    }
    return acc;
  }

  // Turns the accumulated {sum, count} into the average, so the "value" field
  // in the output collection is still a plain number.
  var finalize = function(block,reduced){
    return reduced.sum / reduced.count;
  }

  db.dz.mapReduce(map,reduce,{out:'res',finalize:finalize});

  执行结果如下:

技术图片
mongos> db.res.find().sort({value:-1});
{ "_id" : "65:25", "value" : 7.5 }
{ "_id" : "140:65", "value" : 7.3 }
{ "_id" : "60:25", "value" : 7.050000000000001 }
{ "_id" : "95:50", "value" : 7 }
{ "_id" : "140:25", "value" : 6.920833333333333 }
{ "_id" : "150:50", "value" : 6.85 }
{ "_id" : "25:40", "value" : 6.8 }
{ "_id" : "95:5", "value" : 6.8 }
{ "_id" : "125:10", "value" : 6.783333333333333 }
{ "_id" : "165:50", "value" : 6.733333333333333 }
{ "_id" : "90:20", "value" : 6.666666666666667 }
{ "_id" : "160:50", "value" : 6.645 }
{ "_id" : "175:50", "value" : 6.608333333333333 }
{ "_id" : "125:30", "value" : 6.6 }
{ "_id" : "145:0", "value" : 6.6 }
{ "_id" : "90:0", "value" : 6.5166666666666675 }
{ "_id" : "155:50", "value" : 6.4875 }
{ "_id" : "45:30", "value" : 6.47 }
{ "_id" : "140:10", "value" : 6.45 }
{ "_id" : "135:30", "value" : 6.445833333333333 }
Type "it" for more
mongos> it
{ "_id" : "140:15", "value" : 6.4 }
{ "_id" : "145:15", "value" : 6.4 }
{ "_id" : "145:5", "value" : 6.4 }
{ "_id" : "135:35", "value" : 6.35 }
{ "_id" : "140:20", "value" : 6.300000000000001 }
{ "_id" : "95:15", "value" : 6.300000000000001 }
{ "_id" : "165:55", "value" : 6.3 }
{ "_id" : "160:55", "value" : 6.254166666666666 }
{ "_id" : "140:40", "value" : 6.239583333333333 }
{ "_id" : "125:5", "value" : 6.222916666666666 }
{ "_id" : "125:0", "value" : 6.217499999999999 }
{ "_id" : "5:70", "value" : 6.2 }
{ "_id" : "65:40", "value" : 6.2 }
{ "_id" : "155:45", "value" : 6.1899999999999995 }
{ "_id" : "120:10", "value" : 6.185714285714285 }
{ "_id" : "145:45", "value" : 6.175000000000001 }
{ "_id" : "170:50", "value" : 6.166666666666666 }
{ "_id" : "25:35", "value" : 6.154166666666667 }
{ "_id" : "120:0", "value" : 6.15 }
{ "_id" : "135:25", "value" : 6.15 }
Type "it" for more
mongos> 
View Code

 拿出三个数据来对比,发现并不是我们要的结果:

  { "_id" : "140:20", "value" : 6.300000000000001}  

  select AVG(lev) from dzsj w WHERE  w.jing>=140 and w.jing<145 and w.wei>=20 and w.wei<25  计算结果是 6.333333333333333 

  { "_id" : "145:45", "value" : 6.175000000000001 }

  select AVG(lev) from dzsj w WHERE  w.jing>=145 and w.jing<150 and w.wei>=45 and w.wei<50  计算结果是 6.08

  { "_id" : "160:55", "value" : 6.114285714285715 }

  select AVG(lev) from dzsj w WHERE  w.jing>=160 and w.jing<165 and w.wei>=55 and w.wei<60  计算结果是 6.050000000000001

我们先求一下和,执行以下代码:

技术图片
// Map step for the sum check: bucket each document into a 5x5-degree block and
// emit its magnitude (lev) under the key "<jing>:<wei>".
//
// Only documents with non-negative jing and wei are emitted, the same filter
// as the map used for the average earlier in this post. The post originally
// followed this definition with a second, unfiltered `var map` that silently
// replaced it; that duplicate is dropped so the sum covers the same documents
// as the average it is being compared against.
var map = function(){
  if(this.jing<0 || this.wei<0){
     return;
  }
  // Snap each coordinate down to the lower edge of its 5-degree cell.
  var j = Math.floor(this.jing / 5) * 5;
  var w = Math.floor(this.wei / 5) * 5;
  var block = j + ':' + w;
  emit(block,this.lev);
}

// Reduce step for the sum check: collapse every magnitude emitted for one
// block into its total. Array.sum is not standard JavaScript; it is expected
// to be supplied by the mongo shell environment this snippet runs in.
var reduce = function(block, values){
  var total = Array.sum(values);
  return total;
};

// Run the job over the dz collection and write the per-block results to the
// "res" collection. The collection name must be a quoted string; a bare `res`
// is an undefined identifier.
db.dz.mapReduce(map,reduce,{out:'res'});
View Code

查看一下部分结果:

技术图片
mongos> db.res.find({_id:"140:20"});
{ "_id" : "140:20", "value" : 19 }
mongos> db.res.find({_id:"145:45"});
{ "_id" : "145:45", "value" : 30.400000000000002 }
mongos> db.res.find({_id:"160:55"});
{ "_id" : "160:55", "value" : 48.400000000000006 }
mongos> 
View Code

与mysql中对比一下,发现求和是一样的。

  select SUM(lev) from dzsj w WHERE w.jing>=140 and w.jing<145 and w.wei>=20 and w.wei<25  19

  select SUM(lev) from dzsj w WHERE  w.jing>=145 and w.jing<150 and w.wei>=45 and w.wei<50  30.400000000000002

  select SUM(lev) from dzsj w WHERE  w.jing>=160 and w.jing<165 and w.wei>=55 and w.wei<60  48.400000000000006

再对比一下前面求的数量,发现数量也是一样的。

  { "_id" : "140:20", "value" : 3 }
  { "_id" : "145:45", "value" : 5 }
  { "_id" : "160:55", "value" : 8 }

  select count(1) from dzsj w WHERE w.jing>=140 and w.jing<145 and w.wei>=20 and w.wei<25  3
  select count(1) from dzsj w WHERE w.jing>=145 and w.jing<150 and w.wei>=45 and w.wei<50  5
  select count(1) from dzsj w WHERE w.jing>=160 and w.jing<165 and w.wei>=55 and w.wei<60  8

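这就奇怪了,求和一样,求数量也一样,计算出的平均值不一样,哪位好心人能发现问题希望能指点一二,谢谢!

原因分析:mapReduce 对同一个 key 可能多次调用 reduce(分片环境下各分片先各自 reduce,再把各分片的部分结果合并后再 reduce 一次),并且会把上一次 reduce 的返回值作为下一次 reduce 的输入。因此 reduce 必须满足结合律和幂等性。求和、计数满足这个要求,所以结果正确;而 Array.avg 不满足——例如 3 条记录被拆成 2+1 两批时,得到的是 ((a+b)/2 + c)/2,而不是 (a+b+c)/3。正确做法是在 map 中 emit {sum: this.lev, count: 1},在 reduce 中分别累加 sum 和 count,最后在 finalize 中用 sum/count 求平均值。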

  

  

 

(16)mongodb mapReduce分布式统计示例遇到的一个未解问题,求平均值不对,希望哪位大神给指点一下

标签:click   分布式   mysql   color   UNC   函数   均值   enables   block   

人气教程排行