当前位置:Gxlcms > 数据库问题 > MongoDB 查询小结

MongoDB 查询小结

时间:2021-07-01 10:21:17 帮助过:21人阅读

"layout" : "rear mid-engine rear-wheel-drive layout", "name" : "Porsche Boxster", "productionYears" : [ ], "modelYears" : [ ], "bodyStyle" : "roadster", "assembly" : [ "Finland", "Germany", "Stuttgart", "Uusikaupunki" ], "class" : "sports car", "manufacturer" : "Porsche" }
def porsche_query():
    """Build the query filter that matches cars manufactured by Porsche.

    Returns:
        dict: an equality filter on the ``manufacturer`` field.
    """
    # Field names and values must be string literals; as bare identifiers
    # they would be looked up as (undefined) Python names.
    query = {"manufacturer": "Porsche"}
    return query

例2:范围查询(找出在二十一世纪建成的所有城市,注意运算符 $gte、$lt)

 areaCode: [916],
 areaLand: 109271000.0,
 country: United States,
 elevation: 13.716,
 foundingDate: datetime.datetime(2000, 7, 1, 0, 0),
 governmentType: [Council\u2013manager government],
 homepage: [http://elkgrovecity.org/],
 isPartOf: [California, Sacramento County California],
 lat: 38.4383,
 leaderTitle: Chief Of Police,
 lon: -121.382,
 motto: Proud Heritage Bright Future,
 name: City of Elk Grove,
 population: 155937,
 postalCode: 95624 95757 95758 95759,
 timeZone: [Pacific Time Zone],
 utcOffset: [-7, -8]
def range_query():
    """Build the query filter for cities founded during the 21st century.

    Returns:
        dict: a range filter on ``foundingDate`` using ``$gte`` / ``$lt``.
    """
    # Imported locally because the surrounding file does not import datetime.
    from datetime import datetime

    # Use $gte / $lt to bound the range. The upper bound is exclusive, so it
    # is set to the first instant of 2101 to keep all of 31 Dec 2100 in range.
    query = {
        "foundingDate": {
            "$gte": datetime(2001, 1, 1),
            "$lt": datetime(2101, 1, 1),
        }
    }
    return query


    "layout" : "rear mid-engine rear-wheel-drive layout",
    "name" : "Porsche Boxster",
    "productionYears" : [ ],
    "modelYears" : [ ],
    "bodyStyle" : "roadster",
    "assembly" : [ "Finland", "Germany", "Stuttgart", "Uusikaupunki" ],
    "class" : "sports car",
    "manufacturer" : "Porsche"
def in_query():
    """Build the query filter for cars assembled in Germany, England or Japan.

    Returns:
        dict: an ``$in`` filter on the ``assembly`` field.
    """
    # Use $in to match documents whose assembly value is any of the listed
    # countries.
    query = {"assembly": {"$in": ["Germany", "England", "Japan"]}}
    return query

例4:点表示法 找出宽度大于 2.5 的所有汽车 

    "_id" : ObjectId("52fd438b5a98d65507d288cf"),
    "engine" : "Crawler-transporter__1",
    "dimensions" : {
        "width" : 34.7472,
        "length" : 39.9288,
        "weight" : 2721000
    },
    "transmission" : "16 traction motors powered by four  generators",
    "modelYears" : [ ],
    "productionYears" : [ ],
    "manufacturer" : "Marion Power Shovel Company",
    "name" : "Crawler-transporter"
def dot_query():
    """Build the query filter for cars whose width is greater than 2.5.

    Returns:
        dict: a ``$gt`` filter on the nested ``dimensions.width`` field.
    """
    # Dot notation addresses a child field inside a parent sub-document; the
    # dotted path has to be a single string key.
    query = {"dimensions.width": {"$gt": 2.5}}
    return query




    "_id" : ObjectId("5304e2e3cc9e684aa98bef97"),
    "text" : "First week of school is over :P",
    "in_reply_to_status_id" : null,
    "retweet_count" : null,
    "contributors" : null,
    "created_at" : "Thu Sep 02 18:11:25 +0000 2010",
    "geo" : null,
    "source" : "web",
    "coordinates" : null,
    "in_reply_to_screen_name" : null,
    "truncated" : false,
    "entities" : {
        "user_mentions" : [ ],
        "urls" : [ ],
        "hashtags" : [ ]
    },
    "retweeted" : false,
    "place" : null,
    "user" : {
        "friends_count" : 145,
        "profile_sidebar_fill_color" : "E5507E",
        "location" : "Ireland :)",
        "verified" : false,
        "follow_request_sent" : null,
        "favourites_count" : 1,
        "profile_sidebar_border_color" : "CC3366",
        "profile_image_url" : "http://a1.twimg.com/profile_images/1107778717/phpkHoxzmAM_normal.jpg",
        "geo_enabled" : false,
        "created_at" : "Sun May 03 19:51:04 +0000 2009",
        "description" : "",
        "time_zone" : null,
        "url" : null,
        "screen_name" : "Catherinemull",
        "notifications" : null,
        "profile_background_color" : "FF6699",
        "listed_count" : 77,
        "lang" : "en",
        "profile_background_image_url" : "http://a3.twimg.com/profile_background_images/138228501/149174881-8cd806890274b828ed56598091c84e71_4c6fd4d8-full.jpg",
        "statuses_count" : 2475,
        "following" : null,
        "profile_text_color" : "362720",
        "protected" : false,
        "show_all_inline_media" : false,
        "profile_background_tile" : true,
        "name" : "Catherine Mullane",
        "contributors_enabled" : false,
        "profile_link_color" : "B40B43",
        "followers_count" : 169,
        "id" : 37486277,
        "profile_use_background_image" : true,
        "utc_offset" : null
    },
    "favorited" : false,
    "in_reply_to_user_id" : null,
    "id" : NumberLong("22819398300")
def make_pipeline():
    """Build the aggregation pipeline that counts tweets per ``source``.

    Returns:
        list: pipeline stages, most frequent source first.
    """
    pipeline = [
        # 1. Group by source and store the size of each group in "count".
        {"$group": {"_id": "$source", "count": {"$sum": 1}}},
        # 2. Sort by the count field in descending order.
        {"$sort": {"count": -1}},
    ]
    return pipeline

例6:找出巴西利亚时区的用户,哪些用户发推次数不低于 100 次,哪些用户的关注者数量最多

def make_pipeline():
    """Build the pipeline that finds the most-followed active Brasilia user.

    Returns:
        list: pipeline stages yielding a single document with the
        ``followers``, ``screen_name`` and ``tweets`` fields.
    """
    pipeline = [
        # Keep users in the Brasilia time zone with at least 100 tweets.
        {"$match": {"user.time_zone": "Brasilia",
                    "user.statuses_count": {"$gte": 100}}},
        # Reshape each document down to the three fields of interest.
        {"$project": {"followers": "$user.followers_count",
                      "screen_name": "$user.screen_name",
                      "tweets": "$user.statuses_count"}},
        # Highest follower count first, then keep only the top entry.
        {"$sort": {"followers": -1}},
        {"$limit": 1},
    ]
    return pipeline



    "_id" : ObjectId("52fe1d364b5ab856eea75ebc"),
    "elevation" : 1855,
    "name" : "Kud",
    "country" : "India",
    "lon" : 75.28,
    "lat" : 33.08,
    "isPartOf" : [
        "Jammu and Kashmir",
        "Udhampur district"
    ],
    "timeZone" : [
        "Indian Standard Time"
    ],
    "population" : 1140
def make_pipeline():
    """Build the pipeline that counts Indian cities per ``isPartOf`` region.

    Returns:
        list: pipeline stages, region with the most cities first.
    """
    pipeline = [
        {"$match": {"country": "India"}},
        # isPartOf is an array; unwind emits one document per array element
        # so each region can be grouped on individually.
        {"$unwind": "$isPartOf"},
        {"$group": {"_id": "$isPartOf", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
    ]
    return pipeline


def make_pipeline():
    """Build the pipeline that lists the five users with the most tweets.

    Returns:
        list: pipeline stages yielding, per user, the collected tweet texts
        and the tweet count, limited to the top five by count.
    """
    pipeline = [
        # Group by screen name, collecting every tweet text and counting them.
        {"$group": {"_id": "$user.screen_name",
                    "tweet_texts": {"$push": "$text"},
                    "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
        {"$limit": 5},
    ]
    return pipeline


def make_pipeline():
    """Build the pipeline for the average regional city population in India.

    Returns:
        list: pipeline stages that average population per region and then
        average those regional averages into a single document.
    """
    pipeline = [
        {"$match": {"country": "India"}},
        # One document per (city, region) pair.
        {"$unwind": "$isPartOf"},
        # First pass: average city population within each region.
        {"$group": {"_id": "$isPartOf", "avgp": {"$avg": "$population"}}},
        # Second pass: a constant _id folds all regions into one group, whose
        # value is the average of the regional averages.
        {"$group": {"_id": "India Regional City Population avg",
                    "avg": {"$avg": "$avgp"}}},
    ]
    return pipeline



1.仅处理 FIELDS 字典中作为键的字段,并返回清理后的值字典列表


  1.根据 FIELDS 字典中的映射更改字典的键




  5.如果“synonym”中存在值,应将其转换为数组(列表),方法是删掉“{}”字符,并根据“|” 拆分字符串。剩下的清理方式将由你自行决定,例如删除前缀“*”等。如果存在单数同义词,值应该依然是列表格式。    



[ { label: Argiope,
    uri: http://dbpedia.org/resource/Argiope_(spider),
    description: The genus Argiope includes rather large and spectacular spiders that often ...,
    name: Argiope,
    synonym: ["One", "Two"],
    classification: {
                      family: Orb-weaver spider,
                      class: Arachnid,
                      phylum: Arthropod,
                      order: Spider,
                      kingdom: Animal,
                      genus: None
  { label: ... , }, ...
import codecs
import csv
import json
import pprint
import re

# Path of the CSV file to clean.
DATAFILE = "arachnid.csv"

# Maps each CSV column that should be processed to the key it gets in the
# cleaned output; columns not listed here are ignored.
FIELDS = {
    "rdf-schema#label": "label",
    "URI": "uri",
    "rdf-schema#comment": "description",
    "synonym": "synonym",
    "name": "name",
    "family_label": "family",
    "class_label": "class",
    "phylum_label": "phylum",
    "order_label": "order",
    "kingdom_label": "kingdom",
    "genus_label": "genus",
}

def process_file(filename, fields):
    """Read a CSV file and return its rows as cleaned dictionaries.

    Only the columns named as keys of ``fields`` are processed; each is
    stored under the output key that ``fields`` maps it to.

    Cleaning rules applied per value:
      * surrounding whitespace is stripped;
      * the literal ``NULL`` becomes ``None``;
      * a parenthesised suffix is removed from ``rdf-schema#label``;
      * a ``NULL`` ``name`` falls back to the row's label;
      * a non-empty ``synonym`` is converted to a list via ``parse_array``;
      * kingdom/family/order/phylum/genus/class are nested under the
        ``classification`` key instead of the top level.

    Args:
        filename: path of the CSV file to read.
        fields: dict mapping CSV column name -> output key.

    Returns:
        list of dict: one cleaned record per data row.
    """
    # Output keys that belong inside the nested "classification" dict.
    classification_keys = ("kingdom", "family", "order",
                           "phylum", "genus", "class")
    data = []
    with open(filename, "r") as f:
        reader = csv.DictReader(f)
        # Skip the three rows that follow the header line (presumably extra
        # metadata rows in the export -- confirm against the data file).
        # next() with a default keeps this from raising on a short file.
        for _ in range(3):
            next(reader, None)
        for line in reader:
            res = {"classification": {}}
            for field, new_key in fields.items():
                tmp_val = line[field].strip()
                if tmp_val == "NULL":
                    tmp_val = None
                if field == "rdf-schema#label" and tmp_val is not None:
                    # Drop a trailing "(...)" qualifier from the label.
                    tmp_val = re.sub(r"\(.*\)", "", tmp_val).strip()
                if field == "name" and tmp_val is None:
                    tmp_val = line["rdf-schema#label"].strip()
                if field == "synonym" and tmp_val:
                    tmp_val = parse_array(tmp_val)
                if new_key in classification_keys:
                    res["classification"][new_key] = tmp_val
                else:
                    res[new_key] = tmp_val
            # Without this the function would always return an empty list.
            data.append(res)
    return data

def parse_array(v):
    """Convert a synonym string into a list of stripped items.

    A value wrapped in braces, e.g. ``"{a| b}"``, is split on ``|``; any
    other non-empty value is returned as a single-element list.

    Args:
        v: the raw string value.

    Returns:
        list of str: the parsed items; an empty list for an empty string.
    """
    if not v:
        # Indexing v[0] on an empty string would raise IndexError.
        return []
    if v[0] == "{" and v[-1] == "}":
        inner = v.lstrip("{").rstrip("}")
        return [item.strip() for item in inner.split("|")]
    return [v]
def test():
    """Run process_file on DATAFILE and check a few known records."""
    data = process_file(DATAFILE, FIELDS)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Your first entry:")
    pprint.pprint(data[0])
    first_entry = {
        "synonym": None,
        "name": "Argiope",
        "classification": {
            "kingdom": "Animal",
            "family": "Orb-weaver spider",
            "order": "Spider",
            "phylum": "Arthropod",
            "genus": None,
            "class": "Arachnid"
        },
        "uri": "http://dbpedia.org/resource/Argiope_(spider)",
        "label": "Argiope",
        "description": "The genus Argiope includes rather large and spectacular spiders that often have a strikingly coloured abdomen. These spiders are distributed throughout the world. Most countries in tropical or temperate climates host one or more species that are similar in appearance. The etymology of the name is from a Greek name meaning silver-faced."
    }

    assert len(data) == 76
    assert data[0] == first_entry
    assert data[17]["name"] == "Ogdenia"
    assert data[48]["label"] == "Hydrachnidiae"
    assert data[14]["synonym"] == ["Cyrene Peckham & Peckham"]

if __name__ == "__main__":
    # Run the self-check when the file is executed as a script.
    test()


import json

def insert_data(data, db):
    """Insert the given records into the ``arachnid`` collection of ``db``.

    Args:
        data: a list of documents (dicts), or a single document dict.
        db: database handle exposing an ``arachnid`` collection.
    """
    if not data:
        # insert_many rejects an empty document list, so there is nothing
        # to do here.
        return
    # Collection.insert is deprecated and was removed in PyMongo 4; use the
    # explicit single / bulk variants instead.
    if isinstance(data, dict):
        db.arachnid.insert_one(data)
    else:
        db.arachnid.insert_many(data)

if __name__ == "__main__":
    # Script entry point: load the cleaned JSON and insert it into the
    # "examples" database on a local MongoDB server.
    from pymongo import MongoClient
    client = MongoClient("mongodb://localhost:27017")
    db = client.examples

    with open("arachnid.json") as f:
        data = json.loads(f.read())
        insert_data(data, db)
        # Print one stored document as a quick check of the insert.
        print(db.arachnid.find_one())



    "_id" : ObjectId("52fe1d364b5ab856eea75ebc"),
    "elevation" : 1855,
    "name" : "Kud",
    "country" : "India",


