当前位置:Gxlcms > 数据库问题 > MongoDB 练习一

MongoDB 练习一

时间:2021-07-01 10:21:17 帮助过:14人阅读

label: Argiope, uri: http://dbpedia.org/resource/Argiope_(spider), description: The genus Argiope includes rather large and spectacular spiders that often ..., name: Argiope, synonym: ["One", "Two"], classification: { family: Orb-weaver spider, class: Arachnid, phylum: Arthropod, order: Spider, kingdom: Animal, genus: None } }, { label: ... , }, ... ]
import codecs
import csv
import json
import pprint
import re

# Path of the CSV export that the exercise processes.
DATAFILE = "arachnid.csv"

# Maps each CSV column that should be kept to the key it gets in the
# generated documents. Columns that are not listed here are ignored.
FIELDS = {
    "rdf-schema#label": "label",
    "URI": "uri",
    "rdf-schema#comment": "description",
    "synonym": "synonym",
    "name": "name",
    "family_label": "family",
    "class_label": "class",
    "phylum_label": "phylum",
    "order_label": "order",
    "kingdom_label": "kingdom",
    "genus_label": "genus",
}


def process_file(filename, fields):
    """Read a CSV file and turn every data row into a cleaned dictionary.

    The first three rows after the header row are skipped. For every
    remaining row, each column named in ``fields`` is cleaned as follows:

    * surrounding whitespace is stripped;
    * the literal text ``NULL`` becomes ``None``;
    * for ``rdf-schema#label``, a parenthesised suffix such as
      ``(spider)`` is removed;
    * a ``name`` that is ``NULL`` is replaced by the row's stripped
      ``rdf-schema#label`` value;
    * a non-empty ``synonym`` is converted to a list via ``parse_array``.

    Values whose output key is one of kingdom/family/order/phylum/genus/class
    are stored under the nested ``"classification"`` dictionary; all other
    values are stored at the top level.

    Args:
        filename: Path of the CSV file to read.
        fields: Mapping of CSV column name -> output key.

    Returns:
        A list with one dictionary per data row.

    Raises:
        KeyError: If a column named in ``fields`` (or ``rdf-schema#label``,
            when it is needed as a fallback for ``name``) is missing.
    """
    classification_keys = ("kingdom", "family", "order",
                           "phylum", "genus", "class")
    data = []
    with open(filename, "r") as f:
        reader = csv.DictReader(f)
        # Skip the first three rows after the header. next(reader, None)
        # works on Python 2 and 3 and tolerates files that are too short.
        for _ in range(3):
            next(reader, None)

        for line in reader:
            res = {"classification": {}}
            # Use the mapping that was passed in rather than a module global,
            # so callers can supply their own field table.
            for field, new_key in fields.items():
                tmp_val = line[field].strip()
                if tmp_val == "NULL":
                    tmp_val = None

                if field == "rdf-schema#label" and tmp_val is not None:
                    # Drop the redundant "(...)" description from the label.
                    tmp_val = re.sub(r"\(.*\)", "", tmp_val).strip()

                if field == "name" and tmp_val is None:
                    # No usable name: fall back to the row's label text.
                    tmp_val = line["rdf-schema#label"].strip()

                if field == "synonym" and tmp_val:
                    # Pass the stripped value so padded cells still parse.
                    tmp_val = parse_array(tmp_val)

                if new_key in classification_keys:
                    res["classification"][new_key] = tmp_val
                else:
                    res[new_key] = tmp_val
            data.append(res)
    return data


def parse_array(v):
    """Convert a ``{a|b|c}`` style string into a list of stripped items.

    If ``v`` starts with ``{`` and ends with ``}``, the braces are removed,
    the remainder is split on ``|`` and every item is stripped of
    surrounding whitespace. Any other string (including the empty string)
    is returned unchanged as a one-element list.

    Args:
        v: The raw cell text.

    Returns:
        A list of strings.
    """
    # startswith/endswith are safe on an empty string, unlike v[0] / v[-1].
    if v.startswith("{") and v.endswith("}"):
        inner = v.lstrip("{").rstrip("}")
        return [item.strip() for item in inner.split("|")]
    return [v]
def test():
    """Run process_file on DATAFILE and check the result with assertions.

    Prints the first parsed entry, then asserts the number of entries and
    a handful of individual values. Raises AssertionError when a check
    fails; requires the file named by DATAFILE to be readable.
    """
    data = process_file(DATAFILE, FIELDS)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Your first entry:")
    pprint.pprint(data[0])
    first_entry = {
        "synonym": None,
        "name": "Argiope",
        "classification": {
            "kingdom": "Animal",
            "family": "Orb-weaver spider",
            "order": "Spider",
            "phylum": "Arthropod",
            "genus": None,
            "class": "Arachnid"
        },
        "uri": "http://dbpedia.org/resource/Argiope_(spider)",
        "label": "Argiope",
        "description": "The genus Argiope includes rather large and spectacular spiders that often have a strikingly coloured abdomen. These spiders are distributed throughout the world. Most countries in tropical or temperate climates host one or more species that are similar in appearance. The etymology of the name is from a Greek name meaning silver-faced."
    }

    assert len(data) == 76
    assert data[0] == first_entry
    assert data[17]["name"] == "Ogdenia"
    assert data[48]["label"] == "Hydrachnidiae"
    assert data[14]["synonym"] == ["Cyrene Peckham & Peckham"]


if __name__ == "__main__":
    test()

 

MongoDB 练习一

标签:out   左右   comm   括号   val   new   host   first   出错   

人气教程排行