# -*- coding:utf-8 -*-
2 # 存数据到mysql (只存了时间数字)
3 import pymysql
4 import csv
5 import datetime
6 import settings
7 from mysql
import db
8 import os,time
9 import pandas as pd
10 import numpy as np
11 import threading
12 from apscheduler.schedulers.blocking
import BlockingScheduler
13 from datetime
import date
14 import csv
15 import utils
16 # filename = ‘may_origin.csv‘
17 # tablename = ‘origin_data‘
18 class data_stored(object):
19 def __init__(self):
20 pass
21
22
23 def data_stored(self,filename,tablename):
24
25 csvfile = open(filename,
‘r‘)
26 dict_reader =
csv.DictReader(csvfile)
27 db.connect()
28 datas =
[]
29 freq =
0
30
31 for row
in dict_reader:
32 row =
dict(row)
33 if row[
‘‘]:
34 del(row[
‘‘])
35 #创建表
36 columns =
[]
37 for i
in row.keys():
38 columns.append(i)
39 if (db.is_table_exist(tablename=tablename,dbname=settings.database) ==None
and freq ==
0 ):
40 db.create_table(tablename=tablename,columns=
columns)
41 freq += 1
42 print(
"create is ok")
43 else:
44 pass
45 #插入数据
46 row[
‘time‘] = int(time.mktime(time.strptime(row[
‘time‘],
‘%Y-%m-%d %H:%M:%S‘)))
47 datas.append(row)
48 db.insert_mysql_with_json(tablename, datas)
49 print(
"insert is ok")
50 db.disconnect()
51
52 def data_to_csv(self,filename,tablename,starttime,endtime,readfile=None,sep=
None):
53
54 db.connect()
55 fieldNames, results =
db.find(tablename, starttime, endtime)
56
57
58 data =
[]
59
60 for fn
in fieldNames:
61 data.append(fn)
62 #文件不存在
63 if readfile ==
None:
64 csvfile = open(filename,
‘w‘)
65 writer = csv.writer(csvfile, dialect=(
"excel"))
66 # 插入列名
67 data_1 =
[]
68 data_1 = sorted(set(data), key=
data.index)
69 writer.writerow(data_1)
70 #文件存在
71 else:
72 csvfile = open(filename,
‘a‘)
73 writer = csv.writer(csvfile, dialect=(
"excel"))
74
75 # 插入data
76 for info
in results:
77 data_2 =
[]
78 # for m_2 in info:
79 # data_2.append(m_2)
80 writer.writerow(info)
81 csvfile.close()
82 db.disconnect()
83 # #
84 # if __name__ == "__main__":
85 # filename=‘may_origin.csv‘
86 # filename2=‘csvtest_05.csv‘
87 # tablename = ‘originData‘
88 # st = 1462032004
89 # et = 1462032007
90 #
91 # t = data_stored()
92 # starttime = datetime.datetime.now()
93 #
94 # # t.data_stored(filename,tablename)
95 # t.data_to_csv(filename2,tablename, st,et)
96 # endtime =datetime.datetime.now()
97 #
98 # print(endtime-starttime)
99 ds = data_stored()
View Code
其中调用了mysql.py的代码就不码了,就是一些连接、断开数据库connect()、disconnect(),创建数据库create_table(),判断tablename是否存在is_table_exist,以及查列名find_columns(),查找数据find(),还有插入数据库数据insert_into_mysql()。
1 import settings
2 from mysql import db
3 import os
4 import csv
5 import utils
6
7
8 def time_main(start_time, end_time, tablename, columns=None):
9 timespan = settings.timespan
10 output_filename = ‘sfhd_‘ + ‘_origin_‘ + utils.getDigitDay(start_time) + ‘.csv‘
11 db.connect()
12 # 判断输出文件是否存在 :False为不存在
13 if os.path.isfile(output_filename) == False:
14 pass
15 else:
16 end_time = start_time
17 start_time = start_time -86400
18 output_filename = ‘sfhd_‘ + ‘_origin_‘ + utils.getDigitDay(start_time) + ‘.csv‘
19
20
21 # 隔一个时间段timespan存一次
22 with open(output_filename, ‘w‘) as csvfile:
23
24 if columns == None:
25 columns = db.find_columns(tablename)
26 data = list(columns)
27
28 writer = csv.writer(csvfile, dialect=("excel"))
29 data_1 = sorted(set(data), key=data.index)
30 writer.writerow(data_1)
31
32 temp_time = start_time + timespan
33 current_time = start_time
34
35 while temp_time <= end_time+3:
36 utils.log_easy(‘time_main‘, utils.getTimeDes(temp_time))
37 fieldNames, results = db.find(tablename, current_time, temp_time-1, columns)
38 # 插入data
39 for info in results:
40 writer.writerow(info)
41 current_time = temp_time
42 temp_time = current_time + timespan
43 db.disconnect()
View Code
这是简化过后的mysql数据存到CSV文件中。
常见的mode取值组合
r或rt 默认模式,文本模式读
rb 二进制文件
w或wt 文本模式写,打开前文件存储被清空
wb 二进制写,文件存储同样被清空
a 追加模式,只能写在文件末尾
a+ 可读写模式,写只能写在文件末尾
w+ 可读写,与a+的区别是要清空文件内容
r+ 可读写,与a+的区别是可以写到文件任何位置
在选择读文件的方式时,我发现 with open 打开文件并存数据,要比直接open打开存数据要快。因为前者可以在存好数据后就自动关闭文件,并且可以很好处理上下文的异常,还有清理工作。
csv格式的数据存储到mysql
标签:list reader pass b2b tty dict schedule 处理 技术