16.1 CSV文件格式
要在文本中存储数据,一个简单方式是将数据作为一系列以逗号分隔的值写入文件。这样的文件称为CSV文件。以下是一行CSV天气数据:
"USW00025333","SITKA AIRPORT, AK US","2018-07-01","0.25",,"62","50"
16.1.1 分析CSV文件头
import csv
# Read the weather CSV and display its header row.
filename = 'sitka_weather_07-2018_simple.csv'

# newline='' is what the csv module asks for so that quoted fields are parsed
# correctly; the explicit encoding avoids relying on the platform default.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    # The first row produced by the reader is treated as the header.
    header_row = next(reader)

print(header_row)
16.1.2 打印文件头及其位置
import csv
# Print every column header of the weather CSV together with its index.
filename = 'sitka_weather_07-2018_simple.csv'

# newline='' is what the csv module asks for so that quoted fields are parsed
# correctly; the explicit encoding avoids relying on the platform default.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    # The first row produced by the reader is treated as the header.
    header_row = next(reader)

# The printed index is the position later used to pick a column out of a row.
for index, column_header in enumerate(header_row):
    print(index, column_header)
16.1.3 提取并读取数据
import csv
# Collect the daily high temperatures from the weather CSV and print them.
filename = 'sitka_weather_07-2018_simple.csv'

# newline='' is what the csv module asks for so that quoted fields are parsed
# correctly; the explicit encoding avoids relying on the platform default.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    # Consume the header so only data rows remain in the reader.
    header_row = next(reader)

    # Read the high temperature (column 5) of every remaining row as an int.
    # This must happen while the file is still open.
    highs = [int(row[5]) for row in reader]

print(highs)
16.1.4 绘制温度图表
import csv
import matplotlib.pyplot as plt
# Plot the daily high temperatures read from the weather CSV.

# Select a font able to render the Chinese title and axis label.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
plt.style.use('ggplot')

filename = 'sitka_weather_07-2018_simple.csv'

# newline='' is what the csv module asks for so that quoted fields are parsed
# correctly; the explicit encoding avoids relying on the platform default.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    # Consume the header so only data rows remain in the reader.
    header_row = next(reader)

    # Read the high temperature (column 5) of every remaining row as an int.
    highs = [int(row[5]) for row in reader]

# Draw the highs as a red line.
fig, ax = plt.subplots()
ax.plot(highs, c='red')

# Format the chart: title, axis labels and tick label size.
ax.set_title("2018年7月每日的最高温度", fontsize=24)
ax.set_xlabel("", fontsize=16)
ax.set_ylabel("温度(F)", fontsize=16)
ax.tick_params(axis="both", which="major", labelsize=16)

plt.show()
16.1.5 模块datetime
from datetime import datetime
# Parse a year-month-day string into a datetime object and display it.
date_text = "2018-07-01"
date_format = "%Y-%m-%d"
first_date = datetime.strptime(date_text, date_format)
print(first_date)
16.1.6 在图表中添加日期
from datetime import datetime
import csv
import matplotlib.pyplot as plt
# Plot the daily high temperatures against their dates.

# Select a font able to render the Chinese title and axis label.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
plt.style.use('ggplot')

filename = 'sitka_weather_07-2018_simple.csv'

# newline='' is what the csv module asks for so that quoted fields are parsed
# correctly; the explicit encoding avoids relying on the platform default.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    # Consume the header so only data rows remain in the reader.
    header_row = next(reader)

    # Collect the date (column 2) and high temperature (column 5) of each row.
    dates, highs = [], []
    for row in reader:
        current_date = datetime.strptime(row[2], '%Y-%m-%d')
        high = int(row[5])
        dates.append(current_date)
        highs.append(high)

# Draw the highs as a red line over the dates.
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red')

# Format the chart; autofmt_xdate() slants the date labels so they do not overlap.
ax.set_title('2018年7月每日最高温度', fontsize=24)
ax.set_xlabel('', fontsize=16)
fig.autofmt_xdate()
ax.set_ylabel("温度(F)", fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=16)

plt.show()
16.1.7 涵盖更长的时间
from datetime import datetime
import csv
import matplotlib.pyplot as plt
# Plot the daily high temperatures from the full-year weather file.

# Select a font able to render the Chinese title and axis label.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
plt.style.use('ggplot')

filename = 'sitka_weather_2018_simple.csv'

# newline='' is what the csv module asks for so that quoted fields are parsed
# correctly; the explicit encoding avoids relying on the platform default.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    # Consume the header so only data rows remain in the reader.
    header_row = next(reader)

    # Collect the date (column 2) and high temperature (column 5) of each row.
    dates, highs = [], []
    for row in reader:
        current_date = datetime.strptime(row[2], '%Y-%m-%d')
        high = int(row[5])
        dates.append(current_date)
        highs.append(high)

# Draw the highs as a red line over the dates.
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red')

# Format the chart; autofmt_xdate() slants the date labels so they do not overlap.
ax.set_title('2018年每日最高温度', fontsize=24)
ax.set_xlabel('', fontsize=16)
fig.autofmt_xdate()
ax.set_ylabel("温度(F)", fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=16)

plt.show()
16.1.8 再绘制一个数据系列
from datetime import datetime
import csv
import matplotlib.pyplot as plt
# Plot the daily high and low temperatures on the same chart.

# Select a font able to render the Chinese title and axis label.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
plt.style.use('ggplot')

filename = 'sitka_weather_2018_simple.csv'

# newline='' is what the csv module asks for so that quoted fields are parsed
# correctly; the explicit encoding avoids relying on the platform default.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    # Consume the header so only data rows remain in the reader.
    header_row = next(reader)

    # Collect the date (column 2), high (column 5) and low (column 6) of each
    # row. All three values are parsed before any list is extended.
    dates, highs, lows = [], [], []
    for row in reader:
        current_date = datetime.strptime(row[2], '%Y-%m-%d')
        high = int(row[5])
        low = int(row[6])
        dates.append(current_date)
        highs.append(high)
        lows.append(low)

# Draw the highs in red and the lows in blue.
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red')
ax.plot(dates, lows, c='blue')

# Format the chart; autofmt_xdate() slants the date labels so they do not overlap.
ax.set_title('2018年每日最高和最低温度', fontsize=24)
ax.set_xlabel('', fontsize=16)
fig.autofmt_xdate()
ax.set_ylabel("温度(F)", fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=16)

plt.show()
16.1.9 给图表区域着色
from datetime import datetime
import csv
import matplotlib.pyplot as plt
# Plot daily highs and lows and shade the band between the two lines.

# Select a font able to render the Chinese title and axis label.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
plt.style.use('ggplot')

filename = 'sitka_weather_2018_simple.csv'

# newline='' is what the csv module asks for so that quoted fields are parsed
# correctly; the explicit encoding avoids relying on the platform default.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    # Consume the header so only data rows remain in the reader.
    header_row = next(reader)

    # Collect the date (column 2), high (column 5) and low (column 6) of each
    # row. All three values are parsed before any list is extended.
    dates, highs, lows = [], [], []
    for row in reader:
        current_date = datetime.strptime(row[2], '%Y-%m-%d')
        high = int(row[5])
        low = int(row[6])
        dates.append(current_date)
        highs.append(high)
        lows.append(low)

# Draw both series half-transparent, then tint the area between them.
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red', alpha=0.5)
ax.plot(dates, lows, c='blue', alpha=0.5)
ax.fill_between(dates, highs, lows, facecolor="blue", alpha=0.1)

# Format the chart; autofmt_xdate() slants the date labels so they do not overlap.
ax.set_title('2018年每日最高和最低温度', fontsize=24)
ax.set_xlabel('', fontsize=16)
fig.autofmt_xdate()
ax.set_ylabel("温度(F)", fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=16)

plt.show()
16.1.10 错误检查
Traceback (most recent call last):
File "/Volumes/job/python工程文件/shuju/16.1.10.py", line 16, in <module>
high = int(row[4])
ValueError: invalid literal for int() with base 10: ''
====================================================================
from datetime import datetime
import csv
import matplotlib.pyplot as plt
# Plot daily highs and lows, skipping rows whose temperature fields cannot be
# parsed as integers.

# Select a font able to render the Chinese title and axis label.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
plt.style.use('ggplot')

filename = 'death_valley_2018_simple.csv'

# newline='' is what the csv module asks for so that quoted fields are parsed
# correctly; the explicit encoding avoids relying on the platform default.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    # Consume the header so only data rows remain in the reader.
    header_row = next(reader)

    # Collect the date (column 2), high (column 4) and low (column 5) of each row.
    dates, highs, lows = [], [], []
    for row in reader:
        current_date = datetime.strptime(row[2], '%Y-%m-%d')
        try:
            high = int(row[4])
            low = int(row[5])
        except ValueError:
            # int('') raises ValueError for an empty field: report the date
            # and leave the row out of all three lists so they stay aligned.
            print(f'你缺少的是数据是{current_date}!')
        else:
            dates.append(current_date)
            highs.append(high)
            lows.append(low)

# Draw both series half-transparent, then tint the area between them.
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red', alpha=0.5)
ax.plot(dates, lows, c='blue', alpha=0.5)
ax.fill_between(dates, highs, lows, facecolor="blue", alpha=0.1)

# Format the chart; autofmt_xdate() slants the date labels so they do not overlap.
title = '2018年每日最高和最低温度 \n 美国加利福尼亚死亡谷'
ax.set_title(title, fontsize=20)
ax.set_xlabel('', fontsize=16)
fig.autofmt_xdate()
ax.set_ylabel("温度(F)", fontsize=14)
ax.tick_params(axis='both', which='major', labelsize=16)

plt.show()
16.2 制作全球地震散点图:JSON格式
16.2.2 查看JSON数据
import json
# Explore the structure of the data: load the earthquake JSON and write it
# back out in an indented, human-readable form.
filename = 'eq_data_1_day_m1.json'

# Open with an explicit UTF-8 encoding instead of the platform default.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

readable_file = 'readable_eq_data.json'
with open(readable_file, 'w', encoding='utf-8') as f:
    # indent=4 spreads the nested structure over multiple lines.
    json.dump(all_eq_data, f, indent=4)
16.2.3 创建地震列表
import json
# Explore the structure of the data: load the earthquake JSON and count the
# entries stored under its 'features' key.
filename = 'eq_data_1_day_m1.json'

# Open with an explicit UTF-8 encoding instead of the platform default.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

all_eq_dicts = all_eq_data['features']
print(len(all_eq_dicts))
16.2.4 提取震级
import json
# Explore the structure of the data: extract the magnitude of every
# earthquake and print the first ten.
filename = 'eq_data_1_day_m1.json'

# Open with an explicit UTF-8 encoding instead of the platform default.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

all_eq_dicts = all_eq_data['features']

# Each entry stores its magnitude under properties -> mag.
mags = [eq_dict['properties']['mag'] for eq_dict in all_eq_dicts]

print(mags[:10])
16.2.5 提取位置数据
import json
# Explore the structure of the data: extract magnitude, title, longitude and
# latitude of every earthquake and print a sample of each.
filename = 'eq_data_1_day_m1.json'

# Open with an explicit UTF-8 encoding instead of the platform default.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

all_eq_dicts = all_eq_data['features']

mags, titles, lons, lats = [], [], [], []
for eq_dict in all_eq_dicts:
    properties = eq_dict['properties']
    # The first two entries of 'coordinates' are read as longitude, latitude.
    coordinates = eq_dict['geometry']['coordinates']
    mags.append(properties['mag'])
    titles.append(properties['title'])
    lons.append(coordinates[0])
    lats.append(coordinates[1])

print(mags[:10])
print(titles[:2])
print(lons[:5])
print(lats[:5])
16.2.6 绘制震级散点图
import plotly.express as px
import json
# Draw a scatter plot of earthquake positions from the JSON data, save it as
# an HTML file and display it.
filename = 'eq_data_1_day_m1.json'

# Open with an explicit UTF-8 encoding instead of the platform default.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

all_eq_dicts = all_eq_data['features']

mags, titles, lons, lats = [], [], [], []
for eq_dict in all_eq_dicts:
    properties = eq_dict['properties']
    # The first two entries of 'coordinates' are read as longitude, latitude.
    coordinates = eq_dict['geometry']['coordinates']
    mags.append(properties['mag'])
    titles.append(properties['title'])
    lons.append(coordinates[0])
    lats.append(coordinates[1])

# Longitude goes on the x axis and latitude on the y axis; the axis ranges
# and the figure size are fixed explicitly.
fig = px.scatter(
    x=lons,
    y=lats,
    labels={'x': '经度', 'y': '纬度'},
    range_x=[-200, 200],
    range_y=[-90, 90],
    width=800,
    height=800,
    title='全球地震散点图',
)
fig.write_html('global_earthquakes.html')
fig.show()
16.2.7 另一种指定图表数据的方式
import plotly.express as px
import json
import pandas as pd
# Build a pandas DataFrame from the earthquake data and plot it by column name.
filename = 'eq_data_1_day_m1.json'

# Open with an explicit UTF-8 encoding instead of the platform default.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

all_eq_dicts = all_eq_data['features']

mags, titles, lons, lats = [], [], [], []
for eq_dict in all_eq_dicts:
    properties = eq_dict['properties']
    # The first two entries of 'coordinates' are read as longitude, latitude.
    coordinates = eq_dict['geometry']['coordinates']
    mags.append(properties['mag'])
    titles.append(properties['title'])
    lons.append(coordinates[0])
    lats.append(coordinates[1])

# One row per earthquake; the column names double as the axis labels.
data = pd.DataFrame(
    data=zip(lons, lats, titles, mags),
    columns=['经度', '纬度', '位置', '震级'],
)
# A bare data.head() shows nothing when run as a script, so print the preview.
print(data.head())

fig = px.scatter(
    data,
    x='经度',
    y='纬度',
    range_x=[-200, 200],
    range_y=[-90, 90],
    width=800,
    height=800,
    title='全球地震散点图',
)
fig.write_html('global_earthquakes.html')
fig.show()
16.2.9 定制标记的颜色
import plotly.express as px
import json
import pandas as pd
# Plot the 30-day earthquake data with marker size and colour driven by the
# magnitude column.
filename = 'eq_data_30_day_m1.json'

# Open with an explicit UTF-8 encoding instead of the platform default.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

all_eq_dicts = all_eq_data['features']

mags, titles, lons, lats = [], [], [], []
for eq_dict in all_eq_dicts:
    properties = eq_dict['properties']
    # The first two entries of 'coordinates' are read as longitude, latitude.
    coordinates = eq_dict['geometry']['coordinates']
    mags.append(properties['mag'])
    titles.append(properties['title'])
    lons.append(coordinates[0])
    lats.append(coordinates[1])

# One row per earthquake; the column names double as the axis labels.
data = pd.DataFrame(
    data=zip(lons, lats, titles, mags),
    columns=['经度', '纬度', '位置', '震级'],
)
# A bare data.head() shows nothing when run as a script, so print the preview.
print(data.head())

fig = px.scatter(
    data,
    x='经度',
    y='纬度',
    range_x=[-200, 200],
    range_y=[-90, 90],
    width=800,
    height=800,
    title='全球地震散点图',
    # Marker size and colour both follow the magnitude column.
    size='震级',
    size_max=10,
    color='震级',
)
fig.write_html('global_earthquakes.html')
fig.show()
16.2.10 其他渐变
Plotly Express有大量的渐变可供选择,要获悉有哪些渐变可用,请使用文件名show_color_scales.py保存下面这个简短的程序:
import plotly.express as px
# Print the name of every colour scale reported by Plotly Express, one per line.
scale_names = px.colors.named_colorscales()
for scale_name in scale_names:
    print(scale_name)
16.2.11 添加鼠标指向时显示的文本
import plotly.express as px
import json
import pandas as pd
# Plot the 30-day earthquake data with magnitude-driven markers and show each
# earthquake's location text when the mouse hovers over its marker.
filename = 'eq_data_30_day_m1.json'

# Open with an explicit UTF-8 encoding instead of the platform default.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

all_eq_dicts = all_eq_data['features']

mags, titles, lons, lats = [], [], [], []
for eq_dict in all_eq_dicts:
    properties = eq_dict['properties']
    # The first two entries of 'coordinates' are read as longitude, latitude.
    coordinates = eq_dict['geometry']['coordinates']
    mags.append(properties['mag'])
    titles.append(properties['title'])
    lons.append(coordinates[0])
    lats.append(coordinates[1])

# One row per earthquake; the column names double as the axis labels.
data = pd.DataFrame(
    data=zip(lons, lats, titles, mags),
    columns=['经度', '纬度', '位置', '震级'],
)
# A bare data.head() shows nothing when run as a script, so print the preview.
print(data.head())

fig = px.scatter(
    data,
    x='经度',
    y='纬度',
    range_x=[-200, 200],
    range_y=[-90, 90],
    width=800,
    height=800,
    title='全球地震散点图',
    # Marker size and colour both follow the magnitude column.
    size='震级',
    size_max=10,
    color='震级',
    # The location column supplies the hover title of each marker.
    hover_name='位置',
)
fig.write_html('global_earthquakes.html')
fig.show()
今天的内容相对比较简单,最大的难点在于调包时知其然而不知其所以然。真要用的话,基本上还是得靠现查,书中的描述还是太简单了点。
胭惜雨
2021年03月01日