《Python编程：从入门到实践》读书笔记(16)：下载数据

16.1 CSV文件格式
要在文本中存储数据，一个简单方式是将数据作为一系列以逗号分隔的值写入文件。这样的文件称为CSV文件。以下是一行CSV天气数据：
"USW00025333","SITKA AIRPORT, AK US","2018-07-01","0.25",,"62","50"

16.1.1 分析CSV文件头
import csv
filename = 'sitka_weather_07-2018_simple.csv'

# Print the header row of the weather CSV so the available columns are visible.
# newline='' leaves line-ending handling to the csv module (as its
# documentation recommends); the explicit encoding keeps decoding independent
# of the operating system's locale.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    # The first row returned by the reader is the header row.
    header_row = next(reader)
    print(header_row)

16.1.2 打印文件头及其位置
import csv
filename = 'sitka_weather_07-2018_simple.csv'

# Read only the header row; the file can be closed as soon as we have it.
# newline='' leaves line-ending handling to the csv module (as its
# documentation recommends); the explicit encoding keeps decoding independent
# of the operating system's locale.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    header_row = next(reader)

# Show every column name next to its index so later code knows which
# position to read from each row.
for index, column_header in enumerate(header_row):
    print(index, column_header)

16.1.3 提取并读取数据
import csv
filename = 'sitka_weather_07-2018_simple.csv'

# newline='' leaves line-ending handling to the csv module (as its
# documentation recommends); the explicit encoding keeps decoding independent
# of the operating system's locale.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    # Get the high temperatures from the file: column 5 of each data row,
    # converted from text to int.
    highs = [int(row[5]) for row in reader]

# The data is fully in memory, so printing happens after the file is closed.
print(highs)

16.1.4 绘制温度图表
import csv
import matplotlib.pyplot as plt
# NOTE(review): presumably this font is selected so the Chinese title and
# axis label render correctly - confirm it is installed on the target machine.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
plt.style.use('ggplot')
filename = 'sitka_weather_07-2018_simple.csv'

# newline='' leaves line-ending handling to the csv module (as its
# documentation recommends); the explicit encoding keeps decoding independent
# of the operating system's locale.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    # Get the high temperatures from the file (column 5, converted to int).
    highs = [int(row[5]) for row in reader]

# Plot the high temperatures as a red line; with no x values given, the
# list index is used for the x axis.
fig, ax = plt.subplots()
ax.plot(highs, c='red')

# Format the figure: title, axis labels and tick label size.
ax.set_title("2018年7月每日的最高温度", fontsize=24)
ax.set_xlabel("", fontsize=16)
ax.set_ylabel("温度(F)", fontsize=16)
ax.tick_params(axis="both", which="major", labelsize=16)

plt.show()

16.1.5 模块datetime
from datetime import datetime
# Parse a date given as text into a datetime object and display it.
# The format string tells strptime how the text is laid out:
# four-digit year, two-digit month, two-digit day, separated by hyphens.
date_text = "2018-07-01"
date_layout = "%Y-%m-%d"
first_date = datetime.strptime(date_text, date_layout)
print(first_date)

16.1.6 在图表中添加日期
from datetime import datetime
import csv
import matplotlib.pyplot as plt
# NOTE(review): presumably this font is selected so the Chinese title and
# axis label render correctly - confirm it is installed on the target machine.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
plt.style.use('ggplot')
filename = 'sitka_weather_07-2018_simple.csv'

# newline='' leaves line-ending handling to the csv module (as its
# documentation recommends); the explicit encoding keeps decoding independent
# of the operating system's locale.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    # Get the dates (column 2) and high temperatures (column 5) from the file.
    dates, highs = [], []
    for row in reader:
        current_date = datetime.strptime(row[2], '%Y-%m-%d')
        high = int(row[5])
        dates.append(current_date)
        highs.append(high)

# Everything needed is in memory now, so the plotting runs after the file has
# been closed instead of keeping it open while plt.show() blocks.

# Plot the high temperatures against their dates.
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red')

# Format the figure.
ax.set_title('2018年7月每日最高温度', fontsize=24)
ax.set_xlabel('', fontsize=16)

# Rotate the date labels so they do not overlap.
fig.autofmt_xdate()
ax.set_ylabel("温度(F)", fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=16)

plt.show()

16.1.7 涵盖更长的时间
from datetime import datetime
import csv
import matplotlib.pyplot as plt
# NOTE(review): presumably this font is selected so the Chinese title and
# axis label render correctly - confirm it is installed on the target machine.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
plt.style.use('ggplot')
filename = 'sitka_weather_2018_simple.csv'

# newline='' leaves line-ending handling to the csv module (as its
# documentation recommends); the explicit encoding keeps decoding independent
# of the operating system's locale.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    # Get the dates (column 2) and high temperatures (column 5) from the file.
    dates, highs = [], []
    for row in reader:
        current_date = datetime.strptime(row[2], '%Y-%m-%d')
        high = int(row[5])
        dates.append(current_date)
        highs.append(high)

# Everything needed is in memory now, so the plotting runs after the file has
# been closed instead of keeping it open while plt.show() blocks.

# Plot the high temperatures against their dates.
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red')

# Format the figure.
ax.set_title('2018年每日最高温度', fontsize=24)
ax.set_xlabel('', fontsize=16)

# Rotate the date labels so they do not overlap.
fig.autofmt_xdate()
ax.set_ylabel("温度(F)", fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=16)

plt.show()

16.1.8 再绘制一个数据系列
from datetime import datetime
import csv
import matplotlib.pyplot as plt
# NOTE(review): presumably this font is selected so the Chinese title and
# axis label render correctly - confirm it is installed on the target machine.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
plt.style.use('ggplot')
filename = 'sitka_weather_2018_simple.csv'

# newline='' leaves line-ending handling to the csv module (as its
# documentation recommends); the explicit encoding keeps decoding independent
# of the operating system's locale.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    # Get the dates (column 2), high temperatures (column 5) and low
    # temperatures (column 6) from the file.
    dates, highs, lows = [], [], []
    for row in reader:
        current_date = datetime.strptime(row[2], '%Y-%m-%d')
        high = int(row[5])
        low = int(row[6])
        dates.append(current_date)
        highs.append(high)
        lows.append(low)

# Everything needed is in memory now, so the plotting runs after the file has
# been closed instead of keeping it open while plt.show() blocks.

# Plot both series on the same axes: highs in red, lows in blue.
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red')
ax.plot(dates, lows, c='blue')

# Format the figure.
ax.set_title('2018年每日最高和最低温度', fontsize=24)
ax.set_xlabel('', fontsize=16)

# Rotate the date labels so they do not overlap.
fig.autofmt_xdate()
ax.set_ylabel("温度(F)", fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=16)

plt.show()

16.1.9 给图表区域着色
from datetime import datetime
import csv
import matplotlib.pyplot as plt
# NOTE(review): presumably this font is selected so the Chinese title and
# axis label render correctly - confirm it is installed on the target machine.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
plt.style.use('ggplot')
filename = 'sitka_weather_2018_simple.csv'

# newline='' leaves line-ending handling to the csv module (as its
# documentation recommends); the explicit encoding keeps decoding independent
# of the operating system's locale.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    # Get the dates (column 2), high temperatures (column 5) and low
    # temperatures (column 6) from the file.
    dates, highs, lows = [], [], []
    for row in reader:
        current_date = datetime.strptime(row[2], '%Y-%m-%d')
        high = int(row[5])
        low = int(row[6])
        dates.append(current_date)
        highs.append(high)
        lows.append(low)

# Everything needed is in memory now, so the plotting runs after the file has
# been closed instead of keeping it open while plt.show() blocks.

# Plot both series semi-transparently and shade the band between them.
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red', alpha=0.5)
ax.plot(dates, lows, c='blue', alpha=0.5)
ax.fill_between(dates, highs, lows, facecolor="blue", alpha=0.1)

# Format the figure.
ax.set_title('2018年每日最高和最低温度', fontsize=24)
ax.set_xlabel('', fontsize=16)

# Rotate the date labels so they do not overlap.
fig.autofmt_xdate()
ax.set_ylabel("温度(F)", fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=16)

plt.show()

16.1.10 错误检查
Traceback (most recent call last):
  File "/Volumes/job/python工程文件/shuju/16.1.10.py", line 16, in <module>
    high = int(row[4])
ValueError: invalid literal for int() with base 10: ''
====================================================================
from datetime import datetime
import csv
import matplotlib.pyplot as plt
# NOTE(review): presumably this font is selected so the Chinese title and
# axis label render correctly - confirm it is installed on the target machine.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
plt.style.use('ggplot')
filename = 'death_valley_2018_simple.csv'

# newline='' leaves line-ending handling to the csv module (as its
# documentation recommends); the explicit encoding keeps decoding independent
# of the operating system's locale.
with open(filename, newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    # Get the dates (column 2), high temperatures (column 4) and low
    # temperatures (column 5) from the file.
    dates, highs, lows = [], [], []
    for row in reader:
        current_date = datetime.strptime(row[2], '%Y-%m-%d')
        try:
            # int('') raises ValueError when a reading is missing.
            high = int(row[4])
            low = int(row[5])
        except ValueError:
            # Report the incomplete day and skip it instead of crashing.
            print(f'你缺少的是数据是{current_date}！')
        else:
            # Only days with both readings are kept, so the three lists
            # always stay the same length.
            dates.append(current_date)
            highs.append(high)
            lows.append(low)

# Everything needed is in memory now, so the plotting runs after the file has
# been closed instead of keeping it open while plt.show() blocks.

# Plot both series semi-transparently and shade the band between them.
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red', alpha=0.5)
ax.plot(dates, lows, c='blue', alpha=0.5)
ax.fill_between(dates, highs, lows, facecolor="blue", alpha=0.1)

# Format the figure; the title spans two lines.
title = '2018年每日最高和最低温度 \n 美国加利福尼亚死亡谷'
ax.set_title(title, fontsize=20)
ax.set_xlabel('', fontsize=16)

# Rotate the date labels so they do not overlap.
fig.autofmt_xdate()
ax.set_ylabel("温度(F)", fontsize=14)
ax.tick_params(axis='both', which='major', labelsize=16)

plt.show()

16.2 制作全球地震散点图：JSON格式

16.2.2 查看JSON数据
import json
# Explore the structure of the data.
filename = 'eq_data_1_day_m1.json'
# JSON text is UTF-8, so decode it explicitly rather than with the
# locale-dependent default encoding.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

# Write the same data back out with indentation so it is easy to read.
readable_file = 'readable_eq_data.json'
with open(readable_file, 'w', encoding='utf-8') as f:
    json.dump(all_eq_data, f, indent=4)


16.2.3 创建地震列表
import json
# Explore the structure of the data.
filename = 'eq_data_1_day_m1.json'
# JSON text is UTF-8, so decode it explicitly rather than with the
# locale-dependent default encoding.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

# The value under 'features' is the list of earthquakes; print how many
# entries it contains.
all_eq_dicts = all_eq_data['features']
print(len(all_eq_dicts))

16.2.4 提取震级
import json
# Explore the structure of the data.
filename = 'eq_data_1_day_m1.json'
# JSON text is UTF-8, so decode it explicitly rather than with the
# locale-dependent default encoding.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

all_eq_dicts = all_eq_data['features']

# Collect the magnitude stored under properties -> mag for every earthquake.
mags = [eq_dict['properties']['mag'] for eq_dict in all_eq_dicts]

# Show the first ten magnitudes as a quick sanity check.
print(mags[:10])

16.2.5 提取位置数据
import json
# Explore the structure of the data.
filename = 'eq_data_1_day_m1.json'
# JSON text is UTF-8, so decode it explicitly rather than with the
# locale-dependent default encoding.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

all_eq_dicts = all_eq_data['features']

# Build four parallel lists: magnitude, title, longitude and latitude.
mags, titles, lons, lats = [], [], [], []
for eq_dict in all_eq_dicts:
    properties = eq_dict['properties']
    # The first coordinate is used as the longitude, the second as the latitude.
    coordinates = eq_dict['geometry']['coordinates']
    mags.append(properties['mag'])
    lons.append(coordinates[0])
    lats.append(coordinates[1])
    titles.append(properties['title'])

# Print a few entries from each list as a quick sanity check.
print(mags[:10])
print(titles[:2])
print(lons[:5])
print(lats[:5])

16.2.6 绘制震级散点图
import plotly.express as px
import json
# Explore the structure of the data.
filename = 'eq_data_1_day_m1.json'
# JSON text is UTF-8, so decode it explicitly rather than with the
# locale-dependent default encoding.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

all_eq_dicts = all_eq_data['features']

# Build four parallel lists: magnitude, title, longitude and latitude.
mags, titles, lons, lats = [], [], [], []
for eq_dict in all_eq_dicts:
    properties = eq_dict['properties']
    # The first coordinate is used as the longitude, the second as the latitude.
    coordinates = eq_dict['geometry']['coordinates']
    mags.append(properties['mag'])
    lons.append(coordinates[0])
    lats.append(coordinates[1])
    titles.append(properties['title'])

# Scatter plot with longitude on the x axis and latitude on the y axis.
# The labels dict replaces the default 'x'/'y' axis names.
fig = px.scatter(
    x=lons,
    y=lats,
    labels={'x': '经度', 'y': '纬度'},
    range_x=[-200, 200],
    range_y=[-90, 90],
    width=800,
    height=800,
    title='全球地震散点图',
)
# Save the chart as a standalone HTML page, then display it.
fig.write_html('global_earthquakes.html')
fig.show()

16.2.7 另一种指定图表数据的方式
import plotly.express as px
import json
import pandas as pd


# Explore the structure of the data.
filename = 'eq_data_1_day_m1.json'
# JSON text is UTF-8, so decode it explicitly rather than with the
# locale-dependent default encoding.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

all_eq_dicts = all_eq_data['features']

# Build four parallel lists: magnitude, title, longitude and latitude.
mags, titles, lons, lats = [], [], [], []
for eq_dict in all_eq_dicts:
    properties = eq_dict['properties']
    # The first coordinate is used as the longitude, the second as the latitude.
    coordinates = eq_dict['geometry']['coordinates']
    mags.append(properties['mag'])
    lons.append(coordinates[0])
    lats.append(coordinates[1])
    titles.append(properties['title'])

# Bundle the lists into one DataFrame, one row per earthquake, so the chart
# can refer to the data by column name.
data = pd.DataFrame(
    data=zip(lons, lats, titles, mags),
    columns=['经度', '纬度', '位置', '震级'],
)
# Preview of the first rows; the result is only displayed in an interactive
# session or notebook - in a plain script it is discarded.
data.head()

# Scatter plot of longitude against latitude, selected by column name.
fig = px.scatter(
    data,
    x='经度',
    y='纬度',
    range_x=[-200, 200],
    range_y=[-90, 90],
    width=800,
    height=800,
    title='全球地震散点图',
)
# Save the chart as a standalone HTML page, then display it.
fig.write_html('global_earthquakes.html')
fig.show()

16.2.9 定制标记的颜色
import plotly.express as px
import json
import pandas as pd


# Explore the structure of the data.
filename = 'eq_data_30_day_m1.json'
# JSON text is UTF-8, so decode it explicitly rather than with the
# locale-dependent default encoding.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

all_eq_dicts = all_eq_data['features']

# Build four parallel lists: magnitude, title, longitude and latitude.
mags, titles, lons, lats = [], [], [], []
for eq_dict in all_eq_dicts:
    properties = eq_dict['properties']
    # The first coordinate is used as the longitude, the second as the latitude.
    coordinates = eq_dict['geometry']['coordinates']
    mags.append(properties['mag'])
    lons.append(coordinates[0])
    lats.append(coordinates[1])
    titles.append(properties['title'])

# Bundle the lists into one DataFrame, one row per earthquake, so the chart
# can refer to the data by column name.
data = pd.DataFrame(
    data=zip(lons, lats, titles, mags),
    columns=['经度', '纬度', '位置', '震级'],
)
# Preview of the first rows; the result is only displayed in an interactive
# session or notebook - in a plain script it is discarded.
data.head()

# Scatter plot of longitude against latitude; the magnitude column drives
# both the marker size (capped by size_max) and the marker colour.
fig = px.scatter(
    data,
    x='经度',
    y='纬度',
    range_x=[-200, 200],
    range_y=[-90, 90],
    width=800,
    height=800,
    title='全球地震散点图',
    size='震级',
    size_max=10,
    color='震级',
)
# Save the chart as a standalone HTML page, then display it.
fig.write_html('global_earthquakes.html')
fig.show()

16.2.10 其他渐变
Plotly Express有大量的渐变可供选择。要获悉有哪些渐变可用，请使用文件名show_color_scales.py保存下面这个简短的程序：
import plotly.express as px
# List the name of every colour scale Plotly Express provides, one per line.
scale_names = px.colors.named_colorscales()
for scale_name in scale_names:
    print(scale_name)

16.2.11 添加鼠标指向时显示的文本
import plotly.express as px
import json
import pandas as pd


# Explore the structure of the data.
filename = 'eq_data_30_day_m1.json'
# JSON text is UTF-8, so decode it explicitly rather than with the
# locale-dependent default encoding.
with open(filename, encoding='utf-8') as f:
    all_eq_data = json.load(f)

all_eq_dicts = all_eq_data['features']

# Build four parallel lists: magnitude, title, longitude and latitude.
mags, titles, lons, lats = [], [], [], []
for eq_dict in all_eq_dicts:
    properties = eq_dict['properties']
    # The first coordinate is used as the longitude, the second as the latitude.
    coordinates = eq_dict['geometry']['coordinates']
    mags.append(properties['mag'])
    lons.append(coordinates[0])
    lats.append(coordinates[1])
    titles.append(properties['title'])

# Bundle the lists into one DataFrame, one row per earthquake, so the chart
# can refer to the data by column name.
data = pd.DataFrame(
    data=zip(lons, lats, titles, mags),
    columns=['经度', '纬度', '位置', '震级'],
)
# Preview of the first rows; the result is only displayed in an interactive
# session or notebook - in a plain script it is discarded.
data.head()

# Scatter plot of longitude against latitude; the magnitude column drives
# both the marker size (capped by size_max) and the marker colour, and the
# location column is passed as hover_name so it appears when pointing at a
# marker.
fig = px.scatter(
    data,
    x='经度',
    y='纬度',
    range_x=[-200, 200],
    range_y=[-90, 90],
    width=800,
    height=800,
    title='全球地震散点图',
    size='震级',
    size_max=10,
    color='震级',
    hover_name='位置',
)
# Save the chart as a standalone HTML page, then display it.
fig.write_html('global_earthquakes.html')
fig.show()

今天的内容相对比较简单，最大的难点在于调包时知其然而不知其所以然。真正要用的时候基本上还是得靠现查，书中的描述还是太简单了点。

胭惜雨

2021年03月01日

胭惜雨的博客

《Python编程：从入门到实践》读书笔记(16)：下载数据

取消回复

归档

分类

其他操作

更新日历

浪子归 – 崔健

2024 年 11 月
一	二	三	四	五	六	日
				1	2	3
4	5	6	7	8	9	10
11	12	13	14	15	16	17
18	19	20	21	22	23	24
25	26	27	28	29	30