记录下使用numpy分析csv的代码

实验室项目数据保存为csv格式,每一个参数下面都是数据,记下处理这些csv格式的代码,免得以后再找

csv格式:

csv格式样本

处理代码:

# Map each CSV column header to {dataset label: that column's values}.
all_pmu_dic = {}
k8s_wordcount_path='要处理的csv'
# Parse the file once, as text and with an explicit UTF-8 encoding: row 0 is
# the header, every following row is converted to floats.
item = np.loadtxt(k8s_wordcount_path, dtype=str, delimiter=',', encoding='utf-8')
data = item[1:].astype(float)
for i, column_name in enumerate(item[0]):
    # NOTE(review): data[1:, i] additionally drops the first numeric row,
    # exactly as the original code did - confirm that this is intended.
    all_pmu_dic[str(column_name)] = {'key名': data[1:, i]}

思路就是用一个dict来存储csv中的参数和值:外层以参数名(表头)为key,内层以数据来源的名字为key,值是该列数据对应的numpy数组

后面用numpy算平均值、方差这些就不说了

再记一下画柱状图

使用matplotlib来画图

首先使用一个列表来存储x轴名称

# x-axis tick labels: mean, min, max, median, population standard deviation, variance
name_list=["平均值","最小值","最大值","中位数","总体标准差","方差"]

因为方差跟其他统计量不在一个数量级(方差是标准差的平方,而平均值、最小值、最大值、中位数、总体标准差都与原始数据同一量级),差异比较大,所以我使用双y轴制图,方差单独使用一个y轴

用两个列表存储

# labels of the statistics drawn as the first five bar groups
name_avg_list=["平均值","最小值","最大值","中位数","总体标准差"]
# label of the variance group, kept separate from the others
name_var_list=["方差"]

为每一个参数都绘制柱状图,用一个遍历来画

for list in all_pmu_dic:

获取数据

平均值这组

# One list per workload, values in the order mean, min, max, median, std
# (the same order as name_avg_list).
k8s_wordcount_list=[avg_dic[list]['k8s-wordcount'],min_dic[list]['k8s-wordcount'],max_dic[list]['k8s-wordcount'],median_dic[list]['k8s-wordcount'],std_dic[list]['k8s-wordcount']]
yarn_wordcount_list=[avg_dic[list]['yarn-wordcount'],min_dic[list]['yarn-wordcount'],max_dic[list]['yarn-wordcount'],median_dic[list]['yarn-wordcount'],std_dic[list]['yarn-wordcount']]
k8s_terasort_list=[avg_dic[list]['k8s-terasort'],min_dic[list]['k8s-terasort'],max_dic[list]['k8s-terasort'],median_dic[list]['k8s-terasort'],std_dic[list]['k8s-terasort']]

方差这组

# The variance of each workload, wrapped in a single-element list.
k8s_wordcount_var_list=[var_dic[list]['k8s-wordcount']]
yarn_wordcount_var_list=[var_dic[list]['yarn-wordcount']]
k8s_terasort_var_list=[var_dic[list]['k8s-terasort']]

然后到了使用matplotlib绘图这一步

初始化工作

x = np.arange(len(name_list)) # one x position per entry of name_list
bar_width = 0.3 # width of each bar

plt.rcParams['figure.figsize'] = (12.0, 6.0) # figure size
# plt.rcParams['figure.dpi'] = 200 # figure resolution
plt.rcParams['font.sans-serif'] = ['SimHei']  # so that Chinese labels are displayed correctly
plt.rcParams['axes.unicode_minus'] = False  # so that the minus sign is displayed correctly

首先绘制平均值这一组

fig = plt.figure()
ax1 = fig.add_subplot()

# write the height of each bar just above it
for tmpx, y in zip(x[:len(name_avg_list)], k8s_wordcount_list):
      ax1.text(tmpx, y + 0.05, '%.3f' % y, ha='center', va='bottom')

# for tmpx, y1 in zip(x[:len(name_avg_list)], yarn_wordcount_list):
#     ax1.text(tmpx + 0.3, y1 + 0.05, '%.3f' % y1, ha='center', va='bottom')

for tmpx, y2 in zip(x[:len(name_avg_list)], k8s_terasort_list):
      ax1.text(tmpx + 0.3, y2 + 0.05, '%.3f' % y2, ha='center', va='bottom')


# bars for every x position except the trailing variance position(s)
ax1.bar(x[:len(x)-len(name_var_list)], k8s_wordcount_list, bar_width, color="salmon", label="k8s-wordcount")
# ax1.bar(x[:len(x)-len(name_var_list)] + bar_width, yarn_wordcount_list, bar_width, color="orchid", label="yarn-wordcount")
ax1.bar(x[:len(x)-len(name_var_list)] + bar_width, k8s_terasort_list, bar_width, color="darkcyan", label="k8s-terasort")
# set the y-axis label
ax1.set_ylabel("平均值,最小值,最大值,中位数,总体标准差")

再接着绘制方差这一组

# the key step: create a secondary y-axis that shares the x-axis with ax1
ax2=ax1.twinx()
# write the height of each bar just above it
for tmpx, y in zip(x[len(name_avg_list):], k8s_wordcount_var_list):
    ax2.text(tmpx, y + 0.05, '%.3f' % y, ha='center', va='bottom')

# for tmpx, y1 in zip(x[len(name_avg_list):], yarn_wordcount_var_list):
#     ax2.text(tmpx + 0.3, y1 + 0.05, '%.3f' % y1, ha='center', va='bottom')

for tmpx, y2 in zip(x[len(name_avg_list):], k8s_terasort_var_list):
    ax2.text(tmpx + 0.3, y2 + 0.05, '%.3f' % y2, ha='center', va='bottom')

# variance bars go at the last x position, measured against the secondary axis
ax2.bar(x[len(x)-len(name_var_list)], k8s_wordcount_var_list, bar_width, color="salmon", label="k8s-wordcount")
# ax2.bar(x[len(x)-len(name_var_list)] + bar_width, yarn_wordcount_var_list, bar_width, color="orchid", label="yarn-wordcount")
ax2.bar(x[len(x)-len(name_var_list)] + bar_width, k8s_terasort_var_list, bar_width, color="darkcyan", label="k8s-terasort")
ax2.set_ylabel("方差")

最后绘图

plt.xticks(x + bar_width / 2, name_list) # put each tick label midway between the two bars of its group
plt.title(list) # chart title
plt.legend() # show the legend
plt.show()

# save the figure
# list=list.replace("/","-")
# print(list)
# plt.savefig("./k8s-pic/"+list+".png") # save the figure
# plt.close()

全部代码

import numpy as np
import matplotlib.pyplot as plt

# Maps each CSV column header to {workload name: that column's values}.
all_pmu_dic = {}
k8s_wordcount_path='./k8s-100g-micro-1-report.csv'
# Parse the file once, as text and with an explicit UTF-8 encoding: row 0 is
# the header, every following row is converted to floats.
item = np.loadtxt(k8s_wordcount_path, dtype=str, delimiter=',', encoding='utf-8')
data = item[1:].astype(float)
for i, column_name in enumerate(item[0]):
    # NOTE(review): data[1:, i] additionally drops the first numeric row,
    # exactly as the original code did - confirm that this is intended.
    all_pmu_dic[str(column_name)] = {'k8s-wordcount': data[1:, i]}

# Load the YARN wordcount report and merge its columns into all_pmu_dic.
yarn_wordcount_path='./yarn-micro-1-report.csv'
# Parse the file once, as text and with an explicit UTF-8 encoding: row 0 is
# the header, every following row is converted to floats.
item = np.loadtxt(yarn_wordcount_path, dtype=str, delimiter=',', encoding='utf-8')
data = item[1:].astype(float)
for i, column_name in enumerate(item[0]):
    # setdefault creates the inner dict for a column not yet in all_pmu_dic,
    # so the series is always stored under 'yarn-wordcount' - the key every
    # later lookup uses (a new column used to be stored under 'yarn' and was
    # therefore silently ignored).
    # NOTE(review): data[1:, i] additionally drops the first numeric row,
    # exactly as the original code did - confirm that this is intended.
    all_pmu_dic.setdefault(str(column_name), {})['yarn-wordcount'] = data[1:, i]

# Load the k8s terasort report and merge its columns into all_pmu_dic.
k8s_terasort_path='./k8s-terasort-micro-1-report.csv'
# Parse the file once, as text and with an explicit UTF-8 encoding: row 0 is
# the header, every following row is converted to floats.
item = np.loadtxt(k8s_terasort_path, dtype=str, delimiter=',', encoding='utf-8')
data = item[1:].astype(float)
for i, column_name in enumerate(item[0]):
    # setdefault creates the inner dict for a column not yet in all_pmu_dic.
    # NOTE(review): data[1:, i] additionally drops the first numeric row,
    # exactly as the original code did - confirm that this is intended.
    all_pmu_dic.setdefault(str(column_name), {})['k8s-terasort'] = data[1:, i]

# Workloads reported for every column, in the order used in the result dicts.
_WORKLOADS = ('k8s-wordcount', 'yarn-wordcount', 'k8s-terasort')


def _summarize(stat_func):
    """Apply *stat_func* to every workload series of every column in all_pmu_dic.

    Returns a dict mapping column name -> {workload name: value}. Each value
    is rounded to three decimals by formatting it with '%.3f'; a workload
    that has no series for a column is reported as 0.0.
    """
    summary = {}
    for pmu_name, series_by_workload in all_pmu_dic.items():
        summary[pmu_name] = {
            workload: (
                float('%.3f' % stat_func(series_by_workload[workload]))
                if workload in series_by_workload
                else float(0)
            )
            for workload in _WORKLOADS
        }
    return summary


# mean
avg_dic = _summarize(np.mean)

# minimum
min_dic = _summarize(np.min)

# maximum
max_dic = _summarize(np.max)

# median
median_dic = _summarize(np.median)

# variance
var_dic = _summarize(np.var)

# population standard deviation (np.std with its default ddof=0)
std_dic = _summarize(np.std)


name_list=["平均值","最小值","最大值","中位数","总体标准差","方差"]
name_avg_list=["平均值","最小值","最大值","中位数","总体标准差"]
name_var_list=["方差"]

# Figure-wide settings are the same for every chart, so set them once.
plt.rcParams['figure.figsize'] = (12.0, 6.0)
# plt.rcParams['figure.dpi'] = 200
plt.rcParams['font.sans-serif'] = ['SimHei']  # so that Chinese labels are displayed correctly
plt.rcParams['axes.unicode_minus'] = False  # so that the minus sign is displayed correctly

x = np.arange(len(name_list))
bar_width = 0.3
avg_positions = x[:len(name_avg_list)]  # groups measured on the primary y-axis
var_positions = x[len(name_avg_list):]  # variance group, measured on the secondary y-axis

# (workload name, bar colour) for every workload that is drawn; a workload
# is enabled or disabled by (un)commenting its entry.
plotted_workloads = [
    ('k8s-wordcount', 'salmon'),
    # ('yarn-wordcount', 'orchid'),
    ('k8s-terasort', 'darkcyan'),
]
# Statistics shown on the primary axis, in the order of name_avg_list.
avg_stat_dics = [avg_dic, min_dic, max_dic, median_dic, std_dic]


def _draw_bar_group(axis, positions, series):
    """Draw the (label, colour, values) entries of *series* side by side on *axis*.

    Entry number k is shifted right by k * bar_width, and the height of each
    bar is written just above it.
    """
    for slot, (label, color, values) in enumerate(series):
        shifted = positions + slot * bar_width
        axis.bar(shifted, values, bar_width, color=color, label=label)
        for bar_x, height in zip(shifted, values):
            axis.text(bar_x, height + 0.05, '%.3f' % height, ha='center', va='bottom')


# One chart per column; the loop variable no longer shadows the builtin `list`.
for pmu_name in all_pmu_dic:
    avg_series = [
        (workload, color, [stat[pmu_name][workload] for stat in avg_stat_dics])
        for workload, color in plotted_workloads
    ]
    var_series = [
        (workload, color, [var_dic[pmu_name][workload]])
        for workload, color in plotted_workloads
    ]

    fig = plt.figure()
    ax1 = fig.add_subplot()
    _draw_bar_group(ax1, avg_positions, avg_series)
    ax1.set_ylabel("平均值,最小值,最大值,中位数,总体标准差")

    # The variance is on a different scale, so it gets its own y-axis that
    # shares the x-axis with ax1.
    ax2 = ax1.twinx()
    _draw_bar_group(ax2, var_positions, var_series)
    ax2.set_ylabel("方差")

    # Centre each tick label under its group of bars (bar_width / 2 when two
    # workloads are plotted).
    plt.xticks(x + bar_width * (len(plotted_workloads) - 1) / 2, name_list)
    plt.title(pmu_name)
    plt.legend()
    plt.show()
    # To save the chart instead of showing it ("/" is replaced before the
    # column name is used as a file name):
    # plt.savefig("./k8s-pic/" + pmu_name.replace("/", "-") + ".png")
    # plt.close()
暂无评论

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
😂
😀
😅
😊
🙂
🙃
😌
😍
😘
😜
😝
😏
😒
🙄
😳
😡
😔
😫
😱
😭
💩
👻
🙌
🖕
👍
👫
👬
👭
🌚
🌝
🙈
💊
😶
🙏
🍦
🍉
😣
Source: github.com/k4yt3x/flowerhd
颜文字
Emoji
小恐龙
花!
上一篇
下一篇
Theme Argon
本网站自 2020-12-24 12:00:00 起已运行