实验室项目的数据以 CSV 格式保存:第一行是各个参数的名称,每个参数名下面的一列就是该参数的数据。这里记下处理这类 CSV 文件的代码,免得以后再找。
csv格式:
处理代码:
# Map each CSV column name to a dict holding that column's samples.
all_pmu_dic = {}
k8s_wordcount_path = '要处理的csv'
# Parse the file once as text and hand the encoding to NumPy directly.  The
# previous version opened a file handle it never used (so the utf-8 setting
# had no effect) and parsed the whole file twice.
item = np.loadtxt(k8s_wordcount_path, dtype=str, delimiter=',',
                  encoding="utf-8", ndmin=2)
# Row 0 holds the parameter names; every row below it is numeric.
data = item[1:].astype(float)
for i, param_name in enumerate(item[0]):
    # NOTE(review): data[1:, i] drops the first numeric row in addition to the
    # header row; kept as in the original -- confirm this is intended.
    all_pmu_dic[str(param_name)] = {'key名': data[1:, i]}
思路就是用一个字典(dic)来存储 CSV 中的参数和值:键是参数名,值是一个内层字典,里面保存该参数对应的那一列数据(NumPy 数组)。
后面用 NumPy 计算平均值、方差等统计量的过程比较简单,这里就不细说了(完整代码见文末)。
再记一下怎么画柱状图。
使用matplotlib来画图
首先使用一个列表来存储x轴名称
# Tick labels for the x axis, one per plotted statistic.
name_list=["平均值","最小值","最大值","中位数","总体标准差","方差"]
因为方差和其他统计量不在同一个数量级(方差的量纲是原始数据的平方;平均值、最小值、最大值、中位数与原始数据同一量级;总体标准差是方差的平方根),数值差异比较大,所以这里使用双 y 轴作图,让方差单独使用一个 y 轴。
用两个列表存储
# Labels of the statistics drawn against the primary y axis.
name_avg_list=["平均值","最小值","最大值","中位数","总体标准差"]
# Label of the statistic drawn against the secondary (twin) y axis.
name_var_list=["方差"]
为每一个参数都绘制柱状图,用一个遍历来画
# Draw one chart per parameter; iterating the dict yields its keys.
# NOTE(review): the loop variable shadows the builtin `list`.
for list in all_pmu_dic:
获取数据
平均值这组
# Per-workload values for the current parameter, ordered like name_avg_list:
# mean, minimum, maximum, median, population standard deviation.
avg_group_tables = (avg_dic, min_dic, max_dic, median_dic, std_dic)
k8s_wordcount_list = [table[list]['k8s-wordcount'] for table in avg_group_tables]
yarn_wordcount_list = [table[list]['yarn-wordcount'] for table in avg_group_tables]
k8s_terasort_list = [table[list]['k8s-terasort'] for table in avg_group_tables]
方差这组
# Variance of the current parameter, one single-element list per workload.
current_variance = var_dic[list]
k8s_wordcount_var_list = [current_variance['k8s-wordcount']]
yarn_wordcount_var_list = [current_variance['yarn-wordcount']]
k8s_terasort_var_list = [current_variance['k8s-terasort']]
然后到了使用matplotlib绘图这一步
初始化工作
# Shared plot setup.
bar_width = 0.3  # width of a single bar
x = np.arange(len(name_list))  # one x position per statistic label
plt.rcParams.update({
    'figure.figsize': (12.0, 6.0),  # figure size
    # 'figure.dpi': 200,  # figure resolution (disabled)
    'font.sans-serif': ['SimHei'],  # font used so the Chinese labels render
    'axes.unicode_minus': False,  # render minus signs correctly
})
首先绘制平均值这一组
fig = plt.figure()
ax1 = fig.add_subplot()
label_positions = x[:len(name_avg_list)]
bar_positions = x[:len(x)-len(name_var_list)]
# One entry per workload drawn on the primary axis:
# (heights, label shift, bar shift, colour, legend label).
avg_series = [
    (k8s_wordcount_list, 0, 0, "salmon", "k8s-wordcount"),
    # (yarn_wordcount_list, 0.3, bar_width, "orchid", "yarn-wordcount"),  # disabled
    (k8s_terasort_list, 0.3, bar_width, "darkcyan", "k8s-terasort"),
]
for heights, label_shift, bar_shift, colour, legend_name in avg_series:
    # Print the exact height just above each bar.
    for position, height in zip(label_positions, heights):
        ax1.text(position + label_shift, height + 0.05, '%.3f' % height, ha='center', va='bottom')
    # The bars themselves.
    ax1.bar(bar_positions + bar_shift, heights, bar_width, color=colour, label=legend_name)
# Name of the primary y axis.
ax1.set_ylabel("平均值,最小值,最大值,中位数,总体标准差")
再接着绘制方差这一组
# The key step: a twin axes object that shares x but has its own y axis.
ax2 = ax1.twinx()
var_label_positions = x[len(name_avg_list):]
var_bar_position = x[len(x)-len(name_var_list)]
# One entry per workload drawn on the secondary axis:
# (heights, label shift, bar shift, colour, legend label).
var_series = [
    (k8s_wordcount_var_list, 0, 0, "salmon", "k8s-wordcount"),
    # (yarn_wordcount_var_list, 0.3, bar_width, "orchid", "yarn-wordcount"),  # disabled
    (k8s_terasort_var_list, 0.3, bar_width, "darkcyan", "k8s-terasort"),
]
for heights, label_shift, bar_shift, colour, legend_name in var_series:
    # Print the exact height just above each bar.
    for position, height in zip(var_label_positions, heights):
        ax2.text(position + label_shift, height + 0.05, '%.3f' % height, ha='center', va='bottom')
    ax2.bar(var_bar_position + bar_shift, heights, bar_width, color=colour, label=legend_name)
ax2.set_ylabel("方差")
最后绘图
plt.xticks(x + bar_width / 2, name_list) # put each tick label midway between the two bars of its group
plt.title(list) # chart title: the current parameter name
plt.legend() # show the legend for the labelled bars
plt.show()
# Save the figure to a file (disabled)
# list=list.replace("/","-")
# print(list)
# plt.savefig("./k8s-pic/"+list+".png") # save the figure
# plt.close()
全部代码
import numpy as np
import matplotlib.pyplot as plt
def load_metric_columns(csv_path):
    """Read a report CSV and return ``{column name: samples}``.

    The first row of the file supplies the column names; the remaining rows
    are parsed as floats.  The file is parsed once, with the utf-8 encoding
    passed straight to NumPy.

    NOTE(review): as in the original script, the first numeric row is also
    dropped (``values[1:, col]``) -- confirm this is intended.
    """
    table = np.loadtxt(csv_path, dtype=str, delimiter=',',
                       encoding='utf-8', ndmin=2)
    values = table[1:].astype(float)
    return {str(name): values[1:, col] for col, name in enumerate(table[0])}


def merge_workload(pmu_table, workload, csv_path):
    """Store every column of ``csv_path`` in ``pmu_table`` under ``workload``.

    ``pmu_table`` maps a column name to ``{workload: samples}`` and is
    updated in place.  Columns seen for the first time get a new inner dict,
    so every workload is always stored under its own full name (the original
    yarn branch used the key 'yarn' for new columns, which the statistics
    code never looked up).
    """
    for name, samples in load_metric_columns(csv_path).items():
        pmu_table.setdefault(name, {})[workload] = samples


all_pmu_dic = {}
k8s_wordcount_path = './k8s-100g-micro-1-report.csv'
yarn_wordcount_path = './yarn-micro-1-report.csv'
k8s_terasort_path = './k8s-terasort-micro-1-report.csv'

if __name__ == "__main__":
    merge_workload(all_pmu_dic, 'k8s-wordcount', k8s_wordcount_path)
    merge_workload(all_pmu_dic, 'yarn-wordcount', yarn_wordcount_path)
    merge_workload(all_pmu_dic, 'k8s-terasort', k8s_terasort_path)
# Workloads reported for every parameter, in the order used by the result dicts.
WORKLOADS = ('k8s-wordcount', 'yarn-wordcount', 'k8s-terasort')


def summarize_metric(pmu_table, reducer):
    """Apply ``reducer`` to every workload's samples of every parameter.

    ``pmu_table`` maps a parameter name to ``{workload: samples}``.  The
    result maps each parameter name to ``{workload: value}`` for all entries
    of ``WORKLOADS``; the value is the reducer's result rounded to three
    decimals via ``'%.3f'`` formatting, or 0.0 when the parameter has no
    samples for that workload.
    """
    summary = {}
    for metric, per_workload in pmu_table.items():
        summary[metric] = {
            workload: (float('%.3f' % reducer(per_workload[workload]))
                       if workload in per_workload else 0.0)
            for workload in WORKLOADS
        }
    return summary


if __name__ == "__main__":
    avg_dic = summarize_metric(all_pmu_dic, np.mean)       # mean
    min_dic = summarize_metric(all_pmu_dic, np.min)        # minimum
    max_dic = summarize_metric(all_pmu_dic, np.max)        # maximum
    median_dic = summarize_metric(all_pmu_dic, np.median)  # median
    var_dic = summarize_metric(all_pmu_dic, np.var)        # variance
    std_dic = summarize_metric(all_pmu_dic, np.std)        # population standard deviation
name_list = ["平均值", "最小值", "最大值", "中位数", "总体标准差", "方差"]
name_avg_list = ["平均值", "最小值", "最大值", "中位数", "总体标准差"]
name_var_list = ["方差"]


def annotate_bars(axis, positions, heights, shift=0.0):
    """Write each height, formatted to three decimals, just above its bar."""
    for position, height in zip(positions, heights):
        axis.text(position + shift, height + 0.05, '%.3f' % height,
                  ha='center', va='bottom')


def plot_metric(metric, bar_width=0.3, save_dir=None):
    """Draw the dual-axis bar chart for one parameter.

    The statistics named in ``name_avg_list`` are drawn on the primary y
    axis and the variance on a twin y axis, because the variance differs
    from the other values by orders of magnitude.  Values are read from the
    module-level ``avg_dic``, ``min_dic``, ``max_dic``, ``median_dic``,
    ``std_dic`` and ``var_dic`` tables.

    Args:
        metric: Parameter name; used as table key and as chart title.
        bar_width: Width of one bar and offset of the second workload.
        save_dir: When given, the chart is written to
            ``<save_dir>/<metric with "/" replaced by "-">.png`` and closed
            instead of being shown interactively.
    """
    x = np.arange(len(name_list))
    avg_x = x[:len(name_avg_list)]
    var_x = x[len(name_avg_list):]
    avg_tables = (avg_dic, min_dic, max_dic, median_dic, std_dic)
    # (workload, colour, horizontal shift).  The yarn-wordcount series was
    # disabled in the original script; add
    # ("yarn-wordcount", "orchid", <shift>) here to draw it as well.
    series = (
        ("k8s-wordcount", "salmon", 0.0),
        ("k8s-terasort", "darkcyan", bar_width),
    )

    fig = plt.figure()
    ax1 = fig.add_subplot()
    for workload, colour, shift in series:
        heights = [table[metric][workload] for table in avg_tables]
        annotate_bars(ax1, avg_x, heights, shift)
        ax1.bar(avg_x + shift, heights, bar_width, color=colour, label=workload)
    ax1.set_ylabel("平均值,最小值,最大值,中位数,总体标准差")

    # Twin axes: shares the x axis with ax1 but has its own y scale.
    ax2 = ax1.twinx()
    for workload, colour, shift in series:
        heights = [var_dic[metric][workload]]
        annotate_bars(ax2, var_x, heights, shift)
        ax2.bar(var_x + shift, heights, bar_width, color=colour, label=workload)
    ax2.set_ylabel("方差")

    # Centre every tick label between the two bars of its group.
    plt.xticks(x + bar_width / 2, name_list)
    plt.title(metric)
    plt.legend()
    if save_dir is None:
        plt.show()
    else:
        import os

        # Parameter names may contain "/", which is not valid in a file name.
        fig.savefig(os.path.join(save_dir, metric.replace("/", "-") + ".png"))
        plt.close(fig)


if __name__ == "__main__":
    # Global figure settings; set once instead of on every loop iteration.
    plt.rcParams['figure.figsize'] = (12.0, 6.0)
    # plt.rcParams['figure.dpi'] = 200
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font used so the Chinese labels render
    plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
    for metric_name in all_pmu_dic:
        plot_metric(metric_name)