上文中将数据处理后,要挑出几个重要参数来对比其中的关联性,记录下用pyecharts绘制热力图的流程
首先处理 csv 文件,下面的代码可以直接复制粘贴使用
def _load_report(path):
    """Read one perf-report CSV a single time and return (header, values).

    header: list of column-name strings from the first row.
    values: float ndarray of every row after the header.
    """
    raw = np.loadtxt(path, dtype=str, delimiter=',')
    header = [str(name) for name in raw[0]]
    values = raw[1:].astype(float)
    return header, values


def get_pmu(k8s_wordcount_path='./k8s-100g-micro-1-report.csv',
            yarn_wordcount_path='./yarn-micro-1-report.csv',
            k8s_terasort_path='./k8s-terasort-micro-1-report.csv'):
    """Collect PMU event series from the three benchmark report CSVs.

    Returns a dict mapping event name -> {workload label: 1-D float array
    of that event's samples}.  Paths default to the original hard-coded
    files but may now be overridden.

    Fixes vs. the original:
    - each CSV was read twice by np.loadtxt while the `with open(...)`
      handle was never used; each file is now read once;
    - the yarn file stored a brand-new event under the label 'yarn' but
      an existing event under 'yarn-wordcount'; it is now consistently
      'yarn-wordcount'.
    """
    all_pmu_dic = {}
    reports = (
        ('k8s-wordcount', k8s_wordcount_path),
        ('yarn-wordcount', yarn_wordcount_path),
        ('k8s-terasort', k8s_terasort_path),
    )
    for label, path in reports:
        header, values = _load_report(path)
        for i, name in enumerate(header):
            # NOTE(review): matching the original data[1:, i], the first
            # data row is skipped as well as the header — confirm intended.
            all_pmu_dic.setdefault(name, {})[label] = values[1:, i]
    return all_pmu_dic
计算关联性
def correl(matrixA, matrixB):
    """Return the Pearson correlation of two equal-length sequences,
    formatted as a string with three decimal places.

    Returns "0.000" when either input has zero variance (the denominator
    would be zero), matching the original behaviour.
    NOTE: the return type is str; callers wrap the result in float().
    """
    a = np.asarray(matrixA, dtype=float)
    b = np.asarray(matrixB, dtype=float)
    # Centre both series on their means, then use vectorized dot
    # products instead of the original element-by-element Python loop.
    a_dev = a - a.mean()
    b_dev = b - b.mean()
    numerator = float(np.dot(a_dev, b_dev))
    denominator = float(np.sqrt(np.dot(a_dev, a_dev) * np.dot(b_dev, b_dev)))
    if denominator == 0:
        # Constant input: correlation is undefined; report 0 instead.
        return "%.3f" % 0.0
    return "%.3f" % (numerator / denominator)
使用列表记录两个轴的参数
# PMU events compared on the heatmap; the same list serves both axes.
x_axis = ["resource_stalls.sb",
"msr/pperf/",
"ld_blocks_partial.address_alias",
"cache-misses",
"branch-misses",
"L1-dcache-load-misses"]
# x and y show the same events, so the y axis simply aliases the x list
y_axis=x_axis
处理热点图所需的data
data 的格式为二维数组,其中每个元素依次包含 x 轴坐标、y 轴坐标和对应的数值,例如 [[0,0,1.3],[0,1,2.6],[0,2,1.446],……]
# The snippet as published used `all_pmu_dic` and `data` before they were
# defined (their initialisation only appeared in the full listing below);
# initialise them here so this fragment runs as shown.
all_pmu_dic = get_pmu()
data = []  # heatmap cells: [x_index, y_index, correlation]
for x_index in range(len(x_axis)):
    for y_index in range(len(y_axis)):
        matrixA = all_pmu_dic[x_axis[x_index]]['k8s-wordcount']
        matrixB = all_pmu_dic[y_axis[y_index]]['k8s-wordcount']
        # correl() returns a formatted string; convert back to float.
        data.append([x_index, y_index, float(correl(matrixA, matrixB))])
绘制图片
# Wrap the chart in a Grid so overly long axis labels are not clipped
grid = Grid(init_opts=opts.InitOpts(width="100%",height='950px'))
heatmap = (HeatMap(init_opts=opts.InitOpts())
.add_xaxis(x_axis) # x-axis category list
.add_yaxis("事件相关性", # series name shown in the legend
y_axis, # y-axis category list
data, # [x_index, y_index, value] triples
label_opts=opts.LabelOpts(is_show=True, position="inside"), # show each cell's value inside the cell
)
.set_series_opts()
.set_global_opts(
legend_opts=opts.LegendOpts(is_show=True),
xaxis_opts=opts.AxisOpts(
type_="category",
is_scale=True,
interval=0,
position="center",
axislabel_opts=opts.LabelOpts(
font_size=18,
interval=0,
rotate=30,
border_color="black",
),
splitarea_opts=opts.SplitAreaOpts(
is_show=True,
areastyle_opts=opts.AreaStyleOpts(opacity=1),
),
),
yaxis_opts=opts.AxisOpts(
type_="category",
# is_inverse=True,
axislabel_opts=opts.LabelOpts(
font_size=16
),
splitarea_opts=opts.SplitAreaOpts(
is_show=True, areastyle_opts=opts.AreaStyleOpts(opacity=1)
),
),
# title for the selected-parameter run
title_opts=opts.TitleOpts(title="相关参数相关性",pos_left="15%"),
# alternative title for the cpu-parameter run
# title_opts=opts.TitleOpts(title="cpu相关性"),
visualmap_opts=opts.VisualMapOpts(
min_=float(-1), max_=float(1), is_calculable=True, orient="horizontal", pos_left="center",split_number=5,pos_top=20
),
))
# heatmap.render(path='cpu.html')
grid.add(heatmap, grid_opts=opts.GridOpts(pos_left="15%",pos_bottom="25%",pos_top="10%"))
grid.render("correl.html") # finally renders the chart to a file named correl.html
全部代码
import numpy as np
from pyecharts.charts import HeatMap,Grid
from pyecharts import options as opts
from pyecharts.faker import Faker
import random
import seaborn as sns;
def correl(matrixA, matrixB):
    """Return the Pearson correlation of two equal-length sequences,
    formatted as a string with three decimal places.

    Returns "0.000" when either input has zero variance (the denominator
    would be zero), matching the original behaviour.
    NOTE: the return type is str; callers wrap the result in float().
    """
    a = np.asarray(matrixA, dtype=float)
    b = np.asarray(matrixB, dtype=float)
    # Centre both series on their means, then use vectorized dot
    # products instead of the original element-by-element Python loop.
    a_dev = a - a.mean()
    b_dev = b - b.mean()
    numerator = float(np.dot(a_dev, b_dev))
    denominator = float(np.sqrt(np.dot(a_dev, a_dev) * np.dot(b_dev, b_dev)))
    if denominator == 0:
        # Constant input: correlation is undefined; report 0 instead.
        return "%.3f" % 0.0
    return "%.3f" % (numerator / denominator)
def _load_report(path):
    """Read one perf-report CSV a single time and return (header, values).

    header: list of column-name strings from the first row.
    values: float ndarray of every row after the header.
    """
    raw = np.loadtxt(path, dtype=str, delimiter=',')
    header = [str(name) for name in raw[0]]
    values = raw[1:].astype(float)
    return header, values


def get_pmu(k8s_wordcount_path='./k8s-100g-micro-1-report.csv',
            yarn_wordcount_path='./yarn-micro-1-report.csv',
            k8s_terasort_path='./k8s-terasort-micro-1-report.csv'):
    """Collect PMU event series from the three benchmark report CSVs.

    Returns a dict mapping event name -> {workload label: 1-D float array
    of that event's samples}.  Paths default to the original hard-coded
    files but may now be overridden.

    Fixes vs. the original:
    - each CSV was read twice by np.loadtxt while the `with open(...)`
      handle was never used; each file is now read once;
    - the yarn file stored a brand-new event under the label 'yarn' but
      an existing event under 'yarn-wordcount'; it is now consistently
      'yarn-wordcount'.
    """
    all_pmu_dic = {}
    reports = (
        ('k8s-wordcount', k8s_wordcount_path),
        ('yarn-wordcount', yarn_wordcount_path),
        ('k8s-terasort', k8s_terasort_path),
    )
    for label, path in reports:
        header, values = _load_report(path)
        for i, name in enumerate(header):
            # NOTE(review): matching the original data[1:, i], the first
            # data row is skipped as well as the header — confirm intended.
            all_pmu_dic.setdefault(name, {})[label] = values[1:, i]
    return all_pmu_dic
# Build the correlation heatmap
# Selected parameters of interest (same list serves both axes)
x_axis = ["resource_stalls.sb",
"msr/pperf/",
"ld_blocks_partial.address_alias",
"cache-misses",
"branch-misses",
"L1-dcache-load-misses"]
# cpu-related parameters (alternative axis list)
# x_axis=["cpu-cycles","cpu-clock","cpu-migrations","cpu_clk_unhalted.ref_tsc","cpu_clk_unhalted.ref_xclk",
# "cpu_clk_unhalted.ref_xclk_any",
# "cpu_clk_unhalted.ring0_trans",
# "cpu_clk_unhalted.thread",
# "cpu_clk_unhalted.thread_any",
# "cpu_clk_unhalted.thread_p",
# "cpu_clk_unhalted.thread_p_any"]
y_axis=x_axis
# event name -> {workload label: sample series}
all_pmu_dic=get_pmu()
# heatmap cells: [x_index, y_index, correlation]
data=[]
# Build the heatmap cells: one [x_index, y_index, correlation] triple per
# pair of events, correlating their k8s-wordcount sample series.
# correl() returns a formatted string, hence the float() conversion.
data = [
    [col, row, float(correl(all_pmu_dic[x_name]['k8s-wordcount'],
                            all_pmu_dic[y_name]['k8s-wordcount']))]
    for col, x_name in enumerate(x_axis)
    for row, y_name in enumerate(y_axis)
]
# Wrap the chart in a Grid so overly long axis labels are not clipped.
grid = Grid(init_opts=opts.InitOpts(width="100%", height='950px'))

# Each pyecharts mutator returns the chart itself, so sequential calls
# are equivalent to the fluent chained form.
heatmap = HeatMap(init_opts=opts.InitOpts())
heatmap.add_xaxis(x_axis)  # x-axis category list
heatmap.add_yaxis(
    "事件相关性",  # series name shown in the legend
    y_axis,  # y-axis category list
    data,  # [x_index, y_index, value] triples
    label_opts=opts.LabelOpts(is_show=True, position="inside"),  # print value in each cell
)
heatmap.set_series_opts()
heatmap.set_global_opts(
    legend_opts=opts.LegendOpts(is_show=True),
    xaxis_opts=opts.AxisOpts(
        type_="category",
        is_scale=True,
        interval=0,
        position="center",
        axislabel_opts=opts.LabelOpts(
            font_size=18,
            interval=0,
            rotate=30,
            border_color="black",
        ),
        splitarea_opts=opts.SplitAreaOpts(
            is_show=True,
            areastyle_opts=opts.AreaStyleOpts(opacity=1),
        ),
    ),
    yaxis_opts=opts.AxisOpts(
        type_="category",
        # is_inverse=True,
        axislabel_opts=opts.LabelOpts(font_size=16),
        splitarea_opts=opts.SplitAreaOpts(
            is_show=True,
            areastyle_opts=opts.AreaStyleOpts(opacity=1),
        ),
    ),
    # title for the selected-parameter run
    title_opts=opts.TitleOpts(title="相关参数相关性", pos_left="15%"),
    # alternative title for the cpu-parameter run:
    # title_opts=opts.TitleOpts(title="cpu相关性"),
    visualmap_opts=opts.VisualMapOpts(
        min_=float(-1),
        max_=float(1),
        is_calculable=True,
        orient="horizontal",
        pos_left="center",
        split_number=5,
        pos_top=20,
    ),
)
# heatmap.render(path='cpu.html')
grid.add(heatmap, grid_opts=opts.GridOpts(pos_left="15%", pos_bottom="25%", pos_top="10%"))
grid.render("correl.html")  # writes the final chart to correl.html