0%

图表推荐AutoChart调研

图表推荐调研

背景 & 目的

图表选择困难:拿到一份数据,是应该用饼图还是用柱图去展示?工程师往往有工程师的判断,设计师有设计师的考量,同时产品和客户的意愿可能又不相同。到底这份数据最适合采用哪种图表去展示?以前我们普遍采用朴素的经验主义,即以前用的啥,我们现在还用啥;别人用的啥,我们就用啥。这种方式既不科学,也没有说服力。所以我们需要去维护一套知识库,结合经验和分析去判断哪些类型的数据更适合用哪种图表展示。本质上是解决数据到图表种类的映射关系。

前言

本文主要基于 AVA(即阿里的 AutoChart),分析他们所采用的解决方案,调研对于该问题他们的解决方案是否可行,以及其优点和不足。

简介

AVA(A Visual Analytics)是为了更简便的可视分析而生的技术框架。 其名称中的第一个 A 具有多重涵义:它说明了这是一个出自阿里巴巴集团(Alibaba)技术框架,其目标是成为一个自动化(Automated)、智能驱动(AI driven)、支持增强分析(Augmented)的可视分析解决方案。(引用自 ava Readme)

AVA核心流程

Data(数据)→ DataWizard(前端数据处理类库)→ DataProcessed(已处理的数据)+ CKB(Chart Knowledge Base,图表知识库)→ ChartAdvisor(图表推荐器)→ ChartInfor(图表信息)→ translater(翻译器,将图表信息翻译为具体的 render 方法)→ chart(图表)

各个模块介绍

DataWizard

数据分析模块,本质上是对输入数据的统计和抽象,为图表推荐、智能图表提供数据支撑。

data

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
[
{
"region": "East",
"sales": 4684.44
},
{
"region": "North",
"sales": 4137.09
},
{
"region": "NorthEast",
"sales": 2681.46
},
{
"region": "SouthEast",
"sales": 2447.01
},
{
"region": "SouthWest",
"sales": 818.59
},
{
"region": "NorthWest",
"sales": 1303.5
}
]

options

1
{"preferences":{"canvasLayout":"landscape"}}

dataset analysis

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
[
{
"count": 6,
"distinct": 6,
"type": "string",
"recommendation": "string",
"missing": 0,
"samples": [
"East",
"North",
"NorthEast",
"SouthEast",
"SouthWest",
"NorthWest"
],
"valueMap": {
"East": 1,
"North": 1,
"NorthEast": 1,
"SouthEast": 1,
"SouthWest": 1,
"NorthWest": 1
},
"maxLength": 9,
"minLength": 4,
"meanLength": 7.5,
"containsChars": true,
"containsDigits": false,
"containsSpace": false,
"containsNonWorlds": false,
"name": "region",
"levelOfMeasurements": [
"Nominal"
]
},
{
"count": 6,
"distinct": 6,
"type": "float",
"recommendation": "float",
"missing": 0,
"samples": [
4684.44,
4137.09,
2681.46,
2447.01,
818.59,
1303.5
],
"valueMap": {
"4684.44": 1,
"4137.09": 1,
"2681.46": 1,
"2447.01": 1,
"818.59": 1,
"1303.5": 1
},
"minimum": 818.59,
"maximum": 4684.44,
"mean": 2678.6816666666664,
"percentile5": 818.59,
"percentile25": 1303.5,
"percentile50": 2447.01,
"percentile75": 4137.09,
"percentile95": 4684.44,
"sum": 16072.089999999998,
"variance": 1925794.4105805552,
"stdev": 1387.7299487222128,
"zeros": 0,
"name": "sales",
"levelOfMeasurements": [
"Interval",
"Continuous"
]
}
]

核心代码

path: /node_modules/@antv/chart-advisor/lib/advisor.js 344 function analyze(data, options, showLog)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/**
 * Runs the two advisor stages back to back: the input data is profiled by
 * dataToDataProps, and the resulting profiles are handed to dataPropsToSpecs
 * to obtain the list of chart suggestions.
 *
 * @param data - input dataset, passed straight to dataToDataProps.
 * @param options - advisor options, passed straight to dataPropsToSpecs.
 * @param showLog - whether to print the inputs and the intermediate profile
 *   to the console; logging is ON when this argument is left undefined.
 * @returns the value returned by dataPropsToSpecs.
 */
function analyze(data, options, showLog) {
    var verbose = showLog === void 0 ? true : showLog;

    // Prints a caption followed by the value it describes, but only when
    // logging is enabled.
    var trace = function (caption, value) {
        if (verbose) {
            console.log(caption);
            console.log(value);
        }
    };

    trace(' data ', data);
    trace(' options ', options);

    var fieldProfiles = dataToDataProps(data);
    trace(' dataset analysis ', fieldProfiles);

    // showLog is intentionally not forwarded: the second stage keeps its own
    // default for logging.
    return dataPropsToSpecs(fieldProfiles, options);
}

CKB

本质上就是一个json,定义了各类图表、以及图表的描述,相当于是一个数据集。

详见:https://observablehq.com/@jiazhewang/ava-ckb

ChartAdvisor

图表推荐,本质上是一个评分系统:根据对数据的分析结果,为知识库中的每种图表类型打分,分数高的优先推荐,分数为 0 的不推荐。

处理结果

1
2
3
4
5
6
7
8
9
10
11
12
13
[
{
channels: {y: "sales", x: "region"},
score: 1.8,
type: "column_chart"
},
{
channels: {color: "region", angle: "sales"},
score: 1.5830210531286988,
type: "pie_chart",
}
...
]

核心代码

path: /node_modules/@antv/chart-advisor/lib/advisor.js 59 function dataPropsToSpecs(dataProps, options, showLog)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
/**
 * Scores every chart type found in the knowledge base (the keys of `Wiki`)
 * against the analysed fields and returns the viable candidates, best first.
 *
 * For each chart type:
 *   1. score = hardScore * (1 + softScore), where hardScore is the product of
 *      the check() results of all HARD rules listing that type and softScore
 *      is the sum of the check() results of all SOFT rules listing that type.
 *   2. Fields are bound to visual channels (x, y, color, angle, ...) in a
 *      per-chart-type branch; when a required field cannot be found the score
 *      is forced to 0.
 * Entries whose score is falsy, or for which util_1.translate(type) is falsy,
 * are removed before sorting.
 *
 * @param dataProps - array of field descriptors; this function reads each
 *   entry's `levelOfMeasurements` and `name`.
 * @param options - optional; `purpose` and `preferences` are read from it and
 *   forwarded to every rule check.
 * @param showLog - enables console logging; treated as false when undefined.
 * @returns array of `{ type, channels, score }` sorted by descending score.
 */
function dataPropsToSpecs(dataProps, options, showLog) {
if (showLog === void 0) { showLog = false; }
var purpose = options ? options.purpose : '';
var preferences = options ? options.preferences : undefined;


// Collect every chart type defined in the knowledge base
var allTypes = Object.keys(Wiki);

// Walk the types and score each one against the current data analysis result: higher scores are recommended first, lower scores later, and a score of 0 is not recommended (it is left out of the recommendation list and never shown)
var list = allTypes.map(function (t) {
// Score
var score = 0;
// for log
var record = {};
// Hard rules multiply together, so a single 0 zeroes the whole score.
var hardScore = 1;
rules_1.default.filter(function (r) { return r.hardOrSoft === 'HARD' && r.specChartTypes.includes(t); }).forEach(function (hr) {
// This `score` is local to the callback and shadows the outer one.
var score = hr.check({ dataProps: dataProps, chartType: t, purpose: purpose, preferences: preferences });
hardScore *= score;
// if (showLog) console.log('H rule: ', hr.id, ' ; charttype: ', t);
// if (showLog) console.log(score);
record[hr.id] = score;
});
// Soft rules are summed and act as a bonus on top of the hard score.
var softScore = 0;
rules_1.default.filter(function (r) { return r.hardOrSoft === 'SOFT' && r.specChartTypes.includes(t); }).forEach(function (sr) {
var score = sr.check({ dataProps: dataProps, chartType: t, purpose: purpose, preferences: preferences });
softScore += score;
// if (showLog) console.log('S rule: ', sr.id, ' ; charttype: ', t);
// if (showLog) console.log(score);
record[sr.id] = score;
});
score = hardScore * (1 + softScore);
if (showLog)
console.log('score: ', score, '=', hardScore, '* (1 +', softScore, ') ;charttype: ', t);
if (showLog)
console.log(record);
// analyze channels
// Each branch below picks fields for one family of chart types; a type that
// matches no branch keeps an empty channels object and its computed score.
var channels = {};
// for Pie | Donut
if (t === 'pie_chart' || t === 'donut_chart') {
var field4Color = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Nominal']); });
var field4Angle = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Interval']); });
if (field4Angle && field4Color) {
channels.color = field4Color.name;
channels.angle = field4Angle.name;
}
else {
score = 0;
}
}
// for Line
if (t === 'line_chart' || t == 'step_line_chart') {
var field4X = dataProps.find(function (field) { return intersects(field.levelOfMeasurements, ['Time', 'Ordinal']); });
var field4Y = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Interval']); });
var field4Color = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Nominal']); });
// Color is optional here and is assigned even when x/y are missing.
if (field4Color) {
channels.color = field4Color.name;
}
if (field4X && field4Y) {
channels.x = field4X.name;
channels.y = field4Y.name;
}
else {
score = 0;
}
}
// for Area
if (t === 'area_chart') {
var field4X = dataProps.find(function (field) { return intersects(field.levelOfMeasurements, ['Time', 'Ordinal']); });
var field4Y = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Interval']); });
if (field4X && field4Y) {
channels.x = field4X.name;
channels.y = field4Y.name;
}
else {
score = 0;
}
}
// for Bar
if (t === 'bar_chart') {
var nominalFields = dataProps.filter(function (field) { return hasSubset(field.levelOfMeasurements, ['Nominal']); });
// filter() returned a fresh array, so sorting it leaves dataProps untouched.
// NOTE(review): the ordering produced by `compare` is defined elsewhere — confirm.
var sortedNominalFields = nominalFields.sort(compare);
var field4Y = sortedNominalFields[0];
var field4Color = sortedNominalFields[1];
var field4X = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Interval']); });
if (field4X && field4Y) {
channels.y = field4Y.name;
channels.x = field4X.name;
if (field4Color) {
channels.color = field4Color.name;
}
}
else {
score = 0;
}
}
// for Column
if (t === 'column_chart') {
var nominalFields = dataProps.filter(function (field) { return hasSubset(field.levelOfMeasurements, ['Nominal']); });
var sortedNominalFields = nominalFields.sort(compare);
var field4X = sortedNominalFields[0];
var field4Color = sortedNominalFields[1];
var field4Y = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Interval']); });
if (field4X && field4Y) {
channels.y = field4Y.name;
channels.x = field4X.name;
if (field4Color) {
channels.color = field4Color.name;
}
}
else {
score = 0;
}
}
// for GroupedBar | StackedBar | PercentageStackedBar
if (t === 'grouped_bar_chart' || t === 'stacked_bar_chart' || t === 'percent_stacked_bar_chart') {
var nominalFields = dataProps.filter(function (field) { return hasSubset(field.levelOfMeasurements, ['Nominal']); });
var sortedNominalFields = nominalFields.sort(compare);
var field4Y1 = sortedNominalFields[0];
var field4Y2 = sortedNominalFields[1];
var field4X = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Interval']); });
// Unlike the plain bar chart, the second nominal field is mandatory here.
if (field4Y1 && field4Y2 && field4X) {
channels.y = field4Y1.name;
channels.y2 = field4Y2.name;
channels.x = field4X.name;
}
else {
score = 0;
}
}
// for GroupedColumn | StackedColumn | PercentageStackedColumn
if (t === 'grouped_column_chart' || t === 'stacked_column_chart' || t === 'percent_stacked_column_chart') {
var nominalFields = dataProps.filter(function (field) { return hasSubset(field.levelOfMeasurements, ['Nominal']); });
var sortedNominalFields = nominalFields.sort(compare);
var field4X1 = sortedNominalFields[0];
var field4X2 = sortedNominalFields[1];
var field4Y = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Interval']); });
if (field4X1 && field4X2 && field4Y) {
channels.x = field4X1.name;
channels.x2 = field4X2.name;
channels.y = field4Y.name;
}
else {
score = 0;
}
}
// for StackedArea | PercentageStackedArea
if (t === 'stacked_area_chart' || t === 'percent_stacked_area_chart') {
var field4X1 = dataProps.find(function (field) { return intersects(field.levelOfMeasurements, ['Time', 'Ordinal']); });
var field4X2 = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Nominal']); });
var field4Y = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Interval']); });
if (field4X1 && field4X2 && field4Y) {
channels.x = field4X1.name;
channels.x2 = field4X2.name;
channels.y = field4Y.name;
}
else {
score = 0;
}
}
// for Radar
if (t === 'radar_chart') {
var nominalFields = dataProps.filter(function (field) { return hasSubset(field.levelOfMeasurements, ['Nominal']); });
var sortedNominalFields = nominalFields.sort(compare);
var field4Angle = sortedNominalFields[0];
var field4Series = sortedNominalFields[1];
var field4Radius = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Interval']); });
if (field4Angle && field4Series && field4Radius) {
channels.angle = field4Angle.name;
channels.series = field4Series.name;
channels.radius = field4Radius.name;
}
else {
score = 0;
}
}
// for Scatter
if (t === 'scatter_plot') {
var intervalFields = dataProps.filter(function (field) { return hasSubset(field.levelOfMeasurements, ['Interval']); });
var sortedIntervalFields = intervalFields.sort(compare);
var field4X = sortedIntervalFields[0];
var field4Y = sortedIntervalFields[1];
var field4Color = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Nominal']); });
if (field4X && field4Y) {
channels.x = field4X.name;
channels.y = field4Y.name;
if (field4Color) {
channels.color = field4Color.name;
}
}
else {
score = 0;
}
}
// for Bubble
if (t === 'bubble_chart') {
var intervalFields = dataProps.filter(function (field) { return hasSubset(field.levelOfMeasurements, ['Interval']); });
// Start from the first three interval fields, then look for a better x/y
// pair among all pairs using DWAnalyzer.pearson.
var triple = {
x: intervalFields[0],
y: intervalFields[1],
corr: 0,
size: intervalFields[2],
};
// _loop_1 / _loop_2 are compiler-generated closures for the nested i/j loops.
var _loop_1 = function (i) {
var _loop_2 = function (j) {
var p = DWAnalyzer.pearson(intervalFields[i], intervalFields[j]);
// NOTE(review): the comparison uses Math.abs(p) but the signed p is stored
// below; once a negative p is stored, any later pair passes this test.
// Looks unintended — confirm against upstream.
if (Math.abs(p) > triple.corr) {
triple.x = intervalFields[i];
triple.y = intervalFields[j];
triple.corr = p;
// Size takes the first index that is neither i nor j; `|| 0` falls back
// to index 0 when find() yields 0 or undefined.
triple.size = intervalFields[tslib_1.__spread(Array(intervalFields.length).keys()).find(function (e) { return e !== i && e !== j; }) || 0];
}
};
for (var j = i + 1; j < intervalFields.length; j++) {
_loop_2(j);
}
};
for (var i = 0; i < intervalFields.length; i++) {
_loop_1(i);
}
var field4X = triple.x;
var field4Y = triple.y;
var field4Size = triple.size;
var field4Color = dataProps.find(function (field) { return intersects(field.levelOfMeasurements, ['Nominal']); });
// All four channels are required for a bubble chart.
if (field4X && field4Y && field4Size && field4Color) {
channels.x = field4X.name;
channels.y = field4Y.name;
channels.size = field4Size.name;
channels.color = field4Color.name;
}
else {
score = 0;
}
}
// for Histogram
if (t === 'histogram') {
var field = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Interval']); });
if (field) {
channels.x = field.name;
}
else {
score = 0;
}
}
// for heatmap
if (t === 'heatmap') {
var axisFields = dataProps.filter(function (field) { return intersects(field.levelOfMeasurements, ['Nominal', 'Ordinal']); });
var sortedFields = axisFields.sort(compare);
var field4X = sortedFields[0];
var field4Y = sortedFields[1];
var field4Color = dataProps.find(function (field) { return hasSubset(field.levelOfMeasurements, ['Interval']); });
if (field4X && field4Y && field4Color) {
channels.x = field4X.name;
channels.y = field4Y.name;
channels.color = field4Color.name;
}
else {
score = 0;
}
}
return {
type: t,
channels: channels,
score: score,
};
});
// sort list
// Comparator for descending score order.
function compareAdvices(chart1, chart2) {
if (chart1.score < chart2.score) {
return 1;
}
else if (chart1.score > chart2.score) {
return -1;
}
else {
return 0;
}
}
// Keep only entries with a truthy score whose type util_1.translate() accepts.
var resultList = list.filter(function (e) { return e.score && e.score !== 0 && util_1.translate(e.type); }).sort(compareAdvices);
if (showLog)
console.log('resultList ');
if (showLog)
console.log(resultList);
return resultList;
}

translater

翻译器模块,目前是将图表信息翻译成 G2Plot 的渲染语言去渲染(据说后期会增加其他图表库的渲染方法)。

核心代码

path:/node_modules/@antv/chart-advisor/lib/chartLibMapping/G2Plot.js

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// Lookup table keyed by the advisor's snake_case chart type ids (the same
// style of id used in the advice list, e.g. 'column_chart', 'pie_chart').
// NOTE(review): the values are presumably the matching G2Plot plot names,
// judging by the constant's name — confirm against the G2Plot version in use.
exports.G2PLOT_TYPE_MAPPING = {
line_chart: 'Line',
step_line_chart: 'StepLine',
area_chart: 'Area',
stacked_area_chart: 'StackedArea',
percent_stacked_area_chart: 'PercentageStackedArea',
column_chart: 'Column',
grouped_column_chart: 'GroupedColumn',
stacked_column_chart: 'StackedColumn',
percent_stacked_column_chart: 'PercentageStackedColumn',
bar_chart: 'Bar',
grouped_bar_chart: 'GroupedBar',
stacked_bar_chart: 'StackedBar',
percent_stacked_bar_chart: 'PercentageStackedBar',
histogram: 'Histogram',
pie_chart: 'Pie',
donut_chart: 'Donut',
rose_chart: 'Rose',
scatter_plot: 'Scatter',
bubble_chart: 'Bubble',
radar_chart: 'Radar',
// density_heatmap: 'DensityHeatmap',
heatmap: 'Heatmap',
};

参考 & 引用

https://chartcube.alipay.com/guide

https://www.npmjs.com/package/@antv/knowledge

https://github.com/antvis/AVA/blob/master/zh-CN/README.zh-CN.md

https://observablehq.com/@jiazhewang/autochart-automatic-chart-dev-library-by-antv

https://observablehq.com/@jiazhewang/ava-ckb

https://jiazhe.wang/