-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathget-NumberColumnsCorrelation.pq
63 lines (63 loc) · 3.07 KB
/
get-NumberColumnsCorrelation.pq
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
let
output = (inputTbl as table) as table => [
originCntr = Table.RowCount(inputTbl),
schm = List.Buffer(Table.SelectRows(Table.Schema(Table.StopFolding(inputTbl)), each [Kind] = "number")[Name]),
addition = #table(
List.Combine({{"Columns"}, schm}),
List.Transform(
schm,
each
List.Combine(
{
{_},
[
crCol = _,
Cols = schm,
xCols = Table.Column(inputTbl, crCol),
sX = List.Sum(xCols),
sX2 = List.Sum(List.Transform(xCols, each Number.Power(_, 2))),
Y = List.Transform(
Cols,
each
if _ = crCol then
null
else
[
df = Table.SelectColumns(inputTbl, {crCol, _}),
sY = List.Sum(Table.Column(df, _)),
sXY = List.Sum(List.Transform(Table.ToRows(df), each _{0} * _{1})),
sY2 = List.Sum(
List.Transform(Table.Column(df, _), each Number.Power(_, 2))
),
pearson_numerator = (originCntr * sXY) - (sX * sY),
pearson_denominator_X = originCntr * Number.Power(sX2, 2) - Number.Power(
sX, 2
),
pearson_denominator_Y = (originCntr * sY2) - Number.Power(sY, 2),
pearson_denominator = Number.Sqrt(
pearson_denominator_X * pearson_denominator_Y
),
result = pearson_numerator / pearson_denominator
][result]
)
][Y]
}
)
)
)
][addition],
documentation = [
Documentation.Name = " get-NumberColumnsCorrelation.pq ",
Documentation.Description = " Calculates Correlation between all numeric columns in the input table.",
Documentation.Source = " https://www.datameerkat.com ",
Documentation.Version = " 1.0 ",
Documentation.Author = " Štěpán Rešl "
]
in
Value.ReplaceType(
output,
Value.ReplaceMetadata(
Value.Type(output),
documentation
)
)