Skip to content

Commit dd5cfe0

Browse files
committed
rename .util namespace to .ut
1 parent df70525 commit dd5cfe0

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

56 files changed

+224
-224
lines changed

adaboost.q

+8-8
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
-1"discrete adaboost requires the target feature to have values -1 and 1";
88
t:update -1 1 "M"=diagnosis from 11#/:wdbc.t
99
-1"we can then split into train and test partitions";
10-
d:.util.part[`train`test!3 1;0N?] t
10+
d:.ut.part[`train`test!3 1;0N?] t
1111
-1 "building a full tree is perfect on the training data";
1212
tr:.ml.ct[();::] d.train
13-
.util.assert[1f] .util.rnd[.01] avg d.train.diagnosis=.ml.pdt[tr] d.train
13+
.ut.assert[1f] .ut.rnd[.01] avg d.train.diagnosis=.ml.pdt[tr] d.train
1414
-1 "but not as good on the test data";
15-
.util.assert[.9] .util.rnd[.01] avg d.test.diagnosis=.ml.pdt[tr] d.test
15+
.ut.assert[.9] .ut.rnd[.01] avg d.test.diagnosis=.ml.pdt[tr] d.test
1616
-1 "how many leaves did we create?";
1717
count .ml.leaves tr
1818
-1 "adaboost creates an ensemble of weak learners to produce a strong learning";
@@ -26,24 +26,24 @@ k:50
2626
-1 "let's run ",string[k]," rounds of adaboost";
2727
m:.ml.fab[k;stump;.ml.pdt] d.train
2828
p:.ml.pab[k;.ml.pdt;m] d.train
29-
.util.assert[.98] .util.rnd[.01] avg d.train.diagnosis=p
29+
.ut.assert[.98] .ut.rnd[.01] avg d.train.diagnosis=p
3030
-1 "plot the improvement to accuracy on the training set as we increase the ensemble size";
3131
P:.ml.pab[1+til k;.ml.pdt;m] d.train
32-
show .util.plt avg d.train.diagnosis = P
32+
show .ut.plt avg d.train.diagnosis = P
3333

3434
-1 "but how does each extra stump help in predicting the test set?";
3535
pt:.ml.pab[k;.ml.pdt;m] d.test
36-
.util.assert[.97] .util.rnd[.01] avg d.test.diagnosis=pt
36+
.ut.assert[.97] .ut.rnd[.01] avg d.test.diagnosis=pt
3737
-1 "we can also plot the improvement to accuracy on the test set as we increase the ensemble size";
3838
Pt:.ml.pab[1+til k;.ml.pdt;m] d.test
39-
show .util.plt avg d.test.diagnosis = Pt
39+
show .ut.plt avg d.test.diagnosis = Pt
4040

4141
-1 "the number of elements in our ensemble should be decided by cross validation";
4242
ks:1+til 20
4343

4444
n:10
4545
-1"cross validate with ", string[n], " buckets";
46-
ts:.util.part[n#1;0N?] t
46+
ts:.ut.part[n#1;0N?] t
4747
ff:.ml.fab[;stump;.ml.pdt]
4848
pf:.ml.pab[;.ml.pdt]
4949
e:ts[;`diagnosis]=P:.ml.xv[ff ks;pf ks;ts] peach til n

berkstan.q

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
berkstan.f:"web-BerkStan.txt"
22
berkstan.b:"http://snap.stanford.edu/data/"
33
-1"[down]loading berkstan network graph";
4-
.util.download[berkstan.b;;".gz";.util.gunzip] berkstan.f;
4+
.ut.download[berkstan.b;;".gz";.ut.gunzip] berkstan.f;
55
berkstan.l:("II";"\t") 0: 4_read0 `$berkstan.f

bible.q

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
bible.f:"10.txt"
33
bible.b:"http://www.gutenberg.org/files/10/"
44
-1"[down]loading bible text";
5-
.util.download[bible.b;;"";""] bible.f;
5+
.ut.download[bible.b;;"";""] bible.f;
66
bible.txt:read0 `$bible.f
77
bible.s:1_"\n1:1 " vs "\n" sv 39_-373_ bible.txt

binary.q

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
binary.f:"binary.csv"
22
binary.b:"https://www.ats.ucla.edu/stat/data/"
33
-1"[down]loading binary data set";
4-
.util.download[binary.b;;"";""] binary.f;
4+
.ut.download[binary.b;;"";""] binary.f;
55
binary.t:("BIFI";1#",") 0: `$binary.f
66
binary[`Y`X]: 0 1 cut "f"$value flip binary.t

cifar.q

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
cifar.f:"cifar-10-binary"
33
cifar.b:"http://www.cs.toronto.edu/~kriz/"
44
-1"[down]loading CFAR-10 data set";
5-
.util.download[cifar.b;;".tar.gz";.util.untar] cifar.f;
5+
.ut.download[cifar.b;;".tar.gz";.ut.untar] cifar.f;
66
cifar.d:"cifar-10-batches-bin/"
77
-1"reading labels";
88
cifar.labels:`$10#read0`$cifar.d,"batches.meta.txt"

citibike.q

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,25 @@
1-
\l util.q
1+
\l ut.q
22
.z.zd:17 2 6
33
sd:2017.01m / start date
44
ed:2017.12m / end date
55

66
-1"[down]loading citibike data";
77
b:"http://s3.amazonaws.com/tripdata/"
8-
m1:.util.sseq[1] . 2014.09 2016.12m
8+
m1:.ut.sseq[1] . 2014.09 2016.12m
99
m1:m1 where m1 within (sd;ed)
1010
f1:,[;"-citibike-tripdata"] each string[m1] except\: "."
11-
.util.download[b;;".zip";.util.unzip] f1;
11+
.ut.download[b;;".zip";.ut.unzip] f1;
1212

13-
m2:.util.sseq[1] . 2017.01 2017.12m
13+
m2:.ut.sseq[1] . 2017.01 2017.12m
1414
m2:m2 where m2 within (sd;ed)
1515
f2:,[;"-citibike-tripdata"] each string[m2] except\: "."
16-
.util.download[b;;".csv.zip";.util.unzip] f2;
16+
.ut.download[b;;".csv.zip";.ut.unzip] f2;
1717

1818
/ data since 2018 has an extra column
19-
/ m3:.util.sseq[1] . 2018.01m,-1+"m"$.z.D
19+
/ m3:.ut.sseq[1] . 2018.01m,-1+"m"$.z.D
2020
/ f3:,[;"_citibikenyc_tripdata"] each string[m3] except\: "."
2121
/ -1"[down]loading citibike data";
22-
/ .util.download[b;;".csv.zip";.util.unzip] f3;
22+
/ .ut.download[b;;".csv.zip";.ut.unzip] f3;
2323

2424
process:{[month;f]
2525
-1"parsing ", string f;

cloud9.q

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
cloud9.f:("sample-small.txt";"sample-medium.txt";"sample-large.txt") 2
22
cloud9.b:"http://lintool.github.io/Cloud9/docs/exercises/"
33
-1"[down]loading cloud9 network graph";
4-
.util.download[cloud9.b;;"";""] cloud9.f;
4+
.ut.download[cloud9.b;;"";""] cloud9.f;
55
cloud9.l:flip raze {x[0],/:1_ x} each "J"$"\t" vs/: read0 `$cloud9.f

cossim.q

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66
X:.ml.normalize iris.X
77
flip C:.ml.skmeans[X] over .ml.forgy[3] X / spherical k-means
88
show m:.ml.mode each iris.y I:.ml.cgroup[.ml.cosdist;X;C] / classify
9-
avg iris.y=.util.ugrp m!I / accuracy
9+
avg iris.y=.ut.ugrp m!I / accuracy

decisiontree.q

+22-22
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ show t:weather.t
2424
avg t.Play=p:.ml.pdt[tr] t / accuracy
2525
-1"since the test and training data are the same, it is no surprise we have 100% accuracy";
2626
-1".ml.pdt does not fail on missing features. it digs deeper into the tree";
27-
.util.assert[.71428571428571431] avg t.Play=p:.ml.pdt[.ml.id3[();::] (1#`Outlook) _ t] t
27+
.ut.assert[.71428571428571431] avg t.Play=p:.ml.pdt[.ml.id3[();::] (1#`Outlook) _ t] t
2828
-1"id3 only handles discrete features. c4.5 handles continues features";
2929
-1".ml.q45 implements many of the features of c4.5 including:";
3030
-1"* information gain normalized by split info";
@@ -41,32 +41,32 @@ show s:@[t;`Humidity;:;85 90 78 96 80 70 65 95 70 80 70 90 75 80f]
4141

4242
z:@[{.qml.nicdf x};.0125;2.241403];
4343
-1 .ml.ptree[0] tr:.ml.prune[.ml.perr[z]] .ml.q45[();::] s;
44-
.util.assert[1f] avg s.Play=p:.ml.pdt[tr] s / accuracy
44+
.ut.assert[1f] avg s.Play=p:.ml.pdt[tr] s / accuracy
4545
-1"we can still handle null values by using the remaining features";
46-
.util.assert[`Yes] .ml.pdt[tr] d:`Outlook`Temperature`Humidity`Wind!(`Rain;`Hot;85f;`)
46+
.ut.assert[`Yes] .ml.pdt[tr] d:`Outlook`Temperature`Humidity`Wind!(`Rain;`Hot;85f;`)
4747
-1"we can even can handle nulls in the training data by propagating them down the tree";
4848
s:update Temperature:` from s where Humidity=70f
4949
-1 .ml.ptree[0] tr:.ml.q45[();::] s;
50-
.util.assert[`No] .ml.pdt[tr] d
50+
.ut.assert[`No] .ml.pdt[tr] d
5151
-1 "we can also use the Gini impurity instead of entropy (faster with similar behavior)";
5252
-1 .ml.ptree[0] tr:.ml.dt[.ml.gr;.ml.ogr;.ml.wgini;();::] t;
5353
d:`Outlook`Temperature`Humidity`Wind!(`Rain;`Hot;`High;`) / remove null
54-
.util.assert[`No] .ml.pdt[tr] d
54+
.ut.assert[`No] .ml.pdt[tr] d
5555
-1 "we can also create an aid tree when the target is numeric";
5656
-1 .ml.ptree[0] tr:.ml.aid[(1#`minsl)!1#3;::] update "e"$`Yes=Play from t; / regression tree
57-
.util.assert[.2] .ml.pdt[tr] d
57+
.ut.assert[.2] .ml.pdt[tr] d
5858
-1 "we can also create a thaid tree for classification";
5959
-1 .ml.ptree[0] tr:.ml.thaid[(1#`minsl)!1#3;::] t; / classification tree
60-
.util.assert[`Yes] .ml.pdt[tr] d
60+
.ut.assert[`Yes] .ml.pdt[tr] d
6161

6262
-1 "we can now split the iris data into training and test batches (w/ stratification)";
6363
w:`train`test!3 1
64-
show d:.util.part[w;iris.t.species] iris.t
64+
show d:.ut.part[w;iris.t.species] iris.t
6565
-1 "note that stratification can work on any type of list or table";
66-
.util.part[w;;iris.t] count[iris.t]?5;
67-
.util.part[w;select species from iris.t] iris.t;
66+
.ut.part[w;;iris.t] count[iris.t]?5;
67+
.ut.part[w;select species from iris.t] iris.t;
6868
-1 "next we confirm relative frequencies of species are the same";
69-
.util.assert[1b] .ml.identical value count each group d.train.species
69+
.ut.assert[1b] .ml.identical value count each group d.train.species
7070
-1 "then create a classification tree";
7171
-1 .ml.ptree[0] tr:.ml.ct[();::] `species xcols d`train;
7272
-1 "testing the tree on the test set produces an accuracy of:";
@@ -78,9 +78,9 @@ avg d.test.species=p:.ml.pdt[tr] d`test
7878

7979
-1 "we can predict iris petal lengths with a regression tree";
8080
-1 "first we need to one-hot encode the species";
81-
t:"f"$.util.onehot iris.t
81+
t:"f"$.ut.onehot iris.t
8282
-1 "then split the data into training and test batches"
83-
show d:.util.part[w;0N?] t
83+
show d:.ut.part[w;0N?] t
8484
-1 "and generate a regression tree";
8585
-1 .ml.ptree[0] tr:.ml.rt[();::] `plength xcols d`train;
8686
-1 "we now compute the root mean square error (rmse)";
@@ -95,38 +95,38 @@ t:([]z:`b`b`b`b`w`w`w`w`w`w`b`b`w`w`b`b)
9595
t:t,'([]x:1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4)
9696
t:t,'([]y:1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 )
9797
-1 .ml.ptree[0] tr:dtf t;
98-
.util.assert[0 0.125 0.125 0.25] first atr:flip .ml.dtmina[ef] scan (0f;tr)
98+
.ut.assert[0 0.125 0.125 0.25] first atr:flip .ml.dtmina[ef] scan (0f;tr)
9999

100100
-1 "we then pick the alpha (and therefore subtree) with cross validation";
101101
b:sqrt (1_a,0w)*a:atr 0 / geometric mean
102-
ts:.util.part[(k:10)#1;0N?] t
102+
ts:.ut.part[(k:10)#1;0N?] t
103103
show e:avg each ts[;`z]=p:.ml.dtxv[dtf;ef;b;ts] peach til k
104104
-1 .ml.ptree[0] atr[1] 0N!.ml.imax 0N!avg e;
105105

106106
-1 "returning to the iris data, we can grow and prune that too";
107107
-1 .ml.ptree[0] tr:dtf iris.t;
108-
.util.assert[0 .01 .02 .02 .04 .88 1f] 3*first atr:flip .ml.dtmina[ef] scan (0f;tr)
108+
.ut.assert[0 .01 .02 .02 .04 .88 1f] 3*first atr:flip .ml.dtmina[ef] scan (0f;tr)
109109
b:sqrt (1_a,0w)*a:atr 0 / geometric mean
110-
ts:.util.part[(k:10)#1;0N?]iris.t
110+
ts:.ut.part[(k:10)#1;0N?]iris.t
111111
show e:avg each ts[;`species]=p:.ml.dtxv[dtf;ef;b;ts] peach til k
112112
-1 .ml.ptree[0] atr[1] 0N!.ml.imax 0N!avg e;
113113

114114
-1 "or even grow and prune a regression tree with wine quality data";
115-
d:.util.part[`train`test!1 1;0N?] winequality.red.t
115+
d:.ut.part[`train`test!1 1;0N?] winequality.red.t
116116
dtf:.ml.rt[();::]
117117
ef:.ml.wmse
118118
-1 "the fully grown tree has more than 200 leaves!";
119-
.util.assert[1b] 200<0N!count .ml.leaves tr:dtf d`train
119+
.ut.assert[1b] 200<0N!count .ml.leaves tr:dtf d`train
120120
-1 "we can improve this by performing k-fold cross validation";
121121
-1 "first we find the list of critical alphas";
122122
atr:flip .ml.dtmina[ef] scan (0f;tr)
123123
b:sqrt (1_a,0w)*a:atr 0 / geometric mean
124-
ts:.util.part[(k:5)#1;0N?]d`train
124+
ts:.ut.part[(k:5)#1;0N?]d`train
125125
-1 "then we compute the accuracy of each of these alphas with kfxv";
126126
show e:avg each e*e:ts[;`quality]-p:(.ml.dtxv[dtf;ef;b;ts]0N!) peach til k
127127
-1 "finally, we pick the tree whose alpha had the min error";
128128
-1 .ml.ptree[0] btr:atr[1] 0N!.ml.imin 0N!avg e;
129129
-1 "the pruned tree has less than 25 leaves";
130-
.util.assert[1b] 25>0N!count .ml.leaves btr
130+
.ut.assert[1b] 25>0N!count .ml.leaves btr
131131
-1 "and an rms less than .73";
132-
.util.assert[1b] .73>0N!.ml.rms d.test.quality - .ml.pdt[btr] d`test
132+
.ut.assert[1b] .73>0N!.ml.rms d.test.quality - .ml.pdt[btr] d`test

dji.q

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ dji.f:"dow_jones_index"
22
dji.b:"http://archive.ics.uci.edu/ml/machine-learning-databases/"
33
dji.b,:"00312/"
44
-1"[down]loading dji data set";
5-
.util.download[dji.b;;".zip";.util.unzip] dji.f;
5+
.ut.download[dji.b;;".zip";.ut.unzip] dji.f;
66
dji.t:("HSDEEEEJFFJEEFHF";1#",")0: ssr[;"$";""] each read0 `$dji.f,".data"

em.q

+8-8
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ phi:2#1f%2f / coins are picked with equal probability
1818
/ which flips came from which THETA? pick maximum log likelihood
1919

2020
pT:(.ml.em[1b;lf;mf;x]//) pT
21-
.util.assert[1 0 0 1 0] .ml.imax .ml.likelihood[0b;lf;x] . pT
22-
.util.assert[1 0 0 1 0] .ml.imax .ml.likelihood[1b;.ml.binll[n];x] . pT
21+
.ut.assert[1 0 0 1 0] .ml.imax .ml.likelihood[0b;lf;x] . pT
22+
.ut.assert[1 0 0 1 0] .ml.imax .ml.likelihood[1b;.ml.binll[n];x] . pT
2323

2424

2525
/ multinomial example
@@ -36,7 +36,7 @@ p:.ml.imax .ml.likelihood[1b;.ml.mmmll;X] . pT
3636
show m:.ml.mode each y group p
3737
avg y=m p
3838
-1"what does the confusion matrix look like?";
39-
show .util.totals[`TOTAL] .ml.cm[y;m p]
39+
show .ut.totals[`TOTAL] .ml.cm[y;m p]
4040

4141

4242
/ Gaussian mixtures
@@ -46,7 +46,7 @@ mu0:10 20 30 / distribution's mu
4646
s20:s0*s0:1 3 2 / distribution's variance
4747
m0:100 200 150 / number of points per distribution
4848
X:raze X0:mu0+s0*(.ml.bm ?[;1f]::) each m0 / build dataset
49-
show .util.plt raze each (X0;0f*X0),'(X0;.ml.gaussl'[mu0;s20;X0]) / plot 1d data and gaussian curves
49+
show .ut.plt raze each (X0;0f*X0),'(X0;.ml.gaussl'[mu0;s20;X0]) / plot 1d data and gaussian curves
5050
k:count mu0
5151
phi:k#1f%k; / guess that distributions occur with equal frequency
5252
mu:neg[k]?X; / pick k random points as centers
@@ -71,8 +71,8 @@ p:.ml.imax .ml.likelihood[1b;.ml.gaussmvll;X] . pT
7171
show m:.ml.mode each y group p
7272
avg y=m p
7373
-1"what does the confusion matrix look like?";
74-
show .util.totals[`TOTAL] .ml.cm[y;m p]
75-
-1 value .util.plt .ml.append[0;X 0 2],'.ml.append[1] flip[pT[1;;0]] 0 2;
74+
show .ut.totals[`TOTAL] .ml.cm[y;m p]
75+
-1 value .ut.plt .ml.append[0;X 0 2],'.ml.append[1] flip[pT[1;;0]] 0 2;
7676

7777
-1"let's cluster hand written numbers into groups";
7878
-1"assuming each pixel of a black/white image is a Bernoulli distribution,";
@@ -82,7 +82,7 @@ show .util.totals[`TOTAL] .ml.cm[y;m p]
8282
X:1000#'X;y:1000#y;
8383
-1"convert the grayscale image into black/white";
8484
X>:128
85-
plt:value .util.plot[28;14;.util.c10;avg] .util.hmap flip 28 cut
85+
plt:value .ut.plot[28;14;.ut.c10;avg] .ut.hmap flip 28 cut
8686
k:10
8787
-1"let's use ",string[k]," clusters";
8888
-1"we first initialize phi to be equal weight across all clusters";
@@ -108,4 +108,4 @@ p:.ml.imax .ml.likelihood[1b;.ml.bmmll[1];X] . pT
108108
show m:.ml.mode each y group p
109109
avg y=m p
110110
-1"what does the confusion matrix look like?";
111-
show .util.totals[`TOTAL] .ml.cm[y;m p]
111+
show .ut.totals[`TOTAL] .ml.cm[y;m p]

emma.q

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
emma.f:"158.txt"
33
emma.b:"http://www.gutenberg.org/files/158/"
44
-1"[down]loading emma text";
5-
.util.download[emma.b;;"";""] emma.f;
5+
.ut.download[emma.b;;"";""] emma.f;
66
emma.txt:{x where not x like "VOLUME*"} read0 `$emma.f
77
emma.chapters:1_"CHAPTER" vs "\n" sv 39_-373_emma.txt
88
emma.s:{(3+first x ss"\n\n\n")_x} each emma.chapters

etl9b.q

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
etl9b.f:"ETL9B"
22
etl9b.b:"http://etlcdb.db.aist.go.jp/etlcdb/data/"
33
-1"[down]loading handwritten-kanji data set";
4-
.util.download[etl9b.b;;".zip";.util.unzip] etl9b.f;
4+
.ut.download[etl9b.b;;".zip";.ut.unzip] etl9b.f;
55
-1"loading etl9b ('binalized' dataset)";
6-
etl9b.x:.util.etl9b read1 `:ETL9B/ETL9B_1
6+
etl9b.x:.ut.etl9b read1 `:ETL9B/ETL9B_1
77
-1"extracting the X matrix and y vector";
88
etl9b.h:0x24,/:"x"$0x21+0x01*til 83 / hiragana
99
/ etl9b.h:0x25,/:"x"$0x21+0x01*til 83 / katakana (missing)

funq.q

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
\l util.q
1+
\l ut.q
22
\l ml.q
33
\l fmincg.q
44
\l porter.q
55

66
/ attempt to load c libraries
7-
(.util.loadf ` sv hsym[`$getenv`QHOME],) each`qml.q`svm.q`linear.q;
7+
(.ut.loadf ` sv hsym[`$getenv`QHOME],) each`qml.q`svm.q`linear.q;
88
if[`qml in key `;system "l qmlmm.q"] / use qml matrix operators

hac.q

+14-14
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,19 @@ L:.ml.link[`.ml.lw.ward] D
1414
-1"generate cluster indices";
1515
I:.ml.clust[L] 1+til 10
1616
-1"plot elbow curve (k vs ssw)";
17-
show .util.plt .ml.ssw[X] peach I
17+
show .ut.plt .ml.ssw[X] peach I
1818
-1"plot elbow curve (k vs % of variance explained)";
19-
show .util.plt (.ml.ssb[X] peach I)%.ml.sse[X]
19+
show .ut.plt (.ml.ssb[X] peach I)%.ml.sse[X]
2020
-1"link into 3 clusters";
2121
I:.ml.clust[L] 3
2222
-1"confirm accuracy";
2323
g:(.ml.mode each seeds.y I)!I
24-
.util.assert[0.9] .util.rnd[.01] avg seeds.y=.util.ugrp g
24+
.ut.assert[0.9] .ut.rnd[.01] avg seeds.y=.ut.ugrp g
2525

2626
-1"we can also check for maximum silhouette";
2727
-1"plot silhouette curve (k vs silhouette)";
2828
I:.ml.clust[L] 1+til 10
29-
show .util.plt (avg raze .ml.silhouette[.ml.edist;X]::) peach I
29+
show .ut.plt (avg raze .ml.silhouette[.ml.edist;X]::) peach I
3030

3131

3232
-1"normalize iris data set features";
@@ -38,35 +38,35 @@ L:.ml.link[`.ml.lw.median] D
3838
-1"generate cluster indices";
3939
I:.ml.clust[L] 1+til 10
4040
-1"plot elbow curve (k vs ssw)";
41-
show .util.plt .ml.ssw[X] peach I
41+
show .ut.plt .ml.ssw[X] peach I
4242
-1"plot elbow curve (k vs % of variance explained)";
43-
show .util.plt (.ml.ssb[X] peach I)%.ml.sse[X]
43+
show .ut.plt (.ml.ssb[X] peach I)%.ml.sse[X]
4444

4545
-1"link into 3 clusters";
4646
I:.ml.clust[L] 3
4747
-1"confirm accuracy";
4848
g:(.ml.mode each iris.y I)!I
49-
.util.assert[.97] .util.rnd[.01] avg iris.y=.util.ugrp g
49+
.ut.assert[.97] .ut.rnd[.01] avg iris.y=.ut.ugrp g
5050
-1"generate clusters indices";
5151
I:.ml.clust[L] 1+til 10
5252
-1"plot silhouette curve (k vs silhouette)";
53-
show .util.plt (avg raze .ml.silhouette[.ml.edist;X]::) peach I
53+
show .ut.plt (avg raze .ml.silhouette[.ml.edist;X]::) peach I
5454

5555
-1"let's apply the analysis to one of the uef reference cluster datasets";
5656
X:uef.d32
57-
show .util.plot[39;20;.util.c10;sum] X
57+
show .ut.plot[39;20;.ut.c10;sum] X
5858
-1"using pedist2 makes calculating the dissimilarity matrix much faster";
5959
D:sqrt .ml.pedist2[X;X]
6060
-1"generate hierarchical clustering linkage stats with ward metric";
6161
L:.ml.link[`.ml.lw.ward] D
6262
-1"generate cluster indices";
6363
I:.ml.clust[L] ks:1+til 19
6464
-1"plot elbow curve (k vs ssw)";
65-
show .util.plt .ml.ssw[X] peach I
65+
show .ut.plt .ml.ssw[X] peach I
6666
-1"plot elbow curve (k vs % of variance explained)";
67-
show .util.plt (.ml.ssb[X] peach I)%.ml.sse[X]
67+
show .ut.plt (.ml.ssb[X] peach I)%.ml.sse[X]
6868
-1"plot silhouette curve (k vs silhouette)";
69-
show .util.plt s:(avg raze .ml.silhouette[.ml.edist;X]::) peach I
70-
.util.assert[16] ks i:.ml.imax s
69+
show .ut.plt s:(avg raze .ml.silhouette[.ml.edist;X]::) peach I
70+
.ut.assert[16] ks i:.ml.imax s
7171
-1"plot the clustered data";
72-
show .util.plot[39;20;.util.c68;.ml.mode] X[0 1],enlist .util.ugrp I i
72+
show .ut.plot[39;20;.ut.c68;.ml.mode] X[0 1],enlist .ut.ugrp I i

0 commit comments

Comments
 (0)