@@ -400,7 +400,7 @@ gaussl:{[mu;sigma;x]
 p:exp(x*x-:mu)%-2*sigma;
 p%:sqrt sigma*twopi;
 p}
- / guassian log likelihood
+ / gaussian log likelihood
gaussll:{[mu;sigma;X]-.5*sum(logtwopi;log sigma;(X*X-:mu)%sigma)}
/ gaussian maximum likelihood estimator
gaussmle:{[x](mu;avg x*x-:mu:avg x)}
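For context, a minimal usage sketch of the three definitions above (it assumes twopi and logtwopi, i.e. 2*pi and its log, are defined earlier in the script, since the functions reference them):

x:1 2 3 4 5f
theta:gaussmle x                     / (mu;sigma) = (3f;2f)
gaussl[theta 0;theta 1;x]            / per-observation density
gaussll[theta 0;theta 1;x]           / per-observation log likelihood (sum for the total)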
@@ -433,7 +433,7 @@ likelihood:{[l;lf;X;phi;THETA]
 p:$[l;p+log phi;p*phi]; / apply prior probabilities
 p}

- / using (l)ikelhood (f)unction, (w)eighted (m)aximum likelihood estimator
+ / using (l)ikelihood (f)unction, (w)eighted (m)aximum likelihood estimator
/ (f)unction with prior probabilities (p)hi and distribution parameters
/ (THETA) (with optional (f)itting of (p)hi) perform expectation maximization
em:{[fp;lf;wmf;X;phi;THETA]
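A hedged, self-contained sketch (not the library's em, whose body continues beyond this hunk) of the weighting the l flag controls: per-component densities are scaled by the priors phi (the p*phi branch), and normalizing each observation's column gives the E-step responsibilities; the p+log phi branch does the same additively in log space.

X:1 1.2 5 5.3 0.9
phi:0.5 0.5                           / mixture weights (priors)
P:(gaussl[0;1;X];gaussl[5;1;X])       / per-component densities (2 x n)
P*:phi                                / weight by prior probabilities
R:P%\:sum P                           / responsibilities, columns sum to 1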
@@ -448,7 +448,7 @@ em:{[fp;lf;wmf;X;phi;THETA]
tdm:{[c;v](-1_@[(1+count v)#0;;+;1]::) each v?c}

lntf:{log 1f+x} / log normalized term frequency
- dntf:{[k;x]k+(1f-k)*x%max each x} / double normalized term frequenecy
+ dntf:{[k;x]k+(1f-k)*x%max each x} / double normalized term frequency

idf:{log count[x]%sum 0<x} / inverse document frequency
idfs:{log 1f+count[x]%sum 0<x} / inverse document frequency smooth
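A small usage sketch of the term-frequency helpers, assuming documents are lists of symbols and rows of the term-document matrix are documents (as tdm's use of v?c suggests):

docs:(`the`cat`sat`on`the`mat;`the`dog`sat)
v:distinct raze docs                  / vocabulary
m:tdm[docs;v]                         / document-term count matrix
lntf[m] *\: idf m                     / one possible tf-idf weighting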
@@ -554,7 +554,7 @@ dt:{[cgf;ogf;ipf;opt;w;t]
 opt:(`maxd`minss`minsl`ming`maxff!(0N;2;1;0;::)),opt; / default options
 if[0=opt`maxd;:(w;first d)]; / check if we've reached max depth
 if[identical a:first d;:(w;a)]; / check if all values are equal
- if[opt[`minss]>count a;:(w;a)]; / check if insufficent samples
+ if[opt[`minss]>count a;:(w;a)]; / check if insufficient samples
 d:((neg floor opt[`maxff] count d)?key d)#d:1_d; / sub-select features
 d:{.[x isord z;y] z}[(cgf;ogf);(ipf;w;a)] peach d; / compute gains
 d:(where (any opt[`minsl]>count each last::) each d) _ d; / filter on minsl
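Reading the defaults above (a hedged interpretation based on the checks in this hunk): a maxd of 0N leaves the depth unlimited, minss and minsl are the minimum samples required to split and to keep a leaf, ming is presumably a minimum-gain threshold, and maxff is a function applied to the feature count, so the identity :: keeps every feature. Passing sqrt instead gives the usual random-forest style feature sub-sampling:

opt:enlist[`maxff]!enlist sqrt        / merged over the defaults by dt above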
@@ -575,7 +575,7 @@ dtcr:{[tr;d] / recursive component
 v:(,'/) tr[2] .z.s\:d; / dig deeper for null values
 v}

- / decistion tree pruning primitives
+ / decision tree pruning primitives

/ wilson score - binary confidence interval (Edwin Bidwell Wilson)
wscore:{[z;f;n](f+(.5*z2n)+-1 1f*z*sqrt((.25*z2n)+f-f*f)%n)%1f+z2n:z*z%n}
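The expression above is the Wilson score interval, (f + z^2/2n +- z*sqrt((z^2/4n + f*(1-f))%n)) % (1 + z^2/n), where f is the observed success fraction over n samples. For example, a leaf labelling 9 of 10 samples correctly gives, at 95% confidence (z=1.96):

wscore[1.96;0.9;10]                   / roughly 0.5959 0.9821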
@@ -637,7 +637,7 @@ dtkfxv:{[dtf;ef;a;ts]kfxvt[dtmincc[ef]\[;a]dtf::;dtc\:/:;ts]}

/ decision tree utilities

- / print leaf: prediction followd by classification error% or regresssion sse
+ / print leaf: prediction followed by classification error% or regression sse
pleaf:{[w;x]
 v:waom[w;x]; / value
 e:$[isord x;string sum e*e:v-x;string[.1*"i"$1e3*1f-avg x=v],"%"];
@@ -693,7 +693,7 @@ rt:dt[oig;oig;wmse] / regression tree
/ random forest

/ generate (n) decision trees by applying (f) to a resampled (with
- / replacemnt) (t)able
+ / replacement) (t)able
bag:{[n;f;t](f ?[;t]::) peach n#count t} / (b)ootstrap (ag)gregating

/ given an atom or list (k), and bootstrap aggregating (m)odel, make
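A self-contained sketch of the resampling mechanics only (not a full forest): each of the n copies of the row count becomes a with-replacement sample of (t) before (f) is applied, so a trivial (f) yields bootstrap estimates:

t:([]p:1+til 10)
bag[3;{avg x`p};t]                    / three bootstrap estimates of avg p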