|
| 1 | +mlens.f:("ml-latest";"ml-latest-small") 1 / index 1 picks "ml-latest-small", the smaller dataset |
| 2 | +mlens.b:"http://files.grouplens.org/datasets/movielens/" / base url handed to .ut.download |
| 3 | +-1"[down]loading latest movielens data set"; / -1 writes the progress message to stdout |
| 4 | +.ut.download[mlens.b;;".zip";.ut.unzip] mlens.f; / projection: mlens.f fills the elided 2nd argument; presumably fetches and unzips the archive - confirm in .ut.download |
| 5 | +-1"loading movie definitions: integer movieIds and enumerated genres"; |
| 6 | +mlens.movie:1!("I**";1#",") 0: `$mlens.f,"/movies.csv" / csv with header row: int, string, string columns; first column becomes the key |
| 7 | +-1"removing movies without genres"; |
| 8 | +update 0#'genres from `mlens.movie where genres like "(no genres listed)"; / rows are kept; only their genres text is replaced by an empty string |
| 9 | +-1"converting unicode in titles to ascii"; |
| 10 | +update .ut.sr[.ut.ua] peach rtrim title from `mlens.movie; / rtrim first, then .ut.sr[.ut.ua] per title via peach; helper semantics not visible here - presumably a search/replace table, confirm |
| 11 | +-1"extracting the movie's year from the title"; |
| 12 | +update year:"I"$-1_/:-5#/:title from `mlens.movie; / last 5 chars minus the final one, parsed as int; assumes titles end in "(YYYY)", otherwise year is null |
| 13 | +update -7_/:title from `mlens.movie where not null year; / drop the trailing 7 chars (the " (YYYY)" suffix) only where a year was parsed |
| 14 | +-1"adding `u on movieId and splitting genres"; |
| 15 | +update `u#movieId,`$"|"vs'genres from `mlens.movie / unique attribute on the key; genres text becomes a list of symbols per movie |
| 16 | +-1"adding the decade as a genre"; |
| 17 | +update genres:(genres,'`$string 10 xbar year) from `mlens.movie / appends year floored to a multiple of 10 as a symbol; NOTE(review): check what is appended when year is null |
| 18 | +-1"enumerating genres"; |
| 19 | +mlens.movie:update `genre?/:genres from mlens.movie / enumerates each genre list against the global list genre, extending it as needed; reassigns instead of updating by name |
| 20 | +-1"loading movie ratings"; |
| 21 | +mlens.rating:("IIFP";1#",") 0:`$mlens.f,"/ratings.csv" / csv with header row: int, int, float, timestamp columns |
| 22 | +-1"adding `p on userId and linking movieId to movie table"; |
| 23 | +update `p#userId,`mlens.movie$movieId from `mlens.rating; / parted attribute on userId (assumes equal userIds are contiguous - confirm); movieId becomes a foreign key into mlens.movie |
0 commit comments