"""
Select which model configuration class to use based on a flag.
"""


def Config(reader, flag="azenuz_small"):
    """Build and return the config object named by `flag` from `reader`."""
    if flag == "azenuz_small":
        return configazenuz_small(reader)
    elif flag == "small_amazon":
        return smallconfig_amazon(reader)
    elif flag == "test_ptb":
        return testconfig_ptb(reader)
    elif flag == "small_amazontree":
        return smallconfig_amazontree(reader)
    elif flag == "small_amazontree1":
        return smallconfig_amazontree1(reader)
    elif flag == "small_amazontree2":
        return smallconfig_amazontree2(reader)
    elif flag == "small_amazontree3":
        return smallconfig_amazontree3(reader)
    else:
        raise ValueError("Invalid model: %s" % flag)
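

# Usage sketch (added note, hedged): `reader` is assumed to be this project's
# data loader; the selected config's __init__ reads attributes such as
# train_set, feature_num or itemdict from it, e.g.
#     config = Config(loader, flag="small_amazontree")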


class configazenuz_small(object):
    epoch_num = 1000
    train_batch_size = 128  # 128
    train_step_size = 4  # 20
    valid_batch_size = 128  # 20
    valid_step_size = 4
    hidden_size = 16  # 512

    lstm_forget_bias = 0.0
    # max_grad_norm = 0.25
    max_grad_norm = 1
    init_scale = 0.05
    learning_rate = 0.01  # 0.001  # 0.2
    decay = 0.5
    decay_when = 0.002  # AUC
    decay_epoch = 200
    sgd_opt = 'RMSProp'
    beta = 0.0001
    GNN_step = 3
    dropout_prob = 0
    adagrad_eps = 1e-5
    gpu = 0

    def __init__(self, loader):
        self.node_num = len(loader.train_set[0])  # the num of features
        self.feature_num = loader.feature_num
        assert self.node_num == len(self.feature_num)

        self.user_size = len(loader.train_set)
        max_step = 0
        for line in loader.train_set:
            if max_step < len(line):
                max_step = len(line)
        self.maxstep_size = max_step + 1

        print "usernum", self.user_size
        print 'node_num', self.node_num
        print 'feature_num', self.feature_num
        print "maxstep_size %d" % self.maxstep_size
        print "gpu_id {}".format(self.gpu)
        print "learning_rate {}".format(self.learning_rate)


class smallconfig_amazon(object):
    epoch_num = 1000
    train_batch_size = 1  # 128
    train_step_size = 4  # 20
    valid_batch_size = 1  # 128
    valid_step_size = 4  # 20
    test_batch_size = 1  # 20
    test_step_size = 4
    word_embedding_dim = 100  # 512; assumed value (mirrors smallconfig_amazontree), needed by the print in __init__

    def __init__(self, loader):
        # each itemdict value is a pair; per-coordinate vocab size is the max index + 2
        vec = loader.itemdict.values()
        # print vec
        vec_r, vec_c = zip(*vec)
        self.vocab_size = (max(vec_r) + 2, max(vec_c) + 2)
        # self.vocab_size = loader.num_items  # 10000
        max_step = 0
        for line in loader.train_set:
            if max_step < len(line):
                max_step = len(line)
        self.maxstep_size = max_step + 1
        print "word-embedding %d" % self.word_embedding_dim


class smallconfig_amazontree(object):
    epoch_num = 1000
    train_batch_size = 100  # 128
    train_step_size = 4  # 20
    valid_batch_size = 100  # 128
    valid_step_size = 4  # 20
    test_batch_size = 100  # 20
    test_step_size = 4
    word_embedding_dim = 100  # 512
    lstm_layers = 1
    lstm_size = 100  # 512
    lstm_forget_bias = 0.0
    # max_grad_norm = 0.25
    max_grad_norm = 1
    init_scale = 0.05
    learning_rate = 1  # 0.2
    decay = 0.5
    decay_when = 0.002  # AUC
    dropout_prob = 0
    adagrad_eps = 1e-5
    gpu = 1

    def __init__(self, loader):
        vec = loader.itemdict.values()
        # vec_r, vec_c = zip(*vec)
        self.tree_size = len(zip(*vec)) - 1
        cat = [max(voc) + 2 for voc in zip(*vec)]
        self.vocab_size = tuple(cat)
        # self.vocab_size = loader.num_items  # 10000
        max_step = 0
        self.loader = loader
        for line in loader.train_set:
            if max_step < len(line):
                max_step = len(line)
        self.user_size = len(loader.train_set)
        self.maxstep_size = max_step + 1
        self.layer_embed = (0.2, 0.3, 0.3, 0.2)
        self.vocab_size_all = len(loader.itemdict)
        assert len(self.layer_embed) == self.tree_size
        print "usernum", self.user_size
        print 'itemnum_vocab_size_all', self.vocab_size_all
        print 'itemnum_vocab_size', self.vocab_size
        print "word-embedding %d" % self.word_embedding_dim


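# Note on the variants below (added, hedged): each subclass only overrides
# layer_embed.  Judging from the commented-out word_embedding_dim line, the
# tuple appears to give the share of the item embedding assigned to each tree
# level; its length must equal tree_size and its entries sum to 1.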
class smallconfig_amazontree1(smallconfig_amazontree):
    def __init__(self, loader):
        smallconfig_amazontree.__init__(self, loader)
        self.layer_embed = (0.1, 0.1, 0.3, 0.5)
        # self.word_embedding_dim = (self.word_embedding_dim / self.layer_embed[-1]) * sum(self.layer_embed)


class smallconfig_amazontree2(smallconfig_amazontree):
    def __init__(self, loader):
        smallconfig_amazontree.__init__(self, loader)
        self.layer_embed = (0, 0, 0, 1)
        # self.word_embedding_dim = (self.word_embedding_dim / self.layer_embed[-1]) * sum(self.layer_embed)


class smallconfig_amazontree3(smallconfig_amazontree):
    def __init__(self, loader):
        smallconfig_amazontree.__init__(self, loader)
        self.layer_embed = (0.6, 0.1, 0.1, 0.2)
        # self.word_embedding_dim = (self.word_embedding_dim / self.layer_embed[-1]) * sum(self.layer_embed)


class testconfig_ptb(object):
    """Tiny config, for testing."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 1
    num_layers = 1
    num_steps = 2
    hidden_size = 2
    max_epoch = 1
    max_max_epoch = 1
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20

    def __init__(self, reader):
        self.vocab_size = len(reader.vocab.words)  # 10000
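

if __name__ == "__main__":
    # Minimal smoke test (added sketch, not from the original repo): a stub
    # standing in for the real data loader, carrying only the attributes that
    # configazenuz_small reads; only the lengths of the toy lists matter here.
    class _StubLoader(object):
        feature_num = [10, 20, 30]       # one cardinality per node
        train_set = [[0, 1, 2], [3, 4]]  # two toy "users"

    cfg = Config(_StubLoader(), flag="azenuz_small")
    print "smoke-test maxstep_size", cfg.maxstep_size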