-
Notifications
You must be signed in to change notification settings - Fork 2
/
biblio.bib
243 lines (214 loc) · 8.38 KB
/
biblio.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
% "Nov" is the publication month, not an issue number, so it is stored with
% the standard month macro instead of the `number` field.
@article{auer2002using,
  author  = {Auer, Peter},
  title   = {Using confidence bounds for exploitation-exploration trade-offs},
  journal = {Journal of Machine Learning Research},
  volume  = {3},
  month   = nov,
  pages   = {397--422},
  year    = {2002},
}
% The proceedings title is a proper name and is stored with its capitals.
@inproceedings{li2010contextual,
  author    = {Li, Lihong and Chu, Wei and Langford, John and Schapire, Robert E},
  title     = {A contextual-bandit approach to personalized news article recommendation},
  booktitle = {Proceedings of the 19th International Conference on World Wide Web},
  pages     = {661--670},
  year      = {2010},
}
% PMLR publishes the proceedings, so it belongs in `publisher`
% (`organization` is for the sponsoring body).
@inproceedings{agrawal2013thompson,
  author    = {Agrawal, Shipra and Goyal, Navin},
  title     = {Thompson sampling for contextual bandits with linear payoffs},
  booktitle = {International Conference on Machine Learning},
  pages     = {127--135},
  year      = {2013},
  publisher = {PMLR},
}
% Springer is the publisher of the proceedings, hence `publisher` rather than
% `organization`; the conference name is stored with its capitals.
@inproceedings{kaufmann2012thompson,
  author    = {Kaufmann, Emilie and Korda, Nathaniel and Munos, R{\'e}mi},
  title     = {Thompson sampling: An asymptotically optimal finite-time analysis},
  booktitle = {International Conference on Algorithmic Learning Theory},
  pages     = {199--213},
  year      = {2012},
  publisher = {Springer},
}
% {Q-learning} is brace-protected so styles that lowercase titles keep the
% capital Q. IEEE is the publisher, so it is stored in `publisher`.
@inproceedings{tijsma2016comparing,
  author    = {Tijsma, Arryon D and Drugan, Madalina M and Wiering, Marco A},
  title     = {Comparing exploration strategies for {Q-learning} in random stochastic mazes},
  booktitle = {2016 IEEE Symposium Series on Computational Intelligence (SSCI)},
  pages     = {1--8},
  year      = {2016},
  publisher = {IEEE},
}
% Publisher name written with its proper capitalisation.
@book{sutton2018reinforcement,
  author    = {Sutton, Richard S and Barto, Andrew G},
  title     = {Reinforcement learning: An introduction},
  year      = {2018},
  publisher = {MIT Press},
}
% arXiv preprint; the identifier is carried in the `journal` field.
% {Atari} is a proper noun and is brace-protected against title lowercasing.
@article{mnih2013playing,
  author  = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin},
  title   = {Playing {Atari} with deep reinforcement learning},
  journal = {arXiv preprint arXiv:1312.5602},
  year    = {2013},
}
% Journal name capitalised; standard styles print `journal` verbatim.
% `and others` is the BibTeX token for a truncated author list.
@article{mnih2015human,
  author    = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  year      = {2015},
  publisher = {Nature Publishing Group},
}
% arXiv preprint; the identifier is carried in the `journal` field.
@article{schaul2015prioritized,
  author  = {Schaul, Tom and Quan, John and Antonoglou, Ioannis and Silver, David},
  title   = {Prioritized experience replay},
  journal = {arXiv preprint arXiv:1511.05952},
  year    = {2015},
}
% Lowercase "van" lets BibTeX parse it as the name particle (von part).
% {Q-learning} is brace-protected so the capital Q survives title lowercasing.
@inproceedings{van2016deep,
  author    = {van Hasselt, Hado and Guez, Arthur and Silver, David},
  title     = {Deep reinforcement learning with double {Q-learning}},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {30},
  number    = {1},
  year      = {2016},
}
% Conference paper, so it is typed as inproceedings with the venue in
% `booktitle`. The surname carries its "van" particle, and {Q-learning} is
% brace-protected against title lowercasing.
@inproceedings{hasselt2010double,
  author    = {van Hasselt, Hado},
  title     = {Double {Q-learning}},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {23},
  pages     = {2613--2621},
  year      = {2010},
}
% Lowercase "van" lets BibTeX parse it as the name particle (von part).
@inproceedings{hessel2018rainbow,
  author    = {Hessel, Matteo and Modayil, Joseph and van Hasselt, Hado and Schaul, Tom and Ostrovski, Georg and Dabney, Will and Horgan, Dan and Piot, Bilal and Azar, Mohammad and Silver, David},
  title     = {Rainbow: Combining improvements in deep reinforcement learning},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {32},
  number    = {1},
  year      = {2018},
}
% arXiv preprint; the identifier is carried in the `journal` field.
% {Q-learning} and the acronym {MDPs} are brace-protected so their capitals
% survive styles that lowercase titles.
@article{hausknecht2015deep,
  author  = {Hausknecht, Matthew and Stone, Peter},
  title   = {Deep recurrent {Q-learning} for partially observable {MDPs}},
  journal = {arXiv preprint arXiv:1507.06527},
  year    = {2015},
}
% arXiv preprint; the identifier is carried in the `journal` field.
@article{schulman2015high,
  author  = {Schulman, John and Moritz, Philipp and Levine, Sergey and Jordan, Michael and Abbeel, Pieter},
  title   = {High-dimensional continuous control using generalized advantage estimation},
  journal = {arXiv preprint arXiv:1506.02438},
  year    = {2015},
}
% The proceedings title is a proper name and is stored with its capitals.
@inproceedings{sutton2000policy,
  author    = {Sutton, Richard S and McAllester, David A and Singh, Satinder P and Mansour, Yishay},
  title     = {Policy gradient methods for reinforcement learning with function approximation},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1057--1063},
  year      = {2000},
}
% Conference name capitalised as in the file's other ICML entries; PMLR is
% the publisher, so it is stored in `publisher` rather than `organization`.
@inproceedings{mnih2016asynchronous,
  author    = {Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  title     = {Asynchronous methods for deep reinforcement learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {1928--1937},
  year      = {2016},
  publisher = {PMLR},
}
% Conference name capitalised as in the file's other ICML entries; PMLR is
% the publisher, so it is stored in `publisher` rather than `organization`.
@inproceedings{schulman2015trust,
  author    = {Schulman, John and Levine, Sergey and Abbeel, Pieter and Jordan, Michael and Moritz, Philipp},
  title     = {Trust region policy optimization},
  booktitle = {International Conference on Machine Learning},
  pages     = {1889--1897},
  year      = {2015},
  publisher = {PMLR},
}
% Conference paper, so it is typed as inproceedings with the venue in
% `booktitle`, matching how this file cites the same proceedings series
% elsewhere.
@inproceedings{bhatnagar2007incremental,
  author    = {Bhatnagar, Shalabh and Ghavamzadeh, Mohammad and Lee, Mark and Sutton, Richard S},
  title     = {Incremental natural actor-critic algorithms},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {20},
  pages     = {105--112},
  year      = {2007},
}
% Journal article: Machine Learning, volume 108, issue 8.
@article{pajarinen2019compatible,
  author    = {Pajarinen, Joni and Thai, Hong Linh and Akrour, Riad and Peters, Jan and Neumann, Gerhard},
  title     = {Compatible natural gradient policy search},
  journal   = {Machine Learning},
  volume    = {108},
  number    = {8},
  pages     = {1443--1466},
  year      = {2019},
  publisher = {Springer},
}
% arXiv preprint; the identifier is carried in the `journal` field.
@article{schulman2017proximal,
  author  = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  title   = {Proximal policy optimization algorithms},
  journal = {arXiv preprint arXiv:1707.06347},
  year    = {2017},
}
% arXiv preprint; the identifier is carried in the `journal` field.
@article{degris2012off,
  author  = {Degris, Thomas and White, Martha and Sutton, Richard S},
  title   = {Off-policy actor-critic},
  journal = {arXiv preprint arXiv:1205.4839},
  year    = {2012},
}
% Cited as the conference paper with its full author list. The departmental
% publication-series listing is preserved in `note`.
% NOTE(review): confirm pages and publisher against the proceedings record.
@inproceedings{precup2000eligibility,
  author    = {Precup, Doina and Sutton, Richard S and Singh, Satinder P},
  title     = {Eligibility traces for off-policy policy evaluation},
  booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning},
  pages     = {759--766},
  year      = {2000},
  publisher = {Morgan Kaufmann},
  note      = {Computer Science Department Faculty Publication Series 80},
}
% arXiv preprint; the identifier is carried in the `journal` field.
@article{munos2016safe,
  author  = {Munos, R{\'e}mi and Stepleton, Tom and Harutyunyan, Anna and Bellemare, Marc G},
  title   = {Safe and efficient off-policy reinforcement learning},
  journal = {arXiv preprint arXiv:1606.02647},
  year    = {2016},
}
% arXiv preprint; the identifier is carried in the `journal` field.
% "R{\'e}mi" is spelled with its accent, as in the file's other entries for
% the same author, so the name renders and sorts consistently.
@article{wang2016sample,
  author  = {Wang, Ziyu and Bapst, Victor and Heess, Nicolas and Mnih, Volodymyr and Munos, R{\'e}mi and Kavukcuoglu, Koray and de Freitas, Nando},
  title   = {Sample efficient actor-critic with experience replay},
  journal = {arXiv preprint arXiv:1611.01224},
  year    = {2016},
}
% arXiv preprint; the identifier is carried in the `journal` field.
@article{lillicrap2015continuous,
  author  = {Lillicrap, Timothy P and Hunt, Jonathan J and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
  title   = {Continuous control with deep reinforcement learning},
  journal = {arXiv preprint arXiv:1509.02971},
  year    = {2015},
}
% Conference name capitalised as in the file's other ICML entries; PMLR is
% the publisher, so it is stored in `publisher` rather than `organization`.
@inproceedings{silver2014deterministic,
  author    = {Silver, David and Lever, Guy and Heess, Nicolas and Degris, Thomas and Wierstra, Daan and Riedmiller, Martin},
  title     = {Deterministic policy gradient algorithms},
  booktitle = {International Conference on Machine Learning},
  pages     = {387--395},
  year      = {2014},
  publisher = {PMLR},
}
% The second author's surname carries the particle "van"; lowercase lets
% BibTeX parse it as the von part. PMLR is the publisher, so it is stored in
% `publisher` rather than `organization`.
@inproceedings{fujimoto2018addressing,
  author    = {Fujimoto, Scott and van Hoof, Herke and Meger, David},
  title     = {Addressing function approximation error in actor-critic methods},
  booktitle = {International Conference on Machine Learning},
  pages     = {1587--1596},
  year      = {2018},
  publisher = {PMLR},
}
% arXiv preprint; the identifier is carried in the `journal` field.
% NOTE(review): surname "TB" is written in capitals as the author is usually
% credited — confirm against the paper's author list.
@article{barth2018distributed,
  author  = {Barth-Maron, Gabriel and Hoffman, Matthew W and Budden, David and Dabney, Will and Horgan, Dan and TB, Dhruva and Muldal, Alistair and Heess, Nicolas and Lillicrap, Timothy},
  title   = {Distributed distributional deterministic policy gradients},
  journal = {arXiv preprint arXiv:1804.08617},
  year    = {2018},
}
% arXiv preprint; the identifier is carried in the `journal` field.
% {Q-Prop} is the method's name and is brace-protected so styles that
% lowercase titles leave its capitals alone.
@article{gu2016q,
  author  = {Gu, Shixiang and Lillicrap, Timothy and Ghahramani, Zoubin and Turner, Richard E and Levine, Sergey},
  title   = {{Q-Prop}: Sample-efficient policy gradient with an off-policy critic},
  journal = {arXiv preprint arXiv:1611.02247},
  year    = {2016},
}
% Online lecture series; the link is carried in `howpublished`.
@misc{silver2015,
  author       = {Silver, David},
  title        = {Lectures on Reinforcement Learning},
  howpublished = {\textsc{url:}~\url{https://www.davidsilver.uk/teaching/}},
  year         = {2015},
}