Skip to content

Commit

Permalink
Some revision suggestions in Maximization_bias's Problem
Browse files Browse the repository at this point in the history
  • Loading branch information
unknown committed Sep 14, 2018
1 parent 8d5965a commit 73b81f0
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions chapter06/maximization_bias.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def q_learning(q1, q2=None):
else:
active_q = q2
target_q = q1
- best_action = np.argmax(active_q[next_state])
+ best_action = np.random.choice([action_ for action_, value_ in enumerate(active_q[next_state]) if value_ == np.max(active_q[next_state])])
target = target_q[next_state][best_action]

# Q-Learning update
Expand All @@ -113,8 +113,8 @@ def figure_6_7():
for ep in range(0, episodes):
left_counts_q[run, ep] = q_learning(q)
left_counts_double_q[run, ep] = q_learning(q1, q2)
- left_counts_q = np.add.accumulate(left_counts_q, axis=1).mean(axis=0) / np.arange(1, episodes + 1)
- left_counts_double_q = np.add.accumulate(left_counts_double_q, axis=1).mean(axis=0) / np.arange(1, episodes + 1)
+ left_counts_q = left_counts_q.mean(axis=0)
+ left_counts_double_q = left_counts_double_q.mean(axis=0)

plt.plot(left_counts_q, label='Q-Learning')
plt.plot(left_counts_double_q, label='Double Q-Learning')
Expand Down
Binary file modified images/figure_6_7.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 73b81f0

Please sign in to comment.