We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a7b5349 commit 39d121a Copy full SHA for 39d121a
1 file changed
machine_learning/q_learning.py
@@ -104,17 +104,13 @@ def update(
104
global LEARNING_RATE, DISCOUNT_FACTOR
105
alpha = alpha if alpha is not None else LEARNING_RATE
106
gamma = gamma if gamma is not None else DISCOUNT_FACTOR
107
- max_q_next = 0.0 if done or not next_available_actions else max(
108
- get_q_value(next_state, a) for a in next_available_actions
109
max_q_next = (
110
0.0
111
if done or not next_available_actions
112
else max(get_q_value(next_state, a) for a in next_available_actions)
113
)
114
old_q = get_q_value(state, action)
115
- new_q = (1 - alpha) * old_q + alpha * (
116
- reward + gamma * max_q_next
117
- )
+ new_q = (1 - alpha) * old_q + alpha * (reward + gamma * max_q_next)
118
q_table[state][action] = new_q
119
120
0 commit comments