We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 39d121a, commit f3594e6 (Copy full SHA for f3594e6)
1 file changed
machine_learning/q_learning.py
@@ -118,6 +118,7 @@ def get_policy() -> dict[State, int]:
118
"""
119
Extract a deterministic policy from the Q-table.
120
121
+
122
>>> q_table.clear()
123
>>> q_table[(1, 2)][1] = 2.0
124
>>> q_table[(1, 2)][2] = 1.0
@@ -127,7 +128,7 @@ def get_policy() -> dict[State, int]:
127
128
policy: dict[State, int] = {}
129
for s, a_dict in q_table.items():
130
if a_dict:
- policy[s] = max(a_dict, key=a_dict.get)
131
+ policy[s] = max(a_dict, key=lambda a: a_dict[a])
132
return policy
133
134
0 commit comments