intro2ai/p3_rl/test_cases/q8/4-discountgrid.solution

1211 lines
39 KiB
Plaintext

weights_k_0: """
{((0, 0), 'exit'): 0,
((0, 1), 'exit'): 0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): 0,
((1, 0), 'east'): 0,
((1, 0), 'north'): 0,
((1, 0), 'south'): 0,
((1, 0), 'west'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((2, 0), 'east'): 0,
((2, 0), 'north'): 0,
((2, 0), 'south'): 0,
((2, 0), 'west'): 0,
((2, 2), 'exit'): 0,
((2, 4), 'exit'): 0,
((3, 0), 'east'): 0,
((3, 0), 'north'): 0,
((3, 0), 'south'): 0,
((3, 0), 'west'): 0,
((3, 2), 'east'): 0,
((3, 2), 'north'): 0,
((3, 2), 'south'): 0,
((3, 2), 'west'): 0,
((3, 3), 'east'): 0,
((3, 3), 'north'): 0,
((3, 3), 'south'): 0,
((3, 3), 'west'): 0,
((3, 4), 'east'): 0,
((3, 4), 'north'): 0,
((3, 4), 'south'): 0,
((3, 4), 'west'): 0,
((4, 0), 'east'): 0,
((4, 0), 'north'): 0,
((4, 0), 'south'): 0,
((4, 0), 'west'): 0,
((4, 1), 'east'): 0,
((4, 1), 'north'): 0,
((4, 1), 'south'): 0,
((4, 1), 'west'): 0,
((4, 2), 'east'): 0,
((4, 2), 'north'): 0,
((4, 2), 'south'): 0,
((4, 2), 'west'): 0,
((4, 3), 'east'): 0,
((4, 3), 'north'): 0,
((4, 3), 'south'): 0,
((4, 3), 'west'): 0,
((4, 4), 'east'): 0,
((4, 4), 'north'): 0,
((4, 4), 'south'): 0,
((4, 4), 'west'): 0}
"""
q_values_k_0_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_0_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_1: """
{((0, 0), 'exit'): 0,
((0, 1), 'exit'): 0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): 0,
((1, 0), 'east'): 0,
((1, 0), 'north'): 0,
((1, 0), 'south'): 0.0,
((1, 0), 'west'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((2, 0), 'east'): 0,
((2, 0), 'north'): 0,
((2, 0), 'south'): 0,
((2, 0), 'west'): 0,
((2, 2), 'exit'): 0,
((2, 4), 'exit'): 0,
((3, 0), 'east'): 0,
((3, 0), 'north'): 0,
((3, 0), 'south'): 0,
((3, 0), 'west'): 0,
((3, 2), 'east'): 0,
((3, 2), 'north'): 0,
((3, 2), 'south'): 0,
((3, 2), 'west'): 0,
((3, 3), 'east'): 0,
((3, 3), 'north'): 0,
((3, 3), 'south'): 0,
((3, 3), 'west'): 0,
((3, 4), 'east'): 0,
((3, 4), 'north'): 0,
((3, 4), 'south'): 0,
((3, 4), 'west'): 0,
((4, 0), 'east'): 0,
((4, 0), 'north'): 0,
((4, 0), 'south'): 0,
((4, 0), 'west'): 0,
((4, 1), 'east'): 0,
((4, 1), 'north'): 0,
((4, 1), 'south'): 0,
((4, 1), 'west'): 0,
((4, 2), 'east'): 0,
((4, 2), 'north'): 0,
((4, 2), 'south'): 0,
((4, 2), 'west'): 0,
((4, 3), 'east'): 0,
((4, 3), 'north'): 0,
((4, 3), 'south'): 0,
((4, 3), 'west'): 0,
((4, 4), 'east'): 0,
((4, 4), 'north'): 0,
((4, 4), 'south'): 0,
((4, 4), 'west'): 0}
"""
q_values_k_1_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_1_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_2: """
{((0, 0), 'exit'): 0,
((0, 1), 'exit'): 0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): 0,
((1, 0), 'east'): 0,
((1, 0), 'north'): 0,
((1, 0), 'south'): 0.0,
((1, 0), 'west'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((2, 0), 'east'): 0,
((2, 0), 'north'): 0,
((2, 0), 'south'): 0,
((2, 0), 'west'): 0,
((2, 2), 'exit'): 0,
((2, 4), 'exit'): 0,
((3, 0), 'east'): 0,
((3, 0), 'north'): 0,
((3, 0), 'south'): 0.0,
((3, 0), 'west'): 0,
((3, 2), 'east'): 0,
((3, 2), 'north'): 0,
((3, 2), 'south'): 0,
((3, 2), 'west'): 0,
((3, 3), 'east'): 0,
((3, 3), 'north'): 0,
((3, 3), 'south'): 0,
((3, 3), 'west'): 0,
((3, 4), 'east'): 0,
((3, 4), 'north'): 0,
((3, 4), 'south'): 0,
((3, 4), 'west'): 0,
((4, 0), 'east'): 0,
((4, 0), 'north'): 0,
((4, 0), 'south'): 0,
((4, 0), 'west'): 0,
((4, 1), 'east'): 0,
((4, 1), 'north'): 0,
((4, 1), 'south'): 0,
((4, 1), 'west'): 0,
((4, 2), 'east'): 0,
((4, 2), 'north'): 0,
((4, 2), 'south'): 0,
((4, 2), 'west'): 0,
((4, 3), 'east'): 0,
((4, 3), 'north'): 0,
((4, 3), 'south'): 0,
((4, 3), 'west'): 0,
((4, 4), 'east'): 0,
((4, 4), 'north'): 0,
((4, 4), 'south'): 0,
((4, 4), 'west'): 0}
"""
q_values_k_2_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_2_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_3: """
{((0, 0), 'exit'): -1.0,
((0, 1), 'exit'): 0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): 0,
((1, 0), 'east'): 0,
((1, 0), 'north'): 0,
((1, 0), 'south'): 0.0,
((1, 0), 'west'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((2, 0), 'east'): 0,
((2, 0), 'north'): 0,
((2, 0), 'south'): 0,
((2, 0), 'west'): 0,
((2, 2), 'exit'): 0,
((2, 4), 'exit'): 0,
((3, 0), 'east'): 0,
((3, 0), 'north'): 0,
((3, 0), 'south'): 0.0,
((3, 0), 'west'): 0,
((3, 2), 'east'): 0,
((3, 2), 'north'): 0,
((3, 2), 'south'): 0,
((3, 2), 'west'): 0,
((3, 3), 'east'): 0,
((3, 3), 'north'): 0,
((3, 3), 'south'): 0,
((3, 3), 'west'): 0,
((3, 4), 'east'): 0,
((3, 4), 'north'): 0,
((3, 4), 'south'): 0,
((3, 4), 'west'): 0,
((4, 0), 'east'): 0,
((4, 0), 'north'): 0,
((4, 0), 'south'): 0,
((4, 0), 'west'): 0,
((4, 1), 'east'): 0,
((4, 1), 'north'): 0,
((4, 1), 'south'): 0,
((4, 1), 'west'): 0,
((4, 2), 'east'): 0,
((4, 2), 'north'): 0,
((4, 2), 'south'): 0,
((4, 2), 'west'): 0,
((4, 3), 'east'): 0,
((4, 3), 'north'): 0,
((4, 3), 'south'): 0,
((4, 3), 'west'): 0,
((4, 4), 'east'): 0,
((4, 4), 'north'): 0,
((4, 4), 'south'): 0,
((4, 4), 'west'): 0}
"""
q_values_k_3_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_3_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_4: """
{((0, 0), 'exit'): -1.0,
((0, 1), 'exit'): 0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): 0,
((1, 0), 'east'): 0.0,
((1, 0), 'north'): 0,
((1, 0), 'south'): 0.0,
((1, 0), 'west'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((2, 0), 'east'): 0,
((2, 0), 'north'): 0,
((2, 0), 'south'): 0,
((2, 0), 'west'): 0,
((2, 2), 'exit'): 0,
((2, 4), 'exit'): 0,
((3, 0), 'east'): 0,
((3, 0), 'north'): 0,
((3, 0), 'south'): 0.0,
((3, 0), 'west'): 0,
((3, 2), 'east'): 0,
((3, 2), 'north'): 0,
((3, 2), 'south'): 0,
((3, 2), 'west'): 0,
((3, 3), 'east'): 0,
((3, 3), 'north'): 0,
((3, 3), 'south'): 0,
((3, 3), 'west'): 0,
((3, 4), 'east'): 0,
((3, 4), 'north'): 0,
((3, 4), 'south'): 0,
((3, 4), 'west'): 0,
((4, 0), 'east'): 0,
((4, 0), 'north'): 0,
((4, 0), 'south'): 0,
((4, 0), 'west'): 0,
((4, 1), 'east'): 0,
((4, 1), 'north'): 0,
((4, 1), 'south'): 0,
((4, 1), 'west'): 0,
((4, 2), 'east'): 0,
((4, 2), 'north'): 0,
((4, 2), 'south'): 0,
((4, 2), 'west'): 0,
((4, 3), 'east'): 0,
((4, 3), 'north'): 0,
((4, 3), 'south'): 0,
((4, 3), 'west'): 0,
((4, 4), 'east'): 0,
((4, 4), 'north'): 0,
((4, 4), 'south'): 0,
((4, 4), 'west'): 0}
"""
q_values_k_4_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_4_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_5: """
{((0, 0), 'exit'): -1.0,
((0, 1), 'exit'): 0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): 0,
((0, 4), 'exit'): 0,
((1, 0), 'east'): 0.0,
((1, 0), 'north'): 0,
((1, 0), 'south'): 0.0,
((1, 0), 'west'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((2, 0), 'east'): 0,
((2, 0), 'north'): 0,
((2, 0), 'south'): 0,
((2, 0), 'west'): 0,
((2, 2), 'exit'): 0,
((2, 4), 'exit'): 0,
((3, 0), 'east'): 0,
((3, 0), 'north'): 0,
((3, 0), 'south'): 0.0,
((3, 0), 'west'): 0,
((3, 2), 'east'): 0,
((3, 2), 'north'): 0,
((3, 2), 'south'): 0,
((3, 2), 'west'): 0,
((3, 3), 'east'): 0,
((3, 3), 'north'): 0,
((3, 3), 'south'): 0,
((3, 3), 'west'): 0,
((3, 4), 'east'): 0,
((3, 4), 'north'): 0,
((3, 4), 'south'): 0,
((3, 4), 'west'): 0,
((4, 0), 'east'): 0,
((4, 0), 'north'): 0,
((4, 0), 'south'): 0,
((4, 0), 'west'): 0,
((4, 1), 'east'): 0,
((4, 1), 'north'): 0,
((4, 1), 'south'): 0,
((4, 1), 'west'): 0.0,
((4, 2), 'east'): 0,
((4, 2), 'north'): 0,
((4, 2), 'south'): 0,
((4, 2), 'west'): 0,
((4, 3), 'east'): 0,
((4, 3), 'north'): 0,
((4, 3), 'south'): 0,
((4, 3), 'west'): 0,
((4, 4), 'east'): 0,
((4, 4), 'north'): 0,
((4, 4), 'south'): 0,
((4, 4), 'west'): 0}
"""
q_values_k_5_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_5_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_6: """
{((0, 0), 'exit'): -1.0,
((0, 1), 'exit'): 0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): -1.0,
((0, 4), 'exit'): 0,
((1, 0), 'east'): 0.0,
((1, 0), 'north'): 0,
((1, 0), 'south'): 0.0,
((1, 0), 'west'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((2, 0), 'east'): 0,
((2, 0), 'north'): 0,
((2, 0), 'south'): 0,
((2, 0), 'west'): 0,
((2, 2), 'exit'): 0,
((2, 4), 'exit'): 0,
((3, 0), 'east'): 0,
((3, 0), 'north'): 0,
((3, 0), 'south'): 0.0,
((3, 0), 'west'): 0,
((3, 2), 'east'): 0,
((3, 2), 'north'): 0,
((3, 2), 'south'): 0,
((3, 2), 'west'): 0,
((3, 3), 'east'): 0,
((3, 3), 'north'): 0,
((3, 3), 'south'): 0,
((3, 3), 'west'): 0,
((3, 4), 'east'): 0,
((3, 4), 'north'): 0,
((3, 4), 'south'): 0,
((3, 4), 'west'): 0,
((4, 0), 'east'): 0,
((4, 0), 'north'): 0,
((4, 0), 'south'): 0,
((4, 0), 'west'): 0,
((4, 1), 'east'): 0,
((4, 1), 'north'): 0,
((4, 1), 'south'): 0,
((4, 1), 'west'): 0.0,
((4, 2), 'east'): 0,
((4, 2), 'north'): 0,
((4, 2), 'south'): 0,
((4, 2), 'west'): 0,
((4, 3), 'east'): 0,
((4, 3), 'north'): 0,
((4, 3), 'south'): 0,
((4, 3), 'west'): 0,
((4, 4), 'east'): 0,
((4, 4), 'north'): 0,
((4, 4), 'south'): 0,
((4, 4), 'west'): 0}
"""
q_values_k_6_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_6_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_7: """
{((0, 0), 'exit'): -1.0,
((0, 1), 'exit'): 0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): -1.0,
((0, 4), 'exit'): 0,
((1, 0), 'east'): 0.0,
((1, 0), 'north'): 0,
((1, 0), 'south'): 0.0,
((1, 0), 'west'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((2, 0), 'east'): 0,
((2, 0), 'north'): 0,
((2, 0), 'south'): 0,
((2, 0), 'west'): 0,
((2, 2), 'exit'): 0.1,
((2, 4), 'exit'): 0,
((3, 0), 'east'): 0,
((3, 0), 'north'): 0,
((3, 0), 'south'): 0.0,
((3, 0), 'west'): 0,
((3, 2), 'east'): 0,
((3, 2), 'north'): 0,
((3, 2), 'south'): 0,
((3, 2), 'west'): 0,
((3, 3), 'east'): 0,
((3, 3), 'north'): 0,
((3, 3), 'south'): 0,
((3, 3), 'west'): 0,
((3, 4), 'east'): 0,
((3, 4), 'north'): 0,
((3, 4), 'south'): 0,
((3, 4), 'west'): 0,
((4, 0), 'east'): 0,
((4, 0), 'north'): 0,
((4, 0), 'south'): 0,
((4, 0), 'west'): 0,
((4, 1), 'east'): 0,
((4, 1), 'north'): 0,
((4, 1), 'south'): 0,
((4, 1), 'west'): 0.0,
((4, 2), 'east'): 0,
((4, 2), 'north'): 0,
((4, 2), 'south'): 0,
((4, 2), 'west'): 0,
((4, 3), 'east'): 0,
((4, 3), 'north'): 0,
((4, 3), 'south'): 0,
((4, 3), 'west'): 0,
((4, 4), 'east'): 0,
((4, 4), 'north'): 0,
((4, 4), 'south'): 0,
((4, 4), 'west'): 0}
"""
q_values_k_7_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.1000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_7_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_8: """
{((0, 0), 'exit'): -1.0,
((0, 1), 'exit'): -1.0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): -1.0,
((0, 4), 'exit'): 0,
((1, 0), 'east'): 0.0,
((1, 0), 'north'): 0,
((1, 0), 'south'): 0.0,
((1, 0), 'west'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): 0,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((2, 0), 'east'): 0,
((2, 0), 'north'): 0,
((2, 0), 'south'): 0,
((2, 0), 'west'): 0,
((2, 2), 'exit'): 0.1,
((2, 4), 'exit'): 0,
((3, 0), 'east'): 0,
((3, 0), 'north'): 0,
((3, 0), 'south'): 0.0,
((3, 0), 'west'): 0,
((3, 2), 'east'): 0,
((3, 2), 'north'): 0,
((3, 2), 'south'): 0,
((3, 2), 'west'): 0,
((3, 3), 'east'): 0,
((3, 3), 'north'): 0,
((3, 3), 'south'): 0,
((3, 3), 'west'): 0,
((3, 4), 'east'): 0,
((3, 4), 'north'): 0,
((3, 4), 'south'): 0,
((3, 4), 'west'): 0,
((4, 0), 'east'): 0,
((4, 0), 'north'): 0,
((4, 0), 'south'): 0,
((4, 0), 'west'): 0,
((4, 1), 'east'): 0,
((4, 1), 'north'): 0,
((4, 1), 'south'): 0,
((4, 1), 'west'): 0.0,
((4, 2), 'east'): 0,
((4, 2), 'north'): 0,
((4, 2), 'south'): 0,
((4, 2), 'west'): 0,
((4, 3), 'east'): 0,
((4, 3), 'north'): 0,
((4, 3), 'south'): 0,
((4, 3), 'west'): 0,
((4, 4), 'east'): 0,
((4, 4), 'north'): 0,
((4, 4), 'south'): 0,
((4, 4), 'west'): 0}
"""
q_values_k_8_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.1000 illegal illegal
-1.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_8_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_9: """
{((0, 0), 'exit'): -1.0,
((0, 1), 'exit'): -1.0,
((0, 2), 'exit'): 0,
((0, 3), 'exit'): -1.0,
((0, 4), 'exit'): 0,
((1, 0), 'east'): 0.0,
((1, 0), 'north'): 0,
((1, 0), 'south'): 0.0,
((1, 0), 'west'): 0,
((1, 1), 'east'): 0,
((1, 1), 'north'): -0.09000000000000001,
((1, 1), 'south'): 0,
((1, 1), 'west'): 0,
((1, 2), 'east'): 0,
((1, 2), 'north'): 0,
((1, 2), 'south'): 0,
((1, 2), 'west'): 0,
((1, 3), 'east'): 0,
((1, 3), 'north'): 0,
((1, 3), 'south'): 0,
((1, 3), 'west'): 0,
((1, 4), 'east'): 0,
((1, 4), 'north'): 0,
((1, 4), 'south'): 0,
((1, 4), 'west'): 0,
((2, 0), 'east'): 0,
((2, 0), 'north'): 0,
((2, 0), 'south'): 0,
((2, 0), 'west'): 0,
((2, 2), 'exit'): 0.1,
((2, 4), 'exit'): 0,
((3, 0), 'east'): 0,
((3, 0), 'north'): 0,
((3, 0), 'south'): 0.0,
((3, 0), 'west'): 0,
((3, 2), 'east'): 0,
((3, 2), 'north'): 0,
((3, 2), 'south'): 0,
((3, 2), 'west'): 0,
((3, 3), 'east'): 0,
((3, 3), 'north'): 0,
((3, 3), 'south'): 0,
((3, 3), 'west'): 0,
((3, 4), 'east'): 0,
((3, 4), 'north'): 0,
((3, 4), 'south'): 0,
((3, 4), 'west'): 0,
((4, 0), 'east'): 0,
((4, 0), 'north'): 0,
((4, 0), 'south'): 0,
((4, 0), 'west'): 0,
((4, 1), 'east'): 0,
((4, 1), 'north'): 0,
((4, 1), 'south'): 0,
((4, 1), 'west'): 0.0,
((4, 2), 'east'): 0,
((4, 2), 'north'): 0,
((4, 2), 'south'): 0,
((4, 2), 'west'): 0,
((4, 3), 'east'): 0,
((4, 3), 'north'): 0,
((4, 3), 'south'): 0,
((4, 3), 'west'): 0,
((4, 4), 'east'): 0,
((4, 4), 'north'): 0,
((4, 4), 'south'): 0,
((4, 4), 'west'): 0}
"""
q_values_k_9_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal -0.0900 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.1000 illegal illegal
-1.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_9_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
weights_k_3000: """
{((0, 0), 'exit'): -9.999907386128688,
((0, 1), 'exit'): -9.999997424145315,
((0, 2), 'exit'): -9.999960132765212,
((0, 3), 'exit'): -9.999950781191618,
((0, 4), 'exit'): -9.999999718152583,
((1, 0), 'east'): 0.023883944628551798,
((1, 0), 'north'): 0.007345075517907217,
((1, 0), 'south'): -1.8500541605036829,
((1, 0), 'west'): -6.343206820101826,
((1, 1), 'east'): 0.2926773341159188,
((1, 1), 'north'): 0.34229408532424677,
((1, 1), 'south'): -0.5504925565503596,
((1, 1), 'west'): -7.476747986710549,
((1, 2), 'east'): 0.8687488956116186,
((1, 2), 'north'): 1.069417111875237,
((1, 2), 'south'): 0.20934913434979205,
((1, 2), 'west'): -8.039885148007473,
((1, 3), 'east'): 2.0499012629179343,
((1, 3), 'north'): 4.423789087362333,
((1, 3), 'south'): -0.5604917324646312,
((1, 3), 'west'): -5.401323597944641,
((1, 4), 'east'): 8.058415501251869,
((1, 4), 'north'): 4.320461066773469,
((1, 4), 'south'): -0.3521106259982003,
((1, 4), 'west'): -6.200128408737511,
((2, 0), 'east'): 0.009741671398482152,
((2, 0), 'north'): 0.007940007422962705,
((2, 0), 'south'): 0.005998204691157282,
((2, 0), 'west'): 0.017876817897686022,
((2, 2), 'exit'): 0.9999997681730781,
((2, 4), 'exit'): 9.99998874031536,
((3, 0), 'east'): 0.16105276299757887,
((3, 0), 'north'): 0.04837252814060002,
((3, 0), 'south'): 0.05142911717022169,
((3, 0), 'west'): 0.018771554676648216,
((3, 2), 'east'): 1.7398137867471506,
((3, 2), 'north'): 3.686710364480742,
((3, 2), 'south'): 1.5388764706778615,
((3, 2), 'west'): 0.96534651035605,
((3, 3), 'east'): 3.237326922914182,
((3, 3), 'north'): 5.228354896238455,
((3, 3), 'south'): 2.13459124715536,
((3, 3), 'west'): 4.048386126159169,
((3, 4), 'east'): 3.724489705852316,
((3, 4), 'north'): 6.151706012884094,
((3, 4), 'south'): 3.6948394494904564,
((3, 4), 'west'): 7.514601541200661,
((4, 0), 'east'): 0.20513468944645144,
((4, 0), 'north'): 0.37681951125732005,
((4, 0), 'south'): 0.12225019530041295,
((4, 0), 'west'): 0.1027564434880755,
((4, 1), 'east'): 0.6668747131568407,
((4, 1), 'north'): 1.0655192675373433,
((4, 1), 'south'): 0.10056777985567189,
((4, 1), 'west'): 0.3933711247168481,
((4, 2), 'east'): 1.267139958918678,
((4, 2), 'north'): 2.04180345588135,
((4, 2), 'south'): 0.5520994720362629,
((4, 2), 'west'): 1.6080936315813792,
((4, 3), 'east'): 2.174243240311463,
((4, 3), 'north'): 3.5128789267557274,
((4, 3), 'south'): 1.567368624478333,
((4, 3), 'west'): 3.4125636359365155,
((4, 4), 'east'): 3.3947156310730717,
((4, 4), 'north'): 3.809539514332263,
((4, 4), 'south'): 2.9139369998943274,
((4, 4), 'west'): 4.901442747463662}
"""
q_values_k_3000_action_north: """
illegal 4.3205 illegal 6.1517 3.8095
illegal 4.4238 __________ 5.2284 3.5129
illegal 1.0694 illegal 3.6867 2.0418
illegal 0.3423 __________ __________ 1.0655
illegal 0.0073 0.0079 0.0484 0.3768
"""
q_values_k_3000_action_east: """
illegal 8.0584 illegal 3.7245 3.3947
illegal 2.0499 __________ 3.2373 2.1742
illegal 0.8687 illegal 1.7398 1.2671
illegal 0.2927 __________ __________ 0.6669
illegal 0.0239 0.0097 0.1611 0.2051
"""
q_values_k_3000_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-9.9999 illegal illegal illegal illegal
"""
q_values_k_3000_action_south: """
illegal -0.3521 illegal 3.6948 2.9139
illegal -0.5605 __________ 2.1346 1.5674
illegal 0.2093 illegal 1.5389 0.5521
illegal -0.5505 __________ __________ 0.1006
illegal -1.8501 0.0060 0.0514 0.1223
"""
q_values_k_3000_action_west: """
illegal -6.2001 illegal 7.5146 4.9014
illegal -5.4013 __________ 4.0484 3.4126
illegal -8.0399 illegal 0.9653 1.6081
illegal -7.4767 __________ __________ 0.3934
illegal -6.3432 0.0179 0.0188 0.1028
"""