TaxiDriverGym
In [ ]:
Copied!
import random
from IPython.display import clear_output
import gymnasium as gym
import numpy as np
from QLearning import QLearning
from numpy import loadtxt
import random
from IPython.display import clear_output
import gymnasium as gym
import numpy as np
from QLearning import QLearning
from numpy import loadtxt
In [ ]:
Copied!
env = gym.make("Taxi-v3", render_mode='ansi').env
env = gym.make("Taxi-v3", render_mode='ansi').env
In [ ]:
Copied!
# only execute the following lines if you want to create a new q-table
qlearn = QLearning(env, alpha=0.1, gamma=0.99, epsilon=0.7, epsilon_min=0.05, epsilon_dec=0.99, episodes=50000)
q_table = qlearn.train('data/q-table-taxi-driver.csv', 'results/actions_taxidriver')
#q_table = loadtxt('data/q-table-taxi-driver.csv', delimiter=',')
# only execute the following lines if you want to create a new q-table
qlearn = QLearning(env, alpha=0.1, gamma=0.99, epsilon=0.7, epsilon_min=0.05, epsilon_dec=0.99, episodes=50000)
q_table = qlearn.train('data/q-table-taxi-driver.csv', 'results/actions_taxidriver')
#q_table = loadtxt('data/q-table-taxi-driver.csv', delimiter=',')
In [ ]:
Copied!
(state, _) = env.reset()
epochs, penalties, reward = 0, 0, 0
done = False
frames = [] # for animation
(state, _) = env.reset()
epochs, penalties, reward = 0, 0, 0
done = False
frames = [] # for animation
In [ ]:
Copied!
while (not done) and (epochs < 100):
action = np.argmax(q_table[state])
state, reward, done, t, info = env.step(action)
if reward == -10:
penalties += 1
# Put each rendered frame into dict for animation
frames.append({
'frame': env.render(),
'state': state,
'action': action,
'reward': reward
}
)
epochs += 1
while (not done) and (epochs < 100):
action = np.argmax(q_table[state])
state, reward, done, t, info = env.step(action)
if reward == -10:
penalties += 1
# Put each rendered frame into dict for animation
frames.append({
'frame': env.render(),
'state': state,
'action': action,
'reward': reward
}
)
epochs += 1
In [ ]:
Copied!
from IPython.display import clear_output
from time import sleep
from IPython.display import clear_output
from time import sleep
In [ ]:
Copied!
clear_output(wait=True)
clear_output(wait=True)
In [ ]:
Copied!
def print_frames(frames):
for i, frame in enumerate(frames):
clear_output(wait=True)
print(frame['frame'])
#print(frame['frame'].getvalue())
print(f"Timestep: {i + 1}")
print(f"State: {frame['state']}")
print(f"Action: {frame['action']}")
print(f"Reward: {frame['reward']}")
sleep(.1)
def print_frames(frames):
for i, frame in enumerate(frames):
clear_output(wait=True)
print(frame['frame'])
#print(frame['frame'].getvalue())
print(f"Timestep: {i + 1}")
print(f"State: {frame['state']}")
print(f"Action: {frame['action']}")
print(f"Reward: {frame['reward']}")
sleep(.1)
In [ ]:
Copied!
print_frames(frames)
print_frames(frames)
In [ ]:
Copied!
print("\n")
print("Timesteps taken: {}".format(epochs))
print("Penalties incurred: {}".format(penalties))
print("\n")
print("Timesteps taken: {}".format(epochs))
print("Penalties incurred: {}".format(penalties))