In [1]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# Apply seaborn's "darkgrid" theme before any figure is drawn.
sns.set_theme(style="darkgrid")

# Load the logged results and preview the first rows
# (the plots below read the 'episode', 'reward' and 'algorithm' columns).
results_path = 'results/100_cliffwalking.csv'
df = pd.read_csv(results_path)
df.head()
Out[1]:
 | episode | reward | algorithm |
---|---|---|---|
0 | 1.0 | -672.0 | Q-Learning |
1 | 2.0 | -2730.0 | Q-Learning |
2 | 3.0 | -230.0 | Q-Learning |
3 | 4.0 | -164.0 | Q-Learning |
4 | 5.0 | -229.0 | Q-Learning |
In [2]:
# Values interpolated into the plot titles as α, γ and ε (listed in that order).
# Presumably the learning rate, discount factor and exploration rate of the
# runs stored in the CSV -- TODO confirm; in the cells shown here they are
# only used as title labels and are not checked against the data.
p_alpha, p_gamma, p_epsilon = 0.1, 0.99, 0.1
In [5]:
# Full learning curve: reward against episode, split by the 'algorithm' column.
ax = sns.lineplot(data=df, x="episode", y="reward", hue='algorithm')
ax.set(
    xlabel='Episodes',
    ylabel='Reward per Episode',
    title=f'Learning curve for Cliff Walking problem (α={p_alpha}, γ={p_gamma}, ε={p_epsilon})',
)
# Recreate the legend and give it the heading 'Algorithms'.
legend = ax.legend()
legend.set_title('Algorithms')
plt.show()
In [6]:
# Same curve as the full plot, zoomed to episodes 300-500 and rewards in [-100, 0].
ax = sns.lineplot(data=df, x="episode", y="reward", hue='algorithm')
ax.set(
    xlim=(300, 500),
    ylim=(-100, 0),
    xlabel='Episodes',
    ylabel='Reward per Episode',
    title=f'Learning curve for Cliff Walking problem (α={p_alpha}, γ={p_gamma}, ε={p_epsilon})',
)
# Recreate the legend and give it the heading 'Algorithms'.
legend = ax.legend()
legend.set_title('Algorithms')
plt.show()
In [7]:
# All episodes, with the reward axis clipped to [-100, 0].
ax = sns.lineplot(data=df, x="episode", y="reward", hue='algorithm')
ax.set(
    ylim=(-100, 0),
    xlabel='Episodes',
    ylabel='Reward per Episode',
    title=f'Learning curve for Cliff Walking problem (α={p_alpha}, γ={p_gamma}, ε={p_epsilon})',
)
# Recreate the legend and give it the heading 'Algorithms'.
legend = ax.legend()
legend.set_title('Algorithms')
plt.show()
In [ ]: