import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

# Use seaborn's "darkgrid" theme for every figure drawn after this point.
sns.set_theme(style="darkgrid")

# Load the results table. The plots below read its `episode`, `reward` and
# `algorithm` columns.
df = pd.read_csv('results/100_cliffwalking.csv')
# Preview of the first rows; the value is not stored, so it is only visible
# when run interactively (e.g. as the last expression of a notebook cell).
df.head()

# Hyperparameter values displayed in the plot titles as α, ε and γ.
p_alpha, p_epsilon, p_gamma = 0.1, 0.1, 0.99

def _plot_learning_curve(data, title, xlim=None, ylim=None):
    """Draw one reward-per-episode learning curve and display it.

    A line is drawn for each distinct value of the ``algorithm`` column,
    with ``episode`` on the x-axis and ``reward`` on the y-axis.

    Args:
        data: DataFrame providing the ``episode``, ``reward`` and
            ``algorithm`` columns.
        title: Text shown as the figure title.
        xlim: Optional ``[low, high]`` limits for the x-axis. ``None``
            leaves matplotlib's automatic range untouched.
        ylim: Optional ``[low, high]`` limits for the y-axis. ``None``
            leaves matplotlib's automatic range untouched.

    Returns:
        The matplotlib ``Legend`` created for the figure.
    """
    ax = sns.lineplot(x="episode", y="reward", hue='algorithm', data=data)

    # Only override the axis ranges that the caller explicitly asked for.
    if xlim is not None:
        ax.set_xlim(xlim)
    if ylim is not None:
        ax.set_ylim(ylim)

    ax.set_xlabel('Episodes')
    ax.set_ylabel('Reward per Episode')
    ax.set_title(title)

    # Recreate the legend with the heading "Algorithms".
    legend = ax.legend(title='Algorithms')
    plt.show()
    return legend


# The same title is used on every figure, so build it once.
learning_curve_title = (
    f'Learning curve for Cliff Walking problem (α={p_alpha}, γ={p_gamma}, ε={p_epsilon})'
)

# Figure 1: all episodes, automatic axis ranges.
legend = _plot_learning_curve(df, learning_curve_title)

# Figure 2: zoom on episodes 300-500 with rewards restricted to [-100, 0].
legend = _plot_learning_curve(
    df, learning_curve_title, xlim=[300, 500], ylim=[-100, 0]
)

# Figure 3: all episodes, rewards restricted to [-100, 0].
legend = _plot_learning_curve(df, learning_curve_title, ylim=[-100, 0])

# Output of `df.head()` (presumably from the data-loading step above):
#
#    episode   reward   algorithm
# 0      1.0   -672.0  Q-Learning
# 1      2.0  -2730.0  Q-Learning
# 2      3.0   -230.0  Q-Learning
# 3      4.0   -164.0  Q-Learning
# 4      5.0   -229.0  Q-Learning