| import gym
|
| import pickle
|
| import time
|
| import numpy as np
|
|
|
|
|
# Restore the trained agent saved by the training run.
# NOTE: unpickling can execute arbitrary code, so only load model files
# that come from a trusted source.
with open("q_learning_model.pkl", "rb") as model_file:
    model_data = pickle.load(model_file)

# Q_table is indexed by a discretized state and holds one value per action;
# state_bins gives the number of grid points used per state dimension.
Q_table, state_bins = (model_data[key] for key in ("Q_table", "state_bins"))
|
|
|
def discretize_state(state, low=None, high=None, bin_counts=None):
    """Map a continuous observation to a tuple of per-dimension bin indices.

    For each dimension ``i`` an evenly spaced grid of ``bin_counts[i]``
    points is laid out between ``low[i]`` and ``high[i]``; the index is the
    position of the last grid point that is <= the observed value.

    Args:
        state: Sequence of observed values, one per dimension.
        low: Per-dimension lower bounds. Defaults to the module-level
            ``env.observation_space.low``.
        high: Per-dimension upper bounds. Defaults to the module-level
            ``env.observation_space.high``.
        bin_counts: Number of grid points per dimension. Defaults to the
            module-level ``state_bins``.

    Returns:
        A tuple of ints, each in ``[0, bin_counts[i] - 1]``, suitable for
        indexing the Q-table.
    """
    if low is None:
        low = env.observation_space.low
    if high is None:
        high = env.observation_space.high
    if bin_counts is None:
        bin_counts = state_bins

    indices = []
    for i, value in enumerate(state):
        count = int(bin_counts[i])
        edges = np.linspace(low[i], high[i], count)
        index = int(np.digitize(value, edges)) - 1
        # A value below the lower bound would give -1, which silently wraps
        # around to the *last* bin when used as an array index; clamp instead.
        indices.append(min(max(index, 0), count - 1))
    return tuple(indices)
|
|
|
# Evaluation run: replay the greedy policy from the loaded Q-table in a
# visible window. With render_mode="human" the environment draws itself on
# every step, so no explicit env.render() call is needed.
env = gym.make("MountainCar-v0", render_mode="human")

test_episodes = 10

try:
    for episode in range(test_episodes):
        state, _ = env.reset()
        state = discretize_state(state)
        done = False
        total_reward = 0

        print(f"Testing Episode {episode + 1}")

        while not done:
            # Greedy action: highest Q-value for the current discrete state.
            action = int(np.argmax(Q_table[state]))
            next_state, reward, terminated, truncated, _ = env.step(action)
            state = discretize_state(next_state)
            total_reward += reward

            # The episode ends either when the environment reports a terminal
            # state or when it is truncated (e.g. by a step limit). Checking
            # only the first flag can loop forever if the goal is never
            # reached.
            done = terminated or truncated

            # Slow playback down so the motion is watchable.
            time.sleep(0.03)

        print(f"Total reward for Episode {episode + 1}: {total_reward}")
finally:
    # Always release the render window, even on error or Ctrl-C.
    env.close()
|
|
|