import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from robocup_ssl_env import RoboCupSSLEnv

# Train a PPO agent on the RoboCup SSL environment, save it, then roll the
# trained policy out for a fixed number of rendered steps.

# Create the environment.
env = RoboCupSSLEnv()

# try/finally guarantees the environment is closed even if validation,
# training, or evaluation raises.
try:
    # Validate the environment against the interface Stable-Baselines3
    # expects before spending time on training.
    check_env(env)

    # Instantiate the agent.
    model = PPO("MlpPolicy", env, verbose=1)

    # Train the agent.
    model.learn(total_timesteps=10000)

    # Save the model. It can be restored later with
    # PPO.load("ppo_robocup_ssl").
    model.save("ppo_robocup_ssl")

    # Evaluate the trained agent.
    # Gymnasium's reset() returns (observation, info); only the observation
    # is fed to the policy.
    obs, info = env.reset()
    for _ in range(1000):
        action, _states = model.predict(obs, deterministic=True)
        # Gymnasium's step() returns five values; an episode ends when it is
        # either terminated (task outcome) or truncated (e.g. time limit).
        obs, reward, terminated, truncated, info = env.step(action)
        env.render()
        if terminated or truncated:
            obs, info = env.reset()
finally:
    env.close()