import torch import torch.nn as nn import torch.optim as optim import gym import random import numpy as np from collections import deque
class QNetwork(nn.Module):
    """Three-layer fully connected network with ReLU hidden activations.

    Two hidden layers of equal width feed a linear output layer. Judging by
    the class name it is presumably used as a Q-function approximator
    (input = state features, output = one value per action) -- confirm
    against the caller.
    """

    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int) -> None:
        """Create the three linear layers.

        Args:
            input_dim: Number of features in the last dimension of the input.
            hidden_dim: Width shared by both hidden layers.
            output_dim: Number of features in the last dimension of the output.
        """
        # nn.Module must be initialised before any sub-module is assigned,
        # otherwise the layers below are not registered as parameters.
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the network.

        Args:
            x: Tensor whose last dimension has size ``input_dim``.

        Returns:
            Tensor whose last dimension has size ``output_dim``. The output
            layer is purely linear (no activation is applied to it).
        """
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)


class Agent:
    def init(
        self,
        env,
        input_dim,
        hidden_dim,
        output_dim,
        epsilon=1.0,
        epsilon_min=0.01,
        epsilon_decay=0.995,
        gamma=0.99,
        batch_size=64,
        memory_size=10000,
        lr=0.001,
    ):
        self.env = env
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.gamma = gamma
        self.batch_size = batch_size
        self.memory_size = memory_size
        self.lr