tensorflow Keras符号输入/输出不执行'__len__'错误

tez616oj  于 2022-11-16  发布在  其他
关注(0)|答案(2)|浏览(972)

我想让一个人工智能玩我的自定义环境,不幸的是,当我运行我的代码,以下错误累积:

File "C:\Program Files\JetBrains\PyCharm Community Edition 2021.2\plugins\python-ce\helpers\pydev\_pydev_bundle\pydev_umd.py", line 198, in runfile
    pydev_imports.execfile(filename, global_vars, local_vars)  # execute the script
  File "C:\Program Files\JetBrains\PyCharm Community Edition 2021.2\plugins\python-ce\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
    exec(compile(contents+"\n", file, 'exec'), glob, loc)
  File "D:/PycharmProjects/Custom Enviroment AI/Enviroment.py", line 88, in <module>
    DQN = buildAgent(model, actions)
  File "D:/PycharmProjects/Custom Enviroment AI/Enviroment.py", line 82, in buildAgent
    dqn = DQNAgent(model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=10,
  File "D:\PycharmProjects\Custom Enviroment AI\venv\lib\site-packages\rl\agents\dqn.py", line 108, in __init__
    if hasattr(model.output, '__len__') and len(model.output) > 1:
  File "D:\PycharmProjects\Custom Enviroment AI\venv\lib\site-packages\keras\engine\keras_tensor.py", line 221, in __len__
    raise TypeError('Keras symbolic inputs/outputs do not '
TypeError: Keras symbolic inputs/outputs do not implement `__len__`. You may be trying to pass Keras symbolic inputs/outputs to a TF API that does not register dispatching, preventing Keras from automatically converting the API call to a lambda layer in the Functional Model. This error will also get raised if you try asserting a symbolic input/output directly.

这个错误说你不能使用len()而应该使用.shape isead,不幸的是这似乎是tensorflow内部的一个错误我的完整代码是:

from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy
from rl.agents.dqn import DQNAgent
from keras.layers import Dense
import tensorflow as tf
import numpy as np
import random
import pygame
import gym

class Env(gym.Env):
    def __init__(self):
        self.action_space = gym.spaces.Discrete(4)
        self.observation_space = gym.spaces.MultiDiscrete([39, 27])
        self.screen = pygame.display.set_mode((800, 600))
        self.PlayerX = 0
        self.PlayerY = 0
        self.FoodX = 0
        self.FoodY = 0
        self.state = [self.FoodX - self.PlayerX + 19, self.FoodY - self.PlayerY + 14]
        self.timeLimit = 1000

    def render(self, mode="human"):
        self.screen.fill((0, 0, 0))
        pygame.draw.rect(self.screen, (255, 255, 255), pygame.Rect(self.PlayerX * 40, self.PlayerY * 40, 40, 40))
        pygame.draw.rect(self.screen, (255, 0, 0), pygame.Rect(self.FoodX * 40, self.FoodY * 40, 40, 40))
        pygame.display.update()

    def reset(self):
        self.FoodX = random.randint(1, 19)
        self.FoodY = random.randint(1, 14)
        self.PlayerX = 0
        self.PlayerY = 0
        self.timeLimit = 1000
        return self.state

    def step(self, action):
        self.timeLimit -= 1
        reward = -1

        if action == 0 and self.PlayerY > 0:
            self.PlayerY -= 1
        if action == 1 and self.PlayerX > 0:
            self.PlayerX -= 1
        if action == 2 and self.PlayerY < 14:
            self.PlayerY += 1
        if action == 3 and self.PlayerX < 19:
            self.PlayerX += 1

        if self.PlayerX == self.FoodX and self.PlayerY == self.FoodY:
            reward += 30
            self.FoodX = random.randint(1, 19)
            self.FoodY = random.randint(1, 14)

        if self.timeLimit <= 0:
            done = True
        else:
            done = False

        self.state = [self.FoodX - self.PlayerX, self.FoodY - self.PlayerY]
        return self.state, reward, done

env = Env()

states = env.observation_space.shape
actions = env.action_space.n

def build_model(states, actions):
    model = tf.keras.Sequential()
    model.add(Dense(2, activation='relu', input_shape=states))
    model.add(Dense(4, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

def buildAgent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=10,
                   target_model_update=1e-2)
    return dqn

model = build_model(states, actions)
DQN = buildAgent(model, actions)
DQN.compile(tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=['mae'])
DQN.fit(env, nb_steps=50000, visualize=False, verbose=1)
scores = DQN.test(env, nb_episodes=100, visualize=True)
print(np.mean(scores.history['episode_reward']))
pygame.quit()
model.save('model.h5')

我使用Tensorflow:2.8.0。这似乎是Tensorflow代码中的错误,但我不知道该怎么办

efzxgjgh

efzxgjgh1#

here所述,您需要安装keras-rl的更新版本:

!pip install keras-rl2

您还需要向输入形状添加一个额外的维度,并在最后添加一个Flatten层,因为Keras在使用DQN代理时需要这样做:

def build_model(states, actions):
    model = tf.keras.Sequential()
    model.add(Dense(2, activation='relu', input_shape=(1, states[0])))
    model.add(Dense(4, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    model.add(Flatten())
    return model

最后,自定义环境中的step方法还必须返回一个info字典(我刚刚创建了一个空字典):

def step(self, action):
        self.timeLimit -= 1
        reward = -1

        if action == 0 and self.PlayerY > 0:
            self.PlayerY -= 1
        if action == 1 and self.PlayerX > 0:
            self.PlayerX -= 1
        if action == 2 and self.PlayerY < 14:
            self.PlayerY += 1
        if action == 3 and self.PlayerX < 19:
            self.PlayerX += 1

        if self.PlayerX == self.FoodX and self.PlayerY == self.FoodY:
            reward += 30
            self.FoodX = random.randint(1, 19)
            self.FoodY = random.randint(1, 14)

        if self.timeLimit <= 0:
            done = True
        else:
            done = False

        self.state = [self.FoodX - self.PlayerX, self.FoodY - self.PlayerY]
        return self.state, reward, done, {}

如果您进行了这些更改,它应该可以正常工作。

from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy
from rl.agents.dqn import DQNAgent
from keras.layers import Dense, Flatten
import tensorflow as tf
import numpy as np
import random
import pygame
import gym

class Env(gym.Env):
    def __init__(self):
        self.action_space = gym.spaces.Discrete(4)
        self.observation_space = gym.spaces.MultiDiscrete([39, 27])
        self.screen = pygame.display.set_mode((800, 600))
        self.PlayerX = 0
        self.PlayerY = 0
        self.FoodX = 0
        self.FoodY = 0
        self.state = [self.FoodX - self.PlayerX + 19, self.FoodY - self.PlayerY + 14]
        self.timeLimit = 1000

    def render(self, mode="human"):
        self.screen.fill((0, 0, 0))
        pygame.draw.rect(self.screen, (255, 255, 255), pygame.Rect(self.PlayerX * 40, self.PlayerY * 40, 40, 40))
        pygame.draw.rect(self.screen, (255, 0, 0), pygame.Rect(self.FoodX * 40, self.FoodY * 40, 40, 40))
        pygame.display.update()

    def reset(self):
        self.FoodX = random.randint(1, 19)
        self.FoodY = random.randint(1, 14)
        self.PlayerX = 0
        self.PlayerY = 0
        self.timeLimit = 1000
        return self.state

    def step(self, action):
        self.timeLimit -= 1
        reward = -1

        if action == 0 and self.PlayerY > 0:
            self.PlayerY -= 1
        if action == 1 and self.PlayerX > 0:
            self.PlayerX -= 1
        if action == 2 and self.PlayerY < 14:
            self.PlayerY += 1
        if action == 3 and self.PlayerX < 19:
            self.PlayerX += 1

        if self.PlayerX == self.FoodX and self.PlayerY == self.FoodY:
            reward += 30
            self.FoodX = random.randint(1, 19)
            self.FoodY = random.randint(1, 14)

        if self.timeLimit <= 0:
            done = True
        else:
            done = False

        self.state = [self.FoodX - self.PlayerX, self.FoodY - self.PlayerY]
        return self.state, reward, done, {}

env = Env()

states = env.observation_space.shape
actions = env.action_space.n

def build_model(states, actions):
    model = tf.keras.Sequential()
    model.add(Dense(2, activation='relu', input_shape=(1, states[0])))
    model.add(Dense(4, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    model.add(Flatten())
    return model

def buildAgent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=10,
                   target_model_update=1e-2)
    return dqn

model = build_model(states, actions)
DQN = buildAgent(model, actions)

DQN.compile(tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=['mae'])
DQN.fit(env, nb_steps=50000, visualize=False, verbose=1)
scores = DQN.test(env, nb_episodes=100, visualize=True)
print(np.mean(scores.history['episode_reward']))
pygame.quit()
model.save('model.h5')

如需详细信息,请参阅docs

ylamdve6

ylamdve62#

尝试安装keras-rl 2 1.0.4版。

pip install keras-rl2==1.0.4

相关问题