Spaces:

zjowowen
/

gomoku

Sleeping

App Files Files Community

gomoku / DI-engine /ding /model /template /coma.py

zjowowen

init space

079c32c 8 months ago

raw

history blame

No virus

12.5 kB

	from typing import Dict, Union
	import torch
	import torch.nn as nn

	from functools import reduce
	from ding.torch_utils import one_hot, MLP
	from ding.utils import squeeze, list_split, MODEL_REGISTRY, SequenceType
	from .q_learning import DRQN


	class COMAActorNetwork(nn.Module):
	    """
	    Overview:
	        Decentralized actor network in COMA algorithm. All agents share one ``DRQN``: the agent axis is \
	        folded into the batch axis before the call and unfolded again afterwards.
	    Interface:
	        ``__init__``, ``forward``
	    """

	    def __init__(
	        self,
	        obs_shape: int,
	        action_shape: int,
	        hidden_size_list: SequenceType = [128, 128, 64],
	    ):
	        """
	        Overview:
	            Initialize COMA actor network
	        Arguments:
	            - obs_shape (:obj:`int`): the dimension of each agent's observation state. It is forwarded to \
	                ``DRQN`` unchanged (the ``forward`` example passes a 1-tuple ``(N, )``).
	            - action_shape (:obj:`int`): the dimension of action shape
	            - hidden_size_list (:obj:`list`): the list of hidden size, default to [128, 128, 64]. The default \
	                list is only read here and handed to ``DRQN``; it is never modified by this class.
	        """
	        super(COMAActorNetwork, self).__init__()
	        self.main = DRQN(obs_shape, action_shape, hidden_size_list)

	    def forward(self, inputs: Dict) -> Dict:
	        """
	        Overview:
	            The forward computation graph of COMA actor network
	        Arguments:
	            - inputs (:obj:`dict`): input data dict with keys ['obs', 'prev_state']
	                - obs (:obj:`dict`): dict holding ``agent_state`` and ``action_mask``
	                    - agent_state (:obj:`torch.Tensor`): each agent local state(obs), either \
	                        :math:`(T, B, A, N)` or, for a single timestep, :math:`(B, A, N)`
	                    - action_mask (:obj:`torch.Tensor`): the masked action, returned untouched \
	                        (may be ``None`` as in the example below)
	                - prev_state (:obj:`list`): the previous hidden state as a nested list indexed \
	                    ``[batch][agent]``
	        Returns:
	            - output (:obj:`dict`): output data dict with keys ['logit', 'next_state', 'action_mask']
	        ArgumentsKeys:
	            - necessary: ``obs`` { ``agent_state``, ``action_mask`` }, ``prev_state``
	        ReturnsKeys:
	            - necessary: ``logit``, ``next_state``, ``action_mask``
	        Shapes:
	            - logit (:obj:`torch.Tensor`): :math:`(T, B, A, P)` for 4-dim input, :math:`(B, A, P)` for 3-dim \
	                input, where P is whatever size remains after reshaping the ``DRQN`` logit
	            - next_state (:obj:`list`): first result of ``list_split(next_state, step=A)``; presumably the \
	                same ``[batch][agent]`` nesting as ``prev_state`` (confirm against ``list_split``)
	        Examples:
	            >>> T, B, A, N = 4, 8, 3, 32
	            >>> embedding_dim = 64
	            >>> action_dim = 6
	            >>> data = torch.randn(T, B, A, N)
	            >>> model = COMAActorNetwork((N, ), action_dim, [128, embedding_dim])
	            >>> prev_state = [[None for _ in range(A)] for _ in range(B)]
	            >>> for t in range(T):
	            ...     inputs = {'obs': {'agent_state': data[t], 'action_mask': None}, 'prev_state': prev_state}
	            ...     outputs = model(inputs)
	            ...     logit, prev_state = outputs['logit'], outputs['next_state']
	        """
	        agent_state = inputs['obs']['agent_state']
	        prev_state = inputs['prev_state']
	        # A 3-dim input (B, A, N) is one timestep: add a leading T=1 axis now and strip it from the logit later.
	        unsqueeze_flag = len(agent_state.shape) == 3
	        if unsqueeze_flag:
	            agent_state = agent_state.unsqueeze(0)
	        T, B, A = agent_state.shape[:3]
	        # Merge batch and agent axes so the shared DRQN sees B * A independent sequences.
	        agent_state = agent_state.reshape(T, -1, *agent_state.shape[3:])
	        # Flatten the [batch][agent] nesting in a single pass; chaining ``+`` over the outer list would
	        # re-copy the accumulated list for every batch entry.
	        prev_state = [state for agent_states in prev_state for state in agent_states]
	        output = self.main({'obs': agent_state, 'prev_state': prev_state, 'enable_fast_timestep': True})
	        logit, next_state = output['logit'], output['next_state']
	        # Regroup the flat state list in steps of A (one group per batch entry); the remainder is discarded.
	        next_state, _ = list_split(next_state, step=A)
	        logit = logit.reshape(T, B, A, -1)
	        if unsqueeze_flag:
	            logit = logit.squeeze(0)
	        return {'logit': logit, 'next_state': next_state, 'action_mask': inputs['obs']['action_mask']}


	class COMACriticNetwork(nn.Module):
	    """
	    Overview:
	        Centralized critic network in COMA algorithm. For every agent it assembles a feature vector from \
	        the global state, the agent's own observation, all agents' last actions, the other agents' current \
	        actions and the agent id, then maps it to one Q value per action.
	    Interface:
	        ``__init__``, ``forward``
	    """

	    def __init__(
	        self,
	        input_size: int,
	        action_shape: int,
	        hidden_size: int = 128,
	    ):
	        """
	        Overview:
	            initialize COMA critic network
	        Arguments:
	            - input_size (:obj:`int`): the size of the per-agent feature vector produced by \
	                ``_preprocess_data`` (last dimension of its output)
	            - action_shape (:obj:`int`): the dimension of action shape, also the number of Q values produced
	            - hidden_size (:obj:`int`): the hidden size handed to ``MLP`` and used as input size of the \
	                final linear layer, default to 128
	        """
	        super(COMACriticNetwork, self).__init__()
	        self.action_shape = action_shape
	        self.act = nn.ReLU()
	        # ``MLP`` is a project helper; presumably (in, hidden, out, layer_num) -- confirm against ding.torch_utils.
	        self.mlp = nn.Sequential(
	            MLP(input_size, hidden_size, hidden_size, 2, activation=self.act), nn.Linear(hidden_size, action_shape)
	        )

	    def forward(self, data: Dict) -> Dict:
	        """
	        Overview:
	            forward computation graph of COMA critic network
	        Arguments:
	            - data (:obj:`dict`): input data dict with keys ['obs', 'action']
	                - agent_state (:obj:`torch.Tensor`): each agent local state(obs)
	                - global_state (:obj:`torch.Tensor`): global state(obs)
	                - action (:obj:`torch.Tensor`): the action index taken by each agent
	        Returns:
	            - output (:obj:`dict`): output data dict with keys ['q_value']
	        ArgumentsKeys:
	            - necessary: ``obs`` { ``agent_state``, ``global_state`` }, ``action``
	        ReturnsKeys:
	            - necessary: ``q_value``
	        Shapes:
	            - agent_state (:obj:`torch.Tensor`): :math:`(T, B, A, N)`
	            - global_state (:obj:`torch.Tensor`): :math:`(T, B, M)`
	            - action (:obj:`torch.Tensor`): :math:`(T, B, A)`
	            - q_value (:obj:`torch.Tensor`): :math:`(T, B, A, P)` with P = ``action_shape``, assuming \
	                ``MLP`` acts on the last dimension like ``nn.Linear`` does
	        Examples:
	            >>> agent_num, bs, T = 4, 3, 8
	            >>> obs_dim, global_obs_dim, action_dim = 32, 32 * 4, 9
	            >>> coma_model = COMACriticNetwork(
	            ...     obs_dim - action_dim + global_obs_dim + 2 * action_dim * agent_num, action_dim)
	            >>> data = {
	            ...     'obs': {
	            ...         'agent_state': torch.randn(T, bs, agent_num, obs_dim),
	            ...         'global_state': torch.randn(T, bs, global_obs_dim),
	            ...     },
	            ...     'action': torch.randint(0, action_dim, size=(T, bs, agent_num)),
	            ... }
	            >>> output = coma_model(data)
	        """
	        x = self._preprocess_data(data)
	        q = self.mlp(x)
	        return {'q_value': q}

	    def _preprocess_data(self, data: Dict) -> torch.Tensor:
	        """
	        Overview:
	            preprocess data so that it can be fed to the MLP net
	        Arguments:
	            - data (:obj:`dict`): input data dict with keys ['obs', 'action']
	                - agent_state (:obj:`torch.Tensor`): each agent local state(obs). Its last dimension is \
	                    sliced as ``[own obs | last action (action_shape) | agent id (agent_num)]``
	                - global_state (:obj:`torch.Tensor`): global state(obs)
	                - action (:obj:`torch.Tensor`): the action index taken by each agent
	        ArgumentsKeys:
	            - necessary: ``obs`` { ``agent_state``, ``global_state`` }, ``action``
	        Return:
	            - x (:obj:`torch.Tensor`): the data can be used by MLP net, concatenated along the last \
	                dimension in the order ``global_state``, ``agent_state``, ``last_action``, ``action``, \
	                ``agent_id``
	        """
	        agent_state_ori, global_state = data['obs']['agent_state'], data['obs']['global_state']
	        t_size, batch_size, agent_num = agent_state_ori.shape[:3]

	        # split obs, last_action and agent_id out of the packed per-agent observation
	        agent_state = agent_state_ori[..., :-self.action_shape - agent_num]
	        last_action = agent_state_ori[..., -self.action_shape - agent_num:-agent_num]
	        # every agent sees the last actions of all agents, laid out side by side
	        last_action = last_action.reshape(t_size, batch_size, 1, -1).repeat(1, 1, agent_num, 1)
	        agent_id = agent_state_ori[..., -agent_num:]

	        action = one_hot(data['action'], self.action_shape)  # T, B, A, N
	        action = action.reshape(t_size, batch_size, -1, agent_num * self.action_shape).repeat(1, 1, agent_num, 1)
	        # Row i zeroes the one-hot slot of agent i, so each agent only receives the other agents' actions.
	        # The mask is created directly on the target device instead of on CPU followed by a copy.
	        action_mask = 1 - torch.eye(agent_num, device=action.device)
	        action_mask = action_mask.repeat_interleave(self.action_shape, dim=1)  # A, A*N
	        action = action_mask.unsqueeze(0).unsqueeze(0) * action  # T, B, A, A*N
	        global_state = global_state.unsqueeze(2).repeat(1, 1, agent_num, 1)

	        x = torch.cat([global_state, agent_state, last_action, action, agent_id], -1)
	        return x


	@MODEL_REGISTRY.register('coma')
	class COMA(nn.Module):
	    """
	    Overview:
	        The network of COMA algorithm, which is QAC-type actor-critic. It owns one ``COMAActorNetwork`` \
	        and one ``COMACriticNetwork`` and dispatches ``forward`` to one of them according to ``mode``.
	    Interface:
	        ``__init__``, ``forward``
	    Properties:
	        - mode (:obj:`list`): The list of forward mode, including ``compute_actor`` and ``compute_critic``
	    """

	    mode = ['compute_actor', 'compute_critic']

	    def __init__(
	            self, agent_num: int, obs_shape: Dict, action_shape: Union[int, SequenceType],
	            actor_hidden_size_list: SequenceType
	    ) -> None:
	        """
	        Overview:
	            initialize COMA network
	        Arguments:
	            - agent_num (:obj:`int`): the number of agent
	            - obs_shape (:obj:`Dict`): the observation information, including agent_state and \
	                global_state
	            - action_shape (:obj:`Union[int, SequenceType]`): the dimension of action shape
	            - actor_hidden_size_list (:obj:`SequenceType`): the list of hidden size; its last entry is \
	                also used as the critic hidden size
	        """
	        super(COMA, self).__init__()
	        action_shape = squeeze(action_shape)
	        agent_state_size = squeeze(obs_shape['agent_state'])
	        global_state_size = squeeze(obs_shape['global_state'])
	        # Width of the vector built by COMACriticNetwork._preprocess_data:
	        #   global_state + (agent_state - action_shape - agent_num) + 2 * agent_num * action_shape + agent_num
	        # which simplifies to the expression below.
	        critic_input_size = agent_state_size + global_state_size + \
	            agent_num * action_shape + (agent_num - 1) * action_shape
	        critic_hidden_size = actor_hidden_size_list[-1]
	        self.actor = COMAActorNetwork(agent_state_size, action_shape, actor_hidden_size_list)
	        self.critic = COMACriticNetwork(critic_input_size, action_shape, critic_hidden_size)

	    def forward(self, inputs: Dict, mode: str) -> Dict:
	        """
	        Overview:
	            forward computation graph of COMA network
	        Arguments:
	            - inputs (:obj:`dict`): input data dict, forwarded unchanged to the actor or the critic
	                - agent_state (:obj:`torch.Tensor`): each agent local state(obs)
	                - global_state (:obj:`torch.Tensor`): global state(obs), read by the critic
	                - action (:obj:`torch.Tensor`): the action index of each agent, read by the critic
	            - mode (:obj:`str`): one of the entries of ``self.mode``
	        Raises:
	            - AssertionError: if ``mode`` is not listed in ``self.mode``
	        ArgumentsKeys:
	            - necessary:
	                - compute_actor: ``obs`` { ``agent_state``, ``action_mask`` }, ``prev_state``
	                - compute_critic: ``obs`` { ``agent_state``, ``global_state`` }, ``action``
	        ReturnsKeys:
	            - necessary:
	                - compute_critic: ``q_value``
	                - compute_actor: ``logit``, ``next_state``, ``action_mask``
	        Shapes:
	            - agent_state (:obj:`torch.Tensor`): :math:`(T, B, A, N)`, where T is timestep, B is batch, \
	                A is agent_num and N is the per-agent observation size
	            - global_state (:obj:`torch.Tensor`): :math:`(T, B, M)`
	            - action (:obj:`torch.Tensor`): :math:`(T, B, A)`
	            - prev_state (:obj:`list`): nested list indexed ``[batch][agent]``
	            - logit (:obj:`torch.Tensor`): :math:`(T, B, A, P)`, where P is presumably ``action_shape``
	            - q_value (:obj:`torch.Tensor`): :math:`(T, B, A, P)`
	        Examples:
	            >>> agent_num, bs, T = 4, 3, 8
	            >>> obs_dim, global_obs_dim, action_dim = 32, 32 * 4, 9
	            >>> coma_model = COMA(
	            ...     agent_num=agent_num,
	            ...     obs_shape=dict(agent_state=(obs_dim, ), global_state=(global_obs_dim, )),
	            ...     action_shape=action_dim,
	            ...     actor_hidden_size_list=[128, 64],
	            ... )
	            >>> prev_state = [[None for _ in range(agent_num)] for _ in range(bs)]
	            >>> data = {
	            ...     'obs': {
	            ...         'agent_state': torch.randn(T, bs, agent_num, obs_dim),
	            ...         'action_mask': None,
	            ...     },
	            ...     'prev_state': prev_state,
	            ... }
	            >>> output = coma_model(data, mode='compute_actor')
	            >>> data = {
	            ...     'obs': {
	            ...         'agent_state': torch.randn(T, bs, agent_num, obs_dim),
	            ...         'global_state': torch.randn(T, bs, global_obs_dim),
	            ...     },
	            ...     'action': torch.randint(0, action_dim, size=(T, bs, agent_num)),
	            ... }
	            >>> output = coma_model(data, mode='compute_critic')
	        """
	        # Explicit raise instead of ``assert`` so the check survives ``python -O``; the exception type and
	        # message are kept so existing callers observe no difference.
	        if mode not in self.mode:
	            raise AssertionError("not support forward mode: {}/{}".format(mode, self.mode))
	        if mode == 'compute_actor':
	            return self.actor(inputs)
	        elif mode == 'compute_critic':
	            return self.critic(inputs)