import os.path as osp
import yaml
import numpy as np
import torch

from .football_ikki import Environment
from .handyrl_core.model import load_model

model_path = osp.join(osp.dirname(__file__), 'models/1679.pth')

with open(osp.join(osp.dirname(__file__), 'config.yaml')) as f:
    config = yaml.safe_load(f)

env = Environment(config['env_args'])
model = load_model(env.net()(env), model_path)
model.eval()
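
# Note (assumption, following the HandyRL-style interface this repo uses):
# `env.net()` returns the network class for this environment, so
# `env.net()(env)` instantiates it, and `load_model` restores the trained
# weights saved at `model_path`.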

def output_think(env, obs, actions, p, v, r):
    # Mask out illegal actions by pushing their logits toward -inf.
    pmask = np.ones_like(p)
    pmask[actions] = 0
    p = p - pmask * 1e32

    def softmax(x):
        x = np.exp(x - np.max(x, axis=-1))
        return x / x.sum(axis=-1)

    sticky_actions = obs['players_raw'][0]['sticky_actions']
    print(sticky_actions)
    print(actions)
    print((softmax(p) * 1000).astype(int))  # policy in permille over legal actions
    print(v)
    print(r)
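
# Illustrative example (not from a real game, values are made up): with
# logits p = [2.0, 0.5, 1.0, -1.0] and legal actions [0, 2], the masking
# above drives the illegal logits down by 1e32, so
# softmax(p) ~= [0.73, 0.00, 0.27, 0.00].
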
prev_action = 0
reserved_action = None

def agent(obs):
    global prev_action, reserved_action

    # Replay the previous action into the internal environment so its state
    # tracking stays in sync with the engine's.
    info = [{'observation': obs, 'action': [prev_action]}, None]
    env.play_info(info)
    # print('step %d' % len(env.states))

    x = env.observation(0)
    p, v, r, _ = model.inference(x, None)
    actions = env.legal_actions(0)
    # output_think(env, obs, actions, p, v, r)

    # Rank the legal actions by policy logit, highest first.
    ap_list = sorted([(a, p[a]) for a in actions], key=lambda x: -x[1])

    # You need to return a list containing your single action (an int in [1, 18]).
    # Beware that the model output might be a float, so make sure to return an int.
    action = ap_list[0][0]

    if reserved_action is not None:
        # Play the second half of a previously split special action.
        prev_action = reserved_action
        reserved_action = None
        # print('###RESERVED###')
    else:
        # Split a special (compound) action into the primitive action to play
        # now and the one reserved for the next step.
        prev_action, reserved_action = env.special_to_actions(action)

    return [prev_action]
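
# Usage sketch (assumption, not part of the original submission): in the
# Kaggle GFootball runtime this module is loaded as a submission file and
# `agent` is called once per step with the raw observation dict, e.g.
#
#     from kaggle_environments import make
#     kenv = make('football',
#                 configuration={'scenario_name': '11_vs_11_kaggle'})
#     kenv.run(['submission.py', 'do_nothing'])
#
# where 'submission.py' is a hypothetical path to this file.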