File size: 5,355 Bytes
079c32c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
from easydict import EasyDict
from typing import Optional
from ding.utils import LEAGUE_REGISTRY
from .base_league import BaseLeague
from .player import ActivePlayer
@LEAGUE_REGISTRY.register('one_vs_one')
class OneVsOneLeague(BaseLeague):
"""
Overview:
One vs One battle game league.
Decide which two players will play against each other.
Interface:
__init__, run, close, finish_job, update_active_player
"""
config = dict(
league_type='one_vs_one',
import_names=["ding.league"],
# ---player----
# "player_category" is just a name. Depends on the env.
# For example, in StarCraft, this can be ['zerg', 'terran', 'protoss'].
player_category=['default'],
# Support different types of active players for solo and battle league.
# For solo league, supports ['solo_active_player'].
# For battle league, supports ['battle_active_player', 'main_player', 'main_exploiter', 'league_exploiter'].
active_players=dict(
naive_sp_player=1, # {player_type: player_num}
),
naive_sp_player=dict(
# There should be keys ['one_phase_step', 'branch_probs', 'strong_win_rate'].
# Specifically for 'main_exploiter' of StarCraft, there should be an additional key ['min_valid_win_rate'].
one_phase_step=10,
branch_probs=dict(
pfsp=0.5,
sp=0.5,
),
strong_win_rate=0.7,
),
# "use_pretrain" means whether to use pretrain model to initialize active player.
use_pretrain=False,
# "use_pretrain_init_historical" means whether to use pretrain model to initialize historical player.
# "pretrain_checkpoint_path" is the pretrain checkpoint path used in "use_pretrain" and
# "use_pretrain_init_historical". If both are False, "pretrain_checkpoint_path" can be omitted as well.
# Otherwise, "pretrain_checkpoint_path" should list paths of all player categories.
use_pretrain_init_historical=False,
pretrain_checkpoint_path=dict(default='default_cate_pretrain.pth', ),
# ---payoff---
payoff=dict(
# Supports ['battle']
type='battle',
decay=0.99,
min_win_rate_games=8,
),
metric=dict(
mu=0,
sigma=25 / 3,
beta=25 / 3 / 2,
tau=0.0,
draw_probability=0.02,
),
)
# override
def _get_job_info(self, player: ActivePlayer, eval_flag: bool = False) -> dict:
"""
Overview:
Get player's job related info, called by ``_launch_job``.
Arguments:
- player (:obj:`ActivePlayer`): The active player that will be assigned a job.
"""
assert isinstance(player, ActivePlayer), player.__class__
player_job_info = EasyDict(player.get_job(eval_flag))
if eval_flag:
return {
'agent_num': 1,
'launch_player': player.player_id,
'player_id': [player.player_id],
'checkpoint_path': [player.checkpoint_path],
'player_active_flag': [isinstance(player, ActivePlayer)],
'eval_opponent': player_job_info.opponent,
}
else:
return {
'agent_num': 2,
'launch_player': player.player_id,
'player_id': [player.player_id, player_job_info.opponent.player_id],
'checkpoint_path': [player.checkpoint_path, player_job_info.opponent.checkpoint_path],
'player_active_flag': [isinstance(p, ActivePlayer) for p in [player, player_job_info.opponent]],
}
# override
def _mutate_player(self, player: ActivePlayer):
"""
Overview:
Players have the probability to be reset to supervised learning model parameters.
Arguments:
- player (:obj:`ActivePlayer`): The active player that may mutate.
"""
pass
# override
def _update_player(self, player: ActivePlayer, player_info: dict) -> Optional[bool]:
"""
Overview:
Update an active player, called by ``self.update_active_player``.
Arguments:
- player (:obj:`ActivePlayer`): The active player that will be updated.
- player_info (:obj:`dict`): An info dict of the active player which is to be updated.
Returns:
- increment_eval_difficulty (:obj:`bool`): Only return this when evaluator calls this method. \
Return True if difficulty is incremented; Otherwise return False (difficulty will not increment \
when it is already the most difficult or evaluator loses)
"""
assert isinstance(player, ActivePlayer)
if 'train_iteration' in player_info:
# Update info from learner
player.total_agent_step = player_info['train_iteration']
return False
elif 'eval_win' in player_info:
if player_info['eval_win']:
# Update info from evaluator
increment_eval_difficulty = player.increment_eval_difficulty()
return increment_eval_difficulty
else:
return False
|