File size: 5,355 Bytes
079c32c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from easydict import EasyDict
from typing import Optional

from ding.utils import LEAGUE_REGISTRY
from .base_league import BaseLeague
from .player import ActivePlayer


@LEAGUE_REGISTRY.register('one_vs_one')
class OneVsOneLeague(BaseLeague):
    """
    Overview:
        League for one-versus-one battle games.
        Its job is to decide which two players are matched against each other.
    Interface:
        __init__, run, close, finish_job, update_active_player
    """
    config = {
        'league_type': 'one_vs_one',
        'import_names': ["ding.league"],
        # --- player ---
        # "player_category" is only a label whose meaning depends on the env,
        # e.g. in StarCraft it could be ['zerg', 'terran', 'protoss'].
        'player_category': ['default'],
        # Active player types differ between solo and battle leagues:
        #   - solo league supports ['solo_active_player'];
        #   - battle league supports
        #     ['battle_active_player', 'main_player', 'main_exploiter', 'league_exploiter'].
        # NOTE(review): 'naive_sp_player' used below is not in either list above -- confirm
        # whether the lists are out of date.
        'active_players': {
            # Mapping of {player_type: player_num}.
            'naive_sp_player': 1,
        },
        'naive_sp_player': {
            # Expected keys: ['one_phase_step', 'branch_probs', 'strong_win_rate'].
            # For StarCraft's 'main_exploiter' an extra key ['min_valid_win_rate'] is expected.
            'one_phase_step': 10,
            'branch_probs': {
                'pfsp': 0.5,
                'sp': 0.5,
            },
            'strong_win_rate': 0.7,
        },
        # "use_pretrain": whether the active player is initialized from a pretrain model.
        'use_pretrain': False,
        # "use_pretrain_init_historical": whether the historical player is initialized from a
        # pretrain model.
        # "pretrain_checkpoint_path" is the checkpoint used by both flags above. It may be
        # omitted when both flags are False; otherwise it should list a path for every
        # player category.
        'use_pretrain_init_historical': False,
        'pretrain_checkpoint_path': {'default': 'default_cate_pretrain.pth'},
        # --- payoff ---
        'payoff': {
            # Supported types: ['battle']
            'type': 'battle',
            'decay': 0.99,
            'min_win_rate_games': 8,
        },
        # NOTE(review): these fields look like rating-system (TrueSkill-style) parameters;
        # confirm against the code that consumes "metric".
        'metric': {
            'mu': 0,
            'sigma': 25 / 3,
            'beta': 25 / 3 / 2,
            'tau': 0.0,
            'draw_probability': 0.02,
        },
    }

    # override
    def _get_job_info(self, player: ActivePlayer, eval_flag: bool = False) -> dict:
        """
        Overview:
            Assemble the job related info of ``player``, called by ``_launch_job``.
        Arguments:
            - player (:obj:`ActivePlayer`): The active player that will be assigned a job.
            - eval_flag (:obj:`bool`): Forwarded to ``player.get_job``. When truthy, a single-agent
              evaluation job is described; otherwise a two-agent job against the opponent.
        Returns:
            - job_info (:obj:`dict`): Contains 'agent_num', 'launch_player', 'player_id',
              'checkpoint_path' and 'player_active_flag'. An evaluation job additionally
              carries 'eval_opponent'.
        """
        assert isinstance(player, ActivePlayer), player.__class__
        job = EasyDict(player.get_job(eval_flag))

        if not eval_flag:
            # Two-agent job: the launching player always comes first, its opponent second.
            participants = [player, job.opponent]
            return {
                'agent_num': 2,
                'launch_player': player.player_id,
                'player_id': [p.player_id for p in participants],
                'checkpoint_path': [p.checkpoint_path for p in participants],
                'player_active_flag': [isinstance(p, ActivePlayer) for p in participants],
            }

        # Evaluation job: only the launching player is listed as an agent; the opponent
        # returned by ``get_job`` is passed through unchanged under 'eval_opponent'.
        return {
            'agent_num': 1,
            'launch_player': player.player_id,
            'player_id': [player.player_id],
            'checkpoint_path': [player.checkpoint_path],
            'player_active_flag': [isinstance(player, ActivePlayer)],
            'eval_opponent': job.opponent,
        }

    # override
    def _mutate_player(self, player: ActivePlayer):
        """
        Overview:
            Mutation hook for an active player (a place where a player could be reset to
            supervised learning model parameters). This league does nothing here.
        Arguments:
            - player (:obj:`ActivePlayer`): The active player that may mutate.
        """
        return None

    # override
    def _update_player(self, player: ActivePlayer, player_info: dict) -> Optional[bool]:
        """
        Overview:
            Update an active player, called by ``self.update_active_player``.
        Arguments:
            - player (:obj:`ActivePlayer`): The active player that will be updated.
            - player_info (:obj:`dict`): An info dict of the active player which is to be updated.
              A 'train_iteration' key marks info from the learner, an 'eval_win' key marks
              info from the evaluator; 'train_iteration' takes precedence if both are present.
        Returns:
            - increment_eval_difficulty (:obj:`bool`): For evaluator info, the result of
              ``player.increment_eval_difficulty()`` when the evaluation was won, otherwise False.
              For learner info, always False. ``None`` if ``player_info`` has neither key.
        """
        assert isinstance(player, ActivePlayer)

        # Learner report: record the training progress on the player.
        if 'train_iteration' in player_info:
            player.total_agent_step = player_info['train_iteration']
            return False

        # Neither a learner nor an evaluator report: nothing to update.
        if 'eval_win' not in player_info:
            return None

        # Evaluator report: difficulty is only raised after a win.
        if not player_info['eval_win']:
            return False
        return player.increment_eval_difficulty()