# File size: 10,938 Bytes
# 079c32c
import copy
from collections import defaultdict
from typing import Tuple, Optional
from easydict import EasyDict
from tabulate import tabulate
import numpy as np

from ding.utils import LockContext, LockContextType
from .player import Player


class BattleRecordDict(dict):
    """
    Overview:
        Dict-based tally of battle game outcomes.
        A new instance always holds the four keys listed in ``data_keys``
        (`wins`, `draws`, `losses`, `games`), each starting at 0.
    Interfaces:
        __init__, __mul__
    """
    data_keys = ['wins', 'draws', 'losses', 'games']

    def __init__(self) -> None:
        """
        Overview:
            Create the record with every key of ``data_keys`` present and set to 0.
        """
        super().__init__()
        self.update(dict.fromkeys(self.data_keys, 0))

    def __mul__(self, decay: float) -> dict:
        """
        Overview:
            Build a scaled copy of this record; the record itself is left untouched.
        Arguments:
            - decay (:obj:`float`): Factor applied to the value stored under every key.
        Returns:
            - obj (:obj:`dict`): A deep copy of this record whose values are all multiplied by ``decay``.
        """
        scaled = copy.deepcopy(self)
        # Only values are reassigned, so iterating over the items view is safe here.
        for name, count in scaled.items():
            scaled[name] = count * decay
        return scaled


class BattleSharedPayoff:
    """
    Overview:
        Payoff data structure to record historical match results; one payoff is shared among all the players.
        Accesses through ``__getitem__``, ``players``, ``add_player`` and ``update`` are wrapped in a
        ``LockContext`` thread lock, because players from several threads may read and modify the payoff.
    Interface:
        __getitem__, add_player, update, get_key
    Property:
        players
    """

    # TODO(nyz) whether ensures the thread-safe

    def __init__(self, cfg: EasyDict):
        """
        Overview:
            Initialize battle payoff
        Arguments:
            - cfg (:obj:`dict`): config; ``cfg.decay`` is required, ``min_win_rate_games`` is optional \
                and defaults to 8.
        """
        # ``_players`` is a list containing the references (shallow copy) of all players,
        # while ``_players_ids`` is a list of their id strings, kept in the same order.
        self._players = []
        self._players_ids = []
        # ``_data`` is a defaultdict. If a key doesn't exist when queried, a fresh BattleRecordDict is
        # created, stored under that key and returned.
        # Key is '[player_id]-[player_id]' string, value is the payoff of the two players.
        self._data = defaultdict(BattleRecordDict)
        # ``_decay`` controls how past game info (win, draw, loss, games) decays on every new episode.
        self._decay = cfg.decay
        # ``_min_win_rate_games`` is used in ``self._win_rate`` method for calculating win rate between two players.
        self._min_win_rate_games = cfg.get('min_win_rate_games', 8)
        # Thread lock.
        self._lock = LockContext(type_=LockContextType.THREAD_LOCK)

    def __repr__(self) -> str:
        """
        Overview:
            Render every recorded pairing as a pipe-format table, one row per key of ``_data``,
            sorted by the home player column.
        Returns:
            - s (:obj:`str`): The table produced by ``tabulate``.
        """
        headers = ["Home Player", "Away Player", "Wins", "Draws", "Losses", "Naive Win Rate"]
        data = []
        for k, v in self._data.items():
            # k is the format of '{}-{}'.format(name1, name2), and each HistoricalPlayer has `historical` suffix.
            # NOTE(review): splitting on '-' presumes player ids never contain '-' themselves; confirm.
            k1 = k.split('-')
            # 1e-8 keeps the division defined for pairings that have no recorded result yet.
            total = v['wins'] + v['losses'] + v['draws'] + 1e-8
            if 'historical' in k1[0]:
                # reverse representation: show the second id as home, so wins and losses swap
                naive_win_rate = (v['losses'] + v['draws'] / 2) / total
                data.append([k1[1], k1[0], v['losses'], v['draws'], v['wins'], naive_win_rate])
            else:
                naive_win_rate = (v['wins'] + v['draws'] / 2) / total
                data.append([k1[0], k1[1], v['wins'], v['draws'], v['losses'], naive_win_rate])
        data = sorted(data, key=lambda x: x[0])
        s = tabulate(data, headers=headers, tablefmt='pipe')
        return s

    def __getitem__(self, players: tuple) -> np.ndarray:
        """
        Overview:
            Get win rates between home players and away players one by one
        Arguments:
            - players (:obj:`tuple`): A tuple of (home, away), each one is a player or a player list.
        Returns:
            - win_rates (:obj:`np.ndarray`): Win rate (squeezed, see Shape for more details) \
                between each player from home and each player from away.
        Shape:
            - win_rates: Assume there are m home players and n away players.(m,n > 0)

                - m != 1 and n != 1: shape is (m, n)
                - m == 1: shape is (n)
                - n == 1: shape is (m)
        """
        with self._lock:
            home, away = players
            assert isinstance(home, (list, Player))
            assert isinstance(away, (list, Player))
            # Normalize a single player to a one-element list so both sides can be iterated.
            if isinstance(home, Player):
                home = [home]
            if isinstance(away, Player):
                away = [away]
            win_rates = np.array([[self._win_rate(h.player_id, a.player_id) for a in away] for h in home])
            # Flatten when either side holds exactly one player (see Shape above).
            if len(home) == 1 or len(away) == 1:
                win_rates = win_rates.reshape(-1)
            return win_rates

    def _win_rate(self, home: str, away: str) -> float:
        """
        Overview:
            Calculate win rate of one `home player` vs one `away player`
        Arguments:
            - home (:obj:`str`): home player id to access win rate
            - away (:obj:`str`): away player id to access win rate
        Returns:
            - win rate (:obj:`float`): float win rate value. \
                Only when total games is no less than ``self._min_win_rate_games``, \
                can the win rate be calculated by (wins + draws/2) / games, or return 0.5 by default.
        """
        key, reverse = self.get_key(home, away)
        # ``_data`` is a defaultdict, so looking up an unseen pairing stores an all-zero record for it.
        handle = self._data[key]
        # No enough game records.
        if handle['games'] < self._min_win_rate_games:
            return 0.5
        # The record is stored from the viewpoint of the first id in the key; when the ids were
        # swapped to build the key, home's wins are the stored losses.
        wins = handle['wins'] if not reverse else handle['losses']
        return (wins + 0.5 * handle['draws']) / (handle['games'])

    @property
    def players(self):
        """
        Overview:
            Get all the players
        Returns:
            - players (:obj:`list`): players list (the internal list itself, not a copy)
        """
        with self._lock:
            return self._players

    def add_player(self, player: Player) -> None:
        """
        Overview:
            Add a player to the shared payoff.
        Arguments:
            - player (:obj:`Player`): The player to be added. Usually is a new one to the league as well.
        """
        with self._lock:
            self._players.append(player)
            self._players_ids.append(player.player_id)

    def update(self, job_info: dict) -> bool:
        """
        Overview:
            Update payoff with job_info when a job is to be finished.
            If update succeeds, return True; if job_info is invalid (missing key, empty result,
            unknown player id or unknown result label), print the reason, leave the payoff
            untouched and return False.
        Arguments:
            - job_info (:obj:`dict`): A dict containing job result information.
        Returns:
            - result (:obj:`bool`): Whether update is successful.

        .. note::
            job_info has at least 5 keys ['launch_player', 'player_id', 'env_num', 'episode_num', 'result'].
            Key ``player_id`` 's value is a tuple of (home_id, away_id).
            Key ``result`` 's value is a two-layer list with the length of (episode_num, env_num).
        """

        def _win_loss_reverse(result_: str, reverse_: bool) -> str:
            # Translate a result seen by home into the viewpoint of the first id in the key.
            if result_ == 'draws' or not reverse_:
                return result_
            reverse_dict = {'wins': 'losses', 'losses': 'wins'}
            return reverse_dict[result_]

        # Valid per-game labels: 'wins', 'draws', 'losses' (``games`` is a counter, not a result).
        valid_results = BattleRecordDict.data_keys[:3]

        with self._lock:
            # Validate everything before touching ``_data`` so a bad job can never be half-applied.
            # Explicit raises are used instead of ``assert`` because asserts are removed under ``python -O``.
            try:
                home_id, away_id = job_info['player_id']
                job_info_result = job_info['result']
                # for compatibility of one-layer list
                if not isinstance(job_info_result[0], list):
                    job_info_result = [job_info_result]
                if home_id not in self._players_ids:
                    raise ValueError("home_id error")
                if away_id not in self._players_ids:
                    raise ValueError("away_id error")
                if not all(res in valid_results for episode in job_info_result for res in episode):
                    raise ValueError("results error")
            except (KeyError, IndexError, TypeError, ValueError) as e:
                print("[ERROR] invalid job_info: {}\n\tError reason is: {}".format(job_info, e))
                return False

            key, reverse = self.get_key(home_id, away_id)
            if home_id == away_id:  # self-play
                # self-play defaults to draws: one draw and one game per job, without decay
                self._data[key]['draws'] += 1
                self._data[key]['games'] += 1
            else:
                # Update with decay
                # job_info_result is a two-layer list, including total NxM episodes of M envs,
                # the first(outer) layer is episode dimension and the second(inner) layer is env dimension.
                for one_episode_result in job_info_result:
                    for one_episode_result_per_env in one_episode_result:
                        # All categories should decay.
                        # ``*=`` rebinds the entry to the decayed copy returned by ``BattleRecordDict.__mul__``.
                        self._data[key] *= self._decay
                        self._data[key]['games'] += 1
                        result = _win_loss_reverse(one_episode_result_per_env, reverse)
                        self._data[key][result] += 1
            return True

    def get_key(self, home: str, away: str) -> Tuple[str, bool]:
        """
        Overview:
            Join home player id and away player id in alphabetical order.
        Arguments:
            - home (:obj:`str`): Home player id
            - away (:obj:`str`): Away player id
        Returns:
            - key (:obj:`str`): Two ids sorted in alphabetical order, and joined by '-'.
            - reverse (:obj:`bool`): Whether the two player ids are reordered.
        """
        assert isinstance(home, str)
        assert isinstance(away, str)
        # ``<=`` keeps equal ids (self-play) in the non-reversed form.
        if home <= away:
            return '-'.join([home, away]), False
        return '-'.join([away, home]), True


def create_payoff(cfg: EasyDict) -> Optional[BattleSharedPayoff]:
    """
    Overview:
        Instantiate the payoff class registered under ``cfg.type``.
        The only type registered here is 'battle'; any other value raises a KeyError.
    Arguments:
        - cfg (:obj:`EasyDict`): payoff config containing at least the key 'type'; \
            the whole config is forwarded to the payoff constructor.
    Returns:
        - payoff (:obj:`BattleSharedPayoff`): the newly created payoff instance.
    Raises:
        - KeyError: ``cfg.type`` is not a registered payoff type.
    """
    registry = {'battle': BattleSharedPayoff}
    requested = cfg.type
    payoff_cls = registry.get(requested)
    # Registered values are classes, so ``None`` can only mean an unknown type.
    if payoff_cls is None:
        raise KeyError("not support payoff type: {}".format(requested))
    return payoff_cls(cfg)