File size: 1,749 Bytes
079c32c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import numpy as np


def pfsp(win_rates: np.ndarray, weighting: str) -> np.ndarray:
    """
    Overview:
        Prioritized Fictitious Self-Play algorithm.
        Process win_rates with a weighting function to get priority, then normalize the priorities
        into the selection probability of each opponent.
        If the win rates sum to (almost) zero, or every priority is exactly zero (e.g. ``'squared'``
        with all win rates equal to 1), there is nothing to normalize by and a uniform distribution
        is returned instead.
    Arguments:
        - win_rates (:obj:`np.ndarray`): a numpy ndarray of win rates between one player and N opponents, shape(N)
        - weighting (:obj:`str`): pfsp weighting function type, refer to ``weighting_func`` below: \
            ``'squared'`` maps x to ``(1 - x) ** 2``, ``'variance'`` maps x to ``x * (1 - x)``
    Returns:
        - probs (:obj:`np.ndarray`): a numpy ndarray of probability at which one element is selected, shape(N)
    Raises:
        - KeyError: if ``weighting`` is not a key of ``weighting_func``
        - AssertionError: if ``win_rates`` is not a ``np.ndarray`` or its first dimension is empty
    """
    weighting_func = {
        'squared': lambda x: (1 - x) ** 2,
        'variance': lambda x: x * (1 - x),
    }
    if weighting in weighting_func:
        fn = weighting_func[weighting]
    else:
        raise KeyError("invalid weighting arg: {} in pfsp".format(weighting))

    assert isinstance(win_rates, np.ndarray)
    assert win_rates.shape[0] >= 1, win_rates.shape

    def _uniform_probs() -> np.ndarray:
        # ``np.full_like`` casts the fill value to the dtype of its prototype, so an integer
        # ``win_rates`` would truncate 1/N to 0. Keep floating dtypes as they are and promote
        # everything else to float64.
        if np.issubdtype(win_rates.dtype, np.floating):
            prob_dtype = win_rates.dtype
        else:
            prob_dtype = np.float64
        return np.full_like(win_rates, 1.0 / len(win_rates), dtype=prob_dtype)

    # all zero win rates case, return uniform selection prob
    if win_rates.sum() < 1e-8:
        return _uniform_probs()
    fn_win_rates = fn(win_rates)
    total = fn_win_rates.sum()
    # Every priority is exactly zero: dividing would produce NaN for all entries,
    # so fall back to a uniform selection prob as well.
    if total == 0:
        return _uniform_probs()
    probs = fn_win_rates / total
    return probs


def uniform(win_rates: np.ndarray) -> np.ndarray:
    """
    Overview:
        Uniform opponent selection algorithm. Select an opponent uniformly, regardless of historical win rates.
        Only the shape of ``win_rates`` is used; its values are ignored.
    Arguments:
        - win_rates (:obj:`np.ndarray`): a numpy ndarray of win rates between one player and N opponents, shape(N)
    Returns:
        - probs (:obj:`np.ndarray`): a numpy ndarray of uniform probability, shape(N). Floating dtypes of \
            ``win_rates`` are preserved; any other dtype yields a float64 result.
    """
    win_rates = np.asanyarray(win_rates)
    # ``np.full_like`` casts the fill value to the prototype's dtype, so an integer or boolean
    # ``win_rates`` would truncate 1/N to 0. Force a floating result dtype in that case.
    if np.issubdtype(win_rates.dtype, np.floating):
        prob_dtype = win_rates.dtype
    else:
        prob_dtype = np.float64
    return np.full_like(win_rates, 1.0 / len(win_rates), dtype=prob_dtype)