import numpy as np


def _uniform_probs(win_rates: np.ndarray) -> np.ndarray:
    """
    Overview:
        Build a uniform probability array with the same shape as ``win_rates``.
        Unlike ``np.full_like``, the result is always floating point, so integer (or bool)
        win rates do not truncate ``1 / N`` down to ``0``.
    Arguments:
        - win_rates (:obj:`np.ndarray`): array-like of win rates; only its shape and dtype are used
    Returns:
        - probs (:obj:`np.ndarray`): array filled with ``1 / len(win_rates)``
    """
    win_rates = np.asarray(win_rates)
    # Keep a floating input dtype (e.g. float32) as ``np.full_like`` did; promote anything else.
    dtype = win_rates.dtype if np.issubdtype(win_rates.dtype, np.floating) else np.float64
    return np.full(win_rates.shape, 1.0 / len(win_rates), dtype=dtype)


def pfsp(win_rates: np.ndarray, weighting: str) -> np.ndarray:
    """
    Overview:
        Prioritized Fictitious Self-Play algorithm.
        Process win_rates with a weighting function to get priority, then calculate the selection probability of each.
        If the priorities carry no information (all win rates are zero, or every priority is zero,
        e.g. all win rates equal to 1), a uniform distribution is returned instead of NaN.
    Arguments:
        - win_rates (:obj:`np.ndarray`): a numpy ndarray of win rates between one player and N opponents, shape(N)
        - weighting (:obj:`str`): pfsp weighting function type, one of ``'squared'`` (``(1 - x) ** 2``) \
            or ``'variance'`` (``x * (1 - x)``)
    Returns:
        - probs (:obj:`np.ndarray`): a numpy ndarray of probability at which one element is selected, shape(N)
    Raises:
        - KeyError: if ``weighting`` is not a supported weighting function type
        - AssertionError: if ``win_rates`` is not a ``np.ndarray`` or its first dimension is empty
    """
    weighting_func = {
        'squared': lambda x: (1 - x) ** 2,
        'variance': lambda x: x * (1 - x),
    }
    fn = weighting_func.get(weighting)
    if fn is None:
        raise KeyError("invalid weighting arg: {} in pfsp".format(weighting))

    # Kept as asserts so callers observe the same exception type as before.
    assert isinstance(win_rates, np.ndarray)
    assert win_rates.shape[0] >= 1, win_rates.shape

    # all zero win rates case, return uniform selection prob
    if win_rates.sum() < 1e-8:
        return _uniform_probs(win_rates)

    fn_win_rates = fn(win_rates)
    total = fn_win_rates.sum()
    # All priorities are zero (e.g. every win rate is exactly 1, or exactly 0/1 under 'variance'):
    # normalising would be 0 / 0 and yield NaN, so fall back to uniform selection.
    if total <= 0:
        return _uniform_probs(win_rates)
    return fn_win_rates / total


def uniform(win_rates: np.ndarray) -> np.ndarray:
    """
    Overview:
        Uniform opponent selection algorithm. Select an opponent uniformly, regardless of historical win rates.
    Arguments:
        - win_rates (:obj:`np.ndarray`): a numpy ndarray of win rates between one player and N opponents, shape(N)
    Returns:
        - probs (:obj:`np.ndarray`): a numpy ndarray of uniform probability, shape(N)
    """
    return _uniform_probs(win_rates)