diff --git a/pluribus/games/short_deck/state.py b/pluribus/games/short_deck/state.py index 04834d74..4a763d5c 100644 --- a/pluribus/games/short_deck/state.py +++ b/pluribus/games/short_deck/state.py @@ -123,9 +123,6 @@ def __init__( for player in self.players: player.is_turn = False self.current_player.is_turn = True - # TODO add attribute of public_cards, that can be supplied by - # convenience method - self._public_cards = public_cards if public_cards: assert len(public_cards) in {3, 4, 5} self._public_cards = public_cards @@ -133,10 +130,6 @@ def __init__( if self.real_time_test: # must have offline strategy loaded up self._starting_hand_probs = self._initialize_starting_hands() - # TODO: We might not need this - cards_in_deck = self._table.dealer.deck._cards_in_deck - self._evals = [c.eval_card for c in cards_in_deck] - self._evals_to_cards = {i.eval_card: i for i in cards_in_deck} def __repr__(self): """Return a helpful description of object in strings and debugger.""" @@ -329,34 +322,30 @@ def _increment_stage(self): def _normalize_bayes(self): """Normalize probability of reach for each player""" n_players = len(self.players) - for player in range(n_players): - total_prob = sum(self._starting_hand_probs[player].values()) - for starting_hand, prob in self._starting_hand_probs[player].items(): - self._starting_hand_probs[player][starting_hand] = prob / total_prob + for p_i in range(n_players): + total_prob = sum(self._starting_hand_probs[p_i].values()) + for starting_hand, prob in self._starting_hand_probs[p_i].items(): + self._starting_hand_probs[p_i][starting_hand] = prob / total_prob + # TODO: figure out typing for dicts.. 
def _update_hole_cards_bayes(self, offline_strategy: Dict): - """Get probability of reach for each pair of hole cards for each player""" + """Get probability of reach for each starting hand for each player""" n_players = len(self._table.players) player_indices: List[int] = [p_i for p_i in range(n_players)] for p_i in player_indices: + # TODO: might make sense to put starting hands in the deck class for starting_hand in self._starting_hand_probs[p_i].keys(): + + starting_hand = list( + starting_hand + ) # TODO: is this bad? if "p_reach" in locals(): del p_reach action_sequence: Dict[str, List[str]] = collections.defaultdict(list) - previous_betting_stage = "pre_flop" - first_action_round = False for idx, betting_stage in enumerate(self._history.keys()): - # import ipdb; - # ipdb.set_trace() n_actions_round = len(self._history[betting_stage]) for i in range(n_actions_round): - # if i == 0: - # betting_stage = previous_betting_stage - # elif i == n_actions_round - 1: - # previous_betting_stage = betting_stage - # else: - # betting_stage = betting_round action = self._history[betting_stage][i] while action == 'skip': i += 1 # action sequences don't end in skip @@ -368,114 +357,113 @@ def _update_hole_cards_bayes(self, offline_strategy: Dict): ph = i % n_players if p_i != ph: prob_reach_all_hands = [] - num_hands = 0 for opp_starting_hand in self._starting_hand_probs[ p_i ].keys(): - # TODO: clean this up - public_evals = [ - c.eval_card - for c in self._public_information[betting_stage] - ] - if len(set(opp_starting_hand).union(set(public_evals)).union(set(starting_hand))) < \ - len(opp_starting_hand) + len(starting_hand) + len(public_evals): + opp_starting_hand = list( + opp_starting_hand + ) + publics = self._public_information[betting_stage] + if len( + set(opp_starting_hand).union( + set(publics) + ).union(set(starting_hand)) + ) < len( + opp_starting_hand + ) + len( + starting_hand + ) + len( + publics + ): prob = 0 - num_hands += 1 else: - num_hands += 1 - - 
public_cards = self._public_information[ + publics = self._public_information[ betting_stage ] - public_cards_evals = [c.eval_card for c in public_cards] infoset = self._info_set_helper( opp_starting_hand, - public_cards_evals, + publics, action_sequence, betting_stage, ) - # check to see if the strategy exists, if not equal probability - # TODO: is this hacky? problem with defaulting to 1 / 3, is that it - # doesn't work for calculations that need to be made with the object's values + # check to see if the strategy exists, + # if not equal probability + # TODO: is this overly hacky? + # Problem with defaulting to 1 / 3, is that + # it doesn't work for calculations that + # need to be made with the object's values try: # TODO: with or without keys prob = offline_strategy[infoset][action] except KeyError: prob = 1 / len(self.legal_actions) prob_reach_all_hands.append(prob) - # import ipdb; - # ipdb.set_trace() - prob2 = sum(prob_reach_all_hands) / num_hands + total_opp_prob_h = sum(prob_reach_all_hands) /\ + len(prob_reach_all_hands) if "p_reach" not in locals(): - p_reach = prob2 + p_reach = total_opp_prob_h else: - p_reach *= prob2 + p_reach *= total_opp_prob_h elif p_i == ph: - public_evals = [ - c.eval_card - for c in self._public_information[betting_stage] - ] - if len(set(starting_hand).union(set(public_evals))) < ( - len(public_evals) + 2 + publics = self._public_information[betting_stage] + if len( + set(starting_hand).union( + set(publics) + ) + ) < ( + len(publics) + 2 ): - prob = 0 + total_prob = 0 else: - public_cards = self._public_information[betting_stage] - public_cards_evals = [c.eval_card for c in public_cards] + publics = self._public_information[betting_stage] infoset = self._info_set_helper( starting_hand, - public_cards_evals, + publics, action_sequence, betting_stage, ) # TODO: Check this try: - prob = offline_strategy[infoset][action] + total_prob = offline_strategy[infoset][action] except KeyError: - prob = 1 / len(self.legal_actions) + 
total_prob = 1 / len(self.legal_actions) if "p_reach" not in locals(): - p_reach = prob + p_reach = total_prob else: - p_reach *= prob + p_reach *= total_prob action_sequence[betting_stage].append(action) self._starting_hand_probs[p_i][tuple(starting_hand)] = p_reach self._normalize_bayes() - # TODO: delete this? at least for our purposes we don't need it again def deal_bayes(self): - start = time.time() + # TODO: Not sure if I need this yet lut = self.info_set_lut self.info_set_lut = {} new_state = copy.deepcopy(self) new_state.info_set_lut = self.info_set_lut = lut - end = time.time() - print(f"Took {start - end} to load") - players = list(range(len(self.players))) random.shuffle(players) - - # TODO should contain the current public cards/heros real hand, if exists - card_evals_selected = [] - - for player in players: - # does this maintain order? - starting_hand_eval = new_state._get_starting_hand(player) - len_union = len(set(starting_hand_eval).union(set(card_evals_selected))) - len_individual = len(starting_hand_eval) + len(card_evals_selected) + cards_selected = [] + # TODO: this might be made better by selecting the first player's + # cards, then normalizing the second and third, etc.. 
+ for p_i in players: + starting_hand = new_state._get_starting_hand(p_i) + len_union = len(set(starting_hand).union(set(cards_selected))) + len_individual = len(starting_hand) + len(cards_selected) while len_union < len_individual: - starting_hand_eval = new_state._get_starting_hand(player) - len_union = len(set(starting_hand_eval).union(set(card_evals_selected))) - len_individual = len(starting_hand_eval) + len(card_evals_selected) - for card_eval in starting_hand_eval: - card = new_state._evals_to_cards[card_eval] - new_state.players[player].add_private_card(card) - card_evals_selected += starting_hand_eval - cards_selected = [new_state._evals_to_cards[c] for c in card_evals_selected] + starting_hand = new_state._get_starting_hand(p_i) + len_union = len(set(starting_hand).union(set(cards_selected))) + len_individual = len(starting_hand) + len(cards_selected) + # TODO: pull this into a helper method, maybe it should + # be in the dealer class.. + for card in starting_hand: + new_state.players[p_i].add_private_card(card) + cards_selected += starting_hand cards_selected += new_state._public_cards for card in cards_selected: new_state._table.dealer.deck.remove(card) return new_state - # TODO add convenience method to supply public cards + # TODO add convenience method to supply public cards def load_game_state(self, offline_strategy: Dict, action_sequence: list): """ @@ -498,23 +486,31 @@ def load_game_state(self, offline_strategy: Dict, action_sequence: list): def _get_starting_hand(self, player_idx: int): """Get starting hand based on probability of reach""" - starting_hand_idxs = list(range(len(self._starting_hand_probs[player_idx].keys()))) - starting_hands_probs = list(self._starting_hand_probs[player_idx].values()) - starting_hand_idx = np.random.choice(starting_hand_idxs, 1, p=starting_hands_probs)[0] - starting_hand = list(self._starting_hand_probs[player_idx].keys())[starting_hand_idx] + starting_hands = list(self._starting_hand_probs[player_idx].keys()) + 
# hacky for using tuples as keys + starting_hands_idxs = list(range(len(starting_hands))) + starting_hands_probs = list(self._starting_hand_probs[ + player_idx + ].values()) + starting_hand_idx = np.random.choice( + starting_hands_idxs, + 1, + p=starting_hands_probs + )[0] + starting_hand = list(starting_hands[starting_hand_idx]) return starting_hand def _initialize_starting_hands(self): """Dictionary of starting hands to store probabilities in""" assert self.betting_stage == "pre_flop" - # TODO: make this abstracted for n_players - starting_hand_probs = {0: {}, 1: {}, 2: {}} + starting_hand_probs = {} n_players = len(self.players) starting_hands = self._get_card_combos(2) for p_i in range(n_players): + starting_hand_probs[p_i] = {} for starting_hand in starting_hands: starting_hand_probs[p_i][ - tuple([c.eval_card for c in starting_hand]) + starting_hand ] = 1 return starting_hand_probs @@ -523,15 +519,23 @@ def _info_set_helper( ): # didn't want to combine this with the other, as we may want to modularize soon """Get the information set for the current player.""" - cards = sorted(hole_cards, reverse=True,) - cards += sorted(public_cards, reverse=True,) - eval_cards = tuple(cards) + cards = sorted( + hole_cards, + key=operator.attrgetter("eval_card"), + reverse=True, + ) + cards += sorted( + public_cards, + key=operator.attrgetter("eval_card"), + reverse=True, + ) + eval_cards = tuple([int(c) for c in cards]) try: cards_cluster = self.info_set_lut[betting_stage][eval_cards] except KeyError: if not self.info_set_lut: raise ValueError("Pickle luts must be loaded for info set.") - elif eval_cards not in self.info_set_lut[self._betting_stage]: + elif eval_cards not in self.info_set_lut[betting_stage]: raise ValueError("Cards {cards} not in pickle files.") else: raise ValueError("Unrecognised betting stage in pickle files.") @@ -548,7 +552,7 @@ def _info_set_helper( def _get_card_combos(self, num_cards): """Get combinations of cards""" - return 
list(combinations(self._poker_engine.table.dealer.deck._cards_in_deck, num_cards)) + return list(combinations(self.cards_in_deck, num_cards)) @property def community_cards(self) -> List[Card]: @@ -560,6 +564,11 @@ def private_hands(self) -> Dict[ShortDeckPokerPlayer, List[Card]]: """Return all private hands.""" return {p: p.cards for p in self.players} + @property + def cards_in_deck(self): + """Returns current cards in deck""" + return self._table.dealer.deck._cards_in_deck + @property def initial_regret(self) -> Dict[str, float]: """Returns the default regret for this state.""" @@ -590,11 +599,6 @@ def n_players_started_round(self) -> bool: """Return n_players that started the round.""" return self._n_players_started_round - # @property - # def first_move_of_current_round(self) -> bool: - # """Return boolfor first move of current round.""" - # return self._first_move_of_current_round - @property def player_i(self) -> int: """Get the index of the players turn it is.""" @@ -603,11 +607,11 @@ def player_i(self) -> int: @player_i.setter def player_i(self, _: Any): """Raise an error if player_i is set.""" - raise ValueError(f"The player_i property should not be set.") + raise ValueError("The player_i property should not be set.") @property def betting_round(self) -> int: - """Algorithm 1 of pluribus supp. 
material references betting_round.""" + """Return 0 indexed betting round""" try: betting_round = self._betting_stage_to_round[self._betting_stage] except KeyError: @@ -631,7 +635,7 @@ def info_set(self) -> str: key=operator.attrgetter("eval_card"), reverse=True, ) - eval_cards = tuple([card.eval_card for card in cards]) + eval_cards = tuple([int(card) for card in cards]) try: cards_cluster = self.info_set_lut[self._betting_stage][eval_cards] except KeyError: diff --git a/pluribus/poker/card.py b/pluribus/poker/card.py index 5fe30a61..a3fc6db7 100644 --- a/pluribus/poker/card.py +++ b/pluribus/poker/card.py @@ -74,6 +74,9 @@ def __eq__(self, other): def __ne__(self, other): return int(self) != int(other) + def __hash__(self): + return hash(int(self)) + @property def eval_card(self) -> EvaluationCard: """Return an `EvaluationCard` for use in the `Evaluator`.""" @@ -178,4 +181,3 @@ def from_dict(x: Dict[str, Union[int, str]]): if set(x) != {"rank", "suit"}: raise NotImplementedError(f"Unrecognised dict {x}") return Card(rank=x["rank"], suit=x["suit"]) - diff --git a/pluribus/poker/deck.py b/pluribus/poker/deck.py index a463058b..c6801105 100644 --- a/pluribus/poker/deck.py +++ b/pluribus/poker/deck.py @@ -64,10 +64,6 @@ def pick(self, random: bool = True) -> Card: def remove(self, card): """Remove a specific card from the deck""" - # TODO: is there any reason for them to be? 
- # Maybe better to assert it's not - c = card.eval_card - cards_in_deck = [x.eval_card for x in self._cards_in_deck] - if c in cards_in_deck: - index = cards_in_deck.index(c) - self._dealt_cards.append(self._cards_in_deck.pop(index)) + if card in self._cards_in_deck: + self._cards_in_deck.remove(card) + self._dealt_cards.append(card) diff --git a/research/test_methodology/RT.py b/research/test_methodology/RT.py index caf557e4..b89b5815 100644 --- a/research/test_methodology/RT.py +++ b/research/test_methodology/RT.py @@ -21,8 +21,7 @@ agent_output = train( agent1.offline_strategy, public_cards, action_sequence, 40, 6, 6, 3, 2, 6 ) # TODO: back to 50 - with open("realtime-strategy-refactor-game-state.pkl", "wb") as file: + with open("realtime-strategy-refactor-deck.pkl", "wb") as file: pickle.dump(agent_output, file) import ipdb - ipdb.set_trace()