diff --git a/axelrod/data/all_classifiers.yml b/axelrod/data/all_classifiers.yml
index fb0b6a51c..8c4225101 100644
--- a/axelrod/data/all_classifiers.yml
+++ b/axelrod/data/all_classifiers.yml
@@ -1,4 +1,4 @@
-$\\phi$:
+$\phi$:
   inspects_source: false
   long_run_time: false
   makes_use_of: !!set {}
@@ -6,7 +6,7 @@ $\\phi$:
   manipulates_state: false
   memory_depth: .inf
   stochastic: false
-$\\pi$:
+$\pi$:
   inspects_source: false
   long_run_time: false
   makes_use_of: !!set {}
@@ -170,6 +170,14 @@ BackStabber:
   manipulates_state: false
   memory_depth: .inf
   stochastic: false
+Bayesian Forgiver:
+  inspects_source: false
+  long_run_time: false
+  makes_use_of: !!set {}
+  manipulates_source: false
+  manipulates_state: false
+  memory_depth: .inf
+  stochastic: false
 Better and Better:
   inspects_source: false
   long_run_time: false
@@ -186,6 +194,14 @@ Bully:
   manipulates_state: false
   memory_depth: 1
   stochastic: false
+Burn Both Ends:
+  inspects_source: false
+  long_run_time: false
+  makes_use_of: !!set {}
+  manipulates_source: false
+  manipulates_state: false
+  memory_depth: 1
+  stochastic: true
 Bush Mosteller:
   inspects_source: false
   long_run_time: false
@@ -195,6 +211,14 @@ Bush Mosteller:
   manipulates_state: false
   memory_depth: .inf
   stochastic: true
+CAPRI:
+  inspects_source: false
+  long_run_time: false
+  makes_use_of: !!set {}
+  manipulates_source: false
+  manipulates_state: false
+  memory_depth: 3
+  stochastic: false
 Calculator:
   inspects_source: false
   long_run_time: false
@@ -439,15 +463,15 @@ Evolved ANN 5 Noise 05:
   manipulates_state: false
   memory_depth: .inf
   stochastic: false
-EvolvedAttention:
+Evolved FSM 16:
   inspects_source: false
-  long_run_time: True
+  long_run_time: false
   makes_use_of: !!set {}
   manipulates_source: false
   manipulates_state: false
-  memory_depth: 200
+  memory_depth: .inf
   stochastic: false
-Evolved FSM 16:
+Evolved FSM 16 Noise 05:
   inspects_source: false
   long_run_time: false
   makes_use_of: !!set {}
@@ -455,7 +479,7 @@ Evolved FSM 16:
   manipulates_state: false
   memory_depth: .inf
   stochastic: false
-Evolved FSM 16 Noise 05:
+Evolved FSM 4:
   inspects_source: false
   long_run_time: false
   makes_use_of: !!set {}
@@ -463,7 +487,7 @@ Evolved FSM 16 Noise 05:
   manipulates_state: false
   memory_depth: .inf
   stochastic: false
-Evolved FSM 4:
+Evolved FSM 6:
   inspects_source: false
   long_run_time: false
   makes_use_of: !!set {}
@@ -479,6 +503,14 @@ Evolved HMM 5:
   manipulates_state: false
   memory_depth: 5
   stochastic: true
+EvolvedAttention:
+  inspects_source: false
+  long_run_time: true
+  makes_use_of: !!set {}
+  manipulates_source: false
+  manipulates_state: false
+  memory_depth: 200
+  stochastic: false
 EvolvedLookerUp1_1_1:
   inspects_source: false
   long_run_time: false
@@ -659,39 +691,23 @@ Fortress4:
   manipulates_state: false
   memory_depth: 3
   stochastic: false
-GTFT:
+FrequencyAnalyzer:
   inspects_source: false
   long_run_time: false
-  makes_use_of: !!set
-    game: null
-  manipulates_source: false
-  manipulates_state: false
-  memory_depth: 1
-  stochastic: true
-Geller:
-  inspects_source: true
-  long_run_time: false
-  makes_use_of: !!set {}
-  manipulates_source: false
-  manipulates_state: false
-  memory_depth: .inf
-  stochastic: true
-Geller Cooperator:
-  inspects_source: true
-  long_run_time: false
   makes_use_of: !!set {}
   manipulates_source: false
   manipulates_state: false
   memory_depth: .inf
   stochastic: false
-Geller Defector:
-  inspects_source: true
+GTFT:
+  inspects_source: false
   long_run_time: false
-  makes_use_of: !!set {}
+  makes_use_of: !!set
+    game: null
   manipulates_source: false
   manipulates_state: false
-  memory_depth: .inf
-  stochastic: false
+  memory_depth: 1
+  stochastic: true
 General Soft Grudger:
   inspects_source: false
   long_run_time: false
@@ -1099,47 +1115,14 @@ Michaelos:
   manipulates_state: false
   memory_depth: .inf
   stochastic: true
-Mind Bender:
-  inspects_source: false
-  long_run_time: false
-  makes_use_of: !!set {}
-  manipulates_source: true
-  manipulates_state: false
-  memory_depth: -10
-  stochastic: false
-Mind Controller:
+Momentum:
   inspects_source: false
   long_run_time: false
   makes_use_of: !!set {}
-  manipulates_source: true
-  manipulates_state: false
-  memory_depth: -10
-  stochastic: false
-Mind Reader:
-  inspects_source: true
-  long_run_time: false
-  makes_use_of: !!set
-    game: null
   manipulates_source: false
   manipulates_state: false
   memory_depth: .inf
   stochastic: false
-Mind Warper:
-  inspects_source: false
-  long_run_time: false
-  makes_use_of: !!set {}
-  manipulates_source: true
-  manipulates_state: false
-  memory_depth: -10
-  stochastic: false
-Mirror Mind Reader:
-  inspects_source: true
-  long_run_time: false
-  makes_use_of: !!set {}
-  manipulates_source: true
-  manipulates_state: false
-  memory_depth: .inf
-  stochastic: false
 N Tit(s) For M Tat(s):
   inspects_source: false
   long_run_time: false
@@ -1344,15 +1327,6 @@ Prober 4:
   manipulates_state: false
   memory_depth: .inf
   stochastic: false
-Protected Mind Reader:
-  inspects_source: true
-  long_run_time: false
-  makes_use_of: !!set
-    game: null
-  manipulates_source: true
-  manipulates_state: false
-  memory_depth: .inf
-  stochastic: false
 Pun1:
   inspects_source: false
   long_run_time: false
@@ -1733,6 +1707,14 @@ Spiteful Tit For Tat:
   manipulates_state: false
   memory_depth: .inf
   stochastic: false
+SpitefulCC:
+  inspects_source: false
+  long_run_time: false
+  makes_use_of: !!set {}
+  manipulates_source: false
+  manipulates_state: false
+  memory_depth: .inf
+  stochastic: false
 Stalker:
   inspects_source: false
   long_run_time: false
diff --git a/axelrod/strategies/_strategies.py b/axelrod/strategies/_strategies.py
index bc80eeccc..edb2e0857 100644
--- a/axelrod/strategies/_strategies.py
+++ b/axelrod/strategies/_strategies.py
@@ -73,6 +73,7 @@
     SecondByYamachi,
 )
 from .backstabber import BackStabber, DoubleCrosser
+from .bayesian_forgiver import BayesianForgiver
 from .better_and_better import BetterAndBetter
 from .bush_mosteller import BushMosteller
 from .calculator import Calculator
@@ -307,6 +308,7 @@
     ArrogantQLearner,
     AverageCopier,
     BackStabber,
+    BayesianForgiver,
     BetterAndBetter,
     Bully,
     BurnBothEnds,
diff --git a/axelrod/strategies/bayesian_forgiver.py b/axelrod/strategies/bayesian_forgiver.py
new file mode 100644
index 000000000..20799759a
--- /dev/null
+++ b/axelrod/strategies/bayesian_forgiver.py
@@ -0,0 +1,142 @@
+"""
+Bayesian Forgiver - A strategy using Bayesian inference for adaptive forgiveness.
+
+This strategy maintains a Bayesian belief about the opponent's cooperation probability
+using a Beta distribution, and makes forgiveness decisions based on both the estimated
+cooperation rate and the uncertainty in that estimate.
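+
+For example, against an unconditional defector the belief quickly turns
+pessimistic and the strategy defects from the second round onwards (this
+mirrors the accompanying tests)::
+
+    >>> import axelrod as axl
+    >>> player = axl.BayesianForgiver()
+    >>> match = axl.Match([player, axl.Defector()], turns=4)
+    >>> match.play()
+    [(C, D), (D, D), (D, D), (D, D)]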
+"""
+
+from axelrod.action import Action
+from axelrod.player import Player
+
+C, D = Action.C, Action.D
+
+
+class BayesianForgiver(Player):
+    """
+    A strategy that uses Bayesian inference to model opponent behavior and
+    adaptively adjust forgiveness based on uncertainty.
+
+    The strategy maintains a Beta distribution representing beliefs about the
+    opponent's cooperation probability. It uses both the mean (expected cooperation
+    rate) and variance (uncertainty) to make decisions:
+
+    - When uncertain about the opponent's nature, it is cautious
+    - When certain the opponent is hostile, it punishes consistently
+    - When certain the opponent is cooperative, it cooperates consistently
+
+    Algorithm:
+    1. Maintain Beta(alpha, beta) distribution for opponent's cooperation probability
+    2. Start with Beta(1, 1) - neutral/uniform prior
+    3. Update after each round: C → alpha += 1, D → beta += 1
+    4. Calculate mean = alpha / (alpha + beta)
+    5. Calculate uncertainty (std deviation)
+    6. Adaptive forgiveness: threshold = base_threshold + uncertainty_factor * uncertainty
+    7. Forgive a defection only if the estimated cooperation rate clears this threshold
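+
+    For example, after observing eight cooperations and then a defection the
+    posterior is Beta(9, 2): mean = 9/11 ≈ 0.82, standard deviation ≈ 0.11,
+    threshold = 0.45 + 2.5 * 0.11 ≈ 0.73, so the defection is forgiven.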
+
+    Names:
+    - Bayesian Forgiver: Original name by Matt Hodges
+    """
+
+    name = "Bayesian Forgiver"
+    classifier = {
+        "memory_depth": float("inf"),
+        "stochastic": False,
+        "makes_use_of": set(),
+        "long_run_time": False,
+        "inspects_source": False,
+        "manipulates_source": False,
+        "manipulates_state": False,
+    }
+
+    def __init__(
+        self,
+        prior_alpha: float = 1.0,
+        prior_beta: float = 1.0,
+        base_forgiveness_threshold: float = 0.45,
+        uncertainty_factor: float = 2.5,
+    ) -> None:
+        """
+        Initialize the Bayesian Forgiver strategy.
+
+        Parameters
+        ----------
+        prior_alpha : float
+            Initial alpha parameter for the Beta distribution (default: 1.0)
+            Acts as one plus a pseudo-count of prior cooperations
+            Higher values indicate stronger prior belief in cooperation
+        prior_beta : float
+            Initial beta parameter for the Beta distribution (default: 1.0)
+            Acts as one plus a pseudo-count of prior defections
+            Higher values indicate stronger prior belief in defection
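+            For example, prior_alpha=1.0 with prior_beta=2.0 encodes a
+            pessimistic prior with an expected cooperation rate of 1/3.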
+        base_forgiveness_threshold : float
+            Base threshold for forgiveness decision (default: 0.45)
+            If estimated cooperation probability >= threshold, forgive defections
+        uncertainty_factor : float
+            How much uncertainty increases the forgiveness threshold (default: 2.5)
+
+        Note: Default parameters have been optimized through grid search
+        to maximize performance against common IPD strategies.
+        """
+        super().__init__()
+        self.prior_alpha = prior_alpha
+        self.prior_beta = prior_beta
+        self.base_forgiveness_threshold = base_forgiveness_threshold
+        self.uncertainty_factor = uncertainty_factor
+
+        # Initialize Bayesian belief parameters
+        self.alpha = prior_alpha
+        self.beta = prior_beta
+
+    def reset(self):
+        """Reset the strategy to initial state."""
+        super().reset()
+        self.alpha = self.prior_alpha
+        self.beta = self.prior_beta
+
+    def strategy(self, opponent: Player) -> Action:
+        """
+        Determine next action using Bayesian opponent model.
+
+        Returns
+        -------
+        Action
+            C (cooperate) or D (defect)
+        """
+        # First move: Start with cooperation (optimistic prior)
+        if not self.history:
+            return C
+
+        # Update Bayesian belief based on opponent's last action
+        if opponent.history[-1] == C:
+            self.alpha += 1.0
+        else:
+            self.beta += 1.0
+
+        # Calculate statistics from Beta distribution
+        total = self.alpha + self.beta
+        mean_cooperation = self.alpha / total
+
+        # Calculate variance and standard deviation (uncertainty)
+        # Var(Beta(α,β)) = αβ / ((α+β)²(α+β+1))
+        variance = (self.alpha * self.beta) / (total * total * (total + 1))
+        uncertainty = variance**0.5
+
+        # Adaptive forgiveness threshold
+        forgiveness_threshold = (
+            self.base_forgiveness_threshold
+            + self.uncertainty_factor * uncertainty
+        )
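+        # With the default parameters this threshold starts above 1 (an early
+        # defection is punished regardless of the mean) and decays towards the
+        # base value of 0.45 as evidence accumulates and uncertainty shrinks.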
+
+        # Decision logic
+        if opponent.history[-1] == C:
+            # Opponent cooperated last round - reciprocate cooperation
+            return C
+        else:
+            # Opponent defected last round - decide whether to forgive or punish
+            if mean_cooperation >= forgiveness_threshold:
+                # Forgive only when the estimated cooperation rate is high enough.
+                return C
+            else:
+                # Opponent appears to be hostile with sufficient confidence
+                # Punish the defection
+                return D
diff --git a/axelrod/tests/strategies/test_bayesian_forgiver.py b/axelrod/tests/strategies/test_bayesian_forgiver.py
new file mode 100644
index 000000000..4112c342d
--- /dev/null
+++ b/axelrod/tests/strategies/test_bayesian_forgiver.py
@@ -0,0 +1,194 @@
+"""Tests for the Bayesian Forgiver strategy."""
+
+import axelrod as axl
+
+from .test_player import TestPlayer
+
+C, D = axl.Action.C, axl.Action.D
+
+
+class TestBayesianForgiver(TestPlayer):
+    """Test suite for BayesianForgiver strategy."""
+
+    name = "Bayesian Forgiver: 1.0, 1.0, 0.45, 2.5"
+    player = axl.BayesianForgiver
+    expected_classifier = {
+        "memory_depth": float("inf"),
+        "stochastic": False,
+        "makes_use_of": set(),
+        "long_run_time": False,
+        "inspects_source": False,
+        "manipulates_source": False,
+        "manipulates_state": False,
+    }
+
+    def test_initial_strategy(self):
+        """Test that the strategy starts by cooperating."""
+        actions = [(C, C)]
+        self.versus_test(axl.Cooperator(), expected_actions=actions)
+
+    def test_vs_cooperator(self):
+        """Test behavior against always cooperate - should cooperate."""
+        actions = [(C, C), (C, C), (C, C), (C, C), (C, C)]
+        self.versus_test(axl.Cooperator(), expected_actions=actions)
+
+    def test_vs_defector(self):
+        """Test behavior against always defect - should eventually defect back."""
+        # First move: cooperate
+        # After seeing D, Bayesian model updates: alpha=1, beta=2, mean=0.33
+        # Even with high uncertainty, mean is below threshold, so defect
+        actions = [(C, D), (D, D), (D, D), (D, D), (D, D)]
+        self.versus_test(axl.Defector(), expected_actions=actions)
+
+    def test_vs_alternator(self):
+        """Test behavior against alternating strategy."""
+        # With parameters (1.0, 1.0, 0.45, 2.5):
+        # Early uncertainty is high, so threshold is high, but mean cooperation ~0.5
+        # When opponent defects, we evaluate: mean < threshold → defect
+        # This creates an alternating pattern
+        actions = [(C, C), (C, D), (D, C), (C, D), (D, C), (C, D)]
+        self.versus_test(axl.Alternator(), expected_actions=actions)
+
+    def test_vs_tit_for_tat(self):
+        """Test behavior against Tit For Tat - should achieve mutual cooperation."""
+        actions = [(C, C), (C, C), (C, C), (C, C), (C, C)]
+        self.versus_test(axl.TitForTat(), expected_actions=actions)
+
+    def test_vs_suspicious_tit_for_tat(self):
+        """Test against Suspicious Tit For Tat (starts with D)."""
+        # STFT starts with D, then mirrors
+        # BF starts with C, sees D → defects back
+        # This creates an alternating pattern
+        actions = [(C, D), (D, C), (C, D), (D, C), (C, D)]
+        self.versus_test(axl.SuspiciousTitForTat(), expected_actions=actions)
+
+    def test_vs_grudger(self):
+        """Test behavior against Grudger."""
+        # Grudger cooperates until any defection, then defects forever
+        # BayesianForgiver should cooperate, so mutual cooperation
+        actions = [(C, C), (C, C), (C, C), (C, C), (C, C)]
+        self.versus_test(axl.Grudger(), expected_actions=actions)
+
+    def test_vs_random(self):
+        """Test behavior against Random strategy with specific seed."""
+        # Random behavior - BayesianForgiver should adapt
+        # With seed=3, Random plays: C, C, C, C, D
+        actions = [(C, C), (C, C), (C, C), (C, C), (C, D)]
+        self.versus_test(axl.Random(), expected_actions=actions, seed=3)
+
+    def test_forgiveness_high_cooperation(self):
+        """Test that forgiveness occurs when opponent has high cooperation rate."""
+        # Opponent cooperates many times, then defects - should forgive
+        # This tests the forgiveness path (mean_cooperation >= threshold)
+        opponent = axl.MockPlayer(actions=[C, C, C, C, C, C, C, C, D, C])
+        actions = [
+            (C, C),  # alpha=2, beta=1
+            (C, C),  # alpha=3, beta=1
+            (C, C),  # alpha=4, beta=1
+            (C, C),  # alpha=5, beta=1
+            (C, C),  # alpha=6, beta=1
+            (C, C),  # alpha=7, beta=1
+            (C, C),  # alpha=8, beta=1
+            (C, C),  # alpha=9, beta=1
+            (C, D),  # Opponent defects: alpha=9, beta=2, mean=9/11=0.818
+            (C, C),  # mean >= threshold → FORGIVE! Return C
+        ]
+        self.versus_test(opponent, expected_actions=actions)
+
+    def test_vs_mock_single_defection(self):
+        """Test response to a single defection."""
+        # Opponent cooperates then defects once then cooperates
+        # With the default parameters: after seeing one D among many Cs, it
+        # still punishes once but quickly returns to cooperation
+        opponent = axl.MockPlayer(actions=[C, C, C, D, C, C, C])
+        actions = [
+            (C, C),  # Both cooperate
+            (C, C),  # Both cooperate
+            (C, C),  # Both cooperate
+            (C, D),  # Opponent defects - alpha=4, beta=2
+            (D, C),  # Punish the defection
+            (C, C),  # Resume cooperation
+            (C, C),  # Continue cooperation
+        ]
+        self.versus_test(opponent, expected_actions=actions)
+
+    def test_vs_mock_consistent_defector(self):
+        """Test punishment of consistent defector."""
+        # Opponent defects consistently
+        opponent = axl.MockPlayer(actions=[D, D, D, D, D, D])
+        actions = [
+            (C, D),  # Start optimistic, see D - alpha=1, beta=2
+            (D, D),  # Mean = 0.33, below threshold even with uncertainty
+            (D, D),  # Continue defecting
+            (D, D),  # Continue defecting
+            (D, D),  # Continue defecting
+            (D, D),  # Continue defecting
+        ]
+        self.versus_test(opponent, expected_actions=actions)
+
+    def test_vs_mock_mixed_behavior(self):
+        """Test adaptation to mixed behavior."""
+        # Opponent with mixed C and D: [C, C, D, C, D, C, C, D]
+        # With default parameters, responds more reactively to defections
+        opponent = axl.MockPlayer(actions=[C, C, D, C, D, C, C, D])
+        actions = [
+            (C, C),  # Start cooperating
+            (C, C),  # Continue cooperating
+            (C, D),  # Opponent defects
+            (D, C),  # Punish defection
+            (C, D),  # Opponent cooperated, we cooperate, they defect
+            (D, C),  # Punish again
+            (C, C),  # Resume cooperation
+            (C, D),  # Cooperate, opponent defects
+        ]
+        self.versus_test(opponent, expected_actions=actions)
+
+    def test_parameter_changes(self):
+        """Test that different parameters affect behavior."""
+        # Test with different prior - more pessimistic
+        player = self.player(prior_alpha=1.0, prior_beta=2.0)
+        opponent = axl.Defector()
+        match = axl.Match([player, opponent], turns=3)
+        result = match.play()
+        # With pessimistic prior and defector opponent, should defect quickly
+        self.assertEqual(result[0], (C, D))  # First move still C
+        self.assertEqual(result[1][0], D)  # Should defect after seeing D
+
+    def test_reset(self):
+        """Test that reset properly reinitializes the strategy."""
+        player = self.player()
+        opponent = axl.Cooperator()
+
+        # Play some rounds - opponent cooperates so alpha should increase
+        for _ in range(5):
+            player.strategy(opponent)
+            opponent.strategy(player)
+            player.update_history(C, C)
+            opponent.update_history(C, C)
+
+        # Alpha should have changed (beta stays the same since opponent always cooperates)
+        self.assertNotEqual(player.alpha, player.prior_alpha)
+
+        # Reset
+        player.reset()
+
+        # Should be back to initial values
+        self.assertEqual(player.alpha, player.prior_alpha)
+        self.assertEqual(player.beta, player.prior_beta)
+
+    def test_clone(self):
+        """Test that cloning preserves parameters."""
+        player = self.player(
+            prior_alpha=3.0,
+            prior_beta=2.0,
+            base_forgiveness_threshold=0.4,
+            uncertainty_factor=2.0,
+        )
+        clone = player.clone()
+
+        self.assertEqual(clone.prior_alpha, 3.0)
+        self.assertEqual(clone.prior_beta, 2.0)
+        self.assertEqual(clone.base_forgiveness_threshold, 0.4)
+        self.assertEqual(clone.uncertainty_factor, 2.0)
+        self.assertEqual(clone.alpha, 3.0)
+        self.assertEqual(clone.beta, 2.0)
diff --git a/docs/index.rst b/docs/index.rst
index 82b9f41b5..a379bbc7f 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -53,7 +53,7 @@ Count the number of available players::
 
     >>> import axelrod as axl
     >>> len(axl.strategies)
-    243
+    244
 
 Create matches between two players::
 
diff --git a/docs/reference/strategy_index.rst b/docs/reference/strategy_index.rst
index 9764d3082..4bd5f8a03 100644
--- a/docs/reference/strategy_index.rst
+++ b/docs/reference/strategy_index.rst
@@ -28,6 +28,8 @@ Here are the docstrings of all the strategies in the library.
     :members:
 .. automodule:: axelrod.strategies.backstabber
     :members:
+.. automodule:: axelrod.strategies.bayesian_forgiver
+    :members:
 .. automodule:: axelrod.strategies.better_and_better
     :members:
 .. automodule:: axelrod.strategies.bush_mosteller