From 06039189307588b380dea968e0cc5be652b95f52 Mon Sep 17 00:00:00 2001
From: Ted Turocy <ted.turocy@gmail.com>
Date: Tue, 16 Apr 2024 14:22:22 +0100
Subject: [PATCH] Implement empirical payoff estimation method for AQRE.

---
 doc/pygambit.api.rst         |  1 +
 src/pygambit/qre.py          | 52 ++++++++++++++++++++++++++++++++++++
 src/solvers/logit/efglogit.h |  2 +-
 3 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/doc/pygambit.api.rst b/doc/pygambit.api.rst
index bc98fc63e..41136f67a 100644
--- a/doc/pygambit.api.rst
+++ b/doc/pygambit.api.rst
@@ -300,5 +300,6 @@ Computation of quantal response equilibria
    fit_strategy_fixedpoint
    LogitQREMixedStrategyFitResult
 
+   fit_behavior_empirical
    fit_behavior_fixedpoint
    LogitQREMixedBehaviorFitResult
diff --git a/src/pygambit/qre.py b/src/pygambit/qre.py
index dea81fa5d..ec46c3845 100644
--- a/src/pygambit/qre.py
+++ b/src/pygambit/qre.py
@@ -510,6 +510,8 @@ def fit_behavior_fixedpoint(
     See Also
     --------
     fit_strategy_fixedpoint : Estimate QRE using the strategic representation
+    fit_behavior_empirical : Estimate QRE by approximation of the correspondence
+                             using independent decision problems.
 
     References
     ----------
@@ -521,3 +523,53 @@ def fit_behavior_fixedpoint(
     return LogitQREMixedBehaviorFitResult(
         data, "fixedpoint", res.lam, res.profile, res.log_like
     )
+
+
+def fit_behavior_empirical(
+        data: libgbt.MixedBehaviorProfileDouble
+) -> LogitQREMixedBehaviorFitResult:
+    """Use maximum likelihood estimation to estimate a quantal response
+    equilibrium using the empirical payoff method.  This method ignores the
+    fixed-point considerations of the QRE and approximates it instead by a
+    collection of independent decision problems. [1]_
+
+    Parameters
+    ----------
+    data : MixedBehaviorProfileDouble
+        Observed play: ``data[a]`` weights action ``a`` in the log-likelihood.
+
+    Returns
+    -------
+    LogitQREMixedBehaviorFitResult
+        The result of the estimation.
+
+    See Also
+    --------
+    fit_behavior_fixedpoint : Estimate QRE precisely by computing the correspondence
+
+    References
+    ----------
+    .. [1] Bland, J. R. and Turocy, T. L., 2023.  Quantal response equilibrium
+        as a structural model for estimation: The missing manual.
+        SSRN working paper 4425515.
+    """
+    def do_logit(lam: float):
+        # Shift by each infoset's largest exponent so math.exp() cannot overflow.
+        scaled = [[lam*a for a in infoset] for player in values for infoset in player]
+        logit_probs = [[math.exp(x - max(vv)) for x in vv] for vv in scaled]
+        sums = [sum(v) for v in logit_probs]
+        logit_probs = [v/s for (vv, s) in zip(logit_probs, sums) for v in vv]
+        return [max(v, 1.0e-293) for v in logit_probs]
+
+    def log_like(lam: float) -> float:
+        return sum(f*math.log(p) for (f, p) in zip(flattened_data, do_logit(lam)))
+
+    flattened_data = [data[a] for p in data.game.players for s in p.infosets for a in s.actions]
+    normalized = data.normalize()
+    values = [[[normalized.action_value(a) for a in s.actions] for s in p.infosets]
+              for p in data.game.players]
+    res = scipy.optimize.minimize(lambda x: -log_like(x[0]), (0.1,),
+                                  bounds=((0.0, None),))
+    return LogitQREMixedBehaviorFitResult(
+        data, "empirical", res.x[0], do_logit(res.x[0]), -res.fun
+    )
diff --git a/src/solvers/logit/efglogit.h b/src/solvers/logit/efglogit.h
index 97022edda..67e567da7 100644
--- a/src/solvers/logit/efglogit.h
+++ b/src/solvers/logit/efglogit.h
@@ -101,7 +101,7 @@ LogitBehaviorEstimate(const MixedBehaviorProfile<double> &p_frequencies, double
   alg.SetMaxDecel(p_maxAccel);
   alg.SetStepsize(p_firstStep);
   std::ostringstream ostream;
-  return alg.Estimate(start, p_frequencies, ostream, 1000000.0, 1.0);
+  return alg.Estimate(start, p_frequencies, ostream, 2.0, 1.0);
 }
 
 inline List<MixedBehaviorProfile<double>> LogitBehaviorSolve(const Game &p_game, double p_epsilon,