Source code for catsim.estimation.numerical

"""Numerical estimation methods for ability estimation."""

import numpy
import numpy.typing as npt
from scipy.optimize import minimize_scalar

from .. import cat, irt
from ..irt import THETA_MAX_EXTENDED, THETA_MIN_EXTENDED
from ..item_bank import ItemBank
from .base import BaseEstimator

# Mathematical constants
GOLDEN_RATIO = (1 + 5**0.5) / 2


[docs] class NumericalSearchEstimator(BaseEstimator): """Implement search algorithms in unimodal functions to find the maximum of the log-likelihood function. This class provides multiple numerical search methods for ability estimation in IRT, including ternary search, dichotomous search, Fibonacci search, and golden-section search, according to [Veliz20]_. Also check [Brent02]_. It is also possible to use the methods from :py:func:`scipy.optimize.minimize_scalar`. Parameters ---------- tol : float, optional Tolerance for convergence in the optimization algorithm. Default is 1e-6. dodd : bool, optional Whether to employ Dodd's estimation heuristic [Dod90]_ when the response vector only has one kind of response (all correct or all incorrect, see :py:func:`catsim.cat.dodd`). Default is True. verbose : bool, optional Whether to print detailed information during optimization. Default is False. method : str, optional The search method to employ. Must be one of: 'ternary', 'dichotomous', 'fibonacci', 'golden', 'brent', 'bounded', or 'golden2'. Default is 'bounded'. """ __methods = frozenset(["ternary", "dichotomous", "fibonacci", "golden", "brent", "bounded", "golden2"])
[docs] @staticmethod def available_methods() -> frozenset[str]: """Get a set of available estimation methods. Returns ------- frozenset[str] Set of available estimation methods: {'ternary', 'dichotomous', 'fibonacci', 'golden', 'brent', 'bounded', 'golden2'}. """ return NumericalSearchEstimator.__methods
def __str__(self) -> str: """Return a string representation of the estimator.""" return f"Numerical Search Estimator ({self.__search_method})" def __init__( self, tol: float = 1e-6, dodd: bool = True, verbose: bool = False, method: str = "bounded", ) -> None: """Initialize the estimator. Parameters ---------- tol : float, optional Tolerance for convergence in the optimization algorithm. Default is 1e-6. dodd : bool, optional Whether to use Dodd's estimation heuristic for edge cases (all correct or all incorrect responses). Default is True. verbose : bool, optional Whether to print detailed information during optimization. Default is False. method : str, optional The numerical search method to use for optimization. Default is "bounded". Raises ------ ValueError If the parameter `method` is not one of the available methods. """ super().__init__(verbose) if method not in NumericalSearchEstimator.__methods: msg = f"Parameter 'method' must be one of {NumericalSearchEstimator.__methods}." raise ValueError(msg) self._tol = tol self._dodd = dodd self.__search_method = method
[docs] def estimate( self, index: int | None = None, item_bank: ItemBank | None = None, administered_items: list[int] | None = None, response_vector: list[bool] | None = None, est_theta: float | None = None, ) -> float: r"""Compute the theta value that maximizes the log-likelihood function for the given examinee. When this method is used inside a simulator, its arguments are automatically filled. Outside of a simulation, the user can also specify the arguments to use the Estimator as a standalone object. Parameters ---------- index : int or None, optional Index of the current examinee in the simulator. Default is None. item_bank : ItemBank or None, optional An ItemBank containing item parameters. Default is None. administered_items : list[int] or None, optional A list containing the indexes of items that were already administered. Default is None. response_vector : list[bool] or None, optional A boolean list containing the examinee's answers to the administered items. Default is None. est_theta : float or None, optional A float containing the current estimated ability. Default is None. Returns ------- float The current estimated ability :math:`\hat\theta`. Raises ------ ValueError If required parameters are None when not using a simulator. """ item_bank, administered_items, response_vector, est_theta = self._prepare_args( return_item_bank=True, return_administered_items=True, return_response_vector=True, return_est_theta=True, index=index, item_bank=item_bank, administered_items=administered_items, response_vector=response_vector, est_theta=est_theta, ) if item_bank is None: msg = "item_bank parameter cannot be None" raise ValueError(msg) if administered_items is None: msg = "administered_items parameter cannot be None" raise ValueError(msg) if response_vector is None: msg = "response_vector parameter cannot be None" raise ValueError(msg) if est_theta is None: msg = "est_theta parameter cannot be None" raise ValueError(msg) self._calls += 1 self._evaluations = 0 summarized_answers = set(response_vector) # enter here if examinee has only answered correctly or incorrectly if len(summarized_answers) == 1: answer = summarized_answers.pop() # if the estimator was initialized with dodd = True, # use Dodd's estimation heuristic to return a theta value if self._dodd: candidate_theta = cat.dodd(est_theta, item_bank, answer) # otherwise, return positive or negative infinity, # in accordance with the definition of the MLE elif answer: candidate_theta = float("inf") else: candidate_theta = float("-inf") return candidate_theta if self.__search_method in {"ternary", "dichotomous"}: candidate_theta = self._solve_ternary_dichotomous( THETA_MAX_EXTENDED, THETA_MIN_EXTENDED, response_vector, item_bank.get_items(administered_items) ) elif self.__search_method == "fibonacci": candidate_theta = self._solve_fibonacci( THETA_MAX_EXTENDED, THETA_MIN_EXTENDED, response_vector, item_bank.get_items(administered_items) ) elif self.__search_method == "golden2": candidate_theta = self._solve_golden_section( THETA_MAX_EXTENDED, THETA_MIN_EXTENDED, response_vector, item_bank.get_items(administered_items) ) elif self.__search_method in {"brent", "bounded", "golden"}: res = minimize_scalar( irt.negative_log_likelihood, bracket=(THETA_MIN_EXTENDED, THETA_MAX_EXTENDED), bounds=(THETA_MIN_EXTENDED, THETA_MAX_EXTENDED) if self.__search_method == "bounded" else None, method=self.__search_method, args=(response_vector, item_bank.get_items(administered_items)), tol=self._tol if self.__search_method != "bounded" else None, ) self._evaluations = res.nfev candidate_theta = res.x if self._verbose: print(f"{self._evaluations} evaluations") return candidate_theta
def _solve_ternary_dichotomous( self, b: float, a: float, response_vector: list[bool], item_params: npt.NDArray[numpy.floating], ) -> float: """Find the most likely ability using ternary or dichotomous search methods. Parameters ---------- b : float The upper bound to search for the ability, in the ability/difficulty scale. a : float The lower bound to search for the ability, in the ability/difficulty scale. response_vector : list[bool] The responses given to the answered items. item_params : numpy.ndarray The parameter matrix of the answered items. Returns ------- float The estimated ability. Raises ------ ValueError If the search interval becomes invalid during iteration. """ error = float("inf") while error >= self._tol: self._evaluations += 2 if self.__search_method == "ternary": c = (b + 2 * a) / 3 d = (2 * b + a) / 3 elif self.__search_method == "dichotomous": m = (a + b) / 2 c = m - (self._tol / 2) d = m + (self._tol / 2) left_side_ll = irt.log_likelihood(c, response_vector, item_params) right_side_ll = irt.log_likelihood(d, response_vector, item_params) if left_side_ll >= right_side_ll: b = d else: a = c if not (a <= c <= d <= b): msg = f"Invalid interval: a={a}, c={c}, d={d}, b={b}" raise ValueError(msg) candidate_theta = (b + a) / 2 error = abs(b - a) if self.__search_method == "dichotomous": error /= 2 if self._verbose: print(f"\t\tTheta: {candidate_theta}, LL: {max(left_side_ll, right_side_ll)}") return candidate_theta def _solve_fibonacci( self, b: float, a: float, response_vector: list[bool], item_params: npt.NDArray[numpy.floating], ) -> float: """Find the most likely ability using the Fibonacci search method. Parameters ---------- b : float The upper bound to search for the ability, in the ability/difficulty scale. a : float The lower bound to search for the ability, in the ability/difficulty scale. response_vector : list[bool] The responses given to the answered items. item_params : numpy.ndarray The parameter matrix of the answered items. Returns ------- float The estimated ability. """ fib = [1, 1] n = 1 # while (upper_bound - lower_bound) / fib[-1] > .001: while (b - a) / fib[-1] > self._tol: n += 1 fib.append(fib[-1] + fib[-2]) c = a + (fib[n - 2] / fib[n]) * (b - a) d = a + (fib[n - 1] / fib[n]) * (b - a) left_side_ll = irt.log_likelihood(c, response_vector, item_params) right_side_ll = irt.log_likelihood(d, response_vector, item_params) self._evaluations += 2 while n != 2: # noqa: PLR2004 self._evaluations += 1 n -= 1 if left_side_ll >= right_side_ll: b = d d = c c = a + (fib[n - 2] / fib[n]) * (b - a) right_side_ll = left_side_ll left_side_ll = irt.log_likelihood(c, response_vector, item_params) else: a = c c = d d = a + (fib[n - 1] / fib[n]) * (b - a) left_side_ll = right_side_ll right_side_ll = irt.log_likelihood(d, response_vector, item_params) # assert a <= c <= d <= b if self._verbose: print(f"\t\tTheta: {(b + a) / 2}, LL: {max(left_side_ll, right_side_ll)}") return (b + a) / 2 def _solve_golden_section( self, b: float, a: float, response_vector: list[bool], item_params: npt.NDArray[numpy.floating], ) -> float: """Find the most likely ability using the golden-section search method. Parameters ---------- b : float The upper bound to search for the ability, in the ability/difficulty scale. a : float The lower bound to search for the ability, in the ability/difficulty scale. response_vector : list[bool] The responses given to the answered items. item_params : numpy.ndarray The parameter matrix of the answered items. Returns ------- float The estimated ability. Raises ------ ValueError If the golden section interval becomes invalid during iteration. """ c = b + (a - b) / GOLDEN_RATIO d = a + (b - a) / GOLDEN_RATIO left_side_ll = irt.log_likelihood(c, response_vector, item_params) right_side_ll = irt.log_likelihood(d, response_vector, item_params) while abs(b - a) > self._tol: self._evaluations += 1 if left_side_ll >= right_side_ll: b = d d = c c = b + (a - b) / GOLDEN_RATIO right_side_ll = left_side_ll left_side_ll = irt.log_likelihood(c, response_vector, item_params) else: a = c c = d d = a + (b - a) / GOLDEN_RATIO left_side_ll = right_side_ll right_side_ll = irt.log_likelihood(d, response_vector, item_params) if not (a < c <= d < b): msg = f"Invalid golden section interval: a={a}, c={c}, d={d}, b={b}" raise ValueError(msg) if self._verbose: print(f"\t\tTheta: {(b + a) / 2}, LL: {max(left_side_ll, right_side_ll)}") return (b + a) / 2 @property def dodd(self) -> bool: """Whether Dodd's estimation heuristic [Dod90]_ will be used by the estimator. Dodd's method is used when the response vector is composed solely of correct or incorrect answers, to prevent maximum likelihood methods from returning -infinity or +infinity. Returns ------- bool Boolean value indicating if Dodd's method will be used or not. See Also -------- catsim.cat.dodd : Implementation of Dodd's estimation heuristic. """ return self._dodd @property def method(self) -> str: """Get the estimator search method selected during instantiation. Returns ------- str The search method ('ternary', 'dichotomous', 'fibonacci', 'golden', 'brent', 'bounded', or 'golden2'). """ return self.__search_method