# Source code for catsim.estimation

from typing import List, Optional

import numpy
from scipy.optimize import minimize_scalar

from catsim import cat, irt
from catsim.simulation import Estimator


class NumericalSearchEstimator(Estimator):
    """Estimator that implements multiple search algorithms in unimodal functions
    to find the maximum of the log-likelihood function.

    There are implementations of ternary search, dichotomous search, Fibonacci
    search and golden-section search, according to [Veliz20]_. Also check
    [Brent02]_. It is also possible to use the methods from
    :py:func:`scipy.optimize.minimize_scalar`.

    :param precision: number of decimal points of precision, defaults to 6
    :type precision: int, optional
    :param dodd: whether to employ Dodd's estimation heuristic [Dod90]_ when the
        response vector only has one kind of response (all correct or all
        incorrect, see :py:func:`catsim.cat.dodd`), defaults to True
    :type dodd: bool, optional
    :param verbose: verbosity level of the maximization method
    :type verbose: bool, optional
    :param method: the search method to employ, one of `'ternary'`,
        `'dichotomous'`, `'fibonacci'`, `'golden'`, `'brent'`, `'bounded'` and
        `'golden2'`, defaults to bounded
    :type method: str, optional
    """

    # Supported search methods. 'brent', 'bounded' and 'golden' are delegated to
    # scipy.optimize.minimize_scalar; the others are implemented locally.
    methods = [
        "ternary",
        "dichotomous",
        "fibonacci",
        "golden",
        "brent",
        "bounded",
        "golden2",
    ]

    # The golden ratio, used by the local golden-section search ('golden2').
    golden_ratio = (1 + 5**0.5) / 2

    def __str__(self):
        return f"Numerical Search Estimator ({self.__search_method})"

    def __init__(
        self,
        precision: int = 6,
        dodd: bool = True,
        verbose: bool = False,
        method: str = "bounded",
    ):
        super().__init__(verbose)
        if precision < 1:
            # NOTE: precision == 1 is accepted; the message matches the check.
            raise ValueError(
                f"precision for numerical estimator must be an integer greater than or equal to 1, {precision} was passed"
            )
        if method not in NumericalSearchEstimator.methods:
            raise ValueError(
                f"Parameter 'method' must be one of {NumericalSearchEstimator.methods}"
            )
        # Stopping tolerance: 10 ** -precision.
        self._epsilon = float("1e-" + str(precision))
        self._dodd = dodd
        self.__search_method = method

    def estimate(
        self,
        index: Optional[int] = None,
        items: Optional[numpy.ndarray] = None,
        administered_items: Optional[List[int]] = None,
        response_vector: Optional[List[bool]] = None,
        est_theta: Optional[float] = None,
        **kwargs,
    ) -> float:
        """Returns the theta value that maximizes the log-likelihood function,
        given the current state of the test for the given examinee.

        :param index: index of the current examinee in the simulator
        :param items: a matrix containing item parameters in the format that
            `catsim` understands (see: :py:func:`catsim.cat.generate_item_bank`)
        :param administered_items: a list containing the indexes of items that
            were already administered
        :param response_vector: a boolean list containing the examinee's answers
            to the administered items
        :param est_theta: a float containing the current estimated ability
        :returns: the current :math:`\\hat\\theta`
        """
        items, administered_items, response_vector, est_theta = self._prepare_args(
            return_items=True,
            return_response_vector=True,
            return_est_theta=True,
            index=index,
            items=items,
            administered_items=administered_items,
            response_vector=response_vector,
            est_theta=est_theta,
            **kwargs,
        )

        assert items is not None
        assert administered_items is not None
        assert response_vector is not None
        assert est_theta is not None

        self._calls += 1
        self._evaluations = 0

        summarized_answers = set(response_vector)

        # enter here if examinee has only answered correctly or incorrectly
        if len(summarized_answers) == 1:
            answer = summarized_answers.pop()

            # if the estimator was initialized with dodd = True,
            # use Dodd's estimation heuristic to return a theta value
            if self._dodd:
                candidate_theta = cat.dodd(est_theta, items, answer)
            # otherwise, return positive or negative infinity,
            # in accordance with the definition of the MLE
            elif answer:
                candidate_theta = float("inf")
            else:
                candidate_theta = float("-inf")

            return candidate_theta

        # select lower and upper bounds for an interval in which the estimator
        # will look for the most probable new theta; these bounds are computed
        # as the minimum and maximum item difficulties in the bank...
        lower_bound = min(items[:, 1])
        upper_bound = max(items[:, 1])

        # ... plus an arbitrary error margin
        margin = (upper_bound - lower_bound) / 3
        upper_bound += margin
        lower_bound -= margin

        if self.__search_method in ["ternary", "dichotomous"]:
            candidate_theta = self._solve_ternary_dichotomous(
                upper_bound, lower_bound, response_vector, items[administered_items]
            )
        elif self.__search_method == "fibonacci":
            candidate_theta = self._solve_fibonacci(
                upper_bound, lower_bound, response_vector, items[administered_items]
            )
        elif self.__search_method == "golden2":
            candidate_theta = self._solve_golden_section(
                upper_bound, lower_bound, response_vector, items[administered_items]
            )
        elif self.__search_method in ["brent", "bounded", "golden"]:
            # delegate the search to scipy; 'bounded' uses bounds instead of a
            # bracket and ignores the custom tolerance
            res = minimize_scalar(
                irt.negative_log_likelihood,
                bracket=(lower_bound, upper_bound),
                bounds=(lower_bound, upper_bound)
                if self.__search_method == "bounded"
                else None,
                method=self.__search_method,
                args=(response_vector, items[administered_items]),
                tol=self._epsilon if self.__search_method != "bounded" else None,
            )
            self._evaluations = res.nfev
            candidate_theta = res.x

        if self._verbose:
            print(f"{self._evaluations} evaluations")

        return candidate_theta

    def _solve_ternary_dichotomous(
        self,
        b: float,
        a: float,
        response_vector: List[bool],
        item_params: numpy.ndarray,
    ):
        """Uses the ternary or dichotomous search methods to find the ability
        that maximizes the log-likelihood of the given response vector and item
        parameters

        :param b: the upper bound to search for the ability, in the
            ability/difficulty scale
        :type b: float
        :param a: the lower bound to search for the ability, in the
            ability/difficulty scale
        :type a: float
        :param response_vector: the responses given to the answered items
        :type response_vector: List[bool]
        :param item_params: the parameter matrix of the answered items
        :type item_params: numpy.ndarray
        :return: the estimated ability
        :rtype: float
        """
        error = float("inf")
        while error >= self._epsilon:
            # both probe points are evaluated on every iteration
            self._evaluations += 2
            if self.__search_method == "ternary":
                c = (b + 2 * a) / 3
                d = (2 * b + a) / 3
            elif self.__search_method == "dichotomous":
                m = (a + b) / 2
                c = m - (self._epsilon / 2)
                d = m + (self._epsilon / 2)

            left_side_ll = irt.log_likelihood(c, response_vector, item_params)
            right_side_ll = irt.log_likelihood(d, response_vector, item_params)

            # keep the subinterval that contains the larger log-likelihood
            if left_side_ll >= right_side_ll:
                b = d
            else:
                a = c

            assert a <= c <= d <= b

            candidate_theta = (b + a) / 2
            error = abs(b - a)
            if self.__search_method == "dichotomous":
                error /= 2

            if self._verbose:
                print(f"\t\tTheta: {candidate_theta}, LL: {max(left_side_ll, right_side_ll)}")

        return candidate_theta

    def _solve_fibonacci(
        self,
        b: float,
        a: float,
        response_vector: List[bool],
        item_params: numpy.ndarray,
    ):
        """Uses the Fibonacci search method to find the ability that maximizes
        the log-likelihood of the given response vector and item parameters

        :param b: the upper bound to search for the ability, in the
            ability/difficulty scale
        :type b: float
        :param a: the lower bound to search for the ability, in the
            ability/difficulty scale
        :type a: float
        :param response_vector: the responses given to the answered items
        :type response_vector: List[bool]
        :param item_params: the parameter matrix of the answered items
        :type item_params: numpy.ndarray
        :return: the estimated ability
        :rtype: float
        """
        # grow the Fibonacci sequence until the interval shrinks below epsilon
        fib = [1, 1]
        n = 1
        # while (upper_bound - lower_bound) / fib[-1] > .001:
        while (b - a) / fib[-1] > self._epsilon:
            n += 1
            fib.append(fib[-1] + fib[-2])

        c = a + (fib[n - 2] / fib[n]) * (b - a)
        d = a + (fib[n - 1] / fib[n]) * (b - a)
        left_side_ll = irt.log_likelihood(c, response_vector, item_params)
        right_side_ll = irt.log_likelihood(d, response_vector, item_params)
        self._evaluations += 2

        while n != 2:
            # one new probe point per iteration; the other is reused
            self._evaluations += 1
            n -= 1
            if left_side_ll >= right_side_ll:
                b = d
                d = c
                c = a + (fib[n - 2] / fib[n]) * (b - a)
                right_side_ll = left_side_ll
                left_side_ll = irt.log_likelihood(c, response_vector, item_params)
            else:
                a = c
                c = d
                d = a + (fib[n - 1] / fib[n]) * (b - a)
                left_side_ll = right_side_ll
                right_side_ll = irt.log_likelihood(d, response_vector, item_params)

            # assert a <= c <= d <= b

            if self._verbose:
                print(f"\t\tTheta: {(b + a) / 2}, LL: {max(left_side_ll, right_side_ll)}")

        return (b + a) / 2

    def _solve_golden_section(
        self,
        b: float,
        a: float,
        response_vector: List[bool],
        item_params: numpy.ndarray,
    ):
        """Uses the golden-section search method to find the ability that
        maximizes the log-likelihood of the given response vector and item
        parameters

        :param b: the upper bound to search for the ability, in the
            ability/difficulty scale
        :type b: float
        :param a: the lower bound to search for the ability, in the
            ability/difficulty scale
        :type a: float
        :param response_vector: the responses given to the answered items
        :type response_vector: List[bool]
        :param item_params: the parameter matrix of the answered items
        :type item_params: numpy.ndarray
        :return: the estimated ability
        :rtype: float
        """
        c = b + (a - b) / NumericalSearchEstimator.golden_ratio
        d = a + (b - a) / NumericalSearchEstimator.golden_ratio
        left_side_ll = irt.log_likelihood(c, response_vector, item_params)
        right_side_ll = irt.log_likelihood(d, response_vector, item_params)

        while abs(b - a) > self._epsilon:
            # one new probe point per iteration; the other is reused
            self._evaluations += 1
            if left_side_ll >= right_side_ll:
                b = d
                d = c
                c = b + (a - b) / NumericalSearchEstimator.golden_ratio
                right_side_ll = left_side_ll
                left_side_ll = irt.log_likelihood(c, response_vector, item_params)
            else:
                a = c
                c = d
                d = a + (b - a) / NumericalSearchEstimator.golden_ratio
                left_side_ll = right_side_ll
                right_side_ll = irt.log_likelihood(d, response_vector, item_params)

            assert a < c <= d < b

            if self._verbose:
                print(f"\t\tTheta: {(b + a) / 2}, LL: {max(left_side_ll, right_side_ll)}")

        return (b + a) / 2

    @property
    def dodd(self) -> bool:
        """Whether Dodd's estimation heuristic [Dod90]_ will be used by
        estimator in case the response vector is composed solely of right or
        wrong answers.

        :returns: boolean value indicating if Dodd's method will be used or not.
        :see: :py:func:`catsim.cat.dodd`"""
        return self._dodd

    @property
    def method(self) -> str:
        """Get the estimator search method selected during instantiation.

        :returns: search method"""
        return self.__search_method