Source code for catsim.estimation

from typing import List

import numpy
from scipy.optimize import minimize_scalar

from catsim import cat, irt
from catsim.simulation import Estimator



[docs]
class NumericalSearchEstimator(Estimator):
    """Estimator that implements multiple search algorithms in unimodal functions to find the maximum of the log-likelihood function. There are implementations of ternary search, dichotomous search, Fibonacci search and golden-section search, according to [Veliz20]_. Also check [Brent02]_. It is also possible to use the methods from :py:func:`scipy.optimize.minimize_scalar`.

    :param precision: number of decimal points of precision, defaults to 6
    :type precision: int, optional
    :param dodd: whether to employ Dodd's estimation heuristic [Dod90]_ when the response vector only has one kind of response (all correct or all incorrect, see :py:func:`catsim.cat.dodd`), defaults to True
    :type dodd: bool, optional
    :param verbose: verbosity level of the maximization method
    :type verbose: bool, optional
    :param method: the search method to employ, one of `'ternary'`, `'dichotomous'`, `'fibonacci'`, `'golden'`, `'brent'`, `'bounded'` and `'golden2'`, defaults to bounded
    :type method: str, optional
    """

    methods = [
        "ternary",
        "dichotomous",
        "fibonacci",
        "golden",
        "brent",
        "bounded",
        "golden2",
    ]
    golden_ratio = (1 + 5**0.5) / 2

    def __str__(self):
        return f"Numerical Search Estimator ({self.__search_method})"

    def __init__(
        self,
        precision: int = 6,
        dodd: bool = True,
        verbose: bool = False,
        method="bounded",
    ):
        super().__init__(verbose)

        if precision < 1:
            raise ValueError(
                f"precision for numerical estimator must be an integer larger than 1, {precision} was passed"
            )

        if method not in NumericalSearchEstimator.methods:
            raise ValueError(
                f"Parameter 'method' must be one of {NumericalSearchEstimator.methods}"
            )

        self._epsilon = float("1e-" + str(precision))
        self._dodd = dodd
        self.__search_method = method


[docs]
    def estimate(
        self,
        index: int = None,
        items: numpy.ndarray = None,
        administered_items: List[int] = None,
        response_vector: List[bool] = None,
        est_theta: float = None,
        **kwargs
    ) -> float:
        """Returns the theta value that maximizes the log-likelihood function, given the current state of the
         test for the given examinee.

        :param index: index of the current examinee in the simulator
        :param items: a matrix containing item parameters in the format that `catsim` understands
                      (see: :py:func:`catsim.cat.generate_item_bank`)
        :param administered_items: a list containing the indexes of items that were already administered
        :param response_vector: a boolean list containing the examinee's answers to the administered items
        :param est_theta: a float containing the current estimated ability
        :returns: the current :math:`\\hat\\theta`
        """
        items, administered_items, response_vector, est_theta = self._prepare_args(
            return_items=True,
            return_response_vector=True,
            return_est_theta=True,
            index=index,
            items=items,
            administered_items=administered_items,
            response_vector=response_vector,
            est_theta=est_theta,
            **kwargs,
        )

        assert items is not None
        assert administered_items is not None
        assert response_vector is not None
        assert est_theta is not None

        self._calls += 1
        self._evaluations = 0

        summarized_answers = set(response_vector)

        # enter here if examinee has only answered correctly or incorrectly
        if len(summarized_answers) == 1:
            answer = summarized_answers.pop()

            # if the estimator was initialized with dodd = True,
            # use Dodd's estimation heuristic to return a theta value
            if self._dodd:
                candidate_theta = cat.dodd(est_theta, items, answer)

            # otherwise, return positive or negative infinity,
            # in accordance with the definition of the MLE
            elif answer:
                candidate_theta = float("inf")
            else:
                candidate_theta = float("-inf")

            return candidate_theta

        # select lower and upper bounds for an interval in which the estimator will
        # look for the most probable new theta

        # these bounds are computed as a the minimum and maximum item difficulties
        # in the bank...
        lower_bound = min(items[:, 1])
        upper_bound = max(items[:, 1])

        # ... plus an arbitrary error margin
        margin = (upper_bound - lower_bound) / 3
        upper_bound += margin
        lower_bound -= margin

        if self.__search_method in ["ternary", "dichotomous"]:
            candidate_theta = self._solve_ternary_dichotomous(
                upper_bound, lower_bound, response_vector, items[administered_items]
            )
        elif self.__search_method == "fibonacci":
            candidate_theta = self._solve_fibonacci(
                upper_bound, lower_bound, response_vector, items[administered_items]
            )
        elif self.__search_method == "golden2":
            candidate_theta = self._solve_golden_section(
                upper_bound, lower_bound, response_vector, items[administered_items]
            )
        elif self.__search_method in ["brent", "bounded", "golden"]:
            res = minimize_scalar(
                irt.negative_log_likelihood,
                bracket=(lower_bound, upper_bound),
                bounds=(lower_bound, upper_bound) if self.__search_method == "bounded" else None,
                method=self.__search_method,
                args=(response_vector, items[administered_items]),
                tol=self._epsilon if self.__search_method != "bounded" else None,
            )
            self._evaluations = res.nfev
            candidate_theta = res.x

        if self._verbose:
            print(f"{self._evaluations} evaluations")

        return candidate_theta


    def _solve_ternary_dichotomous(
        self,
        b: float,
        a: float,
        response_vector: List[bool],
        item_params: numpy.ndarray,
    ):
        """Uses the ternary or dichotomous search methods to find the ability that maximizes the log-likelihood of the given response vector and item parameters

        :param upper_bound: the upper bound to search for the ability, in the ability/difficulty scale
        :type upper_bound: float
        :param lower_bound: the lower bound to search for the ability, in the ability/difficulty scale
        :type lower_bound: float
        :param response_vector: the responses given to the answered items
        :type response_vector: List[bool]
        :param item_params: the parameter matrix of the answered items
        :type item_params: numpy.ndarray
        :return: the estimated ability
        :rtype: float
        """
        error = float("inf")
        while error >= self._epsilon:
            self._evaluations += 2

            if self.__search_method == "ternary":
                c = (b + 2 * a) / 3
                d = (2 * b + a) / 3
            elif self.__search_method == "dichotomous":
                m = (a + b) / 2
                c = m - (self._epsilon / 2)
                d = m + (self._epsilon / 2)

            left_side_ll = irt.log_likelihood(c, response_vector, item_params)
            right_side_ll = irt.log_likelihood(d, response_vector, item_params)

            if left_side_ll >= right_side_ll:
                b = d
            else:
                a = c

            assert a <= c <= d <= b

            candidate_theta = (b + a) / 2

            error = abs(b - a)
            if self.__search_method == "dichotomous":
                error /= 2

            if self._verbose:
                print(f"\t\tTheta: {candidate_theta}, LL: {max(left_side_ll, right_side_ll)}")
        return candidate_theta

    def _solve_fibonacci(
        self,
        b: float,
        a: float,
        response_vector: List[bool],
        item_params: numpy.ndarray,
    ):
        """Uses the Fibonacci search method to find the ability that maximizes the log-likelihood of the given response vector and item parameters

        :param upper_bound: the upper bound to search for the ability, in the ability/difficulty scale
        :type upper_bound: float
        :param lower_bound: the lower bound to search for the ability, in the ability/difficulty scale
        :type lower_bound: float
        :param response_vector: the responses given to the answered items
        :type response_vector: List[bool]
        :param item_params: the parameter matrix of the answered items
        :type item_params: numpy.ndarray
        :return: the estimated ability
        :rtype: float
        """
        fib = [1, 1]
        n = 1

        # while (upper_bound - lower_bound) / fib[-1] > .001:
        while (b - a) / fib[-1] > self._epsilon:
            n += 1
            fib.append(fib[-1] + fib[-2])

        c = a + (fib[n - 2] / fib[n]) * (b - a)
        d = a + (fib[n - 1] / fib[n]) * (b - a)

        left_side_ll = irt.log_likelihood(c, response_vector, item_params)
        right_side_ll = irt.log_likelihood(d, response_vector, item_params)
        self._evaluations += 2

        while n != 2:
            self._evaluations += 1

            n -= 1

            if left_side_ll >= right_side_ll:
                b = d
                d = c
                c = a + (fib[n - 2] / fib[n]) * (b - a)

                right_side_ll = left_side_ll
                left_side_ll = irt.log_likelihood(c, response_vector, item_params)
            else:
                a = c
                c = d
                d = a + (fib[n - 1] / fib[n]) * (b - a)

                left_side_ll = right_side_ll
                right_side_ll = irt.log_likelihood(d, response_vector, item_params)

            # assert a <= c <= d <= b

            if self._verbose:
                print(f"\t\tTheta: {(b + a) / 2}, LL: {max(left_side_ll, right_side_ll)}")
        return (b + a) / 2

    def _solve_golden_section(
        self,
        b: float,
        a: float,
        response_vector: List[bool],
        item_params: numpy.ndarray,
    ):
        """Uses the golden-section search method to find the ability that maximizes the log-likelihood of the given response vector and item parameters

        :param upper_bound: the upper bound to search for the ability, in the ability/difficulty scale
        :type upper_bound: float
        :param lower_bound: the lower bound to search for the ability, in the ability/difficulty scale
        :type lower_bound: float
        :param response_vector: the responses given to the answered items
        :type response_vector: List[bool]
        :param item_params: the parameter matrix of the answered items
        :type item_params: numpy.ndarray
        :return: the estimated ability
        :rtype: float
        """
        c = b + (a - b) / NumericalSearchEstimator.golden_ratio
        d = a + (b - a) / NumericalSearchEstimator.golden_ratio

        left_side_ll = irt.log_likelihood(c, response_vector, item_params)
        right_side_ll = irt.log_likelihood(d, response_vector, item_params)

        while abs(b - a) > self._epsilon:
            self._evaluations += 1

            if left_side_ll >= right_side_ll:
                b = d
                d = c
                c = b + (a - b) / NumericalSearchEstimator.golden_ratio

                right_side_ll = left_side_ll
                left_side_ll = irt.log_likelihood(c, response_vector, item_params)
            else:
                a = c
                c = d
                d = a + (b - a) / NumericalSearchEstimator.golden_ratio

                left_side_ll = right_side_ll
                right_side_ll = irt.log_likelihood(d, response_vector, item_params)

            assert a < c <= d < b

            if self._verbose:
                print(f"\t\tTheta: {(b + a) / 2}, LL: {max(left_side_ll, right_side_ll)}")
        return (b + a) / 2

    @property
    def dodd(self) -> bool:
        """Whether Dodd's estimation heuristic [Dod90]_ will be used by estimator in case the response vector is composed solely of right or wrong answers.

        :returns: boolean value indicating if Dodd's method will be used or not.
        :see: :py:func:`catsim.cat.dodd`"""
        return self._dodd

    @property
    def method(self) -> str:
        """Get the estimator search method selected during instantiation.

        :returns: search method"""
        return self.__search_method