# Source code for catsim.plot

"""Module with functions for plotting IRT-related results."""

import os
from typing import List

import matplotlib.pyplot as plt
import numpy
from mpl_toolkits.mplot3d import Axes3D

from . import irt
from .simulation import Simulator


def __column(matrix, i):
    """Pick the ``i``-th entry out of every row of a list of lists.

    :param matrix: a sequence of indexable rows
    :param i: position of the entry to take from each row
    :returns: a new list holding ``row[i]`` for each row, in row order
    """
    return [entries[i] for entries in matrix]



# [docs]
def item_curve(
    a: float = 1,
    b: float = 0,
    c: float = 0,
    d: float = 1,
    title: str = None,
    ptype: str = "icc",
    max_info=True,
    filepath: str = None,
    show: bool = True,
    figsize: tuple = None,
):
    """Plots 'Item Response Theory'-related item plots

    .. plot::

        from catsim.cat import generate_item_bank
        from catsim import plot
        item = generate_item_bank(1)[0]
        plot.item_curve(item[0], item[1], item[2], item[3], ptype='icc')
        plot.item_curve(item[0], item[1], item[2], item[3], ptype='iic')
        plot.item_curve(item[0], item[1], item[2], item[3], ptype='both')

    When both curves are plotted in the same figure, the figure has no grid,
    since each curve has a different scale.

    :param a: item discrimination parameter
    :param b: item difficulty parameter
    :param c: item pseudo-guessing parameter
    :param d: item upper asymptote
    :param title: plot title
    :param ptype: 'icc' for the item characteristic curve, 'iic' for the item
                  information curve or 'both' for both curves in the same plot
    :param max_info: whether the point of maximum information should be shown in the plot.
                     It is only drawn when the information curve is drawn ('iic' or 'both').
    :param filepath: saves the plot in the given path
    :param show: whether the generated plot is to be shown
    :param figsize: figure size forwarded to matplotlib when the figure is created
    :raises ValueError: if `ptype` is not one of 'icc', 'iic' or 'both'
    """
    available_types = ["icc", "iic", "both"]

    if ptype not in available_types:
        raise ValueError(f"'{ptype}' not in available plot types: {available_types}")

    # Sample the ability axis in a window of 4 units around the item difficulty.
    thetas = numpy.arange(b - 4, b + 4, 0.1, "double")
    p_thetas = [irt.icc(theta, a, b, c, d) for theta in thetas]
    i_thetas = [irt.inf(theta, a, b, c, d) for theta in thetas]

    # The parameter box is the same for every plot type, so build it once.
    parameters_text = f"$a = {a}$\n$b = {b}$\n$c = {c}$\n$d = {d}$"
    annotation_options = dict(
        bbox=dict(facecolor="white", alpha=1),
        xy=(0.75, 0.05),
        xycoords="axes fraction",
    )

    if ptype in ["icc", "iic"]:
        plt.figure(figsize=figsize)

        if title is not None:
            plt.title(title, size=18)

        plt.annotate(parameters_text, **annotation_options)
        plt.xlabel(r"$\theta$")
        plt.grid()

        if ptype == "icc":
            plt.ylabel(r"$P(\theta)$")
            plt.plot(thetas, p_thetas, label=r"$P(\theta)$")
        else:
            plt.ylabel(r"$I(\theta)$")
            plt.plot(thetas, i_thetas, label=r"$I(\theta)$")
            if max_info:
                aux = irt.max_info(a, b, c, d)
                plt.plot(aux, irt.inf(aux, a, b, c, d), "o")

    else:  # ptype == "both"
        _, ax1 = plt.subplots(figsize=figsize)

        ax1.set_xlabel(r"$\theta$", size=16)
        ax1.set_ylabel(r"$P(\theta)$", color="b", size=16)
        ax1.plot(thetas, p_thetas, "b-", label=r"$P(\theta)$")
        # Make the y-axis label and tick labels match the line color.
        for tl in ax1.get_yticklabels():
            tl.set_color("b")

        # Second y axis sharing the same x axis, used for the information scale.
        ax2 = ax1.twinx()
        ax2.set_ylabel(r"$I(\theta)$", color="r", size=16)
        ax2.plot(thetas, i_thetas, "r-", label=r"$I(\theta)$")
        for tl in ax2.get_yticklabels():
            tl.set_color("r")
        if max_info:
            aux = irt.max_info(a, b, c, d)
            # Drawn explicitly on the information axis, since the marker's
            # y value is an information value.
            ax2.plot(aux, irt.inf(aux, a, b, c, d), "o")

        if title is not None:
            ax1.set_title(title, size=18)

        ax2.annotate(parameters_text, **annotation_options)

    if filepath is not None:
        # An empty dirname means the user passed a bare file name,
        # e.g. 'plot.pdf' instead of '~/Downloads/plot.pdf'; nothing to create then.
        filedir = os.path.dirname(filepath)
        if filedir:
            # exist_ok avoids failing if the directory appears between check and creation.
            os.makedirs(filedir, exist_ok=True)
        plt.savefig(filepath, bbox_inches="tight")

    if show:
        plt.show()




# [docs]
def gen3d_dataset_scatter(
    items: numpy.ndarray,
    title: str = None,
    filepath: str = None,
    show: bool = True,
    figsize: tuple = None,
):
    """Generate the item matrix tridimensional dataset scatter plot

    The first three columns of the item matrix are plotted on the axes
    labelled `a`, `b` and `c`, respectively.

    .. plot::

        from catsim.cat import generate_item_bank
        from catsim import plot
        items = generate_item_bank(100)
        plot.gen3d_dataset_scatter(items)

    :param items: the item matrix
    :param title: the scatter plot title
    :param filepath: the path to save the scatter plot
    :param show: whether the generated plot is to be shown
    :param figsize: figure size forwarded to matplotlib when the figure is created
    """
    # References the otherwise unused Axes3D import.
    # NOTE(review): presumably kept so the import that enables the "3d"
    # projection is not removed as unused -- confirm before dropping it.
    assert Axes3D
    irt.validate_item_bank(items)

    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(items[:, 0], items[:, 1], items[:, 2], s=10, c="b")

    if title is not None:
        plt.title(title, size=18)

    ax.set_xlabel("a")
    ax.set_ylabel("b")
    ax.set_zlabel("c")

    if filepath is not None:
        # An empty dirname means a bare file name such as 'plot.pdf' was given.
        # Calling os.makedirs('') would raise, so only create real directories.
        filedir = os.path.dirname(filepath)
        if filedir:
            os.makedirs(filedir, exist_ok=True)
        plt.savefig(filepath, bbox_inches="tight")

    if show:
        plt.show()




# [docs]
def item_exposure(
    title: str = None,
    simulator: Simulator = None,
    items: numpy.ndarray = None,
    par: str = None,
    hist: bool = False,
    filepath: str = None,
    show: bool = True,
    figsize: tuple = None,
):
    """Generates a chart of the item bank exposure rates, either as a histogram of the exposure rates or as a line
    chart in which the `x` axis represents one of the item parameters and the `y` axis represents the exposure rates.

    .. plot::

        from catsim.cat import generate_item_bank
        from catsim import plot
        from catsim.initialization import RandomInitializer
        from catsim.selection import MaxInfoSelector
        from catsim.estimation import NumericalSearchEstimator
        from catsim.stopping import MaxItemStopper
        from catsim.simulation import Simulator

        s = Simulator(generate_item_bank(100), 10)
        s.simulate(RandomInitializer(), MaxInfoSelector(), NumericalSearchEstimator(), MaxItemStopper(20))
        plot.item_exposure(title='Exposures', simulator=s, hist=True)
        plot.item_exposure(title='Exposures', simulator=s, par='b')

    :param title: the plot title.
    :param simulator: a simulator which has already simulated a series of CATs, containing estimations to the examinees'
                      abilities and a list of administered items for each examinee. When given, its item matrix is
                      used and `items` is ignored.
    :param items: an item matrix containing item parameters and their exposure rate in the last column.
    :param par: a string representing one of the item parameters to order the items by and use on the x axis, or `None`
                to use the default order of the item bank. Please note that, if `hist=True`, no sorting will be done.
    :param hist: if True, plots a histogram of item exposures. Otherwise, plots a dotted line chart of the exposures,
                 sorted in the x-axis by the parameter chosen in `par`.
    :param filepath: the path to save the plot.
    :param show: whether the generated plot is to be shown.
    :param figsize: figure size forwarded to matplotlib when the figure is created.
    :raises ValueError: if neither a simulator nor an item matrix is available, if the item matrix does not have
                        5 columns, or if `par` is not one of 'a', 'b', 'c', 'd' or `None`.
    """
    # Column index and axis label for each supported item parameter.
    parameter_columns = {
        "a": (0, "Item discrimination"),
        "b": (1, "Item difficulty"),
        "c": (2, "Item Guessing"),
        "d": (3, "Item upper asymptote"),
    }

    if simulator is not None:
        items = simulator.items

    # Validate everything before touching matplotlib, so that a bad call does
    # not leave an empty figure behind.
    if items is None:
        raise ValueError("Not a single plottable object was passed.")

    if items.shape[1] != 5:
        raise ValueError(
            "The item matrix is supposed to have 5 columns, the last one representing item exposure rates"
        )

    if par is not None and par not in parameter_columns:
        raise ValueError("Unsupported parameter.")

    exposures = items[:, 4]

    plt.figure(figsize=figsize)

    if title is not None:
        plt.title(title, size=18)

    if hist:
        # One bin per ten items, but never fewer than three bins.
        plt.hist(exposures, max(int(items.shape[0] / 10), 3))
        plt.xlabel("Item exposure")
        plt.ylabel("Items")
    else:
        if par is None:
            # No parameter chosen: keep the bank order and use item positions as x.
            parameter = numpy.arange(items.shape[0])
            xlabel = "Items"
        else:
            column, xlabel = parameter_columns[par]
            parameter = items[:, column]

        order = parameter.argsort()
        # Plot against the parameter values themselves so that the x axis
        # matches its label, instead of against the rank of each item.
        plt.plot(parameter[order], exposures[order], marker="o")
        plt.xlabel(xlabel)
        plt.ylabel("Item exposure")

    # No legend is drawn: none of the artists above carries a label, so there
    # would be nothing to list in it.

    if filepath is not None:
        # An empty dirname means a bare file name was given; nothing to create then.
        filedir = os.path.dirname(filepath)
        if filedir:
            os.makedirs(filedir, exist_ok=True)
        plt.savefig(filepath, bbox_inches="tight", dpi=300)

    if show:
        plt.show()




# [docs]
def test_progress(
    title: str = None,
    simulator: Simulator = None,
    index: int = None,
    thetas: List[float] = None,
    administered_items: numpy.ndarray = None,
    true_theta: float = None,
    info: bool = False,
    var: bool = False,
    see: bool = False,
    reliability: bool = False,
    filepath: str = None,
    show: bool = True,
    figsize: tuple = None,
):
    """Generates a plot representing an examinee's test progress. Note that,
    while some functions increase or decrease monotonically, like test information
    and standard error of estimation, the plot calculates these values using the
    examinee's ability estimated at that given time of the test. This means
    that a test that was thought to be informative at a given point may not be as
    informative after new estimates are done.

    .. plot::

        from catsim.cat import generate_item_bank
        from catsim import plot
        from catsim.initialization import RandomInitializer
        from catsim.selection import MaxInfoSelector
        from catsim.estimation import NumericalSearchEstimator
        from catsim.stopping import MaxItemStopper
        from catsim.simulation import Simulator

        s = Simulator(generate_item_bank(100), 10)
        s.simulate(RandomInitializer(), MaxInfoSelector(), NumericalSearchEstimator(), MaxItemStopper(20))
        plot.test_progress(simulator=s, index=0)
        plot.test_progress(simulator=s, index=0, info=True, var=True, see=True)

    :param title: the plot title.
    :param simulator: a simulator which has already simulated a series of CATs,
                      containing estimations to the examinees' abilities and
                      a list of administered items for each examinee.
    :param index: the index of the examinee in the simulator whose plot is to be done.
                  When both `simulator` and `index` are given, the estimations,
                  administered items and true ability are taken from the simulator.
    :param thetas: if a :py:class:`Simulator` is not passed, then a list of ability
                   estimations can be manually passed to the function. It must hold
                   one more entry than there are administered items.
    :param administered_items: if a :py:class:`Simulator` is not passed, then a
                               matrix of administered items, represented by their
                               parameters, can be manually passed to the function.
    :param true_theta: the value of the examinee's true ability. If it is passed,
                       it will be shown on the plot, otherwise not.
    :param info: plot test information. It only works if both abilities and
                 administered items are passed.
    :param var: plot the estimation variance during the test. It only
                works if both abilities and administered items are passed.
    :param see: plot the standard error of estimation during the test. It only
               works if both abilities and administered items are passed.
    :param reliability: plot the test reliability. It only works if both abilities
                        and administered items are passed.
    :param filepath: the path to save the plot
    :param show: whether the generated plot is to be shown
    :param figsize: figure size forwarded to matplotlib when the figure is created
    :raises ValueError: if there is nothing to plot, or if the number of estimations
                        and of administered items do not agree.
    """
    if simulator is None and thetas is None and administered_items is None:
        raise ValueError("Not a single plottable object was passed.")

    if simulator is not None and index is not None:
        thetas = simulator.estimations[index]
        administered_items = simulator.items[simulator.administered_items[index]]
        true_theta = simulator.examinees[index]

    # Reached when a simulator was passed without an examinee index and no
    # estimations or items were given by hand.
    if thetas is None and administered_items is None:
        raise ValueError(
            "An examinee index is required to plot the test progress from a simulator."
        )

    # len(thetas) - 1 because the first estimate is made by the initializer,
    # before any item is administered.
    if (
        thetas is not None
        and administered_items is not None
        and len(thetas) - 1 != len(administered_items[:, 1])
    ):
        raise ValueError(
            "Number of latest_estimations and administered items is not the same. They should be."
        )

    # Only open a figure once the arguments are known to be usable.
    plt.figure(figsize=figsize)

    if title is not None:
        plt.title(title, size=18)

    # Position 0 is the initial estimate; items occupy positions 1..n. When only
    # items are given, keep the same layout so the x axis means the same thing.
    n_points = len(thetas) if thetas is not None else len(administered_items[:, 1]) + 1
    xs = list(range(n_points))

    if thetas is not None:
        plt.plot(xs, thetas, label=r"$\hat{\theta}$")
    if administered_items is not None:
        difficulties = administered_items[:, 1]
        plt.plot(xs[1:], difficulties, label="Item difficulty")
    if true_theta is not None:
        plt.hlines(true_theta, 0, len(xs), label=r"$\theta$")

    if thetas is not None and administered_items is not None:

        def plot_measure(measure, label):
            # Evaluate the measure at each estimate, over the items up to that position.
            values = [measure(thetas[x], administered_items[: x + 1]) for x in xs]
            plt.plot(xs, values, label=label)

        # calculate and plot test information, var, standard error and reliability
        if info:
            plot_measure(irt.test_info, r"$I(\theta)$")
        if var:
            plot_measure(irt.var, r"$Var$")
        if see:
            plot_measure(irt.see, r"$SEE$")
        if reliability:
            plot_measure(irt.reliability, "Reliability")

    plt.xlabel("Items")
    plt.grid()
    plt.legend(loc="best")

    if filepath is not None:
        # An empty dirname means a bare file name was given; nothing to create then.
        filedir = os.path.dirname(filepath)
        if filedir:
            os.makedirs(filedir, exist_ok=True)
        plt.savefig(filepath, bbox_inches="tight", dpi=300)

    if show:
        plt.show()



def param_dist(
    items: numpy.ndarray, filepath: str = None, show: bool = True, figsize: tuple = None
):
    """Draws a 2x2 grid of histograms, one for each of the first four columns of the item matrix.

    Columns 0 and 1 go on the top row and columns 2 and 3 on the bottom row, each with 100 bins.
    The columns are presumably the `a`, `b`, `c` and `d` item parameters -- confirm against callers.

    :param items: the item matrix
    :param filepath: the path to save the plot
    :param show: whether the generated plot is to be shown
    :param figsize: figure size forwarded to matplotlib when the figure is created
    """
    _, grid = plt.subplots(2, 2, figsize=figsize)

    # Walking the grid row by row pairs each panel with the matching column.
    for column, panel in enumerate(grid.flat):
        panel.hist(items[:, column], bins=100)

    if filepath is not None:
        target_dir = os.path.dirname(filepath)
        # A bare file name has no directory part, so there is nothing to create.
        if target_dir and not os.path.exists(target_dir):
            os.makedirs(os.path.dirname(filepath))
        plt.savefig(filepath, bbox_inches="tight", dpi=300)

    if show:
        plt.show()