Source code for neutcurve.hillcurve

"""
==================
hillcurve
==================

Defines :class:`HillCurve` for fitting neutralization curves.
"""

import collections
import math
import warnings

import matplotlib.pyplot as plt

import numpy

import pandas as pd

import scipy.optimize



[docs]
class HillCurveFittingError(Exception):
    """Error fitting a :class:`HillCurve`."""

    pass




[docs]
class HillCurve:
    r"""A fitted Hill curve, optionally with free baselines.

    Fits :math:`f\left(c\right) = b + \frac{t - b}{1 + \left(c/m\right)^s}`
    where :math:`f\left(c\right)` is the fraction infectivity remaining
    at concentration :math:`c`, :math:`m` is the midpoint
    of the neutralization curve, :math:`t` is the top
    value (e.g., 1), :math:`b` is the bottom value (e.g., 0),
    and :math:`s` is the slope of the curve. Because
    :math:`f\left(c\right)` is the fraction infectivity remaining, we expect
    :math:`f\left(c\right)` to get smaller as :math:`c` gets larger.
    This should lead us to fit :math:`s > 0`.

    When :math:`t = 1` and :math:`b = 0`, this equation is identical to the
    `Hill curve <https://en.wikipedia.org/wiki/Hill_equation_(biochemistry)>`_,
    except that we are calculating the fraction **unbound** rather than
    the fraction bound.

    You may want to fit the fraction neutralized rather than the fraction
    infectivity. In that case, set `infectivity_or_neutralized='neutralized'
    and then the equation that is fit will
    be :math:`f\left(c\right) = t + \frac{b - t}{1 + \left(c/m\right)^s}`,
    which means that `f\left(c\right)` gets larger rather than smaller
    as :math:`c` increases.

    Args:
        `cs` (array-like)
            Concentrations of antibody / serum.
        `fs` (array-like)
            Fraction infectivity remaining at each concentration.
        `infectivity_or_neutralized` ({'infectivity', 'neutralized'})
            Fit the fraction infectivity (:math:`f\left(c\right)` decreases as
            :math:`c` increases) or neutralized (:math:`f\left(c\right)`
            increases as :math:`c` increases). See equations above.
        `fix_slope_first` (bool)
            If True, initially fit with fixed slope, and then start from those values
            to re-fit all parameters including slope.
        `fs_stderr` (`None` or array-like)
            If not `None`, standard errors on `fs`.
        `fixbottom` (`False`, float, or tuple/list of length 2)
            If `False`, fit bottom of curves as free parameter. If float, fix bottom of
            curve to that value. If length-2 array, constrain bottom to be in that range.
        `fixtop` (`False`, float, or tuple/list of length 2)
            If `False`, fit top of curves as free parameter. If float, fix top of
            curve to that value. If length-2 array, constrain top to be in that range.
        `fixslope` (`False`, float, or tuple/list of length 2)
            If `False`, fit slope of curves as free parameter. If float, fix slope to
            that value. If length-2 array, constrain slope to be in that range.
        `fitlogc` (bool)
            Do we do the actual fitting on the concentrations or log
            concentrations? Gives equivalent results in principle, but
            fitting to log concentrations may be more efficient in pratice.
        `use_stderr_for_fit` (bool)
            Do we use `fs_stderr` for the fitting, or just for plotting?
            Usually it is a good idea to set to `False` and **not** use
            for fitting if you only have a few replicates, and the standard
            error is often not that accurate and so will weight some
            points much more than others in a way that may not be
            justified.
        `init_slope` (float)
            Initial value of slope used in fitting. If `fixslope` is set to a single
            value, it overrides this parameter. If `fixslope` is set to a range,
            `init_slope` is adjusted up/down to be in that range.
        `no_curve_fit_first` (bool)
            Normally the method first tries to do the optimization with `curve_fit` from
            `scipy`, and if that fails then tries `scipy.optimize.minimize`. If you set
            this to `True`, it skips trying with `curve_fit`. In general, this option is
            for debugging by knowledgeable developers, and you should not be using it
            otherwise.

    Attributes:
        `cs` (numpy array)
            Concentrations, sorted from low to high.
        `fs` (numpy array)
            Fraction infectivity, ordered to match sorted concentrations.
        `fs_stderr` (numpy array or `None`)
            Standard errors on `fs`.
        `bottom` (float)
            Bottom of curve, :math:`b` in equation above.
        `top` (float)
            Top of curve, :math:`t` in equation above.
        `midpoint` (float)
            Midpoint of curve, :math:`m` in equation above. Note
            that the midpoint may **not** be the same as the :meth:`ic50`
            if :math:`t \ne 1` or :math:`b \ne 0`.
        `midpoint_bound` (float)
            Midpoint if it falls within fitted concentrations, otherwise
            the lowest concentration if below that or upper of above it.
        `midpoint_bound_type` (str)
            Is midpoint 'interpolated', or an 'upper' or 'lower' bound.
        `slope` (float)
            Hill slope of curve, :math:`s` in equation above.
        `r2` (float)
            Coefficient of determination indicating how well the curve fits the
            data (https://en.wikipedia.org/wiki/Coefficient_of_determination).
        `rmsd` (float)
            Root mean square deviation of fitted to actual values (square root of mean
            residual).
        `params_stdev` (dict or `None`)
            If standard deviations can be estimated on the fit
            parameters, keyed by 'bottom', 'top', 'midpoint',
            and 'slope' and gives standard erorr on each. Note if
            you have replicates we recommend fitting those separately
            and taking standard error rather than using fit stdev.

    Use the :meth:`ic50` method to get the fitted IC50.
    You can use :meth:`ic50_stdev` to get the estimated standard
    deviation on the IC50, although if you have multiple replicates
    you may be better off just fitting to each separately and
    then taking standard error of the individual IC50s.

    Here are some examples. First, we import the necessary modules:

    .. nbplot::

        >>> import numpy
        >>>
        >>> from neutcurve import HillCurve
        >>> from neutcurve.colorschemes import CBPALETTE

    Now simulate some data:

    .. nbplot::

        >>> m = 0.03
        >>> s = 1.9
        >>> b = 0.1
        >>> t = 1.0
        >>> cs = [0.002 * 2**x for x in range(9)]
        >>> fs = [HillCurve.evaluate(c, m, s, b, t) for c in cs]

    Now fit to these data, and confirm that the fitted values
    are close to the ones used for the simulation:

    .. nbplot::

        >>> neut = HillCurve(cs, fs, fixbottom=False)
        >>> numpy.allclose(neut.midpoint, m, atol=1e-4)
        True
        >>> neut.midpoint_bound == neut.midpoint
        True
        >>> neut.midpoint_bound_type
        'interpolated'
        >>> numpy.allclose(neut.slope, s, atol=1e-3)
        True
        >>> numpy.allclose(neut.top, t, atol=1e-4)
        True
        >>> numpy.allclose(neut.bottom, b, atol=1e-3)
        True
        >>> for key, val in neut.params_stdev.items():
        ...     print(f"{key} = {val:.2g}")
        midpoint = 0.062
        slope = 6.3
        top = 0
        bottom = 0.73
        >>> print(f"IC50: {neut.ic50():.3f} +/- {neut.ic50_stdev():.3f}")
        IC50: 0.034 +/- 0.070

    Since we fit the curve to simulated data where the bottom was
    0.1 rather than 0, the midpoint and IC50 are different. Specifically,
    the IC50 is larger than the midpoint as you have to go past the midpoint
    to get down to 0.5 fraction infectivity:

    .. nbplot::

        >>> neut.ic50() > neut.midpoint
        True
        >>> numpy.allclose(neut.ic50(), 0.0337, atol=1e-4)
        True
        >>> numpy.allclose(0.5, neut.fracinfectivity(neut.ic50()))
        True
        >>> neut.fracinfectivity(neut.midpoint) > 0.5
        True

    Now here is an example where we constrain both the top
    and the bottom (to 1 and 0, respectively) and fit
    the curve. Now the midpoint and IC50 are the same:

    .. nbplot::

        >>> b2 = 0
        >>> t2 = 1
        >>> fs2 = [HillCurve.evaluate(c, m, s, b2, t2) for c in cs]
        >>> neut2 = HillCurve(cs, fs2)
        >>> numpy.allclose(neut2.midpoint, m)
        True
        >>> numpy.allclose(neut2.ic50(), m)
        True

    Now let's fit to concentrations that are all **less**
    than the midpoint, so that we never get to complete neutralization.
    The estimated IC50 is unreliable, and so will be returned as `None` unless
    we call :meth:`ic50` with `method` set to 'bound':

    .. nbplot::

        >>> cs3 = [1e-4 * 2**x for x in range(7)]
        >>> (cs3[-1] < m)
        True
        >>> fs3 = [HillCurve.evaluate(c, m, s, b2, t2) for c in cs3]
        >>> neut3 = HillCurve(cs3, fs3)
        >>> neut3.ic50() is None
        True
        >>> numpy.allclose(neut3.ic50(method='bound'), cs3[-1])
        True
        >>> neut3.midpoint_bound == cs3[-1]
        True
        >>> neut3.midpoint_bound_type
        'lower'

    Note that we can determine if the IC50 is interpolated or an upper
    or lower bound using :meth:`ic50_bound`, and get a nice string
    using :meth:`ic50_str`:

    .. nbplot::

        >>> neut.ic50_bound()
        'interpolated'
        >>> neut3.ic50_bound()
        'lower'
        >>> neut.ic50_str()
        '0.0337'
        >>> neut3.ic50_str()
        '>0.0064'

    We can use the :meth:`dataframe` method to get the measured
    data and fit data at selected points. First, we do this
    just at measured points:

    .. nbplot::

        >>> neut.dataframe('measured').round(3)
           concentration  measurement    fit  stderr
        0          0.002        0.995  0.995     NaN
        1          0.004        0.981  0.981     NaN
        2          0.008        0.932  0.932     NaN
        3          0.016        0.791  0.791     NaN
        4          0.032        0.522  0.522     NaN
        5          0.064        0.272  0.272     NaN
        6          0.128        0.154  0.154     NaN
        7          0.256        0.115  0.115     NaN
        8          0.512        0.104  0.104     NaN

    Then we add in one more point:

    .. nbplot::

        >>> neut.dataframe([0.6]).round(3)
           concentration  measurement    fit  stderr
        0          0.002        0.995  0.995     NaN
        1          0.004        0.981  0.981     NaN
        2          0.008        0.932  0.932     NaN
        3          0.016        0.791  0.791     NaN
        4          0.032        0.522  0.522     NaN
        5          0.064        0.272  0.272     NaN
        6          0.128        0.154  0.154     NaN
        7          0.256        0.115  0.115     NaN
        8          0.512        0.104  0.104     NaN
        9          0.600          NaN  0.103     NaN

    In reality, you'd typically just call :meth:`dataframe` with
    the default argument of 'auto' to get a good range to plot.
    This is done if we call the :meth:`HillCurve.plot` method:

    .. nbplot::

        >>> fig, ax = neut.plot()

    Finally, we confirm that we get the same result regardless
    of whether we fit using the concentrations in linear or
    log space:

    .. nbplot::

        >>> neut_linear = HillCurve(cs, fs, fitlogc=False, fixbottom=False)
        >>> all(numpy.allclose(getattr(neut, attr), getattr(neut_linear, attr))
        ...     for attr in ['top', 'bottom', 'slope', 'midpoint'])
        True

    Demonstrate :meth:`HillCurve.icXX`:

    >>> neut.icXX(0.95) is None
    True
    >>> neut.icXX_str(0.95)
    '>0.512'
    >>> neut.icXX_bound(0.95)
    'lower'
    >>> numpy.allclose(neut.icXX(0.95, method='bound'), neut.cs[-1])
    True
    >>> '{:.4f}'.format(neut.icXX(0.8), 4)
    '0.0896'
    >>> numpy.allclose(0.2, neut.fracinfectivity(neut.icXX(0.8)))
    True

    We can quantify the goodness of fit with :attr:`HillCurve.r2`. For these simulated
    data the fit is perfect (coefficient of determination of 1):

    >>> round(neut.r2, 3)
    1.0

    We can also quantify the goodness of fit with :attr:`HillCurve.rmsd`:
    >>> round(neut.rmsd, 3)
    0.0

    Now fit with bounds on the parameters. First, we make bounds cover the true values:

    >>> neut_bounds_cover = HillCurve(
    ...     cs, fs, fixbottom=(0, 0.2), fixtop=(0.9, 1), fixslope=(1, 2),
    ... )
    >>> numpy.allclose(neut_bounds_cover.midpoint, m, atol=1e-4)
    True
    >>> numpy.allclose(neut_bounds_cover.slope, s, atol=1e-3)
    True
    >>> numpy.allclose(neut_bounds_cover.top, t, atol=1e-4)
    True
    >>> numpy.allclose(neut_bounds_cover.bottom, b, atol=1e-3)
    True
    >>> round(neut_bounds_cover.r2, 3)
    1.0
    >>> round(neut_bounds_cover.rmsd, 3)
    0.0

    Next fit with bounds that do not cover the true parameters:
    >>> neut_bounds_nocover = HillCurve(
    ...     cs, fs, fixbottom=(0, 0.05), fixtop=(0.9, 0.95), fixslope=(1, 1.5),
    ... )
    >>> round(neut_bounds_nocover.midpoint, 2)
    0.04
    >>> round(neut_bounds_nocover.slope, 2)
    1.5
    >>> round(neut_bounds_nocover.top, 2)
    0.95
    >>> round(neut_bounds_nocover.bottom, 2)
    0.05
    >>> round(neut_bounds_nocover.r2, 2)
    0.99
    >>> round(neut_bounds_nocover.rmsd, 3)
    0.045

    Now fit with `infectivity_or_neutralized='neutralized'`, which is useful
    when the signal **increases** rather than decreases with increasing
    concentration (as would be the case if measuring fraction bound rather
    than fraction infectivity).

    .. nbplot::

       >>> neut_opp = HillCurve(cs, [1 - f for f in fs],
       ...                      fixtop=False, fixbottom=False,
       ...                      infectivity_or_neutralized='neutralized')
       >>> numpy.allclose(neut_opp.top, 0.9, atol=1e-3)
       True
       >>> numpy.allclose(neut_opp.bottom, 0, atol=1e-3)
       True
       >>> numpy.allclose(neut_opp.midpoint, m)
       True
       >>> neut_opp.ic50() < neut_opp.midpoint
       True
       >>> fig, ax = neut_opp.plot(ylabel='fraction neutralized')

    For internal testing purposes, try with `no_curve_fit_first=False`.

    >>> neut_ncf = HillCurve(cs, fs, fixbottom=False, no_curve_fit_first=True)
    >>> numpy.allclose(neut_ncf.midpoint, m, atol=1e-4)
    True
    >>> numpy.allclose(neut_ncf.slope, s, atol=5e-3)
    True
    >>> numpy.allclose(neut_ncf.top, t, atol=1e-4)
    True
    >>> numpy.allclose(neut_ncf.bottom, b, atol=1e-3)
    True

    >>> neut_bounds_cover_ncf = HillCurve(
    ...     cs,
    ...     fs,
    ...     fixbottom=(0, 0.2),
    ...     fixtop=(0.9, 1),
    ...     fixslope=(1, 2),
    ...     no_curve_fit_first=True,
    ... )
    >>> numpy.allclose(neut_bounds_cover_ncf.midpoint, m, atol=1e-4)
    True
    >>> numpy.allclose(neut_bounds_cover_ncf.slope, s, atol=1e-3)
    True
    >>> numpy.allclose(neut_bounds_cover_ncf.top, t, atol=1e-4)
    True
    >>> numpy.allclose(neut_bounds_cover_ncf.bottom, b, atol=1e-3)
    True
    >>> round(neut_bounds_cover_ncf.r2, 3)
    1.0

    >>> neut_bounds_nocover_ncf = HillCurve(
    ...     cs,
    ...     fs,
    ...     fixbottom=(0, 0.05),
    ...     fixtop=(0.9, 0.95),
    ...     fixslope=(1, 1.5),
    ...     no_curve_fit_first=True,
    ... )
    >>> round(neut_bounds_nocover_ncf.midpoint, 2)
    0.04
    >>> round(neut_bounds_nocover_ncf.slope, 2)
    1.5
    >>> round(neut_bounds_nocover_ncf.top, 2)
    0.95
    >>> round(neut_bounds_nocover_ncf.bottom, 2)
    0.05
    >>> round(neut_bounds_nocover_ncf.r2, 2)
    0.99

    """

    def __init__(
        self,
        cs,
        fs,
        *,
        infectivity_or_neutralized="infectivity",
        fix_slope_first=True,
        fs_stderr=None,
        fixbottom=0,
        fixtop=1,
        fixslope=False,
        fitlogc=False,
        use_stderr_for_fit=False,
        init_slope=1.5,
        no_curve_fit_first=False,
    ):
        """See main class docstring."""
        # get data into arrays sorted by concentration
        self.cs = numpy.array(cs)
        self.fs = numpy.array(fs)
        if fs_stderr is not None:
            self.fs_stderr = numpy.array(fs_stderr)
            self.fs_stderr = self.fs_stderr[self.cs.argsort()]
        else:
            self.fs_stderr = None
        self.fs = self.fs[self.cs.argsort()]
        self.cs = self.cs[self.cs.argsort()]

        if infectivity_or_neutralized == "infectivity":
            if self.fs[0] < self.fs[-1] and (self.fs[0] < 0.3 and self.fs[-1] > 0.7):
                warnings.warn(
                    "`f` increases with concentration, consider "
                    '`infectivity_or_neutralized="neutralized"'
                )
        elif infectivity_or_neutralized == "neutralized":
            if self.fs[0] > self.fs[-1] and (self.fs[0] > 0.7 and self.fs[-1] < 0.3):
                warnings.warn(
                    "`f` decreases with concentration, consider "
                    '`infectivity_or_neutralized="infectivity"'
                )
        else:
            raise ValueError(f"invalid {infectivity_or_neutralized=}")
        self._infectivity_or_neutralized = infectivity_or_neutralized

        if any(self.cs <= 0):
            raise ValueError(f"concentrations must all be > 0\n{self.cs=}")

        # create initial guess of `(midpoint, slope, bottom, top)`
        # make initial guess for top
        if fixtop is False:
            top = max(1, self.fs.max())
        elif isinstance(fixtop, (int, float)):
            fixtop = float(fixtop)
            top = fixtop
        elif hasattr(fixtop, "__len__") and len(fixtop) == 2:
            fixtop = (fixtop[0], fixtop[1])
            if fixtop[1] <= fixtop[0]:
                raise ValueError(f"invalid {fixtop=}: first element must be < second")
            top = max(fixtop[0], min(max(1, self.fs.max()), fixtop[1]))
            assert fixtop[0] <= top <= fixtop[1]
        else:
            raise ValueError(f"{fixtop=} is not `False`, a number, or length-2 array")
        # make initial guess for bottom
        if fixbottom is False:
            bottom = min(0, self.fs.min())
        elif isinstance(fixbottom, (int, float)):
            fixbottom = float(fixbottom)
            bottom = fixbottom
        elif hasattr(fixbottom, "__len__") and len(fixbottom) == 2:
            fixbottom = (fixbottom[0], fixbottom[1])
            if fixbottom[1] <= fixbottom[0]:
                raise ValueError(
                    f"invalid {fixbottom=}: first element must be < second"
                )
            bottom = max(fixbottom[0], min(min(0, self.fs.min()), fixbottom[1]))
            assert fixbottom[0] <= bottom <= fixbottom[1]
        else:
            raise ValueError(
                f"{fixbottom=} is not `False`, a number, or length-2 array"
            )
        # make initial guess for midpoint
        # if midpoint guess outside range, guess outside range by amount
        # equal to spacing of last two points
        midval = (top - bottom) / 2.0
        if (self.fs > midval).all():
            midpoint = {
                "infectivity": self.cs[-1] ** 2 / self.cs[-2],
                "neutralized": self.cs[0] / (self.cs[-1] / self.cs[-2]),
            }[self._infectivity_or_neutralized]
        elif (self.fs <= midval).all():
            midpoint = {
                "neutralized": self.cs[-1] ** 2 / self.cs[-2],
                "infectivity": self.cs[0] / (self.cs[-1] / self.cs[-2]),
            }[self._infectivity_or_neutralized]
        else:
            # get first index where f crosses midpoint
            i = numpy.argmax((self.fs > midval)[:-1] != (self.fs > midval)[1:])
            assert (self.fs[i] > midval) != (self.fs[i + 1] > midval)
            midpoint = (self.cs[i] + self.cs[i + 1]) / 2.0
        # adjust initial slope if inconsistent with `fixslope`
        if fixslope is False:
            slope = float(init_slope)
        elif isinstance(fixslope, (int, float)):
            fixslope = float(fixslope)
            slope = fixslope
        elif hasattr(fixslope, "__len__") and len(fixslope) == 2:
            fixslope = (fixslope[0], fixslope[1])
            if fixslope[1] <= fixslope[0]:
                raise ValueError(f"invalid {fixslope=}: first element must be < second")
            slope = max(fixslope[0], min(init_slope, fixslope[1]))
            assert fixslope[0] <= slope <= fixslope[1]

        init_tup = (midpoint, slope, bottom, top)

        # first try to fit using curve_fit
        try:
            if no_curve_fit_first:
                raise RuntimeError("skipping curve_fit")
            if fix_slope_first:
                fix_first_init_tup, _ = self._fit_curve(
                    fixtop=fixtop,
                    fixbottom=fixbottom,
                    fitlogc=fitlogc,
                    use_stderr_for_fit=use_stderr_for_fit,
                    init_tup=init_tup,
                    fixslope=slope,
                )
            else:
                fix_first_init_tup = init_tup
            fit_tup, self.params_stdev = self._fit_curve(
                fixtop=fixtop,
                fixbottom=fixbottom,
                fitlogc=fitlogc,
                use_stderr_for_fit=use_stderr_for_fit,
                init_tup=fix_first_init_tup,
                fixslope=fixslope,
            )
        # A RuntimeError is raised by scipy if curve_fit fails:
        # https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html
        except RuntimeError:
            # curve_fit failed, try using minimize
            for method in ["TNC", "L-BFGS-B", "SLSQP", "Powell"]:
                if fix_slope_first:
                    fix_first_init_tup = self._minimize_fit(
                        fixtop=fixtop,
                        fixbottom=fixbottom,
                        fitlogc=fitlogc,
                        use_stderr_for_fit=use_stderr_for_fit,
                        method=method,
                        init_tup=init_tup,
                        fixslope=slope,
                    )
                    if fix_first_init_tup is False:
                        continue
                else:
                    fix_first_init_tup = init_tup
                fit_tup = self._minimize_fit(
                    fixtop=fixtop,
                    fixbottom=fixbottom,
                    fitlogc=fitlogc,
                    use_stderr_for_fit=use_stderr_for_fit,
                    method=method,
                    init_tup=fix_first_init_tup,
                    fixslope=fixslope,
                )
                self.params_stdev = None  # can't estimate errors
                if fit_tup is not False:
                    break
            else:
                raise HillCurveFittingError(f"fit failed:\n{self.cs=}\n{self.fs=}")

        for i, param in enumerate(["midpoint", "slope", "bottom", "top"]):
            setattr(self, param, float(fit_tup[i]))
        # debugging check
        for name, fixval, val in [
            ("slope", fixslope, self.slope),
            ("top", fixtop, self.top),
            ("bottom", fixbottom, self.bottom),
        ]:
            if not (
                (fixval is False)
                or (isinstance(fixval, float) and numpy.allclose(fixval, val))
                or (isinstance(fixval, tuple) and (fixval[0] <= val <= fixval[1]))
            ):
                raise ValueError(f"fix{name}={fixval}, but final {name}={val}")

        if self.cs[0] <= self.midpoint <= self.cs[-1]:
            self.midpoint_bound = self.midpoint
            self.midpoint_bound_type = "interpolated"
        elif self.midpoint < self.cs[0]:
            self.midpoint_bound = float(self.cs[0])
            self.midpoint_bound_type = "upper"
        else:
            assert self.midpoint > self.cs[-1]
            self.midpoint_bound = float(self.cs[-1])
            self.midpoint_bound_type = "lower"

        # compute coefficient of determination
        # https://en.wikipedia.org/wiki/Coefficient_of_determination
        assert len(self.cs) == len(self.fs)
        sstot = float(((self.fs - numpy.average(self.fs)) ** 2).sum())
        ssres = float(
            (
                (numpy.array([self.fracinfectivity(c) for c in self.cs]) - self.fs) ** 2
            ).sum()
        )
        if sstot == 0:
            if ssres == 0:
                self.r2 = 1.0
            else:
                self.r2 = 0.0
        else:
            self.r2 = 1.0 - ssres / sstot

        # compute rmsd
        self.rmsd = math.sqrt(ssres / len(self.cs))

    def _fit_curve(
        self,
        *,
        fixtop,
        fixbottom,
        fitlogc,
        use_stderr_for_fit,
        init_tup,
        fixslope,
    ):
        """curve_fit, return `(midpoint, slope, bottom, top), params_stdev`."""

        midpoint, slope, bottom, top = init_tup

        # set up function and initial guesses
        if fitlogc:
            evalfunc = self._evaluate_log
            xdata = numpy.log(self.cs)
            midpoint = numpy.log(midpoint)
        else:
            evalfunc = self.evaluate
            xdata = self.cs

        top_bounds = (-numpy.inf, numpy.inf) if (fixtop is False) else fixtop
        bottom_bounds = (-numpy.inf, numpy.inf) if (fixbottom is False) else fixbottom
        slope_bounds = (-numpy.inf, numpy.inf) if (fixslope is False) else fixslope

        if isinstance(fixslope, float):
            assert fixslope == slope
            if (not isinstance(fixtop, float)) and (not isinstance(fixbottom, float)):
                initguess = [midpoint, bottom, top]
                # https://stackoverflow.com/a/8081580
                bounds = tuple(
                    zip(*[(-numpy.inf, numpy.inf), bottom_bounds, top_bounds])
                )

                def func(c, m, b, t):
                    return evalfunc(c, m, slope, b, t, self._infectivity_or_neutralized)

            elif not isinstance(fixtop, float):
                assert isinstance(fixbottom, float)
                initguess = [midpoint, top]
                bounds = tuple(zip(*[(-numpy.inf, numpy.inf), top_bounds]))

                def func(c, m, t):
                    return evalfunc(
                        c,
                        m,
                        slope,
                        bottom,
                        t,
                        self._infectivity_or_neutralized,
                    )

            elif not isinstance(fixbottom, float):
                assert isinstance(fixtop, float)
                initguess = [midpoint, bottom]
                bounds = tuple(zip(*[(-numpy.inf, numpy.inf), bottom_bounds]))

                def func(c, m, b):
                    return evalfunc(
                        c,
                        m,
                        slope,
                        b,
                        top,
                        self._infectivity_or_neutralized,
                    )

            else:
                assert isinstance(fixtop, float) and isinstance(fixbottom, float)
                initguess = [midpoint]
                bounds = ([-numpy.inf], [numpy.inf])

                def func(c, m):
                    return evalfunc(
                        c,
                        m,
                        slope,
                        bottom,
                        top,
                        self._infectivity_or_neutralized,
                    )

        else:
            assert (fixslope is False) or (
                isinstance(fixslope, tuple) and len(fixslope) == 2
            )
            if (not isinstance(fixtop, float)) and (not isinstance(fixbottom, float)):
                initguess = [midpoint, slope, bottom, top]
                bounds = tuple(
                    zip(
                        *[
                            (-numpy.inf, numpy.inf),
                            slope_bounds,
                            bottom_bounds,
                            top_bounds,
                        ]
                    )
                )

                def func(c, m, s, b, t):
                    return evalfunc(c, m, s, b, t, self._infectivity_or_neutralized)

            elif not isinstance(fixtop, float):
                assert isinstance(fixbottom, float)
                initguess = [midpoint, slope, top]
                bounds = tuple(
                    zip(*[(-numpy.inf, numpy.inf), slope_bounds, top_bounds])
                )

                def func(c, m, s, t):
                    return evalfunc(
                        c, m, s, bottom, t, self._infectivity_or_neutralized
                    )

            elif not isinstance(fixbottom, float):
                assert isinstance(fixtop, float)
                initguess = [midpoint, slope, bottom]
                bounds = tuple(
                    zip(
                        *[
                            (-numpy.inf, numpy.inf),
                            slope_bounds,
                            bottom_bounds,
                        ]
                    )
                )

                def func(c, m, s, b):
                    return evalfunc(c, m, s, b, top, self._infectivity_or_neutralized)

            else:
                assert isinstance(fixtop, float) and isinstance(fixbottom, float)
                initguess = [midpoint, slope]
                bounds = tuple(zip(*[(-numpy.inf, numpy.inf), slope_bounds]))

                def func(c, m, s):
                    return evalfunc(
                        c,
                        m,
                        s,
                        bottom,
                        top,
                        self._infectivity_or_neutralized,
                    )

        assert len(initguess) == len(bounds[0]) == len(bounds[1])
        assert all(lb < ub for (lb, ub) in zip(bounds[0], bounds[1])), bounds
        (popt, pcov) = scipy.optimize.curve_fit(
            f=func,
            xdata=xdata,
            ydata=self.fs,
            p0=initguess,
            sigma=self.fs_stderr if use_stderr_for_fit else None,
            absolute_sigma=True,
            bounds=bounds,
            maxfev=1000,
        )

        perr = numpy.sqrt(numpy.diag(pcov))

        if fitlogc:
            midpoint = numpy.exp(midpoint)

        midpoint = popt[0]
        if isinstance(fixslope, float):
            params_stderr = {"midpoint": perr[0], "slope": 0, "top": 0, "bottom": 0}
            if (not isinstance(fixbottom, float)) and (not isinstance(fixtop, float)):
                bottom = popt[1]
                params_stderr["bottom"] = perr[1]
                top = popt[2]
                params_stderr["top"] = perr[2]
            elif not isinstance(fixbottom, float):
                bottom = popt[1]
                params_stderr["bottom"] = perr[1]
            elif not isinstance(fixtop, float):
                top = popt[1]
                params_stderr["top"] = perr[1]
        else:
            slope = popt[1]
            params_stderr = {
                "midpoint": perr[0],
                "slope": perr[1],
                "top": 0,
                "bottom": 0,
            }
            if (not isinstance(fixbottom, float)) and (not isinstance(fixtop, float)):
                bottom = popt[2]
                params_stderr["bottom"] = perr[2]
                top = popt[3]
                params_stderr["top"] = perr[3]
            elif not isinstance(fixbottom, float):
                bottom = popt[2]
                params_stderr["bottom"] = perr[2]
            elif not isinstance(fixtop, float):
                top = popt[2]
                params_stderr["top"] = perr[2]

        return (midpoint, slope, bottom, top), params_stderr

    def _minimize_fit(
        self,
        *,
        fixtop,
        fixbottom,
        fitlogc,
        use_stderr_for_fit,
        method,
        init_tup,
        fixslope,
    ):
        """Return `(midpoint, slope, bottom, top)` if succeeds or `False` if fails."""

        midpoint, slope, bottom, top = init_tup

        # set up function and initial guesses
        if fitlogc:
            evalfunc = self._evaluate_log
            xdata = numpy.log(self.cs)
            midpoint = numpy.log(midpoint)
            bounds = [(None, None)]
        else:
            evalfunc = self.evaluate
            xdata = self.cs
            bounds = [(0, None)]

        top_bounds = (None, None) if (fixtop is False) else fixtop
        bottom_bounds = (None, None) if (fixbottom is False) else fixbottom
        slope_bounds = (None, None) if (fixslope is False) else fixslope

        if isinstance(fixslope, float):
            if (not isinstance(fixtop, float)) and (not isinstance(fixbottom, float)):
                initguess = [midpoint, bottom, top]
                bounds = bounds + [bottom_bounds, top_bounds]

                def func(c, m, b, t):
                    return evalfunc(c, m, slope, b, t, self._infectivity_or_neutralized)

            elif not isinstance(fixtop, float):
                assert isinstance(fixbottom, float)
                initguess = [midpoint, top]
                bounds.append(top_bounds)

                def func(c, m, t):
                    return evalfunc(
                        c,
                        m,
                        slope,
                        bottom,
                        t,
                        self._infectivity_or_neutralized,
                    )

            elif not isinstance(fixbottom, float):
                assert isinstance(fixtop, float)
                initguess = [midpoint, bottom]
                bounds.append(bottom_bounds)

                def func(c, m, b):
                    return evalfunc(
                        c,
                        m,
                        slope,
                        b,
                        top,
                        self._infectivity_or_neutralized,
                    )

            else:
                assert isinstance(fixtop, float) and isinstance(fixbottom, float)
                initguess = [midpoint]

                def func(c, m):
                    return evalfunc(
                        c,
                        m,
                        slope,
                        bottom,
                        top,
                        self._infectivity_or_neutralized,
                    )

        else:
            assert isinstance(fixslope, tuple) or (fixslope is False)
            bounds.append(slope_bounds)

            if (not isinstance(fixtop, float)) and (not isinstance(fixbottom, float)):
                initguess = [midpoint, slope, bottom, top]
                bounds = bounds + [bottom_bounds, top_bounds]

                def func(c, m, s, b, t):
                    return evalfunc(c, m, s, b, t, self._infectivity_or_neutralized)

            elif not isinstance(fixtop, float):
                assert isinstance(fixbottom, float)
                initguess = [midpoint, slope, top]
                bounds.append(top_bounds)

                def func(c, m, s, t):
                    return evalfunc(
                        c, m, s, bottom, t, self._infectivity_or_neutralized
                    )

            elif not isinstance(fixbottom, float):
                assert isinstance(fixtop, float)
                initguess = [midpoint, slope, bottom]
                bounds.append(bottom_bounds)

                def func(c, m, s, b):
                    return evalfunc(c, m, s, b, top, self._infectivity_or_neutralized)

            else:
                assert isinstance(fixtop, float) and isinstance(fixbottom, float)
                initguess = [midpoint, slope]

                def func(c, m, s):
                    return evalfunc(
                        c,
                        m,
                        s,
                        bottom,
                        top,
                        self._infectivity_or_neutralized,
                    )

        def min_func(p):
            """Evaluate to zero when perfect fit."""
            if (use_stderr_for_fit is None) or (self.fs_stderr is None):
                return sum((func(xdata, *p) - self.fs) ** 2)
            else:
                return sum((func(xdata, *p) - self.fs / self.fs_stderr) ** 2)

        initguess = numpy.array(initguess, dtype="float")
        assert len(initguess) == len(bounds)
        res = scipy.optimize.minimize(min_func, initguess, bounds=bounds, method=method)

        if not res.success:
            return False

        if fitlogc:
            midpoint = numpy.exp(midpoint)

        midpoint = res.x[0]
        if isinstance(fixslope, float):
            if (not isinstance(fixbottom, float)) and (not isinstance(fixtop, float)):
                bottom = res.x[1]
                top = res.x[2]
            elif not isinstance(fixbottom, float):
                bottom = res.x[1]
            elif not isinstance(fixtop, float):
                top = res.x[1]
        else:
            assert (fixslope is False) or isinstance(fixslope, tuple)
            slope = res.x[1]
            if (not isinstance(fixbottom, float)) and (not isinstance(fixtop, float)):
                bottom = res.x[2]
                top = res.x[3]
            elif not isinstance(fixbottom, float):
                bottom = res.x[2]
            elif not isinstance(fixtop, float):
                top = res.x[2]

        return (midpoint, slope, bottom, top)


[docs]
    def icXX(self, fracneut, *, method="interpolate"):
        """Generalizes :meth:`HillCurve.ic50` to arbitrary frac neutralized.

        For instance, set `fracneut` to 0.95 if you want the IC95, the
        concentration where 95% is neutralized.

        Args:
            `fracneut` (float)
                Compute concentration at which `fracneut` of the virus
                is expected to be neutralized. Note that this is the
                expected fraction **neutralized**, not the fraction
                infectivity.
            `method` (str)
                Can have following values:

                  - 'interpolate': only return a number for ICXX if it
                    is in range of concentrations, otherwise return `None`.

                  - 'bound': if ICXX is out of range of concentrations,
                    return upper or lower measured concentration depending
                    on if ICXX is above or below range of concentrations.
                    Assumes infectivity decreases with concentration.

        Returns:
            Number giving ICXX or `None` (depending on value of `method`).

        """
        fracinf = 1 - fracneut
        if self.top < fracinf and self.bottom < fracinf:
            bound = "lower"
        elif self.top >= fracinf and self.bottom >= fracinf:
            bound = "upper"
        else:
            icXX = self.midpoint * ((self.top - fracinf) / (fracinf - self.bottom)) ** (
                1.0 / self.slope
            )
            if self.cs[0] <= icXX <= self.cs[-1]:
                return icXX
            elif icXX < self.cs[0]:
                bound = "lower"
            else:
                bound = "upper"

        if method == "bound":
            if bound == "upper":
                return self.cs[-1]
            elif bound == "lower":
                return self.cs[0]
            else:
                raise ValueError(f"invalid {bound=}")
        elif method == "interpolate":
            return None
        else:
            raise ValueError(f"invalid {method=}")



[docs]
    def ic50(self, method="interpolate"):
        r"""IC50 value.

        Concentration where infectivity remaining is 0.5. Equals
        `midpoint` if and only if `top = 1` and `bottom = 0`. Calculated
        from :math:`0.5 = b + \frac{t - b}{1 + (ic50/m)^s}`, which solves to
        :math:`ic50 = m \times \left(\frac{t - 0.5}{0.5 - b}\right)^{1/s}`

        Args:
            `method` (str)
                Can have following values:

                  - 'interpolate': only return a number for IC50 if it
                    is in range of concentrations, otherwise return `None`.

                  - 'bound': if IC50 is out of range of concentrations,
                    return upper or lower measured concentration depending
                    on if IC50 is above or below range of concentrations.
                    Assumes infectivity decreases with concentration.

        Returns:
            Number giving IC50 or `None` (depending on value of `method`).

        """
        return self.icXX(0.5, method=method)



[docs]
    def ic50_stdev(self):
        r"""Get standard deviation of fit IC50 parameter.

        Calculated just from estimated standard deviation on `midpoint`.
        Note if you have replicates, we recommend fitting separately
        and calculating standard error from those fits rather than
        using this value.

        Returns:
            A number giving the standard deviation, or `None` if cannot
            be estimated or if IC50 is at bound.

        """
        ic50 = self.ic50()
        if ic50 is None:
            return None
        midpoint_stdev = self.params_stdev["midpoint"]
        if midpoint_stdev is None:
            return None
        else:
            return midpoint_stdev * ic50 / self.midpoint



[docs]
    def icXX_bound(self, fracneut):
        """Like :meth:`HillCurve.ic50_bound` for arbitrary frac neutralized."""
        if self.icXX(fracneut, method="interpolate") is not None:
            return "interpolated"
        else:
            icXX = self.icXX(fracneut, method="bound")
            if icXX == self.cs[0]:
                return "upper"
            elif icXX == self.cs[-1]:
                return "lower"
            else:
                raise ValueError(f"icXX not bound for {fracneut=}")



[docs]
    def ic50_bound(self):
        """Is IC50 'interpolated', or an 'upper' or 'lower' bound."""
        return self.icXX_bound(0.5)



[docs]
    def icXX_str(self, fracneut, *, precision=3):
        """Like :meth:`HillCurve.ic50_str` for arbitrary frac neutralized."""
        icXX = f"{{:.{precision}g}}".format(self.icXX(fracneut, method="bound"))
        prefix = {"interpolated": "", "upper": "<", "lower": ">"}[
            self.icXX_bound(fracneut)
        ]
        return f"{prefix}{icXX}"



[docs]
    def ic50_str(self, precision=3):
        """IC50 as string indicating upper / lower bounds with > or <.

        Args:
            Number of significant digits in returned string.

        """
        return self.icXX_str(0.5, precision=precision)



[docs]
    def fracinfectivity(self, c):
        """Fraction infectivity at `c` for fitted parameters."""
        return self.evaluate(
            c,
            self.midpoint,
            self.slope,
            self.bottom,
            self.top,
            self._infectivity_or_neutralized,
        )



[docs]
    @staticmethod
    def evaluate(c, m, s, b, t, infectivity_or_neutralized="infectivity"):
        r""":math:`f\left(c\right) = b + \frac{t-b}{1+\left(c/m\right)^s}`.

        If `infectivity_or_neutralized` is 'neutralized' rather than
        'infectivity', instead return
        :math:`f\left(c\right) = t + \frac{b-t}{1+\left(c/m\right)^s}`.

        """
        if infectivity_or_neutralized == "infectivity":
            return b + (t - b) / (1 + (c / m) ** s)
        elif infectivity_or_neutralized == "neutralized":
            return t + (b - t) / (1 + (c / m) ** s)
        else:
            raise ValueError(f"invalid {infectivity_or_neutralized=}")


    @staticmethod
    def _evaluate_log(logc, logm, s, b, t, infectivity_or_neutralized="infectivity"):
        """Like :class:`HillCurve.evaluate` but on log concentration scale."""
        if infectivity_or_neutralized == "infectivity":
            return b + (t - b) / (1 + numpy.exp(s * (logc - logm)))
        elif infectivity_or_neutralized == "neutralized":
            return t + (b - t) / (1 + numpy.exp(s * (logc - logm)))
        else:
            raise ValueError(f"invalid {infectivity_or_neutralized=}")


[docs]
    def plot(
        self,
        *,
        concentrations="auto",
        ax=None,
        xlabel="concentration",
        ylabel="fraction infectivity",
        color="black",
        marker="o",
        markersize=6,
        linewidth=1,
        linestyle="-",
        yticklocs=None,
        draw_in_bounds=False,
    ):
        """Plot the neutralization curve.

        Args:
            `concentrations`
                Concentrations to plot, same meaning as for
                :meth:`HillCurve.dataframe`.
            `ax` (`None` or matplotlib axes.Axes object)
                Use to plot on an existing axis. If using an existing
                axis, do **not** re-scale the axis limits to the data.
            `xlabel` (str or `None`)
                Label for x-axis.
            `ylabel` (str or `None`)
                Label for y-axis.
            `color` (str)
                Color of line and point.
            `marker` (str)
                Marker shape: https://matplotlib.org/api/markers_api.html
            `markersize` (float)
                Size of point marker.
            `linewidth` (float)
                Width of line.
            `linestyle` (str)
                Line style.
            `yticklocs` (`None` or list)
                Exact locations to place yticks; `None` means auto-locate.
            `draw_in_bounds` (bool)
                By default, the plotted curve extends a bit outside the actual
                data points on both sides. If this is set to `True`, then the
                plotted curve stops at the bounds of the data points.

        Returns:
            The 2-tuple `(fig, ax)` giving the matplotlib figure and axis.

        """
        data = self.dataframe(concentrations, extend=0 if draw_in_bounds else 0.1)

        if ax is None:
            fig, ax = plt.subplots()
            fig.set_size_inches((4, 3))
            check_ybounds = True
            ylowerbound = -0.05
            yupperbound = 1.05
            ax.autoscale(True, "both")
        else:
            fig = ax.get_figure()
            ax.autoscale(False, "both")
            check_ybounds = False

        ax.plot(
            "concentration",
            "fit",
            data=data,
            linestyle=linestyle,
            linewidth=linewidth,
            color=color,
        )

        ax.errorbar(
            x="concentration",
            y="measurement",
            yerr="stderr",
            data=data,
            fmt=marker,
            color=color,
            markersize=markersize,
            capsize=markersize / 1.5,
        )

        ax.set_xscale("log")
        ax.set_xlabel(xlabel, fontsize=15)
        ax.set_ylabel(ylabel, fontsize=15)
        ax.tick_params("both", labelsize=12, length=5, width=1)
        ax.minorticks_off()
        if yticklocs is not None:
            ax.set_yticks(yticklocs)

        if check_ybounds:
            ymin, ymax = ax.get_ylim()
            ax.set_ylim(min(ymin, ylowerbound), max(ymax, yupperbound))

        return fig, ax



[docs]
    def dataframe(self, concentrations="auto", extend=0.1):
        """Get data frame with curve data for plotting.

        Useful if you want to get both the points and the fit
        curve to plot.

        Args:
            `concentrations` (array-like or 'auto' or 'measured')
                Concentrations for which we compute the fit values.
                If 'auto' the automatically computed from data
                range using :func:`concentrationRange`. If
                'measured' then only include measured values.
            `extend` (float)
                The value passed to :func:`concentrationRange` if `concentrations`
                is 'auto'.

        Returns:
            A pandas DataFrame with the following columns:

              - 'concentration': concentration
              - 'fit': curve fit value at this point
              - 'measurement': value of measurement at this point,
                or numpy.nan if no measurement here.
              - 'stderr': standard error of measurement if provided.

        """
        if concentrations == "auto":
            concentrations = concentrationRange(self.cs[0], self.cs[-1], extend=extend)
        elif concentrations == "measured":
            concentrations = []
        concentrations = numpy.concatenate([self.cs, concentrations])
        n = len(concentrations)

        points = numpy.concatenate([self.fs, numpy.full(n - len(self.fs), numpy.nan)])

        if self.fs_stderr is None:
            stderr = numpy.full(n, numpy.nan)
        else:
            stderr = numpy.concatenate(
                [self.fs_stderr, numpy.full(n - len(self.fs), numpy.nan)]
            )

        fit = numpy.array([self.fracinfectivity(c) for c in concentrations])

        return (
            pd.DataFrame.from_dict(
                collections.OrderedDict(
                    [
                        ("concentration", concentrations),
                        ("measurement", points),
                        ("fit", fit),
                        ("stderr", stderr),
                    ]
                )
            )
            .sort_values("concentration")
            .reset_index(drop=True)
        )





[docs]
def concentrationRange(bottom, top, npoints=200, extend=0.1):
    """Logarithmically spaced concentrations for plotting.

    Useful if you want to plot a curve by fitting values to densely
    sampled points and need the concentrations at which to compute
    these points.

    Args:
        `bottom` (float)
            Lowest concentration.
        `top` (float)
            Highest concentration.
        `npoints` (int)
            Number of points.
        `extend` (float)
            After transforming to log space, extend range of points
            by this much below and above `bottom` and `top`.

    Returns:
        A numpy array of `npoints` concentrations.

    >>> numpy.allclose(concentrationRange(0.1, 100, 10, extend=0),
    ...                [0.1, 0.22, 0.46, 1, 2.15, 4.64, 10, 21.54, 46.42, 100],
    ...                atol=1e-2)
    True
    >>> numpy.allclose(concentrationRange(0.1, 100, 10),
    ...                [0.05, 0.13, 0.32, 0.79, 2.00, 5.01,
    ...                 12.59, 31.62, 79.43, 199.53],
    ...                atol=1e-2)
    True

    """
    if top <= bottom:
        raise ValueError(f"{bottom=} must be less than {top=}")
    if bottom <= 0:
        raise ValueError("{bottom=} must be greater than zero")
    if extend < 0:
        raise ValueError("{extend=} must be >= 0")

    logbottom = math.log10(bottom)
    logtop = math.log10(top)
    logrange = logtop - logbottom
    assert logrange > 0
    bottom = logbottom - logrange * extend
    top = logtop + logrange * extend

    return numpy.logspace(bottom, top, npoints)



if __name__ == "__main__":
    import doctest

    doctest.testmod()
Source code for neutcurve.hillcurve

neutcurve

Navigation

Related Topics