Merge branch 'main' into typos

hamogu committed May 7, 2024
2 parents 4709e7d + dffc72f commit 2e01a42
Showing 13 changed files with 688 additions and 1,003 deletions.
1 change: 0 additions & 1 deletion docs/overview/utils.rst
@@ -20,7 +20,6 @@ The sherpa.utils module
Knuth_close
_guess_ampl_scale
apache_muller
bisection
bool_cast
calc_ftest
calc_mlr
317 changes: 176 additions & 141 deletions sherpa/astro/fake.py
@@ -1,5 +1,6 @@
#
# Copyright (C) 2021, 2023 MIT
# Copyright (C) 2021, 2023, 2024
# MIT
#
#
# This program is free software; you can redistribute it and/or modify
@@ -22,38 +23,31 @@
`sherpa.astro.data.DataPHA` data objects.
'''

import numpy as np
from sherpa.utils.random import poisson_noise
from warnings import warn

from sherpa.utils.err import ArgumentTypeErr, DataErr
from sherpa.astro.background import get_response_for_pha
from sherpa.utils.random import poisson_noise


__all__ = ('fake_pha', )


def fake_pha(data, model,
is_source=True, pileup_model=None,
add_bkgs=False, bkg_models=None, id=None,
method=None, rng=None):
is_source=None, pileup_model=None,
add_bkgs=None, bkg_models=None, id=None,
method=None, rng=None, include_bkg_data=False):
"""Simulate a PHA data set from a model.
This function replaces the counts in a PHA dataset with simulated counts
drawn from a model with Poisson noise. For the simulations, all the details
already set up in the PHA dataset will be used, including the exposure
time, one or more ARFs and RMFs, area and background scalings,
grouping, and data quality arrays.
Including a background component is optional; if requested, the background
will be a Poisson draw from the average of all backgrounds that have been
set for the input `sherpa.astro.data.DataPHA`. For each background
component, the method can use either the PHA distribution in that
background component or a model that is evaluated using the response
set for that background component.
The latter case avoids adding extra noise from a Poisson draw from a
distribution that might already have very few counts in the first place.
The backgrounds themselves are not changed by this function. To simulate
backgrounds as well as the source spectrum, call this function on the
source PHA dataset and the background PHA dataset(s) independently.
This function replaces the counts in a PHA dataset with simulated
counts drawn from a model with Poisson noise. For the simulations,
all the details already set up in the PHA dataset will be used,
including the exposure time, one or more ARFs and RMFs, area and
background scalings, grouping, and data quality arrays.
.. versionchanged:: 4.16.1
This routine has seen significant changes, and the is_source,
pileup_model, add_bkgs, bkg_models, and id arguments are
no longer used. The include_bkg_data argument was added.
.. versionchanged:: 4.16.0
The method and rng parameters were added.
@@ -62,76 +56,158 @@ def fake_pha(data, model,
----------
data : sherpa.astro.data.DataPHA
The dataset (may be a background dataset).
model : sherpa.models.model.ArithmeticModel instance
The model that will be used for simulations.
is_source : bool
``True`` means that the ``model`` does not contain response or
background components and that these need to be added based on the
ARF, RMF, and backgrounds set up for the data set. If ``False``, then
the ``model`` contains any components to describe the instrument
already.
pileup_model : None or `sherpa.astro.models.JDPileup` instance
Pileup Model for the source spectrum
add_bkgs : bool
If ``True``, backgrounds are added to the source counts.
bkg_models : dict
Keys in the dictionary need to be the background ids in the dataset
``data``, and the values are the corresponding source models. For all
background datasets that are listed in this dictionary, the
background counts will be simulated based on the model, appropriately
scaled (for area etc.) and added to the source. The same ``is_source``
setting as for the source model applies here, i.e. if the source model
already contains the ARF and the RMF, then the background models should
do so, too. This setting has no effect if ``add_bkgs=False``.
For all background ids not listed in this dictionary, the
counts will be drawn from the PHA data of the background data set.
id : str
The identifier used if called from the UI layer. This is only
used for certain error messages.
model : sherpa.models.model.ArithmeticModel
The model that will be used for simulations. It must contain
any background components, appropriately scaled, and include
the relevant response.
include_bkg_data : bool, optional
Should the counts in the background datasets be included when
calculating the predicted signal? As background datasets are
often noisy it is generally better to include a model of the
background in the model argument.
method : callable or None
The routine used to simulate the data. If None (the default)
then sherpa.utils.poisson_noise is used, otherwise the function
must accept a single ndarray, returning a ndarray of the same shape,
and an optional rng argument.
then sherpa.utils.random.poisson_noise is used, otherwise the
function must accept a single ndarray and an optional rng
argument, returning a ndarray of the same shape as the input.
rng : numpy.random.Generator, numpy.random.RandomState, or None, optional
Determines how random numbers are created. If set to None then
the routines from `numpy.random` are used, and so can be
controlled by calling `numpy.random.seed`.
Notes
-----
The model must include the relevant response, and if a background
model is required then it must also be included. The model
predicted counts array is generated by evaluating the model and
then, if the dataset contains any background datasets and the
include_bkg_data flag is set, the background signal is added
after appropriate scaling between background and source apertures
and exposure times. This "model predicted counts" array is then
passed to the method argument to create the simulated data; the
default method draws the simulated counts from a Poisson
distribution (the `poisson_noise` routine).
If simulated backgrounds are required then the backgrounds should
be simulated separately (as if they were source models but with no
associated backgrounds) and then added to the faked source
dataset.
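A minimal, hypothetical sketch of this (bkg and bkg_model are
placeholders for a background DataPHA and its full model,
including the background response; DataPHA.set_background is
one way to attach the result):

>>> fake_pha(bkg, bkg_model)
>>> data.set_background(bkg)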
This routine was significantly changed in Sherpa 4.16.1. To update
old code, note that the following arguments are no longer used:
is_source, pileup_model, add_bkgs, bkg_models and id. The model
argument must now include any background model terms, and the
necessary scaling term for the conversion from the background to
source aperture (a combination of the add_bkgs and
bkg_models="model" arguments), and also include the response
necessary to convert to channels and counts (a combination of the
is_source and pileup_model arguments). Setting the bkg_models
argument to a PHA dataset is now handled by adding the background
to the data first, and then setting include_bkg_data to True.
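As an illustrative sketch of this migration (pha and mdl are
placeholder names for a DataPHA and an unconvolved model), a
pre-4.16.1 call such as

>>> fake_pha(pha, mdl, is_source=True, add_bkgs=False)

would now wrap the model with the response before the call:

>>> from sherpa.astro.instrument import Response1D
>>> resp = Response1D(pha)
>>> fake_pha(pha, resp(mdl))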
Examples
--------
Estimate the signal from a 5000 second observation using the
ARF and RMF from "src.arf" and "src.rmf" respectively:
>>> set_source(1, xsphabs.gal * xsapec.clus)
>>> gal.nh = 0.12
>>> clus.kt, clus.abundanc = 4.5, 0.3
>>> clus.redshift = 0.187
>>> clus.norm = 1.2e-3
>>> fake_pha(1, 'src.arf', 'src.rmf', 5000)
Simulate a 1 megasecond observation for the data and model
from the default data set. The simulated data will include an
estimated background component based on scaling the existing
background observations for the source. The simulated data
set, which has the same grouping as the default set for
easier comparison, is created with the 'sim' label and then
written out to the file 'sim.pi':
>>> arf = get_arf()
>>> rmf = get_rmf()
>>> bkg = get_bkg()
>>> bscal = get_backscal()
>>> grp = get_grouping()
>>> qual = get_quality()
>>> texp = 1e6
>>> set_source('sim', get_source())
>>> fake_pha('sim', arf, rmf, texp, backscal=bscal, bkg=bkg,
... grouping=grp, quality=qual, grouped=True)
>>> save_pha('sim', 'sim.pi')
Simulate the data from an absorbed APEC model, where the response
is manually created (a "perfect" RMF and a flat ARF with a step at
3.2 keV), and then use the Sherpa plot objects to display the
simulated data and model. Note that the exposure time must be set,
since the predicted counts depend on it as well as on the model
normalization. First the imports:
>>> import numpy as np
>>> from sherpa.astro.data import DataPHA
>>> from sherpa.astro.instrument import Response1D, create_arf, create_delta_rmf
>>> from sherpa.astro.xspec import XSphabs, XSapec
The model used for the simulation is defined - in this case an
absorbed APEC model:
>>> gal = XSphabs()
>>> clus = XSapec()
>>> model = gal * clus
>>> gal.nh = 0.12
>>> clus.kt = 4.5
>>> clus.abundanc = 0.3
>>> clus.redshift = 0.23
>>> clus.norm = 6.3e-4
For this example a "fake" response is generated, using the
create_arf and create_delta_rmf routines to create an ARF with a
step in it (100 cm^2 below 3.2 keV and 50 cm^2 above it) together
with a "perfect" RMF (so there is no blurring of energies)
covering 1 to 5 keV. Normally the responses would be read from a
file:
>>> chans = np.arange(1, 101, dtype=np.int16)
>>> egrid = np.linspace(1, 5, 101)
>>> elo, ehi = egrid[:-1], egrid[1:]
>>> yarf = 100 * (elo < 3.2) + 50 * (elo >= 3.2)
>>> arf = create_arf(elo, ehi, yarf)
>>> rmf = create_delta_rmf(elo, ehi, e_min=elo, e_max=ehi)
In this case the DataPHA structure is also created manually,
but it could instead be read from a file:
>>> pha = DataPHA("faked", chans, chans * 0)
>>> pha.set_response(arf=arf, rmf=rmf)
>>> pha.exposure = 50000
The "faked" data is created by applying a response model to the
model, and passing it to `fake_pha`:
>>> resp = Response1D(pha)
>>> full_model = resp(model)
>>> rng = np.random.default_rng()
>>> fake_pha(pha, full_model, rng=rng)
The following overplots the model - used to simulate the data -
on the simulated data:
The new data can be displayed, comparing it to the model:
>>> pha.set_analysis("energy")
>>> from sherpa.astro.plot import DataPHAPlot, ModelPHAHistogram
>>> dplot = DataPHAPlot()
>>> dplot.prepare(pha)
>>> dplot.plot()
>>> mplot = ModelPHAHistogram()
>>> mplot.prepare(pha, full_model)
>>> mplot.overplot()
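The method argument accepts any callable that takes an ndarray and
an optional rng argument and returns an ndarray of the same shape.
The following is a hypothetical sketch (not part of Sherpa) that
swaps the Poisson draw for a Gaussian approximation:

>>> def gaussian_noise(x, rng=None):
...     # Gaussian approximation to Poisson noise: mean x and
...     # sigma sqrt(x); values are not clipped at zero here.
...     gen = np.random.default_rng() if rng is None else rng
...     return gen.normal(loc=x, scale=np.sqrt(np.abs(x)))
>>> fake_pha(pha, full_model, method=gaussian_noise, rng=rng)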
"""

# Warn users if they are using the old options.
#
if is_source is not None:
warn("is_source is no-longer used, the model "
"must contain a response",
category=DeprecationWarning)

if pileup_model is not None:
warn("pileup_model is no-longer used, the model "
"must contain any needed pileup model",
category=DeprecationWarning)

if add_bkgs is not None:
warn("add_bkgs is no-longer used, as the model "
"either contains a background term or the "
"background datasets are used",
category=DeprecationWarning)

if bkg_models is not None:
warn("bkg_models is no-longer used, as the model "
"must contain any background component",
category=DeprecationWarning)

if id is not None:
warn("id is no-longer used",
category=DeprecationWarning)

# The assumption is that the response matches the PHA, that is the
# number of channels matches. There is currently no explicit check
# of this.
#
if len(data.response_ids) == 0:
raise DataErr('normffake', data.name)

@@ -140,62 +216,21 @@ def fake_pha(data, model,
elif not callable(method):
raise ArgumentTypeErr("badarg", "method", "a callable")

if is_source:
model = get_response_for_pha(data, model, bkg_srcs={},
pileup_model=pileup_model, id=id)

# Get one RMF. Hopefully all of them have the same number of
# channels, but that sanity check should really be done elsewhere.
rmf0 = data.get_rmf(data.response_ids[0])
data.channel = np.arange(1, rmf0.detchans + 1)

# Calculate the source model, and take a "draw" based on
# the source model. That becomes the simulated data.
data.counts = method(data.eval_model(model), rng=rng)

# Add in background counts:
# -- Scale each background properly given data's
# exposure time, BACKSCAL and AREASCAL
# -- Take average of scaled backgrounds
# -- Take a Poisson draw based on the average scaled background
# -- Add that to the simulated data counts
# Evaluate the model. This assumes the model term contains a
# response and any scaled background components.
#
# Adding background counts is OPTIONAL, only done if user sets
# "bkg" argument to fake_pha. The reason is that the user could
# well set a "source" model that does include a background
# component. In that case users should have the option to simulate
# WITHOUT background counts being added in.
#
if add_bkgs:
if bkg_models is None:
bkg_models = {}

nbkg = len(data.background_ids)
b = 0
model_prediction = data.eval_model(model)

if include_bkg_data:
# If there are background components then we sum up the data,
# after scaling to match the source data set, and use that as a
# source term. Note that get_background_scale will correct for
# exposure time (as units=counts), scaling factors, and the number
# of background components.
#
for bkg_id in data.background_ids:
# we do (probably) want to filter and group the scale array
scale = data.get_background_scale(bkg_id)
if bkg_id in bkg_models:
bkg_pha = data.get_background(bkg_id)
bkg_model = bkg_models[bkg_id]
if is_source:
bkg_model = get_response_for_pha(bkg_pha, bkg_model, id=id)

# Exposure in background could differ from exposure in
# source. But need to set here so eval_model works
# correctly.
# (At least I think that's how it works.)
orig_exposure = bkg_pha.exposure
bkg_pha.exposure = data.exposure
# No Poisson here because we make a Poisson draw
# later using the average of all backgrounds
cts = bkg_pha.eval_model(bkg_model)
bkg_pha.exposure = orig_exposure
else:
cts = data.get_background(bkg_id).counts
b += scale * cts

if nbkg > 0:
b = b / nbkg
b_poisson = method(b, rng=rng)
data.counts = data.counts + b_poisson
cts = data.get_background(bkg_id).counts
scale = data.get_background_scale(bkg_id, units="counts")
model_prediction += scale * cts

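# Apply the noise routine (poisson_noise by default) to the
# model prediction to create the simulated counts.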
data.counts = method(model_prediction, rng=rng)
