Merge branch 'main' into typos

hamogu committed May 7, 2024
2 parents 4709e7d + dffc72f commit 2e01a42
Showing 13 changed files with 688 additions and 1,003 deletions.
1 change: 0 additions & 1 deletion docs/overview/utils.rst
@@ -20,7 +20,6 @@ The sherpa.utils module
Knuth_close
_guess_ampl_scale
apache_muller
bisection
bool_cast
calc_ftest
calc_mlr
317 changes: 176 additions & 141 deletions sherpa/astro/fake.py
@@ -1,5 +1,6 @@
#
# Copyright (C) 2021, 2023 MIT
# Copyright (C) 2021, 2023, 2024
# MIT
#
#
# This program is free software; you can redistribute it and/or modify
@@ -22,38 +23,31 @@
`sherpa.astro.data.DataPHA` data objects.
'''

import numpy as np
from sherpa.utils.random import poisson_noise
from warnings import warn

from sherpa.utils.err import ArgumentTypeErr, DataErr
from sherpa.astro.background import get_response_for_pha
from sherpa.utils.random import poisson_noise


__all__ = ('fake_pha', )


def fake_pha(data, model,
is_source=True, pileup_model=None,
add_bkgs=False, bkg_models=None, id=None,
method=None, rng=None):
is_source=None, pileup_model=None,
add_bkgs=None, bkg_models=None, id=None,
method=None, rng=None, include_bkg_data=False):
"""Simulate a PHA data set from a model.
This function replaces the counts in a PHA dataset with simulated counts
drawn from a model with Poisson noise. For the simulations, all the details
already set up in the PHA dataset will be used, including the exposure
time, one or more ARFs and RMFs, area and background scalings,
grouping, and data quality arrays.
Including a background component is optional; if requested, the background
will be a Poisson draw from the average of all backgrounds that have been
set for the input `sherpa.astro.data.DataPHA`. For each background
component, the method can use either the PHA distribution in that
background component or a model that is evaluated using the response
set for that background component.
The latter case avoids adding extra noise from a Poisson draw from a
distribution that might already have very few counts in the first place.
The backgrounds themselves are not changed by this function. To simulate
backgrounds as well as the source spectrum, call this function on the
source PHA dataset and the background PHA dataset(s) independently.
This function replaces the counts in a PHA dataset with simulated
counts drawn from a model with Poisson noise. For the simulations,
all the details already set up in the PHA dataset will be used,
including the exposure time, one or more ARFs and RMFs, area and
background scalings, grouping, and data quality arrays.
.. versionchanged:: 4.16.1
This routine has seen significant changes, and the is_source,
pileup_model, add_bkgs, bkg_models, and id arguments are
no longer used. The include_bkg_data argument was added.
.. versionchanged:: 4.16.0
The method and rng parameters were added.
@@ -62,76 +56,158 @@ def fake_pha(data, model,
----------
data : sherpa.astro.data.DataPHA
The dataset (may be a background dataset).
model : sherpa.models.model.ArithmeticModel instance
The model that will be used for simulations.
is_source : bool
``True`` means that the ``model`` does not contain response or
background components and that these need to be added based on the
ARF, RMF, and backgrounds set up for the data set. If ``False``, then
the ``model`` contains any components to describe the instrument
already.
pileup_model : None or `sherpa.astro.models.JDPileup` instance
Pileup Model for the source spectrum
add_bkgs : bool
If ``True``, backgrounds are added to the source counts.
bkg_models : dict
Keys in the dictionary need to be the background ids in the dataset
``data``, and the values are the corresponding source models. For all
background datasets that are listed in this dictionary, the
background counts will be simulated based on the model, appropriately
scaled (for area etc.) and added to the source. The same ``is_source``
setting as for the source model applies here, i.e. if the source model
already contains the ARF and the RMF, then the background models should
do so, too. This setting has no effect if ``add_bkgs=False``.
For all background ids not listed in this dictionary, the
counts will be drawn from the PHA data of the background data set.
id : str
The identifier used if called from the UI layer. This is only
used for certain error messages.
model : sherpa.models.model.ArithmeticModel
The model that will be used for simulations. It must contain
any background components, appropriately scaled, and include
the relevant response.
include_bkg_data : bool, optional
Should the counts in the background datasets be included when
calculating the predicted signal? As background datasets are
often noisy it is generally better to include a model of the
background in the model argument.
method : callable or None
The routine used to simulate the data. If None (the default)
then sherpa.utils.poisson_noise is used, otherwise the function
must accept a single ndarray, returning a ndarray of the same shape,
and an optional rng argument.
then sherpa.utils.random.poisson_noise is used, otherwise the
function must accept a single ndarray and an optional rng
argument, returning a ndarray of the same shape as the input.
rng : numpy.random.Generator, numpy.random.RandomState, or None, optional
Determines how random numbers are created. If set to None then
the routines from `numpy.random` are used, and so can be
controlled by calling `numpy.random.seed`.
Notes
-----
The model must include the relevant response, and if a background
model is required then it must also be included. The model
predicted counts array is generated by evaluating the model and
then, if the dataset contains any background datasets and the
include_bkg_data flag is set, the background signal is added
after appropriate scaling between background and source apertures
and exposure times. This "model predicted counts" array is then
passed to the method argument to create the simulated data; the
default method draws the simulated counts from a Poisson
distribution (the `poisson_noise` routine).
If simulated backgrounds are required then the backgrounds should
be simulated separately (as if they were source models but with no
associated backgrounds) and then added to the faked source
dataset.
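A minimal, hypothetical sketch of this (bkg and bkg_model are
placeholders for a background DataPHA and its full model,
including the background response; DataPHA.set_background is
one way to attach the result):

>>> fake_pha(bkg, bkg_model)
>>> data.set_background(bkg)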
This routine was significantly changed in Sherpa 4.16.1. To update
old code, note that the following arguments are no longer used:
is_source, pileup_model, add_bkgs, bkg_models and id. The model
argument must now include any background model terms, and the
necessary scaling term for the conversion from the background to
source aperture (a combination of the add_bkgs and
bkg_models="model" arguments), and also include the response
necessary to convert to channels and counts (a combination of the
is_source and pileup_model arguments). Setting the bkg_models
argument to a PHA dataset is now handled by adding the background
to the data first, and then setting include_bkg_data to True.
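As an illustrative sketch of this migration (pha and mdl are
placeholder names for a DataPHA and an unconvolved model), a
pre-4.16.1 call such as

>>> fake_pha(pha, mdl, is_source=True, add_bkgs=False)

would now wrap the model with the response before the call:

>>> from sherpa.astro.instrument import Response1D
>>> resp = Response1D(pha)
>>> fake_pha(pha, resp(mdl))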
Examples
--------
Estimate the signal from a 5000 second observation using the
ARF and RMF from "src.arf" and "src.rmf" respectively:
>>> set_source(1, xsphabs.gal * xsapec.clus)
>>> gal.nh = 0.12
>>> clus.kt, clus.abundanc = 4.5, 0.3
>>> clus.redshift = 0.187
>>> clus.norm = 1.2e-3
>>> fake_pha(1, 'src.arf', 'src.rmf', 5000)
Simulate a 1 megasecond observation for the data and model
from the default data set. The simulated data will include an
estimated background component based on scaling the existing
background observations for the source. The simulated data
set, which has the same grouping as the default set for
easier comparison, is created with the 'sim' label and then
written out to the file 'sim.pi':
>>> arf = get_arf()
>>> rmf = get_rmf()
>>> bkg = get_bkg()
>>> bscal = get_backscal()
>>> grp = get_grouping()
>>> qual = get_quality()
>>> texp = 1e6
>>> set_source('sim', get_source())
>>> fake_pha('sim', arf, rmf, texp, backscal=bscal, bkg=bkg,
... grouping=grp, quality=qual, grouped=True)
>>> save_pha('sim', 'sim.pi')
Simulate the data from an absorbed APEC model, where the response
is manually created (a "perfect" RMF and a flat ARF with a step at
3.2 keV), and then use the Sherpa plot objects to display the
simulated data and model. Note that the exposure time must be set,
since the predicted counts depend on it as well as on the model
normalization. First the imports:
>>> import numpy as np
>>> from sherpa.astro.data import DataPHA
>>> from sherpa.astro.instrument import Response1D, create_arf, create_delta_rmf
>>> from sherpa.astro.xspec import XSphabs, XSapec
The model used for the simulation is defined - in this case an
absorbed APEC model:
>>> gal = XSphabs()
>>> clus = XSapec()
>>> model = gal * clus
>>> gal.nh = 0.12
>>> clus.kt = 4.5
>>> clus.abundanc = 0.3
>>> clus.redshift = 0.23
>>> clus.norm = 6.3e-4
For this example a "fake" response is generated, using the
create_arf and create_delta_rmf routines to create an ARF with a
step in it (100 cm^2 below 3.2 keV and 50 cm^2 above it) together
with a "perfect" RMF (so there is no blurring of energies)
covering 1 to 5 keV. Normally the responses would be read from a
file:
>>> chans = np.arange(1, 101, dtype=np.int16)
>>> egrid = np.linspace(1, 5, 101)
>>> elo, ehi = egrid[:-1], egrid[1:]
>>> yarf = 100 * (elo < 3.2) + 50 * (elo >= 3.2)
>>> arf = create_arf(elo, ehi, yarf)
>>> rmf = create_delta_rmf(elo, ehi, e_min=elo, e_max=ehi)
In this case the DataPHA structure is also created manually,
but it could instead be read from a file:
>>> pha = DataPHA("faked", chans, chans * 0)
>>> pha.set_response(arf=arf, rmf=rmf)
>>> pha.exposure = 50000
The "faked" data is created by applying a response model to the
model, and passing it to `fake_pha`:
>>> resp = Response1D(pha)
>>> full_model = resp(model)
>>> rng = np.random.default_rng()
>>> fake_pha(pha, full_model, rng=rng)
The following overplots the model - used to simulate the data -
on the simulated data:
The new data can be displayed, comparing it to the model:
>>> pha.set_analysis("energy")
>>> from sherpa.astro.plot import DataPHAPlot, ModelPHAHistogram
>>> dplot = DataPHAPlot()
>>> dplot.prepare(pha)
>>> dplot.plot()
>>> mplot = ModelPHAHistogram()
>>> mplot.prepare(pha, full_model)
>>> mplot.overplot()
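The method argument accepts any callable that takes an ndarray and
an optional rng argument and returns an ndarray of the same shape.
The following is a hypothetical sketch (not part of Sherpa) that
swaps the Poisson draw for a Gaussian approximation:

>>> def gaussian_noise(x, rng=None):
...     # Gaussian approximation to Poisson noise: mean x and
...     # sigma sqrt(x); values are not clipped at zero here.
...     gen = np.random.default_rng() if rng is None else rng
...     return gen.normal(loc=x, scale=np.sqrt(np.abs(x)))
>>> fake_pha(pha, full_model, method=gaussian_noise, rng=rng)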
"""

# Warn users if they are using the old options.
#
if is_source is not None:
warn("is_source is no-longer used, the model "
"must contain a response",
category=DeprecationWarning)

if pileup_model is not None:
warn("pileup_model is no-longer used, the model "
"must contain any needed pileup model",
category=DeprecationWarning)

if add_bkgs is not None:
warn("add_bkgs is no-longer used, as the model "
"either contains a background term or the "
"background datasets are used",
category=DeprecationWarning)

if bkg_models is not None:
warn("bkg_models is no-longer used, as the model "
"must contain any background component",
category=DeprecationWarning)

if id is not None:
warn("id is no-longer used",
category=DeprecationWarning)

# The assumption is that the response matches the PHA, that is the
# number of channels matches. There is currently no explicit check
# of this.
#
if len(data.response_ids) == 0:
raise DataErr('normffake', data.name)

@@ -140,62 +216,21 @@ def fake_pha(data, model,
elif not callable(method):
raise ArgumentTypeErr("badarg", "method", "a callable")

if is_source:
model = get_response_for_pha(data, model, bkg_srcs={},
pileup_model=pileup_model, id=id)

# Get one RMF. Hopefully all of them have the same number of
# channels, but that sanity check should really be done elsewhere.
rmf0 = data.get_rmf(data.response_ids[0])
data.channel = np.arange(1, rmf0.detchans + 1)

# Calculate the source model, and take a "draw" based on
# the source model. That becomes the simulated data.
data.counts = method(data.eval_model(model), rng=rng)

# Add in background counts:
# -- Scale each background properly given data's
# exposure time, BACKSCAL and AREASCAL
# -- Take average of scaled backgrounds
# -- Take a Poisson draw based on the average scaled background
# -- Add that to the simulated data counts
# Evaluate the model. This assumes the model term contains a
# response and any scaled background components.
#
# Adding background counts is OPTIONAL, only done if user sets
# "bkg" argument to fake_pha. The reason is that the user could
# well set a "source" model that does include a background
# component. In that case users should have the option to simulate
# WITHOUT background counts being added in.
#
if add_bkgs:
if bkg_models is None:
bkg_models = {}

nbkg = len(data.background_ids)
b = 0
model_prediction = data.eval_model(model)

if include_bkg_data:
# If there are background components then we sum up the data,
# after scaling to match the source data set, and use that as a
# source term. Note that get_background_scale will correct for
# exposure time (as units=counts), scaling factors, and the number
# of background components.
#
for bkg_id in data.background_ids:
# we do (probably) want to filter and group the scale array
scale = data.get_background_scale(bkg_id)
if bkg_id in bkg_models:
bkg_pha = data.get_background(bkg_id)
bkg_model = bkg_models[bkg_id]
if is_source:
bkg_model = get_response_for_pha(bkg_pha, bkg_model, id=id)

# Exposure in background could differ from exposure in
# source. But need to set here so eval_model works
# correctly.
# (At least I think that's how it works.)
orig_exposure = bkg_pha.exposure
bkg_pha.exposure = data.exposure
# No Poisson here because we make a Poisson draw
# later using the average of all backgrounds
cts = bkg_pha.eval_model(bkg_model)
bkg_pha.exposure = orig_exposure
else:
cts = data.get_background(bkg_id).counts
b += scale * cts

if nbkg > 0:
b = b / nbkg
b_poisson = method(b, rng=rng)
data.counts = data.counts + b_poisson
cts = data.get_background(bkg_id).counts
scale = data.get_background_scale(bkg_id, units="counts")
model_prediction += scale * cts

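# Apply the noise routine (poisson_noise by default) to the
# model prediction to create the simulated counts.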
data.counts = method(model_prediction, rng=rng)
