
Add option to find peaks using ratio variance/mean in FindSXPeaksConvolve #37171

Merged: 10 commits, May 2, 2024
120 changes: 86 additions & 34 deletions Framework/PythonInterface/plugins/algorithms/FindSXPeaksConvolve.py
@@ -19,9 +19,10 @@
EnabledWhenProperty,
PropertyCriterion,
logger,
StringListValidator,
)
import numpy as np
- from scipy.ndimage import label, maximum_position, binary_closing, sum_labels, uniform_filter1d
+ from scipy.ndimage import label, maximum_position, binary_closing, sum_labels, uniform_filter1d, uniform_filter
from scipy.signal import convolve
from IntegratePeaksSkew import InstrumentArrayConverter, get_fwhm_from_back_to_back_params

@@ -109,16 +110,36 @@ def PyInit(self):
validator=FloatBoundedValidator(lower=0.0),
doc="Minimum peak size as a fraction of the kernel size.",
)
self.declareProperty(
name="PeakFindingStrategy",
defaultValue="IOverSigma",
direction=Direction.Input,
validator=StringListValidator(["VarianceOverMean", "IOverSigma"]),
doc="PeakFindingStrategy=IOverSigma will find peaks by integrating data using a shoebox kernel and looking"
" for peaks with I/sigma > ThresholdIoverSigma. PeakFindingStrategy=VarianceOverMean will look for "
"peaks with local variance/mean > ThresholdVarianceOverMean.",
)
self.declareProperty(
name="ThresholdVarianceOverMean",
defaultValue=3.0,
direction=Direction.Input,
validator=FloatBoundedValidator(lower=1.0),
doc="Threshold value for variance/mean used to identify peaks.",
)
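# ThresholdVarianceOverMean is enabled only when PeakFindingStrategy is not at its default ("IOverSigma")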
use_variance_over_mean = EnabledWhenProperty("PeakFindingStrategy", PropertyCriterion.IsNotDefault)
self.setPropertySettings("ThresholdVarianceOverMean", use_variance_over_mean)
Contributor: From a user perspective, isn't it better to keep the ThresholdIoverSigma and ThresholdVarianceOverMean variables next to each other, immediately after PeakFindingStrategy, since they become active/inactive based on its selection?

[screenshot]

Contributor Author: I agree, but the convention is to put new arguments at the bottom so as to preserve the positional argument order in the Python wrapper. Sometimes, when there are loads of parameters, it's more likely that no one is using positional arguments, but there are not so many arguments here, so it's probably better to keep it as is. Is that OK?
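For illustration, a hypothetical positional call (the argument order shown here is illustrative, not the actual declaration order):

# existing user scripts may rely on positional order, e.g.
# (InputWorkspace, PeaksWorkspace, NRows, NCols, NBins, ...)
FindSXPeaksConvolve(ws, "peaks_out", 5, 5, 10)
# appending PeakFindingStrategy and ThresholdVarianceOverMean at the end keeps such
# calls valid; inserting them earlier would silently shift the later arguments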

Contributor: Yeah, understood. I guess users won't bother much since there are only a small number of params to fill.

use_intens_over_sigma = EnabledWhenProperty("PeakFindingStrategy", PropertyCriterion.IsDefault)
self.setPropertySettings("ThresholdIoverSigma", use_intens_over_sigma)

def PyExec(self):
# get input
ws = self.getProperty("InputWorkspace").value
threshold_i_over_sig = self.getProperty("ThresholdIoverSigma").value
nrows = self.getProperty("NRows").value
ncols = self.getProperty("NCols").value
nfwhm = self.getProperty("NFWHM").value
get_nbins_from_b2bexp_params = self.getProperty("GetNBinsFromBackToBackParams").value
min_frac_size = self.getProperty("MinFracSize").value
peak_finding_strategy = self.getProperty("PeakFindingStrategy").value

# create output table workspace
peaks = self.exec_child_alg("CreatePeaksWorkspace", InstrumentWorkspace=ws, NumberOfPeaks=0, OutputWorkspace="_peaks")
@@ -148,51 +169,51 @@ def PyExec(self):
# get data in detector coords
peak_data = array_converter.get_peak_data(dummy_pk, detid, bank.getName(), bank.xpixels(), bank.ypixels(), 1, 1)
_, y, esq, _ = peak_data.get_data_arrays() # 3d arrays [rows x cols x tof]
- # perform convolutions to integrate kernel/shoebox
- # pad with nearest so don't get peaks at edge when -ve values go outside data extent
- kernel = make_kernel(nrows, ncols, nbins)
-
- yconv = convolve(y, kernel, mode="valid")
- econv = np.sqrt(convolve(esq, kernel**2, mode="valid"))
-
- with np.errstate(divide="ignore", invalid="ignore"):
-     intens_over_sig = yconv / econv  # ignore 0/0 which produces NaN (recall NaN > x = False)
- intens_over_sig[~np.isfinite(intens_over_sig)] = 0
- intens_over_sig = uniform_filter1d(intens_over_sig, size=3, axis=2, mode="nearest")
-
- # find peaks above threshold I/sigma
- pk_mask = intens_over_sig > threshold_i_over_sig
+ if peak_finding_strategy == "IOverSigma":
+     threshold = self.getProperty("ThresholdIoverSigma").value
+     ratio, yconv, econv, kernel_shape = calculate_intens_over_sigma(y, esq, nrows, ncols, nbins)
+ else:
+     threshold = self.getProperty("ThresholdVarianceOverMean").value
+     ratio, ycnts, avg_y, kernel_shape = calculate_variance_over_mean(y, esq, nrows, ncols, nbins)
+ # perform final smoothing
+ ratio[~np.isfinite(ratio)] = 0
+ ratio = uniform_filter1d(ratio, size=3, axis=2, mode="nearest")
+ # identify peaks above threshold
+ pk_mask = ratio > threshold
pk_mask = binary_closing(pk_mask) # removes holes - helps merge close peaks
labels, nlabels = label(pk_mask) # identify contiguous nearest-neighbour connected regions
# identify labels of peaks above min size
- min_size = int(min_frac_size * kernel.size)
+ min_size = int(min_frac_size * np.prod(kernel_shape))
npixels = sum_labels(pk_mask, labels, range(1, nlabels + 1))
ilabels = np.flatnonzero(npixels > min_size) + 1

# find index of maximum in I/sigma for each valid peak (label index in ilabels)
- imaxs = maximum_position(intens_over_sig, labels, ilabels)
+ imaxs = maximum_position(ratio, labels, ilabels)

# add peaks to table
for ipk in range(len(imaxs)):
irow, icol, itof = imaxs[ipk]

- peak_conv_intens = yconv[irow, icol, itof]
- sig_conv_intens = econv[irow, icol, itof]

# map convolution output to the input
- irow += (kernel.shape[0] - 1) // 2
- icol += (kernel.shape[1] - 1) // 2
- itof += (kernel.shape[2] - 1) // 2
+ irow += (kernel_shape[0] - 1) // 2
+ icol += (kernel_shape[1] - 1) // 2
+ itof += (kernel_shape[2] - 1) // 2

# find peak position
# get data in kernel window around index with max I/sigma
- irow_lo = np.clip(irow - kernel.shape[0] // 2, a_min=0, a_max=y.shape[0])
- irow_hi = np.clip(irow + kernel.shape[0] // 2, a_min=0, a_max=y.shape[0])
- icol_lo = np.clip(icol - kernel.shape[1] // 2, a_min=0, a_max=y.shape[1])
- icol_hi = np.clip(icol + kernel.shape[1] // 2, a_min=0, a_max=y.shape[1])
- itof_lo = np.clip(itof - kernel.shape[2] // 2, a_min=0, a_max=y.shape[2])
- itof_hi = np.clip(itof + kernel.shape[2] // 2, a_min=0, a_max=y.shape[2])
+ irow_lo = np.clip(irow - kernel_shape[0] // 2, a_min=0, a_max=y.shape[0])
+ irow_hi = np.clip(irow + kernel_shape[0] // 2, a_min=0, a_max=y.shape[0])
+ icol_lo = np.clip(icol - kernel_shape[1] // 2, a_min=0, a_max=y.shape[1])
+ icol_hi = np.clip(icol + kernel_shape[1] // 2, a_min=0, a_max=y.shape[1])
+ itof_lo = np.clip(itof - kernel_shape[2] // 2, a_min=0, a_max=y.shape[2])
+ itof_hi = np.clip(itof + kernel_shape[2] // 2, a_min=0, a_max=y.shape[2])
ypk = y[irow_lo:irow_hi, icol_lo:icol_hi, itof_lo:itof_hi]
if peak_finding_strategy == "VarianceOverMean":
# perform additional check as in Winter (2018) https://doi.org/10.1107/S2059798317017235
background_cnts = avg_y[tuple(imaxs[ipk])]
ypk_cnts = ycnts[irow_lo:irow_hi, icol_lo:icol_hi, itof_lo:itof_hi]
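# i.e. keep the peak only if at least one pixel exceeds the local mean background by more than sqrt(background) (~1 Poisson sigma)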
if not np.any(ypk_cnts > background_cnts + np.sqrt(background_cnts)):
continue # skip peak
# integrate over TOF and select detector with max intensity
imax_det = np.argmax(ypk.sum(axis=2))
irow_max, icol_max = np.unravel_index(imax_det, ypk.shape[0:-1])
Expand All @@ -209,10 +230,13 @@ def PyExec(self):
DetectorID=int(peak_data.detids[irow_max, icol_max]),
)
# set intensity of peak (rough estimate)
- pk = peaks.getPeak(peaks.getNumberPeaks() - 1)
- bin_width = xspec[itof + 1] - xspec[itof]
- pk.setIntensity(peak_conv_intens * bin_width)
- pk.setSigmaIntensity(sig_conv_intens * bin_width)
+ if peak_finding_strategy == "IOverSigma":
+     peak_conv_intens = yconv[tuple(imaxs[ipk])]
+     sig_conv_intens = econv[tuple(imaxs[ipk])]
+     pk = peaks.getPeak(peaks.getNumberPeaks() - 1)
+     bin_width = xspec[itof + 1] - xspec[itof]
+     pk.setIntensity(peak_conv_intens * bin_width)
+     pk.setSigmaIntensity(sig_conv_intens * bin_width)

# remove dummy peak
self.exec_child_alg("DeleteTableRows", TableWorkspace=peaks, Rows=[irow_to_del])
@@ -235,6 +259,34 @@ def exec_child_alg(self, alg_name, **kwargs):
return None


def calculate_intens_over_sigma(y, esq, nrows, ncols, nbins):
    kernel = make_kernel(nrows, ncols, nbins)  # integration shoebox kernel
    yconv = convolve(y, kernel, mode="valid")
    econv = np.sqrt(convolve(esq, kernel**2, mode="valid"))
    with np.errstate(divide="ignore", invalid="ignore"):
        intens_over_sigma = yconv / econv  # ignore 0/0 which produces NaN (recall NaN > x = False)
    return intens_over_sigma, yconv, econv, kernel.shape


def calculate_variance_over_mean(y, esq, nrows, ncols, nbins):
    # Evaluate the ratio of variance/mean (should be 1 for Poisson-distributed counts with a constant background)
    # Peak-finding criterion used in DIALS, Winter (2018) https://doi.org/10.1107/S2059798317017235
    with np.errstate(divide="ignore", invalid="ignore"):
        # scale to raw counts
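        # (for raw counts N scaled by a constant k: y = k*N and esq = k**2 * N,
        #  so scale = y/esq = 1/k and ycnts = y*scale recovers N)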
        scale = y / esq
        scale[~np.isfinite(scale)] = 0
        ycnts = y * scale
        # calculate variance = (E[X^2] - E[X]^2)
        avg_y = uniform_filter(ycnts, size=(nrows, ncols, nbins), mode="nearest")
        avg_ysq = uniform_filter(ycnts**2, size=(nrows, ncols, nbins), mode="nearest")
        var_over_mean = (avg_ysq - avg_y**2) / avg_y
    # crop to valid region
    var_over_mean = var_over_mean[
        (nrows - 1) // 2 : -(nrows - 1) // 2, (ncols - 1) // 2 : -(ncols - 1) // 2, (nbins - 1) // 2 : -(nbins - 1) // 2
    ]
    return var_over_mean, ycnts, avg_y, (nrows, ncols, nbins)


def make_kernel(nrows, ncols, nbins):
    # make a kernel with a background-subtraction shell of approx. the same number of elements as the peak region
    kernel_shape, (nrows_bg, ncols_bg, nbins_bg) = get_kernel_shape(nrows, ncols, nbins)
@@ -59,15 +59,13 @@ def setUpClass(cls):
def tearDownClass(cls):
AnalysisDataService.clear()

- def _assert_found_correct_peaks(self, peak_ws):
+ def _assert_found_correct_peaks(self, peak_ws, integrated=True):
self.assertEqual(peak_ws.getNumberPeaks(), 1)
peak_ws = SortPeaksWorkspace(
InputWorkspace=peak_ws, OutputWorkspace=peak_ws.name(), ColumnNameToSortBy="DetID", SortAscending=False
)
pk = peak_ws.getPeak(0)
self.assertEqual(pk.getDetectorID(), 74)
- self.assertAlmostEqual(pk.getTOF(), 5.0, delta=1e-8)
- self.assertAlmostEqual(pk.getIntensityOverSigma(), 7.4826, delta=1e-4)
+ if integrated:
+     self.assertAlmostEqual(pk.getTOF(), 5.0, delta=1e-8)
+     self.assertAlmostEqual(pk.getIntensityOverSigma(), 7.4826, delta=1e-4)

def test_exec_specify_nbins(self):
out = FindSXPeaksConvolve(
@@ -93,10 +91,23 @@ def test_exec_IoverSigma_threshold(self):

def test_exec_min_frac_size(self):
out = FindSXPeaksConvolve(
InputWorkspace=self.ws, PeaksWorkspace="peaks1", NRows=3, NCols=3, NBins=5, ThresholdIoverSigma=3.0, MinFracSize=0.5
InputWorkspace=self.ws, PeaksWorkspace="peaks4", NRows=3, NCols=3, NBins=5, ThresholdIoverSigma=3.0, MinFracSize=0.5
)
self.assertEqual(out.getNumberPeaks(), 0)

def test_exec_VarOverMean(self):
out = FindSXPeaksConvolve(
InputWorkspace=self.ws,
PeaksWorkspace="peaks5",
NRows=5,
NCols=5,
NBins=3,
PeakFindingStrategy="VarianceOverMean",
ThresholdVarianceOverMean=3.0,
MinFracSize=0.02,
)
self._assert_found_correct_peaks(out, integrated=False)


if __name__ == "__main__":
unittest.main()
6 changes: 3 additions & 3 deletions Testing/SystemTests/tests/framework/SXDAnalysis.py
@@ -21,16 +21,16 @@ def cleanup(self):

def runTest(self):
ws = Load(Filename="SXD23767.raw", LoadMonitors="Exclude")
- self.peaks = SXD.find_sx_peaks(ws, nstd=6)
+ self.peaks = SXD.find_sx_peaks(ws, ThresholdVarianceOverMean=1.5)
FindUBUsingFFT(PeaksWorkspace=self.peaks, MinD=1, MaxD=10, Tolerance=0.15)
SelectCellOfType(PeaksWorkspace=self.peaks, CellType="Cubic", Centering="F", Apply=True)
OptimizeLatticeForCellType(PeaksWorkspace=self.peaks, CellType="Cubic", Apply=True)
self.nindexed, *_ = IndexPeaks(PeaksWorkspace=self.peaks, Tolerance=0.1, CommonUBForAll=True)

def validate(self):
- self.assertEqual(214, self.nindexed)
+ self.assertEqual(171, self.nindexed)
latt = SXD.retrieve(self.peaks).sample().getOrientedLattice()
- a, alpha = 5.6541, 90  # published value for NaCl is a=5.6402 but the detector positions haven't been calibrated
+ a, alpha = 5.6533, 90  # published value for NaCl is a=5.6402 but the detector positions haven't been calibrated
self.assertAlmostEqual(a, latt.a(), delta=1e-5)
self.assertAlmostEqual(a, latt.b(), delta=1e-5)
self.assertAlmostEqual(a, latt.c(), delta=1e-5)
@@ -0,0 +1 @@
23bbde7e64f3d3fcb00984fd91e89e62
27 changes: 20 additions & 7 deletions docs/source/algorithms/FindSXPeaksConvolve-v1.rst
@@ -10,8 +10,18 @@ Description
-----------

This is an algorithm to find single-crystal Bragg peaks in a :ref:`MatrixWorkspace <MatrixWorkspace>` with detector
- banks of type :ref:`RectangularDetector <RectangularDetector>` (e.g. SXD, TOPAZ) by integrating the data using a
- convolution with a shoebox kernel.
+ banks of type :ref:`RectangularDetector <RectangularDetector>` (e.g. SXD, TOPAZ).

There are two peak-finding strategies, set by ``PeakFindingStrategy``:

a. ``PeakFindingStrategy="IOverSigma"`` - integrate the data by convolution with a shoebox kernel and look for
   regions with statistically significant I/sigma (larger than ``ThresholdIoverSigma``). Note that a valid peak
   would be expected to have intensity/sigma > 3, and stronger peaks will have a larger intensity/sigma.

b. ``PeakFindingStrategy="VarianceOverMean"`` - look for regions where the ratio of variance/mean is larger than
   ``ThresholdVarianceOverMean``. For Poisson-distributed counts with a constant count rate the ratio is expected
   to be 1. This peak-finding criterion is taken from DIALS [1]_ (see the sketch below this list).
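
As an illustrative sketch (not part of the algorithm), the variance/mean criterion can be demonstrated on synthetic
data: the ratio stays near 1 where the count rate is locally constant and rises where the rate varies within the
local window, i.e. in the vicinity of a peak.

.. code-block:: python

    import numpy as np
    from scipy.ndimage import uniform_filter

    rng = np.random.default_rng(seed=1)
    counts = rng.poisson(lam=10, size=(50, 50, 20)).astype(float)  # flat Poisson background
    counts[20:25, 20:25, 8:12] = rng.poisson(lam=100, size=(5, 5, 4))  # inject a fake peak

    # local mean E[X] and mean of squares E[X^2] over a 5 x 5 x 3 window
    avg = uniform_filter(counts, size=(5, 5, 3), mode="nearest")
    avg_sq = uniform_filter(counts**2, size=(5, 5, 3), mode="nearest")
    ratio = (avg_sq - avg**2) / avg  # ~1 in the background, large around the injected peak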


The size of the kernel is defined in the input to the algorithm and should match the approximate extent of a typical peak.
The size on the detector is governed by ``NRows`` and ``NCols`` which are in units of pixels.
@@ -26,15 +36,14 @@ The size of the kernel along the TOF dimension can be specified in one of two ways
Note to use method 2, back-to-back exponential coefficients must be defined in the Parameters.xml file for the
instrument.

- The integration requires a background shell with negative weights, such that the total kernel size is increased by a
- factor 1.25 along each dimension (such that there are approximately the same number of elements in the kernel and the
- background shell). The integral of the kernel and background shell together is zero.
+ The shoebox integration (for ``PeakFindingStrategy="IOverSigma"``) requires a background shell with negative weights:
+ the total kernel size is increased by a factor of 1.25 along each dimension, so that there are approximately the same
+ number of elements in the peak region and the background shell. The integral of the kernel and background shell
+ together is zero.
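
A minimal sketch of such a kernel (illustrative only; the algorithm's internal ``make_kernel`` may differ in detail):

.. code-block:: python

    import numpy as np

    def make_shoebox_kernel(nrows, ncols, nbins):
        # grow each dimension by ~1.25x to make room for the background shell
        shape = tuple(int(np.ceil(1.25 * n)) for n in (nrows, ncols, nbins))
        kernel = np.zeros(shape)
        # positive peak region, centred in the kernel
        lo = [(s - n) // 2 for s, n in zip(shape, (nrows, ncols, nbins))]
        kernel[lo[0] : lo[0] + nrows, lo[1] : lo[1] + ncols, lo[2] : lo[2] + nbins] = 1.0
        # negative background shell chosen so the kernel integrates to zero
        npk = nrows * ncols * nbins
        kernel[kernel == 0] = -npk / (kernel.size - npk)
        return kernel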

The integrated intensity is evaluated by convolution of the signal array with the kernel, and the square of the error
on the integrated intensity is determined by convolution of the squared error array with the square of the kernel.
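
In terms of arrays, this corresponds to the following sketch (using the same scipy calls as the algorithm source, and
the ``make_shoebox_kernel`` from the sketch above):

.. code-block:: python

    import numpy as np
    from scipy.signal import convolve

    rng = np.random.default_rng(seed=2)
    y = rng.poisson(lam=10, size=(50, 50, 20)).astype(float)  # signal array (counts)
    esq = y.copy()  # for raw counts the squared error equals the counts
    kernel = make_shoebox_kernel(5, 5, 3)

    intens = convolve(y, kernel, mode="valid")               # integrated intensity
    sigma = np.sqrt(convolve(esq, kernel**2, mode="valid"))  # error on the intensity
    with np.errstate(divide="ignore", invalid="ignore"):
        intens_over_sigma = intens / sigma  # compared against ThresholdIoverSigma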

- The threshold for peak detection is given by ``ThresholdIoverSigma`` which is the cutoff ratio of intensity/sigma (i.e.
- a valid peak would be expected to have intensity/sigma > 3). Stronger peaks will have a larger intensity/sigma.

Usage
-----
@@ -56,6 +65,10 @@

Found 261 peaks

References
----------

.. [1] Winter, G., et al. "DIALS: implementation and evaluation of a new integration package." Acta Crystallographica Section D: Structural Biology 74.2 (2018): 85-97.

.. categories::

@@ -0,0 +1,2 @@
- Add option to find peaks using the ratio of variance/mean in :ref:`FindSXPeaksConvolve <algm-FindSXPeaksConvolve>` - this is a peak-finding criterion used in the DIALS software (Winter, G., et al. Acta Crystallographica Section D: Structural Biology 74.2 (2018): 85-97).
- :ref:`FindSXPeaksConvolve <algm-FindSXPeaksConvolve>` is now the default peak-finding algorithm in the SXD reduction class.
@@ -37,7 +37,7 @@ Here is an example using the ``SXD`` class that finds peaks and then removes duplicate peaks

# find peaks using SXD static method - determines peak threshold from
# the standard deviation of the intensity distribution
- peaks_ws = SXD.find_sx_peaks(ws, nstd=6)
+ peaks_ws = SXD.find_sx_peaks(ws, ThresholdVarianceOverMean=2.0)

Contributor: I guess this comment needs to be updated to represent variance over mean.
SXD.remove_duplicate_peaks_by_qlab(peaks_ws, q_tol=0.05)

# find a UB
20 changes: 12 additions & 8 deletions scripts/Diffraction/single_crystal/sxd.py
@@ -336,16 +336,20 @@ def remove_peaks_on_detector_edge(peaks, nedge):
mantid.DeleteTableRows(TableWorkspace=peaks, Rows=iremove, EnableLogging=False)

@staticmethod
- def find_sx_peaks(ws, bg=None, nstd=None, lambda_min=0.45, lambda_max=3, nbunch=3, out_peaks=None, **kwargs):
+ def find_sx_peaks(ws, out_peaks=None, **kwargs):
wsname = SXD.retrieve(ws).name()
- ws_rb = wsname + "_rb"
- mantid.Rebunch(InputWorkspace=ws, OutputWorkspace=ws_rb, NBunch=nbunch, EnableLogging=False)
- SXD.mask_detector_edges(ws_rb)
- SXD.crop_ws(ws_rb, lambda_min, lambda_max, xunit="Wavelength")
if out_peaks is None:
-     out_peaks = wsname + "_peaks"  # need to do this so not "_rb_peaks"
- BaseSX.find_sx_peaks(ws_rb, bg, nstd, out_peaks, **kwargs)
- mantid.DeleteWorkspace(ws_rb, EnableLogging=False)
+     out_peaks = wsname + "_peaks"
+ default_kwargs = {
+     "NRows": 7,
+     "NCols": 7,
+     "GetNBinsFromBackToBackParams": True,
+     "NFWHM": 6,
+     "PeakFindingStrategy": "VarianceOverMean",
+     "ThresholdVarianceOverMean": 2,
+ }
+ kwargs = {**default_kwargs, **kwargs}  # provided kwargs overwrite the defaults for duplicate keys
+ mantid.FindSXPeaksConvolve(InputWorkspace=wsname, PeaksWorkspace=out_peaks, **kwargs)
return out_peaks

@staticmethod