Commit
Merge pull request #37171 from mantidproject/variance_over_mean_ratio_in_FindSXPeaksConvovle

Add option to find peaks using the ratio of variance/mean in FindSXPeaksConvolve
SilkeSchomann committed May 2, 2024
2 parents 51943ac + f653bed commit bfe8895
Showing 8 changed files with 144 additions and 62 deletions.
120 changes: 86 additions & 34 deletions Framework/PythonInterface/plugins/algorithms/FindSXPeaksConvolve.py
@@ -19,9 +19,10 @@
EnabledWhenProperty,
PropertyCriterion,
logger,
StringListValidator,
)
import numpy as np
from scipy.ndimage import label, maximum_position, binary_closing, sum_labels, uniform_filter1d
from scipy.ndimage import label, maximum_position, binary_closing, sum_labels, uniform_filter1d, uniform_filter
from scipy.signal import convolve
from IntegratePeaksSkew import InstrumentArrayConverter, get_fwhm_from_back_to_back_params

@@ -109,16 +110,36 @@ def PyInit(self):
validator=FloatBoundedValidator(lower=0.0),
doc="Minimum peak size as a fraction of the kernel size.",
)
self.declareProperty(
name="PeakFindingStrategy",
defaultValue="IOverSigma",
direction=Direction.Input,
validator=StringListValidator(["VarianceOverMean", "IOverSigma"]),
doc="PeakFindingStrategy=IOverSigma will find peaks by integrating data using a shoebox kernel and looking"
" for peaks with I/sigma > ThresholdIoverSigma. PeakFindingStrategy=VarianceOverMean will look for "
"peaks with local variance/mean > ThresholdVarianceOverMean.",
)
self.declareProperty(
name="ThresholdVarianceOverMean",
defaultValue=3.0,
direction=Direction.Input,
validator=FloatBoundedValidator(lower=1.0),
doc="Threshold value for variance/mean used to identify peaks.",
)
use_variance_over_mean = EnabledWhenProperty("PeakFindingStrategy", PropertyCriterion.IsNotDefault)
self.setPropertySettings("ThresholdVarianceOverMean", use_variance_over_mean)
use_intens_over_sigma = EnabledWhenProperty("PeakFindingStrategy", PropertyCriterion.IsDefault)
self.setPropertySettings("ThresholdIoverSigma", use_intens_over_sigma)

def PyExec(self):
# get input
ws = self.getProperty("InputWorkspace").value
threshold_i_over_sig = self.getProperty("ThresholdIoverSigma").value
nrows = self.getProperty("NRows").value
ncols = self.getProperty("NCols").value
nfwhm = self.getProperty("NFWHM").value
get_nbins_from_b2bexp_params = self.getProperty("GetNBinsFromBackToBackParams").value
min_frac_size = self.getProperty("MinFracSize").value
peak_finding_strategy = self.getProperty("PeakFindingStrategy").value

# create output table workspace
peaks = self.exec_child_alg("CreatePeaksWorkspace", InstrumentWorkspace=ws, NumberOfPeaks=0, OutputWorkspace="_peaks")
@@ -148,51 +169,51 @@ def PyExec(self):
# get data in detector coords
peak_data = array_converter.get_peak_data(dummy_pk, detid, bank.getName(), bank.xpixels(), bank.ypixels(), 1, 1)
_, y, esq, _ = peak_data.get_data_arrays() # 3d arrays [rows x cols x tof]
# perform convolutions to integrate kernel/shoebox
# pad with nearest so don't get peaks at edge when -ve values go outside data extent
kernel = make_kernel(nrows, ncols, nbins)

yconv = convolve(y, kernel, mode="valid")
econv = np.sqrt(convolve(esq, kernel**2, mode="valid"))

with np.errstate(divide="ignore", invalid="ignore"):
intens_over_sig = yconv / econv # ignore 0/0 which produces NaN (recall NaN > x = False)
intens_over_sig[~np.isfinite(intens_over_sig)] = 0
intens_over_sig = uniform_filter1d(intens_over_sig, size=3, axis=2, mode="nearest")

# find peaks above threshold I/sigma
pk_mask = intens_over_sig > threshold_i_over_sig
if peak_finding_strategy == "IOverSigma":
threshold = self.getProperty("ThresholdIoverSigma").value
ratio, yconv, econv, kernel_shape = calculate_intens_over_sigma(y, esq, nrows, ncols, nbins)
else:
threshold = self.getProperty("ThresholdVarianceOverMean").value
ratio, ycnts, avg_y, kernel_shape = calculate_variance_over_mean(y, esq, nrows, ncols, nbins)
# perform final smoothing
ratio[~np.isfinite(ratio)] = 0
ratio = uniform_filter1d(ratio, size=3, axis=2, mode="nearest")
# identify peaks above threshold
pk_mask = ratio > threshold
pk_mask = binary_closing(pk_mask) # removes holes - helps merge close peaks
labels, nlabels = label(pk_mask) # identify contiguous nearest-neighbour connected regions
# identify labels of peaks above min size
min_size = int(min_frac_size * kernel.size)
min_size = int(min_frac_size * np.prod(kernel_shape))
npixels = sum_labels(pk_mask, labels, range(1, nlabels + 1))
ilabels = np.flatnonzero(npixels > min_size) + 1

# find index of maximum in I/sigma for each valid peak (label index in ilabels)
imaxs = maximum_position(intens_over_sig, labels, ilabels)
imaxs = maximum_position(ratio, labels, ilabels)

# add peaks to table
for ipk in range(len(imaxs)):
irow, icol, itof = imaxs[ipk]

peak_conv_intens = yconv[irow, icol, itof]
sig_conv_intens = econv[irow, icol, itof]

# map convolution output to the input
irow += (kernel.shape[0] - 1) // 2
icol += (kernel.shape[1] - 1) // 2
itof += (kernel.shape[2] - 1) // 2
irow += (kernel_shape[0] - 1) // 2
icol += (kernel_shape[1] - 1) // 2
itof += (kernel_shape[2] - 1) // 2

# find peak position
# get data in kernel window around index with max I/sigma
irow_lo = np.clip(irow - kernel.shape[0] // 2, a_min=0, a_max=y.shape[0])
irow_hi = np.clip(irow + kernel.shape[0] // 2, a_min=0, a_max=y.shape[0])
icol_lo = np.clip(icol - kernel.shape[1] // 2, a_min=0, a_max=y.shape[1])
icol_hi = np.clip(icol + kernel.shape[1] // 2, a_min=0, a_max=y.shape[1])
itof_lo = np.clip(itof - kernel.shape[2] // 2, a_min=0, a_max=y.shape[2])
itof_hi = np.clip(itof + kernel.shape[2] // 2, a_min=0, a_max=y.shape[2])
irow_lo = np.clip(irow - kernel_shape[0] // 2, a_min=0, a_max=y.shape[0])
irow_hi = np.clip(irow + kernel_shape[0] // 2, a_min=0, a_max=y.shape[0])
icol_lo = np.clip(icol - kernel_shape[1] // 2, a_min=0, a_max=y.shape[1])
icol_hi = np.clip(icol + kernel_shape[1] // 2, a_min=0, a_max=y.shape[1])
itof_lo = np.clip(itof - kernel_shape[2] // 2, a_min=0, a_max=y.shape[2])
itof_hi = np.clip(itof + kernel_shape[2] // 2, a_min=0, a_max=y.shape[2])
ypk = y[irow_lo:irow_hi, icol_lo:icol_hi, itof_lo:itof_hi]
if peak_finding_strategy == "VarianceOverMean":
# perform additional check as in Winter (2018) https://doi.org/10.1107/S2059798317017235
background_cnts = avg_y[tuple(imaxs[ipk])]
ypk_cnts = ycnts[irow_lo:irow_hi, icol_lo:icol_hi, itof_lo:itof_hi]
if not np.any(ypk_cnts > background_cnts + np.sqrt(background_cnts)):
continue # skip peak
# integrate over TOF and select detector with max intensity
imax_det = np.argmax(ypk.sum(axis=2))
irow_max, icol_max = np.unravel_index(imax_det, ypk.shape[0:-1])
@@ -209,10 +230,13 @@ def PyExec(self):
DetectorID=int(peak_data.detids[irow_max, icol_max]),
)
# set intensity of peak (rough estimate)
pk = peaks.getPeak(peaks.getNumberPeaks() - 1)
bin_width = xspec[itof + 1] - xspec[itof]
pk.setIntensity(peak_conv_intens * bin_width)
pk.setSigmaIntensity(sig_conv_intens * bin_width)
if peak_finding_strategy == "IOverSigma":
peak_conv_intens = yconv[tuple(imaxs[ipk])]
sig_conv_intens = econv[tuple(imaxs[ipk])]
pk = peaks.getPeak(peaks.getNumberPeaks() - 1)
bin_width = xspec[itof + 1] - xspec[itof]
pk.setIntensity(peak_conv_intens * bin_width)
pk.setSigmaIntensity(sig_conv_intens * bin_width)

# remove dummy peak
self.exec_child_alg("DeleteTableRows", TableWorkspace=peaks, Rows=[irow_to_del])
Expand All @@ -235,6 +259,34 @@ def exec_child_alg(self, alg_name, **kwargs):
return None


def calculate_intens_over_sigma(y, esq, nrows, ncols, nbins):
kernel = make_kernel(nrows, ncols, nbins) # integration shoebox kernel
yconv = convolve(y, kernel, mode="valid")
econv = np.sqrt(convolve(esq, kernel**2, mode="valid"))
with np.errstate(divide="ignore", invalid="ignore"):
intens_over_sigma = yconv / econv # ignore 0/0 which produces NaN (recall NaN > x = False)
return intens_over_sigma, yconv, econv, kernel.shape


def calculate_variance_over_mean(y, esq, nrows, ncols, nbins):
# Evaluate ratio of Variance/mean (should be 1 for Poisson distribution if constant bg)
# Peak finding criterion used in DIALS, Winter (2018) https://doi.org/10.1107/S2059798317017235
with np.errstate(divide="ignore", invalid="ignore"):
# scale to raw counts
scale = y / esq
scale[~np.isfinite(scale)] = 0
ycnts = y * scale
# calculate variance = (E[X^2] - E[X]^2)
avg_y = uniform_filter(ycnts, size=(nrows, ncols, nbins), mode="nearest")
avg_ysq = uniform_filter(ycnts**2, size=(nrows, ncols, nbins), mode="nearest")
var_over_mean = (avg_ysq - avg_y**2) / avg_y
# crop to valid region
var_over_mean = var_over_mean[
(nrows - 1) // 2 : -(nrows - 1) // 2, (ncols - 1) // 2 : -(ncols - 1) // 2, (nbins - 1) // 2 : -(nbins - 1) // 2
]
return var_over_mean, ycnts, avg_y, (nrows, ncols, nbins)
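To illustrate the criterion (an editorial sketch, not part of the commit, using raw Poisson counts and therefore skipping the y / esq scaling step above): for a flat Poisson background the local variance/mean ratio sits near 1, while a localised peak pushes it well above a typical threshold.

```python
import numpy as np
from scipy.ndimage import uniform_filter

rng = np.random.default_rng(0)
counts = rng.poisson(lam=5.0, size=(32, 32, 16)).astype(float)  # flat Poisson background
counts[14:18, 14:18, 6:10] += 50.0  # add a fake peak

# local mean E[X] and local E[X^2] over a 5x5x3 window, as in calculate_variance_over_mean
avg = uniform_filter(counts, size=(5, 5, 3), mode="nearest")
avg_sq = uniform_filter(counts**2, size=(5, 5, 3), mode="nearest")
var_over_mean = (avg_sq - avg**2) / avg  # (E[X^2] - E[X]^2) / E[X]

print(var_over_mean[2, 2, 2])    # background voxel: close to 1
print(var_over_mean[16, 16, 8])  # peak voxel: well above a threshold of ~3
```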


def make_kernel(nrows, ncols, nbins):
# make a kernel with background subtraction shell of approx. same number of elements as peak region
kernel_shape, (nrows_bg, ncols_bg, nbins_bg) = get_kernel_shape(nrows, ncols, nbins)
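The bodies of make_kernel and get_kernel_shape are truncated in this view; as an editorial sketch only - not the actual implementation - a zero-sum shoebox kernel of this kind could be built along the following lines, using the ~1.25 size factor quoted in the algorithm documentation below.

```python
import numpy as np


def make_zero_sum_kernel_sketch(nrows, ncols, nbins):
    # grow each dimension by ~1.25x and round up to an odd size so the kernel has a centre
    def outer(n):
        n_out = int(np.ceil(1.25 * n))
        return n_out if n_out % 2 else n_out + 1

    shape = (outer(nrows), outer(ncols), outer(nbins))
    kernel = -np.ones(shape)  # start with the negative background shell everywhere
    pads = [(s - n) // 2 for s, n in zip(shape, (nrows, ncols, nbins))]
    kernel[pads[0]:pads[0] + nrows, pads[1]:pads[1] + ncols, pads[2]:pads[2] + nbins] = 1.0  # peak region
    # scale the shell so the whole kernel integrates to zero
    npk = nrows * ncols * nbins
    nbg = kernel.size - npk
    kernel[kernel < 0] *= npk / nbg
    return kernel  # kernel.sum() ~ 0 by construction
```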
@@ -59,15 +59,13 @@ def setUpClass(cls):
def tearDownClass(cls):
AnalysisDataService.clear()

def _assert_found_correct_peaks(self, peak_ws):
def _assert_found_correct_peaks(self, peak_ws, integrated=True):
self.assertEqual(peak_ws.getNumberPeaks(), 1)
peak_ws = SortPeaksWorkspace(
InputWorkspace=peak_ws, OutputWorkspace=peak_ws.name(), ColumnNameToSortBy="DetID", SortAscending=False
)
pk = peak_ws.getPeak(0)
self.assertEqual(pk.getDetectorID(), 74)
self.assertAlmostEqual(pk.getTOF(), 5.0, delta=1e-8)
self.assertAlmostEqual(pk.getIntensityOverSigma(), 7.4826, delta=1e-4)
if integrated:
self.assertAlmostEqual(pk.getTOF(), 5.0, delta=1e-8)
self.assertAlmostEqual(pk.getIntensityOverSigma(), 7.4826, delta=1e-4)

def test_exec_specify_nbins(self):
out = FindSXPeaksConvolve(
@@ -93,10 +91,23 @@ def test_exec_IoverSigma_threshold(self):

def test_exec_min_frac_size(self):
out = FindSXPeaksConvolve(
InputWorkspace=self.ws, PeaksWorkspace="peaks1", NRows=3, NCols=3, NBins=5, ThresholdIoverSigma=3.0, MinFracSize=0.5
InputWorkspace=self.ws, PeaksWorkspace="peaks4", NRows=3, NCols=3, NBins=5, ThresholdIoverSigma=3.0, MinFracSize=0.5
)
self.assertEqual(out.getNumberPeaks(), 0)

def test_exec_VarOverMean(self):
out = FindSXPeaksConvolve(
InputWorkspace=self.ws,
PeaksWorkspace="peaks5",
NRows=5,
NCols=5,
NBins=3,
PeakFindingStrategy="VarianceOverMean",
ThresholdVarianceOverMean=3.0,
MinFracSize=0.02,
)
self._assert_found_correct_peaks(out, integrated=False)


if __name__ == "__main__":
unittest.main()
7 changes: 3 additions & 4 deletions Testing/SystemTests/tests/framework/SXDAnalysis.py
@@ -21,23 +21,22 @@ def cleanup(self):

def runTest(self):
ws = Load(Filename="SXD23767.raw", LoadMonitors="Exclude")
self.peaks = SXD.find_sx_peaks(ws, nstd=6)
self.peaks = SXD.find_sx_peaks(ws, ThresholdVarianceOverMean=1.5)
FindUBUsingFFT(PeaksWorkspace=self.peaks, MinD=1, MaxD=10, Tolerance=0.15)
SelectCellOfType(PeaksWorkspace=self.peaks, CellType="Cubic", Centering="F", Apply=True)
OptimizeLatticeForCellType(PeaksWorkspace=self.peaks, CellType="Cubic", Apply=True)
self.nindexed, *_ = IndexPeaks(PeaksWorkspace=self.peaks, Tolerance=0.1, CommonUBForAll=True)

def validate(self):
self.assertEqual(214, self.nindexed)
self.assertEqual(171, self.nindexed)
latt = SXD.retrieve(self.peaks).sample().getOrientedLattice()
a, alpha = 5.6541, 90 # published value for NaCl is a=5.6402 but the detector positions haven't been calibrated
a, alpha = 5.6533, 90 # published value for NaCl is a=5.6402 but the detector positions haven't been calibrated
self.assertAlmostEqual(a, latt.a(), delta=1e-5)
self.assertAlmostEqual(a, latt.b(), delta=1e-5)
self.assertAlmostEqual(a, latt.c(), delta=1e-5)
self.assertAlmostEqual(alpha, latt.alpha(), delta=1e-10)
self.assertAlmostEqual(alpha, latt.beta(), delta=1e-10)
self.assertAlmostEqual(alpha, latt.gamma(), delta=1e-10)
return self.peaks, "SXD23767_found_peaks.nxs"


class SXDDetectorCalibration(systemtesting.MantidSystemTest):
@@ -0,0 +1 @@
23bbde7e64f3d3fcb00984fd91e89e62
27 changes: 20 additions & 7 deletions docs/source/algorithms/FindSXPeaksConvolve-v1.rst
@@ -10,8 +10,18 @@ Description
-----------

This is an algorithm to find single-crystal Bragg peaks in a :ref:`MatrixWorkspace <MatrixWorkspace>` with detector
banks of type :ref:`RectangularDetector <RectangularDetector>` (e.g. SXD, TOPAZ) by integrating the data using a
convolution with a shoebox kernel.
banks of type :ref:`RectangularDetector <RectangularDetector>` (e.g. SXD, TOPAZ).

There are two peak-finding strategies, set by ``PeakFindingStrategy``:

a. ``PeakFindingStrategy="IOverSigma"`` - integrates the data by convolution with a shoebox kernel and looks for
   regions with statistically significant I/sigma (larger than ``ThresholdIoverSigma``). Note that a valid peak
   would typically have I/sigma > 3, and stronger peaks will have a larger I/sigma.

b. ``PeakFindingStrategy="VarianceOverMean"`` - looks for regions where the ratio of variance/mean is larger than
   ``ThresholdVarianceOverMean``. Note that for Poisson-distributed counts with a constant count rate this ratio
   is expected to be 1. This peak-finding criterion is taken from DIALS [1]_.


The size of the kernel is defined in the input to the algorithm and should match the approximate extent of a typical peak.
The size on the detector is governed by ``NRows`` and ``NCols`` which are in units of pixels.
@@ -26,15 +36,14 @@ The size of the kernel along the TOF dimension can be specified in one of two wa
Note to use method 2, back-to-back exponential coefficients must be defined in the Parameters.xml file for the
instrument.

The integration requires a background shell with negative weights, such that the total kernel size is increased by a
factor 1.25 along each dimension (such that there are approximately the same number of elements in the kernel and the
background shell). The integral of the kernel and background shell together is zero.
The shoebox integration (for ``PeakFindingStrategy="IOverSigma"``) requires a background shell with negative weights:
the total kernel size is increased by a factor of 1.25 along each dimension, so that the peak region and the
background shell contain approximately the same number of elements. The integral of the kernel and background shell
together is zero.

The integrated intensity is evaluated by convolution of the signal array with the kernel, and the square of the error
on the integrated intensity is determined by convolution of the squared-error array with the square of the kernel.
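Written out explicitly (an editorial sketch of the relations just described, with y the signal array, e² the squared-error array and K the kernel; indexing shown up to the usual convolution reversal):

```latex
I_{ijk} = \sum_{r,s,t} K_{rst}\, y_{\,i+r,\; j+s,\; k+t},
\qquad
\sigma_I^2\big|_{ijk} = \sum_{r,s,t} K_{rst}^{2}\, e^{2}_{\,i+r,\; j+s,\; k+t},
\qquad
(I/\sigma)_{ijk} = I_{ijk} \big/ \sigma_I\big|_{ijk} .
```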

The threshold for peak detection is given by ``ThresholdIoverSigma`` which is the cutoff ratio of intensity/sigma (i.e.
a valid peak would be expected to have intensity/sigma > 3). Stronger peaks will have a larger intensity/sigma.

Usage
-----
@@ -56,6 +65,10 @@

Found 261 peaks

References
----------

.. [1] Winter, G., et al. Acta Crystallographica Section D: Structural Biology 74.2 (2018): 85-97.
.. categories::

@@ -0,0 +1,2 @@
- Added an option to find peaks using the ratio of variance/mean in :ref:`FindSXPeaksConvolve <algm-FindSXPeaksConvolve>` - this is a peak-finding criterion used in the DIALS software (Winter, G., et al. Acta Crystallographica Section D: Structural Biology 74.2 (2018): 85-97).
- :ref:`FindSXPeaksConvolve <algm-FindSXPeaksConvolve>` is now the default peak-finding algorithm in the SXD reduction class.
@@ -36,8 +36,8 @@ Here is an example using the ``SXD`` class that finds peaks and then removes dup
ws = Load(Filename='SXD33335.nxs', OutputWorkspace='SXD33335')
# find peaks using SXD static method - determines peak threshold from
# the standard deviation of the intensity distribution
peaks_ws = SXD.find_sx_peaks(ws, nstd=6)
# the ratio of local variance over mean
peaks_ws = SXD.find_sx_peaks(ws, ThresholdVarianceOverMean=2.0)
SXD.remove_duplicate_peaks_by_qlab(peaks_ws, q_tol=0.05)
# find a UB
20 changes: 12 additions & 8 deletions scripts/Diffraction/single_crystal/sxd.py
@@ -336,16 +336,20 @@ def remove_peaks_on_detector_edge(peaks, nedge):
mantid.DeleteTableRows(TableWorkspace=peaks, Rows=iremove, EnableLogging=False)

@staticmethod
def find_sx_peaks(ws, bg=None, nstd=None, lambda_min=0.45, lambda_max=3, nbunch=3, out_peaks=None, **kwargs):
def find_sx_peaks(ws, out_peaks=None, **kwargs):
wsname = SXD.retrieve(ws).name()
ws_rb = wsname + "_rb"
mantid.Rebunch(InputWorkspace=ws, OutputWorkspace=ws_rb, NBunch=nbunch, EnableLogging=False)
SXD.mask_detector_edges(ws_rb)
SXD.crop_ws(ws_rb, lambda_min, lambda_max, xunit="Wavelength")
if out_peaks is None:
out_peaks = wsname + "_peaks" # need to do this so not "_rb_peaks"
BaseSX.find_sx_peaks(ws_rb, bg, nstd, out_peaks, **kwargs)
mantid.DeleteWorkspace(ws_rb, EnableLogging=False)
out_peaks = wsname + "_peaks"
default_kwargs = {
"NRows": 7,
"NCols": 7,
"GetNBinsFromBackToBackParams": True,
"NFWHM": 6,
"PeakFindingStrategy": "VarianceOverMean",
"ThresholdVarianceOverMean": 2,
}
kwargs = {**default_kwargs, **kwargs} # will overwrite default with provided if duplicate keys
mantid.FindSXPeaksConvolve(InputWorkspace=wsname, PeaksWorkspace=out_peaks, **kwargs)
return out_peaks
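As a usage note (an editorial sketch; ws is assumed to be a loaded SXD run), any keyword the caller supplies overrides the corresponding default before being forwarded to FindSXPeaksConvolve:

```python
# defaults: NRows=7, NCols=7, GetNBinsFromBackToBackParams=True, NFWHM=6,
# PeakFindingStrategy="VarianceOverMean", ThresholdVarianceOverMean=2;
# here the threshold and the detector window size are overridden.
peaks_name = SXD.find_sx_peaks(ws, ThresholdVarianceOverMean=1.5, NRows=5, NCols=5)
```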

@staticmethod
