Commit
Merge pull request #37171 from mantidproject/variance_over_mean_ratio_in_FindSXPeaksConvovle

Add option to find peaks using the ratio of variance/mean in FindSXPeaksConvolve
SilkeSchomann committed May 2, 2024
2 parents 51943ac + f653bed commit bfe8895
Showing 8 changed files with 144 additions and 62 deletions.
120 changes: 86 additions & 34 deletions Framework/PythonInterface/plugins/algorithms/FindSXPeaksConvolve.py
@@ -19,9 +19,10 @@
EnabledWhenProperty,
PropertyCriterion,
logger,
StringListValidator,
)
import numpy as np
from scipy.ndimage import label, maximum_position, binary_closing, sum_labels, uniform_filter1d
from scipy.ndimage import label, maximum_position, binary_closing, sum_labels, uniform_filter1d, uniform_filter
from scipy.signal import convolve
from IntegratePeaksSkew import InstrumentArrayConverter, get_fwhm_from_back_to_back_params

@@ -109,16 +110,36 @@ def PyInit(self):
validator=FloatBoundedValidator(lower=0.0),
doc="Minimum peak size as a fraction of the kernel size.",
)
self.declareProperty(
name="PeakFindingStrategy",
defaultValue="IOverSigma",
direction=Direction.Input,
validator=StringListValidator(["VarianceOverMean", "IOverSigma"]),
doc="PeakFindingStrategy=IOverSigma will find peaks by integrating data using a shoebox kernel and looking"
" for peaks with I/sigma > ThresholdIoverSigma. PeakFindingStrategy=VarianceOverMean will look for "
"peaks with local variance/mean > ThresholdVarianceOverMean.",
)
self.declareProperty(
name="ThresholdVarianceOverMean",
defaultValue=3.0,
direction=Direction.Input,
validator=FloatBoundedValidator(lower=1.0),
doc="Threshold value for variance/mean used to identify peaks.",
)
use_variance_over_mean = EnabledWhenProperty("PeakFindingStrategy", PropertyCriterion.IsNotDefault)
self.setPropertySettings("ThresholdVarianceOverMean", use_variance_over_mean)
use_intens_over_sigma = EnabledWhenProperty("PeakFindingStrategy", PropertyCriterion.IsDefault)
self.setPropertySettings("ThresholdIoverSigma", use_intens_over_sigma)

def PyExec(self):
# get input
ws = self.getProperty("InputWorkspace").value
threshold_i_over_sig = self.getProperty("ThresholdIoverSigma").value
nrows = self.getProperty("NRows").value
ncols = self.getProperty("NCols").value
nfwhm = self.getProperty("NFWHM").value
get_nbins_from_b2bexp_params = self.getProperty("GetNBinsFromBackToBackParams").value
min_frac_size = self.getProperty("MinFracSize").value
peak_finding_strategy = self.getProperty("PeakFindingStrategy").value

# create output table workspace
peaks = self.exec_child_alg("CreatePeaksWorkspace", InstrumentWorkspace=ws, NumberOfPeaks=0, OutputWorkspace="_peaks")
@@ -148,51 +169,51 @@ def PyExec(self):
# get data in detector coords
peak_data = array_converter.get_peak_data(dummy_pk, detid, bank.getName(), bank.xpixels(), bank.ypixels(), 1, 1)
_, y, esq, _ = peak_data.get_data_arrays() # 3d arrays [rows x cols x tof]
# perform convolutions to integrate kernel/shoebox
# pad with nearest so don't get peaks at edge when -ve values go outside data extent
kernel = make_kernel(nrows, ncols, nbins)

yconv = convolve(y, kernel, mode="valid")
econv = np.sqrt(convolve(esq, kernel**2, mode="valid"))

with np.errstate(divide="ignore", invalid="ignore"):
intens_over_sig = yconv / econv # ignore 0/0 which produces NaN (recall NaN > x = False)
intens_over_sig[~np.isfinite(intens_over_sig)] = 0
intens_over_sig = uniform_filter1d(intens_over_sig, size=3, axis=2, mode="nearest")

# find peaks above threshold I/sigma
pk_mask = intens_over_sig > threshold_i_over_sig
if peak_finding_strategy == "IOverSigma":
threshold = self.getProperty("ThresholdIoverSigma").value
ratio, yconv, econv, kernel_shape = calculate_intens_over_sigma(y, esq, nrows, ncols, nbins)
else:
threshold = self.getProperty("ThresholdVarianceOverMean").value
ratio, ycnts, avg_y, kernel_shape = calculate_variance_over_mean(y, esq, nrows, ncols, nbins)
# perform final smoothing
ratio[~np.isfinite(ratio)] = 0
ratio = uniform_filter1d(ratio, size=3, axis=2, mode="nearest")
# identify peaks above threshold
pk_mask = ratio > threshold
pk_mask = binary_closing(pk_mask) # removes holes - helps merge close peaks
labels, nlabels = label(pk_mask) # identify contiguous nearest-neighbour connected regions
# identify labels of peaks above min size
min_size = int(min_frac_size * kernel.size)
min_size = int(min_frac_size * np.prod(kernel_shape))
npixels = sum_labels(pk_mask, labels, range(1, nlabels + 1))
ilabels = np.flatnonzero(npixels > min_size) + 1

# find index of maximum in I/sigma for each valid peak (label index in ilabels)
imaxs = maximum_position(intens_over_sig, labels, ilabels)
imaxs = maximum_position(ratio, labels, ilabels)

# add peaks to table
for ipk in range(len(imaxs)):
irow, icol, itof = imaxs[ipk]

peak_conv_intens = yconv[irow, icol, itof]
sig_conv_intens = econv[irow, icol, itof]

# map convolution output to the input
irow += (kernel.shape[0] - 1) // 2
icol += (kernel.shape[1] - 1) // 2
itof += (kernel.shape[2] - 1) // 2
irow += (kernel_shape[0] - 1) // 2
icol += (kernel_shape[1] - 1) // 2
itof += (kernel_shape[2] - 1) // 2

# find peak position
# get data in kernel window around index with max I/sigma
irow_lo = np.clip(irow - kernel.shape[0] // 2, a_min=0, a_max=y.shape[0])
irow_hi = np.clip(irow + kernel.shape[0] // 2, a_min=0, a_max=y.shape[0])
icol_lo = np.clip(icol - kernel.shape[1] // 2, a_min=0, a_max=y.shape[1])
icol_hi = np.clip(icol + kernel.shape[1] // 2, a_min=0, a_max=y.shape[1])
itof_lo = np.clip(itof - kernel.shape[2] // 2, a_min=0, a_max=y.shape[2])
itof_hi = np.clip(itof + kernel.shape[2] // 2, a_min=0, a_max=y.shape[2])
irow_lo = np.clip(irow - kernel_shape[0] // 2, a_min=0, a_max=y.shape[0])
irow_hi = np.clip(irow + kernel_shape[0] // 2, a_min=0, a_max=y.shape[0])
icol_lo = np.clip(icol - kernel_shape[1] // 2, a_min=0, a_max=y.shape[1])
icol_hi = np.clip(icol + kernel_shape[1] // 2, a_min=0, a_max=y.shape[1])
itof_lo = np.clip(itof - kernel_shape[2] // 2, a_min=0, a_max=y.shape[2])
itof_hi = np.clip(itof + kernel_shape[2] // 2, a_min=0, a_max=y.shape[2])
ypk = y[irow_lo:irow_hi, icol_lo:icol_hi, itof_lo:itof_hi]
if peak_finding_strategy == "VarianceOverMean":
# perform additional check as in Winter (2018) https://doi.org/10.1107/S2059798317017235
background_cnts = avg_y[tuple(imaxs[ipk])]
ypk_cnts = ycnts[irow_lo:irow_hi, icol_lo:icol_hi, itof_lo:itof_hi]
if not np.any(ypk_cnts > background_cnts + np.sqrt(background_cnts)):
continue # skip peak
# integrate over TOF and select detector with max intensity
imax_det = np.argmax(ypk.sum(axis=2))
irow_max, icol_max = np.unravel_index(imax_det, ypk.shape[0:-1])
@@ -209,10 +230,13 @@ def PyExec(self):
DetectorID=int(peak_data.detids[irow_max, icol_max]),
)
# set intensity of peak (rough estimate)
pk = peaks.getPeak(peaks.getNumberPeaks() - 1)
bin_width = xspec[itof + 1] - xspec[itof]
pk.setIntensity(peak_conv_intens * bin_width)
pk.setSigmaIntensity(sig_conv_intens * bin_width)
if peak_finding_strategy == "IOverSigma":
peak_conv_intens = yconv[tuple(imaxs[ipk])]
sig_conv_intens = econv[tuple(imaxs[ipk])]
pk = peaks.getPeak(peaks.getNumberPeaks() - 1)
bin_width = xspec[itof + 1] - xspec[itof]
pk.setIntensity(peak_conv_intens * bin_width)
pk.setSigmaIntensity(sig_conv_intens * bin_width)

# remove dummy peak
self.exec_child_alg("DeleteTableRows", TableWorkspace=peaks, Rows=[irow_to_del])
Expand All @@ -235,6 +259,34 @@ def exec_child_alg(self, alg_name, **kwargs):
return None


def calculate_intens_over_sigma(y, esq, nrows, ncols, nbins):
kernel = make_kernel(nrows, ncols, nbins) # integration shoebox kernel
yconv = convolve(y, kernel, mode="valid")
econv = np.sqrt(convolve(esq, kernel**2, mode="valid"))
with np.errstate(divide="ignore", invalid="ignore"):
intens_over_sigma = yconv / econv # ignore 0/0 which produces NaN (recall NaN > x = False)
return intens_over_sigma, yconv, econv, kernel.shape


def calculate_variance_over_mean(y, esq, nrows, ncols, nbins):
# Evaluate ratio of Variance/mean (should be 1 for Poisson distribution if constant bg)
# Peak finding criterion used in DIALS, Winter (2018) https://doi.org/10.1107/S2059798317017235
with np.errstate(divide="ignore", invalid="ignore"):
# scale to raw counts
scale = y / esq
scale[~np.isfinite(scale)] = 0
ycnts = y * scale
# calculate variance = (E[X^2] - E[X]^2)
avg_y = uniform_filter(ycnts, size=(nrows, ncols, nbins), mode="nearest")
avg_ysq = uniform_filter(ycnts**2, size=(nrows, ncols, nbins), mode="nearest")
var_over_mean = (avg_ysq - avg_y**2) / avg_y
# crop to valid region
var_over_mean = var_over_mean[
(nrows - 1) // 2 : -(nrows - 1) // 2, (ncols - 1) // 2 : -(ncols - 1) // 2, (nbins - 1) // 2 : -(nbins - 1) // 2
]
return var_over_mean, ycnts, avg_y, (nrows, ncols, nbins)
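To illustrate the criterion (an editorial sketch, not part of the commit, using raw Poisson counts and therefore skipping the y / esq scaling step above): for a flat Poisson background the local variance/mean ratio sits near 1, while a localised peak pushes it well above a typical threshold.

```python
import numpy as np
from scipy.ndimage import uniform_filter

rng = np.random.default_rng(0)
counts = rng.poisson(lam=5.0, size=(32, 32, 16)).astype(float)  # flat Poisson background
counts[14:18, 14:18, 6:10] += 50.0  # add a fake peak

# local mean E[X] and local E[X^2] over a 5x5x3 window, as in calculate_variance_over_mean
avg = uniform_filter(counts, size=(5, 5, 3), mode="nearest")
avg_sq = uniform_filter(counts**2, size=(5, 5, 3), mode="nearest")
var_over_mean = (avg_sq - avg**2) / avg  # (E[X^2] - E[X]^2) / E[X]

print(var_over_mean[2, 2, 2])    # background voxel: close to 1
print(var_over_mean[16, 16, 8])  # peak voxel: well above a threshold of ~3
```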


def make_kernel(nrows, ncols, nbins):
# make a kernel with background subtraction shell of approx. same number of elements as peak region
kernel_shape, (nrows_bg, ncols_bg, nbins_bg) = get_kernel_shape(nrows, ncols, nbins)
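The bodies of make_kernel and get_kernel_shape are truncated in this view; as an editorial sketch only - not the actual implementation - a zero-sum shoebox kernel of this kind could be built along the following lines, using the ~1.25 size factor quoted in the algorithm documentation below.

```python
import numpy as np


def make_zero_sum_kernel_sketch(nrows, ncols, nbins):
    # grow each dimension by ~1.25x and round up to an odd size so the kernel has a centre
    def outer(n):
        n_out = int(np.ceil(1.25 * n))
        return n_out if n_out % 2 else n_out + 1

    shape = (outer(nrows), outer(ncols), outer(nbins))
    kernel = -np.ones(shape)  # start with the negative background shell everywhere
    pads = [(s - n) // 2 for s, n in zip(shape, (nrows, ncols, nbins))]
    kernel[pads[0]:pads[0] + nrows, pads[1]:pads[1] + ncols, pads[2]:pads[2] + nbins] = 1.0  # peak region
    # scale the shell so the whole kernel integrates to zero
    npk = nrows * ncols * nbins
    nbg = kernel.size - npk
    kernel[kernel < 0] *= npk / nbg
    return kernel  # kernel.sum() ~ 0 by construction
```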
@@ -59,15 +59,13 @@ def setUpClass(cls):
def tearDownClass(cls):
AnalysisDataService.clear()

def _assert_found_correct_peaks(self, peak_ws):
def _assert_found_correct_peaks(self, peak_ws, integrated=True):
self.assertEqual(peak_ws.getNumberPeaks(), 1)
peak_ws = SortPeaksWorkspace(
InputWorkspace=peak_ws, OutputWorkspace=peak_ws.name(), ColumnNameToSortBy="DetID", SortAscending=False
)
pk = peak_ws.getPeak(0)
self.assertEqual(pk.getDetectorID(), 74)
self.assertAlmostEqual(pk.getTOF(), 5.0, delta=1e-8)
self.assertAlmostEqual(pk.getIntensityOverSigma(), 7.4826, delta=1e-4)
if integrated:
self.assertAlmostEqual(pk.getTOF(), 5.0, delta=1e-8)
self.assertAlmostEqual(pk.getIntensityOverSigma(), 7.4826, delta=1e-4)

def test_exec_specify_nbins(self):
out = FindSXPeaksConvolve(
@@ -93,10 +91,23 @@ def test_exec_IoverSigma_threshold(self):

def test_exec_min_frac_size(self):
out = FindSXPeaksConvolve(
InputWorkspace=self.ws, PeaksWorkspace="peaks1", NRows=3, NCols=3, NBins=5, ThresholdIoverSigma=3.0, MinFracSize=0.5
InputWorkspace=self.ws, PeaksWorkspace="peaks4", NRows=3, NCols=3, NBins=5, ThresholdIoverSigma=3.0, MinFracSize=0.5
)
self.assertEqual(out.getNumberPeaks(), 0)

def test_exec_VarOverMean(self):
out = FindSXPeaksConvolve(
InputWorkspace=self.ws,
PeaksWorkspace="peaks5",
NRows=5,
NCols=5,
NBins=3,
PeakFindingStrategy="VarianceOverMean",
ThresholdVarianceOverMean=3.0,
MinFracSize=0.02,
)
self._assert_found_correct_peaks(out, integrated=False)


if __name__ == "__main__":
unittest.main()
7 changes: 3 additions & 4 deletions Testing/SystemTests/tests/framework/SXDAnalysis.py
@@ -21,23 +21,22 @@ def cleanup(self):

def runTest(self):
ws = Load(Filename="SXD23767.raw", LoadMonitors="Exclude")
self.peaks = SXD.find_sx_peaks(ws, nstd=6)
self.peaks = SXD.find_sx_peaks(ws, ThresholdVarianceOverMean=1.5)
FindUBUsingFFT(PeaksWorkspace=self.peaks, MinD=1, MaxD=10, Tolerance=0.15)
SelectCellOfType(PeaksWorkspace=self.peaks, CellType="Cubic", Centering="F", Apply=True)
OptimizeLatticeForCellType(PeaksWorkspace=self.peaks, CellType="Cubic", Apply=True)
self.nindexed, *_ = IndexPeaks(PeaksWorkspace=self.peaks, Tolerance=0.1, CommonUBForAll=True)

def validate(self):
self.assertEqual(214, self.nindexed)
self.assertEqual(171, self.nindexed)
latt = SXD.retrieve(self.peaks).sample().getOrientedLattice()
a, alpha = 5.6541, 90 # published value for NaCl is a=5.6402 but the detector positions haven't been calibrated
a, alpha = 5.6533, 90 # published value for NaCl is a=5.6402 but the detector positions haven't been calibrated
self.assertAlmostEqual(a, latt.a(), delta=1e-5)
self.assertAlmostEqual(a, latt.b(), delta=1e-5)
self.assertAlmostEqual(a, latt.c(), delta=1e-5)
self.assertAlmostEqual(alpha, latt.alpha(), delta=1e-10)
self.assertAlmostEqual(alpha, latt.beta(), delta=1e-10)
self.assertAlmostEqual(alpha, latt.gamma(), delta=1e-10)
return self.peaks, "SXD23767_found_peaks.nxs"


class SXDDetectorCalibration(systemtesting.MantidSystemTest):
@@ -0,0 +1 @@
23bbde7e64f3d3fcb00984fd91e89e62
27 changes: 20 additions & 7 deletions docs/source/algorithms/FindSXPeaksConvolve-v1.rst
@@ -10,8 +10,18 @@ Description
-----------

This is an algorithm to find single-crystal Bragg peaks in a :ref:`MatrixWorkspace <MatrixWorkspace>` with detector
banks of type :ref:`RectangularDetector <RectangularDetector>` (e.g. SXD, TOPAZ) by integrating the data using a
convolution with a shoebox kernel.
banks of type :ref:`RectangularDetector <RectangularDetector>` (e.g. SXD, TOPAZ).

There are two peak-finding strategies, set by ``PeakFindingStrategy``:

a. ``PeakFindingStrategy="IOverSigma"`` - integrates the data by convolution with a shoebox kernel and looks for
   regions with statistically significant I/sigma (larger than ``ThresholdIoverSigma``). Note that a valid peak
   would typically have I/sigma > 3, and stronger peaks will have a larger I/sigma.

b. ``PeakFindingStrategy="VarianceOverMean"`` - looks for regions where the ratio of variance/mean is larger than
   ``ThresholdVarianceOverMean``. Note that for Poisson-distributed counts with a constant count rate this ratio
   is expected to be 1. This peak-finding criterion is taken from DIALS [1]_.


The size of the kernel is defined in the input to the algorithm and should match the approximate extent of a typical peak.
The size on the detector is governed by ``NRows`` and ``NCols`` which are in units of pixels.
@@ -26,15 +36,14 @@ The size of the kernel along the TOF dimension can be specified in one of two wa
Note to use method 2, back-to-back exponential coefficients must be defined in the Parameters.xml file for the
instrument.

The integration requires a background shell with negative weights, such that the total kernel size is increased by a
factor 1.25 along each dimension (such that there are approximately the same number of elements in the kernel and the
background shell). The integral of the kernel and background shell together is zero.
The shoebox integration (for ``PeakFindingStrategy="IOverSigma"``) requires a background shell with negative weights:
the total kernel size is increased by a factor of 1.25 along each dimension, so that the peak region and the
background shell contain approximately the same number of elements. The integral of the kernel and background shell
together is zero.

The integrated intensity is evaluated by convolution of the signal array with the kernel, and the square of the error
on the integrated intensity is determined by convolution of the squared-error array with the square of the kernel.
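Written out explicitly (an editorial sketch of the relations just described, with y the signal array, e² the squared-error array and K the kernel; indexing shown up to the usual convolution reversal):

```latex
I_{ijk} = \sum_{r,s,t} K_{rst}\, y_{\,i+r,\; j+s,\; k+t},
\qquad
\sigma_I^2\big|_{ijk} = \sum_{r,s,t} K_{rst}^{2}\, e^{2}_{\,i+r,\; j+s,\; k+t},
\qquad
(I/\sigma)_{ijk} = I_{ijk} \big/ \sigma_I\big|_{ijk} .
```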

The threshold for peak detection is given by ``ThresholdIoverSigma`` which is the cutoff ratio of intensity/sigma (i.e.
a valid peak would be expected to have intensity/sigma > 3). Stronger peaks will have a larger intensity/sigma.

Usage
-----
@@ -56,6 +65,10 @@

Found 261 peaks

References
----------

.. [1] Winter, G., et al. Acta Crystallographica Section D: Structural Biology 74.2 (2018): 85-97.
.. categories::

@@ -0,0 +1,2 @@
- Added an option to find peaks using the ratio of variance/mean in :ref:`FindSXPeaksConvolve <algm-FindSXPeaksConvolve>` - this is a peak-finding criterion used in the DIALS software (Winter, G., et al. Acta Crystallographica Section D: Structural Biology 74.2 (2018): 85-97).
- :ref:`FindSXPeaksConvolve <algm-FindSXPeaksConvolve>` is now the default peak-finding algorithm in the SXD reduction class.
@@ -36,8 +36,8 @@ Here is an example using the ``SXD`` class that finds peaks and then removes dup
ws = Load(Filename='SXD33335.nxs', OutputWorkspace='SXD33335')
# find peaks using SXD static method - determines peak threshold from
# the standard deviation of the intensity distribution
peaks_ws = SXD.find_sx_peaks(ws, nstd=6)
# the ratio of local variance over mean
peaks_ws = SXD.find_sx_peaks(ws, ThresholdVarianceOverMean=2.0)
SXD.remove_duplicate_peaks_by_qlab(peaks_ws, q_tol=0.05)
# find a UB
20 changes: 12 additions & 8 deletions scripts/Diffraction/single_crystal/sxd.py
@@ -336,16 +336,20 @@ def remove_peaks_on_detector_edge(peaks, nedge):
mantid.DeleteTableRows(TableWorkspace=peaks, Rows=iremove, EnableLogging=False)

@staticmethod
def find_sx_peaks(ws, bg=None, nstd=None, lambda_min=0.45, lambda_max=3, nbunch=3, out_peaks=None, **kwargs):
def find_sx_peaks(ws, out_peaks=None, **kwargs):
wsname = SXD.retrieve(ws).name()
ws_rb = wsname + "_rb"
mantid.Rebunch(InputWorkspace=ws, OutputWorkspace=ws_rb, NBunch=nbunch, EnableLogging=False)
SXD.mask_detector_edges(ws_rb)
SXD.crop_ws(ws_rb, lambda_min, lambda_max, xunit="Wavelength")
if out_peaks is None:
out_peaks = wsname + "_peaks" # need to do this so not "_rb_peaks"
BaseSX.find_sx_peaks(ws_rb, bg, nstd, out_peaks, **kwargs)
mantid.DeleteWorkspace(ws_rb, EnableLogging=False)
out_peaks = wsname + "_peaks"
default_kwargs = {
"NRows": 7,
"NCols": 7,
"GetNBinsFromBackToBackParams": True,
"NFWHM": 6,
"PeakFindingStrategy": "VarianceOverMean",
"ThresholdVarianceOverMean": 2,
}
kwargs = {**default_kwargs, **kwargs} # will overwrite default with provided if duplicate keys
mantid.FindSXPeaksConvolve(InputWorkspace=wsname, PeaksWorkspace=out_peaks, **kwargs)
return out_peaks
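As a usage note (an editorial sketch; ws is assumed to be a loaded SXD run), any keyword the caller supplies overrides the corresponding default before being forwarded to FindSXPeaksConvolve:

```python
# defaults: NRows=7, NCols=7, GetNBinsFromBackToBackParams=True, NFWHM=6,
# PeakFindingStrategy="VarianceOverMean", ThresholdVarianceOverMean=2;
# here the threshold and the detector window size are overridden.
peaks_name = SXD.find_sx_peaks(ws, ThresholdVarianceOverMean=1.5, NRows=5, NCols=5)
```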

@staticmethod
