Commit

Merge branch 'develop'
castillohair committed Mar 23, 2016
2 parents 13e91f3 + 8be6839 commit a8feb8b
Showing 14 changed files with 1,352 additions and 465 deletions.
2 changes: 1 addition & 1 deletion FlowCal/__init__.py
@@ -6,7 +6,7 @@
# Versions should comply with PEP440. For a discussion on single-sourcing
# the version across setup.py and the project code, see
# https://packaging.python.org/en/latest/single_source_version.html
__version__ = '1.0.0b2'
__version__ = '1.0.0'

import io
import excel_ui
994 changes: 703 additions & 291 deletions FlowCal/excel_ui.py

Large diffs are not rendered by default.

86 changes: 63 additions & 23 deletions FlowCal/io.py
@@ -1195,22 +1195,8 @@ def __new__(cls, infile):
else:
time_step = None

# Extract the acquisition date. The FCS standard includes an optional
# keyword parameter $DATE in which the acquistion date is stored. In
# FCS 2.0, the date is saved as 'dd-mmm-yy', whereas in FCS 3.0 and 3.1
# the date is saved as 'dd-mmm-yyyy'.
if '$DATE' in fcs_file.text:
try:
acquisition_date = datetime.datetime.strptime(
fcs_file.text['$DATE'],
'%d-%b-%y')
except ValueError:
acquisition_date = datetime.datetime.strptime(
fcs_file.text['$DATE'],
'%d-%b-%Y')
acquisition_date = acquisition_date.date()
else:
acquisition_date = None
# Extract the acquisition date.
acquisition_date = cls._parse_date_string(fcs_file.text.get('$DATE'))

# Extract the times of start and end of acquisition time.
acquisition_start_time = cls._parse_time_string(
@@ -1287,7 +1273,8 @@ def __new__(cls, infile):
# The CellQuest Pro software saves the detector voltage in keyword
# parameters BD$WORD13, BD$WORD14, BD$WORD15... for channels 1, 2,
# 3...
if 'CellQuest Pro' in fcs_file.text.get('CREATOR'):
if 'CREATOR' in fcs_file.text and \
'CellQuest Pro' in fcs_file.text.get('CREATOR'):
detector_voltage = [fcs_file.text.get('BD$WORD{}'.format(12 + i))
for i in range(1, num_channels + 1)]
else:
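
The added 'CREATOR' in fcs_file.text check prevents a TypeError when the optional CREATOR keyword is absent: fcs_file.text.get('CREATOR') returns None in that case, and 'CellQuest Pro' in None raises. A standalone sketch of the same guarded lookup, using a plain dict in place of the FCS TEXT segment (the keyword values are made up, and the fallback branch is simplified because the real else branch is not shown in this hunk):

text = {'$DATE': '23-Mar-16'}          # note: no 'CREATOR' keyword present
num_channels = 3

if 'CREATOR' in text and 'CellQuest Pro' in text.get('CREATOR'):
    # CellQuest Pro stores detector voltages in BD$WORD13, BD$WORD14,
    # BD$WORD15, ... for channels 1, 2, 3, ...
    detector_voltage = [text.get('BD$WORD{}'.format(12 + i))
                        for i in range(1, num_channels + 1)]
else:
    # Simplified fallback for this sketch only.
    detector_voltage = [None] * num_channels

print(detector_voltage)                # [None, None, None]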
@@ -1424,6 +1411,59 @@ def _parse_time_string(time_str):

return t

@staticmethod
def _parse_date_string(date_str):
"""
Get a datetime object from a string date representation.

The FCS standard includes an optional keyword parameter $DATE in
which the acquisition date is stored. In FCS 2.0, the date is saved
as 'dd-mmm-yy', whereas in FCS 3.0 and 3.1 the date is saved as
'dd-mmm-yyyy'.

This function attempts to parse these formats, along with a couple
of nonstandard ones, using the datetime module.

Parameters
----------
date_str : str, or None
    String representation of date, or None.

Returns
-------
t : datetime.datetime, or None
    Date parsed from `date_str`. If parsing was not possible or
    `date_str` is None, return None.
"""
# If input is None, return None
if date_str is None:
return None

# Standard format for FCS2.0
try:
return datetime.datetime.strptime(date_str, '%d-%b-%y')
except ValueError:
pass
# Standard format for FCS3.0
try:
return datetime.datetime.strptime(date_str, '%d-%b-%Y')
except ValueError:
pass
# Nonstandard format 1
try:
return datetime.datetime.strptime(date_str, '%y-%b-%d')
except ValueError:
pass
# Nonstandard format 2
try:
return datetime.datetime.strptime(date_str, '%Y-%b-%d')
except ValueError:
pass

# If none of these formats work, return None
return None
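
A rough standalone illustration of the same fallback chain (the method above belongs to the FCS data class; this sketch is a free function, and the sample date strings are made up):

import datetime

def parse_date(date_str):
    # Try the FCS 2.0, FCS 3.0/3.1, and two nonstandard formats in order;
    # return None if none of them match.
    if date_str is None:
        return None
    for fmt in ('%d-%b-%y', '%d-%b-%Y', '%y-%b-%d', '%Y-%b-%d'):
        try:
            return datetime.datetime.strptime(date_str, fmt)
        except ValueError:
            pass
    return None

print(parse_date('23-Mar-16'))       # 2016-03-23 00:00:00 (FCS 2.0 style)
print(parse_date('23-Mar-2016'))     # 2016-03-23 00:00:00 (FCS 3.0/3.1 style)
print(parse_date('2016-Mar-23'))     # 2016-03-23 00:00:00 (nonstandard)
print(parse_date('March 23, 2016'))  # None (unrecognized format)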


def _name_to_index(self, channels):
"""
@@ -1524,10 +1564,10 @@ def __getitem__(self, key):
[new_arr._detector_voltage[kc] for kc in key_channel])
new_arr._amplifier_gain = tuple(
[new_arr._amplifier_gain[kc] for kc in key_channel])
new_arr._domain = tuple(
[new_arr._domain[kc] for kc in key_channel])
new_arr._hist_bin_edges = tuple(
[new_arr._hist_bin_edges[kc] for kc in key_channel])
new_arr._domain = \
[new_arr._domain[kc] for kc in key_channel]
new_arr._hist_bin_edges = \
[new_arr._hist_bin_edges[kc] for kc in key_channel]
elif isinstance(key_channel, slice):
new_arr._channels = new_arr._channels[key_channel]
new_arr._amplification_type = \
@@ -1549,9 +1589,9 @@ def __getitem__(self, key):
new_arr._amplifier_gain = \
tuple([new_arr._amplifier_gain[key_channel]])
new_arr._domain = \
tuple([new_arr._domain[key_channel]])
[new_arr._domain[key_channel]]
new_arr._hist_bin_edges = \
tuple([new_arr._hist_bin_edges[key_channel]])
[new_arr._hist_bin_edges[key_channel]]

elif isinstance(key, tuple) and len(key) == 2 \
and (key[0] is None or key[1] is None):
64 changes: 49 additions & 15 deletions FlowCal/mef.py
@@ -217,6 +217,11 @@ def fit_beads_autofluorescence(fl_channel, fl_mef):
beads_params : array
Fitted parameters of the bead fluorescence model: ``[m, b,
fl_mef_auto]``.
beads_model_str : str
String representation of the beads model used.
beads_params_names : list of str
Names of the parameters in a list, in the same order as they are
given in `beads_params`.
Notes
-----
@@ -289,8 +294,18 @@ def sc_fun(p,x):

# Standard curve function
std_crv = lambda x: sc_fun(beads_params, x)

# Model string representation
beads_model_str = 'm*fl_ch + b = log(fl_mef_auto + fl_mef)'

# Parameter names
beads_params_names = ['m', 'b', 'fl_mef_auto']

return (std_crv, beads_model, beads_params)
return (std_crv,
beads_model,
beads_params,
beads_model_str,
beads_params_names)
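
With the two extra return values, callers now unpack five items instead of three. A usage sketch (the fl_channel and fl_mef arrays are synthetic, roughly log-linear placeholders, not real calibration-bead data, and the printed values are not asserted):

import numpy as np
import FlowCal.mef

# Illustrative placeholder bead data: channel values vs. manufacturer MEF.
fl_channel = np.array([230., 345., 460., 575., 690., 805.])
fl_mef = np.array([1e2, 1e3, 1e4, 1e5, 1e6, 1e7])

std_crv, beads_model, beads_params, beads_model_str, beads_params_names = \
    FlowCal.mef.fit_beads_autofluorescence(fl_channel, fl_mef)

print(beads_model_str)         # 'm*fl_ch + b = log(fl_mef_auto + fl_mef)'
for name, value in zip(beads_params_names, beads_params):
    print('{} = {}'.format(name, value))
print(std_crv(500.))           # standard curve evaluated at one channel value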

def plot_standard_curve(fl_channel,
fl_mef,
@@ -404,6 +419,9 @@ def get_transform_fxn(data_beads,
general transformation function specified in ``FlowCal.transform``.
namedtuple, if ``full_output==True``
``namedtuple``, containing the following fields in this order:
mef_channels : int, or str, or list of int, or list of str
Channels on which transformation functions have been generated.
Directly copied from the `mef_channels` argument.
transform_fxn : function
Transformation function to convert flow cytometry data from
channel units to MEF. This function has the same basic
@@ -478,14 +496,18 @@ def get_transform_fxn(data_beads,
fitting_fxn : function, optional
Function used to fit the beads fluorescence model and obtain a
standard curve. Must have the following signature: ``std_crv,
beads_model, beads_params = fitting_fxn(fl_channel, fl_mef,
**fitting_params)``, where `std_crv` is a function implementing the
standard curve, `beads_model` is a function implementing the beads
fluorescence model, `beads_params` is an array containing the
fitted parameters of the beads model, and `fl_channel` and `fl_mef`
are the fluorescence values of the beads in channel units and MEF
units, respectively. Note that the standard curve and the fitted
beads model are not necessarily the same.
beads_model, beads_params, beads_model_str, beads_params_names =
fitting_fxn(fl_channel, fl_mef, **fitting_params)``, where
`std_crv` is a function implementing the standard curve,
`beads_model` is a function implementing the beads fluorescence
model, `beads_params` is an array containing the fitted parameters
of the beads model, `beads_model_str` is a string representation
of the beads model used, `beads_params_names` is a list with the
parameter names in the same order as they are given in
`beads_params`, and `fl_channel` and `fl_mef` are the fluorescence
values of the beads in channel units and MEF units, respectively.
Note that the standard curve and the fitted beads model are not
necessarily the same.
fitting_params : dict, optional
Additional keyword parameters to pass to `fitting_fxn`.
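
The required five-value signature can also be satisfied by a user-supplied function. A deliberately simple skeleton (a straight-line fit standing in for a real beads fluorescence model; all names below are illustrative):

import numpy as np

def linear_fitting_fxn(fl_channel, fl_mef):
    # Fit fl_mef = m*fl_channel + b. A real beads model would account for
    # bead autofluorescence and saturation; this is only a placeholder.
    m, b = np.polyfit(np.asarray(fl_channel), np.asarray(fl_mef), deg=1)

    std_crv = lambda x: m*np.asarray(x) + b    # standard curve
    beads_model = std_crv                      # here identical to std_crv
    beads_params = np.array([m, b])
    beads_model_str = 'fl_mef = m*fl_channel + b'
    beads_params_names = ['m', 'b']

    return (std_crv,
            beads_model,
            beads_params,
            beads_model_str,
            beads_params_names)

Such a function could then be passed to get_transform_fxn through fitting_fxn, with any extra keyword arguments supplied via fitting_params.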
@@ -618,6 +640,8 @@ def get_transform_fxn(data_beads,
selected_mef_res = []
beads_model_res = []
beads_params_res =[]
beads_model_str_res =[]
beads_params_names_res =[]

# Iterate through each mef channel
for mef_channel, mef_values_channel in zip(mef_channels, mef_values):
@@ -717,15 +741,21 @@ def get_transform_fxn(data_beads,
###

# Fit
std_crv, beads_model, beads_params = fitting_fxn(
selected_channel,
selected_mef,
**fitting_params)
fitting_output = fitting_fxn(selected_channel,
selected_mef,
**fitting_params)
std_crv = fitting_output[0]
beads_model = fitting_output[1]
beads_params = fitting_output[2]
beads_model_str = fitting_output[3]
beads_params_names = fitting_output[4]
# Accumulate results
std_crv_res.append(std_crv)
if full_output:
beads_model_res.append(beads_model)
beads_params_res.append(beads_params)
beads_model_str_res.append(beads_model_str)
beads_params_names_res.append(beads_params_names)

# Print information
if verbose:
@@ -782,15 +812,19 @@ def get_transform_fxn(data_beads,
fitting_res['std_crv'] = std_crv_res
fitting_res['beads_model'] = beads_model_res
fitting_res['beads_params'] = beads_params_res
fitting_res['beads_model_str'] = beads_model_str_res
fitting_res['beads_params_names'] = beads_params_names_res

# Make namedtuple
fields = ['transform_fxn',
fields = ['mef_channels',
'transform_fxn',
'clustering',
'statistic',
'selection',
'fitting']
MEFOutput = collections.namedtuple('MEFOutput', fields, verbose=False)
out = MEFOutput(transform_fxn=transform_fxn,
out = MEFOutput(mef_channels=mef_channels,
transform_fxn=transform_fxn,
clustering=clustering_res,
statistic=statistic_res,
selection=selection_res,
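
A sketch of how the enlarged output might be consumed with full_output=True (the file name 'beads.fcs', channel name 'FL1', and MEF values are placeholders, and the call mirrors the docstring shown above rather than a verified signature):

import FlowCal.io
import FlowCal.mef

beads = FlowCal.io.FCSData('beads.fcs')          # placeholder file name
mef_values = [1e2, 1e3, 1e4, 1e5, 1e6, 1e7]      # placeholder MEF values

res = FlowCal.mef.get_transform_fxn(beads,
                                    mef_values=mef_values,
                                    mef_channels='FL1',
                                    full_output=True)

print(res.mef_channels)                      # channels used, e.g. 'FL1'
print(res.fitting['beads_model_str'][0])     # model string, one per channel
print(res.fitting['beads_params_names'][0])  # e.g. ['m', 'b', 'fl_mef_auto']
print(res.fitting['beads_params'][0])        # fitted parameter values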
41 changes: 31 additions & 10 deletions FlowCal/plot.py
@@ -67,7 +67,8 @@ def hist1d(data_list,
div=1,
bins=None,
histtype='stepfilled',
normed=False,
normed_area=False,
normed_height=False,
xlabel=None,
ylabel=None,
xlim=None,
@@ -107,9 +108,15 @@ def hist1d(data_list,
attempts to extract bins from ``data_list[i].domain``.
histtype : {'bar', 'barstacked', 'step', 'stepfilled'}, str, optional
Histogram type. Directly passed to ``plt.hist``.
normed : bool, optional
normed_area : bool, optional
Flag indicating whether to normalize the histogram such that the
area under the curve is equal to one. The resulting plot is
equivalent to a probability density function.
normed_height : bool, optional
Flag indicating whether to normalize the histogram such that the
area under the curve is equal to one.
sum of all bins' heights is equal to one. The resulting plot is
equivalent to a probability mass function. `normed_height` is
ignored if `normed_area` is True.
savefig : str, optional
The name of the file to save the figure to. If None, do not save.
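
A usage sketch of the two new flags (the file name 'sample.fcs' and channel 'FL1' are placeholders; the channel keyword is assumed to follow the rest of FlowCal.plot):

import matplotlib.pyplot as plt
import FlowCal.io
import FlowCal.plot

s = FlowCal.io.FCSData('sample.fcs')       # placeholder file name

# Area under the histogram equals one (probability density function).
FlowCal.plot.hist1d(s, channel='FL1', normed_area=True)
plt.show()

# Bin heights sum to one (probability mass function); ignored if
# normed_area is also True.
FlowCal.plot.hist1d(s, channel='FL1', normed_height=True)
plt.show()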
@@ -195,19 +202,27 @@ def hist1d(data_list,
# Generate sub-sampled bins
bins = np.interp(xs, xd, bd)

# Decide whether to normalize
if normed_height:
weights = np.ones_like(y)/float(len(y))
else:
weights = None

# Actually plot
if bins is not None:
n, edges, patches = plt.hist(y,
bins,
weights=weights,
normed=normed_area,
histtype=histtype,
normed=normed,
edgecolor=edgecolor[i],
facecolor=facecolor[i],
**kwargs)
else:
n, edges, patches = plt.hist(y,
weights=weights,
normed=normed_area,
histtype=histtype,
normed=normed,
edgecolor=edgecolor[i],
facecolor=facecolor[i],
**kwargs)
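
The weights logic above can be checked in isolation: giving every event a weight of 1/N makes the bin heights sum to one, whereas density normalization (what normed_area requests from matplotlib) divides by bin width so that the area is one. A quick standalone check with synthetic data:

import numpy as np

np.random.seed(0)
y = np.random.lognormal(mean=5., sigma=1., size=10000)   # synthetic events
bins = np.linspace(y.min(), y.max(), 101)

# normed_height: weights of 1/N -> bin heights sum to 1 (probability mass).
heights, _ = np.histogram(y, bins=bins,
                          weights=np.ones_like(y)/float(len(y)))
print(heights.sum())                       # ~1.0

# normed_area: density normalization -> area under the curve is 1.
density, edges = np.histogram(y, bins=bins, density=True)
print((density * np.diff(edges)).sum())    # ~1.0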
@@ -229,8 +244,10 @@
if ylabel is not None:
# Highest priority is user-provided label
plt.ylabel(ylabel)
elif normed:
elif normed_area:
plt.ylabel('Probability')
elif normed_height:
plt.ylabel('Counts (normalized)')
else:
# Default is "Counts"
plt.ylabel('Counts')
@@ -266,14 +283,15 @@ def hist1d(data_list,

def density2d(data,
channels=[0,1],
log=False,
div=1,
bins=None,
mode='mesh',
normed=False,
smooth=True,
sigma=10.0,
colorbar=False,
xlog=False,
ylog=False,
xlabel=None,
ylabel=None,
xlim=None,
@@ -304,8 +322,6 @@ def density2d(data,
Flow cytometry data to plot.
channels : list of int, list of str, optional
Two channels to use for the plot.
log : bool, optional
Flag specifying whether the axes should be in log scale.
div : int or float, optional
Downscaling factor for the default number of bins. If `bins` is not
specified, the default set of bins extracted from `data` contains
@@ -333,6 +349,10 @@ def density2d(data,
----------------
sigma : float, optional
The sigma parameter for the Gaussian kernel to use when smoothing.
xlog : bool, optional
Flag specifying whether the x axis should be in log scale.
ylog : bool, optional
Flag specifying whether the y axis should be in log scale.
xlabel : str, optional
Label to use on the x axis. If None, attempts to extract channel
name from `data`.
@@ -434,8 +454,9 @@ def density2d(data,
cbar.ax.set_ylabel('Counts')

# Make axes log if necessary
if log:
if xlog:
plt.gca().set_xscale('log')
if ylog:
plt.gca().set_yscale('log')

# x and y limits
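
A usage sketch of the split log flags replacing the single log argument (again with a placeholder file and made-up channel names):

import matplotlib.pyplot as plt
import FlowCal.io
import FlowCal.plot

s = FlowCal.io.FCSData('sample.fcs')   # placeholder file name

# Log-scale both axes (previously a single `log=True` flag).
FlowCal.plot.density2d(s, channels=['FSC', 'SSC'], xlog=True, ylog=True)
plt.show()

# Log-scale only the y axis.
FlowCal.plot.density2d(s, channels=['FSC', 'FL1'], ylog=True)
plt.show()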
