Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
gb119 committed May 15, 2017
2 parents 14e587e + cc6208a commit a1a0972
Show file tree
Hide file tree
Showing 21 changed files with 728 additions and 427 deletions.
42 changes: 25 additions & 17 deletions Stoner/Analysis.py
Expand Up @@ -325,33 +325,38 @@ def __get_math_val(self, col):
raise RuntimeError("Bad column index: {}".format(col))
return data, name

def _poly_outlier(self, row, column, window, metric=3.0, xcol=None, order=1, yerr=None):
    """Alternative outlier detection function that fits a polynomial locally over the window.

    Args:
        row (1D array): Current row of data
        column (int): Column index of y values to examine
        window (2D array): Local window of data

    Keyword Arguments:
        metric (float): Some measure of how sensitive the detection should be
        xcol (column index): Column of data to use for X values. Defaults to current setas value
        order (int): Order of polynomial to fit. Must be < length of window-1
        yerr (column index): Optional column of y uncertainties used to weight the local fit

    Returns:
        True if current row is an outlier
    """
    # Need at least order+2 points so polyfit can also return a covariance matrix.
    if order > window.shape[0] - 2:
        raise ValueError("order should be smaller than the window length.")
    if xcol is None:
        xcol = self.setas._get_cols("xcol")
    else:
        xcol = self.find_col(xcol)
    # Resolve all column designations (x, y and optional y-error) in one place.
    _ = self._col_args(xcol=xcol, ycol=column, yerr=yerr)

    # Shift x so the current row sits at x=0; the fitted value there is then
    # simply the last polynomial coefficient evaluated via polyval at 0.
    x = window[:, _.xcol] - row[_.xcol]
    y = window[:, _.ycol]
    if _.yerr is not None and len(_.yerr) > 0:
        # polyfit weights are 1/sigma, not 1/sigma**2.
        w = 1.0 / window[:, _.yerr]
    else:
        w = None

    popt, pcov = _np_.polyfit(x, y, w=w, deg=order, cov=True)
    pval = _np_.polyval(popt, 0.0)
    # Uncertainty in the constant term (the prediction at x=0).
    perr = _np_.sqrt(_np_.diag(pcov))[-1]
    return abs(pval - row[_.ycol]) > metric * perr

def _get_curve_fit_data(self,xcol,ycol,bounds,sigma):
""""Gather up the xdata and sigma columns for curve_fit."""
Expand Down Expand Up @@ -543,7 +548,7 @@ def add(self, a, b, replace=False, header=None):
additional column with the uncertainties will be added to the data.
"""
a = self.find_col(a)
if isinstance(a, tuple) and isinstance(b, tuple) and len(a) == 2 and len(b) == 2: #Error columns on
if isinstance(a, (tuple,list)) and isinstance(b, (tuple,list)) and len(a) == 2 and len(b) == 2: #Error columns on
(a, e1) = a
(b, e2) = b
e1data = self.__get_math_val(e1)[0]
Expand Down Expand Up @@ -890,7 +895,8 @@ def diffsum(self, a, b, replace=False, header=None):
the second element an uncertainty in the value. The uncertainties will then be propagated and an
additional column with the uncertainties will be added to the data.
"""
if isinstance(a, tuple) and isinstance(b, tuple) and len(a) == 2 and len(b) == 2: #Error columns on
a = self.find_col(a)
if isinstance(a, (list,tuple)) and isinstance(b, (list,tuple)) and len(a) == 2 and len(b) == 2: #Error columns on
(a, e1) = a
(b, e2) = b
e1data = self.__get_math_val(e1)[0]
Expand Down Expand Up @@ -935,7 +941,8 @@ def divide(self, a, b, replace=False, header=None):
the second element an uncertainty in the value. The uncertainties will then be propagated and an
additional column with the uncertainties will be added to the data.
"""
if isinstance(a, tuple) and isinstance(b, tuple) and len(a) == 2 and len(b) == 2: #Error columns on
a = self.find_col(a)
if isinstance(a, (list,tuple)) and isinstance(b, (list,tuple)) and len(a) == 2 and len(b) == 2: #Error columns on
(a, e1) = a
(b, e2) = b
e1data = self.__get_math_val(e1)[0]
Expand Down Expand Up @@ -1488,7 +1495,7 @@ def multiply(self, a, b, replace=False, header=None):
additional column with the uncertainties will be added to the data.
"""
a = self.find_col(a)
if isinstance(a, tuple) and isinstance(b, tuple) and len(a) == 2 and len(b) == 2: #Error columns on
if isinstance(a, (list,tuple)) and isinstance(b, (list,tuple)) and len(a) == 2 and len(b) == 2: #Error columns on
(a, e1) = a
(b, e2) = b
e1data = self.__get_math_val(e1)[0]
Expand Down Expand Up @@ -1603,7 +1610,7 @@ def action(i,column,row):
index = []
column = self.find_col(column) #going to be easier if this is an integer later on
for i, t in enumerate(self.rolling_window(window, wrap=False, exclude_centre=width)):
if func(self.data[i], column, t, certainty, **kargs):
if func(self.data[i], column, t, metric=certainty, **kargs):
index.append(i)
self['outliers'] = index #add outlier indices to metadata
index.reverse() #Always reverse the index in case we're deleting rows in succession
Expand Down Expand Up @@ -2207,7 +2214,8 @@ def subtract(self, a, b, replace=False, header=None):
the second element an uncertainty in the value. The uncertainties will then be propagated and an
additional column with the uncertainties will be added to the data.
"""
if isinstance(a, tuple) and isinstance(b, tuple) and len(a) == 2 and len(b) == 2: #Error columns on
a = self.find_col(a)
if isinstance(a, (list,tuple)) and isinstance(b, (list,tuple)) and len(a) == 2 and len(b) == 2: #Error columns on
(a, e1) = a
(b, e2) = b
e1data = self.__get_math_val(e1)[0]
Expand All @@ -2229,7 +2237,7 @@ def subtract(self, a, b, replace=False, header=None):
self.add_column((adata - bdata), header=header, index=a, replace=replace)
if err_calc is not None:
a = self.find_col(a)
self.add_column(err_data, header=err_header, index=a, replace=False)
self.add_column(err_data, header=err_header, index=a+1, replace=False)
return self

def threshold(self, threshold, **kargs):
Expand Down
65 changes: 55 additions & 10 deletions Stoner/Core.py
Expand Up @@ -119,7 +119,8 @@ def _size(self):

@property
def clone(self):
new = _setas()
cls=self.__class__
new = cls()
for attr in self.__dict__:
if not callable(self.__dict__[attr]):
new.__dict__[attr] = copy.deepcopy(self.__dict__[attr])
Expand Down Expand Up @@ -261,6 +262,7 @@ def __call__(self, *args, **kargs):
value.extend(list("." * (len(self.column_headers) - len(value))))
if len(self.setas)<self._size:
self.setas.extend("."*(self._size-len(self.setas)))
value=value[:len(self.setas)]
for i, v in enumerate(list(value)):
if v.lower() not in "xyzedfuvw.-":
raise ValueError("Set as column element is invalid: {}".format(v))
Expand Down Expand Up @@ -531,6 +533,8 @@ class _evaluatable(object):

class regexpDict(sorteddict):
"""An ordered dictionary that permits looks up by regular expression."""
allowed_keys=(object,)

def __init__(self,*args,**kargs):
super(regexpDict,self).__init__(*args,**kargs)

Expand Down Expand Up @@ -588,7 +592,7 @@ def __setitem__(self,name,value):
try:
key=self.__lookup__(name,exact=True)
except KeyError:
if not isinstance(name,string_types):
if not isinstance(name,self.allowed_keys):
raise KeyError("{} is not a match to any key.".format(name))
key=name
super(regexpDict,self).__setitem__(key, value)
Expand Down Expand Up @@ -638,6 +642,8 @@ class typeHintedDict(regexpDict):
Rather than subclassing a plain dict, this is a subclass of a :py:class:`blist.sorteddict` which stores the entries in a binary list structure.
This makes accessing the keys much faster and also ensures that keys are always returned in alphabetical order.
"""
allowed_keys=string_types
#Force metadata keys to be strings
_typehints = sorteddict()

__regexGetType = re.compile(r'([^\{]*)\{([^\}]*)\}')
Expand Down Expand Up @@ -910,7 +916,8 @@ def copy(self):
Returns:
A copy of the current typeHintedDict
"""
ret = typeHintedDict()
cls=self.__class__
ret = cls()
for k in self.keys():
t = self._typehints[k]
nk = k + "{" + t + "}"
Expand Down Expand Up @@ -2125,6 +2132,18 @@ def __contains__(self, item):
bool: True if item in self.metadata"""
return item in self.metadata

def __deepcopy__(self, memo):
    """Provides support for copy.deepcopy to work.

    Args:
        memo (dict): Standard deepcopy memo dictionary mapping id() of
            already-copied objects to their copies (handles reference cycles).

    Returns:
        A new instance of the same class with every instance attribute
        deep-copied where possible, shallow-copied otherwise.
    """
    cls = self.__class__
    # __new__ bypasses __init__ so construction side effects are avoided;
    # the attributes are filled in explicitly below.
    result = cls.__new__(cls)
    # Register the copy *before* recursing so self-references resolve to it.
    memo[id(self)] = result
    for k, v in self.__dict__.items():
        try:
            setattr(result, k, copy.deepcopy(v, memo))
        except Exception:
            # Best-effort fallback: some attribute values may not support
            # deepcopy, so fall back to a shallow copy rather than failing.
            setattr(result, k, copy.copy(v))
    return result

def __delitem__(self, item):
"""Implements row or metadata deletion.
Expand Down Expand Up @@ -2303,7 +2322,7 @@ def __getitem__(self, name):
return ret

def __getstate__(self):
return {"data": self.data, "column_headers": self.column_headers, "metadata": self.metadata}
return {"data": self.data, "column_headers": self.column_headers, "metadata": self.metadata,"filename":self.filename}


def __iter__(self):
Expand Down Expand Up @@ -2516,7 +2535,8 @@ def __read_iterable(self, reader):
self["TDI Format"]=fmt

def __reduce_ex__(self, p):
return (DataFile, (), self.__getstate__())
cls=self.__class__
return (cls, (), self.__getstate__())

def __regexp_meta__(self, test):
"""Do a regular expression search for all meta data items.
Expand Down Expand Up @@ -2745,6 +2765,15 @@ def _col_args(self,scalar=True,xcol=None,ycol=None,zcol=None,ucol=None,vcol=None
ret[c]=ret[c][0]
else:
ret[c]=None
elif isinstance(scalar,bool) and not scalar:
for c in ret:
if c.startswith("x") or c.startswith("has_"):
continue
if not isinstance(ret[c],Iterable) and ret[c] is not None:
ret[c]=list([ret[c]])
elif ret[c] is None:
ret[c]=[]

return ret

def _pop_mask(self):
Expand All @@ -2768,7 +2797,7 @@ def _raise_type_error(self,k):

# PUBLIC METHODS

def add_column(self, column_data, header=None, index=None, func_args=None, replace=False):
def add_column(self, column_data, header=None, index=None, func_args=None, replace=False,setas=None):
"""Appends a column of data or inserts a column to a datafile instance.
Args:
Expand All @@ -2781,6 +2810,7 @@ def add_column(self, column_data, header=None, index=None, func_args=None, repla
func_args (dict): If column_data is a callable object, then this argument
can be used to supply a dictionary of function arguments to the callable object.
replace (bool): Replace the data or insert the data (default)
setas (str): Set the type of column (x,y,z data etc - see :py:attr:`Stoner.Core.DataFile.setas`)
Returns:
self: The :py:class:`DataFile` instance with the additonal column inserted.
Expand All @@ -2797,7 +2827,14 @@ def add_column(self, column_data, header=None, index=None, func_args=None, repla
index = self.find_col(index)
if header is None:
header = self.column_headers[index]


if isinstance(setas,str) and len(setas)==1 and setas in "xyzdefuvw.-":
pass
elif setas is not None:
raise TypeError("setas parameter should be a single letter in the set xyzdefuvw.-, not {}".format(setas))
else:
setas="."

if isinstance(column_data, list):
column_data = _np_.array(column_data)

Expand Down Expand Up @@ -2830,16 +2867,24 @@ def add_column(self, column_data, header=None, index=None, func_args=None, repla
_np__data = _np_.append(_np__data, _np_.zeros(dr - cl))
if replace:
self.data[:, index] = _np__data
if setas!="-":
self.setas[index]=setas

else:
if dc * dr == 0:
self.data = DataArray(_np_.transpose(_np_.atleast_2d(_np__data)),setas=self.data._setas)
self.column_headers=[header,]
self.setas=setas

else:
columns=copy.copy(self.column_headers)
setas=list(self.setas)
setas.insert(index,".")
old_setas=list(self.setas)
if setas!="-":
old_setas.insert(index,setas)
else:
old_setas.insert(index,old_setas[index])
self.data = DataArray(_np_.insert(self.data, index, _np__data, 1))
self.setas(setas)
self.setas(old_setas)
columns.insert(index,header)
self.column_headers=columns
return self
Expand Down

0 comments on commit a1a0972

Please sign in to comment.