diff --git a/src/interface/set.h b/src/interface/set.h index 956603ad..c9df4fa4 100644 --- a/src/interface/set.h +++ b/src/interface/set.h @@ -663,6 +663,30 @@ namespace CTF { std::sort((dtypePair*)pairs,((dtypePair*)pairs)+n); } + void to_s_of_a(int64_t n, char const * pairs, int64_t * keys, char * vals) const { + Pair const * dpairs = (Pair const *)pairs; + dtype * dvals = (dtype*)vals; +#ifdef _OPENMP + #pragma omp parallel for +#endif + for (int64_t i=0; i * dpairs = (Pair*)pairs; + dtype const * dvals = (dtype const*)vals; +#ifdef _OPENMP + #pragma omp parallel for +#endif + for (int64_t i=0; ito_s_of_a(*npair, cpairs, *global_idx, (char*)*data); if (cpairs != NULL) sr->pair_dealloc(cpairs); } @@ -425,13 +417,14 @@ namespace CTF { int64_t i; char * cpairs = sr->pair_alloc(npair); Pair< dtype > * pairs =(Pair< dtype >*)cpairs; -#ifdef _OPENMP - #pragma omp parallel for -#endif - for (i=0; ito_a_of_s(npair, global_idx, (char*)data, (char*)pairs); +//#ifdef _OPENMP +// #pragma omp parallel for +//#endif +// for (i=0; ipair_alloc(npair); Pair< dtype > * pairs =(Pair< dtype >*)cpairs; -#ifdef _OPENMP - #pragma omp parallel for -#endif - for (i=0; ito_a_of_s(npair, global_idx, (char*)data, (char*)pairs); +//#ifdef _OPENMP +// #pragma omp parallel for +//#endif +// for (i=0; ipair_alloc(npair); CTF_int::PairIterator pairs = CTF_int::PairIterator(sr, cpairs); for (i=0; ipair_alloc(npair); Pair< dtype > * pairs =(Pair< dtype >*)cpairs; - for (i=0; ito_a_of_s(npair, global_idx, (char*)data, (char*)pairs); + //for (i=0; isort(n, ptr); } - void ConstPairIterator::permute(int64_t n, int order, int64_t const * old_lens, int64_t const * new_lda, PairIterator wA){ - ConstPairIterator rA = * this; -#ifdef USE_OMP - #pragma omp parallel for -#endif - for (int64_t i=0; iel_size*(*num_pair)); + sr->to_s_of_a(*num_pair, pairs, *inds, *vals); + } + int64_t tensor::get_tot_size(bool packed=false){ if (!packed){ int64_t tsize = 1; diff --git a/src/tensor/untyped_tensor.h b/src/tensor/untyped_tensor.h index 6a24b6a5..915afdc4 100644 --- a/src/tensor/untyped_tensor.h +++ b/src/tensor/untyped_tensor.h @@ -508,6 +508,18 @@ namespace CTF_int { */ char * read_all_pairs(int64_t * num_pair, bool unpack, bool nonzero_only=false) const; + + /** + * \brief read all pairs with each processor (packed) + * \param[out] num_pair number of values read + * \param[in] unpack whether to read all or unique pairs up to symmetry + * \param[out] inds array of indices of nonzeros or all values + * \param[out] vals array of nonzeros or all values + * \param[in] nonzero_only whether to read only nonzeros + */ + void read_all_pairs(int64_t * num_pair, bool unpack, int64_t ** inds, char ** vals, bool nonzero_only=false) const; + + /** * \brief accumulates out a slice (block) of this tensor = B * B[offsets,ends)=beta*B[offsets,ends) + alpha*A[offsets_A,ends_A) diff --git a/src_python/ctf/chelper.pxd b/src_python/ctf/chelper.pxd index beba8bec..0bc865fc 100644 --- a/src_python/ctf/chelper.pxd +++ b/src_python/ctf/chelper.pxd @@ -1,7 +1,7 @@ from libc.stdint cimport int64_t cdef char* char_arr_py_to_c(a) - cdef int64_t* int64_t_arr_py_to_c(a) - cdef int* int_arr_py_to_c(a) +cdef _cast_carray_as_python(n, char * cdata, dtype) + diff --git a/src_python/ctf/chelper.pyx b/src_python/ctf/chelper.pyx index 4431486f..93b4d5c8 100644 --- a/src_python/ctf/chelper.pyx +++ b/src_python/ctf/chelper.pyx @@ -1,5 +1,11 @@ import numpy as np from libc.stdlib cimport malloc, free +from libc.stdint cimport int64_t +from libcpp cimport bool + +ctypedef double complex complex128_t +ctypedef float complex complex64_t +ctypedef long long iint64_t cdef char* char_arr_py_to_c(a): cdef char * ca @@ -32,4 +38,40 @@ cdef int* int_arr_py_to_c(a): ca[i] = a[i] return ca +cdef _cast_carray_as_python(n, char * cdata, dtype): + if dtype == np.float64: + return np.asarray(cdata) + elif dtype == np.float32: + return np.asarray(cdata) + elif dtype == np.complex64: + return np.asarray(cdata) + elif dtype == np.complex128: + return np.asarray(cdata) + elif dtype == np.int64: + return np.asarray(cdata) + elif dtype == np.int32: + return np.asarray(cdata) + elif dtype == np.bool_: + return np.asarray(cdata) + + else: + print(dtype) + raise ValueError('CTF PYTHON ERROR: bad dtype') + return np.ndarray() + +#WARNING: copy versions below inadequate for by-reference usage of above to write into C++ arrays +#cdef _cast_complex128_array_as_python(n, complex128_t * cdata): +# cdef complex128_t[:] cview = cdata +# data = np.empty(n, dtype=np.cdouble) +# data[:] = cview[:] +# free(cdata) +# return data +# +# +#cdef _cast_int64_array_as_python(n, int64_t * cdata): +# cdef int64_t[:] cview = cdata +# data = np.empty(n, dtype=np.int64) +# data[:] = cview[:] +# return data + diff --git a/src_python/ctf/linalg.pyx b/src_python/ctf/linalg.pyx index edb14ab1..c52b05a3 100644 --- a/src_python/ctf/linalg.pyx +++ b/src_python/ctf/linalg.pyx @@ -1,9 +1,9 @@ from libcpp cimport bool -from tensor cimport ctensor, tensor +from ctf.tensor cimport ctensor, tensor -from helper import * -from chelper cimport * +from ctf.helper import * +from ctf.chelper cimport * from libc.stdlib cimport malloc, free import ctf.profile diff --git a/src_python/ctf/multilinear.pyx b/src_python/ctf/multilinear.pyx index a7811191..2b05094d 100644 --- a/src_python/ctf/multilinear.pyx +++ b/src_python/ctf/multilinear.pyx @@ -1,8 +1,8 @@ import numpy as np -from tensor cimport Tensor, tensor, ctensor +from ctf.tensor cimport Tensor, tensor, ctensor from libcpp cimport bool from libc.stdlib cimport malloc, free -from profile import timer +from ctf.profile import timer cdef extern from "ctf.hpp" namespace "CTF": cdef void TTTP_ "CTF::TTTP"[dtype](Tensor[dtype] * T, int num_ops, int * modes, Tensor[dtype] ** mat_list, bool aux_mode_first) diff --git a/src_python/ctf/partition.pyx b/src_python/ctf/partition.pyx index d126b36f..3594392a 100644 --- a/src_python/ctf/partition.pyx +++ b/src_python/ctf/partition.pyx @@ -1,5 +1,5 @@ from libc.stdlib cimport malloc, free -from chelper cimport * +from ctf.chelper cimport * cdef class partition: """ diff --git a/src_python/ctf/tensor.pxd b/src_python/ctf/tensor.pxd index aa1c76e2..12c7ecbd 100644 --- a/src_python/ctf/tensor.pxd +++ b/src_python/ctf/tensor.pxd @@ -1,8 +1,8 @@ from libc.stdint cimport int64_t from libcpp cimport bool cimport numpy as cnp -from partition cimport Idx_Partition -from world cimport World +from ctf.partition cimport Idx_Partition +from ctf.world cimport World cdef extern from "ctf.hpp" namespace "CTF": cdef cppclass Term: @@ -61,6 +61,7 @@ cdef extern from "ctf.hpp" namespace "CTF_int": void reshape(ctensor * tsr, char * alpha, char * beta) void allread(int64_t * num_pair, char * data, bool unpack) char * read_all_pairs(int64_t * num_pair, bool unpack, bool nonzeros_only) + void read_all_pairs(int64_t * num_pair, bool unpack, int64_t ** inds, char ** vals, bool nonzero_only) const; void slice(int64_t *, int64_t *, char *, ctensor *, int64_t *, int64_t *, char *) int64_t get_tot_size(bool packed) void get_raw_data(char **, int64_t * size) diff --git a/src_python/ctf/tensor.pyx b/src_python/ctf/tensor.pyx index cd85938f..01225bb0 100644 --- a/src_python/ctf/tensor.pyx +++ b/src_python/ctf/tensor.pyx @@ -24,6 +24,7 @@ from ctf.partition cimport idx_partition #from ctf.helper import ctf.helper._ord_comp, ctf.helper.type_index, ctf.helper._rev_array, ctf.helper._get_np_dtype, ctf.helper._get_num_str, ctf.helper._use_align_for_pair from ctf.chelper cimport * +#cdef extern from "numpy/arrayobject.h": #from ctf.tensor_aux import ctf.tensor_aux.astensor, ctf.tensor_aux.transpose, ctf.tensor_aux.power, ctf.tensor_aux.dot, ctf.tensor_aux.reshape, ctf.tensor_aux.zeros, ctf.tensor_aux.conj, ctf.tensor_aux._match_tensor_types, ctf.tensor_aux._div, ctf.tensor_aux._setgetitem_helper, ctf.tensor_aux.trace, ctf.tensor_aux.diagonal, ctf.tensor_aux.take, ctf.tensor_aux.ravel tensorctf.tensor_aux.dot #from ctf.term import itensor #from ctf.world import comm @@ -735,7 +736,9 @@ cdef class tensor: [idx_A, idx_B, idx_C, out_tsr] = tsr._ufunc_interpret(otsr) out_tsr.i(idx_C) << tsr.i(idx_A) - out_tsr.i(idx_C) << -1*otsr.i(idx_B) + iotsr = otsr.i(idx_B) + iotsr.scale(-1) + out_tsr.i(idx_C) << iotsr return out_tsr def __isub__(self, other_in): @@ -747,9 +750,13 @@ cdef class tensor: raise ValueError('CTF PYTHON ERROR: invalid call to __isub__ (-=)') if self.dtype != other.dtype: [tsr, otsr] = ctf.tensor_aux._match_tensor_types(self,other) # solve the bug when np.float64 -= np.int64 - self.i(idx_C) << -1*otsr.i(idx_A) + iotsr = otsr.i(idx_A) + iotsr.scale(-1) + self.i(idx_C) << iotsr else: - self.i(idx_C) << -1*other.i(idx_A) + iotsr = other.i(idx_A) + iotsr.scale(-1) + self.i(idx_C) << iotsr return self def __truediv__(self, other): @@ -1691,17 +1698,9 @@ cdef class tensor: cdef int64_t * cinds cdef char * cdata cdef int64_t n - self.dt.read_local(&n,&cdata,unpack_sym) - inds = np.empty(n, dtype=np.int64) - vals = np.empty(n, dtype=self.dtype) - - cdef cnp.ndarray buf = np.empty(len(inds), dtype=np.dtype([('a','i8'),('b',self.dtype)],align=ctf.helper._use_align_for_pair(self.dtype))) - d = buf.data - buf.data = cdata - vals[:] = buf['b'][:] - inds[:] = buf['a'][:] - buf.data = d - delete_pairs(self.dt, cdata) + self.dt.read_local(&n,&cinds,&cdata,unpack_sym) + inds = _cast_carray_as_python(n, cinds, np.int64) + vals = _cast_carray_as_python(n, cdata, self.dtype) return inds, vals def dot(self, other, out=None): @@ -1793,16 +1792,9 @@ cdef class tensor: cdef int64_t * cinds cdef char * cdata cdef int64_t n - self.dt.read_local_nnz(&n,&cdata,unpack_sym) - inds = np.empty(n, dtype=np.int64) - vals = np.empty(n, dtype=self.dtype) - cdef cnp.ndarray buf = np.empty(len(inds), dtype=np.dtype([('a','i8'),('b',self.dtype)],align=ctf.helper._use_align_for_pair(self.dtype))) - d = buf.data - buf.data = cdata - vals[:] = buf['b'][:] - inds[:] = buf['a'][:] - buf.data = d - delete_arr(self.dt, cdata) + self.dt.read_local_nnz(&n,&cinds,&cdata,unpack_sym) + inds = _cast_carray_as_python(n, cinds, np.int64) + vals = _cast_carray_as_python(n, cdata, self.dtype) return inds, vals def tot_size(self, unpack=True): @@ -1839,17 +1831,11 @@ cdef class tensor: return cvals = malloc(sz*tB) self.dt.allread(&sz, cvals, unpack) - cdef cnp.ndarray buf = np.empty(sz, dtype=self.dtype) - odata = buf.data - buf.data = cvals + vals = _cast_carray_as_python(sz, cvals, self.dtype) if arr is None: - sbuf = np.asarray(buf) - free(odata) - return buf + return vals else: - arr[:] = buf[:] - free(cvals) - buf.data = odata + arr[:] = vals[:] def read_all_nnz(self, unpack=True): """ @@ -1869,16 +1855,9 @@ cdef class tensor: cdef int64_t * cinds cdef char * cdata cdef int64_t n - cdata = self.dt.read_all_pairs(&n, unpack, True) - inds = np.empty(n, dtype=np.int64) - vals = np.empty(n, dtype=self.dtype) - cdef cnp.ndarray buf = np.empty(len(inds), dtype=np.dtype([('a','i8'),('b',self.dtype)],align=ctf.helper._use_align_for_pair(self.dtype))) - d = buf.data - buf.data = cdata - vals[:] = buf['b'][:] - inds[:] = buf['a'][:] - buf.data = d - delete_arr(self.dt, cdata) + self.dt.read_all_pairs(&n, unpack, &cinds, &cdata, True) + inds = _cast_carray_as_python(n, cinds, np.int64) + vals = _cast_carray_as_python(n, cdata, self.dtype) return inds, vals def __read_all(self, arr): @@ -1893,11 +1872,8 @@ cdef class tensor: sz = self.dt.get_tot_size(False) tB = arr.dtype.itemsize self.dt.get_raw_data(&cvals, &sz) - cdef cnp.ndarray buf = np.empty(sz, dtype=self.dtype) - odata = buf.data - buf.data = cvals - arr[:] = buf[:] - buf.data = odata + vals = _cast_carray_as_python(sz, cvals, self.dtype) + arr[:] = vals[:] def __write_all(self, arr): """ @@ -1911,12 +1887,9 @@ cdef class tensor: sz = self.dt.get_tot_size(False) tB = arr.dtype.itemsize self.dt.get_raw_data(&cvals, &sz) - cdef cnp.ndarray buf = np.empty(sz, dtype=self.dtype) - odata = buf.data - buf.data = cvals + vals = _cast_carray_as_python(sz, cvals, self.dtype) rarr = arr.ravel() - buf[:] = rarr[:] - buf.data = odata + vals[:] = rarr[:] def conj(self): """ diff --git a/src_python/ctf/tensor_aux.pyx b/src_python/ctf/tensor_aux.pyx index ca9c003e..4d3b3be6 100644 --- a/src_python/ctf/tensor_aux.pyx +++ b/src_python/ctf/tensor_aux.pyx @@ -14,6 +14,7 @@ from ctf.chelper cimport * from ctf.tensor cimport tensor, ctensor from ctf.profile import timer from ctf.linalg import svd +from ctf.term cimport itensor cdef extern from "../ctf_ext.h" namespace "CTF_int": cdef void pow_helper[dtype](ctensor * A, ctensor * B, ctensor * C, char * idx_A, char * idx_B, char * idx_C); @@ -514,9 +515,6 @@ def take(init_A, indices, axis=None, out=None, mode='raise'): if axis > len(A.shape): raise IndexError("CTF PYTHON ERROR: axis out of bounds") if indices.shape == () or indices.shape== (1,): - total_size = 1 - for i in range(len(A.shape)): - total_size *= A[i] if indices >= A.shape[axis]: raise IndexError("CTF PYTHON ERROR: index out of bounds") ret_shape = list(A.shape) @@ -542,9 +540,6 @@ def take(init_A, indices, axis=None, out=None, mode='raise'): else: if len(indices.shape) > 1: raise ValueError("CTF PYTHON ERROR: current ctf does not support when specify axis and the len(indices.shape) > 1") - total_size = 1 - for i in range(len(A.shape)): - total_size *= A[i] for i in range(len(indices)): if indices[i] >= A.shape[axis]: raise IndexError("index out of bounds") @@ -2479,7 +2474,9 @@ def einsum(subscripts, *operands, out=None, dtype=None, order='K', casting='safe operand = new_operands[0].i(inds[0]) for i in range(1,numop): operand = operand * new_operands[i].i(inds[i]) - out_scale*output.i(out_inds) << operand + itsr = output.i(out_inds) + itsr.scale(out_scale) + itsr << operand if out is None: if len(out_inds) == 0: output = output.item() @@ -2752,5 +2749,3 @@ def _comp_all(tensor A, axis=None, out=None, keepdims=None): if axis is None: x = A._bool_sum() return x == A.tot_size() - - diff --git a/test/python/test_sparse.py b/test/python/test_sparse.py index c8bf9bf6..3a74c9e2 100644 --- a/test/python/test_sparse.py +++ b/test/python/test_sparse.py @@ -51,8 +51,8 @@ def test_scaled_expression(self): b_np = ctf.to_nparray(b_dn) c_np = ctf.to_nparray(c_dn) - c_sp.i("ijk") << 2.3*a_sp.i("ijl")*b_sp.i("kjl") + 7*c_sp.i("ijk") - a_sp.i("ijk") - 1. * a_sp.i("ijk") - 2 * b_sp.i("ijk") - c_dn.i("ijk") << 2.3*a_dn.i("ijl")*b_dn.i("kjl") + 7*c_dn.i("ijk") - a_dn.i("ijk") - 1. * a_dn.i("ijk") - 2 * b_dn.i("ijk") + c_sp.i("ijk") << a_sp.i("ijl")*b_sp.i("kjl")*2.3 + c_sp.i("ijk")*7 - a_sp.i("ijk") - a_sp.i("ijk") - b_sp.i("ijk")*2 + c_dn.i("ijk") << a_dn.i("ijl")*b_dn.i("kjl")*2.3 + c_dn.i("ijk")*7 - a_dn.i("ijk") - a_dn.i("ijk") - b_dn.i("ijk")*2 c_np += 2.3*np.einsum("ijl,kjl->ijk",a_np,b_np) + 7*c_np - a_np - 1. * a_np - 2 * b_np self.assertTrue(allclose(c_np,c_dn)) self.assertTrue(allclose(c_np,c_sp)) @@ -62,7 +62,11 @@ def test_complex(self): b0 = np.arange(27.).reshape(3,3,3) a1 = ctf.astensor(a0) b1 = ctf.astensor(b0) - .2*b1.i("ijk") << .7*a1.i("kij") + b1i = b1.i("ijk") + b1i.scale(.2) + a1i = b1.i("kij") + a1i.scale(.7) + b1i << a1i b0 = .2*b0 + .7*a0.transpose([1,2,0]) self.assertTrue(allclose(b0,b1))