Skip to content

Commit

Permalink
Merge branch 'master' into node_awareness
Browse files Browse the repository at this point in the history
  • Loading branch information
solomonik committed Nov 9, 2023
2 parents f04f035 + 20e7853 commit 917bcb8
Show file tree
Hide file tree
Showing 40 changed files with 765 additions and 549 deletions.
130 changes: 130 additions & 0 deletions .github/workflows/autotest.yml
@@ -0,0 +1,130 @@
# Based on Nick Naso's cpp.yml workflow, https://gist.github.com/NickNaso/0d478f1481686d5bcc868cac06620a60
# CI for Cyclops (CTF): builds the C++ library with and without ScaLAPACK on
# Ubuntu (gcc) and macOS (clang), runs the C++ tests (1 and 2 MPI processes),
# then builds and tests the Python bindings.

on:
  push:
  pull_request:
  release:

jobs:
  build_cpp:
    name: ${{ matrix.config.name }}
    runs-on: ${{ matrix.config.os }}
    strategy:
      fail-fast: false
      matrix:
        config:
          - {
              name: "Ubuntu_Latest_GCC",
              os: ubuntu-latest,
              artifact: "ubuntu_gcc.7z",
              build_type: "Release",
              cc: "gcc",
              cxx: "g++",
              archiver: "7z a",
            }
          - {
              name: "Ubuntu_GCC_9",
              os: ubuntu-latest,
              artifact: "ubuntu_gcc9.7z",
              build_type: "Release",
              cc: "gcc",
              cxx: "g++",
              archiver: "7z a",
            }
          - {
              name: "macOS Latest Clang",
              os: macos-latest,
              artifact: "macos_clang.7z",
              build_type: "Release",
              cc: "clang",
              cxx: "clang++",
              archiver: "7za a",
            }
        # Separate matrix axis (NOT a key inside `config`): each config is
        # built both with and without a locally built ScaLAPACK.
        build_scalapack: [True, False]
    steps:
      - uses: actions/checkout@v2

      - name: Print env
        run: |
          echo github.event.action: ${{ github.event.action }}
          echo github.event_name: ${{ github.event_name }}
          echo matrix.build_scalapack
          echo ${{ matrix.build_scalapack }}
      - name: Install dependencies on ubuntu
        if: startsWith(matrix.config.name, 'Ubuntu')
        run: |
          sudo apt-get update;
          sudo apt install -y g++ gfortran git make libblas-dev liblapack-dev mpich
      - name: Install dependencies on macos
        if: startsWith(matrix.config.os, 'macos')
        run: |
          brew install mpich
      - name: Configure Cyclops and Build Scalapack
        # build_scalapack is a top-level matrix axis, so it must be referenced
        # as matrix.build_scalapack; matrix.config.build_scalapack is always
        # empty (falsy), which would make this step never run.
        if: matrix.build_scalapack
        shell: bash
        run:
          ./configure CXXFLAGS="-O0" --build-scalapack

      - name: Configure Cyclops without Scalapack
        if: ( ! matrix.build_scalapack )
        shell: bash
        run:
          ./configure CXXFLAGS="-O0"


      - name: Build Cyclops
        shell: bash
        run:
          make -j4

      - name: Build Tests and Test Cyclops C++
        run:
          make test

      - name: Test Cyclops C++ with 2 MPI processes
        if: startsWith(matrix.config.name, 'Ubuntu')
        shell: bash
        # MPIR_CVAR_DEVICE_COLLECTIVES=none works around an MPICH bug that
        # segfaults CTF on >= 2 processes (see README).
        run:
          export OMP_NUM_THREADS=1;
          export MPIR_CVAR_DEVICE_COLLECTIVES=none;
          make test2


      - name: Build Python Install dependencies on ubuntu
        if: startsWith(matrix.config.name, 'Ubuntu')
        run: |
          sudo apt install -y python3-dev virtualenv;
          mkdir envs
      - name: Build Python Install dependencies on macos
        if: startsWith(matrix.config.os, 'macos')
        run: |
          brew install virtualenv
      - name: Create Python virtual environment and install dependencies via pip
        run: |
          virtualenv -p python3 ./envs/py3env;
          source ./envs/py3env/bin/activate;
          pip install numpy cython setuptools
      - name: Build Python library
        run:
          source ./envs/py3env/bin/activate;
          make python

      - name: Test Cyclops Python
        run:
          source ./envs/py3env/bin/activate;
          make python_test

      - name: Test Cyclops Python with 2 MPI processes
        if: startsWith(matrix.config.name, 'Ubuntu')
        shell: bash
        run:
          source ./envs/py3env/bin/activate;
          export OMP_NUM_THREADS=1;
          export MPIR_CVAR_DEVICE_COLLECTIVES=none;
          make python_test2
57 changes: 0 additions & 57 deletions .travis.yml

This file was deleted.

2 changes: 1 addition & 1 deletion Makefile
Expand Up @@ -130,7 +130,7 @@ pip: $(BDIR)/setup.py $(BDIR)/lib_shared/libctf.so $(PYTHON_SRC_FILES)
cd src_python; \
ln -sf $(BDIR)/setup.py setup.py; \
mkdir -p $(BDIR)/lib_python/ctf && cp ctf/__init__.py $(BDIR)/lib_python/ctf/; \
pip install --force -b $(BDIR)/lib_python/ . --upgrade; \
pip install --force . --upgrade; \
rm setup.py; \
cd ..;

Expand Down
7 changes: 6 additions & 1 deletion README.md
@@ -1,5 +1,4 @@
## Cyclops Tensor Framework (CTF)
[<img src="https://travis-ci.org/cyclops-community/ctf.svg?branch=master">](https://travis-ci.org/cyclops-community/ctf)

Cyclops is a parallel (distributed-memory) numerical library for multidimensional arrays (tensors) in C++ and Python.

Expand Down Expand Up @@ -27,6 +26,12 @@ First, it's necessary to run the configure script, which can be set to the approp
```
then execute ./configure with the appropriate options. Successful execution of this script will generate a `config.mk` file and a `setup.py` file, needed for C++ and Python builds, respectively, as well as a how-did-i-configure file with info on how the build was configured. You may modify the `config.mk` and `setup.py` files thereafter; subsequent executions of configure will prompt to overwrite these files.

Note: there is a (now-fixed) [bug](https://github.com/pmodels/mpich/pull/6543) in recent versions of MPICH that causes a segmentation fault in CTF when executing with 2 or more MPI processes.
The bug can be remedied without rebuilding CTF by setting an environment variable as follows,
```sh
export MPIR_CVAR_DEVICE_COLLECTIVES=none
```

### Dependencies and Supplemental Packages

The strict library dependencies of Cyclops are MPI and BLAS libraries.
Expand Down
61 changes: 35 additions & 26 deletions bench/model_trainer.cxx
Expand Up @@ -23,6 +23,7 @@ void train_off_vec_mat(int64_t n, int64_t m, World & dw, bool sp_A, bool sp_B, b
void train_ttm(int64_t sz, int64_t r, World & dw){
Timer TTM("TTM");
TTM.start();
srand48(dw.rank);
for (int order=2; order<7; order++){
int64_t n = 1;
while (std::pow(n,order) < sz){
Expand Down Expand Up @@ -87,6 +88,7 @@ void train_ttm(int64_t sz, int64_t r, World & dw){
void train_sparse_mttkrp(int64_t sz, int64_t R, World & dw){
Timer sMTTKRP("sMTTKRP");
sMTTKRP.start();
srand48(dw.rank);
for (double sp = .1; sp>.000001; sp*=.25){
int64_t n = (int64_t)cbrt(sz/sp);
int64_t lens[3] = {n, n, n};
Expand Down Expand Up @@ -223,6 +225,7 @@ void train_sps_vec_mat(int64_t n, int64_t m, World & dw, bool sp_A, bool sp_B, b
void train_ccsd(int64_t n, int64_t m, World & dw){
Timer ccsd_t("CCSD");
ccsd_t.start();
srand48(dw.rank);
int nv = sqrt(n);
int no = sqrt(m);
Integrals V(no, nv, dw);
Expand Down Expand Up @@ -271,18 +274,15 @@ void train_world(double dtime, World & dw, double step_size){
}
train_sparse_mttkrp(n*m/8, m, dw);
train_dns_vec_mat(n, m, dw);
train_sps_vec_mat(n-2, m, dw, 0, 0, 0);
train_sps_vec_mat(n-4, m-2, dw, 1, 0, 0);
train_sps_vec_mat(n-1, m-4, dw, 1, 1, 0);
train_sps_vec_mat(n-2, m-3, dw, 1, 1, 1);
train_off_vec_mat(n+7, m-4, dw, 0, 0, 0);
train_off_vec_mat(n-2, m+6, dw, 1, 0, 0);
train_off_vec_mat(n-5, m+2, dw, 1, 1, 0);
train_off_vec_mat(n-3, m-1, dw, 1, 1, 1);
train_ccsd(n/2, m/2, dw);
train_sps_vec_mat(n, m, dw, 0, 0, 0);
train_sps_vec_mat(n, m, dw, 0, 0, 1);
train_off_vec_mat(n, m, dw, 0, 0, 0);
train_off_vec_mat(n, m, dw, 1, 0, 0);
train_off_vec_mat(n, m, dw, 1, 1, 0);
train_off_vec_mat(n, m, dw, 1, 1, 1);
train_ccsd(n, m, dw);
train_sparse_mp3(n,m,dw);
niter++;
// m *= 1.9;
m *= step_size;
n += 2;
ctime = MPI_Wtime() - t_st;
Expand Down Expand Up @@ -310,7 +310,8 @@ void frize(std::set<int> & ps, int p){
}
}

void train_all(double time, bool write_coeff, bool dump_data, std::string coeff_file, std::string data_dir){
void train_all(double time, bool write_coeff, bool dump_data, std::string coeff_file, std::string data_dir,
int num_iterations, double time_jump, int verbose){
World dw(MPI_COMM_WORLD);
int np = dw.np;
int rank;
Expand All @@ -333,13 +334,6 @@ void train_all(double time, bool write_coeff, bool dump_data, std::string coeff_
MPI_Comm_split(dw.comm, color, key, &cm);
World w(cm);

// number of iterations for training
int num_iterations = 5;

// control how much dtime should be increased upon each iteration
// dtime = dtime * time_dump at the end of each iteration
double time_jump = 1.5;

double dtime = (time / (1- 1/time_jump)) / pow(time_jump, num_iterations - 1.0);
for (int i=0; i<num_iterations; i++){
// TODO probably need to adjust
Expand All @@ -351,35 +345,35 @@ void train_all(double time, bool write_coeff, bool dump_data, std::string coeff_
if (color != end_color){
train_world(dtime/5, w, step_size);
CTF_int::update_all_models(cm);
if (rank == 0){
if (rank == 0 && verbose == 1){
printf("Completed training round 1/5\n");
}
}

if (color != end_color)
train_world(dtime/5, w, step_size);
CTF_int::update_all_models(MPI_COMM_WORLD);
if (rank == 0){
if (rank == 0 && verbose == 1){
printf("Completed training round 2/5\n");
}
if (color != end_color){
train_world(dtime/5, w, step_size);
CTF_int::update_all_models(cm);
if (rank == 0){
if (rank == 0 && verbose == 1){
printf("Completed training round 3/5\n");
}
}

if (color != end_color)
train_world(dtime/5, w, step_size);
CTF_int::update_all_models(MPI_COMM_WORLD);
if (rank == 0){
if (rank == 0 && verbose == 1){
printf("Completed training round 4/5\n");
}
train_world(dtime/5, dw, step_size);
CTF_int::update_all_models(MPI_COMM_WORLD);

if (rank == 0){
if (rank == 0 && verbose == 1){
printf("Completed training round 5/5\n");
}
// double dtime for next iteration
Expand Down Expand Up @@ -409,8 +403,8 @@ char* getCmdOption(char ** begin,


int main(int argc, char ** argv){
int rank, np;
double time;
int rank, np, num_iterations, verbose;
double time, time_jump;
char * file_path;
int const in_num = argc;
char ** input_str = argv;
Expand All @@ -428,6 +422,21 @@ int main(int argc, char ** argv){
if (time < 0) time = 5.0;
} else time = 5.0;

if (getCmdOption(input_str, input_str+in_num, "-verbose")){
verbose = atoi(getCmdOption(input_str, input_str+in_num, "-verbose"));
if (verbose < 0 || verbose > 1) verbose = 0;
} else verbose = 0;

// number of iterations for training
if (getCmdOption(input_str, input_str+in_num, "-niter")){
num_iterations = atoi(getCmdOption(input_str, input_str+in_num, "-niter"));
} else num_iterations = 5;

// control how much dtime should be increased upon each iteration
// dtime = dtime * time_jump at the end of each iteration
if (getCmdOption(input_str, input_str+in_num, "-time_jump")){
time_jump = atof(getCmdOption(input_str, input_str+in_num, "-time_jump"));
} else time_jump = 1.5;

// Boolean expression that are used to pass command line argument to function train_all
bool write_coeff = false;
Expand Down Expand Up @@ -460,7 +469,7 @@ int main(int argc, char ** argv){
printf("Executing a wide set of contractions to train model with time budget of %lf sec\n", time);
if (write_coeff) printf("At the end of execution write new coefficients will be written to model file %s\n",file_path);
}
train_all(time, write_coeff, dump_data, coeff_file, data_dir_str);
train_all(time, write_coeff, dump_data, coeff_file, data_dir_str, num_iterations, time_jump, verbose);
}


Expand Down
3 changes: 1 addition & 2 deletions configure
Expand Up @@ -1357,8 +1357,7 @@ fi


cat > $BUILDDIR/setup.py <<EOF
from distutils.core import setup
from distutils.extension import Extension
from setuptools import setup, Extension
import numpy
import os
Expand Down

0 comments on commit 917bcb8

Please sign in to comment.