add

neurodata · Oct 17, 2023 · ebc6f4e · ebc6f4e
1 parent e6566e4
commit ebc6f4e
Show file tree

Hide file tree

Showing 3 changed files with 206 additions and 55 deletions.
diff --git a/data.ipynb b/data.ipynb
@@ -40,7 +40,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 20,
    "id": "03d96d14-c068-471b-b7c1-80e17621f73e",
    "metadata": {},
    "outputs": [],
@@ -50,12 +50,15 @@
     "n = 200\n",
     "reps = 300\n",
     "\n",
+    "phi = 0.5\n",
+    "sigma = 1 - (phi**2)\n",
+    "\n",
     "np.random.seed(1)\n",
     "\n",
-    "datas = [ts_sim(\"indep_ar\", n) for _ in range(reps)]\n",
+    "datas = [ts_sim(\"indep_ar\", n, phi=phi, sigma=sigma) for _ in range(reps)]\n",
     "\n",
-    "X = np.hstack([data[0] for data in datas])\n",
-    "Y = np.hstack([data[1] for data in datas])\n",
+    "X = np.stack([data[0] for data in datas])\n",
+    "Y = np.stack([data[1] for data in datas])\n",
     "\n",
     "savedict = {\n",
     "    'X' : X,\n",
@@ -76,24 +79,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 5,
    "id": "d6a02659-109a-451f-9d06-773e0726e9b9",
    "metadata": {},
    "outputs": [],
    "source": [
     "fname = \"2-independent_ar_phi\"\n",
     "\n",
-    "n = 1200\n",
+    "n = 300\n",
     "reps = 300\n",
     "phis = np.arange(0.2, 1, 0.025)\n",
+    "sigmas = 1 - (phis**2)\n",
     "\n",
     "np.random.seed(1)\n",
     "\n",
     "Xs = []\n",
     "Ys = []\n",
     "\n",
-    "for phi in phis:\n",
-    "    datas = [ts_sim(\"indep_ar\", n, phi=float(phi)) for _ in range(reps)]\n",
+    "for (phi, sigma) in zip(phis, sigmas):\n",
+    "    datas = [ts_sim(\"indep_ar\", n, phi=float(phi), sigma=float(sigma)) for _ in range(reps)]\n",
     "    Xs.append(np.hstack([data[0] for data in datas]))\n",
     "    Ys.append(np.hstack([data[1] for data in datas]))\n",
     "\n",
@@ -108,7 +112,7 @@
     "}\n",
     "\n",
     "# save to disk\n",
-    "sp.io.savemat(f'{p}{fname}.mat', savedict, do_compression=True)"
+    "# sp.io.savemat(f'{p}{fname}.mat', savedict, do_compression=True)"
    ]
   },
   {
@@ -220,15 +224,98 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "id": "98cf8254-3a4e-4421-a272-5a0eb81d2c37",
    "metadata": {},
+   "source": [
+    "Generate Experiment 6 - Independent Vector AR(1) with increasing sample size\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "2899b2f6",
+   "metadata": {},
    "outputs": [],
    "source": [
-    "# Generate Experiment 6 - optimal lag estimation\n",
-    "\n"
+    "def indep_var(n, d, phi=0.5, seed=None):\n",
+    "    \"\"\"\n",
+    "    d : corresponds to dimension of the time series\n",
+    "    \"\"\"\n",
+    "    rng = rng = np.random.default_rng(seed)\n",
+    "    coeff = np.eye(d*2) * phi\n",
+    "    covar = np.eye(d*2) * (1 - (phi ** 2))\n",
+    "    errors = np.random.multivariate_normal(np.zeros(d * 2), covar, n)\n",
+    "\n",
+    "    Y = np.zeros((n, d * 2))\n",
+    "    Y[0] = 0\n",
+    "\n",
+    "    for t in range(1, n):\n",
+    "        Y[t] = np.dot(coeff, Y[t - 1]) + errors[t]\n",
+    "\n",
+    "    series1 = Y[:, :d]\n",
+    "    series2 = Y[:, d:]\n",
+    "\n",
+    "    return series1, series2"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "5f28ebc4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fname = \"6-independent_var_n\"\n",
+    "\n",
+    "n = 200\n",
+    "d = 100\n",
+    "reps = 300\n",
+    "\n",
+    "phi = 0.5\n",
+    "\n",
+    "datas = [indep_var(n, d, phi, seed=1) for _ in range(reps)]\n",
+    "\n",
+    "X = np.stack([data[0] for data in datas])\n",
+    "Y = np.stack([data[1] for data in datas])\n",
+    "\n",
+    "savedict = {\n",
+    "    'X' : X,\n",
+    "    'Y' : Y,\n",
+    "}\n",
+    "\n",
+    "# save to disk\n",
+    "sp.io.savemat(f'{p}{fname}.mat', savedict, do_compression=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "b8583276",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(300, 200, 100)"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4b0ac59d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -247,7 +334,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.10.13"
   }
  },
  "nbformat": 4,

diff --git a/figure1.ipynb b/figure1.ipynb
@@ -5,40 +5,38 @@
    "id": "fd860b83-29b8-4689-ba75-79b11670a5dc",
    "metadata": {},
    "source": [
-    "# Comparing the False Positive Rate (Type I) Under the Null \n",
+    "# Comparing the False Positive Rate (Type I) Under the Null\n",
     "\n",
     "The purpose of this simulation is to confirm the validity of the test, and thus, we expect the testing power to be close to the significance level $\\alpha$. Here we use the independent AR(1) processes, and the sampling process is:\n",
     "\n",
     "\\begin{equation}\n",
-    "    \\begin{bmatrix}\n",
-    "    X_t\\\\\n",
-    "    Y_t\n",
-    "    \\end{bmatrix}\n",
-    "    =\n",
-    "    \\begin{bmatrix}\n",
-    "    \\phi & 0\\\\\n",
-    "    0 & \\phi\n",
-    "    \\end{bmatrix}\n",
-    "    \\begin{bmatrix}\n",
-    "    X_{t-1}\\\\\n",
-    "    Y_{t-1}\n",
-    "    \\end{bmatrix}\n",
-    "    +\n",
-    "    \\begin{bmatrix}\n",
-    "    \\epsilon_t\\\\\n",
-    "    \\eta_t\n",
-    "    \\end{bmatrix},\n",
-    "    \\label{eqn:nodep}\n",
+    "\\begin{bmatrix}\n",
+    "X_t\\\\\n",
+    "Y_t\n",
+    "\\end{bmatrix}\n",
+    "=\n",
+    "\\begin{bmatrix}\n",
+    "\\phi & 0\\\\\n",
+    "0 & \\phi\n",
+    "\\end{bmatrix}\n",
+    "\\begin{bmatrix}\n",
+    "X_{t-1}\\\\\n",
+    "Y_{t-1}\n",
+    "\\end{bmatrix} +\n",
+    "\\begin{bmatrix}\n",
+    "\\epsilon_t\\\\\n",
+    "\\eta_t\n",
+    "\\end{bmatrix},\n",
     "\\end{equation}\n",
     "\n",
-    "where $(\\epsilon_t,\\eta_t)$ is the noise generated by standard normal. For first experiment, we vary the length of time series from $n\\in \\{10, 20, 30, \\ldots, 200\\}$  with $\\phi=0.5$. For second experiment, we vary the AR coefficient $\\phi\\in\\{0.2,  0.25,\\ldots, 0.95\\}$ with $n=1200$. We use 1000 permutation per replication with 300 replications in total.\n",
+    "where $(\\epsilon_t,\\eta_t)$ is the noise generated by standard normal. For first experiment, we vary the length of time series from $n\\in \\{10, 20, 30, \\ldots, 200\\}$ with $\\phi=0.5$. For second experiment, we vary the AR coefficient $\\phi\\in\\{0.2,  0.25,\\ldots, 0.95\\}$ with $n=1200$. We use 1000 permutation per replication with 300 replications in total.\n",
     "\n",
-    "See here for wildHSIC and shiftHSIC computation done in matlab, and see here for data generation notebook."
+    "See here for wildHSIC and shiftHSIC computation done in matlab, and see here for data generation notebook.\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 16,
    "id": "86756329-8d2b-4794-998c-c7cc3174d8d9",
    "metadata": {
     "tags": [
@@ -47,47 +45,113 @@
    },
    "outputs": [],
    "source": [
-    "import matplotlib.pyplot as plt"
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import scipy as sp\n",
+    "from pathlib import Path\n",
+    "from hyppo.time_series import DcorrX, MGCX, LjungBox\n",
+    "from joblib import Parallel, delayed"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "id": "0f8f0546-9cb7-4b0c-9cf4-3c4043cfada2",
+   "execution_count": 12,
+   "id": "4d04d001-cea3-42d4-9085-267074f5932a",
    "metadata": {},
    "outputs": [],
    "source": [
-    "import scipy as sp\n",
-    "from pathlib import Path\n",
+    "data = sp.io.loadmat(\"./data/1-independent_ar_n.mat\")\n",
     "\n",
-    "from hyppo.time_series import DcorrX, MGCX\n",
+    "X = data[\"X\"]\n",
+    "Y = data[\"Y\"]\n",
     "\n",
-    "from joblib import Parallel, delayed"
+    "n_reps = X.shape[0]\n",
+    "\n",
+    "ns = list(range(10, 201, 10))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "id": "4d04d001-cea3-42d4-9085-267074f5932a",
+   "execution_count": 21,
+   "id": "c7fbd536-8461-4e76-8b09-b7a6b2144b34",
    "metadata": {},
    "outputs": [],
    "source": [
-    "data = sp.io.loadmat(\"./data/1-independent_ar_n.mat\")\n",
+    "def worker(X, Y, test, reps=1000):\n",
+    "    n, d = X.shape\n",
     "\n",
-    "X = data['X']\n",
-    "Y = data['Y']"
+    "    res = test.test(X, Y, reps=reps)\n",
+    "    return n, d, res[1] # pvalue"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "c7fbd536-8461-4e76-8b09-b7a6b2144b34",
+   "execution_count": 22,
+   "id": "dbb6d66d",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=-2)]: Done   2 out of   6 | elapsed:    1.7s remaining:    3.4s\n",
+      "[Parallel(n_jobs=-2)]: Done   3 out of   6 | elapsed:    1.7s remaining:    1.7s\n",
+      "[Parallel(n_jobs=-2)]: Done   4 out of   6 | elapsed:    1.7s remaining:    0.9s\n",
+      "[Parallel(n_jobs=-2)]: Done   6 out of   6 | elapsed:    1.7s remaining:    0.0s\n",
+      "[Parallel(n_jobs=-2)]: Done   6 out of   6 | elapsed:    1.7s finished\n",
+      "[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.\n",
+      "[Parallel(n_jobs=-2)]: Done   2 out of   6 | elapsed:   20.7s remaining:   41.4s\n",
+      "[Parallel(n_jobs=-2)]: Done   3 out of   6 | elapsed:   20.8s remaining:   20.8s\n",
+      "[Parallel(n_jobs=-2)]: Done   4 out of   6 | elapsed:   20.8s remaining:   10.4s\n",
+      "[Parallel(n_jobs=-2)]: Done   6 out of   6 | elapsed:   20.9s remaining:    0.0s\n",
+      "[Parallel(n_jobs=-2)]: Done   6 out of   6 | elapsed:   20.9s finished\n",
+      "[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.\n",
+      "[Parallel(n_jobs=-2)]: Done   2 out of   6 | elapsed:    8.4s remaining:   16.9s\n",
+      "[Parallel(n_jobs=-2)]: Done   3 out of   6 | elapsed:    8.5s remaining:    8.5s\n",
+      "[Parallel(n_jobs=-2)]: Done   4 out of   6 | elapsed:   10.6s remaining:    5.3s\n",
+      "[Parallel(n_jobs=-2)]: Done   6 out of   6 | elapsed:   11.1s remaining:    0.0s\n",
+      "[Parallel(n_jobs=-2)]: Done   6 out of   6 | elapsed:   11.1s finished\n"
+     ]
+    }
+   ],
    "source": [
-    "def worker():\n",
-    "    pass"
+    "test_dict = {\n",
+    "    \"LjungBox\": LjungBox,\n",
+    "    \"DcorrX\": DcorrX,\n",
+    "    \"MGCX\": MGCX\n",
+    "}\n",
+    "\n",
+    "dfs = []\n",
+    "\n",
+    "for test_name, test in test_dict.items():\n",
+    "    # if test_name == \"LjungBox\":\n",
+    "    #     auto=True\n",
+    "    # else:\n",
+    "    #     auto=False\n",
+    "    results = Parallel(-2, verbose=1)(delayed(worker)(X[i, :n, :], Y[i, :n, :], test(max_lag=1)) for n in [10, 20] for i in range(3))\n",
+    "\n",
+    "    df = pd.DataFrame(results, columns=[\"n\", \"d\", 'pval'])\n",
+    "    df['test'] = test_name\n",
+    "    dfs.append(df)\n",
+    "\n",
+    "df = pd.concat(dfs, axis=0, ignore_index=True)\n",
+    "df.to_csv(\"./outs/indep_ar_n.csv\", index=False)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "60cbfb7e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -106,7 +170,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.10.13"
   }
  },
  "nbformat": 4,

diff --git a/outs/.gitkeep b/outs/.gitkeep