Skip to content

Commit

Permalink
add
Browse files Browse the repository at this point in the history
  • Loading branch information
j1c committed Oct 17, 2023
1 parent e6566e4 commit ebc6f4e
Show file tree
Hide file tree
Showing 3 changed files with 206 additions and 55 deletions.
115 changes: 101 additions & 14 deletions data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 20,
"id": "03d96d14-c068-471b-b7c1-80e17621f73e",
"metadata": {},
"outputs": [],
Expand All @@ -50,12 +50,15 @@
"n = 200\n",
"reps = 300\n",
"\n",
"phi = 0.5\n",
"sigma = 1 - (phi**2)\n",
"\n",
"np.random.seed(1)\n",
"\n",
"datas = [ts_sim(\"indep_ar\", n) for _ in range(reps)]\n",
"datas = [ts_sim(\"indep_ar\", n, phi=phi, sigma=sigma) for _ in range(reps)]\n",
"\n",
"X = np.hstack([data[0] for data in datas])\n",
"Y = np.hstack([data[1] for data in datas])\n",
"X = np.stack([data[0] for data in datas])\n",
"Y = np.stack([data[1] for data in datas])\n",
"\n",
"savedict = {\n",
" 'X' : X,\n",
Expand All @@ -76,24 +79,25 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 5,
"id": "d6a02659-109a-451f-9d06-773e0726e9b9",
"metadata": {},
"outputs": [],
"source": [
"fname = \"2-independent_ar_phi\"\n",
"\n",
"n = 1200\n",
"n = 300\n",
"reps = 300\n",
"phis = np.arange(0.2, 1, 0.025)\n",
"sigmas = 1 - (phis**2)\n",
"\n",
"np.random.seed(1)\n",
"\n",
"Xs = []\n",
"Ys = []\n",
"\n",
"for phi in phis:\n",
" datas = [ts_sim(\"indep_ar\", n, phi=float(phi)) for _ in range(reps)]\n",
"for (phi, sigma) in zip(phis, sigmas):\n",
" datas = [ts_sim(\"indep_ar\", n, phi=float(phi), sigma=float(sigma)) for _ in range(reps)]\n",
" Xs.append(np.hstack([data[0] for data in datas]))\n",
" Ys.append(np.hstack([data[1] for data in datas]))\n",
"\n",
Expand All @@ -108,7 +112,7 @@
"}\n",
"\n",
"# save to disk\n",
"sp.io.savemat(f'{p}{fname}.mat', savedict, do_compression=True)"
"# sp.io.savemat(f'{p}{fname}.mat', savedict, do_compression=True)"
]
},
{
Expand Down Expand Up @@ -220,15 +224,98 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"cell_type": "markdown",
"id": "98cf8254-3a4e-4421-a272-5a0eb81d2c37",
"metadata": {},
"source": [
"Generate Experiment 6 - Independent Vector AR(1) with increasing sample size\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2899b2f6",
"metadata": {},
"outputs": [],
"source": [
"# Generate Experiment 6 - optimal lag estimation\n",
"\n"
"def indep_var(n, d, phi=0.5, seed=None):\n",
" \"\"\"\n",
" d : corresponds to dimension of the time series\n",
" \"\"\"\n",
" rng = rng = np.random.default_rng(seed)\n",
" coeff = np.eye(d*2) * phi\n",
" covar = np.eye(d*2) * (1 - (phi ** 2))\n",
" errors = np.random.multivariate_normal(np.zeros(d * 2), covar, n)\n",
"\n",
" Y = np.zeros((n, d * 2))\n",
" Y[0] = 0\n",
"\n",
" for t in range(1, n):\n",
" Y[t] = np.dot(coeff, Y[t - 1]) + errors[t]\n",
"\n",
" series1 = Y[:, :d]\n",
" series2 = Y[:, d:]\n",
"\n",
" return series1, series2"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "5f28ebc4",
"metadata": {},
"outputs": [],
"source": [
"fname = \"6-independent_var_n\"\n",
"\n",
"n = 200\n",
"d = 100\n",
"reps = 300\n",
"\n",
"phi = 0.5\n",
"\n",
"datas = [indep_var(n, d, phi, seed=1) for _ in range(reps)]\n",
"\n",
"X = np.stack([data[0] for data in datas])\n",
"Y = np.stack([data[1] for data in datas])\n",
"\n",
"savedict = {\n",
" 'X' : X,\n",
" 'Y' : Y,\n",
"}\n",
"\n",
"# save to disk\n",
"sp.io.savemat(f'{p}{fname}.mat', savedict, do_compression=True)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "b8583276",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(300, 200, 100)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b0ac59d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -247,7 +334,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.10.13"
}
},
"nbformat": 4,
Expand Down
146 changes: 105 additions & 41 deletions figure1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -5,40 +5,38 @@
"id": "fd860b83-29b8-4689-ba75-79b11670a5dc",
"metadata": {},
"source": [
"# Comparing the False Positive Rate (Type I) Under the Null \n",
"# Comparing the False Positive Rate (Type I) Under the Null\n",
"\n",
"The purpose of this simulation is to confirm the validity of the test, and thus, we expect the testing power to be close to the significance level $\\alpha$. Here we use the independent AR(1) processes, and the sampling process is:\n",
"\n",
"\\begin{equation}\n",
" \\begin{bmatrix}\n",
" X_t\\\\\n",
" Y_t\n",
" \\end{bmatrix}\n",
" =\n",
" \\begin{bmatrix}\n",
" \\phi & 0\\\\\n",
" 0 & \\phi\n",
" \\end{bmatrix}\n",
" \\begin{bmatrix}\n",
" X_{t-1}\\\\\n",
" Y_{t-1}\n",
" \\end{bmatrix}\n",
" +\n",
" \\begin{bmatrix}\n",
" \\epsilon_t\\\\\n",
" \\eta_t\n",
" \\end{bmatrix},\n",
" \\label{eqn:nodep}\n",
"\\begin{bmatrix}\n",
"X_t\\\\\n",
"Y_t\n",
"\\end{bmatrix}\n",
"=\n",
"\\begin{bmatrix}\n",
"\\phi & 0\\\\\n",
"0 & \\phi\n",
"\\end{bmatrix}\n",
"\\begin{bmatrix}\n",
"X_{t-1}\\\\\n",
"Y_{t-1}\n",
"\\end{bmatrix} +\n",
"\\begin{bmatrix}\n",
"\\epsilon_t\\\\\n",
"\\eta_t\n",
"\\end{bmatrix},\n",
"\\end{equation}\n",
"\n",
"where $(\\epsilon_t,\\eta_t)$ is the noise generated by standard normal. For first experiment, we vary the length of time series from $n\\in \\{10, 20, 30, \\ldots, 200\\}$ with $\\phi=0.5$. For second experiment, we vary the AR coefficient $\\phi\\in\\{0.2, 0.25,\\ldots, 0.95\\}$ with $n=1200$. We use 1000 permutation per replication with 300 replications in total.\n",
"where $(\\epsilon_t,\\eta_t)$ is the noise generated by standard normal. For first experiment, we vary the length of time series from $n\\in \\{10, 20, 30, \\ldots, 200\\}$ with $\\phi=0.5$. For second experiment, we vary the AR coefficient $\\phi\\in\\{0.2, 0.25,\\ldots, 0.95\\}$ with $n=1200$. We use 1000 permutation per replication with 300 replications in total.\n",
"\n",
"See here for wildHSIC and shiftHSIC computation done in matlab, and see here for data generation notebook."
"See here for wildHSIC and shiftHSIC computation done in matlab, and see here for data generation notebook.\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 16,
"id": "86756329-8d2b-4794-998c-c7cc3174d8d9",
"metadata": {
"tags": [
Expand All @@ -47,47 +45,113 @@
},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt"
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import scipy as sp\n",
"from pathlib import Path\n",
"from hyppo.time_series import DcorrX, MGCX, LjungBox\n",
"from joblib import Parallel, delayed"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "0f8f0546-9cb7-4b0c-9cf4-3c4043cfada2",
"execution_count": 12,
"id": "4d04d001-cea3-42d4-9085-267074f5932a",
"metadata": {},
"outputs": [],
"source": [
"import scipy as sp\n",
"from pathlib import Path\n",
"data = sp.io.loadmat(\"./data/1-independent_ar_n.mat\")\n",
"\n",
"from hyppo.time_series import DcorrX, MGCX\n",
"X = data[\"X\"]\n",
"Y = data[\"Y\"]\n",
"\n",
"from joblib import Parallel, delayed"
"n_reps = X.shape[0]\n",
"\n",
"ns = list(range(10, 201, 10))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "4d04d001-cea3-42d4-9085-267074f5932a",
"execution_count": 21,
"id": "c7fbd536-8461-4e76-8b09-b7a6b2144b34",
"metadata": {},
"outputs": [],
"source": [
"data = sp.io.loadmat(\"./data/1-independent_ar_n.mat\")\n",
"def worker(X, Y, test, reps=1000):\n",
" n, d = X.shape\n",
"\n",
"X = data['X']\n",
"Y = data['Y']"
" res = test.test(X, Y, reps=reps)\n",
" return n, d, res[1] # pvalue"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7fbd536-8461-4e76-8b09-b7a6b2144b34",
"execution_count": 22,
"id": "dbb6d66d",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-2)]: Done 2 out of 6 | elapsed: 1.7s remaining: 3.4s\n",
"[Parallel(n_jobs=-2)]: Done 3 out of 6 | elapsed: 1.7s remaining: 1.7s\n",
"[Parallel(n_jobs=-2)]: Done 4 out of 6 | elapsed: 1.7s remaining: 0.9s\n",
"[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 1.7s remaining: 0.0s\n",
"[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 1.7s finished\n",
"[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.\n",
"[Parallel(n_jobs=-2)]: Done 2 out of 6 | elapsed: 20.7s remaining: 41.4s\n",
"[Parallel(n_jobs=-2)]: Done 3 out of 6 | elapsed: 20.8s remaining: 20.8s\n",
"[Parallel(n_jobs=-2)]: Done 4 out of 6 | elapsed: 20.8s remaining: 10.4s\n",
"[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 20.9s remaining: 0.0s\n",
"[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 20.9s finished\n",
"[Parallel(n_jobs=-2)]: Using backend LokyBackend with 11 concurrent workers.\n",
"[Parallel(n_jobs=-2)]: Done 2 out of 6 | elapsed: 8.4s remaining: 16.9s\n",
"[Parallel(n_jobs=-2)]: Done 3 out of 6 | elapsed: 8.5s remaining: 8.5s\n",
"[Parallel(n_jobs=-2)]: Done 4 out of 6 | elapsed: 10.6s remaining: 5.3s\n",
"[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 11.1s remaining: 0.0s\n",
"[Parallel(n_jobs=-2)]: Done 6 out of 6 | elapsed: 11.1s finished\n"
]
}
],
"source": [
"def worker():\n",
" pass"
"test_dict = {\n",
" \"LjungBox\": LjungBox,\n",
" \"DcorrX\": DcorrX,\n",
" \"MGCX\": MGCX\n",
"}\n",
"\n",
"dfs = []\n",
"\n",
"for test_name, test in test_dict.items():\n",
" # if test_name == \"LjungBox\":\n",
" # auto=True\n",
" # else:\n",
" # auto=False\n",
" results = Parallel(-2, verbose=1)(delayed(worker)(X[i, :n, :], Y[i, :n, :], test(max_lag=1)) for n in [10, 20] for i in range(3))\n",
"\n",
" df = pd.DataFrame(results, columns=[\"n\", \"d\", 'pval'])\n",
" df['test'] = test_name\n",
" dfs.append(df)\n",
"\n",
"df = pd.concat(dfs, axis=0, ignore_index=True)\n",
"df.to_csv(\"./outs/indep_ar_n.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60cbfb7e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -106,7 +170,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.10.13"
}
},
"nbformat": 4,
Expand Down
Empty file added outs/.gitkeep
Empty file.

0 comments on commit ebc6f4e

Please sign in to comment.