Commit 6a3eca1

PBMCs on cluster changes
1 parent a5b7f21 commit 6a3eca1

File tree

5 files changed (+859, -404 lines)


Plate_2_data/4.processing_features/0.merge_sc_plate2.ipynb

Lines changed: 1 addition & 1 deletion
@@ -58,7 +58,7 @@
  "source": [
  "# set directory for sqlite files\n",
  "sqlite_dir = pathlib.Path(\n",
- "    \"/media/lippincm/c58d4f19-ae4d-4b78-8370-2c2639886da0/interstellar_data/70117_20230210MM1_Gasdermin514_CP_BC430856__2023-03-22T15_42_38-Measurement1/PBMC_SQLite_Outputs\"\n",
+ "    \"/scratch/alpine/mlippincott@xsede.org/sqlite_files\"\n",
  ").resolve(strict=True)\n",
  "\n",
  "# dictionary with info for the sqlite file from each run\n",

Plate_2_data/4.processing_features/5.extract_image_features.ipynb

Lines changed: 116 additions & 16 deletions
@@ -61,16 +61,76 @@
  "# metadata I need to use to identify what exact image the features come from\n",
  "strata=[\"Image_Metadata_Well\", \"Image_Metadata_Plate\", \"Image_Metadata_Site\"]\n",
  "\n",
- "run_info_dictionary = {\n",
- "    \"SHSY5Y_first_run\": {\n",
- "        \"sql_file\": \"SHSY5Y_cells_incomplete_first_run.sqlite\",\n",
- "        \"image_features_output_file\": pathlib.Path(f\"{features_output_dir}/frist_run_image_quality.csv.gz\"),\n",
+ "# set directory for sqlite files\n",
+ "sqlite_dir = pathlib.Path(\n",
+ "    \"/scratch/alpine/mlippincott@xsede.org/sqlite_files\"\n",
+ ").resolve(strict=True)\n",
  "\n",
+ "# dictionary with info for the sqlite file from each run\n",
+ "run_info_dictionary = {\n",
+ "    \"batch_1\": {\n",
+ "        # path to outputted SQLite file\n",
+ "        \"source_path\": str(\n",
+ "            pathlib.Path(\n",
+ "                f\"{sqlite_dir}/PBMC_batch_1.sqlite\"\n",
+ "            )\n",
+ "        ),\n",
+ "        \"dest_path\": str(pathlib.Path(f\"{features_output_dir}/PBMC_batch_1_image_quality.parquet\")),\n",
+ "    },\n",
+ "    \"batch_2\": {\n",
+ "        # path to outputted SQLite file\n",
+ "        \"source_path\": str(\n",
+ "            pathlib.Path(\n",
+ "                f\"{sqlite_dir}/PBMC_batch_2.sqlite\"\n",
+ "            )\n",
+ "        ),\n",
+ "        \"dest_path\": str(pathlib.Path(f\"{features_output_dir}/PBMC_batch_2_image_quality.parquet\")),\n",
+ "    },\n",
+ "    \"batch_3\": {\n",
+ "        # path to outputted SQLite file\n",
+ "        \"source_path\": str(\n",
+ "            pathlib.Path(\n",
+ "                f\"{sqlite_dir}/PBMC_batch_3.sqlite\"\n",
+ "            )\n",
+ "        ),\n",
+ "        \"dest_path\": str(pathlib.Path(f\"{features_output_dir}/PBMC_batch_3.parquet\")),\n",
+ "    },\n",
+ "    \"batch_4\": {\n",
+ "        # path to outputted SQLite file\n",
+ "        \"source_path\": str(\n",
+ "            pathlib.Path(\n",
+ "                f\"{sqlite_dir}/PBMC_batch_4.sqlite\"\n",
+ "            )\n",
+ "        ),\n",
+ "        \"dest_path\": str(pathlib.Path(f\"{features_output_dir}/PBMC_batch_4.parquet\")),\n",
+ "    },\n",
+ "    \"batch_5\": {\n",
+ "        # path to outputted SQLite file\n",
+ "        \"source_path\": str(\n",
+ "            pathlib.Path(\n",
+ "                f\"{sqlite_dir}/PBMC_batch_5.sqlite\"\n",
+ "            )\n",
+ "        ),\n",
+ "        \"dest_path\": str(pathlib.Path(f\"{features_output_dir}/PBMC_batch_5.parquet\")),\n",
  "    },\n",
- "    \"SHSY5Y_second_run\": {\n",
- "        \"sql_file\": \"SHSY5Y_cells_second_run.sqlite\",\n",
- "        \"image_features_output_file\": pathlib.Path(f\"{features_output_dir}/second_run_image_quality.csv.gz\"),\n",
+ "    \"batch_6\": {\n",
+ "        # path to outputted SQLite file\n",
+ "        \"source_path\": str(\n",
+ "            pathlib.Path(\n",
+ "                f\"{sqlite_dir}/PBMC_batch_6.sqlite\"\n",
+ "            )\n",
+ "        ),\n",
+ "        \"dest_path\": str(pathlib.Path(f\"{features_output_dir}/PBMC_batch_6.parquet\")),\n",
  "    },\n",
+ "    \"batch_7\": {\n",
+ "        # path to outputted SQLite file\n",
+ "        \"source_path\": str(\n",
+ "            pathlib.Path(\n",
+ "                f\"{sqlite_dir}/PBMC_batch_7.sqlite\"\n",
+ "            )\n",
+ "        ),\n",
+ "        \"dest_path\": str(pathlib.Path(f\"{features_output_dir}/PBMC_batch_7.parquet\")),\n",
+ "    } \n",
  "}\n"
  ]
  },
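Since the seven entries differ only in the batch number, the same mapping can be built in a loop. A sketch of an equivalent construction (not the commit's code; features_output_dir is a placeholder value here, and this variant names every output with the _image_quality suffix, whereas the commit uses that suffix only for batches 1 and 2):

    import pathlib

    # hypothetical stand-in for the notebook's features_output_dir
    features_output_dir = pathlib.Path("results/image_quality")

    sqlite_dir = pathlib.Path("/scratch/alpine/mlippincott@xsede.org/sqlite_files")

    # build the same batch_1 .. batch_7 mapping programmatically
    run_info_dictionary = {
        f"batch_{i}": {
            # path to outputted SQLite file
            "source_path": str(sqlite_dir / f"PBMC_batch_{i}.sqlite"),
            "dest_path": str(features_output_dir / f"PBMC_batch_{i}_image_quality.parquet"),
        }
        for i in range(1, 8)
    }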
@@ -367,23 +427,63 @@
  "source": [
  "# read in SQLite Per_Image table as dataframe for each run\n",
  "## First run\n",
- "sql_file_first_run = run_info_dictionary[\"SHSY5Y_first_run\"][\"sql_file\"]\n",
+ "sql_file_first_run = run_info_dictionary[\"batch_1\"][\"source_path\"]\n",
  "single_cell_file_first_run = f\"sqlite:///{cp_output_dir}/{sql_file_first_run}\"\n",
  "image_df_first_run = extract_utils.load_sqlite_as_df(\n",
  "    sqlite_file_path=single_cell_file_first_run, image_table_name=\"Per_Image\"\n",
  ")\n",
+ "\n",
  "## Second run\n",
- "sql_file_second_run = run_info_dictionary[\"SHSY5Y_second_run\"][\"sql_file\"]\n",
+ "sql_file_second_run = run_info_dictionary[\"batch_2\"][\"source_path\"]\n",
  "single_cell_file_second_run = f\"sqlite:///{cp_output_dir}/{sql_file_second_run}\"\n",
  "image_df_second_run = extract_utils.load_sqlite_as_df(\n",
  "    sqlite_file_path=single_cell_file_second_run, image_table_name=\"Per_Image\"\n",
  ")\n",
  "\n",
+ "## Third run\n",
+ "sql_file_third_run = run_info_dictionary[\"batch_3\"][\"source_path\"]\n",
+ "single_cell_file_third_run = f\"sqlite:///{cp_output_dir}/{sql_file_third_run}\"\n",
+ "image_df_third_run = extract_utils.load_sqlite_as_df(\n",
+ "    sqlite_file_path=single_cell_file_third_run, image_table_name=\"Per_Image\"\n",
+ ")\n",
+ "\n",
+ "## Fourth run\n",
+ "sql_file_fourth_run = run_info_dictionary[\"batch_4\"][\"source_path\"]\n",
+ "single_cell_file_fourth_run = f\"sqlite:///{cp_output_dir}/{sql_file_fourth_run}\"\n",
+ "image_df_fourth_run = extract_utils.load_sqlite_as_df(\n",
+ "    sqlite_file_path=single_cell_file_fourth_run, image_table_name=\"Per_Image\"\n",
+ ")\n",
+ "\n",
+ "## Fifth run\n",
+ "sql_file_fifth_run = run_info_dictionary[\"batch_5\"][\"source_path\"]\n",
+ "single_cell_file_fifth_run = f\"sqlite:///{cp_output_dir}/{sql_file_fifth_run}\"\n",
+ "image_df_fifth_run = extract_utils.load_sqlite_as_df(\n",
+ "    sqlite_file_path=single_cell_file_fifth_run, image_table_name=\"Per_Image\"\n",
+ ")\n",
+ "\n",
+ "## Sixth run\n",
+ "sql_file_sixth_run = run_info_dictionary[\"batch_6\"][\"source_path\"]\n",
+ "single_cell_file_sixth_run = f\"sqlite:///{cp_output_dir}/{sql_file_sixth_run}\"\n",
+ "image_df_sixth_run = extract_utils.load_sqlite_as_df(\n",
+ "    sqlite_file_path=single_cell_file_sixth_run, image_table_name=\"Per_Image\"\n",
+ ")\n",
+ "\n",
+ "## Seventh run\n",
+ "sql_file_seventh_run = run_info_dictionary[\"batch_7\"][\"source_path\"]\n",
+ "single_cell_file_seventh_run = f\"sqlite:///{cp_output_dir}/{sql_file_seventh_run}\"\n",
+ "image_df_seventh_run = extract_utils.load_sqlite_as_df(\n",
+ "    sqlite_file_path=single_cell_file_seventh_run, image_table_name=\"Per_Image\"\n",
+ ")\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
  "# merge the dataframes together into one combined run\n",
- "SHSY5Y_run_df = pd.concat([image_df_first_run, image_df_second_run], ignore_index=True)\n",
+ "PBMC_run_df = pd.concat([image_df_first_run, image_df_second_run, image_df_third_run, image_df_fourth_run, image_df_fifth_run, image_df_sixth_run, image_df_seventh_run], ignore_index=True)\n",
  "\n",
- "print(SHSY5Y_run_df.shape)\n",
- "SHSY5Y_run_df.head()"
+ "print(PBMC_run_df.shape)\n",
+ "PBMC_run_df.head()"
  ]
  },
  {
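The seven near-identical load blocks could also be driven by the dictionary itself. A sketch of that alternative (not the notebook's code; extract_utils and run_info_dictionary are assumed to be defined as in the surrounding cells, and because source_path is already an absolute path this sketch omits the cp_output_dir prefix used above, which may be redundant after this change):

    import pandas as pd

    # load the Per_Image table from every batch and stack them into one dataframe
    image_dfs = []
    for info in run_info_dictionary.values():
        sqlite_uri = f"sqlite:///{info['source_path']}"
        image_dfs.append(
            extract_utils.load_sqlite_as_df(
                sqlite_file_path=sqlite_uri, image_table_name="Per_Image"
            )
        )

    PBMC_run_df = pd.concat(image_dfs, ignore_index=True)
    print(PBMC_run_df.shape)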
@@ -408,10 +508,10 @@
  }
  ],
  "source": [
- "# extract image quality features from merged SHSY5Y runs image table df\n",
+ "# extract image quality features from merged PBMC runs image table df\n",
  "image_features_df = extract_utils.extract_image_features(\n",
  "    image_feature_categories=image_feature_categories,\n",
- "    image_df=SHSY5Y_run_df,\n",
+ "    image_df=PBMC_run_df,\n",
  "    image_cols=image_cols,\n",
  "    strata=strata\n",
  ")\n",
@@ -427,10 +527,10 @@
  "# output df as parquet file\n",
  "output(\n",
  "    df=annotated_image_features_df,\n",
- "    output_filename=pathlib.Path(f\"{features_output_dir}/plate2_SHSY5Y_image_features.parquet\"),\n",
+ "    output_filename=pathlib.Path(f\"{features_output_dir}/plate2_PBMC_image_features.parquet\"),\n",
  "    output_type='parquet',\n",
  ")\n",
- "print(\"The image features for the SHSY5Y cells have been extracted and saved!\")"
+ "print(\"The image features for the PBMC cells have been extracted and saved!\")"
  ]
  },
  {
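A quick way to confirm the written parquet round-trips cleanly (not part of the commit; features_output_dir is assumed to be defined as in the notebook, and the filename matches the output call above):

    import pandas as pd

    # read the saved image-feature table back and confirm it is non-empty
    check_df = pd.read_parquet(f"{features_output_dir}/plate2_PBMC_image_features.parquet")
    print(check_df.shape)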
Lines changed: 23 additions & 2 deletions
@@ -1,17 +1,38 @@
  #!/bin/bash
  
- # initialize the correct shell for your machine to allow conda to work (see README for note on shell names)
- conda init bash
+ 
+ #SBATCH --nodes=1
+ #SBATCH --ntasks=1
+ 
+ #SBATCH --mem=500G
+ #SBATCH --partition=amem
+ #SBATCH --qos=mem
+ #SBATCH --time=25:00:00
+ #SBATCH --output=sample-%j.out
+ 
+ module purge
+ 
+ module load anaconda
+ 
  # activate the main conda environment
  conda activate interstellar_data
  
  # convert all notebooks to python files into the scripts folder
  jupyter nbconvert --to python --output-dir=scripts/ *.ipynb
  
  # run the python scripts in order (from convert+merge, annotate, normalize, feature select, and extract image features)
+ echo "Starting processing of plate 2 data"
+ 
+ echo "Converting and merging plate 2 data"
  python scripts/0.merge_sc_plate2.py
+ echo "Annotating plate 2 data"
  python scripts/1.annotate_sc_plate2.py
+ echo "Combining plate 2 data"
  python scripts/2.combine_sc_runs_plate2.py
+ echo "Normalizing plate 2 data"
  python scripts/3.normalize_sc_plate2.py
+ echo "Feature selecting plate 2 data"
  python scripts/4.feature_select_sc_plate2.py
+ echo "Extracting image features from plate 2 data"
  python scripts/5.extract_image_features
+ echo "Processing of plate 2 data complete"
