Skip to content

Commit

Permalink
update all notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
zktuong committed Jul 7, 2022
1 parent 026d701 commit 28e6d99
Show file tree
Hide file tree
Showing 10 changed files with 1,272 additions and 1,797 deletions.
422 changes: 250 additions & 172 deletions docs/notebooks/0_dandelion_primer.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/notebooks/1_dandelion_preprocessing-10x_data.ipynb
Expand Up @@ -100,7 +100,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"dandelion==0.2.4.dev87 pandas==1.4.2 numpy==1.21.6 matplotlib==3.5.2 networkx==2.8.4 scipy==1.8.1\n"
"dandelion==0.2.4.dev101 pandas==1.4.2 numpy==1.21.6 matplotlib==3.5.2 networkx==2.8.4 scipy==1.8.1\n"
]
}
],
Expand Down
15 changes: 11 additions & 4 deletions docs/notebooks/1b_dandelion_noreannotation-10x_data.ipynb
Expand Up @@ -39,7 +39,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"dandelion==0.2.4.dev87 pandas==1.4.2 numpy==1.21.6 matplotlib==3.5.2 networkx==2.8.4 scipy==1.8.1\n"
"dandelion==0.2.4.dev101 pandas==1.4.2 numpy==1.21.6 matplotlib==3.5.2 networkx==2.8.4 scipy==1.8.1\n"
]
}
],
Expand Down Expand Up @@ -238,8 +238,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Preparing data: 2093it [00:00, 7038.23it/s]\n",
"Scanning for poor quality/ambiguous contigs: 100%|██████████| 994/994 [00:02<00:00, 376.74it/s] \n"
"Preparing data: 2093it [00:00, 4118.18it/s]\n",
"Scanning for poor quality/ambiguous contigs: 100%|██████████| 994/994 [00:03<00:00, 321.38it/s] \n"
]
}
],
Expand All @@ -265,7 +265,7 @@
{
"data": {
"text/plain": [
"Dandelion class object with n_obs = 984 and n_contigs = 2073\n",
"Dandelion class object with n_obs = 984 and n_contigs = 2093\n",
" data: 'cell_id', 'sequence_id', 'sequence', 'sequence_aa', 'productive', 'rev_comp', 'v_call', 'v_cigar', 'd_call', 'd_cigar', 'j_call', 'j_cigar', 'c_call', 'c_cigar', 'sequence_alignment', 'germline_alignment', 'junction', 'junction_aa', 'junction_length', 'junction_aa_length', 'v_sequence_start', 'v_sequence_end', 'd_sequence_start', 'd_sequence_end', 'j_sequence_start', 'j_sequence_end', 'c_sequence_start', 'c_sequence_end', 'consensus_count', 'duplicate_count', 'is_cell', 'locus', 'rearrangement_status', 'ambiguous'\n",
" metadata: 'locus_VDJ', 'locus_VJ', 'productive_VDJ', 'productive_VJ', 'v_call_VDJ', 'd_call_VDJ', 'j_call_VDJ', 'v_call_VJ', 'j_call_VJ', 'c_call_VDJ', 'c_call_VJ', 'junction_VDJ', 'junction_VJ', 'junction_aa_VDJ', 'junction_aa_VJ', 'v_call_B_VDJ', 'd_call_B_VDJ', 'j_call_B_VDJ', 'v_call_B_VJ', 'j_call_B_VJ', 'c_call_B_VDJ', 'c_call_B_VJ', 'productive_B_VDJ', 'productive_B_VJ', 'duplicate_count_B_VDJ', 'duplicate_count_B_VJ', 'isotype', 'isotype_status', 'locus_status', 'chain_status', 'rearrangement_status_VDJ', 'rearrangement_status_VJ'"
]
Expand Down Expand Up @@ -657,6 +657,13 @@
"source": [
"vdj.write_h5ddl('dandelion_results2.h5ddl', complib = 'bzip2')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
62 changes: 31 additions & 31 deletions docs/notebooks/1c_dandelion_scirpy.ipynb

Large diffs are not rendered by default.

41 changes: 27 additions & 14 deletions docs/notebooks/2_dandelion_filtering-10x_data.ipynb
Expand Up @@ -26,7 +26,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"dandelion==0.2.4.dev87 pandas==1.4.2 numpy==1.21.6 matplotlib==3.5.2 networkx==2.8.4 scipy==1.8.1\n"
"dandelion==0.2.4.dev101 pandas==1.4.2 numpy==1.21.6 matplotlib==3.5.2 networkx==2.8.4 scipy==1.8.1\n"
]
}
],
Expand Down Expand Up @@ -674,8 +674,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Preparing data: 7946it [00:03, 2451.65it/s]\n",
"Scanning for poor quality/ambiguous contigs: 100%|██████████| 3847/3847 [00:31<00:00, 123.56it/s] \n"
"Preparing data: 7946it [00:04, 1800.67it/s]\n",
"Scanning for poor quality/ambiguous contigs: 100%|██████████| 3847/3847 [00:35<00:00, 108.43it/s] \n"
]
}
],
Expand Down Expand Up @@ -1073,8 +1073,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Preparing data: 7946it [00:03, 2090.75it/s]\n",
"Scanning for poor quality/ambiguous contigs: 100%|██████████| 3847/3847 [00:29<00:00, 131.12it/s] \n"
"Preparing data: 7946it [00:04, 1851.79it/s]\n",
"Scanning for poor quality/ambiguous contigs: 100%|██████████| 3847/3847 [00:38<00:00, 99.30it/s] \n"
]
}
],
Expand All @@ -1098,7 +1098,7 @@
{
"data": {
"text/plain": [
"Dandelion class object with n_obs = 2773 and n_contigs = 5706\n",
"Dandelion class object with n_obs = 2773 and n_contigs = 9005\n",
" data: 'sequence_id', 'sequence', 'rev_comp', 'productive', 'v_call', 'd_call', 'j_call', 'sequence_alignment', 'germline_alignment', 'junction', 'junction_aa', 'v_cigar', 'd_cigar', 'j_cigar', 'stop_codon', 'vj_in_frame', 'locus', 'junction_length', 'np1_length', 'np2_length', 'v_sequence_start', 'v_sequence_end', 'v_germline_start', 'v_germline_end', 'd_sequence_start', 'd_sequence_end', 'd_germline_start', 'd_germline_end', 'j_sequence_start', 'j_sequence_end', 'j_germline_start', 'j_germline_end', 'v_score', 'v_identity', 'v_support', 'd_score', 'd_identity', 'd_support', 'j_score', 'j_identity', 'j_support', 'fwr1', 'fwr2', 'fwr3', 'fwr4', 'cdr1', 'cdr2', 'cdr3', 'cell_id', 'c_call', 'consensus_count', 'duplicate_count', 'v_call_10x', 'd_call_10x', 'j_call_10x', 'junction_10x', 'junction_10x_aa', 'v_call_genotyped', 'germline_alignment_d_mask', 'sample_id', 'j_support_igblastn', 'j_score_igblastn', 'j_call_igblastn', 'j_call_blastn', 'j_identity_blastn', 'j_alignment_length_blastn', 'j_number_of_mismatches_blastn', 'j_number_of_gap_openings_blastn', 'j_sequence_start_blastn', 'j_sequence_end_blastn', 'j_germline_start_blastn', 'j_germline_end_blastn', 'j_support_blastn', 'j_score_blastn', 'j_sequence_alignment_blastn', 'j_germline_alignment_blastn', 'cell_id_blastn', 'j_source', 'd_support_igblastn', 'd_score_igblastn', 'd_call_igblastn', 'd_call_blastn', 'd_identity_blastn', 'd_alignment_length_blastn', 'd_number_of_mismatches_blastn', 'd_number_of_gap_openings_blastn', 'd_sequence_start_blastn', 'd_sequence_end_blastn', 'd_germline_start_blastn', 'd_germline_end_blastn', 'd_support_blastn', 'd_score_blastn', 'd_sequence_alignment_blastn', 'd_germline_alignment_blastn', 'd_source', 'c_sequence_alignment', 'c_germline_alignment', 'c_sequence_start', 'c_sequence_end', 'c_score', 'c_identity', 'c_call_10x', 'junction_aa_length', 'fwr1_aa', 'fwr2_aa', 'fwr3_aa', 'fwr4_aa', 'cdr1_aa', 'cdr2_aa', 'cdr3_aa', 'sequence_alignment_aa', 'v_sequence_alignment_aa', 
'd_sequence_alignment_aa', 'j_sequence_alignment_aa', 'mu_count', 'ambiguous', 'rearrangement_status'\n",
" metadata: 'sample_id', 'locus_VDJ', 'locus_VJ', 'productive_VDJ', 'productive_VJ', 'v_call_genotyped_VDJ', 'd_call_VDJ', 'j_call_VDJ', 'v_call_genotyped_VJ', 'j_call_VJ', 'c_call_VDJ', 'c_call_VJ', 'junction_VDJ', 'junction_VJ', 'junction_aa_VDJ', 'junction_aa_VJ', 'v_call_genotyped_B_VDJ', 'd_call_B_VDJ', 'j_call_B_VDJ', 'v_call_genotyped_B_VJ', 'j_call_B_VJ', 'c_call_B_VDJ', 'c_call_B_VJ', 'productive_B_VDJ', 'productive_B_VJ', 'duplicate_count_B_VDJ', 'duplicate_count_B_VJ', 'isotype', 'isotype_status', 'locus_status', 'chain_status', 'rearrangement_status_VDJ', 'rearrangement_status_VJ'"
]
Expand Down Expand Up @@ -1161,10 +1161,16 @@
"- detailed information on chain status pairings (below).<br><br>\n",
" \n",
"- `chain_status`\n",
"- summarised information of the chain locus status pairings (similar to `chain_pairing` in `scirpy`).\n",
"- summarised information of the chain locus status pairings (similar to `chain_pairing` in `scirpy`).<br><br>\n",
" \n",
"- `rearrangement_status_VDJ` and `rearrangement_status_VJ`\n",
"- whether or not V(D)J gene usage is standard (i.e. all from the same locus).\n",
"\n",
"</div>\n",
"\n",
"No additional filtering is required: the updated `Dandelion` is designed to work without having the need to filter the contigs marked as `ambiguous` in the `.metadata.chain_status`. "
"So in a standard situation, I would remove cells flagged with `Orphan VJ`, `Orphan VJ-exception`, `Extra pair` or `ambiguous` in `.metadata.chain_status`, and also any cell marked as `chimeric` in `.metadata.rearrangement_status_VDJ` and `.metadata.rearrangement_status_VJ`, from downstream cell-level calculations/analysis. \n",
"\n",
"Having said that, you will find that most of `Dandelion`'s functions will work without the need to perform additional filtering, and filtering can instead be performed on the final `AnnData` object (described in the visualisation section)."
]
},
{
Expand Down Expand Up @@ -1681,7 +1687,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Running `ddl.pp.filter_contigs` without `AnnData`\n",
"## Running `ddl.pp.filter_contigs` and `ddl.pp.check_contigs` without `AnnData`\n",
"\n",
"Finally, `ddl.pp.filter_contigs` and `ddl.pp.check_contigs` can also be run without an `AnnData` object:"
]
Expand All @@ -1695,8 +1701,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Preparing data: 7946it [00:03, 2068.69it/s]\n",
"Scanning for poor quality/ambiguous contigs: 100%|██████████| 3847/3847 [00:32<00:00, 117.27it/s] \n"
"Preparing data: 7946it [00:03, 2049.40it/s]\n",
"Scanning for poor quality/ambiguous contigs: 100%|██████████| 3847/3847 [00:32<00:00, 116.60it/s] \n"
]
},
{
Expand Down Expand Up @@ -1726,14 +1732,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Preparing data: 7946it [00:04, 1618.15it/s]\n",
"Scanning for poor quality/ambiguous contigs: 100%|██████████| 3847/3847 [00:31<00:00, 122.29it/s] \n"
"Preparing data: 7946it [00:04, 1878.08it/s]\n",
"Scanning for poor quality/ambiguous contigs: 100%|██████████| 3847/3847 [00:31<00:00, 122.86it/s] \n"
]
},
{
"data": {
"text/plain": [
"Dandelion class object with n_obs = 3843 and n_contigs = 7946\n",
"Dandelion class object with n_obs = 3843 and n_contigs = 9005\n",
" data: 'sequence_id', 'sequence', 'rev_comp', 'productive', 'v_call', 'd_call', 'j_call', 'sequence_alignment', 'germline_alignment', 'junction', 'junction_aa', 'v_cigar', 'd_cigar', 'j_cigar', 'stop_codon', 'vj_in_frame', 'locus', 'junction_length', 'np1_length', 'np2_length', 'v_sequence_start', 'v_sequence_end', 'v_germline_start', 'v_germline_end', 'd_sequence_start', 'd_sequence_end', 'd_germline_start', 'd_germline_end', 'j_sequence_start', 'j_sequence_end', 'j_germline_start', 'j_germline_end', 'v_score', 'v_identity', 'v_support', 'd_score', 'd_identity', 'd_support', 'j_score', 'j_identity', 'j_support', 'fwr1', 'fwr2', 'fwr3', 'fwr4', 'cdr1', 'cdr2', 'cdr3', 'cell_id', 'c_call', 'consensus_count', 'duplicate_count', 'v_call_10x', 'd_call_10x', 'j_call_10x', 'junction_10x', 'junction_10x_aa', 'v_call_genotyped', 'germline_alignment_d_mask', 'sample_id', 'j_support_igblastn', 'j_score_igblastn', 'j_call_igblastn', 'j_call_blastn', 'j_identity_blastn', 'j_alignment_length_blastn', 'j_number_of_mismatches_blastn', 'j_number_of_gap_openings_blastn', 'j_sequence_start_blastn', 'j_sequence_end_blastn', 'j_germline_start_blastn', 'j_germline_end_blastn', 'j_support_blastn', 'j_score_blastn', 'j_sequence_alignment_blastn', 'j_germline_alignment_blastn', 'cell_id_blastn', 'j_source', 'd_support_igblastn', 'd_score_igblastn', 'd_call_igblastn', 'd_call_blastn', 'd_identity_blastn', 'd_alignment_length_blastn', 'd_number_of_mismatches_blastn', 'd_number_of_gap_openings_blastn', 'd_sequence_start_blastn', 'd_sequence_end_blastn', 'd_germline_start_blastn', 'd_germline_end_blastn', 'd_support_blastn', 'd_score_blastn', 'd_sequence_alignment_blastn', 'd_germline_alignment_blastn', 'd_source', 'c_sequence_alignment', 'c_germline_alignment', 'c_sequence_start', 'c_sequence_end', 'c_score', 'c_identity', 'c_call_10x', 'junction_aa_length', 'fwr1_aa', 'fwr2_aa', 'fwr3_aa', 'fwr4_aa', 'cdr1_aa', 'cdr2_aa', 'cdr3_aa', 'sequence_alignment_aa', 'v_sequence_alignment_aa', 
'd_sequence_alignment_aa', 'j_sequence_alignment_aa', 'mu_count', 'ambiguous', 'rearrangement_status'\n",
" metadata: 'sample_id', 'locus_VDJ', 'locus_VJ', 'productive_VDJ', 'productive_VJ', 'v_call_genotyped_VDJ', 'd_call_VDJ', 'j_call_VDJ', 'v_call_genotyped_VJ', 'j_call_VJ', 'c_call_VDJ', 'c_call_VJ', 'junction_VDJ', 'junction_VJ', 'junction_aa_VDJ', 'junction_aa_VJ', 'v_call_genotyped_B_VDJ', 'd_call_B_VDJ', 'j_call_B_VDJ', 'v_call_genotyped_B_VJ', 'j_call_B_VJ', 'c_call_B_VDJ', 'c_call_B_VJ', 'productive_B_VDJ', 'productive_B_VJ', 'duplicate_count_B_VDJ', 'duplicate_count_B_VJ', 'isotype', 'isotype_status', 'locus_status', 'chain_status', 'rearrangement_status_VDJ', 'rearrangement_status_VJ'"
]
Expand All @@ -1747,6 +1753,13 @@
"vdj4 = ddl.pp.check_contigs(bcr)\n",
"vdj4"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down

0 comments on commit 28e6d99

Please sign in to comment.