Skip to content

Commit

Permalink
feat: also adding gene-to-phen edges from HPO (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Sep 11, 2023
1 parent d89a033 commit d5a8337
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 9 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ jobs:
pip freeze
- name: Run tests
run: pytest
run: |
pytest -vvv --capture=no
- uses: codecov/codecov-action@v3
9 changes: 8 additions & 1 deletion cada_prio/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def cli(ctx: click.Context, verbose: bool):
required=True,
)
@click.option("--path-hpo-obo", type=str, help="path HPO OBO file", required=True)
@click.option("--cpus", type=int, help="number of CPUs to use", default=1)
@click.pass_context
def cli_train_model(
ctx: click.Context,
Expand All @@ -38,11 +39,17 @@ def cli_train_model(
path_gene_hpo_links: str,
path_hpo_genes_to_phenotype: str,
path_hpo_obo: str,
cpus: int,
):
"""train model"""
ctx.ensure_object(dict)
train_model.run(
path_out, path_hgnc_json, path_gene_hpo_links, path_hpo_genes_to_phenotype, path_hpo_obo
path_out,
path_hgnc_json,
path_gene_hpo_links,
path_hpo_genes_to_phenotype,
path_hpo_obo,
cpus,
)


Expand Down
22 changes: 15 additions & 7 deletions cada_prio/train_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,16 +220,18 @@ class EmbeddingParams:
min_count: int = 1
#: Set the batch_words in the fitting
batch_words: int = 4
#: Number of worker threads to use
workers: int = 4


def build_and_fit_model(clinvar_gen2phen, hpo_ontology):
def build_and_fit_model(*, clinvar_gen2phen, hpo_gen2phen, hpo_ontology, cpus: int = 1):
# create graph edges combining the HPO hierarchy, gene-to-phenotype edges from HPO, and training edges from ClinVar
logger.info("Constructing training graph ...")
logger.info("- building edges ...")
training_edges = list(
itertools.chain(yield_hpo_edges(hpo_ontology), yield_gene2phen_edges(clinvar_gen2phen))
itertools.chain(
yield_hpo_edges(hpo_ontology),
yield_gene2phen_edges(hpo_gen2phen),
yield_gene2phen_edges(clinvar_gen2phen),
)
)
logger.info("- graph construction")
training_graph = nx.Graph()
Expand All @@ -246,7 +248,7 @@ def build_and_fit_model(clinvar_gen2phen, hpo_ontology):
num_walks=embedding_params.num_walks,
p=embedding_params.p,
q=embedding_params.q,
workers=embedding_params.workers,
workers=cpus,
)
logger.info("- fitting model")
model = embedding.fit(
Expand Down Expand Up @@ -292,15 +294,21 @@ def run(
path_gene_hpo_links: str,
path_hpo_genes_to_phenotype: str,
path_hpo_obo: str,
cpus: int = 1,
):
# load all data
ncbi_to_hgnc, hgnc_info = load_hgnc_info(path_hgnc_json)
clinvar_gen2phen = load_clinvar_gen2phen(path_gene_hpo_links)
hpo_gen2phen = load_hpo_gen2phen(path_hpo_genes_to_phenotype, ncbi_to_hgnc)
hpo_ontology, hpo_id_from_alt, hpo_id_to_name = load_hpo_ontology(path_hpo_obo)
_, _, _ = hpo_gen2phen, hpo_id_from_alt, hpo_id_to_name
_, _ = hpo_id_from_alt, hpo_id_to_name

# build and fit model
training_graph, model = build_and_fit_model(clinvar_gen2phen, hpo_ontology)
training_graph, model = build_and_fit_model(
clinvar_gen2phen=clinvar_gen2phen,
hpo_gen2phen=hpo_gen2phen,
hpo_ontology=hpo_ontology,
cpus=cpus,
)
# write out graph and model
write_graph_and_model(path_out, hgnc_info, training_graph, model)
3 changes: 3 additions & 0 deletions tests/data/train_smoke/genes_to_phenotype.head.txt
Git LFS file not shown

0 comments on commit d5a8337

Please sign in to comment.