[cleanup] change logging levels to debug to declutter output (#391)

* [cleanup] change logging levels to debug to declutter output * added to CHANGELOG * [cleanup] change logging levels to debug to declutter output * [cleanup] change logging levels to debug to declutter output for pdb_data utils
a-r-j · May 1, 2024 · 5f8e018 · 5f8e018
1 parent 2c18887
commit 5f8e018
Show file tree

Hide file tree

Showing 9 changed files with 25 additions and 23 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,7 @@
 * Fix bug where the `deprotonate` argument is not wired up to `graphein.protein.graphs.construct_graphs`. [#375](https://github.com/a-r-j/graphein/pull/375)
 
 #### Misc
+* bumped logging level down from `INFO`/`WARNING` to `DEBUG` in several places to reduce output length [#391](https://github.com/a-r-j/graphein/pull/391)
 * exposed `fill_value` and `bfactor` option to `protein_to_pyg` function. [#385](https://github.com/a-r-j/graphein/pull/385) and [#388](https://github.com/a-r-j/graphein/pull/388)
 * Updated Foldcomp datasets with improved setup function and updated database choices such as ESMAtlas. [#382](https://github.com/a-r-j/graphein/pull/382)
 * Resolve issue with notebook version and `pluggy` in Dockerfile. [#372](https://github.com/a-r-j/graphein/pull/372)

diff --git a/graphein/ml/datasets/pdb_data.py b/graphein/ml/datasets/pdb_data.py
@@ -341,7 +341,7 @@ def _download_pdb_sequences(self):
         ):
             log.info("Downloading PDB sequences...")
             wget.download(self.pdb_sequences_url, out=str(self.root_dir))
-            log.info("Downloaded sequences")
+            log.debug("Downloaded sequences")
 
         # Unzip all collected sequences
         if not os.path.exists(self.root_dir / self.pdb_seqres_filename):
@@ -353,7 +353,7 @@ def _download_pdb_sequences(self):
                     self.root_dir / self.pdb_seqres_filename, "wb"
                 ) as f_out:
                     shutil.copyfileobj(f_in, f_out)
-            log.info("Unzipped sequences")
+            log.debug("Unzipped sequences")
 
     def _download_ligand_map(self):
         """Download ligand map from
@@ -362,7 +362,7 @@ def _download_ligand_map(self):
         if not os.path.exists(self.root_dir / self.ligand_map_filename):
             log.info("Downloading ligand map...")
             wget.download(self.ligand_map_url, out=str(self.root_dir))
-            log.info("Downloaded ligand map")
+            log.debug("Downloaded ligand map")
 
     def _download_source_map(self):
         """Download source map from
@@ -371,7 +371,7 @@ def _download_source_map(self):
         if not os.path.exists(self.root_dir / self.source_map_filename):
             log.info("Downloading source map...")
             wget.download(self.source_map_url, out=str(self.root_dir))
-            log.info("Downloaded source map")
+            log.debug("Downloaded source map")
 
     def _download_resolution(self):
         """Download source map from
@@ -380,7 +380,7 @@ def _download_resolution(self):
         if not os.path.exists(self.root_dir / self.resolution_filename):
             log.info("Downloading resolution map...")
             wget.download(self.resolution_url, out=str(self.root_dir))
-            log.info("Downloaded resolution map")
+            log.debug("Downloaded resolution map")
 
     def _download_entry_metadata(self):
         """Download PDB entry metadata from
@@ -391,7 +391,7 @@ def _download_entry_metadata(self):
         ):
             log.info("Downloading entry metadata...")
             wget.download(self.pdb_deposition_date_url, out=str(self.root_dir))
-            log.info("Downloaded entry metadata")
+            log.debug("Downloaded entry metadata")
 
     def _download_exp_type(self):
         """Download PDB experiment metadata from
@@ -400,7 +400,7 @@ def _download_exp_type(self):
         if not os.path.exists(self.root_dir / self.pdb_entry_type_filename):
             log.info("Downloading experiment type map...")
             wget.download(self.pdb_entry_type_url, out=str(self.root_dir))
-            log.info("Downloaded experiment type map")
+            log.debug("Downloaded experiment type map")
 
     def _download_pdb_availability(self):
         """Download PDB availability metadata from
@@ -409,7 +409,7 @@ def _download_pdb_availability(self):
         if not os.path.exists(self.root_dir / self.pdb_availability_filename):
             log.info("Downloading PDB availability map...")
             wget.download(self.pdb_availability_url, out=str(self.root_dir))
-            log.info("Downloaded PDB availability map")
+            log.debug("Downloaded PDB availability map")
 
     def _parse_ligand_map(self) -> Dict[str, List[str]]:
         """Parse the ligand maps for all PDB records.
@@ -1300,7 +1300,7 @@ def split_clusters(
             self.split_ratios,
             self.assign_leftover_rows_to_split_n,
         )
-        log.info("Done splitting clusters")
+        log.debug("Done splitting clusters")
 
         # Update splits
         for split in self.splits:

diff --git a/graphein/protein/features/sequence/embeddings.py b/graphein/protein/features/sequence/embeddings.py
@@ -31,7 +31,7 @@
         pip_install=True,
         conda_channel="pytorch",
     )
-    log.warning(message)
+    log.debug(message)
 
 try:
     import biovec
@@ -42,7 +42,7 @@
         pip_install=True,
         extras=True,
     )
-    log.warning(message)
+    log.debug(message)
 
 
 @lru_cache()

diff --git a/graphein/protein/meshes.py b/graphein/protein/meshes.py
@@ -27,7 +27,7 @@
         conda_channel="pytorch3d",
         pip_install=True,
     )
-    log.warning(message)
+    log.debug(message)
 
 
 def check_for_pymol_installation():

diff --git a/graphein/protein/tensor/io.py b/graphein/protein/tensor/io.py
@@ -49,7 +49,7 @@
         conda_channel="pyg",
         pip_install=True,
     )
-    log.warning(message)
+    log.debug(message)
 
 try:
     import torch
@@ -60,7 +60,7 @@
         conda_channel="pytorch",
         pip_install=True,
     )
-    log.warning(message)
+    log.debug(message)
 
 
 def get_protein_length(df: pd.DataFrame, insertions: bool = True) -> int:

diff --git a/graphein/protein/tensor/representation.py b/graphein/protein/tensor/representation.py
@@ -24,7 +24,7 @@
         conda_channel="pytorch",
         pip_install=True,
     )
-    log.warning(message)
+    log.debug(message)
 
 
 def get_full_atom_coords(

diff --git a/graphein/protein/tensor/sequence.py b/graphein/protein/tensor/sequence.py
@@ -31,7 +31,7 @@
         conda_channel="pytorch",
         pip_install=True,
     )
-    log.warning(message)
+    log.debug(message)
 
 
 def get_sequence(

diff --git a/graphein/protein/utils.py b/graphein/protein/utils.py
@@ -205,7 +205,7 @@ def download_pdb(
 
     # Check if PDB already exists
     if os.path.exists(out_dir / f"{pdb_code}{extension}") and not overwrite:
-        log.info(
+        log.debug(
             f"{pdb_code} already exists: {out_dir / f'{pdb_code}{extension}'}"
         )
         return out_dir / f"{pdb_code}{extension}"
@@ -224,7 +224,7 @@ def download_pdb(
         assert os.path.exists(
             out_dir / f"{pdb_code}{extension}"
         ), f"{pdb_code} download failed. Not found in {out_dir}"
-    log.info(f"{pdb_code} downloaded to {out_dir}")
+    log.debug(f"{pdb_code} downloaded to {out_dir}")
     return out_dir / f"{pdb_code}{extension}"
 
 
@@ -346,7 +346,7 @@ def download_alphafold_structure(
             (Path(out_dir) / f"{uniprot_id}{extension}").resolve()
         )
 
-    log.info(f"Downloaded AlphaFold PDB file for: {uniprot_id}")
+    log.debug(f"Downloaded AlphaFold PDB file for: {uniprot_id}")
     if aligned_score:
         score_query = (
             BASE_URL
@@ -412,7 +412,7 @@ def save_graph_to_pdb(
     if hetatms:
         ppd.df["HETATM"] = hetatm_df
     ppd.to_pdb(path=path, records=None, gz=gz, append_newline=True)
-    log.info(f"Successfully saved graph to {path}")
+    log.debug(f"Successfully saved graph to {path}")
 
 
 def save_pdb_df_to_pdb(
@@ -439,7 +439,7 @@ def save_pdb_df_to_pdb(
     if hetatms:
         ppd.df["HETATM"] = hetatm_df
     ppd.to_pdb(path=path, records=None, gz=gz, append_newline=True)
-    log.info(f"Successfully saved PDB dataframe to {path}")
+    log.debug(f"Successfully saved PDB dataframe to {path}")
 
 
 def save_rgroup_df_to_pdb(
@@ -475,7 +475,7 @@ def save_rgroup_df_to_pdb(
     if hetatms:
         ppd.df["HETATM"] = hetatm_df
     ppd.to_pdb(path=path, records=None, gz=gz, append_newline=True)
-    log.info(f"Successfully saved rgroup data to {path}")
+    log.debug(f"Successfully saved rgroup data to {path}")
 
 
 def esmfold(

diff --git a/graphein/protein/visualisation.py b/graphein/protein/visualisation.py
@@ -33,7 +33,7 @@
         package="pytorch3d",
         conda_channel="pytorch3d",
     )
-    log.warning(message)
+    log.debug(message)
 
 try:
     from mpl_chord_diagram import chord_diagram
@@ -44,6 +44,7 @@
         pip_install=True,
         extras=True,
     )
+    log.debug(message)
 
 
 def plot_pointcloud(mesh: Meshes, title: str = "") -> Axes3D: