Skip to content

Commit

Permalink
Improve graphein.protein.tensor.io.to_pdb & prep for 1.7.5 release (
Browse files Browse the repository at this point in the history
#352)

* update pdb writing util

* bump version strings to 1.7.5

* update changelog

---------

Co-authored-by: Arian Jamasb <arian.jamasb@roche.com>
  • Loading branch information
a-r-j and Arian Jamasb committed Oct 27, 2023
1 parent 27c065b commit 3d7af1f
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 8 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,7 @@
### 1.7.5 - 27/10/2023

* Improves the tensor->PDB writer (`graphein.protein.tensor.io.to_pdb`) by automatically unravelling residue-level b-factor predictions/annotations ([#352](https://github.com/a-r-j/graphein/pull/352)).

### 1.7.4 - 26/10/2023

* Adds support for PyG 2.4+ ([#350](https://www.github.com/a-r-j/graphein/pull/350))
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Expand Up @@ -34,7 +34,7 @@
copyright = f"{datetime.datetime.now().year}, {author}"

# The full version, including alpha/beta/rc tags
release = "1.7.4"
release = "1.7.5"


# -- General configuration ---------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion graphein/__init__.py
Expand Up @@ -12,7 +12,7 @@
from .testing import *

__author__ = "Arian Jamasb <arian@jamasb.io>"
__version__ = "1.7.4"
__version__ = "1.7.5"


logger.configure(
Expand Down
24 changes: 19 additions & 5 deletions graphein/protein/tensor/io.py
Expand Up @@ -392,7 +392,9 @@ def to_dataframe(
:param insertions: List of insertion codes, defaults to ``None`` (``""``).
:type insertions: Optional[List[Union[str, float]]], optional
:param b_factors: List or tensor of b factors (length: num residues),
defaults to ``None`` (``""``).
defaults to ``None`` (``""``). If ``b_factors`` is of length/shape
number of residues (as opposed to number of atoms) it is automatically
unravelled to the correct length.
:type b_factors: Optional[List[Union[str, float]]], optional
:param occupancy: List or tensor of occupancy values (length: num residues),
defaults to ``None`` (``1.0``).
Expand Down Expand Up @@ -434,12 +436,25 @@ def to_dataframe(
element_symbols = [ELEMENT_SYMBOL_MAP[a] for a in atom_type]

chains = ["A"] * len(res_nums) if chains is None else chains[res_nums - 1]
if b_factors is not None:
num_b_factors = (
len(b_factors)
if isinstance(b_factors, list)
else b_factors.shape[0]
)
b_factors = (
b_factors[res_nums - 1]
if num_b_factors == x.shape[0]
else b_factors
)
if isinstance(b_factors, torch.Tensor):
b_factors = b_factors.tolist()
else:
b_factors = [0.0] * len(res_nums)
if segment_id is None:
segment_id = [""] * len(res_nums)
if insertions is None:
insertions = [""] * len(res_nums)
if b_factors is None:
b_factors = [0.0] * len(res_nums)
if occupancy is None:
occupancy = [1.0] * len(res_nums)
if charge is None:
Expand Down Expand Up @@ -480,7 +495,6 @@ def to_dataframe(
"line_idx": atom_number,
}
df = pd.DataFrame().from_dict(out)

if biopandas:
ppdb = PandasPdb()
ppdb.df["ATOM"] = df
Expand All @@ -501,7 +515,7 @@ def to_pdb(x: AtomTensor, out_path: str, gz: bool = False, **kwargs):
:type x: AtomTensor
:param out_path: Path to output pdb file.
:type out_path: str
:param gz: Whether to gzip out the ouput, defaults to ``False``.
:param gz: Whether to gzip out the output, defaults to ``False``.
:type gz: bool, optional
:param kwargs: Keyword args for :func:`graphein.protein.tensor.to_dataframe`
"""
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -134,7 +134,7 @@ def run(self):

setup(
name="graphein",
version="1.7.4",
version="1.7.5",
description="Protein & Interactomic Graph Construction for Machine Learning",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit 3d7af1f

Please sign in to comment.