From 561344d0f3d63eb902a825d63af694ed5dec1f08 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 18:43:16 +0100
Subject: [PATCH] Reformat biopandas.mmcif modules and tests (linting only)

---
 biopandas/mmcif/mmcif_parser.py          |  21 +++-
 biopandas/mmcif/pandas_mmcif.py          | 144 +++++++++++++++--------
 biopandas/mmcif/tests/test_amino3to1.py  |   8 +-
 biopandas/mmcif/tests/test_distance.py   |  10 +-
 biopandas/mmcif/tests/test_read_mmcif.py |  21 ++--
 biopandas/mmcif/tests/test_rmsd.py       |   6 +-
 6 files changed, 145 insertions(+), 65 deletions(-)

diff --git a/biopandas/mmcif/mmcif_parser.py b/biopandas/mmcif/mmcif_parser.py
index 96d0a31..91556bb 100644
--- a/biopandas/mmcif/mmcif_parser.py
+++ b/biopandas/mmcif/mmcif_parser.py
@@ -22,19 +22,28 @@ def __init__(self, parser_obj):
         self.names_defined = False
 
     def add_name(self, name):
-        cat_name = type(name) == str and partition_string(name, ".") or ["", "", ""]
+        cat_name = (
+            type(name) == str and partition_string(name, ".") or ["", "", ""]
+        )
         if cat_name[1]:
             if cat_name[0] not in self.parser_obj.current_target[-2]:
                 self.parser_obj.current_target[-2][cat_name[0]] = {}
-            if cat_name[2] not in self.parser_obj.current_target[-2][cat_name[0]]:
-                self.parser_obj.current_target[-2][cat_name[0]][cat_name[2]] = []
+            if (
+                cat_name[2]
+                not in self.parser_obj.current_target[-2][cat_name[0]]
+            ):
+                self.parser_obj.current_target[-2][cat_name[0]][
+                    cat_name[2]
+                ] = []
             self.ref_list.append(
                 self.parser_obj.current_target[-2][cat_name[0]][cat_name[2]]
             )
         else:
             if cat_name[0] not in self.parser_obj.current_target[-2]:
                 self.parser_obj.current_target[-2][cat_name[0]] = []
-            self.ref_list.append(self.parser_obj.current_target[-2][cat_name[0]])
+            self.ref_list.append(
+                self.parser_obj.current_target[-2][cat_name[0]]
+            )
         self.length = len(self.ref_list)
 
     def push_value(self, value):
@@ -289,7 +298,9 @@ def __dump_str__(inp):
         return str(inp)
     if re.search(__CIF_STR_NL_CHECK__, inp) is not None:
         return "\n;%s\n;" % inp
-    return "'%s'" % inp if re.search(__CIF_STR_CHECK__, inp) is not None else inp
+    return (
+        "'%s'" % inp if re.search(__CIF_STR_CHECK__, inp) is not None else inp
+    )
 
 
 def __pad_string__(inp, flength):
diff --git a/biopandas/mmcif/pandas_mmcif.py b/biopandas/mmcif/pandas_mmcif.py
index 167b79e..e00c1f5 100644
--- a/biopandas/mmcif/pandas_mmcif.py
+++ b/biopandas/mmcif/pandas_mmcif.py
@@ -1,4 +1,5 @@
 """Class for working with MMCIF files."""
+
 # BioPandas
 # Authors: Arian Jamasb <arian@jamasb.io>,
 # Authors: Sebastian Raschka <mail@sebastianraschka.com>
@@ -69,56 +70,76 @@ def read_mmcif(self, path):
         self.code = self.data["entry"]["id"][0].lower()
         return self
 
-    def fetch_mmcif(self, pdb_code: Optional[str] = None, uniprot_id: Optional[str] = None, source: str = "pdb"):
+    def fetch_mmcif(
+        self,
+        pdb_code: Optional[str] = None,
+        uniprot_id: Optional[str] = None,
+        source: str = "pdb",
+    ):
         """Fetches mmCIF file contents from the Protein Databank at rcsb.org or AlphaFold database at https://alphafold.ebi.ac.uk/.
-.
+        .
 
-        Parameters
-        ----------
-        pdb_code : str, optional
-            A 4-letter PDB code, e.g., `"3eiy"` to retrieve structures from the PDB. Defaults to `None`.
+                Parameters
+                ----------
+                pdb_code : str, optional
+                    A 4-letter PDB code, e.g., `"3eiy"` to retrieve structures from the PDB. Defaults to `None`.
 
-        uniprot_id : str, optional
-            A UniProt Identifier, e.g., `"Q5VSL9"` to retrieve structures from the AF2 database. Defaults to `None`.
+                uniprot_id : str, optional
+                    A UniProt Identifier, e.g., `"Q5VSL9"` to retrieve structures from the AF2 database. Defaults to `None`.
 
-        source : str
-            The source to retrieve the structure from 
-            (`"pdb"`, `"alphafold2-v3"` or `"alphafold2-v4"`). Defaults to `"pdb"`.
+                source : str
+                    The source to retrieve the structure from
+                    (`"pdb"`, `"alphafold2-v3"` or `"alphafold2-v4"`). Defaults to `"pdb"`.
 
-        Returns
-        ---------
-        self
+                Returns
+                ---------
+                self
 
         """
         # Sanitize input
         invalid_input_identifier_1 = pdb_code is None and uniprot_id is None
-        invalid_input_identifier_2 = pdb_code is not None and uniprot_id is not None
-        invalid_input_combination_1 = uniprot_id is not None and source == "pdb"
+        invalid_input_identifier_2 = (
+            pdb_code is not None and uniprot_id is not None
+        )
+        invalid_input_combination_1 = (
+            uniprot_id is not None and source == "pdb"
+        )
         invalid_input_combination_2 = pdb_code is not None and source in {
-            "alphafold2-v3", "alphafold2-v4"}
+            "alphafold2-v3",
+            "alphafold2-v4",
+        }
 
         if invalid_input_identifier_1 or invalid_input_identifier_2:
             raise ValueError(
-                "Please provide either a PDB code or a UniProt ID.")
+                "Please provide either a PDB code or a UniProt ID."
+            )
 
         if invalid_input_combination_1:
             raise ValueError(
-                "Please use a 'pdb_code' instead of 'uniprot_id' for source='pdb'.")
+                "Please use a 'pdb_code' instead of 'uniprot_id' for source='pdb'."
+            )
         elif invalid_input_combination_2:
             raise ValueError(
-                f"Please use a 'uniprot_id' instead of 'pdb_code' for source={source}.")
+                f"Please use a 'uniprot_id' instead of 'pdb_code' for source={source}."
+            )
 
         if source == "pdb":
             self.mmcif_path, self.mmcif_text = self._fetch_mmcif(pdb_code)
         elif source == "alphafold2-v3":
             af2_version = 3
-            self.mmcif_path, self.mmcif_text = self._fetch_af2(uniprot_id, af2_version)
+            self.mmcif_path, self.mmcif_text = self._fetch_af2(
+                uniprot_id, af2_version
+            )
         elif source == "alphafold2-v4":
             af2_version = 4
-            self.mmcif_path, self.mmcif_text = self._fetch_af2(uniprot_id, af2_version)
+            self.mmcif_path, self.mmcif_text = self._fetch_af2(
+                uniprot_id, af2_version
+            )
         else:
-            raise ValueError(f"Invalid source: {source}."
-                " Please use one of 'pdb', 'alphafold2-v3' or 'alphafold2-v4'.")
+            raise ValueError(
+                f"Invalid source: {source}."
+                " Please use one of 'pdb', 'alphafold2-v3' or 'alphafold2-v4'."
+            )
 
         self._df = self._construct_df(text=self.mmcif_text)
         return self
@@ -129,7 +150,8 @@ def _construct_df(self, text: str):
         self.data = data
         df: Dict[str, pd.DataFrame] = {}
         full_df = pd.DataFrame.from_dict(
-            data["atom_site"], orient="index").transpose()
+            data["atom_site"], orient="index"
+        ).transpose()
         full_df = full_df.astype(mmcif_col_types, errors="ignore")
         df["ATOM"] = pd.DataFrame(full_df[full_df.group_PDB == "ATOM"])
         df["HETATM"] = pd.DataFrame(full_df[full_df.group_PDB == "HETATM"])
@@ -148,8 +170,9 @@ def _fetch_mmcif(pdb_code):
             response = urlopen(url)
             txt = response.read()
             txt = (
-                txt.decode(
-                    "utf-8") if sys.version_info[0] >= 3 else txt.encode("ascii")
+                txt.decode("utf-8")
+                if sys.version_info[0] >= 3
+                else txt.encode("ascii")
             )
         except HTTPError as e:
             print(f"HTTP Error {e.code}")
@@ -166,11 +189,15 @@ def _fetch_af2(uniprot_id: str, af2_version: int = 3):
         try:
             response = urlopen(url)
             txt = response.read()
-            txt = txt.decode('utf-8') if sys.version_info[0] >= 3 else txt.encode('ascii')
+            txt = (
+                txt.decode("utf-8")
+                if sys.version_info[0] >= 3
+                else txt.encode("ascii")
+            )
         except HTTPError as e:
-            print(f'HTTP Error {e.code}')
+            print(f"HTTP Error {e.code}")
         except URLError as e:
-            print(f'URL Error {e.args}')
+            print(f"URL Error {e.args}")
         return url, txt
 
     @staticmethod
@@ -184,7 +211,8 @@ def _read_mmcif(path):
             openf = gzip.open
         else:
             allowed_formats = ", ".join(
-                (".cif", ".cif.gz", ".mmcif", ".mmcif.gz"))
+                (".cif", ".cif.gz", ".mmcif", ".mmcif.gz")
+            )
             raise ValueError(
                 f"Wrong file format; allowed file formats are {allowed_formats}"
             )
@@ -194,8 +222,9 @@ def _read_mmcif(path):
 
         if path.endswith(".gz"):
             txt = (
-                txt.decode(
-                    "utf-8") if sys.version_info[0] >= 3 else txt.encode("ascii")
+                txt.decode("utf-8")
+                if sys.version_info[0] >= 3
+                else txt.encode("ascii")
             )
         return path, txt
 
@@ -271,14 +300,19 @@ def _get_mainchain(
     def _get_hydrogen(df, invert):
         """Return only hydrogen atom entries from a DataFrame"""
         return (
-            df[(df["type_symbol"] != "H")] if invert else df[(
-                df["type_symbol"] == "H")]
+            df[(df["type_symbol"] != "H")]
+            if invert
+            else df[(df["type_symbol"] == "H")]
         )
 
     @staticmethod
     def _get_heavy(df, invert):
         """Return only heavy atom entries from a DataFrame"""
-        return df[df["type_symbol"] == "H"] if invert else df[df["type_symbol"] != "H"]
+        return (
+            df[df["type_symbol"] == "H"]
+            if invert
+            else df[df["type_symbol"] != "H"]
+        )
 
     @staticmethod
     def _get_calpha(df, invert, atom_col: str = "auth_atom_id"):
@@ -288,7 +322,11 @@ def _get_calpha(df, invert, atom_col: str = "auth_atom_id"):
     @staticmethod
     def _get_carbon(df, invert):
         """Return carbon atom entries from a DataFrame"""
-        return df[df["type_symbol"] != "C"] if invert else df[df["type_symbol"] == "C"]
+        return (
+            df[df["type_symbol"] != "C"]
+            if invert
+            else df[df["type_symbol"] == "C"]
+        )
 
     def amino3to1(
         self,
@@ -339,8 +377,9 @@ def amino3to1(
                 indices.append(ind)
             cmp = num
 
-        transl = tmp.iloc[indices][residue_col].map(
-            amino3to1dict).fillna(fillna)
+        transl = (
+            tmp.iloc[indices][residue_col].map(amino3to1dict).fillna(fillna)
+        )
 
         return pd.concat((tmp.iloc[indices][chain_col], transl), axis=1)
 
@@ -425,7 +464,9 @@ def distance(self, xyz=(0.00, 0.00, 0.00), records=("ATOM", "HETATM")):
 
         return np.sqrt(
             np.sum(
-                df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1) ** 2, axis=1
+                df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1)
+                ** 2,
+                axis=1,
             )
         )
 
@@ -451,7 +492,9 @@ def distance_df(df, xyz=(0.00, 0.00, 0.00)):
         """
         return np.sqrt(
             np.sum(
-                df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1) ** 2, axis=1
+                df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1)
+                ** 2,
+                axis=1,
             )
         )
 
@@ -485,7 +528,11 @@ def read_mmcif_from_list(self, mmcif_lines):
         self.code = self.data["entry"]["id"][0].lower()
         return self
 
-    def convert_to_pandas_pdb(self, offset_chains: bool = True, records: List[str] = ["ATOM", "HETATM"]) -> PandasPdb:
+    def convert_to_pandas_pdb(
+        self,
+        offset_chains: bool = True,
+        records: List[str] = ["ATOM", "HETATM"],
+    ) -> PandasPdb:
         """Returns a PandasPdb object with the same data as the PandasMmcif
         object.
 
@@ -525,10 +572,15 @@ def convert_to_pandas_pdb(self, offset_chains: bool = True, records: List[str] =
 
         # Update atom numbers
         if offset_chains:
-            offsets = pandaspdb.df["ATOM"]["chain_id"].astype(
-                "category").cat.codes
-            pandaspdb.df["ATOM"]["atom_number"] = pandaspdb.df["ATOM"]["atom_number"] + offsets
+            offsets = (
+                pandaspdb.df["ATOM"]["chain_id"].astype("category").cat.codes
+            )
+            pandaspdb.df["ATOM"]["atom_number"] = (
+                pandaspdb.df["ATOM"]["atom_number"] + offsets
+            )
             hetatom_offset = offsets.max() + 1
-            pandaspdb.df["HETATM"]["atom_number"] = pandaspdb.df["HETATM"]["atom_number"] + hetatom_offset
+            pandaspdb.df["HETATM"]["atom_number"] = (
+                pandaspdb.df["HETATM"]["atom_number"] + hetatom_offset
+            )
 
         return pandaspdb
diff --git a/biopandas/mmcif/tests/test_amino3to1.py b/biopandas/mmcif/tests/test_amino3to1.py
index a03c364..83a671c 100644
--- a/biopandas/mmcif/tests/test_amino3to1.py
+++ b/biopandas/mmcif/tests/test_amino3to1.py
@@ -805,8 +805,12 @@ def test_multichain():
     expect_chain = ["A" for _ in range(88)] + ["B" for _ in range(94)]
     got_chain = list(transl["auth_asym_id"].values)
 
-    got_res_a = list(transl.loc[transl["auth_asym_id"] == "A", "auth_comp_id"].values)
-    got_res_b = list(transl.loc[transl["auth_asym_id"] == "B", "auth_comp_id"].values)
+    got_res_a = list(
+        transl.loc[transl["auth_asym_id"] == "A", "auth_comp_id"].values
+    )
+    got_res_b = list(
+        transl.loc[transl["auth_asym_id"] == "B", "auth_comp_id"].values
+    )
 
     assert expect_chain == got_chain
     assert expect_res_a == got_res_a
diff --git a/biopandas/mmcif/tests/test_distance.py b/biopandas/mmcif/tests/test_distance.py
index f827d01..e7cd116 100644
--- a/biopandas/mmcif/tests/test_distance.py
+++ b/biopandas/mmcif/tests/test_distance.py
@@ -18,7 +18,8 @@ def test_equal():
     dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), records=("ATOM",))
 
     expect = pd.Series(
-        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], index=[12, 13, 14, 15, 16]
+        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510],
+        index=[12, 13, 14, 15, 16],
     )
     assert dist[dist < 3].all() == expect.all()
 
@@ -31,7 +32,8 @@ def test_deprecated_str_arg():
     dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), records="ATOM")
 
     expect = pd.Series(
-        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], index=[12, 13, 14, 15, 16]
+        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510],
+        index=[12, 13, 14, 15, 16],
     )
     assert dist[dist < 3].all() == expect.all()
 
@@ -44,5 +46,7 @@ def test_use_external_df():
     new_df = p1t48.df["ATOM"].iloc[:-1, :].copy()
     dist = PandasMmcif.distance_df(df=new_df, xyz=(70.785, 15.477, 23.359))
 
-    expect = pd.Series([2.533259, 1.520502, 0.000000, 1.257597], index=[12, 13, 14, 15])
+    expect = pd.Series(
+        [2.533259, 1.520502, 0.000000, 1.257597], index=[12, 13, 14, 15]
+    )
     assert dist[dist < 3].all() == expect.all()
diff --git a/biopandas/mmcif/tests/test_read_mmcif.py b/biopandas/mmcif/tests/test_read_mmcif.py
index 7189702..983e848 100644
--- a/biopandas/mmcif/tests/test_read_mmcif.py
+++ b/biopandas/mmcif/tests/test_read_mmcif.py
@@ -6,11 +6,11 @@
 
 
 import os
-import pytest
-from urllib.error import HTTPError
 from pathlib import Path
+from urllib.error import HTTPError
 
 import pandas as pd
+import pytest
 from biopandas.mmcif import PandasMmcif
 from biopandas.pdb import PandasPdb
 from biopandas.testutils import assert_raises
@@ -22,8 +22,12 @@
 # TESTDATA_FILENAME2 = os.path.join(
 #    os.path.dirname(__file__), "data", "4eiy_anisouchunk.cif"
 # )
-TESTDATA_FILENAME2 = os.path.join(os.path.dirname(__file__), "data", "4eiy.cif")
-TESTDATA_FILENAME_GZ = os.path.join(os.path.dirname(__file__), "data", "3eiy.cif.gz")
+TESTDATA_FILENAME2 = os.path.join(
+    os.path.dirname(__file__), "data", "4eiy.cif"
+)
+TESTDATA_FILENAME_GZ = os.path.join(
+    os.path.dirname(__file__), "data", "3eiy.cif.gz"
+)
 
 TESTDATA_FILENAME_AF2_V4 = os.path.join(
     os.path.dirname(__file__), "data", "AF-Q5VSL9-F1-model_v4.cif"
@@ -90,7 +94,6 @@
     af2_test_struct_v3 = f.read()
 
 
-
 def test__read_pdb():
     """Test private _read_pdb"""
     ppdb = PandasMmcif()
@@ -334,7 +337,9 @@ def test_mmcif_pdb_conversion():
     )
     assert_frame_equal(
         pdb.df["HETATM"].drop(columns=["line_idx"]),
-        mmcif_pdb.df["HETATM"].drop(columns=["line_idx"]).reset_index(drop=True),
+        mmcif_pdb.df["HETATM"]
+        .drop(columns=["line_idx"])
+        .reset_index(drop=True),
     )
 
     # single chain test
@@ -348,5 +353,7 @@ def test_mmcif_pdb_conversion():
     )
     assert_frame_equal(
         pdb.df["HETATM"].drop(columns=["line_idx"]),
-        mmcif_pdb.df["HETATM"].drop(columns=["line_idx"]).reset_index(drop=True),
+        mmcif_pdb.df["HETATM"]
+        .drop(columns=["line_idx"])
+        .reset_index(drop=True),
     )
diff --git a/biopandas/mmcif/tests/test_rmsd.py b/biopandas/mmcif/tests/test_rmsd.py
index 5507059..054f3b2 100644
--- a/biopandas/mmcif/tests/test_rmsd.py
+++ b/biopandas/mmcif/tests/test_rmsd.py
@@ -5,8 +5,8 @@
 # Code Repository: https://github.com/rasbt/biopandas
 
 import os
-import pytest
 
+import pytest
 from biopandas.mmcif import PandasMmcif
 
 TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.cif")
@@ -48,7 +48,9 @@ def test_invalid_query():
 
 
 def test_protein():
-    r = PandasMmcif.rmsd(p1t48.df["ATOM"], p1t49.df["ATOM"], s="c-alpha", invert=False)
+    r = PandasMmcif.rmsd(
+        p1t48.df["ATOM"], p1t49.df["ATOM"], s="c-alpha", invert=False
+    )
     assert r == 0.4923, r