diff --git a/biopandas/mol2/pandas_mol2.py b/biopandas/mol2/pandas_mol2.py index 369ba2a..cbb893c 100644 --- a/biopandas/mol2/pandas_mol2.py +++ b/biopandas/mol2/pandas_mol2.py @@ -180,7 +180,7 @@ def _get_atomsection(mol2_lst): if s.startswith("@ATOM"): first_idx = idx + 1 started = True - elif started and s.startswith("@"): + elif started and (s.startswith("@") or not s.strip()): last_idx_plus1 = idx break if first_idx is None: diff --git a/biopandas/mol2/tests/data/empty_line.mol2 b/biopandas/mol2/tests/data/empty_line.mol2 new file mode 100644 index 0000000..5306e1c --- /dev/null +++ b/biopandas/mol2/tests/data/empty_line.mol2 @@ -0,0 +1,134 @@ +### +### Created by X-TOOL on Mon Sep 10 21:13:02 2018 +### + +@MOLECULE +KEGG_DURG-00000915-01 + 59 62 0 0 0 +SMALL +USER_CHARGES + +@ATOM + 1 C1 13.6130 -13.2290 0.5400 C.3 1 UNK -0.0641 + 2 C2 13.6610 -14.6640 -0.0280 C.3 1 UNK -0.1181 + 3 H1 13.5640 -14.6060 -1.1140 H 1 UNK 0.0751 + 4 C3 14.9450 -12.6420 -0.0330 C.3 1 UNK 0.0713 + 5 C4 12.2940 -12.5990 0.0140 C.3 1 UNK -0.1283 + 6 H4 12.1950 -11.5780 0.3820 H 1 UNK 0.1073 + 7 H5 12.3260 -12.5380 -1.0730 H 1 UNK 0.1073 + 8 C5 13.6650 -13.1760 2.1030 C.3 1 UNK -0.1903 + 9 H6 12.9190 -13.8150 2.5630 H 1 UNK 0.0759 + 10 H7 13.4830 -12.1610 2.4520 H 1 UNK 0.0759 + 11 H8 14.6310 -13.4920 2.4920 H 1 UNK 0.0759 + 12 C6 12.5050 -15.5710 0.4610 C.3 1 UNK -0.0772 + 13 H2 12.5780 -15.7040 1.5370 H 1 UNK 0.0964 + 14 C7 15.1040 -15.1140 0.2500 C.3 1 UNK -0.1650 + 15 H9 15.1810 -15.5500 1.2450 H 1 UNK 0.0719 + 16 H10 15.4370 -15.8400 -0.4910 H 1 UNK 0.0719 + 17 C8 15.9400 -13.8200 0.1740 C.3 1 UNK -0.1775 + 18 H11 16.6600 -13.8750 -0.6430 H 1 UNK 0.1311 + 19 H12 16.4900 -13.7230 1.1080 H 1 UNK 0.1311 + 20 C9 15.3500 -11.2870 0.5690 C.2 1 UNK 0.2576 + 21 O1 14.8390 -12.4040 -1.4490 O.3 1 UNK -0.2914 + 22 H13 14.6770 -13.2350 -1.9010 H 1 UNK 0.1808 + 23 C10 11.0220 -13.4010 0.3830 C.3 1 UNK 0.0519 + 24 H14 10.2200 -12.9980 -0.2360 H 1 UNK 0.0475 + 25 C11 11.1300 -14.9350 0.0800 C.3 
1 UNK -0.1158 + 26 H3 11.1240 -14.9650 -1.0110 H 1 UNK 0.0831 + 27 C12 12.6340 -16.9590 -0.2080 C.3 1 UNK -0.1553 + 28 H15 12.6060 -16.8550 -1.2930 H 1 UNK 0.0891 + 29 H16 13.5810 -17.4220 0.0640 H 1 UNK 0.0891 + 30 C13 16.8260 -11.0440 0.8340 C.3 1 UNK 0.0393 + 31 H17 17.1600 -11.7490 1.5930 H 1 UNK 0.0811 + 32 H18 16.9170 -10.0260 1.2140 H 1 UNK 0.0811 + 33 O2 14.5050 -10.3750 0.7910 O.2 1 UNK -0.3530 + 34 O3 10.6600 -13.1060 1.7360 O.3 1 UNK -0.3271 + 35 H19 9.7760 -13.4360 1.9080 H 1 UNK 0.1813 + 36 C14 9.9080 -15.8370 0.5080 C.3 1 UNK -0.0198 + 37 C15 11.4970 -17.8860 0.2330 C.3 1 UNK -0.1375 + 38 H20 11.5910 -18.8340 -0.2960 H 1 UNK 0.0883 + 39 H21 11.6230 -18.0880 1.2960 H 1 UNK 0.0883 + 40 O4 17.6110 -11.1790 -0.3690 O.3 1 UNK -0.7221 + 41 C16 10.1250 -17.2710 0.0120 C.2 1 UNK -0.0019 + 42 C17 8.6370 -15.2160 -0.1430 C.3 1 UNK -0.1517 + 43 H22 8.3730 -14.2770 0.3430 H 1 UNK 0.0896 + 44 H23 8.8340 -15.0010 -1.1940 H 1 UNK 0.0896 + 45 C18 9.6870 -15.9140 2.0440 C.3 1 UNK -0.1960 + 46 H24 9.0260 -16.7450 2.2870 H 1 UNK 0.0767 + 47 H25 9.2190 -15.0140 2.4360 H 1 UNK 0.0767 + 48 H26 10.6250 -16.0800 2.5700 H 1 UNK 0.0767 + 49 P1 19.1650 -10.8930 -0.3570 P.3 1 UNK 2.4858 + 50 C19 9.1550 -17.9940 -0.5410 C.2 1 UNK -0.2577 + 51 H27 9.3730 -18.9970 -0.8790 H 1 UNK 0.1428 + 52 C20 7.4270 -16.1570 -0.0830 C.3 1 UNK -0.1970 + 53 H28 7.1160 -16.2980 0.9510 H 1 UNK 0.1046 + 54 H29 6.5980 -15.7140 -0.6340 H 1 UNK 0.1046 + 55 C21 7.7690 -17.5030 -0.6830 C.2 1 UNK 0.2584 + 56 O5 6.9100 -18.2110 -1.2760 O.2 1 UNK -0.3304 + 57 O6 19.7370 -11.4000 -1.6580 O.co2 1 UNK -1.2593 + 58 O7 19.7870 -11.6320 0.8040 O.co2 1 UNK -1.2593 + 59 O8 19.3700 -9.4020 -0.2290 O.co2 1 UNK -1.2593 + +@BOND + 1 1 2 1 + 2 1 4 1 + 3 1 5 1 + 4 1 8 1 + 5 2 12 1 + 6 2 14 1 + 7 4 17 1 + 8 4 20 1 + 9 4 21 1 + 10 5 23 1 + 11 12 25 1 + 12 12 27 1 + 13 20 30 1 + 14 20 33 2 + 15 23 34 1 + 16 25 36 1 + 17 27 37 1 + 18 30 40 1 + 19 36 41 1 + 20 36 42 1 + 21 36 45 1 + 22 40 49 1 + 23 41 50 2 + 24 42 52 1 + 25 50 
55 1 + 26 55 56 2 + 27 14 17 1 + 28 23 25 1 + 29 37 41 1 + 30 52 55 1 + 31 2 3 1 + 32 12 13 1 + 33 25 26 1 + 34 49 57 1 + 35 49 58 1 + 36 49 59 2 + 37 5 6 1 + 38 5 7 1 + 39 8 9 1 + 40 8 10 1 + 41 8 11 1 + 42 14 15 1 + 43 14 16 1 + 44 17 18 1 + 45 17 19 1 + 46 21 22 1 + 47 23 24 1 + 48 27 28 1 + 49 27 29 1 + 50 30 31 1 + 51 30 32 1 + 52 34 35 1 + 53 37 38 1 + 54 37 39 1 + 55 42 43 1 + 56 42 44 1 + 57 45 46 1 + 58 45 47 1 + 59 45 48 1 + 60 50 51 1 + 61 52 53 1 + 62 52 54 1 diff --git a/biopandas/mol2/tests/test_pandas_mol2.py b/biopandas/mol2/tests/test_pandas_mol2.py index 3a827df..6cb4dba 100644 --- a/biopandas/mol2/tests/test_pandas_mol2.py +++ b/biopandas/mol2/tests/test_pandas_mol2.py @@ -17,6 +17,7 @@ def test_read_mol2(): data_path_1 = os.path.join(this_dir, "data", "40_mol2_files.mol2") data_path_2 = os.path.join(this_dir, "data", "40_mol2_files.mol2.gz") + data_path_3 = os.path.join(this_dir, "data", "empty_line.mol2") for data_path in (data_path_1, data_path_2): pdmol = PandasMol2().read_mol2(data_path) @@ -38,6 +39,9 @@ def test_read_mol2(): assert len(pdmol.mol2_text) == 6469 assert pdmol.mol2_path == data_path + pdmol = PandasMol2().read_mol2(data_path_3) + assert pdmol.df.shape == (59, 9) + def test_read_mol2_from_list(): diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 3863c34..f4176fc 100755 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,4 +1,5 @@ # Release Notes ![](img/logos/3eiy_120.png) +- Supports `mol2` files that have empty lines between blocks. (Via [Ruibin Liu](https://github.com/Ruibin-Liu) PR #[140](https://github.com/BioPandas/biopandas/pull/140#)) The CHANGELOG for the current development version is available at [https://github.com/rasbt/biopandas/blob/main/docs/sources/CHANGELOG.md](https://github.com/rasbt/biopandas/blob/main/docs/sources/CHANGELOG.md). @@ -353,4 +354,4 @@ The CHANGELOG for the current development version is available at ### 0.1.2 (2015-11-23) -- First Release. \ No newline at end of file +- First Release.