From 2ea25652124d822089ad7a7df44779dbe7883da9 Mon Sep 17 00:00:00 2001
From: Andrei Ivanov <andreii@nvidia.com>
Date: Thu, 2 Jan 2025 12:22:50 -0800
Subject: [PATCH 1/6] Pass `weights_only=False` explicitly to torch.load
 calls in graphbolt code and tests.

---
 python/dgl/graphbolt/impl/ondisk_dataset.py   |  2 +-
 python/dgl/graphbolt/internal/utils.py        |  2 +-
 .../impl/test_fused_csc_sampling_graph.py     |  4 ++--
 .../graphbolt/impl/test_ondisk_dataset.py     | 21 ++++++++-----------
 4 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/python/dgl/graphbolt/impl/ondisk_dataset.py b/python/dgl/graphbolt/impl/ondisk_dataset.py
index 303a423a853b..cf1c144ceb70 100644
--- a/python/dgl/graphbolt/impl/ondisk_dataset.py
+++ b/python/dgl/graphbolt/impl/ondisk_dataset.py
@@ -852,7 +852,7 @@ def _load_graph(
         if graph_topology is None:
             return None
         if graph_topology.type == "FusedCSCSamplingGraph":
-            return torch.load(graph_topology.path)
+            return torch.load(graph_topology.path, weights_only=False)
         raise NotImplementedError(
             f"Graph topology type {graph_topology.type} is not supported."
         )
diff --git a/python/dgl/graphbolt/internal/utils.py b/python/dgl/graphbolt/internal/utils.py
index 3855b3142f28..db4711399ce4 100644
--- a/python/dgl/graphbolt/internal/utils.py
+++ b/python/dgl/graphbolt/internal/utils.py
@@ -28,7 +28,7 @@ def numpy_save_aligned(*args, **kwargs):
 
 
 def _read_torch_data(path):
-    return torch.load(path)
+    return torch.load(path, weights_only=False)
 
 
 def _read_numpy_data(path, in_memory=True):
diff --git a/tests/python/pytorch/graphbolt/impl/test_fused_csc_sampling_graph.py b/tests/python/pytorch/graphbolt/impl/test_fused_csc_sampling_graph.py
index 93985441370a..86ee15879401 100644
--- a/tests/python/pytorch/graphbolt/impl/test_fused_csc_sampling_graph.py
+++ b/tests/python/pytorch/graphbolt/impl/test_fused_csc_sampling_graph.py
@@ -374,7 +374,7 @@ def test_load_save_homo_graph(
     with tempfile.TemporaryDirectory() as test_dir:
         filename = os.path.join(test_dir, "fused_csc_sampling_graph.pt")
         torch.save(graph, filename)
-        graph2 = torch.load(filename)
+        graph2 = torch.load(filename, weights_only=False)
 
     assert graph.total_num_nodes == graph2.total_num_nodes
     assert graph.total_num_edges == graph2.total_num_edges
@@ -459,7 +459,7 @@ def test_load_save_hetero_graph(
     with tempfile.TemporaryDirectory() as test_dir:
         filename = os.path.join(test_dir, "fused_csc_sampling_graph.pt")
         torch.save(graph, filename)
-        graph2 = torch.load(filename)
+        graph2 = torch.load(filename, weights_only=False)
 
     assert graph.total_num_nodes == graph2.total_num_nodes
     assert graph.total_num_edges == graph2.total_num_edges
diff --git a/tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py b/tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py
index 6213d9784177..9fd944278a9d 100644
--- a/tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py
+++ b/tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py
@@ -37,6 +37,11 @@ def write_yaml_and_load_dataset(yaml_content, dir, force_preprocess=False):
         gb.OnDiskDataset(dir, force_preprocess=force_preprocess)
     )
 
+def load_sampling_graph():
+    return torch.load(
+        os.path.join(test_dir, processed_dataset["graph_topology"]["path"]),
+        weights_only=False
+    )
 
 def test_OnDiskDataset_TVTSet_exceptions():
     """Test excpetions thrown when parsing TVTSet."""
@@ -1167,9 +1172,7 @@ def test_OnDiskDataset_preprocess_homogeneous(edge_fmt):
         assert "graph" not in processed_dataset
         assert "graph_topology" in processed_dataset
 
-        fused_csc_sampling_graph = torch.load(
-            os.path.join(test_dir, processed_dataset["graph_topology"]["path"])
-        )
+        fused_csc_sampling_graph = load_sampling_graph()
         assert fused_csc_sampling_graph.total_num_nodes == num_nodes
         assert fused_csc_sampling_graph.total_num_edges == num_edges
         assert (
@@ -1220,9 +1223,7 @@ def test_OnDiskDataset_preprocess_homogeneous(edge_fmt):
         )
         with open(output_file, "rb") as f:
             processed_dataset = yaml.load(f, Loader=yaml.Loader)
-        fused_csc_sampling_graph = torch.load(
-            os.path.join(test_dir, processed_dataset["graph_topology"]["path"])
-        )
+        fused_csc_sampling_graph = load_sampling_graph()
         assert (
             fused_csc_sampling_graph.edge_attributes is not None
             and gb.ORIGINAL_EDGE_ID in fused_csc_sampling_graph.edge_attributes
@@ -1365,9 +1366,7 @@ def test_OnDiskDataset_preprocess_homogeneous_hardcode(
         assert "graph" not in processed_dataset
         assert "graph_topology" in processed_dataset
 
-        fused_csc_sampling_graph = torch.load(
-            os.path.join(test_dir, processed_dataset["graph_topology"]["path"])
-        )
+        fused_csc_sampling_graph = load_sampling_graph()
         assert fused_csc_sampling_graph.total_num_nodes == num_nodes
         assert fused_csc_sampling_graph.total_num_edges == num_edges
         assert torch.equal(
@@ -1575,9 +1574,7 @@ def test_OnDiskDataset_preprocess_heterogeneous_hardcode(
         assert "graph" not in processed_dataset
         assert "graph_topology" in processed_dataset
 
-        fused_csc_sampling_graph = torch.load(
-            os.path.join(test_dir, processed_dataset["graph_topology"]["path"])
-        )
+        fused_csc_sampling_graph = load_sampling_graph()
         assert fused_csc_sampling_graph.total_num_nodes == 5
         assert fused_csc_sampling_graph.total_num_edges == 10
         assert torch.equal(

From a7b1be9dc2674836d7bed03b25731535e14401da Mon Sep 17 00:00:00 2001
From: Andrei Ivanov <andreii@nvidia.com>
Date: Fri, 3 Jan 2025 12:26:43 -0800
Subject: [PATCH 2/6] Pass `test_dir` and `processed_dataset` to
 `load_sampling_graph` as explicit arguments

---
 .../pytorch/graphbolt/impl/test_ondisk_dataset.py      | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py b/tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py
index 9fd944278a9d..b08163c79e23 100644
--- a/tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py
+++ b/tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py
@@ -37,7 +37,7 @@ def write_yaml_and_load_dataset(yaml_content, dir, force_preprocess=False):
         gb.OnDiskDataset(dir, force_preprocess=force_preprocess)
     )
 
-def load_sampling_graph():
+def load_sampling_graph(test_dir, processed_dataset):
     return torch.load(
         os.path.join(test_dir, processed_dataset["graph_topology"]["path"]),
         weights_only=False
@@ -1172,7 +1172,7 @@ def test_OnDiskDataset_preprocess_homogeneous(edge_fmt):
         assert "graph" not in processed_dataset
         assert "graph_topology" in processed_dataset
 
-        fused_csc_sampling_graph = load_sampling_graph()
+        fused_csc_sampling_graph = load_sampling_graph(test_dir, processed_dataset)
         assert fused_csc_sampling_graph.total_num_nodes == num_nodes
         assert fused_csc_sampling_graph.total_num_edges == num_edges
         assert (
@@ -1223,7 +1223,7 @@ def test_OnDiskDataset_preprocess_homogeneous(edge_fmt):
         )
         with open(output_file, "rb") as f:
             processed_dataset = yaml.load(f, Loader=yaml.Loader)
-        fused_csc_sampling_graph = load_sampling_graph()
+        fused_csc_sampling_graph = load_sampling_graph(test_dir, processed_dataset)
         assert (
             fused_csc_sampling_graph.edge_attributes is not None
             and gb.ORIGINAL_EDGE_ID in fused_csc_sampling_graph.edge_attributes
@@ -1366,7 +1366,7 @@ def test_OnDiskDataset_preprocess_homogeneous_hardcode(
         assert "graph" not in processed_dataset
         assert "graph_topology" in processed_dataset
 
-        fused_csc_sampling_graph = load_sampling_graph()
+        fused_csc_sampling_graph = load_sampling_graph(test_dir, processed_dataset)
         assert fused_csc_sampling_graph.total_num_nodes == num_nodes
         assert fused_csc_sampling_graph.total_num_edges == num_edges
         assert torch.equal(
@@ -1574,7 +1574,7 @@ def test_OnDiskDataset_preprocess_heterogeneous_hardcode(
         assert "graph" not in processed_dataset
         assert "graph_topology" in processed_dataset
 
-        fused_csc_sampling_graph = load_sampling_graph()
+        fused_csc_sampling_graph = load_sampling_graph(test_dir, processed_dataset)
         assert fused_csc_sampling_graph.total_num_nodes == 5
         assert fused_csc_sampling_graph.total_num_edges == 10
         assert torch.equal(

From 11b4220f41de8ab8730bb754f880d0c5557676cf Mon Sep 17 00:00:00 2001
From: Andrei Ivanov <andreii@nvidia.com>
Date: Mon, 6 Jan 2025 09:49:10 -0800
Subject: [PATCH 3/6] Fixing lint issues

---
 .../graphbolt/impl/test_ondisk_dataset.py     | 20 ++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py b/tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py
index b08163c79e23..4a0da78b10ea 100644
--- a/tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py
+++ b/tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py
@@ -37,12 +37,14 @@ def write_yaml_and_load_dataset(yaml_content, dir, force_preprocess=False):
         gb.OnDiskDataset(dir, force_preprocess=force_preprocess)
     )
 
+
 def load_sampling_graph(test_dir, processed_dataset):
     return torch.load(
         os.path.join(test_dir, processed_dataset["graph_topology"]["path"]),
-        weights_only=False
+        weights_only=False,
     )
 
+
 def test_OnDiskDataset_TVTSet_exceptions():
     """Test excpetions thrown when parsing TVTSet."""
     with tempfile.TemporaryDirectory() as test_dir:
@@ -1172,7 +1174,9 @@ def test_OnDiskDataset_preprocess_homogeneous(edge_fmt):
         assert "graph" not in processed_dataset
         assert "graph_topology" in processed_dataset
 
-        fused_csc_sampling_graph = load_sampling_graph(test_dir, processed_dataset)
+        fused_csc_sampling_graph = load_sampling_graph(
+            test_dir, processed_dataset
+        )
         assert fused_csc_sampling_graph.total_num_nodes == num_nodes
         assert fused_csc_sampling_graph.total_num_edges == num_edges
         assert (
@@ -1223,7 +1227,9 @@ def test_OnDiskDataset_preprocess_homogeneous(edge_fmt):
         )
         with open(output_file, "rb") as f:
             processed_dataset = yaml.load(f, Loader=yaml.Loader)
-        fused_csc_sampling_graph = load_sampling_graph(test_dir, processed_dataset)
+        fused_csc_sampling_graph = load_sampling_graph(
+            test_dir, processed_dataset
+        )
         assert (
             fused_csc_sampling_graph.edge_attributes is not None
             and gb.ORIGINAL_EDGE_ID in fused_csc_sampling_graph.edge_attributes
@@ -1366,7 +1372,9 @@ def test_OnDiskDataset_preprocess_homogeneous_hardcode(
         assert "graph" not in processed_dataset
         assert "graph_topology" in processed_dataset
 
-        fused_csc_sampling_graph = load_sampling_graph(test_dir, processed_dataset)
+        fused_csc_sampling_graph = load_sampling_graph(
+            test_dir, processed_dataset
+        )
         assert fused_csc_sampling_graph.total_num_nodes == num_nodes
         assert fused_csc_sampling_graph.total_num_edges == num_edges
         assert torch.equal(
@@ -1574,7 +1582,9 @@ def test_OnDiskDataset_preprocess_heterogeneous_hardcode(
         assert "graph" not in processed_dataset
         assert "graph_topology" in processed_dataset
 
-        fused_csc_sampling_graph = load_sampling_graph(test_dir, processed_dataset)
+        fused_csc_sampling_graph = load_sampling_graph(
+            test_dir, processed_dataset
+        )
         assert fused_csc_sampling_graph.total_num_nodes == 5
         assert fused_csc_sampling_graph.total_num_edges == 10
         assert torch.equal(

From 63465d8142c36e30da1eea2ffbe22cc121b3bd13 Mon Sep 17 00:00:00 2001
From: Andrei Ivanov <andreii@nvidia.com>
Date: Mon, 6 Jan 2025 12:28:14 -0800
Subject: [PATCH 4/6] Pass `weights_only=False` at the remaining torch.load
 call sites

---
 dglgo/dglgo/apply_pipeline/graphpred/gen.py                | 7 +++++--
 dglgo/dglgo/apply_pipeline/graphpred/graphpred.jinja-py    | 2 +-
 dglgo/dglgo/apply_pipeline/nodepred/gen.py                 | 4 ++--
 dglgo/dglgo/apply_pipeline/nodepred/nodepred.jinja-py      | 2 +-
 dglgo/dglgo/apply_pipeline/nodepred_sample/gen.py          | 4 ++--
 .../apply_pipeline/nodepred_sample/nodepred-ns.jinja-py    | 2 +-
 dglgo/dglgo/pipeline/graphpred/graphpred.jinja-py          | 2 +-
 dglgo/dglgo/pipeline/nodepred/nodepred.jinja-py            | 2 +-
 dglgo/dglgo/pipeline/nodepred_sample/nodepred-ns.jinja-py  | 2 +-
 dglgo/dglgo/utils/early_stop.py                            | 2 +-
 examples/pytorch/GNN-FiLM/main.py                          | 2 +-
 examples/pytorch/TAHIN/main.py                             | 2 +-
 examples/pytorch/argo/main.py                              | 2 +-
 examples/pytorch/correct_and_smooth/main.py                | 2 +-
 examples/pytorch/dgi/train.py                              | 2 +-
 examples/pytorch/diffpool/train.py                         | 6 ++++--
 examples/pytorch/dimenet/main.py                           | 2 +-
 examples/pytorch/gatv2/train.py                            | 2 +-
 examples/pytorch/graphsaint/train_sampling.py              | 2 +-
 examples/pytorch/graphwriter/train.py                      | 2 +-
 examples/pytorch/han/utils.py                              | 2 +-
 examples/pytorch/hardgat/train.py                          | 2 +-
 examples/pytorch/hilander/PSS/Smooth_AP/src/netlib.py      | 2 +-
 examples/pytorch/hilander/PSS/test_subg_inat.py            | 2 +-
 examples/pytorch/hilander/test.py                          | 2 +-
 examples/pytorch/hilander/test_subg.py                     | 2 +-
 examples/pytorch/jtnn/vaetrain_dgl.py                      | 2 +-
 examples/pytorch/lda/lda_model.py                          | 2 +-
 examples/pytorch/ogb/ngnn_seal/main.py                     | 4 ++--
 examples/pytorch/ogb/ogbn-arxiv/correct_and_smooth.py      | 2 +-
 .../pytorch/ogb/ogbn-proteins/main_proteins_full_dgl.py    | 2 +-
 examples/pytorch/ogb_lsc/MAG240M/train.py                  | 2 +-
 examples/pytorch/ogb_lsc/MAG240M/train_multi_gpus.py       | 2 +-
 examples/pytorch/ogb_lsc/PCQM4M/test_inference.py          | 2 +-
 examples/pytorch/pointcloud/bipointnet/train_cls.py        | 2 +-
 examples/pytorch/pointcloud/edgeconv/main.py               | 2 +-
 examples/pytorch/pointcloud/pct/train_cls.py               | 2 +-
 examples/pytorch/pointcloud/pct/train_partseg.py           | 2 +-
 examples/pytorch/pointcloud/point_transformer/train_cls.py | 2 +-
 .../pytorch/pointcloud/point_transformer/train_partseg.py  | 2 +-
 examples/pytorch/pointcloud/pointnet/train_cls.py          | 2 +-
 examples/pytorch/pointcloud/pointnet/train_partseg.py      | 2 +-
 examples/pytorch/rgcn/link.py                              | 2 +-
 examples/pytorch/rrn/sudoku_solver.py                      | 2 +-
 examples/pytorch/rrn/train_sudoku.py                       | 2 +-
 examples/pytorch/stgcn_wave/main.py                        | 2 +-
 python/dgl/distributed/partition.py                        | 2 +-
 python/dgl/graphbolt/impl/torch_based_feature_store.py     | 3 ++-
 tests/python/pytorch/graphbolt/internal/test_utils.py      | 2 +-
 49 files changed, 60 insertions(+), 54 deletions(-)

diff --git a/dglgo/dglgo/apply_pipeline/graphpred/gen.py b/dglgo/dglgo/apply_pipeline/graphpred/gen.py
index 789c2300ca14..d6a1b457073c 100644
--- a/dglgo/dglgo/apply_pipeline/graphpred/gen.py
+++ b/dglgo/dglgo/apply_pipeline/graphpred/gen.py
@@ -63,7 +63,7 @@ def config(
             cpt: str = typer.Option(..., help="input checkpoint file path"),
         ):
             # Training configuration
-            train_cfg = torch.load(cpt)["cfg"]
+            train_cfg = torch.load(cpt, weights_only=False)["cfg"]
             if data is None:
                 print("data is not specified, use the training dataset")
                 data = train_cfg["data_name"]
@@ -119,7 +119,10 @@ def gen_script(cls, user_cfg_dict):
         cls.user_cfg_cls(**user_cfg_dict)
 
         # Training configuration
-        train_cfg = torch.load(user_cfg_dict["cpt_path"])["cfg"]
+        train_cfg = torch.load(
+            user_cfg_dict["cpt_path"],
+            weights_only=False
+        )["cfg"]
 
         # Dict for code rendering
         render_cfg = deepcopy(user_cfg_dict)
diff --git a/dglgo/dglgo/apply_pipeline/graphpred/graphpred.jinja-py b/dglgo/dglgo/apply_pipeline/graphpred/graphpred.jinja-py
index 0a04eca16a62..4b7f2e1d4827 100644
--- a/dglgo/dglgo/apply_pipeline/graphpred/graphpred.jinja-py
+++ b/dglgo/dglgo/apply_pipeline/graphpred/graphpred.jinja-py
@@ -57,7 +57,7 @@ def main():
         data to have the same number of input edge features, got {:d} and {:d}'.format(model_edge_feat_size, data_edge_feat_size)
 
     model = {{ model_class_name }}(**cfg['model'])
-    model.load_state_dict(torch.load(cfg['cpt_path'], map_location='cpu')['model'])
+    model.load_state_dict(torch.load(cfg['cpt_path'], weights_only=False, map_location='cpu')['model'])
     pred = infer(device, data_loader, model).detach().cpu()
 
     # Dump the results
diff --git a/dglgo/dglgo/apply_pipeline/nodepred/gen.py b/dglgo/dglgo/apply_pipeline/nodepred/gen.py
index 934786229b0c..f6e00c3a3370 100644
--- a/dglgo/dglgo/apply_pipeline/nodepred/gen.py
+++ b/dglgo/dglgo/apply_pipeline/nodepred/gen.py
@@ -48,7 +48,7 @@ def config(
             cpt: str = typer.Option(..., help="input checkpoint file path"),
         ):
             # Training configuration
-            train_cfg = torch.load(cpt)["cfg"]
+            train_cfg = torch.load(cpt, weights_only=False)["cfg"]
             if data is None:
                 print("data is not specified, use the training dataset")
                 data = train_cfg["data_name"]
@@ -101,7 +101,7 @@ def gen_script(cls, user_cfg_dict):
         cls.user_cfg_cls(**user_cfg_dict)
 
         # Training configuration
-        train_cfg = torch.load(user_cfg_dict["cpt_path"])["cfg"]
+        train_cfg = torch.load(user_cfg_dict["cpt_path"], weights_only=False)["cfg"]
 
         # Dict for code rendering
         render_cfg = deepcopy(user_cfg_dict)
diff --git a/dglgo/dglgo/apply_pipeline/nodepred/nodepred.jinja-py b/dglgo/dglgo/apply_pipeline/nodepred/nodepred.jinja-py
index bc78a1bac9b3..7243938506f5 100644
--- a/dglgo/dglgo/apply_pipeline/nodepred/nodepred.jinja-py
+++ b/dglgo/dglgo/apply_pipeline/nodepred/nodepred.jinja-py
@@ -48,7 +48,7 @@ def main():
                 features, got {:d} and {:d}'.format(model_in_size, data_in_size)
 
     model = {{ model_class_name }}(**cfg['model'])
-    model.load_state_dict(torch.load(cfg['cpt_path'], map_location='cpu')['model'])
+    model.load_state_dict(torch.load(cfg['cpt_path'], weights_only=False, map_location='cpu')['model'])
     logits = infer(device, data, model)
     pred = logits.argmax(dim=1).cpu()
 
diff --git a/dglgo/dglgo/apply_pipeline/nodepred_sample/gen.py b/dglgo/dglgo/apply_pipeline/nodepred_sample/gen.py
index f94c66e170ad..bc866ed85903 100644
--- a/dglgo/dglgo/apply_pipeline/nodepred_sample/gen.py
+++ b/dglgo/dglgo/apply_pipeline/nodepred_sample/gen.py
@@ -48,7 +48,7 @@ def config(
             cpt: str = typer.Option(..., help="input checkpoint file path"),
         ):
             # Training configuration
-            train_cfg = torch.load(cpt)["cfg"]
+            train_cfg = torch.load(cpt, weights_only=False)["cfg"]
             if data is None:
                 print("data is not specified, use the training dataset")
                 data = train_cfg["data_name"]
@@ -101,7 +101,7 @@ def gen_script(cls, user_cfg_dict):
         cls.user_cfg_cls(**user_cfg_dict)
 
         # Training configuration
-        train_cfg = torch.load(user_cfg_dict["cpt_path"])["cfg"]
+        train_cfg = torch.load(user_cfg_dict["cpt_path"], weights_only=False)["cfg"]
 
         # Dict for code rendering
         render_cfg = deepcopy(user_cfg_dict)
diff --git a/dglgo/dglgo/apply_pipeline/nodepred_sample/nodepred-ns.jinja-py b/dglgo/dglgo/apply_pipeline/nodepred_sample/nodepred-ns.jinja-py
index bc78a1bac9b3..7243938506f5 100644
--- a/dglgo/dglgo/apply_pipeline/nodepred_sample/nodepred-ns.jinja-py
+++ b/dglgo/dglgo/apply_pipeline/nodepred_sample/nodepred-ns.jinja-py
@@ -48,7 +48,7 @@ def main():
                 features, got {:d} and {:d}'.format(model_in_size, data_in_size)
 
     model = {{ model_class_name }}(**cfg['model'])
-    model.load_state_dict(torch.load(cfg['cpt_path'], map_location='cpu')['model'])
+    model.load_state_dict(torch.load(cfg['cpt_path'], weights_only=False, map_location='cpu')['model'])
     logits = infer(device, data, model)
     pred = logits.argmax(dim=1).cpu()
 
diff --git a/dglgo/dglgo/pipeline/graphpred/graphpred.jinja-py b/dglgo/dglgo/pipeline/graphpred/graphpred.jinja-py
index 48064bf32b3f..c731b1be5456 100644
--- a/dglgo/dglgo/pipeline/graphpred/graphpred.jinja-py
+++ b/dglgo/dglgo/pipeline/graphpred/graphpred.jinja-py
@@ -101,7 +101,7 @@ def main(run, cfg, data):
         else:
             lr_scheduler.step()
 
-    model.load_state_dict(torch.load(tmp_cpt_path))
+    model.load_state_dict(torch.load(tmp_cpt_path, weights_only=False))
     os.remove(tmp_cpt_path)
     test_metric = evaluate(device, test_loader, model)
     print('Test Metric: {:.4f}'.format(test_metric))
diff --git a/dglgo/dglgo/pipeline/nodepred/nodepred.jinja-py b/dglgo/dglgo/pipeline/nodepred/nodepred.jinja-py
index e34d478de882..4f310f0b7c46 100644
--- a/dglgo/dglgo/pipeline/nodepred/nodepred.jinja-py
+++ b/dglgo/dglgo/pipeline/nodepred/nodepred.jinja-py
@@ -42,7 +42,7 @@ class EarlyStopping:
         torch.save(model.state_dict(), self.checkpoint_path)
 
     def load_checkpoint(self, model):
-        model.load_state_dict(torch.load(self.checkpoint_path))
+        model.load_state_dict(torch.load(self.checkpoint_path, weights_only=False))
 
     def close(self):
         os.remove(self.checkpoint_path)
diff --git a/dglgo/dglgo/pipeline/nodepred_sample/nodepred-ns.jinja-py b/dglgo/dglgo/pipeline/nodepred_sample/nodepred-ns.jinja-py
index 8cb142940c84..6607443863b1 100644
--- a/dglgo/dglgo/pipeline/nodepred_sample/nodepred-ns.jinja-py
+++ b/dglgo/dglgo/pipeline/nodepred_sample/nodepred-ns.jinja-py
@@ -42,7 +42,7 @@ class EarlyStopping:
         torch.save(model.state_dict(), self.checkpoint_path)
 
     def load_checkpoint(self, model):
-        model.load_state_dict(torch.load(self.checkpoint_path))
+        model.load_state_dict(torch.load(self.checkpoint_path, weights_only=False))
 
     def close(self):
         os.remove(self.checkpoint_path)
diff --git a/dglgo/dglgo/utils/early_stop.py b/dglgo/dglgo/utils/early_stop.py
index e56c76432bfa..946bb20109da 100644
--- a/dglgo/dglgo/utils/early_stop.py
+++ b/dglgo/dglgo/utils/early_stop.py
@@ -34,4 +34,4 @@ def save_checkpoint(self, model):
         torch.save(model.state_dict(), self.checkpoint_path)
 
     def load_checkpoint(self, model):
-        model.load_state_dict(torch.load(self.checkpoint_path))
+        model.load_state_dict(torch.load(self.checkpoint_path, weights_only=False))
diff --git a/examples/pytorch/GNN-FiLM/main.py b/examples/pytorch/GNN-FiLM/main.py
index d49d2b507501..db125f345e46 100644
--- a/examples/pytorch/GNN-FiLM/main.py
+++ b/examples/pytorch/GNN-FiLM/main.py
@@ -194,7 +194,7 @@ def main(args):
     model.eval()
     test_loss = []
     test_f1 = []
-    model.load_state_dict(torch.load(os.path.join(args.save_dir, args.name)))
+    model.load_state_dict(torch.load(os.path.join(args.save_dir, args.name), weights_only=False))
     with torch.no_grad():
         for batch in test_set:
             g = batch.graph
diff --git a/examples/pytorch/TAHIN/main.py b/examples/pytorch/TAHIN/main.py
index 6d9c89e33c48..5e3062284f5b 100644
--- a/examples/pytorch/TAHIN/main.py
+++ b/examples/pytorch/TAHIN/main.py
@@ -111,7 +111,7 @@ def main(args):
     # test use the best model
     model.eval()
     with torch.no_grad():
-        model.load_state_dict(torch.load("TAHIN" + "_" + args.dataset))
+        model.load_state_dict(torch.load("TAHIN" + "_" + args.dataset, weights_only=False))
         test_loss = []
         test_acc = []
         test_auc = []
diff --git a/examples/pytorch/argo/main.py b/examples/pytorch/argo/main.py
index d9e36d785bff..c9285a889c77 100644
--- a/examples/pytorch/argo/main.py
+++ b/examples/pytorch/argo/main.py
@@ -180,7 +180,7 @@ def train(
 
     PATH = "model.pt"
     if counter[0] != 0:
-        checkpoint = torch.load(PATH)
+        checkpoint = torch.load(PATH, weights_only=False)
         model.load_state_dict(checkpoint["model_state_dict"])
         opt.load_state_dict(checkpoint["optimizer_state_dict"])
         epoch = checkpoint["epoch"]
diff --git a/examples/pytorch/correct_and_smooth/main.py b/examples/pytorch/correct_and_smooth/main.py
index d33ac2fe0678..67826e16b705 100644
--- a/examples/pytorch/correct_and_smooth/main.py
+++ b/examples/pytorch/correct_and_smooth/main.py
@@ -66,7 +66,7 @@ def main():
     if args.pretrain:
         print("---------- Before ----------")
         model.load_state_dict(
-            torch.load(f"base/{args.dataset}-{args.model}.pt")
+            torch.load(f"base/{args.dataset}-{args.model}.pt", weights_only=False)
         )
         model.eval()
 
diff --git a/examples/pytorch/dgi/train.py b/examples/pytorch/dgi/train.py
index 74e438b258ef..03571b48a611 100644
--- a/examples/pytorch/dgi/train.py
+++ b/examples/pytorch/dgi/train.py
@@ -126,7 +126,7 @@ def main(args):
 
     # train classifier
     print("Loading {}th epoch".format(best_t))
-    dgi.load_state_dict(torch.load("best_dgi.pkl"))
+    dgi.load_state_dict(torch.load("best_dgi.pkl", weights_only=False))
     embeds = dgi.encoder(features, corrupt=False)
     embeds = embeds.detach()
     mean = 0
diff --git a/examples/pytorch/diffpool/train.py b/examples/pytorch/diffpool/train.py
index b89b56b94f05..4915c6d2fede 100755
--- a/examples/pytorch/diffpool/train.py
+++ b/examples/pytorch/diffpool/train.py
@@ -223,7 +223,8 @@ def graph_classify_task(prog_args):
                 + "/"
                 + prog_args.dataset
                 + "/model.iter-"
-                + str(prog_args.load_epoch)
+                + str(prog_args.load_epoch),
+                weights_only=False
             )
         )
 
@@ -334,7 +335,8 @@ def evaluate(dataloader, model, prog_args, logger=None):
                 + "/"
                 + prog_args.dataset
                 + "/model.iter-"
-                + str(logger["best_epoch"])
+                + str(logger["best_epoch"]),
+                weights_only=False
             )
         )
     model.eval()
diff --git a/examples/pytorch/dimenet/main.py b/examples/pytorch/dimenet/main.py
index eb154bcb776b..12193dcbbcb1 100644
--- a/examples/pytorch/dimenet/main.py
+++ b/examples/pytorch/dimenet/main.py
@@ -238,7 +238,7 @@ def main(model_cnf):
     if pretrain_params["flag"]:
         torch_path = pretrain_params["path"]
         target = model_params["targets"][0]
-        model.load_state_dict(torch.load(f"{torch_path}/{target}.pt"))
+        model.load_state_dict(torch.load(f"{torch_path}/{target}.pt", weights_only=False))
 
         logger.info("Testing with Pretrained model")
         predictions, labels = evaluate(device, model, test_loader)
diff --git a/examples/pytorch/gatv2/train.py b/examples/pytorch/gatv2/train.py
index e2e91821b54e..ef4f3031d66d 100644
--- a/examples/pytorch/gatv2/train.py
+++ b/examples/pytorch/gatv2/train.py
@@ -178,7 +178,7 @@ def main(args):
 
     print()
     if args.early_stop:
-        model.load_state_dict(torch.load("es_checkpoint.pt"))
+        model.load_state_dict(torch.load("es_checkpoint.pt", weights_only=False))
     acc = evaluate(g, model, features, labels, test_mask)
     print("Test Accuracy {:.4f}".format(acc))
 
diff --git a/examples/pytorch/graphsaint/train_sampling.py b/examples/pytorch/graphsaint/train_sampling.py
index 165e9e6f706d..1db40fed4b8d 100644
--- a/examples/pytorch/graphsaint/train_sampling.py
+++ b/examples/pytorch/graphsaint/train_sampling.py
@@ -214,7 +214,7 @@ def main(args, task):
     # test
     if args.use_val:
         model.load_state_dict(
-            torch.load(os.path.join(log_dir, "best_model_{}.pkl".format(task)))
+            torch.load(os.path.join(log_dir, "best_model_{}.pkl".format(task)), weights_only=False)
         )
     if cpu_flag and cuda:
         model = model.to("cpu")
diff --git a/examples/pytorch/graphwriter/train.py b/examples/pytorch/graphwriter/train.py
index 7e637d2f025c..be5dcd605bc5 100644
--- a/examples/pytorch/graphwriter/train.py
+++ b/examples/pytorch/graphwriter/train.py
@@ -161,7 +161,7 @@ def main(args):
     model = GraphWriter(args)
     model.to(args.device)
     if args.test:
-        model = torch.load(args.save_model)
+        model = torch.load(args.save_model, weights_only=False)
         model.args = args
         print(model)
         test(model, test_dataloader, args)
diff --git a/examples/pytorch/han/utils.py b/examples/pytorch/han/utils.py
index 593b6260501c..0b6b6054b25f 100644
--- a/examples/pytorch/han/utils.py
+++ b/examples/pytorch/han/utils.py
@@ -304,4 +304,4 @@ def save_checkpoint(self, model):
 
     def load_checkpoint(self, model):
         """Load the latest checkpoint."""
-        model.load_state_dict(torch.load(self.filename))
+        model.load_state_dict(torch.load(self.filename, weights_only=False))
diff --git a/examples/pytorch/hardgat/train.py b/examples/pytorch/hardgat/train.py
index 836334fec04d..1359e250daaa 100644
--- a/examples/pytorch/hardgat/train.py
+++ b/examples/pytorch/hardgat/train.py
@@ -154,7 +154,7 @@ def main(args):
 
     print()
     if args.early_stop:
-        model.load_state_dict(torch.load("es_checkpoint.pt"))
+        model.load_state_dict(torch.load("es_checkpoint.pt", weights_only=False))
     acc = evaluate(model, features, labels, test_mask)
     print("Test Accuracy {:.4f}".format(acc))
 
diff --git a/examples/pytorch/hilander/PSS/Smooth_AP/src/netlib.py b/examples/pytorch/hilander/PSS/Smooth_AP/src/netlib.py
index fb7912e6ffe4..565e53547347 100644
--- a/examples/pytorch/hilander/PSS/Smooth_AP/src/netlib.py
+++ b/examples/pytorch/hilander/PSS/Smooth_AP/src/netlib.py
@@ -71,7 +71,7 @@ def networkselect(opt):
         raise Exception("Network {} not available!".format(opt.arch))
 
     if opt.resume:
-        weights = torch.load(os.path.join(opt.save_path, opt.resume))
+        weights = torch.load(os.path.join(opt.save_path, opt.resume), weights_only=False)
         weights_state_dict = weights["state_dict"]
 
         if torch.cuda.device_count() > 1:
diff --git a/examples/pytorch/hilander/PSS/test_subg_inat.py b/examples/pytorch/hilander/PSS/test_subg_inat.py
index 8a12a9f942da..de40ba85a39a 100644
--- a/examples/pytorch/hilander/PSS/test_subg_inat.py
+++ b/examples/pytorch/hilander/PSS/test_subg_inat.py
@@ -173,7 +173,7 @@
         use_cluster_feat=args.use_cluster_feat,
         use_focal_loss=args.use_focal_loss,
     )
-    model.load_state_dict(torch.load(args.model_filename))
+    model.load_state_dict(torch.load(args.model_filename, weights_only=False))
     model = model.to(device)
     model.eval()
 
diff --git a/examples/pytorch/hilander/test.py b/examples/pytorch/hilander/test.py
index 4f1de3be759c..369e485975c4 100644
--- a/examples/pytorch/hilander/test.py
+++ b/examples/pytorch/hilander/test.py
@@ -83,7 +83,7 @@
         use_cluster_feat=args.use_cluster_feat,
         use_focal_loss=args.use_focal_loss,
     )
-    model.load_state_dict(torch.load(args.model_filename))
+    model.load_state_dict(torch.load(args.model_filename, weights_only=False))
     model = model.to(device)
     model.eval()
 
diff --git a/examples/pytorch/hilander/test_subg.py b/examples/pytorch/hilander/test_subg.py
index d44e0e002689..a2c84a2f7a67 100644
--- a/examples/pytorch/hilander/test_subg.py
+++ b/examples/pytorch/hilander/test_subg.py
@@ -104,7 +104,7 @@
         use_cluster_feat=args.use_cluster_feat,
         use_focal_loss=args.use_focal_loss,
     )
-    model.load_state_dict(torch.load(args.model_filename))
+    model.load_state_dict(torch.load(args.model_filename, weights_only=False))
     model = model.to(device)
     model.eval()
 
diff --git a/examples/pytorch/jtnn/vaetrain_dgl.py b/examples/pytorch/jtnn/vaetrain_dgl.py
index 2deee95effbb..78be2b706c2c 100755
--- a/examples/pytorch/jtnn/vaetrain_dgl.py
+++ b/examples/pytorch/jtnn/vaetrain_dgl.py
@@ -54,7 +54,7 @@ def worker_init_fn(id_):
 model = DGLJTNNVAE(vocab, hidden_size, latent_size, depth)
 
 if opts.model_path is not None:
-    model.load_state_dict(torch.load(opts.model_path))
+    model.load_state_dict(torch.load(opts.model_path, weights_only=False))
 else:
     for param in model.parameters():
         if param.dim() == 1:
diff --git a/examples/pytorch/lda/lda_model.py b/examples/pytorch/lda/lda_model.py
index 2e559a546a88..0898f33baee6 100644
--- a/examples/pytorch/lda/lda_model.py
+++ b/examples/pytorch/lda/lda_model.py
@@ -496,6 +496,6 @@ def doc_subgraph(G, doc_ids):
     with io.BytesIO() as f:
         model.save(f)
         f.seek(0)
-        print(torch.load(f))
+        print(torch.load(f, weights_only=False))
 
     print("Testing LatentDirichletAllocation passed!")
diff --git a/examples/pytorch/ogb/ngnn_seal/main.py b/examples/pytorch/ogb/ngnn_seal/main.py
index 8c7c996f9870..e635df6316eb 100755
--- a/examples/pytorch/ogb/ngnn_seal/main.py
+++ b/examples/pytorch/ogb/ngnn_seal/main.py
@@ -625,8 +625,8 @@ def print_log(*x, sep="\n", end="\n", mode="a"):
                         args.res_dir,
                         f"run{run+1}_optimizer_checkpoint{epoch}.pth",
                     )
-                    model.load_state_dict(torch.load(model_name))
-                    optimizer.load_state_dict(torch.load(optimizer_name))
+                    model.load_state_dict(torch.load(model_name, weights_only=False))
+                    optimizer.load_state_dict(torch.load(optimizer_name, weights_only=False))
                     tested[epoch] = (
                         test(final_val_loader, dataset.eval_metric)[
                             dataset.eval_metric
diff --git a/examples/pytorch/ogb/ogbn-arxiv/correct_and_smooth.py b/examples/pytorch/ogb/ogbn-arxiv/correct_and_smooth.py
index 44bb13738e3d..3bb4621882bf 100644
--- a/examples/pytorch/ogb/ogbn-arxiv/correct_and_smooth.py
+++ b/examples/pytorch/ogb/ogbn-arxiv/correct_and_smooth.py
@@ -179,7 +179,7 @@ def main():
 
     for pred_file in glob.iglob(args.pred_files):
         print("load:", pred_file)
-        pred = torch.load(pred_file)
+        pred = torch.load(pred_file, weights_only=False)
         val_acc, test_acc = run(
             args, graph, labels, pred, train_idx, val_idx, test_idx, evaluator
         )
diff --git a/examples/pytorch/ogb/ogbn-proteins/main_proteins_full_dgl.py b/examples/pytorch/ogb/ogbn-proteins/main_proteins_full_dgl.py
index f3467be4ab7d..b1c7080c69a6 100644
--- a/examples/pytorch/ogb/ogbn-proteins/main_proteins_full_dgl.py
+++ b/examples/pytorch/ogb/ogbn-proteins/main_proteins_full_dgl.py
@@ -168,7 +168,7 @@ def main(args):
         if num_patient_epochs == args["patience"]:
             break
 
-    model.load_state_dict(torch.load(model_path))
+    model.load_state_dict(torch.load(model_path, weights_only=False))
     train_score, val_score, test_score = run_an_eval_epoch(
         graph, splitted_idx, model, evaluator
     )
diff --git a/examples/pytorch/ogb_lsc/MAG240M/train.py b/examples/pytorch/ogb_lsc/MAG240M/train.py
index da84c2a61553..ea5e81916115 100644
--- a/examples/pytorch/ogb_lsc/MAG240M/train.py
+++ b/examples/pytorch/ogb_lsc/MAG240M/train.py
@@ -247,7 +247,7 @@ def test(args, dataset, g, feats, paper_offset):
         0.5,
         "paper",
     ).cuda()
-    model.load_state_dict(torch.load(args.model_path))
+    model.load_state_dict(torch.load(args.model_path, weights_only=False))
 
     model.eval()
     correct = total = 0
diff --git a/examples/pytorch/ogb_lsc/MAG240M/train_multi_gpus.py b/examples/pytorch/ogb_lsc/MAG240M/train_multi_gpus.py
index a7fb9c4cbf18..c09465aa7e53 100644
--- a/examples/pytorch/ogb_lsc/MAG240M/train_multi_gpus.py
+++ b/examples/pytorch/ogb_lsc/MAG240M/train_multi_gpus.py
@@ -304,7 +304,7 @@ def test(args, dataset, g, feats, paper_offset):
     ).cuda()
 
     # load ddp's model parameters, we need to remove the name of 'module.'
-    state_dict = torch.load(args.model_path)
+    state_dict = torch.load(args.model_path, weights_only=False)
     new_state_dict = OrderedDict()
     for k, v in state_dict.items():
         name = k[7:]
diff --git a/examples/pytorch/ogb_lsc/PCQM4M/test_inference.py b/examples/pytorch/ogb_lsc/PCQM4M/test_inference.py
index 1bfead8b93be..6b8cedb1abe9 100644
--- a/examples/pytorch/ogb_lsc/PCQM4M/test_inference.py
+++ b/examples/pytorch/ogb_lsc/PCQM4M/test_inference.py
@@ -206,7 +206,7 @@ def main():
         raise RuntimeError(f"Checkpoint file not found at {checkpoint_path}")
 
     ## reading in checkpoint
-    checkpoint = torch.load(checkpoint_path)
+    checkpoint = torch.load(checkpoint_path, weights_only=False)
     model.load_state_dict(checkpoint["model_state_dict"])
 
     print("Predicting on test data...")
diff --git a/examples/pytorch/pointcloud/bipointnet/train_cls.py b/examples/pytorch/pointcloud/bipointnet/train_cls.py
index ed50ccf4f9ff..0e2c99b7736f 100644
--- a/examples/pytorch/pointcloud/bipointnet/train_cls.py
+++ b/examples/pytorch/pointcloud/bipointnet/train_cls.py
@@ -136,7 +136,7 @@ def evaluate(net, test_loader, dev):
 
 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    net.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
 
 opt = optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-4)
 
diff --git a/examples/pytorch/pointcloud/edgeconv/main.py b/examples/pytorch/pointcloud/edgeconv/main.py
index 36a1136f9544..18e612c88a45 100644
--- a/examples/pytorch/pointcloud/edgeconv/main.py
+++ b/examples/pytorch/pointcloud/edgeconv/main.py
@@ -115,7 +115,7 @@ def evaluate(model, test_loader, dev):
 model = Model(20, [64, 64, 128, 256], [512, 512, 256], 40)
 model = model.to(dev)
 if args.load_model_path:
-    model.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    model.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
 
 opt = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
 
diff --git a/examples/pytorch/pointcloud/pct/train_cls.py b/examples/pytorch/pointcloud/pct/train_cls.py
index 4f330ac98222..0b3372cfdbb0 100644
--- a/examples/pytorch/pointcloud/pct/train_cls.py
+++ b/examples/pytorch/pointcloud/pct/train_cls.py
@@ -138,7 +138,7 @@ def evaluate(net, test_loader, dev):
 
 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    net.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
 
 
 opt = torch.optim.SGD(
diff --git a/examples/pytorch/pointcloud/pct/train_partseg.py b/examples/pytorch/pointcloud/pct/train_partseg.py
index 4ca49a5bf328..7135072f33ad 100644
--- a/examples/pytorch/pointcloud/pct/train_partseg.py
+++ b/examples/pytorch/pointcloud/pct/train_partseg.py
@@ -181,7 +181,7 @@ def evaluate(net, test_loader, dev, per_cat_verbose=False):
 
 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    net.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
 
 opt = torch.optim.SGD(
     net.parameters(), lr=0.01, weight_decay=1e-4, momentum=0.9
diff --git a/examples/pytorch/pointcloud/point_transformer/train_cls.py b/examples/pytorch/pointcloud/point_transformer/train_cls.py
index 47ff6b186ba7..4006c98e7962 100644
--- a/examples/pytorch/pointcloud/point_transformer/train_cls.py
+++ b/examples/pytorch/pointcloud/point_transformer/train_cls.py
@@ -139,7 +139,7 @@ def evaluate(net, test_loader, dev):
 
 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    net.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
 
 if args.opt == "sgd":
     # The optimizer strategy described in paper:
diff --git a/examples/pytorch/pointcloud/point_transformer/train_partseg.py b/examples/pytorch/pointcloud/point_transformer/train_partseg.py
index 22de6967f353..e74c7f67e489 100644
--- a/examples/pytorch/pointcloud/point_transformer/train_partseg.py
+++ b/examples/pytorch/pointcloud/point_transformer/train_partseg.py
@@ -185,7 +185,7 @@ def evaluate(net, test_loader, dev, per_cat_verbose=False):
 
 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    net.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
 
 if args.opt == "sgd":
     # The optimizer strategy described in paper:
diff --git a/examples/pytorch/pointcloud/pointnet/train_cls.py b/examples/pytorch/pointcloud/pointnet/train_cls.py
index 8866eef6be78..7a9355b683b0 100644
--- a/examples/pytorch/pointcloud/pointnet/train_cls.py
+++ b/examples/pytorch/pointcloud/pointnet/train_cls.py
@@ -140,7 +140,7 @@ def evaluate(net, test_loader, dev):
 
 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    net.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
 
 opt = optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-4)
 
diff --git a/examples/pytorch/pointcloud/pointnet/train_partseg.py b/examples/pytorch/pointcloud/pointnet/train_partseg.py
index cbe3322fa79a..afc6d7188ef1 100644
--- a/examples/pytorch/pointcloud/pointnet/train_partseg.py
+++ b/examples/pytorch/pointcloud/pointnet/train_partseg.py
@@ -187,7 +187,7 @@ def evaluate(net, test_loader, dev, per_cat_verbose=False):
 
 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    net.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
 
 opt = optim.Adam(net.parameters(), lr=0.001, weight_decay=1e-4)
 scheduler = optim.lr_scheduler.StepLR(opt, step_size=20, gamma=0.5)
diff --git a/examples/pytorch/rgcn/link.py b/examples/pytorch/rgcn/link.py
index 3f0949521f87..4bb8bd394a0d 100644
--- a/examples/pytorch/rgcn/link.py
+++ b/examples/pytorch/rgcn/link.py
@@ -336,7 +336,7 @@ def train(
 
     # testing
     print("Testing...")
-    checkpoint = torch.load(model_state_file)
+    checkpoint = torch.load(model_state_file, weights_only=False)
     model = model.cpu()  # test on CPU
     model.eval()
     model.load_state_dict(checkpoint["state_dict"])
diff --git a/examples/pytorch/rrn/sudoku_solver.py b/examples/pytorch/rrn/sudoku_solver.py
index 9bb0438582e9..6b293a0fb04b 100644
--- a/examples/pytorch/rrn/sudoku_solver.py
+++ b/examples/pytorch/rrn/sudoku_solver.py
@@ -25,7 +25,7 @@ def solve_sudoku(puzzle):
         urllib.request.urlretrieve(url, model_filename)
 
     model = SudokuNN(num_steps=64, edge_drop=0.0)
-    model.load_state_dict(torch.load(model_filename, map_location="cpu"))
+    model.load_state_dict(torch.load(model_filename, weights_only=False, map_location="cpu"))
     model.eval()
 
     g = _basic_sudoku_graph()
diff --git a/examples/pytorch/rrn/train_sudoku.py b/examples/pytorch/rrn/train_sudoku.py
index 1e420758b38d..864852443c10 100644
--- a/examples/pytorch/rrn/train_sudoku.py
+++ b/examples/pytorch/rrn/train_sudoku.py
@@ -79,7 +79,7 @@ def main(args):
         if not os.path.exists(model_path):
             raise FileNotFoundError("Saved model not Found!")
 
-        model.load_state_dict(torch.load(model_path))
+        model.load_state_dict(torch.load(model_path, weights_only=False))
         model.to(device)
 
         test_dataloader = sudoku_dataloader(args.batch_size, segment="test")
diff --git a/examples/pytorch/stgcn_wave/main.py b/examples/pytorch/stgcn_wave/main.py
index 09b0b455de46..f9c825a28a84 100644
--- a/examples/pytorch/stgcn_wave/main.py
+++ b/examples/pytorch/stgcn_wave/main.py
@@ -175,7 +175,7 @@
 best_model = STGCN_WAVE(
     blocks, n_his, n_route, G, drop_prob, num_layers, device, args.control_str
 ).to(device)
-best_model.load_state_dict(torch.load(save_path))
+best_model.load_state_dict(torch.load(save_path, weights_only=False))
 
 
 l = evaluate_model(best_model, loss, test_iter)
diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py
index f74da1cf9685..4b180b9279eb 100644
--- a/python/dgl/distributed/partition.py
+++ b/python/dgl/distributed/partition.py
@@ -371,7 +371,7 @@ def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False):
         os.path.getsize(partition_path),
     )
     graph = (
-        torch.load(partition_path)
+        torch.load(partition_path, weights_only=False)
         if use_graphbolt
         else load_graphs(partition_path)[0][0]
     )
diff --git a/python/dgl/graphbolt/impl/torch_based_feature_store.py b/python/dgl/graphbolt/impl/torch_based_feature_store.py
index c1739415f3e2..dea445e72433 100644
--- a/python/dgl/graphbolt/impl/torch_based_feature_store.py
+++ b/python/dgl/graphbolt/impl/torch_based_feature_store.py
@@ -615,7 +615,8 @@ def __init__(self, feat_data: List[OnDiskFeatureData]):
                     f"but the feature {key} is loaded on disk."
                 )
                 features[key] = TorchBasedFeature(
-                    torch.load(spec.path), metadata=metadata
+                    torch.load(spec.path, weights_only=False),
+                    metadata=metadata
                 )
             elif spec.format == "numpy":
                 if spec.in_memory:
diff --git a/tests/python/pytorch/graphbolt/internal/test_utils.py b/tests/python/pytorch/graphbolt/internal/test_utils.py
index 831fb9470e3a..de78b93c3d72 100644
--- a/tests/python/pytorch/graphbolt/internal/test_utils.py
+++ b/tests/python/pytorch/graphbolt/internal/test_utils.py
@@ -77,7 +77,7 @@ def test_save_data(data_fmt, save_fmt, contiguous):
 
         # Step2. Load the data.
         if save_fmt == "torch":
-            loaded_data = torch.load(save_file_name)
+            loaded_data = torch.load(save_file_name, weights_only=False)
             assert loaded_data.is_contiguous()
             assert torch.equal(tensor_data, loaded_data)
         elif save_fmt == "numpy":

From 8ca1e4e9f3ba4a0884ebafd739456ed3ae46565a Mon Sep 17 00:00:00 2001
From: Andrei Ivanov <andreii@nvidia.com>
Date: Mon, 6 Jan 2025 13:49:05 -0800
Subject: [PATCH 5/6] Fixing lint issues

---
 dglgo/dglgo/apply_pipeline/graphpred/gen.py               | 7 +++----
 dglgo/dglgo/apply_pipeline/nodepred/gen.py                | 4 +++-
 dglgo/dglgo/apply_pipeline/nodepred_sample/gen.py         | 4 +++-
 dglgo/dglgo/utils/early_stop.py                           | 4 +++-
 examples/pytorch/GNN-FiLM/main.py                         | 4 +++-
 examples/pytorch/TAHIN/main.py                            | 4 +++-
 examples/pytorch/correct_and_smooth/main.py               | 4 +++-
 examples/pytorch/diffpool/train.py                        | 4 ++--
 examples/pytorch/dimenet/main.py                          | 4 +++-
 examples/pytorch/gatv2/train.py                           | 4 +++-
 examples/pytorch/graphsaint/train_sampling.py             | 5 ++++-
 examples/pytorch/hardgat/train.py                         | 4 +++-
 examples/pytorch/hilander/PSS/Smooth_AP/src/netlib.py     | 4 +++-
 examples/pytorch/ogb/ngnn_seal/main.py                    | 8 ++++++--
 examples/pytorch/pointcloud/bipointnet/train_cls.py       | 4 +++-
 examples/pytorch/pointcloud/edgeconv/main.py              | 4 +++-
 examples/pytorch/pointcloud/pct/train_cls.py              | 4 +++-
 examples/pytorch/pointcloud/pct/train_partseg.py          | 4 +++-
 .../pytorch/pointcloud/point_transformer/train_cls.py     | 4 +++-
 .../pytorch/pointcloud/point_transformer/train_partseg.py | 4 +++-
 examples/pytorch/pointcloud/pointnet/train_cls.py         | 4 +++-
 examples/pytorch/pointcloud/pointnet/train_partseg.py     | 4 +++-
 examples/pytorch/rrn/sudoku_solver.py                     | 4 +++-
 python/dgl/graphbolt/impl/torch_based_feature_store.py    | 3 +--
 24 files changed, 73 insertions(+), 30 deletions(-)

diff --git a/dglgo/dglgo/apply_pipeline/graphpred/gen.py b/dglgo/dglgo/apply_pipeline/graphpred/gen.py
index d6a1b457073c..4c05121f8715 100644
--- a/dglgo/dglgo/apply_pipeline/graphpred/gen.py
+++ b/dglgo/dglgo/apply_pipeline/graphpred/gen.py
@@ -119,10 +119,9 @@ def gen_script(cls, user_cfg_dict):
         cls.user_cfg_cls(**user_cfg_dict)
 
         # Training configuration
-        train_cfg = torch.load(
-            user_cfg_dict["cpt_path"],
-            weights_only=False
-        )["cfg"]
+        train_cfg = torch.load(user_cfg_dict["cpt_path"], weights_only=False)[
+            "cfg"
+        ]
 
         # Dict for code rendering
         render_cfg = deepcopy(user_cfg_dict)
diff --git a/dglgo/dglgo/apply_pipeline/nodepred/gen.py b/dglgo/dglgo/apply_pipeline/nodepred/gen.py
index f6e00c3a3370..945b16ae96ed 100644
--- a/dglgo/dglgo/apply_pipeline/nodepred/gen.py
+++ b/dglgo/dglgo/apply_pipeline/nodepred/gen.py
@@ -101,7 +101,9 @@ def gen_script(cls, user_cfg_dict):
         cls.user_cfg_cls(**user_cfg_dict)
 
         # Training configuration
-        train_cfg = torch.load(user_cfg_dict["cpt_path"], weights_only=False)["cfg"]
+        train_cfg = torch.load(user_cfg_dict["cpt_path"], weights_only=False)[
+            "cfg"
+        ]
 
         # Dict for code rendering
         render_cfg = deepcopy(user_cfg_dict)
diff --git a/dglgo/dglgo/apply_pipeline/nodepred_sample/gen.py b/dglgo/dglgo/apply_pipeline/nodepred_sample/gen.py
index bc866ed85903..20629d959ed8 100644
--- a/dglgo/dglgo/apply_pipeline/nodepred_sample/gen.py
+++ b/dglgo/dglgo/apply_pipeline/nodepred_sample/gen.py
@@ -101,7 +101,9 @@ def gen_script(cls, user_cfg_dict):
         cls.user_cfg_cls(**user_cfg_dict)
 
         # Training configuration
-        train_cfg = torch.load(user_cfg_dict["cpt_path"], weights_only=False)["cfg"]
+        train_cfg = torch.load(user_cfg_dict["cpt_path"], weights_only=False)[
+            "cfg"
+        ]
 
         # Dict for code rendering
         render_cfg = deepcopy(user_cfg_dict)
diff --git a/dglgo/dglgo/utils/early_stop.py b/dglgo/dglgo/utils/early_stop.py
index 946bb20109da..658cfb091fe5 100644
--- a/dglgo/dglgo/utils/early_stop.py
+++ b/dglgo/dglgo/utils/early_stop.py
@@ -34,4 +34,6 @@ def save_checkpoint(self, model):
         torch.save(model.state_dict(), self.checkpoint_path)
 
     def load_checkpoint(self, model):
-        model.load_state_dict(torch.load(self.checkpoint_path, weights_only=False))
+        model.load_state_dict(
+            torch.load(self.checkpoint_path, weights_only=False)
+        )
diff --git a/examples/pytorch/GNN-FiLM/main.py b/examples/pytorch/GNN-FiLM/main.py
index db125f345e46..dde0fa4fe46f 100644
--- a/examples/pytorch/GNN-FiLM/main.py
+++ b/examples/pytorch/GNN-FiLM/main.py
@@ -194,7 +194,9 @@ def main(args):
     model.eval()
     test_loss = []
     test_f1 = []
-    model.load_state_dict(torch.load(os.path.join(args.save_dir, args.name), weights_only=False))
+    model.load_state_dict(
+        torch.load(os.path.join(args.save_dir, args.name), weights_only=False)
+    )
     with torch.no_grad():
         for batch in test_set:
             g = batch.graph
diff --git a/examples/pytorch/TAHIN/main.py b/examples/pytorch/TAHIN/main.py
index 5e3062284f5b..98ae77b99475 100644
--- a/examples/pytorch/TAHIN/main.py
+++ b/examples/pytorch/TAHIN/main.py
@@ -111,7 +111,9 @@ def main(args):
     # test use the best model
     model.eval()
     with torch.no_grad():
-        model.load_state_dict(torch.load("TAHIN" + "_" + args.dataset, weights_only=False))
+        model.load_state_dict(
+            torch.load("TAHIN" + "_" + args.dataset, weights_only=False)
+        )
         test_loss = []
         test_acc = []
         test_auc = []
diff --git a/examples/pytorch/correct_and_smooth/main.py b/examples/pytorch/correct_and_smooth/main.py
index 67826e16b705..dfce0789f8d0 100644
--- a/examples/pytorch/correct_and_smooth/main.py
+++ b/examples/pytorch/correct_and_smooth/main.py
@@ -66,7 +66,9 @@ def main():
     if args.pretrain:
         print("---------- Before ----------")
         model.load_state_dict(
-            torch.load(f"base/{args.dataset}-{args.model}.pt", weights_only=False)
+            torch.load(
+                f"base/{args.dataset}-{args.model}.pt", weights_only=False
+            )
         )
         model.eval()
 
diff --git a/examples/pytorch/diffpool/train.py b/examples/pytorch/diffpool/train.py
index 4915c6d2fede..2887b7ea64c1 100755
--- a/examples/pytorch/diffpool/train.py
+++ b/examples/pytorch/diffpool/train.py
@@ -224,7 +224,7 @@ def graph_classify_task(prog_args):
                 + prog_args.dataset
                 + "/model.iter-"
                 + str(prog_args.load_epoch),
-                weights_only=False
+                weights_only=False,
             )
         )
 
@@ -336,7 +336,7 @@ def evaluate(dataloader, model, prog_args, logger=None):
                 + prog_args.dataset
                 + "/model.iter-"
                 + str(logger["best_epoch"]),
-                weights_only=False
+                weights_only=False,
             )
         )
     model.eval()
diff --git a/examples/pytorch/dimenet/main.py b/examples/pytorch/dimenet/main.py
index 12193dcbbcb1..9cd23f68bb0d 100644
--- a/examples/pytorch/dimenet/main.py
+++ b/examples/pytorch/dimenet/main.py
@@ -238,7 +238,9 @@ def main(model_cnf):
     if pretrain_params["flag"]:
         torch_path = pretrain_params["path"]
         target = model_params["targets"][0]
-        model.load_state_dict(torch.load(f"{torch_path}/{target}.pt", weights_only=False))
+        model.load_state_dict(
+            torch.load(f"{torch_path}/{target}.pt", weights_only=False)
+        )
 
         logger.info("Testing with Pretrained model")
         predictions, labels = evaluate(device, model, test_loader)
diff --git a/examples/pytorch/gatv2/train.py b/examples/pytorch/gatv2/train.py
index ef4f3031d66d..4c4f8fb4e212 100644
--- a/examples/pytorch/gatv2/train.py
+++ b/examples/pytorch/gatv2/train.py
@@ -178,7 +178,9 @@ def main(args):
 
     print()
     if args.early_stop:
-        model.load_state_dict(torch.load("es_checkpoint.pt", weights_only=False))
+        model.load_state_dict(
+            torch.load("es_checkpoint.pt", weights_only=False)
+        )
     acc = evaluate(g, model, features, labels, test_mask)
     print("Test Accuracy {:.4f}".format(acc))
 
diff --git a/examples/pytorch/graphsaint/train_sampling.py b/examples/pytorch/graphsaint/train_sampling.py
index 1db40fed4b8d..3b3aeee69571 100644
--- a/examples/pytorch/graphsaint/train_sampling.py
+++ b/examples/pytorch/graphsaint/train_sampling.py
@@ -214,7 +214,10 @@ def main(args, task):
     # test
     if args.use_val:
         model.load_state_dict(
-            torch.load(os.path.join(log_dir, "best_model_{}.pkl".format(task)), weights_only=False)
+            torch.load(
+                os.path.join(log_dir, "best_model_{}.pkl".format(task)),
+                weights_only=False,
+            )
         )
     if cpu_flag and cuda:
         model = model.to("cpu")
diff --git a/examples/pytorch/hardgat/train.py b/examples/pytorch/hardgat/train.py
index 1359e250daaa..0aae2b6ca944 100644
--- a/examples/pytorch/hardgat/train.py
+++ b/examples/pytorch/hardgat/train.py
@@ -154,7 +154,9 @@ def main(args):
 
     print()
     if args.early_stop:
-        model.load_state_dict(torch.load("es_checkpoint.pt", weights_only=False))
+        model.load_state_dict(
+            torch.load("es_checkpoint.pt", weights_only=False)
+        )
     acc = evaluate(model, features, labels, test_mask)
     print("Test Accuracy {:.4f}".format(acc))
 
diff --git a/examples/pytorch/hilander/PSS/Smooth_AP/src/netlib.py b/examples/pytorch/hilander/PSS/Smooth_AP/src/netlib.py
index 565e53547347..9a4e3df3dac6 100644
--- a/examples/pytorch/hilander/PSS/Smooth_AP/src/netlib.py
+++ b/examples/pytorch/hilander/PSS/Smooth_AP/src/netlib.py
@@ -71,7 +71,9 @@ def networkselect(opt):
         raise Exception("Network {} not available!".format(opt.arch))
 
     if opt.resume:
-        weights = torch.load(os.path.join(opt.save_path, opt.resume), weights_only=False)
+        weights = torch.load(
+            os.path.join(opt.save_path, opt.resume), weights_only=False
+        )
         weights_state_dict = weights["state_dict"]
 
         if torch.cuda.device_count() > 1:
diff --git a/examples/pytorch/ogb/ngnn_seal/main.py b/examples/pytorch/ogb/ngnn_seal/main.py
index e635df6316eb..231e799d95a1 100755
--- a/examples/pytorch/ogb/ngnn_seal/main.py
+++ b/examples/pytorch/ogb/ngnn_seal/main.py
@@ -625,8 +625,12 @@ def print_log(*x, sep="\n", end="\n", mode="a"):
                         args.res_dir,
                         f"run{run+1}_optimizer_checkpoint{epoch}.pth",
                     )
-                    model.load_state_dict(torch.load(model_name, weights_only=False))
-                    optimizer.load_state_dict(torch.load(optimizer_name, weights_only=False))
+                    model.load_state_dict(
+                        torch.load(model_name, weights_only=False)
+                    )
+                    optimizer.load_state_dict(
+                        torch.load(optimizer_name, weights_only=False)
+                    )
                     tested[epoch] = (
                         test(final_val_loader, dataset.eval_metric)[
                             dataset.eval_metric
diff --git a/examples/pytorch/pointcloud/bipointnet/train_cls.py b/examples/pytorch/pointcloud/bipointnet/train_cls.py
index 0e2c99b7736f..b9164819cf05 100644
--- a/examples/pytorch/pointcloud/bipointnet/train_cls.py
+++ b/examples/pytorch/pointcloud/bipointnet/train_cls.py
@@ -136,7 +136,9 @@ def evaluate(net, test_loader, dev):
 
 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
+    net.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )
 
 opt = optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-4)
 
diff --git a/examples/pytorch/pointcloud/edgeconv/main.py b/examples/pytorch/pointcloud/edgeconv/main.py
index 18e612c88a45..7433575c5c32 100644
--- a/examples/pytorch/pointcloud/edgeconv/main.py
+++ b/examples/pytorch/pointcloud/edgeconv/main.py
@@ -115,7 +115,9 @@ def evaluate(model, test_loader, dev):
 model = Model(20, [64, 64, 128, 256], [512, 512, 256], 40)
 model = model.to(dev)
 if args.load_model_path:
-    model.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
+    model.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )
 
 opt = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
 
diff --git a/examples/pytorch/pointcloud/pct/train_cls.py b/examples/pytorch/pointcloud/pct/train_cls.py
index 0b3372cfdbb0..0716893bf537 100644
--- a/examples/pytorch/pointcloud/pct/train_cls.py
+++ b/examples/pytorch/pointcloud/pct/train_cls.py
@@ -138,7 +138,9 @@ def evaluate(net, test_loader, dev):
 
 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
+    net.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )
 
 
 opt = torch.optim.SGD(
diff --git a/examples/pytorch/pointcloud/pct/train_partseg.py b/examples/pytorch/pointcloud/pct/train_partseg.py
index 7135072f33ad..2254a851a940 100644
--- a/examples/pytorch/pointcloud/pct/train_partseg.py
+++ b/examples/pytorch/pointcloud/pct/train_partseg.py
@@ -181,7 +181,9 @@ def evaluate(net, test_loader, dev, per_cat_verbose=False):
 
 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
+    net.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )
 
 opt = torch.optim.SGD(
     net.parameters(), lr=0.01, weight_decay=1e-4, momentum=0.9
diff --git a/examples/pytorch/pointcloud/point_transformer/train_cls.py b/examples/pytorch/pointcloud/point_transformer/train_cls.py
index 4006c98e7962..a8fb9093fd6d 100644
--- a/examples/pytorch/pointcloud/point_transformer/train_cls.py
+++ b/examples/pytorch/pointcloud/point_transformer/train_cls.py
@@ -139,7 +139,9 @@ def evaluate(net, test_loader, dev):
 
 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
+    net.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )
 
 if args.opt == "sgd":
     # The optimizer strategy described in paper:
diff --git a/examples/pytorch/pointcloud/point_transformer/train_partseg.py b/examples/pytorch/pointcloud/point_transformer/train_partseg.py
index e74c7f67e489..685fb7c28e74 100644
--- a/examples/pytorch/pointcloud/point_transformer/train_partseg.py
+++ b/examples/pytorch/pointcloud/point_transformer/train_partseg.py
@@ -185,7 +185,9 @@ def evaluate(net, test_loader, dev, per_cat_verbose=False):
 
 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
+    net.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )
 
 if args.opt == "sgd":
     # The optimizer strategy described in paper:
diff --git a/examples/pytorch/pointcloud/pointnet/train_cls.py b/examples/pytorch/pointcloud/pointnet/train_cls.py
index 7a9355b683b0..501bcf1eca8e 100644
--- a/examples/pytorch/pointcloud/pointnet/train_cls.py
+++ b/examples/pytorch/pointcloud/pointnet/train_cls.py
@@ -140,7 +140,9 @@ def evaluate(net, test_loader, dev):
 
 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
+    net.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )
 
 opt = optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-4)
 
diff --git a/examples/pytorch/pointcloud/pointnet/train_partseg.py b/examples/pytorch/pointcloud/pointnet/train_partseg.py
index afc6d7188ef1..1ad9419ee45e 100644
--- a/examples/pytorch/pointcloud/pointnet/train_partseg.py
+++ b/examples/pytorch/pointcloud/pointnet/train_partseg.py
@@ -187,7 +187,9 @@ def evaluate(net, test_loader, dev, per_cat_verbose=False):
 
 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, weights_only=False, map_location=dev))
+    net.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )
 
 opt = optim.Adam(net.parameters(), lr=0.001, weight_decay=1e-4)
 scheduler = optim.lr_scheduler.StepLR(opt, step_size=20, gamma=0.5)
diff --git a/examples/pytorch/rrn/sudoku_solver.py b/examples/pytorch/rrn/sudoku_solver.py
index 6b293a0fb04b..8e9d246254fc 100644
--- a/examples/pytorch/rrn/sudoku_solver.py
+++ b/examples/pytorch/rrn/sudoku_solver.py
@@ -25,7 +25,9 @@ def solve_sudoku(puzzle):
         urllib.request.urlretrieve(url, model_filename)
 
     model = SudokuNN(num_steps=64, edge_drop=0.0)
-    model.load_state_dict(torch.load(model_filename, weights_only=False, map_location="cpu"))
+    model.load_state_dict(
+        torch.load(model_filename, weights_only=False, map_location="cpu")
+    )
     model.eval()
 
     g = _basic_sudoku_graph()
diff --git a/python/dgl/graphbolt/impl/torch_based_feature_store.py b/python/dgl/graphbolt/impl/torch_based_feature_store.py
index dea445e72433..6806175de946 100644
--- a/python/dgl/graphbolt/impl/torch_based_feature_store.py
+++ b/python/dgl/graphbolt/impl/torch_based_feature_store.py
@@ -615,8 +615,7 @@ def __init__(self, feat_data: List[OnDiskFeatureData]):
                     f"but the feature {key} is loaded on disk."
                 )
                 features[key] = TorchBasedFeature(
-                    torch.load(spec.path, weights_only=False),
-                    metadata=metadata
+                    torch.load(spec.path, weights_only=False), metadata=metadata
                 )
             elif spec.format == "numpy":
                 if spec.in_memory:

From f67742e6bff0c7398186d5cc6dd71c54283fbcb0 Mon Sep 17 00:00:00 2001
From: Andrei Ivanov <andreii@nvidia.com>
Date: Mon, 6 Jan 2025 16:32:22 -0800
Subject: [PATCH 6/6] Fixing lint issues for
 examples/pytorch/stgcn_wave/main.py

---
 examples/pytorch/stgcn_wave/main.py | 363 ++++++++++++++--------------
 1 file changed, 180 insertions(+), 183 deletions(-)

diff --git a/examples/pytorch/stgcn_wave/main.py b/examples/pytorch/stgcn_wave/main.py
index f9c825a28a84..4b99e94c0805 100644
--- a/examples/pytorch/stgcn_wave/main.py
+++ b/examples/pytorch/stgcn_wave/main.py
@@ -1,183 +1,180 @@
-import argparse
-import random
-
-import numpy as np
-import pandas as pd
-import scipy.sparse as sp
-import torch
-import torch.nn as nn
-from load_data import *
-from model import *
-from sensors2graph import *
-from sklearn.preprocessing import StandardScaler
-from utils import *
-
-import dgl
-
-parser = argparse.ArgumentParser(description="STGCN_WAVE")
-parser.add_argument("--lr", default=0.001, type=float, help="learning rate")
-parser.add_argument("--disablecuda", action="store_true", help="Disable CUDA")
-parser.add_argument(
-    "--batch_size",
-    type=int,
-    default=50,
-    help="batch size for training and validation (default: 50)",
-)
-parser.add_argument(
-    "--epochs", type=int, default=50, help="epochs for training  (default: 50)"
-)
-parser.add_argument(
-    "--num_layers", type=int, default=9, help="number of layers"
-)
-parser.add_argument("--window", type=int, default=144, help="window length")
-parser.add_argument(
-    "--sensorsfilepath",
-    type=str,
-    default="./data/sensor_graph/graph_sensor_ids.txt",
-    help="sensors file path",
-)
-parser.add_argument(
-    "--disfilepath",
-    type=str,
-    default="./data/sensor_graph/distances_la_2012.csv",
-    help="distance file path",
-)
-parser.add_argument(
-    "--tsfilepath", type=str, default="./data/metr-la.h5", help="ts file path"
-)
-parser.add_argument(
-    "--savemodelpath",
-    type=str,
-    default="stgcnwavemodel.pt",
-    help="save model path",
-)
-parser.add_argument(
-    "--pred_len",
-    type=int,
-    default=5,
-    help="how many steps away we want to predict",
-)
-parser.add_argument(
-    "--control_str",
-    type=str,
-    default="TNTSTNTST",
-    help="model strcture controller, T: Temporal Layer, S: Spatio Layer, N: Norm Layer",
-)
-parser.add_argument(
-    "--channels",
-    type=int,
-    nargs="+",
-    default=[1, 16, 32, 64, 32, 128],
-    help="model strcture controller, T: Temporal Layer, S: Spatio Layer, N: Norm Layer",
-)
-args = parser.parse_args()
-
-device = (
-    torch.device("cuda")
-    if torch.cuda.is_available() and not args.disablecuda
-    else torch.device("cpu")
-)
-
-with open(args.sensorsfilepath) as f:
-    sensor_ids = f.read().strip().split(",")
-
-distance_df = pd.read_csv(args.disfilepath, dtype={"from": "str", "to": "str"})
-
-adj_mx = get_adjacency_matrix(distance_df, sensor_ids)
-sp_mx = sp.coo_matrix(adj_mx)
-G = dgl.from_scipy(sp_mx)
-
-
-df = pd.read_hdf(args.tsfilepath)
-num_samples, num_nodes = df.shape
-
-tsdata = df.to_numpy()
-
-
-n_his = args.window
-
-save_path = args.savemodelpath
-
-
-n_pred = args.pred_len
-n_route = num_nodes
-blocks = args.channels
-# blocks = [1, 16, 32, 64, 32, 128]
-drop_prob = 0
-num_layers = args.num_layers
-
-batch_size = args.batch_size
-epochs = args.epochs
-lr = args.lr
-
-
-W = adj_mx
-len_val = round(num_samples * 0.1)
-len_train = round(num_samples * 0.7)
-train = df[:len_train]
-val = df[len_train : len_train + len_val]
-test = df[len_train + len_val :]
-
-scaler = StandardScaler()
-train = scaler.fit_transform(train)
-val = scaler.transform(val)
-test = scaler.transform(test)
-
-
-x_train, y_train = data_transform(train, n_his, n_pred, device)
-x_val, y_val = data_transform(val, n_his, n_pred, device)
-x_test, y_test = data_transform(test, n_his, n_pred, device)
-
-train_data = torch.utils.data.TensorDataset(x_train, y_train)
-train_iter = torch.utils.data.DataLoader(train_data, batch_size, shuffle=True)
-val_data = torch.utils.data.TensorDataset(x_val, y_val)
-val_iter = torch.utils.data.DataLoader(val_data, batch_size)
-test_data = torch.utils.data.TensorDataset(x_test, y_test)
-test_iter = torch.utils.data.DataLoader(test_data, batch_size)
-
-
-loss = nn.MSELoss()
-G = G.to(device)
-model = STGCN_WAVE(
-    blocks, n_his, n_route, G, drop_prob, num_layers, device, args.control_str
-).to(device)
-optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
-
-scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.7)
-
-min_val_loss = np.inf
-for epoch in range(1, epochs + 1):
-    l_sum, n = 0.0, 0
-    model.train()
-    for x, y in train_iter:
-        y_pred = model(x).view(len(x), -1)
-        l = loss(y_pred, y)
-        optimizer.zero_grad()
-        l.backward()
-        optimizer.step()
-        l_sum += l.item() * y.shape[0]
-        n += y.shape[0]
-    scheduler.step()
-    val_loss = evaluate_model(model, loss, val_iter)
-    if val_loss < min_val_loss:
-        min_val_loss = val_loss
-        torch.save(model.state_dict(), save_path)
-    print(
-        "epoch",
-        epoch,
-        ", train loss:",
-        l_sum / n,
-        ", validation loss:",
-        val_loss,
-    )
-
-
-best_model = STGCN_WAVE(
-    blocks, n_his, n_route, G, drop_prob, num_layers, device, args.control_str
-).to(device)
-best_model.load_state_dict(torch.load(save_path, weights_only=False))
-
-
-l = evaluate_model(best_model, loss, test_iter)
-MAE, MAPE, RMSE = evaluate_metric(best_model, test_iter, scaler)
-print("test loss:", l, "\nMAE:", MAE, ", MAPE:", MAPE, ", RMSE:", RMSE)
+import argparse
+import random
+
+import numpy as np
+import pandas as pd
+import scipy.sparse as sp
+import torch
+import torch.nn as nn
+from load_data import *
+from model import *
+from sensors2graph import *
+from sklearn.preprocessing import StandardScaler
+from utils import *
+
+import dgl
+
+parser = argparse.ArgumentParser(description="STGCN_WAVE")
+parser.add_argument("--lr", default=0.001, type=float, help="learning rate")
+parser.add_argument("--disablecuda", action="store_true", help="Disable CUDA")
+parser.add_argument(
+    "--batch_size",
+    type=int,
+    default=50,
+    help="batch size for training and validation (default: 50)",
+)
+parser.add_argument(
+    "--epochs", type=int, default=50, help="epochs for training  (default: 50)"
+)
+parser.add_argument(
+    "--num_layers", type=int, default=9, help="number of layers"
+)
+parser.add_argument("--window", type=int, default=144, help="window length")
+parser.add_argument(
+    "--sensorsfilepath",
+    type=str,
+    default="./data/sensor_graph/graph_sensor_ids.txt",
+    help="sensors file path",
+)
+parser.add_argument(
+    "--disfilepath",
+    type=str,
+    default="./data/sensor_graph/distances_la_2012.csv",
+    help="distance file path",
+)
+parser.add_argument(
+    "--tsfilepath", type=str, default="./data/metr-la.h5", help="ts file path"
+)
+parser.add_argument(
+    "--savemodelpath",
+    type=str,
+    default="stgcnwavemodel.pt",
+    help="save model path",
+)
+parser.add_argument(
+    "--pred_len",
+    type=int,
+    default=5,
+    help="how many steps away we want to predict",
+)
+parser.add_argument(
+    "--control_str",
+    type=str,
+    default="TNTSTNTST",
+    help="model structure controller, T: Temporal Layer, S: Spatio Layer, N: Norm Layer",
+)
+parser.add_argument(
+    "--channels",
+    type=int,
+    nargs="+",
+    default=[1, 16, 32, 64, 32, 128],
+    help="list of channel sizes passed to the model (default: 1 16 32 64 32 128)",
+)
+args = parser.parse_args()
+
+device = (
+    torch.device("cuda")
+    if torch.cuda.is_available() and not args.disablecuda
+    else torch.device("cpu")
+)
+
+with open(args.sensorsfilepath) as f:
+    sensor_ids = f.read().strip().split(",")
+distance_df = pd.read_csv(args.disfilepath, dtype={"from": "str", "to": "str"})
+
+adj_mx = get_adjacency_matrix(distance_df, sensor_ids)
+sp_mx = sp.coo_matrix(adj_mx)
+G = dgl.from_scipy(sp_mx)
+
+
+df = pd.read_hdf(args.tsfilepath)
+num_samples, num_nodes = df.shape
+
+tsdata = df.to_numpy()
+
+
+n_his = args.window
+
+save_path = args.savemodelpath
+
+
+n_pred = args.pred_len
+n_route = num_nodes
+blocks = args.channels
+# blocks = [1, 16, 32, 64, 32, 128]
+drop_prob = 0
+num_layers = args.num_layers
+
+batch_size = args.batch_size
+epochs = args.epochs
+lr = args.lr
+
+
+W = adj_mx
+len_val = round(num_samples * 0.1)
+len_train = round(num_samples * 0.7)
+train = df[:len_train]
+val = df[len_train : len_train + len_val]
+test = df[len_train + len_val :]
+
+scaler = StandardScaler()
+train = scaler.fit_transform(train)
+val = scaler.transform(val)
+test = scaler.transform(test)
+
+
+x_train, y_train = data_transform(train, n_his, n_pred, device)
+x_val, y_val = data_transform(val, n_his, n_pred, device)
+x_test, y_test = data_transform(test, n_his, n_pred, device)
+
+train_data = torch.utils.data.TensorDataset(x_train, y_train)
+train_iter = torch.utils.data.DataLoader(train_data, batch_size, shuffle=True)
+val_data = torch.utils.data.TensorDataset(x_val, y_val)
+val_iter = torch.utils.data.DataLoader(val_data, batch_size)
+test_data = torch.utils.data.TensorDataset(x_test, y_test)
+test_iter = torch.utils.data.DataLoader(test_data, batch_size)
+
+
+loss = nn.MSELoss()
+G = G.to(device)
+model = STGCN_WAVE(
+    blocks, n_his, n_route, G, drop_prob, num_layers, device, args.control_str
+).to(device)
+optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
+
+scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.7)
+
+min_val_loss = np.inf
+for epoch in range(1, epochs + 1):
+    l_sum, n = 0.0, 0
+    model.train()
+    for x, y in train_iter:
+        y_pred = model(x).view(len(x), -1)
+        l = loss(y_pred, y)
+        optimizer.zero_grad()
+        l.backward()
+        optimizer.step()
+        l_sum += l.item() * y.shape[0]
+        n += y.shape[0]
+    scheduler.step()
+    val_loss = evaluate_model(model, loss, val_iter)
+    if val_loss < min_val_loss:
+        min_val_loss = val_loss
+        torch.save(model.state_dict(), save_path)
+    print(
+        "epoch",
+        epoch,
+        ", train loss:",
+        l_sum / n,
+        ", validation loss:",
+        val_loss,
+    )
+best_model = STGCN_WAVE(
+    blocks, n_his, n_route, G, drop_prob, num_layers, device, args.control_str
+).to(device)
+best_model.load_state_dict(torch.load(save_path, weights_only=False))
+
+
+l = evaluate_model(best_model, loss, test_iter)
+MAE, MAPE, RMSE = evaluate_metric(best_model, test_iter, scaler)
+print("test loss:", l, "\nMAE:", MAE, ", MAPE:", MAPE, ", RMSE:", RMSE)