diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4ea4da55ca..981a2b715c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -44,7 +44,7 @@ To run a single test from the command line
 ```sh
 pytest -vs {path_to_test}::{test_name}
 # or in cuda mode
-CUDA_TEST=1 PYRO_TENSOR_TYPE=torch.cuda.DoubleTensor pytest -vs {path_to_test}::{test_name}
+CUDA_TEST=1 PYRO_DTYPE=float64 PYRO_DEVICE=cuda pytest -vs {path_to_test}::{test_name}
 ```
 
 To ensure documentation builds correctly, run
diff --git a/Makefile b/Makefile
index ee0b6945f8..f0585be7dc 100644
--- a/Makefile
+++ b/Makefile
@@ -69,11 +69,11 @@ test-all: lint FORCE
 	| xargs pytest -vx --nbval-lax
 
 test-cuda: lint FORCE
-	CUDA_TEST=1 PYRO_TENSOR_TYPE=torch.cuda.DoubleTensor pytest -vx --stage unit
+	CUDA_TEST=1 PYRO_DTYPE=float64 PYRO_DEVICE=cuda pytest -vx --stage unit
 	CUDA_TEST=1 pytest -vx tests/test_examples.py::test_cuda
 
 test-cuda-lax: lint FORCE
-	CUDA_TEST=1 PYRO_TENSOR_TYPE=torch.cuda.DoubleTensor pytest -vx --stage unit --lax
+	CUDA_TEST=1 PYRO_DTYPE=float64 PYRO_DEVICE=cuda pytest -vx --stage unit --lax
 	CUDA_TEST=1 pytest -vx tests/test_examples.py::test_cuda
 
 test-jit: FORCE
diff --git a/docs/source/conf.py b/docs/source/conf.py
index e9c4bf84b8..1d55424c4d 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -223,6 +223,6 @@ def setup(app):
 if "READTHEDOCS" in os.environ:
     os.system("pip install numpy")
     os.system(
-        "pip install torch==1.11.0+cpu torchvision==0.12.0+cpu "
+        "pip install torch==2.0+cpu torchvision==0.15.0+cpu "
         "-f https://download.pytorch.org/whl/torch_stable.html"
     )
diff --git a/examples/baseball.py b/examples/baseball.py
index 6e40891dcf..210b88420b 100644
--- a/examples/baseball.py
+++ b/examples/baseball.py
@@ -418,6 +418,6 @@ def main(args):
     torch.multiprocessing.set_sharing_strategy("file_system")
 
     if args.cuda:
-        torch.set_default_tensor_type(torch.cuda.FloatTensor)
+        torch.set_default_device("cuda")
 
     main(args)
diff --git a/examples/contrib/cevae/synthetic.py b/examples/contrib/cevae/synthetic.py
index 6a5979eeb7..f249625951 100644
--- a/examples/contrib/cevae/synthetic.py
+++ b/examples/contrib/cevae/synthetic.py
@@ -45,7 +45,7 @@ def generate_data(args):
 
 def main(args):
     if args.cuda:
-        torch.set_default_tensor_type("torch.cuda.FloatTensor")
+        torch.set_default_device("cuda")
 
     # Generate synthetic data.
     pyro.set_rng_seed(args.seed)
diff --git a/examples/contrib/epidemiology/regional.py b/examples/contrib/epidemiology/regional.py
index ee80f869ee..0af5f5cf30 100644
--- a/examples/contrib/epidemiology/regional.py
+++ b/examples/contrib/epidemiology/regional.py
@@ -205,12 +205,9 @@ def main(args):
     if args.warmup_steps is None:
         args.warmup_steps = args.num_samples
     if args.double:
-        if args.cuda:
-            torch.set_default_tensor_type(torch.cuda.DoubleTensor)
-        else:
-            torch.set_default_dtype(torch.float64)
-    elif args.cuda:
-        torch.set_default_tensor_type(torch.cuda.FloatTensor)
+        torch.set_default_dtype(torch.float64)
+    if args.cuda:
+        torch.set_default_device("cuda")
 
     main(args)
diff --git a/examples/contrib/epidemiology/sir.py b/examples/contrib/epidemiology/sir.py
index 71fe6fb60f..8fcde5417b 100644
--- a/examples/contrib/epidemiology/sir.py
+++ b/examples/contrib/epidemiology/sir.py
@@ -391,12 +391,9 @@ def main(args):
     if args.warmup_steps is None:
         args.warmup_steps = args.num_samples
     if args.double:
-        if args.cuda:
-            torch.set_default_tensor_type(torch.cuda.DoubleTensor)
-        else:
-            torch.set_default_dtype(torch.float64)
-    elif args.cuda:
-        torch.set_default_tensor_type(torch.cuda.FloatTensor)
+        torch.set_default_dtype(torch.float64)
+    if args.cuda:
+        torch.set_default_device("cuda")
 
     main(args)
diff --git a/examples/contrib/funsor/hmm.py b/examples/contrib/funsor/hmm.py
index 117cf3af34..6756057b51 100644
--- a/examples/contrib/funsor/hmm.py
+++ b/examples/contrib/funsor/hmm.py
@@ -670,7 +670,7 @@ def model_7(sequences, lengths, args, batch_size=None, include_prior=True):
 
 def main(args):
     if args.cuda:
-        torch.set_default_tensor_type("torch.cuda.FloatTensor")
+        torch.set_default_device("cuda")
 
     logging.info("Loading data")
     data = poly.load_data(poly.JSB_CHORALES)
diff --git a/examples/contrib/mue/FactorMuE.py b/examples/contrib/mue/FactorMuE.py
index cd5ec11035..1fff31db2f 100644
--- a/examples/contrib/mue/FactorMuE.py
+++ b/examples/contrib/mue/FactorMuE.py
@@ -427,9 +427,8 @@ def main(args):
     )
     args = parser.parse_args()
 
+    torch.set_default_dtype(torch.float64)
     if args.cuda:
-        torch.set_default_tensor_type(torch.cuda.DoubleTensor)
-    else:
-        torch.set_default_dtype(torch.float64)
+        torch.set_default_device("cuda")
 
     main(args)
diff --git a/examples/contrib/mue/ProfileHMM.py b/examples/contrib/mue/ProfileHMM.py
index 1226cb42b8..cae6b103da 100644
--- a/examples/contrib/mue/ProfileHMM.py
+++ b/examples/contrib/mue/ProfileHMM.py
@@ -316,9 +316,8 @@ def main(args):
     )
     args = parser.parse_args()
 
+    torch.set_default_dtype(torch.float64)
     if args.cuda:
-        torch.set_default_tensor_type(torch.cuda.DoubleTensor)
-    else:
-        torch.set_default_dtype(torch.float64)
+        torch.set_default_device("cuda")
 
     main(args)
diff --git a/examples/einsum.py b/examples/einsum.py
index e61137a811..caafc1693f 100644
--- a/examples/einsum.py
+++ b/examples/einsum.py
@@ -174,9 +174,7 @@ def time_fn(fn, equation, *operands, **kwargs):
 
 def main(args):
     if args.cuda:
-        torch.set_default_tensor_type("torch.cuda.FloatTensor")
-    else:
-        torch.set_default_tensor_type("torch.FloatTensor")
+        torch.set_default_device("cuda")
 
     if args.method == "all":
         for method in ["prob", "logprob", "gradient", "marginal", "map", "sample"]:
diff --git a/examples/hmm.py b/examples/hmm.py
index 758f38eb1b..0c0c4418e1 100644
--- a/examples/hmm.py
+++ b/examples/hmm.py
@@ -620,7 +620,7 @@ def model_7(sequences, lengths, args, batch_size=None, include_prior=True):
 
 def main(args):
     if args.cuda:
-        torch.set_default_tensor_type("torch.cuda.FloatTensor")
+        torch.set_default_device("cuda")
 
     logging.info("Loading data")
     data = poly.load_data(poly.JSB_CHORALES)
diff --git a/examples/sir_hmc.py b/examples/sir_hmc.py
index db66247287..9eb92c41a6 100644
--- a/examples/sir_hmc.py
+++ b/examples/sir_hmc.py
@@ -663,12 +663,9 @@ def main(args):
     args = parser.parse_args()
 
     if args.double:
-        if args.cuda:
-            torch.set_default_tensor_type(torch.cuda.DoubleTensor)
-        else:
-            torch.set_default_tensor_type(torch.DoubleTensor)
-    elif args.cuda:
-        torch.set_default_tensor_type(torch.cuda.FloatTensor)
+        torch.set_default_dtype(torch.float64)
+    if args.cuda:
+        torch.set_default_device("cuda")
 
     main(args)
diff --git a/examples/sparse_gamma_def.py b/examples/sparse_gamma_def.py
index e774145e3f..318666e423 100644
--- a/examples/sparse_gamma_def.py
+++ b/examples/sparse_gamma_def.py
@@ -31,7 +31,7 @@
 from pyro.infer import SVI, TraceMeanField_ELBO
 from pyro.infer.autoguide import AutoDiagonalNormal, init_to_feasible
 
-torch.set_default_tensor_type("torch.FloatTensor")
+torch.set_default_dtype(torch.float32)
 pyro.util.set_rng_seed(0)
 
diff --git a/examples/sparse_regression.py b/examples/sparse_regression.py
index f896c968db..5a7431ae05 100644
--- a/examples/sparse_regression.py
+++ b/examples/sparse_regression.py
@@ -41,7 +41,7 @@
 """
 
-torch.set_default_tensor_type("torch.FloatTensor")
+torch.set_default_dtype(torch.float32)
 
 
 def dot(X, Z):
diff --git a/examples/svi_horovod.py b/examples/svi_horovod.py
index f4bde1b72c..a6ceb61889 100644
--- a/examples/svi_horovod.py
+++ b/examples/svi_horovod.py
@@ -78,7 +78,7 @@ def main(args):
         if args.cuda:
             torch.cuda.set_device(hvd.local_rank())
     if args.cuda:
-        torch.set_default_tensor_type("torch.cuda.FloatTensor")
+        torch.set_default_device("cuda")
     device = torch.tensor(0).device
 
     if args.horovod:
diff --git a/examples/svi_lightning.py b/examples/svi_lightning.py
index ce5089453e..e1ac8303cd 100644
--- a/examples/svi_lightning.py
+++ b/examples/svi_lightning.py
@@ -15,7 +15,7 @@
 
 import argparse
 
-import pytorch_lightning as pl
+import lightning.pytorch as pl
 import torch
 
 import pyro
diff --git a/profiler/gaussianhmm.py b/profiler/gaussianhmm.py
index 898b37f4e0..729c87b5c6 100644
--- a/profiler/gaussianhmm.py
+++ b/profiler/gaussianhmm.py
@@ -21,7 +21,7 @@ def random_mvn(batch_shape, dim, requires_grad=False):
 
 def main(args):
     if args.cuda:
-        torch.set_default_tensor_type("torch.cuda.FloatTensor")
+        torch.set_default_device("cuda")
 
     hidden_dim = args.hidden_dim
     obs_dim = args.obs_dim
diff --git a/pyro/contrib/gp/parameterized.py b/pyro/contrib/gp/parameterized.py
index 04ddd7d280..c4f28176e8 100644
--- a/pyro/contrib/gp/parameterized.py
+++ b/pyro/contrib/gp/parameterized.py
@@ -82,7 +82,7 @@ class Parameterized(PyroModule):
     >>> assert "b_scale_unconstrained" in dict(linear.named_parameters())
 
     Note that by default, data of a parameter is a float :class:`torch.Tensor`
-    (unless we use :func:`torch.set_default_tensor_type` to change default
+    (unless we use :func:`torch.set_default_dtype` to change default
     tensor type). To cast these parameters to a correct data type or GPU device,
     we can call methods such as :meth:`~torch.nn.Module.double` or
     :meth:`~torch.nn.Module.cuda`. See :class:`torch.nn.Module` for more
diff --git a/pyro/infer/mcmc/api.py b/pyro/infer/mcmc/api.py
index 4e515fd42a..d6df431e9c 100644
--- a/pyro/infer/mcmc/api.py
+++ b/pyro/infer/mcmc/api.py
@@ -107,13 +107,15 @@ def __init__(
         self.rng_seed = (torch.initial_seed() + chain_id) % MAX_SEED
         self.log_queue = log_queue
         self.result_queue = result_queue
-        self.default_tensor_type = torch.Tensor().type()
+        self.default_dtype = torch.Tensor().dtype
+        self.default_device = torch.Tensor().device
         self.hook = hook
         self.event = event
 
     def run(self, *args, **kwargs):
         pyro.set_rng_seed(self.rng_seed)
-        torch.set_default_tensor_type(self.default_tensor_type)
+        torch.set_default_dtype(self.default_dtype)
+        torch.set_default_device(self.default_device)
         kwargs = kwargs
         logger = logging.getLogger("pyro.infer.mcmc")
         logger_id = "CHAIN:{}".format(self.chain_id)
diff --git a/pyro/ops/provenance.py b/pyro/ops/provenance.py
index cde77aa45d..a6902a60cd 100644
--- a/pyro/ops/provenance.py
+++ b/pyro/ops/provenance.py
@@ -93,11 +93,6 @@ def _track_provenance_set(x, provenance: frozenset):
 @track_provenance.register(tuple)
 @track_provenance.register(dict)
 def _track_provenance_pytree(x, provenance: frozenset):
-    # avoid max-recursion depth error for torch<=2.0
-    flat_args, _ = tree_flatten(x)
-    if not flat_args or flat_args[0] is x:
-        return x
-
     return tree_map(partial(track_provenance, provenance=provenance), x)
 
 
@@ -143,11 +138,6 @@ def _extract_provenance_set(x):
 @extract_provenance.register(tuple)
 @extract_provenance.register(dict)
 def _extract_provenance_pytree(x):
-    # avoid max-recursion depth error for torch<=2.0
-    flat_args, _ = tree_flatten(x)
-    if not flat_args or flat_args[0] is x:
-        return x, frozenset()
-
     flat_args, spec = tree_flatten(x)
     xs = []
     provenance = frozenset()
diff --git a/pyro/optim/pytorch_optimizers.py b/pyro/optim/pytorch_optimizers.py
index 42412a233d..ac39ad12fe 100644
--- a/pyro/optim/pytorch_optimizers.py
+++ b/pyro/optim/pytorch_optimizers.py
@@ -34,11 +34,7 @@
 del _PyroOptim
 
 # Load all schedulers from PyTorch
-# breaking change in torch >= 1.14: LRScheduler is new base class
-if hasattr(torch.optim.lr_scheduler, "LRScheduler"):
-    _torch_scheduler_base = torch.optim.lr_scheduler.LRScheduler  # type: ignore
-else:  # for torch < 1.13, _LRScheduler is base class
-    _torch_scheduler_base = torch.optim.lr_scheduler._LRScheduler  # type: ignore
+_torch_scheduler_base = torch.optim.lr_scheduler.LRScheduler  # type: ignore
 
 for _name, _Optim in torch.optim.lr_scheduler.__dict__.items():
     if not isinstance(_Optim, type):
diff --git a/setup.py b/setup.py
index b7eec2ba1f..ea25c09887 100644
--- a/setup.py
+++ b/setup.py
@@ -68,10 +68,10 @@
         "jupyter>=1.0.0",
         "graphviz>=0.8",
         "matplotlib>=1.3",
-        "torchvision>=0.12.0",
+        "torchvision>=0.15.0",
        "visdom>=0.1.4,<0.2.2",  # FIXME visdom.utils is unavailable >=0.2.2
         "pandas",
-        "pillow==8.2.0",  # https://github.com/pytorch/pytorch/issues/61125
+        "pillow>=8.3.1",  # https://github.com/pytorch/pytorch/issues/61125
         "scikit-learn",
         "seaborn>=0.11.0",
         "wget",
@@ -102,7 +102,7 @@
         "numpy>=1.7",
         "opt_einsum>=2.3.2",
         "pyro-api>=0.1.1",
-        "torch>=1.11.0",
+        "torch>=2.0",
         "tqdm>=4.36",
     ],
     extras_require={
@@ -135,7 +135,7 @@
         "yapf",
     ],
     "horovod": ["horovod[pytorch]>=0.19"],
-    "lightning": ["pytorch_lightning"],
+    "lightning": ["lightning"],
     "funsor": [
         # This must be a released version when Pyro is released.
         # "funsor[torch] @ git+git://github.com/pyro-ppl/funsor.git@7bb52d0eae3046d08a20d1b288544e1a21b4f461",
diff --git a/tests/common.py b/tests/common.py
index 385c5f7945..55ddddbe4a 100644
--- a/tests/common.py
+++ b/tests/common.py
@@ -69,11 +69,11 @@ def wrapper(*args, **kwargs):
     )
 
 try:
-    import pytorch_lightning
+    import lightning
 except ImportError:
-    pytorch_lightning = None
+    lightning = None
 requires_lightning = pytest.mark.skipif(
-    pytorch_lightning is None, reason="pytorch lightning is not available"
+    lightning is None, reason="pytorch lightning is not available"
 )
 
 try:
@@ -93,23 +93,6 @@ def get_gpu_type(t):
     return getattr(torch.cuda, t.__name__)
 
 
-@contextlib.contextmanager
-def tensors_default_to(host):
-    """
-    Context manager to temporarily use Cpu or Cuda tensors in PyTorch.
-
-    :param str host: Either "cuda" or "cpu".
-    """
-    assert host in ("cpu", "cuda"), host
-    old_module, name = torch.Tensor().type().rsplit(".", 1)
-    new_module = "torch.cuda" if host == "cuda" else "torch"
-    torch.set_default_tensor_type("{}.{}".format(new_module, name))
-    try:
-        yield
-    finally:
-        torch.set_default_tensor_type("{}.{}".format(old_module, name))
-
-
 @contextlib.contextmanager
 def default_dtype(dtype):
     """
diff --git a/tests/conftest.py b/tests/conftest.py
index 4103356fe3..94b6479222 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -9,7 +9,9 @@
 
 import pyro
 
-torch.set_default_tensor_type(os.environ.get("PYRO_TENSOR_TYPE", "torch.DoubleTensor"))
+DTYPE = getattr(torch, os.environ.get("PYRO_DTYPE", "float64"))
+torch.set_default_dtype(DTYPE)
+torch.set_default_device(os.environ.get("PYRO_DEVICE", "cpu"))
 
 
 def pytest_configure(config):
diff --git a/tests/contrib/timeseries/test_gp.py b/tests/contrib/timeseries/test_gp.py
index 2a6c73bf85..e931b2ef8d 100644
--- a/tests/contrib/timeseries/test_gp.py
+++ b/tests/contrib/timeseries/test_gp.py
@@ -40,7 +40,7 @@
 )
 @pytest.mark.parametrize("T", [11, 37])
 def test_timeseries_models(model, nu_statedim, obs_dim, T):
-    torch.set_default_tensor_type("torch.DoubleTensor")
+    torch.set_default_dtype(torch.float64)
     dt = 0.1 + torch.rand(1).item()
 
     if model == "lcmgp":
diff --git a/tests/contrib/timeseries/test_lgssm.py b/tests/contrib/timeseries/test_lgssm.py
index 8bfcef31a2..8d7c68ea70 100644
--- a/tests/contrib/timeseries/test_lgssm.py
+++ b/tests/contrib/timeseries/test_lgssm.py
@@ -13,7 +13,7 @@
 @pytest.mark.parametrize("obs_dim", [2, 4])
 @pytest.mark.parametrize("T", [11, 17])
 def test_generic_lgssm_forecast(model_class, state_dim, obs_dim, T):
-    torch.set_default_tensor_type("torch.DoubleTensor")
+    torch.set_default_dtype(torch.float64)
 
     if model_class == "lgssm":
         model = GenericLGSSM(
diff --git a/tests/distributions/test_cuda.py b/tests/distributions/test_cuda.py
index bd4d4ba044..40ee2f0e99 100644
--- a/tests/distributions/test_cuda.py
+++ b/tests/distributions/test_cuda.py
@@ -8,7 +8,6 @@
 from tests.common import (
     assert_equal,
     requires_cuda,
-    tensors_default_to,
     xfail_if_not_implemented,
 )
 
@@ -17,7 +16,7 @@
 def test_sample(dist):
     for idx in range(len(dist.dist_params)):
         # Compute CPU value.
-        with tensors_default_to("cpu"):
+        with torch.device("cpu"):
             params = dist.get_dist_params(idx)
             try:
                 with xfail_if_not_implemented():
@@ -27,7 +26,7 @@
             assert not cpu_value.is_cuda
 
         # Compute GPU value.
-        with tensors_default_to("cuda"):
+        with torch.device("cuda"):
             params = dist.get_dist_params(idx)
             cuda_value = dist.pyro_dist(**params).sample()
         assert cuda_value.is_cuda
@@ -41,7 +40,7 @@ def test_rsample(dist):
         return
     for idx in range(len(dist.dist_params)):
         # Compute CPU value.
-        with tensors_default_to("cpu"):
+        with torch.device("cpu"):
             params = dist.get_dist_params(idx)
             grad_params = [
                 key
@@ -61,7 +60,7 @@
             assert not cpu_value.is_cuda
 
         # Compute GPU value.
-        with tensors_default_to("cuda"):
+        with torch.device("cuda"):
             params = dist.get_dist_params(idx)
             for key in grad_params:
                 val = params[key].clone()
@@ -80,7 +79,7 @@
 def test_log_prob(dist):
     for idx in range(len(dist.dist_params)):
         # Compute CPU value.
-        with tensors_default_to("cpu"):
+        with torch.device("cpu"):
             data = dist.get_test_data(idx)
             params = dist.get_dist_params(idx)
             with xfail_if_not_implemented():
@@ -88,7 +87,7 @@
             assert not cpu_value.is_cuda
 
         # Compute GPU value.
-        with tensors_default_to("cuda"):
+        with torch.device("cuda"):
             data = dist.get_test_data(idx)
             params = dist.get_dist_params(idx)
             cuda_value = dist.pyro_dist(**params).log_prob(data)
diff --git a/tests/infer/autoguide/test_gaussian.py b/tests/infer/autoguide/test_gaussian.py
index 048ad25220..b0bb1284d7 100644
--- a/tests/infer/autoguide/test_gaussian.py
+++ b/tests/infer/autoguide/test_gaussian.py
@@ -857,9 +857,7 @@ def test_profile(backend, jit, n=1, num_steps=1, log_every=1):
 
     torch.set_default_dtype(torch.double if args.double else torch.float)
     if args.cuda:
-        torch.set_default_tensor_type(
-            torch.cuda.DoubleTensor if args.double else torch.cuda.FloatTensor
-        )
+        torch.set_default_device("cuda")
 
     if args.profile:
         p = cProfile.Profile()
diff --git a/tests/test_examples.py b/tests/test_examples.py
index 8e62a7f770..d8abf2c450 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -123,7 +123,10 @@
     "vae/ss_vae_M2.py --num-epochs=1 --enum-discrete=sequential",
     "vae/vae.py --num-epochs=1",
     "vae/vae_comparison.py --num-epochs=1",
-    "cvae/main.py --num-quadrant-inputs=1 --num-epochs=1",
+    pytest.param(
+        "cvae/main.py --num-quadrant-inputs=1 --num-epochs=1",
+        marks=pytest.mark.skip(reason="https://github.com/pyro-ppl/pyro/issues/3273"),
+    ),
     "contrib/funsor/hmm.py --num-steps=1 --truncate=10 --model=0 ",
     "contrib/funsor/hmm.py --num-steps=1 --truncate=10 --model=1 ",
     "contrib/funsor/hmm.py --num-steps=1 --truncate=10 --model=2 ",
diff --git a/tutorial/source/logistic-growth.ipynb b/tutorial/source/logistic-growth.ipynb
index ef5a3996a4..a3db7c8a48 100644
--- a/tutorial/source/logistic-growth.ipynb
+++ b/tutorial/source/logistic-growth.ipynb
@@ -62,7 +62,7 @@
     "\n",
     "if torch.cuda.is_available():\n",
     "    print(\"Using GPU\")\n",
-    "    torch.set_default_tensor_type(\"torch.cuda.FloatTensor\")\n",
+    "    torch.set_default_device(\"cuda\")\n",
     "else:\n",
     "    print(\"Using CPU\")\n",
     "\n",
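The recurring change above splits the deprecated `torch.set_default_tensor_type`, which coupled dtype and device into a single tensor-type string, into the independent PyTorch >= 2.0 calls `torch.set_default_dtype` and `torch.set_default_device`. A minimal sketch of the pattern, assuming PyTorch >= 2.0 (the `torch.cuda.is_available()` guard is illustrative and not taken from the diff):

```python
import torch

# Old style (removed throughout this diff): one string coupled dtype and device.
#   torch.set_default_tensor_type("torch.cuda.DoubleTensor")

# New style (PyTorch >= 2.0): dtype and device are configured independently.
torch.set_default_dtype(torch.float64)  # default floating-point dtype
if torch.cuda.is_available():  # illustrative guard, not part of the diff
    torch.set_default_device("cuda")  # default device for newly created tensors

x = torch.zeros(3)  # float64, placed on CUDA when available

# torch.device also works as a context manager in PyTorch >= 2.0; this is
# what replaces the removed tests.common.tensors_default_to helper in
# tests/distributions/test_cuda.py above.
with torch.device("cpu"):
    y = torch.ones(3)  # created on CPU regardless of the current default device
```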