diff --git a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
index 2bb8493a3503f9..54852cebc00692 100755
--- a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
+++ b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
@@ -29,7 +29,7 @@ function is_absolute {
   [[ "$1" = /* ]] || [[ "$1" =~ ^[a-zA-Z]:[/\\].* ]]
 }
 
-export TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB:-4096}
+export TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB:-5120}
 
 # *******************************************************************
 #         This section of the script is needed to
diff --git a/third_party/xla/xla/backends/gpu/codegen/triton/fusion_emitter_device_test.cc b/third_party/xla/xla/backends/gpu/codegen/triton/fusion_emitter_device_test.cc
index 472d9943558127..e36bc5f6a43aa3 100644
--- a/third_party/xla/xla/backends/gpu/codegen/triton/fusion_emitter_device_test.cc
+++ b/third_party/xla/xla/backends/gpu/codegen/triton/fusion_emitter_device_test.cc
@@ -1445,8 +1445,6 @@ CHECK:     tt.store
 // Reproducer from b/384110192.
 TEST_F(TritonEmitterTest,
        FusionWithOutputContainingMoreThanInt32MaxElementsExecutesCorrectly) {
-  GTEST_SKIP() << "TODO(rocm): Weekly-sync 25-01-21: Skip Int32 max elements "
-                  "issue with triton.";
   // The point here is to check the output of the Triton fusion. The `slice` op
   // at the end is inserted to allow the comparison of output to run in a
   // reasonable amount of time, and has been proven to still correctly capture