Skip to content

Commit 36860c2

Browse files
TroyGardenfacebook-github-bot
authored and committed
fix validate nightly binaries
Summary:
# context
*
```
+++ conda run -n build_binary python -c 'import torch; import fbgemm_gpu; import torchrec'
+++ local cmd=run
+++ case "$cmd" in
+++ __conda_exe run -n build_binary python -c 'import torch; import fbgemm_gpu; import torchrec'
+++ /opt/conda/bin/conda run -n build_binary python -c 'import torch; import fbgemm_gpu; import torchrec'
WARNING: overwriting environment variables set in the machine
overwriting variable {'LD_LIBRARY_PATH'}
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/pytorch/torchrec/torchrec/__init__.py", line 10, in <module>
    import torchrec.distributed # noqa
  File "/pytorch/torchrec/torchrec/distributed/__init__.py", line 38, in <module>
    from torchrec.distributed.model_parallel import DistributedModelParallel # noqa
  File "/pytorch/torchrec/torchrec/distributed/model_parallel.py", line 18, in <module>
    from fbgemm_gpu.split_table_batched_embeddings_ops_training import (
  File "/opt/conda/envs/build_binary/lib/python3.9/site-packages/fbgemm_gpu/split_table_batched_embeddings_ops_training.py", line 54, in <module>
    from fbgemm_gpu.tbe.stats import TBEBenchmarkParamsReporter
  File "/opt/conda/envs/build_binary/lib/python3.9/site-packages/fbgemm_gpu/tbe/stats/__init__.py", line 10, in <module>
    from .bench_params_reporter import TBEBenchmarkParamsReporter # noqa F401
  File "/opt/conda/envs/build_binary/lib/python3.9/site-packages/fbgemm_gpu/tbe/stats/bench_params_reporter.py", line 19, in <module>
    from fbgemm_gpu.tbe.bench.tbe_data_config import (
  File "/opt/conda/envs/build_binary/lib/python3.9/site-packages/fbgemm_gpu/tbe/bench/__init__.py", line 12, in <module>
    from .bench_config import ( # noqa F401
Traceback (most recent call last):
  File "/home/ec2-user/actions-runner/_work/torchrec/torchrec/test-infra/.github/scripts/run_with_env_secrets.py", line 102, in <module>
  File "/opt/conda/envs/build_binary/lib/python3.9/site-packages/fbgemm_gpu/tbe/bench/bench_config.py", line 14, in <module>
    import click
ModuleNotFoundError: No module named 'click'
ERROR conda.cli.main_run:execute(47): `conda run python -c import torch; import fbgemm_gpu; import torchrec` failed. (See above for error)
    main()
  File "/home/ec2-user/actions-runner/_work/torchrec/torchrec/test-infra/.github/scripts/run_with_env_secrets.py", line 98, in main
    run_cmd_or_die(f"docker exec -t {container_name} /exec")
  File "/home/ec2-user/actions-runner/_work/torchrec/torchrec/test-infra/.github/scripts/run_with_env_secrets.py", line 39, in run_cmd_or_die
    raise RuntimeError(f"Command {cmd} failed with exit code {exit_code}")
RuntimeError: Command docker exec -t 96827edf14ff626b7bc16b6cfaa56aa27b4b660029e1fd7755d14bf20a3c4e96 /exec failed with exit code 1
Error: Process completed with exit code 1.
```
Differential Revision: D76875546
1 parent 57abf4e commit 36860c2

File tree

2 files changed

+9
-11
lines changed

2 files changed

+9
-11
lines changed

.github/scripts/validate_binaries.sh

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -73,22 +73,20 @@ conda env config vars set -n ${CONDA_ENV} \
7373
# export PYTORCH_CUDA_PKG="pytorch-cuda=${MATRIX_GPU_ARCH_VERSION}"
7474
# fi
7575

76-
conda run -n "${CONDA_ENV}" pip install importlib-metadata
77-
7876
conda run -n "${CONDA_ENV}" pip install torch --index-url "$PYTORCH_URL"
7977

8078
# install fbgemm
8179
conda run -n "${CONDA_ENV}" pip install fbgemm-gpu --index-url "$PYTORCH_URL"
8280

83-
# install requirements from pypi
84-
conda run -n "${CONDA_ENV}" pip install torchmetrics==1.0.3
85-
8681
# install tensordict from pypi
8782
conda run -n "${CONDA_ENV}" pip install tensordict==0.8.1
8883

8984
# install torchrec
9085
conda run -n "${CONDA_ENV}" pip install torchrec --index-url "$PYTORCH_URL"
9186

87+
# install other requirements
88+
conda run -n "${CONDA_ENV}" pip install -r requirements.txt
89+
9290
# Run small import test
9391
conda run -n "${CONDA_ENV}" python -c "import torch; import fbgemm_gpu; import torchrec"
9492

.github/workflows/validate-nightly-binaries.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@ on:
1111
branches:
1212
- main
1313
paths:
14-
- .github/workflows/validate-nightly-binaries.yml
15-
- .github/workflows/validate-binaries.yml
16-
- .github/scripts/validate-binaries.sh
14+
- '.github/workflows/validate-nightly-binaries.yml'
15+
- '.github/workflows/validate-binaries.yml'
16+
- '.github/scripts/validate-binaries.sh'
1717
pull_request:
1818
paths:
19-
- .github/workflows/validate-nightly-binaries.yml
20-
- .github/workflows/validate-binaries.yml
21-
- .github/scripts/validate-binaries.sh
19+
- '.github/workflows/validate-nightly-binaries.yml'
20+
- '.github/workflows/validate-binaries.yml'
21+
- '.github/scripts/validate-binaries.sh'
2222
jobs:
2323
nightly:
2424
uses: ./.github/workflows/validate-binaries.yml

0 commit comments

Comments
 (0)