Commit 750f412

tests: add llama 3.3 70b 2 nodes tests (#4391)
* add llama 3.3 70b 2 nodes tests

  Signed-off-by: xinhe-nv <[email protected]>

* remove enable_overlap_scheduler parameter

  Signed-off-by: xinhe-nv <[email protected]>

---------

Signed-off-by: xinhe-nv <[email protected]>
1 parent 6a35c59 commit 750f412

2 files changed: +34 −8 lines changed

tests/integration/defs/test_e2e.py

Lines changed: 32 additions & 7 deletions
@@ -1551,20 +1551,19 @@ def test_ptq_quickstart_advanced_mtp(llm_root, llm_venv, model_name,
 
 @pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.skip_less_device(8)
-@pytest.mark.parametrize("model_name,model_path", [
-    pytest.param('DeepSeek-V3', 'DeepSeek-V3', marks=skip_pre_hopper),
-])
+@skip_pre_hopper
+@skip_post_blackwell
+@pytest.mark.parametrize("model_path", ['DeepSeek-V3'])
 def test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus(
-        llm_root, llm_venv, model_name, model_path):
+        llm_root, llm_venv, model_path):
     # "RCCA https://nvbugs/5163844"
-    print(f"Testing {model_name}.")
+    print(f"Testing {model_path}.")
     example_root = Path(os.path.join(llm_root, "examples", "pytorch"))
     run_cmd = [
         "trtllm-llmapi-launch",
         "python3",
         str(example_root / "quickstart_advanced.py"),
-        "--model_dir",
-        f"{llm_models_root()}/{model_path}",
+        f"--model_dir={llm_models_root()}/{model_path}",
         "--moe_ep_size=8",
         "--tp_size=16",
         "--use_cuda_graph",
@@ -2063,4 +2062,30 @@ def test_ptp_scaffolding(llm_root, llm_venv, model_name, model_path):
     ])
 
 
+@pytest.mark.skip_less_device_memory(80000)
+@pytest.mark.skip_less_device(4)
+@pytest.mark.parametrize("model_path", [
+    pytest.param('llama-3.3-models/Llama-3.3-70B-Instruct',
+                 marks=skip_pre_hopper),
+    pytest.param('Llama-4-Maverick-17B-128E-Instruct', marks=skip_pre_hopper),
+])
+def test_ptp_quickstart_advanced_llama_2nodes(llm_root, llm_venv, model_path):
+    print(f"Testing {model_path}.")
+    example_root = Path(os.path.join(llm_root, "examples", "pytorch"))
+    run_cmd = [
+        "trtllm-llmapi-launch",
+        "python3",
+        str(example_root / "quickstart_advanced.py"),
+        f"--model_dir={llm_models_root()}/{model_path}",
+        "--moe_ep_size=8",
+        "--tp_size=16",
+        "--use_cuda_graph",
+        f"--kv_cache_fraction={_MEM_FRACTION_50}",
+        "--max_batch_size=32",
+        "--max_num_tokens=2048",
+        "--disable_kv_cache_reuse",
+    ]
+    check_call(" ".join(run_cmd), shell=True, env=llm_venv._new_env)
+
+
 # End of Pivot-To-Python examples
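
For context, here is a minimal standalone sketch of roughly the command line the new test_ptp_quickstart_advanced_llama_2nodes test assembles. The model-root location and the 0.5 value for _MEM_FRACTION_50 are assumptions for illustration only; in the test they come from llm_models_root() and the module-level constant.

import os

# Assumed stand-ins for helpers used inside the test harness
# (hypothetical values, not taken from the repository).
LLM_MODELS_ROOT = os.environ.get("LLM_MODELS_ROOT", "/scratch/llm-models")
MEM_FRACTION_50 = 0.5
model_path = "llama-3.3-models/Llama-3.3-70B-Instruct"

run_cmd = [
    "trtllm-llmapi-launch",
    "python3",
    "examples/pytorch/quickstart_advanced.py",
    f"--model_dir={LLM_MODELS_ROOT}/{model_path}",
    "--moe_ep_size=8",
    "--tp_size=16",
    "--use_cuda_graph",
    f"--kv_cache_fraction={MEM_FRACTION_50}",
    "--max_batch_size=32",
    "--max_num_tokens=2048",
    "--disable_kv_cache_reuse",
]

# The test joins the arguments and runs them via check_call(..., shell=True),
# so trtllm-llmapi-launch wraps the whole python3 invocation across nodes.
print(" ".join(run_cmd))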

tests/integration/test_lists/qa/llm_multinodes_function_test.txt

Lines changed: 2 additions & 1 deletion
@@ -2,5 +2,6 @@ examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp
 examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp8-tp16pp1-infer]
 examples/test_mixtral.py::test_llm_mixtral_2nodes_8gpus[Mixtral-8x22B-v0.1-plugin-renormalize-tensor_parallel-build]
 examples/test_mixtral.py::test_llm_mixtral_2nodes_8gpus[Mixtral-8x22B-v0.1-plugin-renormalize-tensor_parallel-infer]
-test_e2e.py::test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus[DeepSeek-V3-DeepSeek-V3]
+test_e2e.py::test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus[DeepSeek-V3]
+test_e2e.py::test_ptp_quickstart_advanced_llama_2nodes[llama-3.3-models/Llama-3.3-70B-Instruct]
 test_e2e.py::test_openai_multinodes_chat_tp16pp1
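
Each line in this QA list is a pytest node ID, so the new Llama-3.3 entry can be selected directly. A minimal sketch, assuming it is run from tests/integration/defs/ in an environment that already provides the two-node launch (the real QA harness drives these IDs through its own runner):

import pytest

# Hypothetical direct selection of the new entry; the bracketed suffix is the
# parametrize ID that pytest matches.
node_id = (
    "test_e2e.py::test_ptp_quickstart_advanced_llama_2nodes"
    "[llama-3.3-models/Llama-3.3-70B-Instruct]"
)
pytest.main([node_id, "-s"])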
