@@ -1551,20 +1551,19 @@ def test_ptq_quickstart_advanced_mtp(llm_root, llm_venv, model_name,
1551
1551
1552
1552
@pytest .mark .skip_less_device_memory (80000 )
1553
1553
@pytest .mark .skip_less_device (8 )
1554
- @pytest . mark . parametrize ( "model_name,model_path" , [
1555
- pytest . param ( 'DeepSeek-V3' , 'DeepSeek-V3' , marks = skip_pre_hopper ),
1556
- ])
1554
+ @skip_pre_hopper
1555
+ @ skip_post_blackwell
1556
+ @ pytest . mark . parametrize ( "model_path" , [ 'DeepSeek-V3' ])
1557
1557
def test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus (
1558
- llm_root , llm_venv , model_name , model_path ):
1558
+ llm_root , llm_venv , model_path ):
1559
1559
# "RCCA https://nvbugs/5163844"
1560
- print (f"Testing { model_name } ." )
1560
+ print (f"Testing { model_path } ." )
1561
1561
example_root = Path (os .path .join (llm_root , "examples" , "pytorch" ))
1562
1562
run_cmd = [
1563
1563
"trtllm-llmapi-launch" ,
1564
1564
"python3" ,
1565
1565
str (example_root / "quickstart_advanced.py" ),
1566
- "--model_dir" ,
1567
- f"{ llm_models_root ()} /{ model_path } " ,
1566
+ f"--model_dir={ llm_models_root ()} /{ model_path } " ,
1568
1567
"--moe_ep_size=8" ,
1569
1568
"--tp_size=16" ,
1570
1569
"--use_cuda_graph" ,
@@ -2063,4 +2062,30 @@ def test_ptp_scaffolding(llm_root, llm_venv, model_name, model_path):
2063
2062
])
2064
2063
2065
2064
2065
@pytest.mark.skip_less_device_memory(80000)
@pytest.mark.skip_less_device(4)
@pytest.mark.parametrize("model_path", [
    pytest.param('llama-3.3-models/Llama-3.3-70B-Instruct',
                 marks=skip_pre_hopper),
    pytest.param('Llama-4-Maverick-17B-128E-Instruct', marks=skip_pre_hopper),
])
def test_ptp_quickstart_advanced_llama_2nodes(llm_root, llm_venv, model_path):
    """Launch ``quickstart_advanced.py`` for a Llama checkpoint.

    The example script under ``<llm_root>/examples/pytorch`` is started via
    ``trtllm-llmapi-launch`` with a fixed set of parallelism and KV-cache
    flags. The command is assembled as a list, joined into one shell string
    and executed with the environment taken from ``llm_venv``.

    Args:
        llm_root: Root directory of the repository checkout; used to locate
            the ``examples/pytorch`` folder.
        llm_venv: Test virtual-environment fixture; its ``_new_env`` mapping
            is passed as the subprocess environment.
        model_path: Path of the checkpoint relative to ``llm_models_root()``
            (supplied by the ``parametrize`` decorator).
    """
    print(f"Testing {model_path}.")
    example_root = Path(os.path.join(llm_root, "examples", "pytorch"))
    run_cmd = [
        "trtllm-llmapi-launch",
        "python3",
        str(example_root / "quickstart_advanced.py"),
        # Must stay a single token: the list is joined with spaces and run
        # through the shell, so any whitespace inside this literal would
        # split the model directory into separate arguments.
        f"--model_dir={llm_models_root()}/{model_path}",
        # NOTE(review): tp_size=16 combined with the skip_less_device(4)
        # guard on a "2nodes" test - confirm the intended GPU topology.
        "--moe_ep_size=8",
        "--tp_size=16",
        "--use_cuda_graph",
        f"--kv_cache_fraction={_MEM_FRACTION_50}",
        "--max_batch_size=32",
        "--max_num_tokens=2048",
        "--disable_kv_cache_reuse",
    ]
    # presumably raises on a non-zero exit status, which fails the test -
    # verify against the check_call helper imported by this file.
    check_call(" ".join(run_cmd), shell=True, env=llm_venv._new_env)
2089
+
2090
+
2066
2091
# End of Pivot-To-Python examples
0 commit comments