Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions notebooks/QEfficientGPT2.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
"outputs": [],
"source": [
"# Instantiate the original Transformer model\n",
"# Instantiate the tokenizer from the transformers library\n",
"from transformers import AutoTokenizer\n",
"\n",
"from QEfficient import QEFFAutoModelForCausalLM as AutoModelForCausalLM\n",
"\n",
"# Please uncomment and use appropriate Cache Directory for transformers, in case you don't want to use default ~/.cache dir.\n",
Expand Down Expand Up @@ -92,11 +95,7 @@
"# Compile the model for provided compilation arguments\n",
"# Please use the platform SDK to check num_cores for your card.\n",
"\n",
"qeff_model.compile(\n",
" num_cores=14,\n",
" mxfp6=True,\n",
" device_group=[0],\n",
")"
"qeff_model.compile(num_cores=14, mxfp6_matmul=True)"
]
},
{
Expand All @@ -116,8 +115,8 @@
"source": [
"# Post compilation, we can print the latency stats for the KV models. We provide an API to print token and latency stats on Cloud AI 100.\n",
"# We need the compiled prefill and decode QPCs to compute the tokens generated; this is based on a greedy sampling approach.\n",
"\n",
"qeff_model.generate(prompts=[\"My name is\"])"
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"qeff_model.generate(prompts=[\"My name is\"], tokenizer=tokenizer)"
]
}
],
Expand Down
11 changes: 5 additions & 6 deletions notebooks/QEfficientMPT.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
"outputs": [],
"source": [
"# Instantiate the original Transformer model\n",
"# Instantiate the tokenizer from the transformers library\n",
"from transformers import AutoTokenizer\n",
"\n",
"from QEfficient import QEFFAutoModelForCausalLM as AutoModelForCausalLM\n",
"\n",
Expand Down Expand Up @@ -91,11 +93,7 @@
"# Compile the model for provided compilation arguments\n",
"# Please use the platform SDK to check num_cores for your card.\n",
"\n",
"qeff_model.compile(\n",
" num_cores=14,\n",
" mxfp6=True,\n",
" device_group=[0],\n",
")"
"qeff_model.compile(num_cores=14, mxfp6_matmul=True)"
]
},
{
Expand All @@ -116,7 +114,8 @@
"# Post compilation, we can print the latency stats for the KV models. We provide an API to print token and latency stats on Cloud AI 100.\n",
"# We need the compiled prefill and decode QPCs to compute the tokens generated; this is based on a greedy sampling approach.\n",
"\n",
"qeff_model.generate(prompts=[\"My name is\"])"
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"qeff_model.generate(prompts=[\"My name is\"], tokenizer=tokenizer)"
]
}
],
Expand Down
Loading