
Bug fixed: changes made in jupyter notebooks #532


Open · wants to merge 6 commits into base: main
122 changes: 105 additions & 17 deletions notebooks/QEfficientGPT2.ipynb
@@ -1,8 +1,8 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "8a341fa4-b4dc-4cea-a4b3-249aa5fc9394",

Check failure on line 5 in notebooks/QEfficientGPT2.ipynb (GitHub Actions / lint, Ruff I001):
notebooks/QEfficientGPT2.ipynb:1:1: I001 Import block is un-sorted or un-formatted
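
For reference, an import block that would satisfy this I001 check could look like the sketch below. The grouping assumes Ruff's default isort conventions (standard library, then third-party, then first-party) and that QEfficient is configured as first-party in this repo; the exact order depends on the project's Ruff settings.

    # Sketch: imports grouped and sorted per isort-style rules (Ruff I001).
    import os  # needed if the commented-out TRANSFORMERS_CACHE line is enabled

    from transformers import AutoTokenizer

    from QEfficient import QEFFAutoModelForCausalLM as AutoModelForCausalLM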
"metadata": {},
"source": [
"### Demonstrate the LLM GPT2 Model OnBoarding on Cloud AI 100 Platform"
@@ -27,14 +27,64 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "c21f82d5-17df-4fc9-a180-05edd032f02d",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/sharvari/qeff_env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"/home/sharvari/qeff_env/lib/python3.10/site-packages/onnxscript/converter.py:816: FutureWarning: 'onnxscript.values.Op.param_schemas' is deprecated in version 0.1 and will be removed in the future. Please use '.op_signature' instead.\n",
" param_schemas = callee.param_schemas()\n",
"/home/sharvari/qeff_env/lib/python3.10/site-packages/onnxscript/converter.py:816: FutureWarning: 'onnxscript.values.OnnxFunction.param_schemas' is deprecated in version 0.1 and will be removed in the future. Please use '.op_signature' instead.\n",
" param_schemas = callee.param_schemas()\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"gpt2 optimized for Cloud AI 100 \n",
" QEFFAutoModelForCausalLM\n",
"QEffGPT2LMHeadModel(\n",
" (transformer): QEffGPT2Model(\n",
" (wte): Embedding(50257, 768)\n",
" (wpe): Embedding(1024, 768)\n",
" (drop): Dropout(p=0.1, inplace=False)\n",
" (h): ModuleList(\n",
" (0-11): 12 x QEffGPT2Block(\n",
" (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
" (attn): QEffGPT2Attention(\n",
" (c_attn): Conv1D(nf=2304, nx=768)\n",
" (c_proj): Conv1D(nf=768, nx=768)\n",
" (attn_dropout): Dropout(p=0.1, inplace=False)\n",
" (resid_dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
" (mlp): GPT2MLP(\n",
" (c_fc): Conv1D(nf=3072, nx=768)\n",
" (c_proj): Conv1D(nf=768, nx=3072)\n",
" (act): NewGELUActivation()\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" )\n",
" (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
" )\n",
" (lm_head): Linear(in_features=768, out_features=50257, bias=False)\n",
")\n"
]
}
],
"source": [
"# Initiate the Original Transformer model\n",
"from QEfficient import QEFFAutoModelForCausalLM as AutoModelForCausalLM\n",
"\n",
"# Initiate the tokenizer for transformers library\n",
"from transformers import AutoTokenizer\n",
"# Please uncomment and use appropriate Cache Directory for transformers, in case you don't want to use default ~/.cache dir.\n",
"# os.environ[\"TRANSFORMERS_CACHE\"] = \"/local/mnt/workspace/hf_cache\"\n",
"\n",
@@ -58,10 +108,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "0b293196-ba44-460e-94fb-4378283bc196",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"PosixPath('/home/sharvari/.cache/qeff_models/GPT2LMHeadModel-d4ac0dba02c16a59/GPT2LMHeadModel.onnx')"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# We can now export the modified models to ONNX framework\n",
"# This will generate single Onnx Model for both Prefill and Decode Variations which are optimized for\n",
@@ -84,19 +145,26 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "3fb4d6dd-9973-4608-b68b-ec6825cfef0e",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"PosixPath('/home/sharvari/.cache/qeff_models/GPT2LMHeadModel-d4ac0dba02c16a59/qpc-46bd7fd6377ab8fb/qpc')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Compile the model for provided compilation arguments\n",
"# Please use platform SDK to Check num_cores for your card.\n",
"\n",
"qeff_model.compile(\n",
" num_cores=14,\n",
" mxfp6=True,\n",
" device_group=[0],\n",
")"
"qeff_model.compile(num_cores=14, mxfp6_matmul=True)"
]
},
{
@@ -109,21 +177,41 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "4711fc74-aa5d-4e20-af0e-0d461d2e19bb",
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "NameError",
"evalue": "name 'AutoTokenizer' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# post compilation, we can print the latency stats for the kv models, We provide API to print token and Latency stats on Cloud AI 100\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m# We need the compiled prefill and decode qpc to compute the token generated, This is based on Greedy Sampling Approach\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m \u001b[43mAutoTokenizer\u001b[49m\u001b[38;5;241m.\u001b[39mfrom_pretrained(model_name)\n\u001b[1;32m 4\u001b[0m qeff_model\u001b[38;5;241m.\u001b[39mgenerate(prompts\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMy name is\u001b[39m\u001b[38;5;124m\"\u001b[39m], tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"\u001b[0;31mNameError\u001b[0m: name 'AutoTokenizer' is not defined"
]
}
],
"source": [
"# post compilation, we can print the latency stats for the kv models, We provide API to print token and Latency stats on Cloud AI 100\n",
"# We need the compiled prefill and decode qpc to compute the token generated, This is based on Greedy Sampling Approach\n",
"\n",
"qeff_model.generate(prompts=[\"My name is\"])"
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"qeff_model.generate(prompts=[\"My name is\"], tokenizer=tokenizer)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1bab713e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "qeff_env",
"language": "python",
"name": "python3"
},
@@ -137,7 +225,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
"version": "3.10.12"
}
},
"nbformat": 4,
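Taken together, the updated GPT2 notebook cells amount to the flow sketched below. This is a minimal reconstruction based on the API as it appears in this diff (compile with mxfp6_matmul, generate with an explicit tokenizer); the model-loading and export calls are elided from the hunks above, so those two lines are assumptions following the usual QEfficient pattern.

    # Minimal sketch of the updated end-to-end flow; model_name "gpt2" is assumed.
    from QEfficient import QEFFAutoModelForCausalLM as AutoModelForCausalLM
    from transformers import AutoTokenizer

    model_name = "gpt2"
    qeff_model = AutoModelForCausalLM.from_pretrained(model_name)

    # Export a single ONNX model covering both prefill and decode.
    qeff_model.export()

    # num_cores is card-specific; check it with the platform SDK.
    qeff_model.compile(num_cores=14, mxfp6_matmul=True)

    # The tokenizer is now created explicitly and passed to generate(),
    # which avoids the NameError shown in the committed cell output.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    qeff_model.generate(prompts=["My name is"], tokenizer=tokenizer)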
34 changes: 23 additions & 11 deletions notebooks/QEfficientMPT.ipynb
@@ -1,8 +1,8 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "8a341fa4-b4dc-4cea-a4b3-249aa5fc9394",

Check failure on line 5 in notebooks/QEfficientMPT.ipynb (GitHub Actions / lint, Ruff I001):
notebooks/QEfficientMPT.ipynb:1:1: I001 Import block is un-sorted or un-formatted
"metadata": {},
"source": [
"### Demonstrate the LLM MPT Model OnBoarding on Cloud AI 100 Platform"
@@ -29,12 +29,27 @@
"execution_count": null,
"id": "c21f82d5-17df-4fc9-a180-05edd032f02d",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/sharvari/qeff_env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"/home/sharvari/qeff_env/lib/python3.10/site-packages/onnxscript/converter.py:816: FutureWarning: 'onnxscript.values.Op.param_schemas' is deprecated in version 0.1 and will be removed in the future. Please use '.op_signature' instead.\n",
" param_schemas = callee.param_schemas()\n",
"/home/sharvari/qeff_env/lib/python3.10/site-packages/onnxscript/converter.py:816: FutureWarning: 'onnxscript.values.OnnxFunction.param_schemas' is deprecated in version 0.1 and will be removed in the future. Please use '.op_signature' instead.\n",
" param_schemas = callee.param_schemas()\n",
"Fetching 2 files: 0%| | 0/2 [00:00<?, ?it/s]"
]
}
],
"source": [
"# Initiate the Original Transformer model\n",
"\n",
"from QEfficient import QEFFAutoModelForCausalLM as AutoModelForCausalLM\n",
"\n",
"# Initiate the tokenizer for transformers library\n",
"from transformers import AutoTokenizer\n",
"# Please uncomment and use appropriate Cache Directory for transformers, in case you don't want to use default ~/.cache dir.\n",
"# os.environ[\"TRANSFORMERS_CACHE\"] = \"/local/mnt/workspace/hf_cache\"\n",
"\n",
@@ -91,11 +106,7 @@
"# Compile the model for provided compilation arguments\n",
"# Please use platform SDK to Check num_cores for your card.\n",
"\n",
"qeff_model.compile(\n",
" num_cores=14,\n",
" mxfp6=True,\n",
" device_group=[0],\n",
")"
"qeff_model.compile(num_cores=14, mxfp6_matmul=True)"
]
},
{
@@ -116,15 +127,16 @@
"# post compilation, we can print the latency stats for the kv models, We provide API to print token and Latency stats on Cloud AI 100\n",
"# We need the compiled prefill and decode qpc to compute the token generated, This is based on Greedy Sampling Approach\n",
"\n",
"qeff_model.generate(prompts=[\"My name is\"])"
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"qeff_model.generate(prompts=[\"My name is\"], tokenizer=tokenizer)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "py38",
"display_name": "qeff_env",
"language": "python",
"name": "py38"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -136,7 +148,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
"version": "3.10.12"
}
},
"nbformat": 4,
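Both notebooks keep a commented-out TRANSFORMERS_CACHE override that references os.environ without importing os. A self-contained version of that snippet, with an illustrative path, would be:

    # Sketch: override the transformers cache directory (path is illustrative).
    # Set this before loading any model or tokenizer so the location takes effect.
    import os
    os.environ["TRANSFORMERS_CACHE"] = "/local/mnt/workspace/hf_cache"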