Runs on mobile devices or mini PCs, and is much faster than the normal version. Sample code for using this model:
# Interactive demo: stream text generation from the quantized GPT-2 ONNX model
# and print a rough words-per-second throughput figure after each reply.
from transformers import AutoTokenizer, pipeline, TextStreamer
from transformers import logging
from optimum.onnxruntime import ORTModelForCausalLM
import time

logging.set_verbosity_error()  # silence transformers download/warning noise

model_id = "brianwoo/GPT2-Onnx-Quantized"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = ORTModelForCausalLM.from_pretrained(model_id)

# skip_prompt=True prints only newly generated tokens, not the echoed prompt.
# (The original also passed return_text=True here, but that is a pipeline
# argument, not a TextStreamer/decode parameter — dropped.)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
onnx_gen = pipeline("text-generation", model=model, tokenizer=tokenizer,
                    streamer=streamer, return_text=True)

while True:  # simple REPL; interrupt with Ctrl+C to exit
    prompt = input("\nBrian:")
    print("Bot:\n")
    t0 = time.time()
    gen = onnx_gen(prompt)
    elapsed = time.time() - t0
    generated = gen[0]["generated_text"]
    # Crude throughput estimate: whitespace-split word count / wall time.
    print(elapsed, len(generated.split(" ")) / elapsed, "words /sec")
My sample code for quantizing the models on Colab:
from optimum.onnxruntime import ORTStableDiffusionXLImg2ImgPipeline , ORTStableDiffusionXLPipeline,ORTModelForCausalLM
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
from optimum.onnxruntime.configuration import AutoQuantizationConfig
from optimum.onnxruntime import ORTQuantizer
import os,torch
from pathlib import Path
# Base output directory on the mounted Google Drive.
Gbase = "/gdrive/MyDrive/onnx/"
# Default checkpoint quantized when quantModel() is called without arguments.
model_checkpoint = "gpt2"
# NOTE(review): Gbase already ends in "onnx/", so this resolves to
# ".../onnx/onnx/gpt2_arm64" — confirm the doubled segment is intended.
save_directory = Gbase + "onnx/gpt2_arm64"
# Hub checkpoints to quantize in one batch run (duplicate commented-out copy
# of this list removed).
tasks = [
    'TinyLlama/TinyLlama-1.1B-Chat-v0.6',
    'pankajmathur/orca_mini_3b',
    'Fredithefish/RedPajama-INCITE-Chat-3B-Instruction-Tuning-with-GPT-4',
    'CobraMamba/mamba-gpt-3b-v4',
    'WizardLM/WizardCoder-3B-V1.0',
    'GeneZC/MiniChat-3B',
]
def quantModel(model_checkpoint=model_checkpoint, save_directory=save_directory):
    """Export *model_checkpoint* to ONNX and write an ARM64 dynamically
    quantized copy (plus the tokenizer files) into *save_directory*.

    Defaults come from the module-level constants so the function can be
    called with no arguments for the gpt2 example.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
    tokenizer.save_pretrained(save_directory)
    # export=True converts the PyTorch checkpoint to ONNX on the fly.
    ort_model = ORTModelForCausalLM.from_pretrained(model_checkpoint, export=True)
    # NOTE(review): the original also built an ORTOptimizer and an
    # OptimizationConfig(optimization_level=3) here but never called
    # optimizer.optimize(); removed as dead code — restore and actually run it
    # if graph optimization is wanted before quantization.
    # Dynamic (is_static=False) int8 quantization targeting ARM64 kernels.
    qconfig = AutoQuantizationConfig.arm64(is_static=False, per_channel=False)
    quantizer = ORTQuantizer.from_pretrained(ort_model)
    # use_external_data_format stores weights in side files so >2 GB models work.
    quantizer.quantize(save_dir=save_directory, quantization_config=qconfig,
                       use_external_data_format=True)
def doTasks(tasks=tasks, Gbase=Gbase):
    """Quantize every checkpoint in *tasks*, writing each under Gbase/<model name>.

    Failures are printed and skipped so one bad model does not abort the batch.
    """
    for model_checkpoint in tasks:
        # e.g. 'pankajmathur/orca_mini_3b' -> <Gbase>/orca_mini_3b
        save_directory = os.path.join(Gbase, Path(model_checkpoint).name)
        try:
            quantModel(model_checkpoint=model_checkpoint,
                       save_directory=save_directory)
        except Exception:
            # Was a bare `except:` — narrowed so KeyboardInterrupt/SystemExit
            # still abort the batch instead of being swallowed.
            import traceback
            traceback.print_exc()

doTasks()
Install Debian or Ubuntu on Termux and set up a reasonably complete development environment. This is not guaranteed to always work, so don't forget to back up after installation. Download and install Termux, Termux:Widget, and Termux:API, then add the Termux widget to your home screen.
https://f-droid.org/en/packages/com.termux/ https://f-droid.org/en/packages/com.termux.widget/ https://f-droid.org/packages/com.termux.api/
Open Termux and install proot-distro (keep running termux-change-repo until a mirror lets you install it), then install Debian under proot-distro. Copy everything below and run it step by step — the process is complex and a full install takes roughly one to two hours, so it cannot be perfect at every step; after a few attempts the desired setup works. The commands are listed without repeated explanations; see http://blog.brianwoo.net/2023/06/termux-debian-vscode-llm-latest-working.html for details.
####Debian:
# --- Termux side -------------------------------------------------------------
termux-setup-storage   # grant access to shared storage (/sdcard)
termux-wake-lock       # keep the CPU awake during the long install
termux-change-repo     # retry until a working mirror is selected
pkg install tur-repo
pkg update && pkg upgrade
pkg install git proot-distro termimage vim termux-api
disname='debian'
user='brian'
mkdir -p .shortcuts    # fix: Termux:Widget script dir may not exist yet
echo "proot-distro login $disname" > .shortcuts/debianai.sh           # root-shell widget
echo "proot-distro login debian --user $user" > .shortcuts/debian.sh  # user-shell widget
proot-distro install debian
proot-distro login debian
# --- inside Debian, as root --------------------------------------------------
user='brian'
apt update && apt upgrade
apt install sudo locales
# NOTE(review): /etc/locale.conf is the Arch convention; Debian normally uses
# /etc/locale.gen + /etc/default/locale — confirm this actually takes effect.
echo "LANG=zh_CN.UTF-8" >> /etc/locale.conf
sudo locale-gen
adduser $user
gpasswd -a $user sudo
echo "$user ALL=(ALL:ALL) ALL" >> /etc/sudoers
login $user
# --- inside Debian, as $user -------------------------------------------------
# Helper env file: `source llm.env` activates the venv, the Rust env, and cd's
# into the project directory in shared storage.
echo 'source llm/bin/activate && source .cargo/env && cd /sdcard/Documents/Pydroid3/llm' > llm.env
sudo locale-gen zh_CN.UTF-8
sudo apt update && apt upgrade
# Base toolchain and Python
sudo apt install python3-full git curl vim wget
sudo apt install python-is-python3
sudo apt install python3-pip
sudo apt install clang wget git cmake
sudo apt install opencl-headers
sudo apt install libopenblas-dev libopenblas0 libopenblas64-0 libblis-openmp-dev
sudo apt install python3-torch python3-torchaudio python3-torchtext python3-torchvision
sudo apt install libideep-dev libtorch-dev libonnx1
sudo apt install pandoc build-essential
sudo apt install libopenblas-dev libopenblas-openmp-dev libopenblas0 libopenblas64-0-openmp libopenblas64-dev libopenblas64-openmp-dev
sudo apt install opencl-c-headers opencl-clhpp-headers libasl-dev libasl0 libclblast-dev libclc-13
sudo apt install pandoc
# Rust toolchain (needed to build some Python packages from source)
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
cd
# Build and install BLIS (BLAS implementation) with OpenMP support
git clone https://github.com/flame/blis
cd blis
./configure --enable-cblas -t openmp auto
make -j
sudo make install
# Clean out any previous build trees, then build llama.cpp against BLIS
rm -rf CLBlast/
rm -rf llama.cpp
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
mkdir build
make LLAMA_BLIS=1 -j -B
cd
# Python virtualenv holding all LLM tooling
python -m venv llm
source llm/bin/activate
pip uninstall ctransformers llama-cpp-python
rm -rf ctransformers
git clone --recurse-submodules https://github.com/marella/ctransformers
cd ctransformers
bash ./scripts/build.sh
cd
python -m pip install ./ctransformers/
# Build llama-cpp-python against the BLAS we just installed
CMAKE_ARGS="-DLLAMA_BLAS=ON" pip install llama-cpp-python
pip install --upgrade pip
pip install --upgrade diffusers[torch]
pip install --upgrade accelerate peft openvino optimum onnx onnxruntime nncf
pip install opencv-python fastapi uvicorn flask
pip install fastapi python-multipart pydantic sqlalchemy opencc-python-reimplemented pandas
pip install --upgrade pip
pip install fast-sentence-transformers
pip install langchain
pip install wikipedia unstructured pypdf pdf2image pdfminer chromadb qdrant-client lark momento annoy
pip install doc2text pypandoc pandoc
pip install opencv-python fastapi uvicorn flask
pip install fastapi python-multipart pydantic sqlalchemy opencc-python-reimplemented pandas
# fix: original line had a stray `install` token, which would have installed
# the unrelated PyPI package named "install"
pip install --upgrade-strategy eager optimum[onnxruntime]
pip install optimum-intel
pip install --upgrade-strategy eager optimum[openvino,nncf]
pip install --force-reinstall transformers[torch]
pip install --force-reinstall diffusers[torch]
####Ubuntu :
# --- Termux side -------------------------------------------------------------
termux-setup-storage   # grant access to shared storage (/sdcard)
termux-wake-lock       # keep the CPU awake during the long install
termux-change-repo     # retry until a working mirror is selected
pkg install tur-repo
pkg update && pkg upgrade
pkg install git proot-distro termimage vim termux-api
disname='ubuntu'
user='brian'
mkdir -p .shortcuts    # fix: Termux:Widget script dir may not exist yet
# fix: was `.shortcuts/debianai.sh`, a copy-paste leftover from the Debian section
echo "proot-distro login $disname" > .shortcuts/ubuntuai.sh           # root-shell widget
echo "proot-distro login ubuntu --user $user" > .shortcuts/ubuntu.sh  # user-shell widget
proot-distro install ubuntu
proot-distro login ubuntu
# --- inside Ubuntu, as root --------------------------------------------------
user='brian'
apt update && apt upgrade
apt install sudo locales adduser
# NOTE(review): /etc/locale.conf is the Arch convention; Ubuntu normally uses
# /etc/locale.gen + /etc/default/locale — confirm this actually takes effect.
echo "LANG=zh_CN.UTF-8" >> /etc/locale.conf
sudo locale-gen
adduser $user
sudo gpasswd -a $user sudo
echo "$user ALL=(ALL:ALL) ALL" >> /etc/sudoers
login $user
# --- inside Ubuntu, as $user -------------------------------------------------
# Helper env file: `source llm.env` activates the venv, the Rust env, and cd's
# into the project directory in shared storage.
echo 'source llm/bin/activate && source .cargo/env && cd /sdcard/Documents/Pydroid3/llm' > llm.env
sudo locale-gen zh_CN.UTF-8
sudo apt update && apt upgrade
# Base toolchain and Python
sudo apt install python3-full git curl vim wget
sudo apt install python-is-python3
sudo apt install python3-pip
sudo apt install clang wget git cmake
sudo apt install opencl-headers
sudo apt install libopenblas-dev libopenblas0 libopenblas64-0 libblis-openmp-dev
sudo apt install python3-torch python3-torchaudio python3-torchtext python3-torchvision
sudo apt install libideep-dev libtorch-dev libonnx1
sudo apt install pandoc build-essential
sudo apt install libopenblas-dev libopenblas-openmp-dev libopenblas0 libopenblas64-0-openmp libopenblas64-dev libopenblas64-openmp-dev
sudo apt install opencl-c-headers opencl-clhpp-headers libasl-dev libasl0 libclblast-dev libclc-13
sudo apt install pandoc
# Rust toolchain (needed to build some Python packages from source)
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
cd
# Build and install BLIS (BLAS implementation) with OpenMP support
git clone https://github.com/flame/blis
cd blis
./configure --enable-cblas -t openmp auto
make -j
sudo make install
# Clean out any previous build trees, then build llama.cpp against BLIS
rm -rf CLBlast/
rm -rf llama.cpp
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
mkdir build
make LLAMA_BLIS=1 -j -B
cd
# Python virtualenv holding all LLM tooling
python -m venv llm
source llm/bin/activate
pip uninstall ctransformers llama-cpp-python
rm -rf ctransformers
git clone --recurse-submodules https://github.com/marella/ctransformers
cd ctransformers
bash ./scripts/build.sh
cd
python -m pip install ./ctransformers/
# Build llama-cpp-python against the BLAS we just installed
CMAKE_ARGS="-DLLAMA_BLAS=ON" pip install llama-cpp-python
pip install --upgrade pip
pip install --upgrade diffusers[torch]
pip install --upgrade accelerate peft openvino optimum onnx onnxruntime nncf
pip install opencv-python fastapi uvicorn flask
pip install fastapi python-multipart pydantic sqlalchemy opencc-python-reimplemented pandas
pip install --upgrade pip
pip install fast-sentence-transformers
pip install langchain
pip install wikipedia unstructured pypdf pdf2image pdfminer chromadb qdrant-client lark momento annoy
pip install doc2text pypandoc pandoc
pip install opencv-python fastapi uvicorn flask
pip install fastapi python-multipart pydantic sqlalchemy opencc-python-reimplemented pandas
# fix: original line had a stray `install` token, which would have installed
# the unrelated PyPI package named "install"
pip install --upgrade-strategy eager optimum[onnxruntime]
pip install optimum-intel
pip install --upgrade-strategy eager optimum[openvino,nncf]
pip install --force-reinstall transformers[torch]
pip install --force-reinstall diffusers[torch]
- Downloads last month
- 1