Remove snapd
# Remove snapd and all snap data.
# NOTE: the package name is "snapd" — `apt purge snap` refers to a
# non-existent package and fails; purge snapd directly.
apt purge -y snapd
apt autoremove -y --purge
rm -rf -- /var/snap
|
Install Docker and the NVIDIA container toolkit
# Install Docker, then the NVIDIA container toolkit so containers can use GPUs.
apt-get update
apt-get install -y docker.io

# Add NVIDIA's signed apt repository for the container toolkit.
# -fsSL: fail on HTTP errors instead of piping an error page into gpg/sed.
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
  | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
  | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
  | tee /etc/apt/sources.list.d/nvidia-container-toolkit.list

apt-get update
apt-get install -y nvidia-container-toolkit

# Register the NVIDIA runtime with Docker and restart the daemon so
# `docker run --gpus` works — without this step Docker never sees the runtime.
nvidia-ctk runtime configure --runtime=docker
systemctl restart docker
|
test with TGI
TGI stands for Text Generation Inference from huggingface.
Intro:
https://huggingface.co/docs/text-generation-inference/index
Code:
https://github.com/huggingface/text-generation-inference
# Serve the model with Hugging Face TGI, exposing it on host port 8080.
# Mount the HF cache so model weights persist across container restarts.
# ${…:?} aborts with a message if the cache dir is unset — otherwise the
# `-v :/data` mount would silently be malformed.
volume="${HUGGINGFACE_HUB_CACHE:?set HUGGINGFACE_HUB_CACHE to a host cache dir}"
model="HuggingFaceH4/zephyr-7b-beta"
docker run --gpus all --shm-size 1g -p 8080:80 \
  -v "$volume:/data" \
  ghcr.io/huggingface/text-generation-inference:1.3 \
  --model-id "$model"
|
Start a new terminal and submit a request
import json  # original snippet called json.loads without importing json -> NameError

import requests

# Stream generated tokens from a running TGI server (see `docker run` above).
data = {
    "inputs": "top 10 things to do at San Francisco, CA",
    "parameters": {"max_new_tokens": 1024},
}
headers = {"Content-Type": "application/json"}
resp = requests.post(
    "http://127.0.0.1:8080/generate_stream",
    json=data,
    headers=headers,
    stream=True,
)
resp.raise_for_status()

# Each chunk is a server-sent-event line such as:
#   data:{"token":{"id":13,"text":"\n","logprob":-0.97,...},"generated_text":null,"details":null}
for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
    # Skip SSE keep-alives / blank lines instead of blindly slicing them.
    if not chunk or not chunk.startswith("data:"):
        continue
    content = json.loads(chunk[len("data:"):])
    print(content["token"]["text"], end="", flush=True)
|