Ngrokkk

mail@pastecode.io avatar
unknown
python
10 months ago
638 B
3
Indexable
Never
Model = "TheBloke/Mythalion-13B-AWQ" #@param ["TheBloke/Mythalion-13B-AWQ", "TheBloke/MythoMax-L2-13B-AWQ", "TheBloke/Pygmalion-2-13B-AWQ", "TheBloke/MLewd-L2-Chat-13B-AWQ"]{allow-input: true}
Quantization = "awq" #@param ['awq', 'gptq', "None"]
!pip uninstall aphrodite-engine -y
!pip install aphrodite-engine
!curl -O 1 && unzip ngrok-stable-linux-amd64.zip # install ngrok v2.x
!./ngrok authtoken <your_ngrok_token> && ./ngrok http 2242 # start ngrok agent with your token and port number
!python -m aphrodite.endpoints.api_server_kobold --model $Model --dtype float16 --host 127.0.0.1 -q $Quantization --gpu-memory-utilization 0.88 &