#!/usr/bin/env bash
#
# Launch script for the `text-embeddings-router` server.
#
# Exports the runtime environment (GPU selection, Python path, timezone) and
# then starts the router against the local Qwen3-Embedding-0.6B model
# directory on port 8080 with float16 weights. The four --max-* flags set the
# client batch size, concurrent request, and batch request limits to 1024 and
# the batch token limit to 32768.
#
# Commented-out lines are kept from the original script as optional toggles.

# Abort on a failing command or on use of an unset variable.
set -eu

# Expose only CUDA device 0 to the process.
export CUDA_VISIBLE_DEVICES=0

# Put /app on the Python module search path.
# NOTE(review): the router command below takes no Python arguments here;
# presumably this is for other tooling in the same environment -- confirm.
export PYTHONPATH="/app"

# export VLLM_LOGGING_CONFIG_PATH=/app/logging_config.json

# Timezone seen by the launched process.
export TZ="Asia/Tehran"

# mkdir -p /app/logs

# sleep 200

# Each continued line must end in a backslash with nothing between it and the
# next flag, otherwise the command is cut short or fails to parse.
text-embeddings-router \
  --model-id /app/data/models/Qwen3-Embedding-0.6B/model \
  --port 8080 \
  --dtype float16 \
  --max-client-batch-size 1024 \
  --max-concurrent-requests 1024 \
  --max-batch-requests 1024 \
  --max-batch-tokens 32768