Untitled
unknown
plain_text
2 years ago
1.1 kB
9
Indexable
#!/bin/bash
# Wait until enough GPUs are idle, then launch the training script.
#
# Polls `nvidia-smi` for the used memory of every GPU. As soon as at least
# TARGET_GPU_COUNT GPUs report less than MEMORY_THRESHOLD (MiB) in use, the
# script changes into ./cpt_dev, runs syn_data_run_tinyllama.sh and exits with
# that script's exit status. Otherwise it sleeps POLL_INTERVAL seconds and
# checks again.
#
# Optional environment overrides (defaults match the previous hard-coded values):
#   WANDB_API_KEY     exported for the launched script (default: "xxx")
#   TARGET_GPU_COUNT  number of idle GPUs required       (default: 8)
#   MEMORY_THRESHOLD  a GPU is idle below this many MiB  (default: 2048)
#   POLL_INTERVAL     seconds between checks             (default: 300)
#
# `set -e` is deliberately not used: every command whose failure matters is
# checked explicitly so the polling loop can survive a transient query error.
set -u

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Prefer a key supplied by the environment over the in-file placeholder.
export WANDB_API_KEY="${WANDB_API_KEY:-xxx}"

# Number of GPUs that must be idle before launching.
TARGET_GPU_COUNT="${TARGET_GPU_COUNT:-8}"
# Used-memory threshold below which a GPU counts as idle.
MEMORY_THRESHOLD="${MEMORY_THRESHOLD:-2048}"
# Seconds to wait between two checks.
POLL_INTERVAL="${POLL_INTERVAL:-300}"

# These values end up in arithmetic contexts, so reject anything non-numeric.
for setting in TARGET_GPU_COUNT MEMORY_THRESHOLD POLL_INTERVAL; do
  [[ "${!setting}" =~ ^[0-9]+$ ]] \
    || die "$setting must be a non-negative integer, got '${!setting}'"
done
(( TARGET_GPU_COUNT > 0 )) || die "TARGET_GPU_COUNT must be at least 1"

# Without nvidia-smi the loop could never succeed; fail fast instead.
command -v nvidia-smi >/dev/null || die "nvidia-smi not found in PATH"

idle_gpus=()

while true; do
  # One line per GPU: used memory as a bare number (no header, no unit).
  if mem_info=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits); then
    echo "$mem_info"

    # Collect the zero-based line numbers of GPUs under the threshold.
    # NOTE(review): this treats the output line order as the GPU index order.
    # Non-numeric readings are skipped rather than compared as strings.
    mapfile -t idle_gpus < <(
      awk -v threshold="$MEMORY_THRESHOLD" \
        '$1 ~ /^[0-9]+$/ && $1 + 0 < threshold + 0 { print NR - 1 }' \
        <<<"$mem_info"
    )

    # Launch once enough GPUs qualify.
    if (( ${#idle_gpus[@]} >= TARGET_GPU_COUNT )); then
      # Join the first TARGET_GPU_COUNT ids with commas.
      selected_gpus=$(IFS=,; printf '%s' "${idle_gpus[*]:0:TARGET_GPU_COUNT}")
      echo "GPU:$selected_gpus are available"
      # The selected ids are only reported; the launched script is not
      # restricted to them. To pin it, use e.g.:
      # CUDA_VISIBLE_DEVICES="$selected_gpus" $YOUR_SCRIPT_PATH
      echo "Run my code"
      cd cpt_dev || die "cannot cd to cpt_dev"
      bash syn_data_run_tinyllama.sh
      # Propagate the training script's status instead of always reporting success.
      exit "$?"
    fi
  else
    printf 'warn: nvidia-smi query failed; retrying after sleep\n' >&2
  fi

  # Not enough idle GPUs yet: wait before polling again.
  echo "Sleep..."
  sleep "$POLL_INTERVAL"
done
Editor is loading...
Leave a Comment