Untitled

mail@pastecode.io avatar
unknown
plain_text
24 days ago
1.1 kB
3
Indexable
Never
#!/bin/bash
#
# Settings for the GPU-availability poller below. Each value can be
# overridden from the caller's environment; the defaults match the
# previously hard-coded values.

# API key exported to child processes (presumably read by the wandb client in
# the launched training script -- confirm). An already-exported key wins over
# the "xxx" placeholder so a real secret never has to be written in this file.
export WANDB_API_KEY="${WANDB_API_KEY:-xxx}"

# NOTE(review): neither of these is referenced by the visible code (the loop
# sleeps a literal 300 seconds and compares against MEMORY_THRESHOLD), and
# they are not exported. Kept in case another part of the file relies on them.
interval=60
mem_threshold=4096

# Number of GPUs that must be idle at the same time before the job starts.
TARGET_GPU_COUNT="${TARGET_GPU_COUNT:-8}"
# A GPU counts as idle when nvidia-smi's memory.used value is strictly below
# this number (MiB, per the nvidia-smi documentation).
MEMORY_THRESHOLD="${MEMORY_THRESHOLD:-2048}"

# Poll nvidia-smi until at least TARGET_GPU_COUNT GPUs each report less than
# MEMORY_THRESHOLD of used memory, then launch the training script once and
# exit with that script's status.
while true; do
    # One line per GPU: used memory as a bare number (no header, no unit).
    # A failed query is reported but not fatal; the loop just retries later.
    if ! mem_info=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits); then
        echo "nvidia-smi query failed; will retry" >&2
    fi

    echo "$mem_info"

    # Collect zero-based indices of GPUs below the threshold (awk's NR is
    # 1-based, hence NR-1). Only purely numeric lines are considered: an empty
    # line would otherwise compare as 0 and be mistaken for an idle GPU.
    idle_gpus=()
    if [[ -n "$mem_info" ]]; then
        mapfile -t idle_gpus < <(
            awk -v threshold="$MEMORY_THRESHOLD" \
                '$1 ~ /^[0-9]+$/ && $1 + 0 < threshold + 0 {print NR-1}' <<<"$mem_info"
        )
    fi

    # Enough idle GPUs: pick the first TARGET_GPU_COUNT of them and start the job.
    if [[ "${#idle_gpus[@]}" -ge "$TARGET_GPU_COUNT" ]]; then
        # Join the chosen indices with commas (IFS change is confined to the subshell).
        selected_gpus=$(IFS=,; echo "${idle_gpus[*]:0:$TARGET_GPU_COUNT}")
        echo "GPU:$selected_gpus are available"
        # NOTE(review): the selection is only printed; the launched script is
        # not given CUDA_VISIBLE_DEVICES. Template for passing it along:
        # CUDA_VISIBLE_DEVICES="$selected_gpus" $YOUR_SCRIPT_PATH
        echo "Run my code"
        # Abort rather than run the training script from the wrong directory.
        cd cpt_dev || { echo "cannot cd to cpt_dev" >&2; exit 1; }
        bash syn_data_run_tinyllama.sh
        # Propagate the training script's result instead of always reporting success.
        exit $?
    fi

    # Not enough idle GPUs yet: wait 300 seconds (5 minutes) before polling again.
    echo "Sleep..."
    sleep 300
done



Leave a Comment