Untitled
unknown
plain_text
a year ago
1.1 kB
6
Indexable
#!/bin/bash
#
# Wait until enough GPUs are idle, then launch the training script.
#
# Polls `nvidia-smi` for the used memory of every GPU. A GPU counts as idle
# when its used memory is below MEMORY_THRESHOLD (nvidia-smi reports the value
# in MiB when `nounits` is requested). As soon as at least TARGET_GPU_COUNT
# GPUs are idle, the script changes into ./cpt_dev (relative to the current
# working directory), runs syn_data_run_tinyllama.sh and exits with that
# script's exit status.
#
# Requires: bash >= 4 (mapfile), nvidia-smi, awk.

set -u

# NOTE(review): "xxx" is a placeholder value; confirm how the real key is
# meant to be supplied before relying on this export.
export WANDB_API_KEY="xxx"

# Number of idle GPUs required before the job is started.
readonly TARGET_GPU_COUNT=8
# A GPU is considered idle when its used memory is strictly below this value.
readonly MEMORY_THRESHOLD=2048
# Seconds to wait between two polls of nvidia-smi.
readonly POLL_INTERVAL=300

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Print the zero-based line index of every idle GPU, one per line.
# Globals:   MEMORY_THRESHOLD (read)
# Inputs:    stdin - one used-memory value per line, one line per GPU
# Outputs:   indices of lines whose value is below the threshold
#######################################
list_idle_gpus() {
  # Only purely numeric lines are considered: an empty or "[N/A]" line must
  # not be mistaken for a GPU using 0 MiB.
  # Assumes nvidia-smi lists GPUs in index order, so that line number - 1 is
  # the GPU id - TODO confirm on multi-GPU hosts with custom ordering.
  awk -v threshold="$MEMORY_THRESHOLD" \
    '$1 ~ /^[0-9]+$/ && ($1 + 0) < (threshold + 0) { print NR - 1 }'
}

# Join all arguments into a single comma-separated string on stdout.
join_by_comma() {
  local IFS=,
  printf '%s\n' "$*"
}

main() {
  local mem_info selected_gpus
  local -a idle_gpus=()

  command -v nvidia-smi >/dev/null || die "nvidia-smi not found in PATH"

  while true; do
    # Query the used memory of every GPU; a failing query is retried on the
    # next cycle instead of being treated as "no memory in use".
    if ! mem_info=$(nvidia-smi --query-gpu=memory.used \
      --format=csv,noheader,nounits); then
      echo "nvidia-smi failed; retrying after sleep" >&2
      sleep "$POLL_INTERVAL"
      continue
    fi
    echo "$mem_info"

    # Collect the ids of the GPUs that satisfy the idle condition.
    mapfile -t idle_gpus < <(list_idle_gpus <<<"$mem_info")

    # Start the job once the requested number of GPUs is idle.
    if (( ${#idle_gpus[@]} >= TARGET_GPU_COUNT )); then
      selected_gpus=$(join_by_comma "${idle_gpus[@]:0:TARGET_GPU_COUNT}")
      echo "GPU:$selected_gpus are available"
      # Template for passing the selected GPU ids to a script:
      # CUDA_VISIBLE_DEVICES="$selected_gpus" $YOUR_SCRIPT_PATH
      echo "Run my code"
      cd cpt_dev || die "cannot cd to cpt_dev"
      bash syn_data_run_tinyllama.sh
      # Propagate the job's status so a failed run is not reported as success.
      exit $?
    fi

    echo "Sleep..."
    sleep "$POLL_INTERVAL"
  done
}

main "$@"
Editor is loading...
Leave a Comment