#!/bin/bash

# Perturbation names; each one is passed as the first argument to
# perturb_llama.py.
PERTURBATIONS=(
  hop_control
  hop_tokens4
  hop_words4
  reverse_control
  reverse_partial
  reverse_full
  shuffle_control
  shuffle_nondeterministic
  shuffle_deterministic21
  shuffle_deterministic57
  shuffle_deterministic84
  shuffle_local3
  shuffle_local5
  shuffle_local10
  shuffle_even_odd
)

# BabyLM splits to process; each one is passed as the second argument.
# Full list, for reference (add more splits as needed):
#   BABYLM_SPLITS=("100M" "10M" "dev" "test" "unittest")
BABYLM_SPLITS=(dev)

# GPU ids jobs may be scheduled on (each is exported to the job through
# CUDA_VISIBLE_DEVICES). Edit this list to match the GPUs you want to use.
SPECIFIED_GPUS=(
  1
  2
  3
  4
  5
  6
  7
)

# Scheduler state: maps each GPU id to the PID of the job most recently
# launched on it. A missing or empty entry means there is no job to track.
declare -A GPU_PROCESS_MAP
# Number of collected jobs that exited with a non-zero status.
FAILED_TASKS=0

#######################################
# Collect the exit status of the job recorded for a GPU and clear its slot.
# Globals:   GPU_PROCESS_MAP (read/written), FAILED_TASKS (written)
# Arguments: $1 - GPU id
# Outputs:   a warning on stderr when the job exited non-zero
# Returns:   0 always
#######################################
reap_gpu_job() {
  local gpu=$1
  local pid=${GPU_PROCESS_MAP[$gpu]:-}
  local status=0

  [[ -n "$pid" ]] || return 0

  # Blocks if the job is still running; for a job that already exited,
  # bash returns the exit status it remembered for that background PID.
  wait "$pid" || status=$?
  if (( status != 0 )); then
    FAILED_TASKS=$((FAILED_TASKS + 1))
    echo "Warning: job on GPU $gpu (PID=$pid) exited with status $status" >&2
  fi
  GPU_PROCESS_MAP[$gpu]=""
  return 0
}

#######################################
# Report whether a GPU currently has no running job from this script.
# A finished job is reaped (status recorded) as a side effect.
# Globals:   GPU_PROCESS_MAP (read/written), FAILED_TASKS (written)
# Arguments: $1 - GPU id
# Returns:   0 if the GPU is free, 1 if its job is still running
#######################################
gpu_is_free() {
  local gpu=$1
  local pid=${GPU_PROCESS_MAP[$gpu]:-}

  # No job was ever started on this GPU (or it was already reaped).
  [[ -n "$pid" ]] || return 0

  # kill -0 sends no signal; it only tests that the process still exists.
  # It is a builtin, so polling does not fork an external command.
  if kill -0 "$pid" 2>/dev/null; then
    return 1
  fi

  reap_gpu_job "$gpu"
  return 0
}

#######################################
# Start one perturbation job on the first free GPU, polling once per
# second until a GPU becomes available.
# Globals:   SPECIFIED_GPUS (read), GPU_PROCESS_MAP (written)
# Arguments: $1 - perturbation name, $2 - BabyLM split
# Outputs:   one "Running on GPU ..." line on stdout per launched job
# Returns:   0 once the job has been started; exits 1 if no GPUs are
#            configured (otherwise the poll loop would never terminate)
#######################################
run_on_free_gpu() {
  local perturbation=$1
  local split=$2
  local gpu

  if (( ${#SPECIFIED_GPUS[@]} == 0 )); then
    echo "Error: SPECIFIED_GPUS is empty; nothing to schedule on." >&2
    exit 1
  fi

  while true; do
    for gpu in "${SPECIFIED_GPUS[@]}"; do
      if gpu_is_free "$gpu"; then
        # Run the Python perturbation script on the available GPU
        CUDA_VISIBLE_DEVICES="$gpu" python perturb_llama.py "$perturbation" "$split" &
        GPU_PROCESS_MAP[$gpu]=$!
        echo "Running on GPU $gpu: Perturbation=$perturbation, Split=$split, PID=$!"
        return 0
      fi
    done
    sleep 1  # Every GPU is busy; wait a second before checking again
  done
}

# Iterate over all combinations of perturbations and splits
for perturbation in "${PERTURBATIONS[@]}"; do
  for split in "${BABYLM_SPLITS[@]}"; do
    run_on_free_gpu "$perturbation" "$split"
  done
done

# Wait for the jobs still running on each GPU and record their statuses
for gpu in "${SPECIFIED_GPUS[@]}"; do
  reap_gpu_job "$gpu"
done
wait  # Safety net for any background child not tracked in the map
echo "All tasks completed."

# Surface failures instead of silently reporting success.
if (( FAILED_TASKS > 0 )); then
  echo "Warning: $FAILED_TASKS task(s) exited with a non-zero status." >&2
  exit 1
fi