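"""Launch the AI Scientist pipeline.

For each generated (and optionally novelty-checked) research idea, this script
runs the experiments through an aider coder, produces a LaTeX writeup, reviews
the resulting paper, and can optionally improve it based on that review. Ideas
can be processed sequentially or in parallel with one worker process per GPU.
"""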
import argparse
import json
import multiprocessing
import os
import os.path as osp
import shutil
import sys
import time
from datetime import datetime

import openai
import torch
from aider.coders import Coder
from aider.io import InputOutput
from aider.models import Model

from ai_scientist.generate_ideas import check_idea_novelty, generate_ideas
from ai_scientist.llm import allchoices
from ai_scientist.perform_experiments import perform_experiments
from ai_scientist.perform_review import load_paper, perform_improvement, perform_review
from ai_scientist.perform_writeup import generate_latex, perform_writeup

NUM_REFLECTIONS = 3


def print_time():
    print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))


def parse_arguments():
    parser = argparse.ArgumentParser(description="Run AI scientist experiments")
    parser.add_argument(
        "--skip-idea-generation",
        action="store_true",
        help="Skip idea generation and load existing ideas",
    )
    parser.add_argument(
        "--skip-novelty-check",
        action="store_true",
        help="Skip novelty check and use existing ideas",
    )
    parser.add_argument(
        "--experiment",
        type=str,
        default="nanoGPT_lite",
        help="Experiment to run AI Scientist on.",
    )
    parser.add_argument(
        "--model",
        type=str,
        default="Qwen/Qwen2.5-72B-Instruct",
        choices=allchoices,
        help="Model to use for AI Scientist.",
    )
    parser.add_argument(
        "--writeup",
        type=str,
        default="latex",
        choices=["latex"],
        help="What format to use for writeup",
    )
    parser.add_argument(
        "--parallel",
        type=int,
        default=0,
        help="Number of parallel processes to run. 0 for sequential execution.",
    )
    parser.add_argument(
        "--improvement",
        action="store_true",
        help="Improve based on reviews.",
    )
    parser.add_argument(
        "--gpus",
        type=str,
        default=None,
        help="Comma-separated list of GPU IDs to use (e.g., '0,1,2'). If not specified, all available GPUs will be used.",
    )
    parser.add_argument(
        "--num-ideas",
        type=int,
        default=2,
        help="Number of ideas to generate",
    )
    return parser.parse_args()


def get_available_gpus(gpu_ids=None):
    if gpu_ids is not None:
        return [int(gpu_id) for gpu_id in gpu_ids.split(",")]
    return list(range(torch.cuda.device_count()))


def worker(
    queue,
    base_dir,
    results_dir,
    model,
    client,
    client_model,
    writeup,
    improvement,
    gpu_id,
):
    # Pin this worker to a single GPU before any CUDA work happens.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    print(f"Worker {gpu_id} started.")
    while True:
        idea = queue.get()
        if idea is None:
            # A None sentinel signals that there are no more ideas to process.
            break
        success = do_idea(
            base_dir,
            results_dir,
            idea,
            model,
            client,
            client_model,
            writeup,
            improvement,
            log_file=True,
        )
        print(f"Completed idea: {idea['Name']}, Success: {success}")
    print(f"Worker {gpu_id} finished.")


def do_idea(
    base_dir,
    results_dir,
    idea,
    model,
    client,
    client_model,
    writeup,
    improvement,
    log_file=False,
):
    """Run experiments, writeup, review, and optional improvement for a single idea."""
    # Create a timestamped results folder for this idea, seeded from the template.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    idea_name = f"{timestamp}_{idea['Name']}"
    folder_name = osp.join(results_dir, idea_name)
    assert not osp.exists(folder_name), f"Folder {folder_name} already exists."
    destination_dir = folder_name
    shutil.copytree(base_dir, destination_dir, dirs_exist_ok=True)
    with open(osp.join(base_dir, "run_0", "final_info.json"), "r") as f:
        baseline_results = json.load(f)
    baseline_results = {k: v["means"] for k, v in baseline_results.items()}
    exp_file = osp.join(folder_name, "experiment.py")
    vis_file = osp.join(folder_name, "plot.py")
    notes = osp.join(folder_name, "notes.txt")
    with open(notes, "w") as f:
        f.write(f"# Title: {idea['Title']}\n")
        f.write(f"# Experiment description: {idea['Experiment']}\n")
        f.write("## Run 0: Baseline\n")
        f.write(f"Results: {baseline_results}\n")
        f.write("Description: Baseline results.\n")
    if log_file:
        # Redirect stdout/stderr to a per-idea log file; restored in the finally block.
        original_stdout = sys.stdout
        original_stderr = sys.stderr
        log_path = osp.join(folder_name, "log.txt")
        log = open(log_path, "a")
        sys.stdout = log
        sys.stderr = log
    try:
        print_time()
        print(f"*Starting idea: {idea_name}*")

        fnames = [exp_file, vis_file, notes]
        io = InputOutput(
            yes=True, chat_history_file=f"{folder_name}/{idea_name}_aider.txt"
        )

        # Select the aider model used to edit the experiment code.
        if model == "hybrid":
            main_model = Model("claude-3-5-sonnet-20240620")
        elif model == "deepseek-coder-v2-0724":
            main_model = Model("deepseek-ai/DeepSeek-V2.5")
        elif model == "llama3.1-405b":
            main_model = Model("openrouter/meta-llama/llama-3.1-405b-instruct")
        elif model == "Qwen/Qwen2.5-72B-Instruct":
            # Map the Qwen model onto the provider string expected by aider.
            main_model = Model("friendli/Qwen2.5-72B-Instruct")
        elif model == "hyperbolic/meta-llama/Meta-Llama-3.1-70B-Instruct":
            main_model = Model("hyperbolic/meta-llama/Meta-Llama-3.1-70B-Instruct")
        else:
            main_model = Model(model)
        coder = Coder.create(
            main_model=main_model,
            fnames=fnames,
            io=io,
            stream=False,
            use_git=False,
            edit_format="diff",
        )

        print_time()
        print("*Starting Experiments*")
        try:
            success = perform_experiments(idea, folder_name, coder, baseline_results)
        except Exception as e:
            print(f"Error during experiments: {e}")
            print(f"Experiments failed for idea {idea_name}")
            return False

        if not success:
            print(f"Experiments failed for idea {idea_name}")
            return False

        print_time()
        print("*Starting Writeup*")

        if writeup == "latex":
            writeup_file = osp.join(folder_name, "latex", "template.tex")
            fnames = [exp_file, writeup_file, notes]
            # Select the aider model used for the LaTeX writeup.
            if model == "deepseek-coder-v2-0724":
                main_model = Model("deepseek-ai/DeepSeek-V2.5")
            elif model == "llama3.1-405b":
                main_model = Model("openrouter/meta-llama/llama-3.1-405b-instruct")
            elif model == "Qwen/Qwen2.5-72B-Instruct":
                # Route Qwen through the "openai/" prefix for the writeup coder.
                main_model = Model("openai/Qwen/Qwen2.5-72B-Instruct")
            elif model == "hyperbolic/meta-llama/Meta-Llama-3.1-70B-Instruct":
                main_model = Model("hyperbolic/meta-llama/Meta-Llama-3.1-70B-Instruct")
            else:
                main_model = Model(model)
            coder = Coder.create(
                main_model=main_model,
                fnames=fnames,
                io=io,
                stream=False,
                use_git=False,
                edit_format="diff",
            )
            try:
                perform_writeup(idea, folder_name, coder, client, client_model)
            except Exception as e:
                print(f"Failed to perform writeup: {e}")
                return False
            print("Done writeup")
        else:
            raise ValueError(f"Writeup format {writeup} not supported.")

        print_time()
        print("*Starting Review*")

        if writeup == "latex":
            try:
                paper_text = load_paper(f"{folder_name}/{idea['Name']}.pdf")
                # perform_review expects a model-name string rather than an aider Model object.
                if model == "gpt-4o-2024-05-13":
                    review = perform_review(
                        paper_text,
                        model=model,
                        client=openai.OpenAI(),
                        num_reflections=5,
                        num_fs_examples=1,
                        num_reviews_ensemble=5,
                        temperature=0.1,
                    )
                elif model.startswith("ollama"):
                    # Review through a local Ollama server exposing an OpenAI-compatible API.
                    review = perform_review(
                        paper_text,
                        model=model.split("/")[-1],
                        client=openai.OpenAI(
                            api_key="ollama", base_url="http://localhost:11434/v1"
                        ),
                        num_reflections=5,
                        num_fs_examples=1,
                        num_reviews_ensemble=5,
                        temperature=0.1,
                    )
                else:
                    # Fall back to the main experiment client so `review` is always defined
                    # (other models previously hit a NameError below).
                    review = perform_review(
                        paper_text,
                        model=client_model,
                        client=client,
                        num_reflections=5,
                        num_fs_examples=1,
                        num_reviews_ensemble=5,
                        temperature=0.1,
                    )

                with open(osp.join(folder_name, "review.txt"), "w") as f:
                    f.write(json.dumps(review, indent=4))
            except Exception as e:
                print(f"Failed to perform review: {e}")
                return False

        if writeup == "latex" and improvement:
            print_time()
            print("*Starting Improvement*")
            try:
                perform_improvement(review, coder)
                generate_latex(
                    coder, folder_name, f"{folder_name}/{idea['Name']}_improved.pdf"
                )
                paper_text = load_paper(f"{folder_name}/{idea['Name']}_improved.pdf")

                if model == "gpt-4o-2024-05-13":
                    review = perform_review(
                        paper_text,
                        model=model,
                        client=openai.OpenAI(),
                        num_reflections=5,
                        num_fs_examples=1,
                        num_reviews_ensemble=5,
                        temperature=0.1,
                    )
                elif model.startswith("ollama"):
                    review = perform_review(
                        paper_text,
                        model=model.split("/")[-1],
                        client=openai.OpenAI(
                            api_key="ollama", base_url="http://localhost:11434/v1"
                        ),
                        num_reflections=5,
                        num_fs_examples=1,
                        num_reviews_ensemble=5,
                        temperature=0.1,
                    )
                else:
                    # Same fallback as in the review step above.
                    review = perform_review(
                        paper_text,
                        model=client_model,
                        client=client,
                        num_reflections=5,
                        num_fs_examples=1,
                        num_reviews_ensemble=5,
                        temperature=0.1,
                    )

                with open(osp.join(folder_name, "review_improved.txt"), "w") as f:
                    f.write(json.dumps(review))
            except Exception as e:
                print(f"Failed to perform improvement: {e}")
                return False
        return True
    except Exception as e:
        print(f"Failed to evaluate idea {idea_name}: {str(e)}")
        return False
    finally:
        print("FINISHED IDEA")
        if log_file:
            sys.stdout = original_stdout
            sys.stderr = original_stderr
            log.close()


if __name__ == "__main__":
    import traceback

    try:
        args = parse_arguments()

        # Check available GPUs and adjust the parallelism level if needed.
        available_gpus = get_available_gpus(args.gpus)
        if args.parallel > len(available_gpus):
            print(
                f"Warning: Requested {args.parallel} parallel processes, but only {len(available_gpus)} GPUs available. Adjusting to {len(available_gpus)}."
            )
            args.parallel = len(available_gpus)

        print(f"Using GPUs: {available_gpus}")

        if args.model == "claude-3-5-sonnet-20240620":
            import anthropic

            print(f"Using Anthropic API with model {args.model}.")
            client_model = "claude-3-5-sonnet-20240620"
            client = anthropic.Anthropic()
        elif args.model.startswith("bedrock") and "claude" in args.model:
            import anthropic

            # Expects the model name in the form "bedrock/<model-id>".
            client_model = args.model.split("/")[-1]
            print(f"Using Amazon Bedrock with model {client_model}.")
            client = anthropic.AnthropicBedrock(
                aws_access_key=os.getenv("AWS_ACCESS_KEY_ID"),
                aws_secret_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
                aws_region=os.getenv("AWS_REGION_NAME"),
            )
        elif args.model.startswith("vertex_ai") and "claude" in args.model:
            import anthropic

            client_model = args.model.split("/")[-1]
            print(f"Using Vertex AI with model {client_model}.")
            client = anthropic.AnthropicVertex()
        elif args.model == "gpt-4o-2024-05-13":
            print(f"Using OpenAI API with model {args.model}.")
            client_model = "gpt-4o-2024-05-13"
            client = openai.OpenAI()
        elif args.model == "Qwen/Qwen2.5-72B-Instruct":
            # Qwen is served through an OpenAI-compatible endpoint (Hyperbolic).
            print(f"Using OpenAI-compatible API (Hyperbolic) with model {args.model}.")
            client_model = args.model
            client = openai.OpenAI(
                api_key=os.environ["OPENAI_API_KEY"],
                base_url="https://api.hyperbolic.xyz/v1",
            )
        elif args.model.startswith("ollama"):
            print(f"Using Ollama with {args.model}.")
            client_model = args.model.split("/")[-1]
            client = openai.OpenAI(api_key="ollama", base_url="http://localhost:11434/v1")
        else:
            raise ValueError(f"Model {args.model} not supported.")

        base_dir = osp.join("templates", args.experiment)
        results_dir = osp.join("results", args.experiment)
        ideas = generate_ideas(
            base_dir,
            client=client,
            model=client_model,
            skip_generation=args.skip_idea_generation,
            max_num_generations=args.num_ideas,
            num_reflections=NUM_REFLECTIONS,
        )
        # Honor --skip-novelty-check, which was previously parsed but never used.
        if not args.skip_novelty_check:
            ideas = check_idea_novelty(
                ideas,
                base_dir=base_dir,
                client=client,
                model=client_model,
            )

        with open(osp.join(base_dir, "ideas.json"), "w") as f:
            json.dump(ideas, f, indent=4)

        # Ideas without a "novel" flag (e.g., when the novelty check is skipped) are excluded.
        novel_ideas = [idea for idea in ideas if idea.get("novel", False)]

        if args.parallel > 0:
            print(f"Running {args.parallel} parallel processes")
            queue = multiprocessing.Queue()
            for idea in novel_ideas:
                queue.put(idea)

            processes = []
            for i in range(args.parallel):
                gpu_id = available_gpus[i % len(available_gpus)]
                p = multiprocessing.Process(
                    target=worker,
                    args=(
                        queue,
                        base_dir,
                        results_dir,
                        args.model,
                        client,
                        client_model,
                        args.writeup,
                        args.improvement,
                        gpu_id,
                    ),
                )
                p.start()
                # Stagger worker start-ups.
                time.sleep(150)
                processes.append(p)

            # Send one sentinel per worker to signal that the queue is exhausted.
            for _ in range(args.parallel):
                queue.put(None)

            for p in processes:
                p.join()

            print("All parallel processes completed.")
        else:
            for idea in novel_ideas:
                print(f"Processing idea: {idea['Name']}")
                try:
                    success = do_idea(
                        base_dir,
                        results_dir,
                        idea,
                        args.model,
                        client,
                        client_model,
                        args.writeup,
                        args.improvement,
                    )
                    print(f"Completed idea: {idea['Name']}, Success: {success}")
                except Exception as e:
                    print(f"Failed to evaluate idea {idea['Name']}: {str(e)}")

        print("All ideas evaluated.")

    except Exception:
        print("An unhandled error occurred while running the AI Scientist:")
        traceback.print_exc()
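
# Example invocations (sketches; they assume the chosen template exists under
# templates/ and the required API keys, e.g. OPENAI_API_KEY for the Hyperbolic
# endpoint, are configured):
#   python launch_scientist.py --experiment nanoGPT_lite --num-ideas 2
#   python launch_scientist.py --model gpt-4o-2024-05-13 --parallel 2 --gpus "0,1" --improvement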