bstraehle committed on
Commit
092da5d
1 Parent(s): 20e1bf0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -31
app.py CHANGED
@@ -29,23 +29,23 @@ def process(action, base_model_name, ft_model_name, dataset_name, system_prompt,
29
  def fine_tune_model(base_model_name, dataset_name):
30
  # Load dataset
31
 
32
- #dataset = load_dataset(dataset_name)
33
 
34
- #print("### Dataset")
35
- #print(dataset)
36
- #print("### Example")
37
- #print(dataset["train"][:1])
38
- #print("###")
39
 
40
  # Load model
41
 
42
- #model, tokenizer = load_model(base_model_name)
43
 
44
- #print("### Model")
45
- #print(model)
46
- #print("### Tokenizer")
47
- #print(tokenizer)
48
- #print("###")
49
 
50
  # Pre-process dataset
51
 
@@ -53,26 +53,26 @@ def fine_tune_model(base_model_name, dataset_name):
53
  model_inputs = tokenizer(examples["sql_prompt"], text_target=examples["sql"], max_length=512, padding="max_length", truncation=True)
54
  return model_inputs
55
 
56
- #dataset = dataset.map(preprocess, batched=True)
57
 
58
- #print("### Pre-processed dataset")
59
- #print(dataset)
60
- #print("### Example")
61
- #print(dataset["train"][:1])
62
- #print("###")
63
 
64
  # Split dataset into training and validation sets
65
 
66
- ##train_dataset = dataset["train"]
67
- ##test_dataset = dataset["test"]
68
- #train_dataset = dataset["train"].shuffle(seed=42).select(range(1000))
69
- #test_dataset = dataset["test"].shuffle(seed=42).select(range(100))
70
 
71
- #print("### Training dataset")
72
- #print(train_dataset)
73
- #print("### Validation dataset")
74
- #print(test_dataset)
75
- #print("###")
76
 
77
  # Configure training arguments
78
 
@@ -120,10 +120,10 @@ def fine_tune_model(base_model_name, dataset_name):
120
 
121
  api = HfApi()
122
  api.create_repo(repo_id=FT_MODEL_NAME)
123
- api.upload_folder(
124
- folder_path="./output/",
125
- repo_id="Meta-Llama-3.1-8B-Instruct-text-to-sql"
126
- )
127
 
128
  tokenizer.push_to_hub(FT_MODEL_NAME)
129
 
 
29
  def fine_tune_model(base_model_name, dataset_name):
30
  # Load dataset
31
 
32
+ dataset = load_dataset(dataset_name)
33
 
34
+ print("### Dataset")
35
+ print(dataset)
36
+ print("### Example")
37
+ print(dataset["train"][:1])
38
+ print("###")
39
 
40
  # Load model
41
 
42
+ model, tokenizer = load_model(base_model_name)
43
 
44
+ print("### Model")
45
+ print(model)
46
+ print("### Tokenizer")
47
+ print(tokenizer)
48
+ print("###")
49
 
50
  # Pre-process dataset
51
 
 
53
  model_inputs = tokenizer(examples["sql_prompt"], text_target=examples["sql"], max_length=512, padding="max_length", truncation=True)
54
  return model_inputs
55
 
56
+ dataset = dataset.map(preprocess, batched=True)
57
 
58
+ print("### Pre-processed dataset")
59
+ print(dataset)
60
+ print("### Example")
61
+ print(dataset["train"][:1])
62
+ print("###")
63
 
64
  # Split dataset into training and validation sets
65
 
66
+ #train_dataset = dataset["train"]
67
+ #test_dataset = dataset["test"]
68
+ train_dataset = dataset["train"].shuffle(seed=42).select(range(1000))
69
+ test_dataset = dataset["test"].shuffle(seed=42).select(range(100))
70
 
71
+ print("### Training dataset")
72
+ print(train_dataset)
73
+ print("### Validation dataset")
74
+ print(test_dataset)
75
+ print("###")
76
 
77
  # Configure training arguments
78
 
 
120
 
121
  api = HfApi()
122
  api.create_repo(repo_id=FT_MODEL_NAME)
123
+ #api.upload_folder(
124
+ # folder_path="./output",
125
+ # repo_id="Meta-Llama-3.1-8B-Instruct-text-to-sql"
126
+ #)
127
 
128
  tokenizer.push_to_hub(FT_MODEL_NAME)
129