masanorihirano committed
Commit • 0d4eedd
Parent(s): d91928f
update
app.py CHANGED
@@ -11,8 +11,9 @@ from fastchat.serve.inference import compress_module
 from fastchat.serve.inference import raise_warning_for_old_weights
 from huggingface_hub import Repository
 from huggingface_hub import hf_hub_download
+from huggingface_hub import snapshot_download
 from peft import LoraConfig
-from peft import
+from peft import get_peft_model
 from peft import set_peft_model_state_dict
 from transformers import AutoModelForCausalLM
 from transformers import GenerationConfig
@@ -63,7 +64,12 @@ try:
 except Exception:
     pass
 
-
+resume_from_checkpoint = snapshot_download(
+    repo_id=LORA_WEIGHTS, use_auth_token=HF_TOKEN
+)
+checkpoint_name = hf_hub_download(
+    repo_id=LORA_WEIGHTS, filename="adapter_model.bin", use_auth_token=HF_TOKEN
+)
 if device == "cuda":
     model = AutoModelForCausalLM.from_pretrained(
         BASE_MODEL, load_in_8bit=True, device_map="auto", torch_dtype=torch.float16
@@ -83,12 +89,15 @@ else:
         low_cpu_mem_usage=True,
         torch_dtype=torch.float16,
     )
+
+config = LoraConfig.from_pretrained(resume_from_checkpoint)
+model = get_peft_model(model, config)
 adapters_weights = torch.load(checkpoint_name)
 set_peft_model_state_dict(model, adapters_weights)
 raise_warning_for_old_weights(BASE_MODEL, model)
 compress_module(model, device)
-if device == "cuda" or device == "mps":
-    model = model.to(device)
+# if device == "cuda" or device == "mps":
+#     model = model.to(device)
 
 
 def generate_prompt(instruction: str, input: Optional[str] = None):
@@ -308,5 +317,5 @@ with gr.Blocks(
     clear_button.click(reset_textbox, [], [instruction, inputs, outputs], queue=False)
 
 demo.queue(max_size=20, concurrency_count=NUM_THREADS, api_open=False).launch(
-    server_name="0.0.0.0", server_port=7860
+    share=True, server_name="0.0.0.0", server_port=7860
 )
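In short, the commit downloads the LoRA adapter from the Hub, wraps the base model with its configuration, and then injects the adapter weights. Below is a minimal sketch of that flow, assuming placeholder values for BASE_MODEL, LORA_WEIGHTS, and HF_TOKEN (the app defines these elsewhere) and omitting the app's 8-bit and device-specific loading branches.

import torch
from huggingface_hub import hf_hub_download, snapshot_download
from peft import LoraConfig, get_peft_model, set_peft_model_state_dict
from transformers import AutoModelForCausalLM

BASE_MODEL = "some-org/base-model"     # placeholder, not the app's actual value
LORA_WEIGHTS = "some-org/lora-adapter" # placeholder adapter repo id
HF_TOKEN = None                        # or a token if the adapter repo is private

# snapshot_download fetches the whole adapter repo (including adapter_config.json),
# while hf_hub_download fetches only the serialized LoRA weights file.
resume_from_checkpoint = snapshot_download(repo_id=LORA_WEIGHTS, use_auth_token=HF_TOKEN)
checkpoint_name = hf_hub_download(
    repo_id=LORA_WEIGHTS, filename="adapter_model.bin", use_auth_token=HF_TOKEN
)

# Load the base model, wrap it with the LoRA configuration, then load the
# downloaded adapter weights into the wrapped model.
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, torch_dtype=torch.float16)
config = LoraConfig.from_pretrained(resume_from_checkpoint)
model = get_peft_model(model, config)
adapters_weights = torch.load(checkpoint_name, map_location="cpu")
set_peft_model_state_dict(model, adapters_weights)
model.eval()

The remaining edits are smaller: the explicit model.to(device) move is commented out, presumably because the 8-bit device_map="auto" path and compress_module already handle placement, and launch() gains share=True so Gradio serves the demo through a temporary public link in addition to 0.0.0.0:7860.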