Upload 3 files
- README.md +0 -1
- app.py +2 -2
- requirements.txt +1 -0
README.md CHANGED
@@ -5,7 +5,6 @@ colorFrom: yellow
 colorTo: yellow
 sdk: gradio
 sdk_version: 5.6.0
-python_version: 3.11
 app_file: app.py
 pinned: false
 license: mit
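With the pin removed, the Space presumably falls back to whatever default Python runtime the Gradio SDK image ships with, rather than requesting a 3.11 interpreter explicitly.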
app.py CHANGED
@@ -16,7 +16,7 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
 
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 # Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error.
-model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16).eval()
+model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16, device_map="auto").eval()
 terminators = [tokenizer.eos_token_id,
                tokenizer.convert_tokens_to_ids("<|eot_id|>"),
                tokenizer.convert_tokens_to_ids("<|reserved_special_token_26|>")]
@@ -59,4 +59,4 @@ interface = gr.Interface(
 )
 
 # Launch the Gradio app
-interface.launch(ssr_mode=False)
+interface.queue().launch(ssr_mode=False)
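For context, this is roughly the loading and launch path once both hunks are applied. It is a minimal sketch, not the Space's full app.py: model_name, the single-textbox interface, and the generation settings are placeholders, and the original terminators list is omitted. device_map="auto" hands weight placement to accelerate (hence the new requirement below), and interface.queue() turns on Gradio's request queue so concurrent users wait in line instead of hitting the one model instance at the same time.

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "org/model"  # placeholder; the Space loads its own checkpoint

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# float16 halves the memory footprint versus the float32 default;
# device_map="auto" lets accelerate spread the weights across the available GPU(s)/CPU.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto",
).eval()

def generate(prompt: str) -> str:
    # Move inputs to wherever accelerate placed the first model shard.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=128)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

interface = gr.Interface(fn=generate, inputs="text", outputs="text")
# queue() enables Gradio's request queue; ssr_mode=False matches the original launch call.
interface.queue().launch(ssr_mode=False)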
requirements.txt CHANGED
@@ -4,3 +4,4 @@ torch
 numpy<2
 einops
 sentencepiece
+accelerate
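The new accelerate entry backs the device_map="auto" call above: transformers errors out if a device_map is requested without accelerate installed, so adding it to requirements.txt keeps the Space from failing at startup.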