Update README.md
README.md CHANGED
@@ -23,6 +23,10 @@ You can use the following code to run inference with the model. The model is sav
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
+torch.backends.cuda.enable_mem_efficient_sdp(False)
+torch.backends.cuda.enable_flash_sdp(False)
+
+
 model_name = 'moxin-org/moxin-7b'
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
@@ -45,7 +49,7 @@ prompt = "Can you explain the concept of regularization in machine learning?"
 sequences = pipe(
     prompt,
     do_sample=True,
-    max_new_tokens=
+    max_new_tokens=1000,
     temperature=0.7,
     top_k=50,
     top_p=0.95,
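For context, a sketch of how the updated inference snippet might read in full once this change is applied. Only the lines visible in the hunks above (the imports, the two SDP backend toggles, the model name, the tokenizer and model loading calls, and the sampling arguments passed to pipe) come from the README; the from_pretrained keyword arguments, the pipeline construction, and the final print are assumptions added here to make the example self-contained.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Disable the memory-efficient and flash scaled-dot-product-attention kernels
# process-wide, falling back to the math implementation (the change in this commit).
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

model_name = 'moxin-org/moxin-7b'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # assumed dtype; not visible in the hunks
    device_map="auto",           # assumed device placement; not visible in the hunks
)

# Assumed pipeline construction; the diff only shows that a `pipe` object is used.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

prompt = "Can you explain the concept of regularization in machine learning?"

sequences = pipe(
    prompt,
    do_sample=True,
    max_new_tokens=1000,  # value added by this commit
    temperature=0.7,
    top_k=50,
    top_p=0.95,
)
print(sequences[0]["generated_text"])

Note that enable_mem_efficient_sdp(False) and enable_flash_sdp(False) flip global PyTorch flags, so every later attention call in the process uses the math kernel; PyTorch also provides the torch.backends.cuda.sdp_kernel context manager if the restriction should only apply within a limited scope.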