Update app.py
app.py
CHANGED
@@ -6,7 +6,6 @@ import optimum
 from transformers import (AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, pipeline,)
 
 app = FastAPI()
-
 
 # Load the model and tokenizer
 model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPT/"
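For context, a minimal sketch of how the top of app.py plausibly fits together. This is an assumption, not the repo's code: the diff never shows the body of load_quantized_model, and the AutoGPTQForCausalLM loader below is a guess based on the import optimum context line and the model.safetensors basename passed in the next hunk.

# Hypothetical reconstruction -- the real load_quantized_model is not in the diff.
from fastapi import FastAPI
import torch
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM  # assumption: GPTQ weights loaded via auto-gptq

app = FastAPI()

# Load the model and tokenizer
model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPT/"

def load_quantized_model(model_name_or_path, model_basename):
    # Tokenizer comes from the same Hub repo as the weights.
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
    # auto-gptq expects the shard basename without the .safetensors extension.
    model = AutoGPTQForCausalLM.from_quantized(
        model_name_or_path,
        model_basename=model_basename.replace(".safetensors", ""),
        use_safetensors=True,
        device="cuda:0" if torch.cuda.is_available() else "cpu",
    )
    return model, tokenizer

model, tokenizer = load_quantized_model(model_name_or_path, "model.safetensors")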
@@ -38,7 +37,7 @@ model, tokenizer = load_quantized_model(model_name_or_path, "model.safetensors")
 
 def load_model_norm():
     if torch.cuda.is_available():
-
+        print("CUDA is available. GPU will be used.")
     else:
         print("CUDA is not available. CPU will be used.")
     # Load model directly
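After the change, load_model_norm() reports which device will be used and then, per the trailing comment, loads the model directly. A hedged sketch of the full function, assuming the plain transformers loaders imported at the top of the file; the device_map and dtype arguments are guesses, since the body below the comment is not shown in the diff.

# Sketch only: the diff shows the CUDA check and the "# Load model directly"
# comment; the loading call itself is an assumption based on the transformers
# imports at the top of the file.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model_norm():
    if torch.cuda.is_available():
        print("CUDA is available. GPU will be used.")
    else:
        print("CUDA is not available. CPU will be used.")
    # Load model directly
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        device_map="auto",          # assumption: let accelerate place layers
        torch_dtype=torch.float16,  # assumption: half precision when on GPU
    )
    return model, tokenizer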