vishal0719 committed on
Commit
cb566b1
β€’
1 Parent(s): eb27c80

removed quantization config

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -35,12 +35,12 @@ device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
35
 
36
  # set quantization configuration to load large model with less GPU memory
37
  # this requires the `bitsandbytes` library
38
- bnb_config = transformers.BitsAndBytesConfig(
39
- load_in_4bit=True,
40
- bnb_4bit_quant_type='nf4',
41
- bnb_4bit_use_double_quant=True,
42
- bnb_4bit_compute_dtype=bfloat16
43
- )
44
 
45
  # Hugging Face Access Token
46
  hf_auth = os.environ.get("hf_auth")
@@ -56,7 +56,7 @@ model = transformers.AutoModelForCausalLM.from_pretrained(
56
  model_id,
57
  trust_remote_code=True,
58
  config=model_config,
59
- quantization_config=bnb_config,
60
  device_map='auto',
61
  token=hf_auth
62
  )
 
35
 
36
  # set quantization configuration to load large model with less GPU memory
37
  # this requires the `bitsandbytes` library
38
+ # bnb_config = transformers.BitsAndBytesConfig(
39
+ # load_in_4bit=True,
40
+ # bnb_4bit_quant_type='nf4',
41
+ # bnb_4bit_use_double_quant=True,
42
+ # bnb_4bit_compute_dtype=bfloat16
43
+ # )
44
 
45
  # Hugging Face Access Token
46
  hf_auth = os.environ.get("hf_auth")
 
56
  model_id,
57
  trust_remote_code=True,
58
  config=model_config,
59
+ # quantization_config=bnb_config,
60
  device_map='auto',
61
  token=hf_auth
62
  )