vishal0719 committed on
Commit
cb566b1
β€’
1 Parent(s): eb27c80

removed quantization config

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -35,12 +35,12 @@ device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
35
 
36
  # set quantization configuration to load large model with less GPU memory
37
  # this requires the `bitsandbytes` library
38
- bnb_config = transformers.BitsAndBytesConfig(
39
- load_in_4bit=True,
40
- bnb_4bit_quant_type='nf4',
41
- bnb_4bit_use_double_quant=True,
42
- bnb_4bit_compute_dtype=bfloat16
43
- )
44
 
45
  # Hugging Face Access Token
46
  hf_auth = os.environ.get("hf_auth")
@@ -56,7 +56,7 @@ model = transformers.AutoModelForCausalLM.from_pretrained(
56
  model_id,
57
  trust_remote_code=True,
58
  config=model_config,
59
- quantization_config=bnb_config,
60
  device_map='auto',
61
  token=hf_auth
62
  )
 
35
 
36
  # set quantization configuration to load large model with less GPU memory
37
  # this requires the `bitsandbytes` library
38
+ # bnb_config = transformers.BitsAndBytesConfig(
39
+ # load_in_4bit=True,
40
+ # bnb_4bit_quant_type='nf4',
41
+ # bnb_4bit_use_double_quant=True,
42
+ # bnb_4bit_compute_dtype=bfloat16
43
+ # )
44
 
45
  # Hugging Face Access Token
46
  hf_auth = os.environ.get("hf_auth")
 
56
  model_id,
57
  trust_remote_code=True,
58
  config=model_config,
59
+ # quantization_config=bnb_config,
60
  device_map='auto',
61
  token=hf_auth
62
  )