|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import warnings |
|
import shutil |
|
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, BitsAndBytesConfig |
|
from transformers.models.clip.image_processing_clip import CLIPImageProcessor |
|
import torch |
|
from mplug_docowl.model import * |
|
from icecream import ic |
|
def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, load_4bit=False, device_map="auto", device="cuda"):
    """Load a tokenizer, causal-LM model, image processor and context length.

    Two families of checkpoints are handled:

    * mPLUG-DocOwl / PaperOwl models (``model_name`` contains ``'docowl'`` or
      ``'paperowl'``, case-insensitively) are loaded through
      ``MPLUGDocOwlLlamaForCausalLM``.
    * Any other model is loaded through ``AutoModelForCausalLM``; when
      ``model_base`` is given, ``model_path`` is treated as LoRA weights that
      are applied to the base model with PEFT and then merged into it.

    Args:
        model_path: Checkpoint location. Always used for the image processor;
            used for the model/tokenizer when ``model_base`` is ``None``, and
            for the config (DocOwl) or LoRA weights (generic) otherwise.
        model_base: Optional base checkpoint. When not ``None`` the tokenizer
            and model weights are loaded from it instead of ``model_path``.
        model_name: Name used only to select the loading path.
        load_8bit: If true, pass ``load_in_8bit=True`` to ``from_pretrained``.
        load_4bit: If true (and ``load_8bit`` is false), request 4-bit NF4
            quantization with double quantization and float16 compute.
        device_map: Value forwarded as ``device_map`` when ``device`` is
            ``"cuda"``.
        device: Target device. Any value other than ``"cuda"`` replaces
            ``device_map`` with ``{"": device}``.

    Returns:
        A tuple ``(tokenizer, model, image_processor, context_len)`` where
        ``context_len`` is ``model.config.max_sequence_length`` when that
        attribute exists and ``2048`` otherwise.
    """
    kwargs = {"device_map": device_map}

    # A non-default device overrides whatever device_map the caller supplied.
    if device != "cuda":
        kwargs['device_map'] = {"": device}

    # Quantization and dtype are mutually exclusive; 8-bit wins over 4-bit,
    # and float16 is used only when no quantization is requested.
    if load_8bit:
        kwargs['load_in_8bit'] = True
    elif load_4bit:
        # NOTE(review): both the bare `load_in_4bit` kwarg and a
        # `quantization_config` are passed here, as in the original code.
        # Some transformers releases may reject this combination -- confirm
        # against the pinned transformers version before changing it.
        kwargs['load_in_4bit'] = True
        kwargs['quantization_config'] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type='nf4',
        )
    else:
        kwargs['torch_dtype'] = torch.float16

    # The previous condition, `'paperowl' or 'docowl' in name`, was always
    # true because the non-empty literal 'paperowl' is truthy on its own,
    # which made the generic branch below unreachable. Each substring must be
    # tested against the name explicitly.
    lowered_name = model_name.lower()
    if 'paperowl' in lowered_name or 'docowl' in lowered_name:
        if model_base is not None:
            print('Loading mPLUG-DocOwl from base model...')
            tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
            # Weights come from the base checkpoint, but the configuration is
            # read from model_path.
            cfg_pretrained = AutoConfig.from_pretrained(model_path)
            model = MPLUGDocOwlLlamaForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=cfg_pretrained, **kwargs)
        else:
            tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
            model = MPLUGDocOwlLlamaForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
    else:
        if model_base is not None:
            # PEFT is imported lazily so it is only required on the LoRA path.
            from peft import PeftModel
            tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
            model = AutoModelForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, **kwargs)
            print(f"Loading LoRA weights from {model_path}")
            model = PeftModel.from_pretrained(model, model_path)
            print("Merging weights")
            model = model.merge_and_unload()
            print('Convert to FP16...')
            model.to(torch.float16)
        else:
            tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
            model = AutoModelForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)

    # The image processor is always read from model_path, even when the model
    # weights were loaded from model_base.
    image_processor = CLIPImageProcessor.from_pretrained(model_path)

    if hasattr(model.config, "max_sequence_length"):
        context_len = model.config.max_sequence_length
    else:
        # Fallback when the config does not declare a maximum sequence length.
        context_len = 2048

    return tokenizer, model, image_processor, context_len