Disty0
/

FLUX.1-dev-qint8

image-generation

Inference Endpoints

Model card Files Files and versions Community

FLUX.1-dev-qint8 / README.md

Disty0's picture

Update README.md

fd65655 verified 3 months ago

|

history blame contribute delete

2.31 kB

	---
	language:
	- en
	license: other
	license_name: flux-1-dev-non-commercial-license
	license_link: LICENSE.md
	tags:
	- text-to-image
	- image-generation
	- flux
	---

	This repository contains `black-forest-labs/FLUX.1-dev` with its transformer and T5 text encoder (`text_encoder_2`) quantized to INT8 using Optimum Quanto.

	```shell
	# transformers is imported directly by the Python example, so install it explicitly
	pip install diffusers transformers optimum-quanto
	```

	```python
	import json
	import torch
	import diffusers
	import transformers
	from optimum.quanto import requantize
	from safetensors.torch import load_file
	from huggingface_hub import hf_hub_download


	def load_quanto_transformer(repo_path, device="cuda"):
	    """Download and rebuild the Quanto-quantized FLUX transformer.

	    Args:
	        repo_path: Hugging Face Hub repo id containing the ``transformer/``
	            files (``config.json``, ``quantization_map.json`` and
	            ``diffusion_pytorch_model.safetensors``).
	        device: Device passed to ``requantize`` for the restored weights.
	            Defaults to ``"cuda"``, the value that was previously hard-coded.

	    Returns:
	        The ``diffusers.FluxTransformer2DModel`` instance after
	        ``optimum.quanto.requantize`` has loaded the quantized state dict.
	    """
	    map_path = hf_hub_download(repo_path, "transformer/quantization_map.json")
	    with open(map_path, "r", encoding="utf-8") as f:
	        quantization_map = json.load(f)

	    # from_config() expects a config dict; handing it a file path only works
	    # through a deprecated fallback, so read the config explicitly first.
	    config = diffusers.FluxTransformer2DModel.load_config(
	        hf_hub_download(repo_path, "transformer/config.json")
	    )

	    # Instantiate under the meta device; the real weights arrive via requantize().
	    with torch.device("meta"):
	        transformer = diffusers.FluxTransformer2DModel.from_config(config).to(torch.bfloat16)

	    state_dict = load_file(
	        hf_hub_download(repo_path, "transformer/diffusion_pytorch_model.safetensors")
	    )
	    requantize(transformer, state_dict, quantization_map, device=torch.device(device))
	    return transformer


	def load_quanto_text_encoder_2(repo_path, device="cuda"):
	    """Download and rebuild the Quanto-quantized T5 text encoder.

	    Args:
	        repo_path: Hugging Face Hub repo id containing the ``text_encoder_2/``
	            files (``config.json``, ``quantization_map.json`` and
	            ``model.safetensors``).
	        device: Device passed to ``requantize`` for the restored weights.
	            Defaults to ``"cuda"``, the value that was previously hard-coded.

	    Returns:
	        The ``transformers.T5EncoderModel`` instance after
	        ``optimum.quanto.requantize`` has loaded the quantized state dict.
	    """
	    map_path = hf_hub_download(repo_path, "text_encoder_2/quantization_map.json")
	    with open(map_path, "r", encoding="utf-8") as f:
	        quantization_map = json.load(f)

	    config_path = hf_hub_download(repo_path, "text_encoder_2/config.json")
	    with open(config_path, "r", encoding="utf-8") as f:
	        t5_config = transformers.T5Config(**json.load(f))

	    # Instantiate under the meta device; the real weights arrive via requantize().
	    with torch.device("meta"):
	        text_encoder_2 = transformers.T5EncoderModel(t5_config).to(torch.bfloat16)

	    state_dict = load_file(hf_hub_download(repo_path, "text_encoder_2/model.safetensors"))
	    requantize(text_encoder_2, state_dict, quantization_map, device=torch.device(device))
	    return text_encoder_2


	# Create the pipeline without its transformer and second text encoder,
	# then attach the quantized versions loaded by the helpers above.
	repo_id = "Disty0/FLUX.1-dev-qint8"
	pipe = diffusers.AutoPipelineForText2Image.from_pretrained(
	    repo_id,
	    transformer=None,
	    text_encoder_2=None,
	    torch_dtype=torch.bfloat16,
	)
	pipe.transformer = load_quanto_transformer(repo_id)
	pipe.text_encoder_2 = load_quanto_text_encoder_2(repo_id)
	pipe = pipe.to("cuda", dtype=torch.bfloat16)


	prompt = "A cat holding a sign that says hello world"

	# CPU generator seeded with 0, handed to the pipeline call below.
	generator = torch.Generator("cpu").manual_seed(0)

	result = pipe(
	    prompt,
	    height=1024,
	    width=1024,
	    guidance_scale=3.5,
	    num_inference_steps=50,
	    max_sequence_length=512,
	    generator=generator,
	)

	# Keep the first generated image and write it to disk.
	image = result.images[0]
	image.save("flux-dev.png")
	```