Text-to-Image
Diffusers
English
xingpng committed on
Commit
5c27e5c
•
1 Parent(s): ca97735
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
37
+ *.jpg filter=lfs diff=lfs merge=lfs -text
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/CSGO.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
5
+ <Languages>
6
+ <language minSize="1197" name="Python" />
7
+ </Languages>
8
+ </inspection_tool>
9
+ <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
10
+ <option name="ignoredErrors">
11
+ <list>
12
+ <option value="N801" />
13
+ </list>
14
+ </option>
15
+ </inspection_tool>
16
+ </profile>
17
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="Python 3.9" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/CSGO.iml" filepath="$PROJECT_DIR$/.idea/CSGO.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
README.md CHANGED
@@ -1,3 +1,257 @@
1
  ---
2
  license: apache-2.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
4
+ <div align="center">
5
+
6
+ [//]: # (<h1>CSGO: Content-Style Composition in Text-to-Image Generation</h1>)
7
+
8
+ [//]: # ()
9
+ [//]: # ([**Peng Xing**]&#40;https://github.com/xingp-ng&#41;<sup>12*</sup> · [**Haofan Wang**]&#40;https://haofanwang.github.io/&#41;<sup>1*</sup> · [**Yanpeng Sun**]&#40;https://scholar.google.com.hk/citations?user=a3FI8c4AAAAJ&hl=zh-CN&oi=ao/&#41;<sup>2</sup> · [**Qixun Wang**]&#40;https://github.com/wangqixun&#41;<sup>1</sup> · [**Xu Bai**]&#40;https://huggingface.co/baymin0220&#41;<sup>1</sup> · [**Hao Ai**]&#40;https://github.com/aihao2000&#41;<sup>13</sup> · [**Renyuan Huang**]&#40;https://github.com/DannHuang&#41;<sup>14</sup> · [**Zechao Li**]&#40;https://zechao-li.github.io/&#41;<sup>2✉</sup>)
10
+
11
+ [//]: # ()
12
+ [//]: # (<sup>1</sup>InstantX Team · <sup>2</sup>Nanjing University of Science and Technology · <sup>3</sup>Beihang University · <sup>4</sup>Peking University)
13
+
14
+ <sup>*</sup>equal contributions, <sup>✉</sup>corresponding authors
15
+
16
+ <a href='https://csgo-gen.github.io/'><img src='https://img.shields.io/badge/Project-Page-green'></a>
17
+ <a href='https://arxiv.org/abs/2408.16766'><img src='https://img.shields.io/badge/Technique-Report-red'></a>
18
+ [![Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Space-red)](https://huggingface.co/spaces/InstantX/InstantStyle)
19
+ [![ModelScope](https://img.shields.io/badge/ModelScope-Studios-blue)](https://modelscope.cn/studios/instantx/InstantStyle/summary)
20
+ [![GitHub](https://img.shields.io/github/stars/instantX-research/CSGO?style=social)](https://github.com/instantX-research/CSGO)
21
+ </div>
22
+
23
+
24
+ [//]: # (## Updates 🔥)
25
+
26
+ [//]: # ()
27
+ [//]: # ([//]: # &#40;- **`2024/07/19`**: ✨ We support 🎞️ portrait video editing &#40;aka v2v&#41;! More to see [here]&#40;assets/docs/changelog/2024-07-19.md&#41;.&#41;)
28
+ [//]: # ()
29
+ [//]: # ([//]: # &#40;- **`2024/07/17`**: 🍎 We support macOS with Apple Silicon, modified from [jeethu]&#40;https://github.com/jeethu&#41;'s PR [#143]&#40;https://github.com/KwaiVGI/LivePortrait/pull/143&#41;.&#41;)
30
+ [//]: # ()
31
+ [//]: # ([//]: # &#40;- **`2024/07/10`**: 💪 We support audio and video concatenating, driving video auto-cropping, and template making to protect privacy. More to see [here]&#40;assets/docs/changelog/2024-07-10.md&#41;.&#41;)
32
+ [//]: # ()
33
+ [//]: # ([//]: # &#40;- **`2024/07/09`**: 🤗 We released the [HuggingFace Space]&#40;https://huggingface.co/spaces/KwaiVGI/liveportrait&#41;, thanks to the HF team and [Gradio]&#40;https://github.com/gradio-app/gradio&#41;!&#41;)
34
+ [//]: # ([//]: # &#40;Continuous updates, stay tuned!&#41;)
35
+ [//]: # (- **`2024/08/30`**: 😊 We released the initial version of the inference code.)
36
+
37
+ [//]: # (- **`2024/08/30`**: 😊 We released the technical report on [arXiv]&#40;https://arxiv.org/pdf/2408.16766&#41;)
38
+
39
+ [//]: # (- **`2024/07/15`**: 🔥 We released the [homepage]&#40;https://csgo-gen.github.io&#41;.)
40
+
41
+ [//]: # ()
42
+ [//]: # (## Plan 💪)
43
+
44
+ [//]: # (- [x] technical report)
45
+
46
+ [//]: # (- [x] inference code)
47
+
48
+ [//]: # (- [ ] pre-trained weight)
49
+
50
+ [//]: # (- [ ] IMAGStyle dataset)
51
+
52
+ [//]: # (- [ ] training code)
53
+
54
+ ## Introduction 📖
55
+ This repo, named **CSGO**, contains the official PyTorch implementation of our paper [CSGO: Content-Style Composition in Text-to-Image Generation](https://arxiv.org/pdf/2408.16766).
56
+ We are actively updating and improving this repository. If you find any bugs or have suggestions, feel free to raise issues or submit pull requests (PRs) 💖.
57
+
58
+ ## Pipeline 💻
59
+ <p align="center">
60
+ <img src="assets/image3_1.jpg">
61
+ </p>
62
+
63
+ ## Capabilities 🚅
64
+
65
+ 🔥 Our CSGO achieves **image-driven style transfer, text-driven stylized synthesis, and text editing-driven stylized synthesis**.
66
+
67
+ 🔥 For more results, visit our <a href="https://csgo-gen.github.io"><strong>homepage</strong></a> 🔥
68
+
69
+ <p align="center">
70
+ <img src="assets/vis.jpg">
71
+ </p>
72
+
73
+
74
+ ## Getting Started 🏁
75
+ ### 1. Clone the code and prepare the environment
76
+ ```bash
77
+ git clone https://github.com/instantX-research/CSGO
78
+ cd CSGO
79
+
80
+ # create env using conda
81
+ conda create -n CSGO python=3.9
82
+ conda activate CSGO
83
+
84
+ # install dependencies with pip
85
+ # for Linux and Windows users
86
+ pip install -r requirements.txt
87
+ ```
88
+
89
+ ### 2. Download pretrained weights (coming soon)
90
+
91
+ The easiest way to download the pretrained weights is from HuggingFace:
92
+ ```bash
93
+ # first, ensure git-lfs is installed, see: https://docs.github.com/en/repositories/working-with-files/managing-large-files/installing-git-large-file-storage
94
+ git lfs install
95
+ # clone and move the weights
96
+ git clone https://huggingface.co/InstantX/CSGO CSGO
97
+ ```
98
+ Our method is fully compatible with [SDXL](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0), [VAE](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix), [ControlNet](https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic), and [Image Encoder](https://huggingface.co/h94/IP-Adapter/tree/main/sdxl_models/image_encoder).
99
+ Please download them and place them in the ./base_models folder.
100
+ ### 3. Inference 🚀
101
+
102
+ ```python
103
+ import torch
104
+ from ip_adapter.utils import BLOCKS as BLOCKS
105
+ from ip_adapter.utils import controlnet_BLOCKS as controlnet_BLOCKS
106
+ from PIL import Image
107
+ from diffusers import (
108
+ AutoencoderKL,
109
+ ControlNetModel,
110
+ StableDiffusionXLControlNetPipeline,
111
+
112
+ )
113
+ from ip_adapter import CSGO
114
+
115
+
116
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
117
+
118
+ base_model_path = "./base_models/stable-diffusion-xl-base-1.0"
119
+ image_encoder_path = "./base_models/IP-Adapter/sdxl_models/image_encoder"
120
+ csgo_ckpt = "./CSGO/csgo.bin"
121
+ pretrained_vae_name_or_path ='./base_models/sdxl-vae-fp16-fix'
122
+ controlnet_path = "./base_models/TTPLanet_SDXL_Controlnet_Tile_Realistic"
123
+ weight_dtype = torch.float16
124
+
125
+
126
+ vae = AutoencoderKL.from_pretrained(pretrained_vae_name_or_path,torch_dtype=torch.float16)
127
+ controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16,use_safetensors=True)
128
+ pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
129
+ base_model_path,
130
+ controlnet=controlnet,
131
+ torch_dtype=torch.float16,
132
+ add_watermarker=False,
133
+ vae=vae
134
+ )
135
+ pipe.enable_vae_tiling()
136
+
137
+
138
+ target_content_blocks = BLOCKS['content']
139
+ target_style_blocks = BLOCKS['style']
140
+ controlnet_target_content_blocks = controlnet_BLOCKS['content']
141
+ controlnet_target_style_blocks = controlnet_BLOCKS['style']
142
+
143
+ csgo = CSGO(pipe, image_encoder_path, csgo_ckpt, device, num_content_tokens=4,num_style_tokens=32,
144
+ target_content_blocks=target_content_blocks, target_style_blocks=target_style_blocks,controlnet=False,controlnet_adapter=True,
145
+ controlnet_target_content_blocks=controlnet_target_content_blocks,
146
+ controlnet_target_style_blocks=controlnet_target_style_blocks,
147
+ content_model_resampler=True,
148
+ style_model_resampler=True,
149
+ load_controlnet=False,
150
+
151
+ )
152
+
153
+ style_name = 'img_0.png'
154
+ content_name = 'img_0.png'
155
+ style_image = Image.open("../assets/{}".format(style_name)).convert('RGB')
156
+ content_image = Image.open('../assets/{}'.format(content_name)).convert('RGB')
157
+
158
+ caption ='a small house with a sheep statue on top of it'
159
+
160
+ num_sample=4
161
+
162
+ #image-driven style transfer
163
+ images = csgo.generate(pil_content_image= content_image, pil_style_image=style_image,
164
+ prompt=caption,
165
+ negative_prompt= "text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry",
166
+ content_scale=1.0,
167
+ style_scale=1.0,
168
+ guidance_scale=10,
169
+ num_images_per_prompt=num_sample,
170
+ num_samples=1,
171
+ num_inference_steps=50,
172
+ seed=42,
173
+ image=content_image.convert('RGB'),
174
+ controlnet_conditioning_scale=0.6,
175
+ )
176
+
177
+ #text-driven stylized synthesis
178
+ caption='a cat'
179
+ images = csgo.generate(pil_content_image= content_image, pil_style_image=style_image,
180
+ prompt=caption,
181
+ negative_prompt= "text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry",
182
+ content_scale=1.0,
183
+ style_scale=1.0,
184
+ guidance_scale=10,
185
+ num_images_per_prompt=num_sample,
186
+ num_samples=1,
187
+ num_inference_steps=50,
188
+ seed=42,
189
+ image=content_image.convert('RGB'),
190
+ controlnet_conditioning_scale=0.01,
191
+ )
192
+
193
+ #text editing-driven stylized synthesis
194
+ caption='a small house'
195
+ images = csgo.generate(pil_content_image= content_image, pil_style_image=style_image,
196
+ prompt=caption,
197
+ negative_prompt= "text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry",
198
+ content_scale=1.0,
199
+ style_scale=1.0,
200
+ guidance_scale=10,
201
+ num_images_per_prompt=num_sample,
202
+ num_samples=1,
203
+ num_inference_steps=50,
204
+ seed=42,
205
+ image=content_image.convert('RGB'),
206
+ controlnet_conditioning_scale=0.4,
207
+ )
208
+ ```
209
+
210
+ ## Demos
211
+ <p align="center">
212
+ <br>
213
+ 🔥 For more results, visit our <a href="https://csgo-gen.github.io"><strong>homepage</strong></a> 🔥
214
+ </p>
215
+
216
+ ### Content-Style Composition
217
+ <p align="center">
218
+ <img src="assets/page1.png">
219
+ </p>
220
+
221
+ <p align="center">
222
+ <img src="assets/page4.png">
223
+ </p>
224
+
225
+ ### Cycle Translation
226
+ <p align="center">
227
+ <img src="assets/page8.png">
228
+ </p>
229
+
230
+ ### Text-Driven Style Synthesis
231
+ <p align="center">
232
+ <img src="assets/page10.png">
233
+ </p>
234
+
235
+ ### Text Editing-Driven Style Synthesis
236
+ <p align="center">
237
+ <img src="assets/page11.jpg">
238
+ </p>
239
+
240
+ ## Star History
241
+ [![Star History Chart](https://api.star-history.com/svg?repos=instantX-research/CSGO&type=Date)](https://star-history.com/#instantX-research/CSGO&Date)
242
+
243
+
244
+
245
+ ## Acknowledgements
246
+ This project is developed by the InstantX Team. All rights reserved.
247
+
248
+ ## Citation 💖
249
+ If you find CSGO useful for your research, please 🌟 this repo and cite our work using the following BibTeX:
250
+ ```bibtex
251
+ @article{xing2024csgo,
252
+ title={CSGO: Content-Style Composition in Text-to-Image Generation},
253
+ author={Peng Xing and Haofan Wang and Yanpeng Sun and Qixun Wang and Xu Bai and Hao Ai and Renyuan Huang and Zechao Li},
254
+ year={2024},
255
+ journal={arXiv preprint arXiv:2408.16766},
256
+ }
257
+ ```
assets/image3_1.jpg ADDED

Git LFS Details

  • SHA256: d70cfe41896daa28c961c9308aa62b1397550bba852710104715bfaee27e331a
  • Pointer size: 132 Bytes
  • Size of remote file: 1.02 MB
assets/page1.png ADDED

Git LFS Details

  • SHA256: c083f81dba2a7f5d214c204863623bb5c94505fad686bb88d3cdfd3f1bc6128e
  • Pointer size: 132 Bytes
  • Size of remote file: 2.09 MB
assets/page11.jpg ADDED

Git LFS Details

  • SHA256: 651aa1154cfa5c17b5f2475e19b7ad55e89a5f0552d15180961b02fcdfea52d3
  • Pointer size: 132 Bytes
  • Size of remote file: 8.92 MB
assets/page4.png ADDED

Git LFS Details

  • SHA256: 41580401cfc58fef3cfcd6fb1cde58e6a9df0869ee758185e9ad485c7a7ad211
  • Pointer size: 132 Bytes
  • Size of remote file: 4.59 MB
assets/page8.png ADDED

Git LFS Details

  • SHA256: 05c4fce64fa99c096301d04b9011c6ddc9bb791fbef1dd6d318d4195e15ee0e4
  • Pointer size: 132 Bytes
  • Size of remote file: 9.3 MB
assets/vis.jpg ADDED

Git LFS Details

  • SHA256: 17d78d200ab12d6994323d233a9be42cac8780ecfabab071a51667b250fcc2dc
  • Pointer size: 132 Bytes
  • Size of remote file: 4.56 MB