K1Z3M1112 committed on
Commit
5e7a71d
·
verified ·
1 Parent(s): 3014548

Update app.py

Browse files
Files changed (1)
  1. app.py +428 -224
app.py CHANGED
@@ -124,10 +124,8 @@ CHINESE_MODELS = [
     "AI-ModelScope/stable-diffusion-v1-5-chinese"
 ]
 
-ALL_MODELS = SD15_MODELS + SDXL_MODELS + CHINESE_MODELS
-
-# ControlNet models
-CONTROLNET_MODELS = {
     "lineart": "lllyasviel/control_v11p_sd15_lineart",
     "lineart_anime": "lllyasviel/control_v11p_sd15s2_lineart_anime",
     "canny": "lllyasviel/control_v11p_sd15_canny",
@@ -142,8 +140,8 @@ CONTROLNET_MODELS = {
     "tile": "lllyasviel/control_v11f1e_sd15_tile"
 }
 
-# SDXL ControlNet models
-SDXL_CONTROLNET_MODELS = {
     "canny_sdxl": "diffusers/controlnet-canny-sdxl-1.0",
     "depth_sdxl": "diffusers/controlnet-depth-sdxl-1.0",
     "openpose_sdxl": "thibaud/controlnet-openpose-sdxl-1.0"
@@ -249,45 +247,43 @@ def load_detector(detector_type: str):
         print(f"❌ Error loading {detector_type} detector: {e}")
         return None
 
-def get_controlnet_model(controlnet_type: str):
     """Get ControlNet model based on type"""
-    if controlnet_type in CONTROLNET_MODELS:
-        return CONTROLNET_MODELS[controlnet_type]
-    elif controlnet_type in SDXL_CONTROLNET_MODELS:
-        return SDXL_CONTROLNET_MODELS[controlnet_type]
     else:
-        raise ValueError(f"Unknown ControlNet type: {controlnet_type}")
 
-def prepare_condition_image(image, controlnet_type):
     """Prepare condition image for ControlNet"""
     if controlnet_type in ["lineart", "lineart_anime"]:
         detector = load_detector("lineart_anime" if controlnet_type == "lineart_anime" else "lineart")
         if detector:
-            result = detector(image, detect_resolution=512, image_resolution=512)
             return Image.fromarray(result) if isinstance(result, np.ndarray) else result
 
-    elif controlnet_type == "canny":
         detector = load_detector("canny")
         if detector:
-            result = detector(image, detect_resolution=512, image_resolution=512)
             return Image.fromarray(result) if isinstance(result, np.ndarray) else result
 
-    elif controlnet_type == "depth":
         detector = load_detector("depth")
         if detector:
-            result = detector(image, detect_resolution=512, image_resolution=512)
             return Image.fromarray(result) if isinstance(result, np.ndarray) else result
 
     elif controlnet_type == "normal":
         detector = load_detector("normal")
         if detector:
-            result = detector(image, detect_resolution=512, image_resolution=512)
             return Image.fromarray(result) if isinstance(result, np.ndarray) else result
 
-    elif controlnet_type == "openpose":
         detector = load_detector("openpose")
         if detector:
-            result = detector(image, detect_resolution=512, image_resolution=512)
             return Image.fromarray(result) if isinstance(result, np.ndarray) else result
 
     return image
@@ -315,32 +311,30 @@ def get_pipeline(model_name: str, controlnet_type: str = "lineart", lora_model:
     print(f"📥 Loading ControlNet pipeline for model: {model_name}, type: {controlnet_type}")
 
     try:
-        if is_sdxl_model(model_name):
-            if controlnet_type in SDXL_CONTROLNET_MODELS:
-                controlnet_model_name = get_controlnet_model(controlnet_type)
-                controlnet = ControlNetModel.from_pretrained(
-                    controlnet_model_name,
-                    torch_dtype=dtype
-                ).to(device)
-
-                pipe = StableDiffusionXLPipeline.from_pretrained(
-                    model_name,
-                    controlnet=controlnet,
-                    torch_dtype=dtype,
-                    safety_checker=None,
-                    requires_safety_checker=False,
-                    use_safetensors=True,
-                    variant="fp16" if dtype == torch.float16 else None
-                ).to(device)
-            else:
-                raise ValueError(f"SDXL model only supports: {list(SDXL_CONTROLNET_MODELS.keys())}")
-        else:
-            controlnet_model_name = get_controlnet_model(controlnet_type)
-            controlnet = ControlNetModel.from_pretrained(
-                controlnet_model_name,
-                torch_dtype=dtype
             ).to(device)
-
         pipe = StableDiffusionControlNetPipeline.from_pretrained(
             model_name,
             controlnet=controlnet,
@@ -365,13 +359,11 @@ def get_pipeline(model_name: str, controlnet_type: str = "lineart", lora_model:
     if lora_model and lora_model != "None":
         print(f"🔄 Applying LoRA: {lora_model} with weight: {lora_weight}")
         try:
-            # Check whether lora_model is a key in the LORA_MODELS dictionary
             if lora_model in LORA_MODELS:
                 lora_path = LORA_MODELS[lora_model]
                 pipe.load_lora_weights(lora_path)
                 pipe.fuse_lora(lora_scale=lora_weight)
             else:
-                # Otherwise treat it as a direct full path
                 pipe.load_lora_weights(lora_model)
                 pipe.fuse_lora(lora_scale=lora_weight)
         except Exception as e:
@@ -494,13 +486,11 @@ def load_t2i_model(model_name: str, lora_model: str = None, lora_weight: float =
     if lora_model and lora_model != "None":
         print(f"🔄 Applying LoRA: {lora_model} with weight: {lora_weight}")
         try:
-            # Check whether lora_model is a key in the LORA_MODELS dictionary
             if lora_model in LORA_MODELS:
                 lora_path = LORA_MODELS[lora_model]
                 CURRENT_T2I_PIPE.load_lora_weights(lora_path)
                 CURRENT_T2I_PIPE.fuse_lora(lora_scale=lora_weight)
             else:
-                # Otherwise treat it as a direct full path
                 CURRENT_T2I_PIPE.load_lora_weights(lora_model)
                 CURRENT_T2I_PIPE.fuse_lora(lora_scale=lora_weight)
         except Exception as e:
@@ -537,21 +527,17 @@ def load_t2i_model(model_name: str, lora_model: str = None, lora_weight: float =
     CURRENT_T2I_MODEL = None
     raise
 
-def colorize(sketch, base_model, controlnet_type, lora_model, lora_weight, vae_model,
-             prompt, negative_prompt, seed, steps, scale, cn_weight):
     try:
-        if is_sdxl_model(base_model) and controlnet_type not in SDXL_CONTROLNET_MODELS:
             error_img = Image.new('RGB', (512, 512), color='red')
-            error_msg_img = Image.new('RGB', (512, 512), color='yellow')
-            from PIL import ImageDraw, ImageFont
-            draw = ImageDraw.Draw(error_msg_img)
-            try:
-                font = ImageFont.truetype("arial.ttf", 20)
-            except:
-                font = ImageFont.load_default()
-            draw.text((50, 200), f"SDXL model only supports:", fill="black", font=font)
-            draw.text((50, 230), f"{', '.join(SDXL_CONTROLNET_MODELS.keys())}", fill="black", font=font)
-            return error_img, error_msg_img
 
         pipe = get_pipeline(base_model, controlnet_type, lora_model, lora_weight, vae_model)
@@ -560,7 +546,7 @@ def colorize(sketch, base_model, controlnet_type, lora_model, lora_weight, vae_m
             status_msg += f" + {lora_model}"
         print(status_msg)
 
-        condition_img = prepare_condition_image(sketch, controlnet_type)
 
         gen = torch.Generator(device=device).manual_seed(int(seed))
 
@@ -582,22 +568,117 @@ def colorize(sketch, base_model, controlnet_type, lora_model, lora_weight, vae_m
 
         return out, condition_img
     except Exception as e:
-        print(f"❌ Error in colorize: {e}")
         error_img = Image.new('RGB', (512, 512), color='red')
         return error_img, Image.new('RGB', (512, 512), color='gray')
 
-def t2i(prompt, negative_prompt, model, lora_model, lora_weight, vae_model,
-        seed, steps, scale, w, h, use_refiner=False):
     try:
         model_to_load = model
         if use_refiner and "refiner" not in model.lower():
            model_to_load = "stabilityai/stable-diffusion-xl-refiner-1.0"
 
        load_t2i_model(model_to_load, lora_model, lora_weight, vae_model)
 
-       print(f"🖼️ Using T2I model: {model}")
        if lora_model and lora_model != "None":
            print(f" with LoRA: {lora_model} (weight: {lora_weight})")
 
        gen = torch.Generator(device=device).manual_seed(int(seed))
 
@@ -623,35 +704,22 @@ def t2i(prompt, negative_prompt, model, lora_model, lora_weight, vae_model,
                 generator=gen
             ).images[0]
         else:
-            if is_sdxl_model(model):
-                width = max(int(w), 512)
-                height = max(int(h), 512)
-                result = CURRENT_T2I_PIPE(
-                    prompt,
-                    negative_prompt=negative_prompt,
-                    width=width,
-                    height=height,
-                    num_inference_steps=int(steps),
-                    guidance_scale=float(scale),
-                    generator=gen
-                ).images[0]
-            else:
-                result = CURRENT_T2I_PIPE(
-                    prompt,
-                    negative_prompt=negative_prompt,
-                    width=int(w),
-                    height=int(h),
-                    num_inference_steps=int(steps),
-                    guidance_scale=float(scale),
-                    generator=gen
-                ).images[0]
 
         if device.type == "cuda":
             torch.cuda.empty_cache()
 
         return result
     except Exception as e:
-        print(f"❌ Error in t2i: {e}")
         error_img = Image.new('RGB', (int(w), int(h)), color='red')
         from PIL import ImageDraw, ImageFont
         draw = ImageDraw.Draw(error_img)
@@ -727,129 +795,187 @@ with gr.Blocks(title="🎨 AI Image Generator Pro", theme=gr.themes.Soft()) as d
         status_text = gr.Textbox(label="Status", interactive=False, scale=3)
         unload_btn.click(unload_all_models, outputs=status_text)
 
-    with gr.Tab("🎨 ControlNet Image-to-Image"):
         gr.Markdown("""
-        ### Transform sketches/images using ControlNet
-        - **SD1.5 Models:** Support all ControlNet types
-        - **SDXL Models:** Support canny_sdxl, depth_sdxl, openpose_sdxl only
         """)
 
         with gr.Row():
             with gr.Column(scale=1):
-                inp = gr.Image(label="Input Sketch/Image", type="pil")
 
                 gr.Markdown("### Model Settings")
-                base_model = gr.Dropdown(
-                    choices=ALL_MODELS,
                     value="digiplay/ChikMix_V3",
-                    label="Base Model"
                 )
-                controlnet_type = gr.Dropdown(
-                    choices=list(CONTROLNET_MODELS.keys()) + list(SDXL_CONTROLNET_MODELS.keys()),
                     value="lineart_anime",
                     label="ControlNet Type"
                 )
 
                 gr.Markdown("### Enhancement Options")
                 with gr.Row():
-                    lora_model = gr.Dropdown(
                         choices=list(LORA_MODELS.keys()),
                         value="None",
                         label="LoRA Model"
                     )
-                    lora_weight = gr.Slider(0.1, 2.0, 0.8, step=0.1, label="LoRA Weight")
 
-                vae_model = gr.Dropdown(
-                    choices=list(VAE_MODELS.keys()),
                     value="None",
                     label="VAE Model (Optional)"
                 )
 
             with gr.Column(scale=1):
-                out = gr.Image(label="Generated Output")
-                condition_out = gr.Image(label="Processed Condition", type="pil")
 
         gr.Markdown("### Generation Parameters")
         with gr.Row():
-            prompt = gr.Textbox(
                 label="Prompt",
                 placeholder="masterpiece, best quality, 1girl, beautiful detailed eyes, long hair",
                 lines=3
             )
-            negative_prompt = gr.Textbox(
                 label="Negative Prompt",
                 placeholder="lowres, bad anatomy, bad hands, text, error, missing fingers",
                 lines=3
             )
 
         with gr.Row():
-            seed = gr.Number(value=-1, label="Seed (-1 for random)")
-            steps = gr.Slider(10, 150, 30, step=1, label="Steps")
-            scale = gr.Slider(1, 30, 7.5, step=0.5, label="CFG Scale")
-            cn_weight = gr.Slider(0.1, 2.0, 1.0, step=0.1, label="ControlNet Weight")
-
-        run = gr.Button("🎨 Generate", variant="primary", size="lg")
-        run.click(
-            colorize,
-            [inp, base_model, controlnet_type, lora_model, lora_weight, vae_model,
-             prompt, negative_prompt, seed, steps, scale, cn_weight],
-            [out, condition_out]
-        )
 
         gr.Markdown("""
-        ### Tips for Better Results:
-        - Use detailed prompts for better control
-        - Adjust ControlNet weight to balance between condition and creativity
-        - Try different LoRA models for various styles
-        - Higher steps = better quality but slower generation
         """)
-    with gr.Tab("🖼️ Text-to-Image Generation"):
         gr.Markdown("""
-        ### Generate images from text descriptions
-        Supports both SD1.5 and SDXL models with advanced features
         """)
 
         with gr.Row():
             with gr.Column(scale=1):
                 gr.Markdown("### Model Configuration")
-                t2i_model = gr.Dropdown(
-                    choices=ALL_MODELS,
                     value="digiplay/ChikMix_V3",
-                    label="Base Model"
                 )
 
                 gr.Markdown("### Enhancement Options")
                 with gr.Row():
-                    t2i_lora = gr.Dropdown(
                         choices=list(LORA_MODELS.keys()),
                         value="None",
                         label="LoRA Model"
                     )
-                    t2i_lora_weight = gr.Slider(0.1, 2.0, 0.8, step=0.1, label="LoRA Weight")
 
-                t2i_vae = gr.Dropdown(
-                    choices=list(VAE_MODELS.keys()),
                     value="None",
                     label="VAE Model"
                 )
-
-                use_refiner = gr.Checkbox(
-                    label="Use SDXL Refiner (SDXL only)",
-                    value=False
-                )
 
             with gr.Column(scale=1):
-                t2i_out = gr.Image(label="Generated Image", type="pil")
 
         gr.Markdown("### Prompts")
         with gr.Row():
-            t2i_prompt = gr.Textbox(
                 label="Prompt",
                 lines=4,
-                placeholder="masterpiece, best quality, highly detailed, 8k, photorealistic, beautiful lighting"
             )
-            t2i_negative_prompt = gr.Textbox(
                 label="Negative Prompt",
                 lines=4,
                 placeholder="lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality"
@@ -857,116 +983,194 @@ with gr.Blocks(title="🎨 AI Image Generator Pro", theme=gr.themes.Soft()) as d
 
         gr.Markdown("### Generation Parameters")
         with gr.Row():
-            t2i_seed = gr.Number(value=-1, label="Seed (-1 for random)")
-            t2i_steps = gr.Slider(10, 150, 30, step=1, label="Steps")
-            t2i_scale = gr.Slider(1, 30, 7.5, step=0.5, label="CFG Scale")
 
         with gr.Row():
-            w = gr.Slider(256, 2048, 512, step=64, label="Width")
-            h = gr.Slider(256, 2048, 768, step=64, label="Height")
-
-        gen_btn = gr.Button("🖼️ Generate Image", variant="primary", size="lg")
-        gen_btn.click(
-            t2i,
-            [t2i_prompt, t2i_negative_prompt, t2i_model, t2i_lora, t2i_lora_weight,
-             t2i_vae, t2i_seed, t2i_steps, t2i_scale, w, h, use_refiner],
-            t2i_out
-        )
 
         gr.Markdown("""
-        ### Pro Tips:
-        - **SDXL models** produce higher quality at 1024x1024
-        - **SD1.5 models** work best at 512x512 or 512x768
-        - Use **LoRA** for specific styles (anime, realistic, etc.)
-        - Use **VAE** for better colors and details
-        - **Refiner** adds extra polish to SDXL generations
-        - Higher **CFG Scale** = more prompt adherence
         """)
     with gr.Tab("📚 Quick Reference"):
         gr.Markdown("""
         # Model & Feature Guide
 
-        ## 🎯 Recommended Models for Different Purposes
-
-        ### Realistic/Photorealistic
-        - `emilianJR/epiCRealism` - Excellent for realistic portraits
-        - `stablediffusionapi/realistic-vision-v51` - High quality realistic images
-        - `digiplay/majicMIX_realistic_v7` - Great for realistic characters
-        - `SG161222/RealVisXL_V4.0` - SDXL realistic model
-
-        ### Anime/Cartoon
-        - `digiplay/ChikMix_V3` - Versatile anime style
-        - `gsdf/Counterfeit-V2.5` - High quality anime
-        - `stablediffusionapi/anything-v5` - Popular anime model
-        - `digiplay/Pony_Diffusion_V6_XL` - SDXL anime model
-
-        ### Artistic/Stylized
-        - `stablediffusionapi/dreamshaper-v8` - Dream-like artistic style
-        - `wavymulder/Analog-Diffusion` - Analog photo aesthetic
-        - `Lykon/dreamshaper-xl-1-0` - SDXL artistic model
-
-        ## 🎨 ControlNet Types Explained
-
-        - **lineart/lineart_anime**: Convert line drawings to colored images
-        - **canny**: Edge detection based generation
-        - **depth**: Depth map based generation
-        - **openpose**: Human pose based generation
-        - **normal**: Normal map based generation
-        - **softedge**: Soft edge detection
         - **scribble**: Scribble to image
-        - **tile**: Upscaling and detail enhancement
 
-        ## 💎 Popular LoRA Combinations
 
-        ### For Portraits
         - Base: `digiplay/majicMIX_realistic_v7`
-        - LoRA: `detail-tweaker` or `face-detail`
         - VAE: `SD1.5 VAE`
 
-        ### For Anime Characters
         - Base: `digiplay/ChikMix_V3`
-        - LoRA: `anime-art` or `manga-style`
         - VAE: `Anime VAE`
 
-        ### For NSFW Content
-        - Base: Any NSFW-capable model
-        - LoRA: `nsfw-master`, `realistic-nsfw`, or `anime-nsfw`
-        - Note: Always use responsibly and legally
 
         ## ⚙️ Parameter Guidelines
 
         ### Steps
-        - **20-30**: Fast, good quality
-        - **30-50**: Balanced
-        - **50-100**: High quality, slow
 
         ### CFG Scale
-        - **5-7**: Creative, loose interpretation
-        - **7-10**: Balanced
-        - **10-15**: Strict prompt adherence
-        - **15+**: Very strict, may oversaturate
 
         ### Resolution
-        - **SD1.5**: 512x512, 512x768, 768x512
-        - **SDXL**: 1024x1024, 1024x1536, 1536x1024
 
-        ## 🔞 NSFW Generation Guidelines
 
-        1. Use NSFW-capable base models
-        2. Apply relevant LoRA for style enhancement
-        3. Use detailed prompts
-        4. Adjust CFG scale (7-12 recommended)
-        5. Consider using higher steps (40-60)
-        6. **Always comply with local laws and regulations**
 
-        ## 🚀 Performance Tips
 
-        - Unload models when switching between different types
-        - Use lower resolutions for testing
-        - Enable xFormers if available (automatic)
-        - Use appropriate batch sizes for your GPU
-        - Monitor GPU memory usage
         """)
 
     try:
     "AI-ModelScope/stable-diffusion-v1-5-chinese"
 ]
 
+# ControlNet models for SD1.5
+CONTROLNET_MODELS_SD15 = {
     "lineart": "lllyasviel/control_v11p_sd15_lineart",
     "lineart_anime": "lllyasviel/control_v11p_sd15s2_lineart_anime",
     "canny": "lllyasviel/control_v11p_sd15_canny",
 
     "tile": "lllyasviel/control_v11f1e_sd15_tile"
 }
 
+# ControlNet models for SDXL
+CONTROLNET_MODELS_SDXL = {
     "canny_sdxl": "diffusers/controlnet-canny-sdxl-1.0",
     "depth_sdxl": "diffusers/controlnet-depth-sdxl-1.0",
     "openpose_sdxl": "thibaud/controlnet-openpose-sdxl-1.0"
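For orientation, a minimal sketch of how one of these checkpoint IDs is consumed further down in `get_pipeline` (dtype and device handling simplified; `digiplay/ChikMix_V3` is the app's default SD1.5 base model):

```python
import torch
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline

# Pair an SD1.5 base model with one of the SD1.5 ControlNet checkpoints
# listed in CONTROLNET_MODELS_SD15 above.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/control_v11p_sd15s2_lineart_anime", torch_dtype=torch.float16
)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "digiplay/ChikMix_V3", controlnet=controlnet, torch_dtype=torch.float16
).to("cuda")
```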
     print(f"❌ Error loading {detector_type} detector: {e}")
     return None
 
+def get_controlnet_model(controlnet_type: str, is_sdxl: bool = False):
     """Get ControlNet model based on type"""
+    if is_sdxl:
+        return CONTROLNET_MODELS_SDXL[controlnet_type]
     else:
+        return CONTROLNET_MODELS_SD15[controlnet_type]
 
+def prepare_condition_image(image, controlnet_type, is_sdxl=False):
     """Prepare condition image for ControlNet"""
     if controlnet_type in ["lineart", "lineart_anime"]:
         detector = load_detector("lineart_anime" if controlnet_type == "lineart_anime" else "lineart")
         if detector:
+            result = detector(image, detect_resolution=512 if not is_sdxl else 1024, image_resolution=512 if not is_sdxl else 1024)
             return Image.fromarray(result) if isinstance(result, np.ndarray) else result
 
+    elif "canny" in controlnet_type:
         detector = load_detector("canny")
         if detector:
+            result = detector(image, detect_resolution=512 if not is_sdxl else 1024, image_resolution=512 if not is_sdxl else 1024)
             return Image.fromarray(result) if isinstance(result, np.ndarray) else result
 
+    elif "depth" in controlnet_type:
         detector = load_detector("depth")
         if detector:
+            result = detector(image, detect_resolution=512 if not is_sdxl else 1024, image_resolution=512 if not is_sdxl else 1024)
             return Image.fromarray(result) if isinstance(result, np.ndarray) else result
 
     elif controlnet_type == "normal":
         detector = load_detector("normal")
         if detector:
+            result = detector(image, detect_resolution=512 if not is_sdxl else 1024, image_resolution=512 if not is_sdxl else 1024)
             return Image.fromarray(result) if isinstance(result, np.ndarray) else result
 
+    elif "openpose" in controlnet_type:
         detector = load_detector("openpose")
         if detector:
+            result = detector(image, detect_resolution=512 if not is_sdxl else 1024, image_resolution=512 if not is_sdxl else 1024)
             return Image.fromarray(result) if isinstance(result, np.ndarray) else result
 
     return image
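The dispatch above can be sanity-checked in isolation; a minimal sketch, assuming app.py's definitions are importable (no model downloads are triggered, since only the lookup and the no-detector fallthrough are exercised):

```python
from PIL import Image

# Type lookup resolves against the dictionaries defined above.
assert get_controlnet_model("canny", is_sdxl=False) == "lllyasviel/control_v11p_sd15_canny"
assert get_controlnet_model("canny_sdxl", is_sdxl=True) == "diffusers/controlnet-canny-sdxl-1.0"

# Types without a detector branch (e.g. "tile") fall through to `return image`,
# so the input comes back unchanged.
blank = Image.new("RGB", (512, 512), "white")
assert prepare_condition_image(blank, "tile", is_sdxl=False) is blank
```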
     print(f"📥 Loading ControlNet pipeline for model: {model_name}, type: {controlnet_type}")
 
     try:
+        is_sdxl = is_sdxl_model(model_name)
+
+        if is_sdxl and controlnet_type not in CONTROLNET_MODELS_SDXL:
+            raise ValueError(f"SDXL model only supports: {list(CONTROLNET_MODELS_SDXL.keys())}")
+        elif not is_sdxl and controlnet_type not in CONTROLNET_MODELS_SD15:
+            raise ValueError(f"SD1.5 model only supports: {list(CONTROLNET_MODELS_SD15.keys())}")
+
+        controlnet_model_name = get_controlnet_model(controlnet_type, is_sdxl)
+        controlnet = ControlNetModel.from_pretrained(
+            controlnet_model_name,
+            torch_dtype=dtype
+        ).to(device)
+
+        if is_sdxl:
+            # StableDiffusionXLControlNetPipeline is required here; the plain
+            # StableDiffusionXLPipeline does not take a controlnet argument.
+            pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+                model_name,
+                controlnet=controlnet,
+                torch_dtype=dtype,
+                safety_checker=None,
+                requires_safety_checker=False,
+                use_safetensors=True,
+                variant="fp16" if dtype == torch.float16 else None
            ).to(device)
+        else:
             pipe = StableDiffusionControlNetPipeline.from_pretrained(
                 model_name,
                 controlnet=controlnet,
 
     if lora_model and lora_model != "None":
         print(f"🔄 Applying LoRA: {lora_model} with weight: {lora_weight}")
         try:
             if lora_model in LORA_MODELS:
                 lora_path = LORA_MODELS[lora_model]
                 pipe.load_lora_weights(lora_path)
                 pipe.fuse_lora(lora_scale=lora_weight)
             else:
                 pipe.load_lora_weights(lora_model)
                 pipe.fuse_lora(lora_scale=lora_weight)
         except Exception as e:
     if lora_model and lora_model != "None":
         print(f"🔄 Applying LoRA: {lora_model} with weight: {lora_weight}")
         try:
             if lora_model in LORA_MODELS:
                 lora_path = LORA_MODELS[lora_model]
                 CURRENT_T2I_PIPE.load_lora_weights(lora_path)
                 CURRENT_T2I_PIPE.fuse_lora(lora_scale=lora_weight)
             else:
                 CURRENT_T2I_PIPE.load_lora_weights(lora_model)
                 CURRENT_T2I_PIPE.fuse_lora(lora_scale=lora_weight)
         except Exception as e:
 
     CURRENT_T2I_MODEL = None
     raise
 
+def colorize_sd15(sketch, base_model, controlnet_type, lora_model, lora_weight, vae_model,
+                  prompt, negative_prompt, seed, steps, scale, cn_weight):
+    """Colorize function for SD1.5 models"""
     try:
+        if base_model not in SD15_MODELS:
             error_img = Image.new('RGB', (512, 512), color='red')
+            return error_img, Image.new('RGB', (512, 512), color='gray')
+
+        if controlnet_type not in CONTROLNET_MODELS_SD15:
             error_img = Image.new('RGB', (512, 512), color='red')
+            return error_img, Image.new('RGB', (512, 512), color='gray')
 
         pipe = get_pipeline(base_model, controlnet_type, lora_model, lora_weight, vae_model)
 
             status_msg += f" + {lora_model}"
         print(status_msg)
 
+        condition_img = prepare_condition_image(sketch, controlnet_type, is_sdxl=False)
 
         gen = torch.Generator(device=device).manual_seed(int(seed))
 
 
         return out, condition_img
     except Exception as e:
+        print(f"❌ Error in colorize_sd15: {e}")
         error_img = Image.new('RGB', (512, 512), color='red')
         return error_img, Image.new('RGB', (512, 512), color='gray')
 
+def colorize_sdxl(sketch, base_model, controlnet_type, lora_model, lora_weight, vae_model,
+                  prompt, negative_prompt, seed, steps, scale, cn_weight):
+    """Colorize function for SDXL models"""
+    try:
+        if base_model not in SDXL_MODELS:
+            error_img = Image.new('RGB', (1024, 1024), color='red')
+            return error_img, Image.new('RGB', (1024, 1024), color='gray')
+
+        if controlnet_type not in CONTROLNET_MODELS_SDXL:
+            error_img = Image.new('RGB', (1024, 1024), color='red')
+            return error_img, Image.new('RGB', (1024, 1024), color='gray')
+
+        pipe = get_pipeline(base_model, controlnet_type, lora_model, lora_weight, vae_model)
+
+        status_msg = f"🎨 Using: {base_model} + {controlnet_type}"
+        if lora_model and lora_model != "None":
+            status_msg += f" + {lora_model}"
+        print(status_msg)
+
+        condition_img = prepare_condition_image(sketch, controlnet_type, is_sdxl=True)
+
+        gen = torch.Generator(device=device).manual_seed(int(seed))
+
+        with torch.inference_mode():
+            out = pipe(
+                prompt,
+                negative_prompt=negative_prompt,
+                image=condition_img,
+                num_inference_steps=int(steps),
+                guidance_scale=float(scale),
+                controlnet_conditioning_scale=float(cn_weight),
+                generator=gen,
+                height=1024,
+                width=1024
+            ).images[0]
+
+        if device.type == "cuda":
+            torch.cuda.empty_cache()
+
+        return out, condition_img
+    except Exception as e:
+        print(f"❌ Error in colorize_sdxl: {e}")
+        error_img = Image.new('RGB', (1024, 1024), color='red')
+        return error_img, Image.new('RGB', (1024, 1024), color='gray')
+
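One caveat around the "Seed (-1 for random)" convention used throughout the UI: `torch.Generator.manual_seed` remaps negative inputs to fixed positive values, so -1 produces the same image every run unless it is resolved to a random seed first. A minimal sketch of that resolution step (the helper is ours, not part of app.py):

```python
import random
import torch

def resolve_seed(seed: int) -> int:
    # Map the UI's -1 sentinel to a fresh random seed; torch treats
    # negative seeds deterministically, not randomly.
    return random.randint(0, 2**32 - 1) if seed < 0 else seed

gen = torch.Generator(device="cpu").manual_seed(resolve_seed(-1))
```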
+def t2i_sd15(prompt, negative_prompt, model, lora_model, lora_weight, vae_model,
+             seed, steps, scale, w, h):
+    """Text-to-image for SD1.5 models"""
+    try:
+        if model not in SD15_MODELS:
+            error_img = Image.new('RGB', (int(w), int(h)), color='red')
+            return error_img
+
+        load_t2i_model(model, lora_model, lora_weight, vae_model)
+
+        print(f"🖼️ Using SD1.5 model: {model}")
+        if lora_model and lora_model != "None":
+            print(f" with LoRA: {lora_model} (weight: {lora_weight})")
+
+        gen = torch.Generator(device=device).manual_seed(int(seed))
+
+        with torch.inference_mode():
+            result = CURRENT_T2I_PIPE(
+                prompt,
+                negative_prompt=negative_prompt,
+                width=int(w),
+                height=int(h),
+                num_inference_steps=int(steps),
+                guidance_scale=float(scale),
+                generator=gen
+            ).images[0]
+
+        if device.type == "cuda":
+            torch.cuda.empty_cache()
+
+        return result
+    except Exception as e:
+        print(f"❌ Error in t2i_sd15: {e}")
+        error_img = Image.new('RGB', (int(w), int(h)), color='red')
+        from PIL import ImageDraw, ImageFont
+        draw = ImageDraw.Draw(error_img)
+        try:
+            font = ImageFont.truetype("arial.ttf", 20)
+        except:
+            font = ImageFont.load_default()
+        draw.text((50, 50), f"Error: {str(e)[:50]}...", fill="white", font=font)
+        return error_img
+
+def t2i_sdxl(prompt, negative_prompt, model, lora_model, lora_weight, vae_model,
+             seed, steps, scale, w, h, use_refiner=False):
+    """Text-to-image for SDXL models"""
     try:
+        if model not in SDXL_MODELS:
+            error_img = Image.new('RGB', (int(w), int(h)), color='red')
+            return error_img
+
         model_to_load = model
         if use_refiner and "refiner" not in model.lower():
             model_to_load = "stabilityai/stable-diffusion-xl-refiner-1.0"
 
         load_t2i_model(model_to_load, lora_model, lora_weight, vae_model)
 
+        print(f"🖼️ Using SDXL model: {model}")
         if lora_model and lora_model != "None":
             print(f" with LoRA: {lora_model} (weight: {lora_weight})")
+        if use_refiner:
+            print(" with refiner")
 
         gen = torch.Generator(device=device).manual_seed(int(seed))
 
                 generator=gen
             ).images[0]
         else:
+            result = CURRENT_T2I_PIPE(
+                prompt,
+                negative_prompt=negative_prompt,
+                width=int(w),
+                height=int(h),
+                num_inference_steps=int(steps),
+                guidance_scale=float(scale),
+                generator=gen
+            ).images[0]
 
         if device.type == "cuda":
             torch.cuda.empty_cache()
 
         return result
     except Exception as e:
+        print(f"❌ Error in t2i_sdxl: {e}")
         error_img = Image.new('RGB', (int(w), int(h)), color='red')
         from PIL import ImageDraw, ImageFont
         draw = ImageDraw.Draw(error_img)
 
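Note that `t2i_sdxl` swaps the refiner in as the generating model when the checkbox is set. The pattern documented for diffusers instead chains the refiner after the base model as an img2img pass; a minimal sketch of that two-stage flow, for comparison:

```python
import torch
from diffusers import StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline

# Stage 1: the base model produces latents; stage 2: the refiner polishes them.
base = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16
).to("cuda")

prompt = "masterpiece, best quality, cinematic lighting"
latents = base(prompt, output_type="latent").images  # keep latents for the refiner
image = refiner(prompt, image=latents).images[0]
```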
     status_text = gr.Textbox(label="Status", interactive=False, scale=3)
     unload_btn.click(unload_all_models, outputs=status_text)
 
+    with gr.Tab("🎨 SD1.5 ControlNet"):
         gr.Markdown("""
+        ### Transform sketches/images using SD1.5 with ControlNet
+        - **Supports:** lineart, lineart_anime, canny, depth, normal, openpose, softedge, segmentation, mlsd, shuffle, scribble, tile
+        - **Best Resolution:** 512x512
         """)
 
         with gr.Row():
             with gr.Column(scale=1):
+                inp_sd15 = gr.Image(label="Input Sketch/Image", type="pil")
 
                 gr.Markdown("### Model Settings")
+                base_model_sd15 = gr.Dropdown(
+                    choices=SD15_MODELS,
                     value="digiplay/ChikMix_V3",
+                    label="SD1.5 Base Model"
                 )
+                controlnet_type_sd15 = gr.Dropdown(
+                    choices=list(CONTROLNET_MODELS_SD15.keys()),
                     value="lineart_anime",
                     label="ControlNet Type"
                 )
 
                 gr.Markdown("### Enhancement Options")
                 with gr.Row():
+                    lora_model_sd15 = gr.Dropdown(
                         choices=list(LORA_MODELS.keys()),
                         value="None",
                         label="LoRA Model"
                     )
+                    lora_weight_sd15 = gr.Slider(0.1, 2.0, 0.8, step=0.1, label="LoRA Weight")
 
+                vae_model_sd15 = gr.Dropdown(
+                    choices=["None", "SD1.5 VAE", "Anime VAE"],
                     value="None",
                     label="VAE Model (Optional)"
                 )
 
             with gr.Column(scale=1):
+                out_sd15 = gr.Image(label="Generated Output")
+                condition_out_sd15 = gr.Image(label="Processed Condition", type="pil")
 
         gr.Markdown("### Generation Parameters")
         with gr.Row():
+            prompt_sd15 = gr.Textbox(
                 label="Prompt",
                 placeholder="masterpiece, best quality, 1girl, beautiful detailed eyes, long hair",
                 lines=3
             )
+            negative_prompt_sd15 = gr.Textbox(
                 label="Negative Prompt",
                 placeholder="lowres, bad anatomy, bad hands, text, error, missing fingers",
                 lines=3
             )
 
         with gr.Row():
+            seed_sd15 = gr.Number(value=-1, label="Seed (-1 for random)")
+            steps_sd15 = gr.Slider(10, 100, 30, step=1, label="Steps")
+            scale_sd15 = gr.Slider(1, 30, 7.5, step=0.5, label="CFG Scale")
+            cn_weight_sd15 = gr.Slider(0.1, 2.0, 1.0, step=0.1, label="ControlNet Weight")
 
+        run_sd15 = gr.Button("🎨 Generate (SD1.5)", variant="primary", size="lg")
+        run_sd15.click(
+            colorize_sd15,
+            [inp_sd15, base_model_sd15, controlnet_type_sd15, lora_model_sd15, lora_weight_sd15, vae_model_sd15,
+             prompt_sd15, negative_prompt_sd15, seed_sd15, steps_sd15, scale_sd15, cn_weight_sd15],
+            [out_sd15, condition_out_sd15]
+        )
+
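Each tab follows the same Gradio wiring: the components in the inputs list are mapped positionally onto the handler's parameters. A stripped-down sketch of that pattern, independent of this app:

```python
import gradio as gr

def echo(text: str, times: int) -> str:
    # Inputs arrive in the order of the components passed to .click().
    return text * int(times)

with gr.Blocks() as demo:
    text = gr.Textbox(label="Text")
    times = gr.Slider(1, 5, value=2, step=1, label="Repeat")
    out = gr.Textbox(label="Output")
    gr.Button("Run").click(echo, [text, times], out)

demo.launch()
```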
+    with gr.Tab("🎨 SDXL ControlNet"):
         gr.Markdown("""
+        ### Transform sketches/images using SDXL with ControlNet
+        - **Supports:** canny_sdxl, depth_sdxl, openpose_sdxl
+        - **Best Resolution:** 1024x1024
+        - **Higher quality, more VRAM required**
         """)
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                inp_sdxl = gr.Image(label="Input Sketch/Image", type="pil")
+
+                gr.Markdown("### Model Settings")
+                base_model_sdxl = gr.Dropdown(
+                    choices=SDXL_MODELS,
+                    value="stabilityai/stable-diffusion-xl-base-1.0",
+                    label="SDXL Base Model"
+                )
+                controlnet_type_sdxl = gr.Dropdown(
+                    choices=list(CONTROLNET_MODELS_SDXL.keys()),
+                    value="canny_sdxl",
+                    label="ControlNet Type"
+                )
+
+                gr.Markdown("### Enhancement Options")
+                with gr.Row():
+                    lora_model_sdxl = gr.Dropdown(
+                        choices=list(LORA_MODELS.keys()),
+                        value="None",
+                        label="LoRA Model"
+                    )
+                    lora_weight_sdxl = gr.Slider(0.1, 2.0, 0.8, step=0.1, label="LoRA Weight")
+
+                vae_model_sdxl = gr.Dropdown(
+                    choices=["None", "SDXL VAE"],
+                    value="None",
+                    label="VAE Model (Optional)"
+                )
+
+            with gr.Column(scale=1):
+                out_sdxl = gr.Image(label="Generated Output")
+                condition_out_sdxl = gr.Image(label="Processed Condition", type="pil")
+
+        gr.Markdown("### Generation Parameters")
+        with gr.Row():
+            prompt_sdxl = gr.Textbox(
+                label="Prompt",
+                placeholder="masterpiece, best quality, 8k, ultra-detailed, photorealistic, beautiful lighting",
+                lines=3
+            )
+            negative_prompt_sdxl = gr.Textbox(
+                label="Negative Prompt",
+                placeholder="lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits",
+                lines=3
+            )
+
+        with gr.Row():
+            seed_sdxl = gr.Number(value=-1, label="Seed (-1 for random)")
+            steps_sdxl = gr.Slider(10, 100, 30, step=1, label="Steps")
+            scale_sdxl = gr.Slider(1, 30, 7.5, step=0.5, label="CFG Scale")
+            cn_weight_sdxl = gr.Slider(0.1, 2.0, 1.0, step=0.1, label="ControlNet Weight")
+
+        run_sdxl = gr.Button("🎨 Generate (SDXL)", variant="primary", size="lg")
+        run_sdxl.click(
+            colorize_sdxl,
+            [inp_sdxl, base_model_sdxl, controlnet_type_sdxl, lora_model_sdxl, lora_weight_sdxl, vae_model_sdxl,
+             prompt_sdxl, negative_prompt_sdxl, seed_sdxl, steps_sdxl, scale_sdxl, cn_weight_sdxl],
+            [out_sdxl, condition_out_sdxl]
+        )
 
+    with gr.Tab("🖼️ SD1.5 Text-to-Image"):
         gr.Markdown("""
+        ### Generate images from text descriptions using SD1.5
+        - **Best Resolution:** 512x512, 512x768, 768x512
+        - **Faster generation, lower VRAM usage**
         """)
 
         with gr.Row():
             with gr.Column(scale=1):
                 gr.Markdown("### Model Configuration")
+                t2i_model_sd15 = gr.Dropdown(
+                    choices=SD15_MODELS,
                     value="digiplay/ChikMix_V3",
+                    label="SD1.5 Base Model"
                 )
 
                 gr.Markdown("### Enhancement Options")
                 with gr.Row():
+                    t2i_lora_sd15 = gr.Dropdown(
                         choices=list(LORA_MODELS.keys()),
                         value="None",
                         label="LoRA Model"
                     )
+                    t2i_lora_weight_sd15 = gr.Slider(0.1, 2.0, 0.8, step=0.1, label="LoRA Weight")
 
+                t2i_vae_sd15 = gr.Dropdown(
+                    choices=["None", "SD1.5 VAE", "Anime VAE"],
                     value="None",
                     label="VAE Model"
                 )
 
             with gr.Column(scale=1):
+                t2i_out_sd15 = gr.Image(label="Generated Image", type="pil")
 
         gr.Markdown("### Prompts")
         with gr.Row():
+            t2i_prompt_sd15 = gr.Textbox(
                 label="Prompt",
                 lines=4,
+                placeholder="masterpiece, best quality, highly detailed, beautiful, 1girl"
             )
+            t2i_negative_prompt_sd15 = gr.Textbox(
                 label="Negative Prompt",
                 lines=4,
                 placeholder="lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality"
 
         gr.Markdown("### Generation Parameters")
         with gr.Row():
+            t2i_seed_sd15 = gr.Number(value=-1, label="Seed (-1 for random)")
+            t2i_steps_sd15 = gr.Slider(10, 100, 30, step=1, label="Steps")
+            t2i_scale_sd15 = gr.Slider(1, 30, 7.5, step=0.5, label="CFG Scale")
 
         with gr.Row():
+            w_sd15 = gr.Slider(256, 1024, 512, step=64, label="Width")
+            h_sd15 = gr.Slider(256, 1024, 768, step=64, label="Height")
 
+        gen_btn_sd15 = gr.Button("🖼️ Generate (SD1.5)", variant="primary", size="lg")
+        gen_btn_sd15.click(
+            t2i_sd15,
+            [t2i_prompt_sd15, t2i_negative_prompt_sd15, t2i_model_sd15, t2i_lora_sd15, t2i_lora_weight_sd15,
+             t2i_vae_sd15, t2i_seed_sd15, t2i_steps_sd15, t2i_scale_sd15, w_sd15, h_sd15],
+            t2i_out_sd15
+        )
+
+    with gr.Tab("🖼️ SDXL Text-to-Image"):
         gr.Markdown("""
+        ### Generate images from text descriptions using SDXL
+        - **Best Resolution:** 1024x1024, 1024x1536, 1536x1024
+        - **Higher quality, more detail, better composition**
         """)
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("### Model Configuration")
+                t2i_model_sdxl = gr.Dropdown(
+                    choices=SDXL_MODELS,
+                    value="stabilityai/stable-diffusion-xl-base-1.0",
+                    label="SDXL Base Model"
+                )
+
+                gr.Markdown("### Enhancement Options")
+                with gr.Row():
+                    t2i_lora_sdxl = gr.Dropdown(
+                        choices=list(LORA_MODELS.keys()),
+                        value="None",
+                        label="LoRA Model"
+                    )
+                    t2i_lora_weight_sdxl = gr.Slider(0.1, 2.0, 0.8, step=0.1, label="LoRA Weight")
+
+                t2i_vae_sdxl = gr.Dropdown(
+                    choices=["None", "SDXL VAE"],
+                    value="None",
+                    label="VAE Model"
+                )
+
+                use_refiner_sdxl = gr.Checkbox(
+                    label="Use Refiner (for better quality)",
+                    value=False
+                )
+
+            with gr.Column(scale=1):
+                t2i_out_sdxl = gr.Image(label="Generated Image", type="pil")
+
+        gr.Markdown("### Prompts")
+        with gr.Row():
+            t2i_prompt_sdxl = gr.Textbox(
+                label="Prompt",
+                lines=4,
+                placeholder="masterpiece, best quality, 8k, ultra-detailed, photorealistic, cinematic lighting"
+            )
+            t2i_negative_prompt_sdxl = gr.Textbox(
+                label="Negative Prompt",
+                lines=4,
+                placeholder="lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, jpeg artifacts, signature, watermark, username, blurry"
+            )
+
+        gr.Markdown("### Generation Parameters")
+        with gr.Row():
+            t2i_seed_sdxl = gr.Number(value=-1, label="Seed (-1 for random)")
+            t2i_steps_sdxl = gr.Slider(10, 100, 30, step=1, label="Steps")
+            t2i_scale_sdxl = gr.Slider(1, 30, 7.5, step=0.5, label="CFG Scale")
+
+        with gr.Row():
+            w_sdxl = gr.Slider(512, 2048, 1024, step=64, label="Width")
+            h_sdxl = gr.Slider(512, 2048, 1024, step=64, label="Height")
+
+        gen_btn_sdxl = gr.Button("🖼️ Generate (SDXL)", variant="primary", size="lg")
+        gen_btn_sdxl.click(
+            t2i_sdxl,
+            [t2i_prompt_sdxl, t2i_negative_prompt_sdxl, t2i_model_sdxl, t2i_lora_sdxl, t2i_lora_weight_sdxl,
+             t2i_vae_sdxl, t2i_seed_sdxl, t2i_steps_sdxl, t2i_scale_sdxl, w_sdxl, h_sdxl, use_refiner_sdxl],
+            t2i_out_sdxl
+        )
 
     with gr.Tab("📚 Quick Reference"):
         gr.Markdown("""
         # Model & Feature Guide
 
+        ## 🎯 SD1.5 vs SDXL Comparison
+
+        ### SD1.5 (Stable Diffusion 1.5)
+        - **Pros:** Faster, lower VRAM usage, many specialized models
+        - **Cons:** Lower quality, less detail
+        - **Best for:** Quick generations, testing, lower-end hardware
+        - **Resolution:** 512x512 optimal
+
+        ### SDXL (Stable Diffusion XL)
+        - **Pros:** Higher quality, better composition, more detail
+        - **Cons:** Slower, higher VRAM usage
+        - **Best for:** Final quality images, professional work
+        - **Resolution:** 1024x1024 optimal
+
+        ## 🎨 ControlNet Types
+
+        ### SD1.5 ControlNet (12 types)
+        - **lineart/lineart_anime**: Line art to image
+        - **canny**: Edge detection
+        - **depth**: Depth map based
+        - **openpose**: Human pose
+        - **normal**: Normal map
+        - **softedge**: Soft edges
+        - **segmentation**: Semantic segmentation
+        - **mlsd**: Straight line detection
+        - **shuffle**: Color shuffle
         - **scribble**: Scribble to image
+        - **tile**: Upscaling
+
+        ### SDXL ControlNet (3 types)
+        - **canny_sdxl**: Edge detection
+        - **depth_sdxl**: Depth map
+        - **openpose_sdxl**: Human pose
 
+        ## 💎 Recommended Model Combinations
 
+        ### SD1.5 - Realistic Portraits
         - Base: `digiplay/majicMIX_realistic_v7`
+        - LoRA: `detail-tweaker`
         - VAE: `SD1.5 VAE`
+        - Steps: 30-40
+        - CFG: 7-9
 
+        ### SD1.5 - Anime Style
         - Base: `digiplay/ChikMix_V3`
+        - LoRA: `anime-art`
         - VAE: `Anime VAE`
+        - Steps: 25-35
+        - CFG: 6-8
+
+        ### SDXL - Photorealistic
+        - Base: `SG161222/RealVisXL_V4.0`
+        - LoRA: `photorealistic`
+        - VAE: `SDXL VAE`
+        - Use Refiner: Yes
+        - Steps: 35-45
+        - CFG: 7-10
 
+        ### SDXL - Artistic
+        - Base: `Lykon/dreamshaper-xl-1-0`
+        - LoRA: `watercolor-style`
+        - VAE: `SDXL VAE`
+        - Steps: 30-40
+        - CFG: 6-8
 
         ## ⚙️ Parameter Guidelines
 
         ### Steps
+        - **SD1.5:** 20-35 (fast), 35-50 (quality)
+        - **SDXL:** 30-45 (fast), 45-60 (quality)
 
         ### CFG Scale
+        - **4-6:** Creative, loose interpretation
+        - **6-9:** Balanced (recommended)
+        - **9-12:** Strict prompt adherence
+        - **12+:** Very strict
 
         ### Resolution
+        - **SD1.5:** 512x512, 512x768, 768x512 (max 1024x1024)
+        - **SDXL:** 1024x1024, 1024x1536, 1536x1024 (max 2048x2048)
 
+        ## 🚀 Performance Tips
 
+        ### For Low VRAM (<8GB)
+        - Use SD1.5 models only
+        - Enable attention slicing
+        - Use lower resolutions (512x512)
+        - Reduce steps (20-30)
 
+        ### For Medium VRAM (8-12GB)
+        - Can use SD1.5 and some SDXL
+        - For SDXL, use 1024x1024
+        - Enable xFormers
 
+        ### For High VRAM (12GB+)
+        - Can use all models
+        - SDXL with refiner
+        - Higher resolutions
         """)
 
     try: