# Desm0nt/Phi-3-HornyVision-128k-instruct

import base64
import os
import sys
import time

import requests
from openai import OpenAI
from tqdm import tqdm
# Check that a command-line argument was provided.
if len(sys.argv) < 2:
    print("Please, provide the path to image folder.")
    sys.exit(1)
# Get the path to image dir from command line.
image_dir = sys.argv[1]
# Fail fast on a bad path, before the server is contacted.
if not os.path.isdir(image_dir):
    print(f"Image folder not found or not a directory: {image_dir}")
    sys.exit(1)
# Connection settings for the OpenAI-compatible endpoint on localhost.
# NOTE(review): "EMPTY" is a placeholder key; presumably the local server
# does not validate it -- confirm against the server configuration.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
# Ask the server which models it exposes and report the first one.
available_models = client.models.list().data
if not available_models:
    # Without this guard an empty list surfaces as a bare IndexError.
    print(f"No models are served at {openai_api_base}.")
    sys.exit(1)
model_type = available_models[0].id
print(f'model_type: {model_type}')
# Function to encode the image
def encode_image(image_path: "str | os.PathLike") -> str:
    """Return the contents of the file at *image_path* as a Base64 string.

    The file is opened in binary mode and read in full; the Base64 bytes are
    then decoded to ``str`` so the result can be embedded in a data URL.

    Args:
        image_path: Path of the file to encode.

    Returns:
        The Base64 encoding of the file's bytes (empty string for an empty
        file).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
# Directories
# Directory with the tag captions from WD tagger; each image is expected to
# have a .txt file with the same base name here.
txt_dir = './txt/'
# Directory that receives the generated captions.
maintxt_dir = './maintxt/'
image_path = ''
# Ensure the output directory exists
os.makedirs(maintxt_dir, exist_ok=True)
# Collect all JPEG images. Sorted because os.listdir() returns entries in
# arbitrary order, which would make runs non-reproducible.
image_files = sorted(
    f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.jpeg'))
)
total_files = len(image_files)
start_time = time.time()
total_elapsed_time = 0  # Seconds spent inside the completion requests only.
processed_files = 0
skipped_files = 0
# The context manager closes the progress bar even if a request raises.
with tqdm(
    total=total_files,
    unit='file',
    bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]',
) as progress_bar:
    # Process all images in the image directory
    for file_index, image_file in enumerate(image_files):
        base_name = os.path.splitext(image_file)[0]
        image_path = os.path.join(image_dir, image_file)
        txt_file = os.path.join(txt_dir, base_name + '.txt')
        output_file = os.path.join(maintxt_dir, base_name + '.txt')
        # Read tags from the corresponding txt file. UTF-8 is explicit so the
        # read does not depend on the platform locale and matches the
        # encoding used for the output below.
        try:
            with open(txt_file, 'r', encoding='utf-8') as f:
                tags = f.read().strip()
        except FileNotFoundError:
            # One missing tags file should not abort the whole batch.
            print(f"\n\nFile {image_file}\nTags file not found, skipping: {txt_file}")
            skipped_files += 1
            progress_bar.update(1)
            continue
        base64_image = encode_image(image_path)
        step_start_time = time.time()
        # NOTE(review): the model name is hard-coded; the id queried from the
        # server at startup (model_type) is only printed, not used here --
        # confirm this name matches what the server actually serves.
        chat_response = client.chat.completions.create(
            model="./phi3_v14_800-merged",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": f"Make a caption that describe this image. Here is the tags for this image: {tags}"},
                    {
                        "type": "image_url",
                        "image_url": {
                            # Only .jpg/.jpeg files are selected above, so the
                            # JPEG media type is correct for every image.
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        },
                    },
                ],
            }],
            # Extra fields sent in the request body; presumably sampling
            # options understood by the local server -- confirm.
            extra_body={'repetition_penalty': 1.05, 'top_k': -1, 'top_p': 1, 'temperature': 0, 'use_beam_search': True, 'best_of': 5},
        )
        step_time = time.time() - step_start_time
        total_elapsed_time += step_time
        # Estimate: average request time so far times the files still left.
        average_step_time = total_elapsed_time / (processed_files + 1)
        remaining_time = average_step_time * (total_files - file_index - 1)
        # Convert remaining time to hours, minutes and seconds
        remaining_minutes, remaining_seconds = divmod(int(remaining_time), 60)
        remaining_hours, remaining_minutes = divmod(remaining_minutes, 60)
        # Extract the content from the response; it may be None, in which
        # case an empty caption is written instead of crashing.
        content = (chat_response.choices[0].message.content or '').lstrip()
        # Write the content to the output file
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"\n\nFile {image_file}\nProcessing time: {step_time:.2f} seconds\n{content}")
        print(f"Response saved to file: {output_file}")
        processed_files += 1
        progress_bar.update(1)
        progress_bar.set_postfix(remaining=f'{remaining_hours:02d}:{remaining_minutes:02d}:{remaining_seconds:02d}', refresh=True)
print("All images processed.")
if skipped_files:
    print(f"Skipped {skipped_files} image(s) without a tags file.")
print(f"Total time: {time.time() - start_time:.2f} seconds")