|
| 1 | +from openai import OpenAI |
| 2 | +from pydantic import BaseModel |
| 3 | +import os |
| 4 | +from typing import Union |
| 5 | +from PIL import Image |
| 6 | +import json |
| 7 | +from concurrent.futures import ThreadPoolExecutor, as_completed |
| 8 | +import sys |
| 9 | + |
# Make this script's directory and the repository root importable so the
# sibling modules below resolve no matter where the script is launched from.
current_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = os.path.abspath(os.path.join(current_dir, ".."))

for _extra_path in (current_dir, root_dir):
    sys.path.insert(0, _extra_path)

from base_verifier import BaseVerifier
from utils import convert_to_bytes
| 18 | + |
| 19 | + |
class Score(BaseModel):
    """One graded dimension of an evaluation: an integer rating plus the
    judge model's free-text justification.

    Populated by the structured-output parse in ``OpenAIVerifier.score``.
    """

    # Integer rating assigned by the judge model for this dimension.
    score: int
    # The model's rationale for the score above.
    explanation: str
| 24 | + |
class Grading(BaseModel):
    """Full grading rubric returned by the judge model.

    One ``Score`` per metric; the field names mirror
    ``OpenAIVerifier.SUPPORTED_METRIC_CHOICES``. Used as the
    ``response_format`` for the structured chat-completions parse.
    """

    accuracy_to_prompt: Score
    creativity_and_originality: Score
    visual_quality_and_realism: Score
    consistency_and_cohesion: Score
    emotional_or_thematic_resonance: Score
    overall_score: Score
| 32 | + |
| 33 | + |
class OpenAIVerifier(BaseVerifier):
    """Judge that scores (prompt, image) pairs with an OpenAI chat model.

    Each pair is graded on the metrics in ``SUPPORTED_METRIC_CHOICES`` by
    requesting a structured ``Grading`` response from the model.
    """

    SUPPORTED_METRIC_CHOICES = [
        "accuracy_to_prompt",
        "creativity_and_originality",
        "visual_quality_and_realism",
        "consistency_and_cohesion",
        "emotional_or_thematic_resonance",
        "overall_score",
    ]

    def __init__(self, seed=1994, model_name="gpt-4o-2024-11-20", **kwargs):
        """Create the OpenAI client and initialize the base verifier.

        Args:
            seed: Seed forwarded to the API for best-effort determinism.
            model_name: Chat model used for grading.
            **kwargs: May contain ``prompt_path``, forwarded to ``BaseVerifier``.
        """
        # Requires OPENAI_API_KEY in the environment.
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        super().__init__(seed=seed, prompt_path=kwargs.pop("prompt_path", None))
        self.model_name = model_name
        self.seed = seed

    def prepare_inputs(self, images: Union[list[Image.Image], Image.Image], prompts: Union[list[str], str], **kwargs):
        """Build one user chat message per (prompt, image) pair.

        Args:
            images: A single image or a list of images.
            prompts: A single prompt or a list of prompts, same length as
                ``images``.

        Returns:
            A list of chat messages, each pairing a prompt with its image
            encoded as a base64 JPEG data URL.

        Raises:
            ValueError: If the number of prompts and images differ.
        """
        images = images if isinstance(images, list) else [images]
        prompts = prompts if isinstance(prompts, list) else [prompts]

        # `zip` would silently drop the surplus items on a length mismatch,
        # scoring the wrong subset — fail loudly instead.
        if len(prompts) != len(images):
            raise ValueError(
                f"Got {len(prompts)} prompts but {len(images)} images; counts must match."
            )

        inputs = []
        for prompt, image in zip(prompts, images):
            base64_image = convert_to_bytes(image, b64_encode=True)
            inputs.append(
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
                    ],
                }
            )

        return inputs

    def score(self, inputs, **kwargs) -> list[dict[str, float]]:
        """Score every prepared input concurrently.

        Args:
            inputs: Messages produced by :meth:`prepare_inputs`.

        Returns:
            One grading dict per input, **in the same order as** ``inputs``.
            Inputs whose API call failed are skipped (the error is printed).
        """
        if not inputs:
            # Also avoids ThreadPoolExecutor(max_workers=0), which raises.
            return []

        system_message = {"role": "system", "content": self.verifier_prompt}

        def call_generate_content(parts):
            conversation = [system_message, parts]
            response = self.client.beta.chat.completions.parse(
                model=self.model_name,
                messages=conversation,
                temperature=1,
                seed=self.seed,  # was stored but never forwarded to the API
                response_format=Grading,
            )
            return response.choices[0].message.parsed.model_dump()

        results = []
        max_workers = min(len(inputs), 4)
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(call_generate_content, group) for group in inputs]
            # Collect in submission order — `as_completed` returned results in
            # completion order, making them impossible to match back to the
            # prompt/image they were graded against.
            for future in futures:
                try:
                    results.append(future.result())
                except Exception as e:
                    # Best-effort: report and continue with remaining inputs.
                    print(f"An error occurred during API call: {e}")
        return results
| 92 | + |
| 93 | + |
# Smoke test: grade two (prompt, image URL) pairs and dump the results.
if __name__ == "__main__":
    verifier = OpenAIVerifier()
    image_urls = [
        (
            "realistic photo a shiny black SUV car with a mountain in the background.",
            "https://huggingface.co/datasets/sayakpaul/sample-datasets/resolve/main/flux-edit-artifacts/assets/car.jpg",
        ),
        (
            "photo a green and funny creature standing in front a lightweight forest.",
            "https://huggingface.co/datasets/sayakpaul/sample-datasets/resolve/main/flux-edit-artifacts/assets/green_creature.jpg",
        ),
    ]

    # Split the (prompt, url) pairs into the parallel lists the verifier expects.
    prompts = [text for text, _ in image_urls]
    images = [path_or_url for _, path_or_url in image_urls]

    inputs = verifier.prepare_inputs(images=images, prompts=prompts)
    response = verifier.score(inputs)

    with open("results.json", "w") as f:
        json.dump(response, f)

    print(json.dumps(response, indent=4))