first commit
This commit is contained in:
commit
83ad2b88c0
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
.env
|
||||||
39
src/configuration.py
Normal file
39
src/configuration.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
|
||||||
|
|
||||||
|
class Configuration:
    """Builds the instruction prompt used to classify a passage.

    The prompt asks a model to pick, for a given passage and candidate
    characters, the interested Character, a Question_Type, and a Difficulty,
    returned as JSON.
    """

    def __init__(self):
        # No configuration state is needed yet; kept for interface stability.
        pass

    def get_config_prompt(self, passage, character, language):
        """Return the full prompt string for one passage.

        Parameters:
            passage: the passage text (written in `language`).
            character: candidate character description(s), in English.
            language: name of the language the passage is written in.

        Returns:
            The prompt string with `passage`, `character` and `language`
            interpolated.
        """
        # NOTE(review): the original listing contained ",→" line-wrap
        # artifacts from extraction; they were layout markers, not prompt
        # content, and are omitted here.
        config_prompt = f"""Given a **Passage** and **Character**, select the appropriate option from
three fields: Character, Question_Type, Difficulty, and return the output
in JSON format.
First, select the Character who are likely to be interested in the Passage
from the candidates. Then select the Question_Type that the Character
might ask about the Passage; Finally, choose the Difficulty of the
possible question based on the Passage, the Character, and the
Question_Type.
Character: Given by input **Character**
Question_Type:
- keywords: ...
- acquire_knowledge: ...
- summary: ...
- yes_or_no: ...
- background: ...
Difficulty:
- high_school: ...
- university: ...
- phd: ...
Here are some examples
<Example1> <Example2> <Example3>
Now, generate the **output** based on the **Passage** and **Character** from
user, the **Passage** will be in {language} language and the **Character**
will be in English.
Ensure to generate only the JSON output with content in English.
**Passage**:
{passage}
**Character**:
{character}"""
        return config_prompt
|
||||||
189
src/openai_responder.py
Normal file
189
src/openai_responder.py
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
import importlib
import importlib.util
import json
import os
import time
from types import SimpleNamespace
|
||||||
|
|
||||||
|
def import_lib(file_path, module_name, package_name=None):
    """Dynamically import a Python module from an arbitrary file path.

    Parameters:
        file_path: path to the ``.py`` file to load.
        module_name: name to register the loaded module under.
        package_name: optional attribute name; when given, that attribute of
            the loaded module is returned instead of the module itself.

    Returns:
        The loaded module, or ``getattr(module, package_name)``.

    Raises:
        AttributeError: if ``package_name`` is given but not defined in the
            loaded module.
    """
    # BUG FIX: this function needs `importlib.util`, but the file only did
    # `import importlib`, which does not guarantee the `util` submodule is
    # bound.  `import importlib.util` is now added at the top of the file.
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    imported_file = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(imported_file)
    if package_name is not None:
        return getattr(imported_file, package_name)
    return imported_file
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAIResponder:
    """Wrapper around an OpenAI-style client for single and batch chat calls.

    Tracks per-token pricing (derived from per-1M-token prices) and writes
    batch request/response files under ``<this file's dir>/data/``.
    """

    def __init__(self,
                 client,
                 model="gpt-4o-mini",
                 price_per_1m_input_tokens=0,
                 price_per_1m_output_tokens=0) -> None:
        """Store the client/model and derive per-token prices.

        Parameters:
            client: an OpenAI-compatible client object.
            model: model name sent with every request.
            price_per_1m_input_tokens: USD per 1M prompt tokens.
            price_per_1m_output_tokens: USD per 1M completion tokens.
        """
        self.file_path = os.path.dirname(__file__)
        self.client = client
        self.model = model
        # Convert per-1M-token prices to per-token prices once, up front.
        self.price_per_input_tokens = price_per_1m_input_tokens / 1000000
        self.price_per_output_tokens = price_per_1m_output_tokens / 1000000
        self.max_search_per_prompt = 1

        # Batch input/output files live next to this module.
        if not os.path.exists(self.file_path + "/data"):
            os.makedirs(self.file_path + "/data")

    def get_body_to_request(self, messages, temperature=None):
        """Build the request body for one chat-completion call.

        BUG FIX: ``temperature`` now defaults to None so that
        ``prepare_bodies()`` can call this without a temperature (the
        original call raised TypeError on the missing argument).
        """
        body = {"model": self.model, "messages": messages, "max_tokens": 8000}
        if temperature is not None:
            body["temperature"] = temperature
        return body

    def handle_normal_response(self, response):
        """Extract (content, price) from a completed response."""
        content = response.choices[0].message.content
        price = self.compute_price(response)
        return content, price

    def run(self, messages, temperature=None):
        """Run one synchronous chat completion; return (content, price).

        Raises:
            RuntimeError: if the model stopped for any reason other than
                ``"stop"`` (e.g. length/content_filter).  The original code
                returned undefined names in that case (NameError).
        """
        body = self.get_body_to_request(messages, temperature)
        response = self.client.chat.completions.create(**body)

        if response.choices[0].finish_reason == "stop":
            content, price = self.handle_normal_response(response)
            return content, price
        raise RuntimeError(
            "OpenAIResponder.run: unexpected finish_reason "
            f"{response.choices[0].finish_reason!r}")

    def prepare_bodies(self, all_messages, use_search=False):
        """Build one request body per conversation in `all_messages`.

        BUG FIX: accepts ``use_search`` (currently unused) because
        ``make_batch_request()`` passes it; the original signature made that
        call raise TypeError.
        """
        bodies = []
        for messages in all_messages:
            bodies.append(self.get_body_to_request(messages))
        return bodies

    def prepare_requests(self, bodies):
        """Write the batch-API input file, one JSON request per line.

        ``custom_id`` is the request's index, so results can be re-ordered.
        """
        reqs = []
        for i, body in enumerate(bodies):
            req = {"custom_id": str(i), "method": "POST",
                   "url": "/v1/chat/completions", "body": body}
            reqs.append(req)

        with open(self.file_path + "/data/batchinput.jsonl", "w", encoding="utf-8") as f:
            for entry in reqs:
                f.write(f"{json.dumps(entry, ensure_ascii=False)}\n")

    def batch_request(self):
        """Upload the prepared input file and create a 24h batch job."""
        # BUG FIX: the input file is now opened in a `with` block; the
        # original leaked the file handle passed to files.create().
        with open(self.file_path + "/data/batchinput.jsonl", "rb") as f:
            batch_input_file = self.client.files.create(file=f, purpose="batch")

        batch = self.client.batches.create(
            input_file_id=batch_input_file.id,
            endpoint="/v1/chat/completions",
            completion_window="24h",
            metadata={
                "description": "nightly eval job"
            }
        )
        return batch

    def check_and_get_batch_result(self, batch):
        """Poll the batch (up to ~24h, 1s interval) and return response bodies.

        Transient retrieve errors are tolerated; after 10 consecutive
        failures a message is printed but polling continues.
        """
        num_error = 0
        for _ in range(86400):
            if batch.status == "completed":
                break
            time.sleep(1)
            try:
                batch = self.client.batches.retrieve(batch.id)
                num_error = 0
            except Exception as _:
                num_error += 1
                if num_error > 10:
                    print("OpenAIResponder: error: batch = self.client.batches.retrieve(batch.id)")
                continue

        file_response = self.client.files.content(batch.output_file_id)

        # Each non-empty line is one JSON result; parse into attribute-style
        # objects so callers can use response.choices[0]... like the live API.
        all_response = []
        for raw_line in file_response.text.split("\n"):
            if raw_line == "":
                continue
            one_response = json.loads(raw_line, object_hook=lambda d: SimpleNamespace(**d))
            all_response.append(one_response.response.body)

        file_response.stream_to_file(self.file_path + "/data/batch_output.jsonl")

        return all_response

    def compute_price(self, response):
        """Return the USD cost of one response from its token usage."""
        prompt_tokens = response.usage.prompt_tokens
        completion_tokens = response.usage.completion_tokens
        price = prompt_tokens * self.price_per_input_tokens
        price += completion_tokens * self.price_per_output_tokens
        return price

    def make_batch_request(self, all_messages, use_search=False):
        """Prepare bodies, write the input file, and submit the batch."""
        bodies = self.prepare_bodies(all_messages, use_search)
        self.prepare_requests(bodies)
        batch = self.batch_request()
        return batch

    def wait_until_the_answer_is_ready(self, batch, all_messages):
        """Block until the batch finishes; return (contents, prices) lists.

        Responses whose finish_reason is not "stop" are skipped.
        """
        all_response = self.check_and_get_batch_result(batch)

        out = dict()
        for i, response in enumerate(all_response):
            if response.choices[0].finish_reason == "stop":
                content, price = self.handle_normal_response(response)
                out[i] = (content, price)

        # BUG FIX: iterate the keys actually present, in order.  The original
        # `out[i] for i in range(len(out))` raised KeyError whenever any
        # response was skipped (indices then had gaps).
        results = [out[k] for k in sorted(out)]
        all_content = [content for content, _ in results]
        prices = [price for _, price in results]
        return all_content, prices

    def run_batch(self, all_messages, use_search=False):
        """Submit a batch for `all_messages` and wait for its results."""
        batch = self.make_batch_request(all_messages, use_search)
        all_content, prices = self.wait_until_the_answer_is_ready(batch, all_messages)
        return all_content, prices
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Manual smoke test: send one chat request and print when done.

    Requires OPENAI_API_KEY in the environment (loaded via dotenv in the
    __main__ guard) and network access.
    """
    def get_messages(text):
        return [
            {"role": "system", "content": ""},
            {"role": "user", "content": text},
        ]

    client = OpenAI(api_key=dict(os.environ)["OPENAI_API_KEY"])

    # gpt-4o-mini list prices, doubled (USD per 1M tokens).
    price_per_1m_input_tokens = 0.075 * 2
    price_per_1m_output_tokens = 0.300 * 2

    # text = "شرکت openai چیست؟"
    text2 = "تو چی هستی؟"
    text = "قیمت دلار چنده؟"
    all_messages = [get_messages(text)]

    openai_responder = OpenAIResponder(
        client=client,
        model="gpt-4o-mini",
        price_per_1m_input_tokens=price_per_1m_input_tokens,
        price_per_1m_output_tokens=price_per_1m_output_tokens,
    )
    all_content, price = openai_responder.run(get_messages(text))
    # all_content, prices = openai_responder.run_batch(all_messages, use_search=True)

    print("finished")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Third-party imports live inside the guard so importing this module
    # does not require openai/dotenv to be installed.
    from openai import OpenAI
    from dotenv import load_dotenv

    # Pull OPENAI_API_KEY (see .env) into the environment before main() runs.
    load_dotenv()
    main()
|
||||||
Loading…
x
Reference in New Issue
Block a user