first commit

This commit is contained in:
hediehloo 2025-11-23 13:05:33 +00:00
commit 83ad2b88c0
3 changed files with 229 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
.env

39
src/configuration.py Normal file
View File

@@ -0,0 +1,39 @@
class Configuration:
    """Builds the LLM prompt templates used for question-generation setup."""

    def __init__(self):
        pass

    def get_config_prompt(self, passage, character, language):
        """Return the prompt that asks the model to select Character,
        Question_Type and Difficulty for *passage* and emit JSON.

        Args:
            passage: The source passage, interpolated verbatim; written in
                the language named by *language*.
            character: Candidate character description (English).
            language: Name of the language the passage is written in.

        Returns:
            The fully-interpolated prompt string.
        """
        # NOTE(review): the original template contained two stray lines
        # holding only "," between the examples section and the final
        # instruction; they were removed here as extraction artifacts —
        # confirm no intended content was lost there.
        config_prompt = f"""Given a **Passage** and **Character**, select the appropriate option from
three fields: Character, Question_Type, Difficulty, and return the output
in JSON format.
First, select the Character who are likely to be interested in the Passage
from the candidates. Then select the Question_Type that the Character
might ask about the Passage; Finally, choose the Difficulty of the
possible question based on the Passage, the Character, and the
Question_Type.
Character: Given by input **Character**
Question_Type:
- keywords: ...
- acquire_knowledge: ...
- summary: ...
- yes_or_no: ...
- background: ...
Difficulty:
- high_school: ...
- university: ...
- phd: ...
Here are some examples
<Example1> <Example2> <Example3>
Now, generate the **output** based on the **Passage** and **Character** from
user, the **Passage** will be in {language} language and the **Character**
will be in English.
Ensure to generate only the JSON output with content in English.
**Passage**:
{passage}
**Character**:
{character}"""
        return config_prompt

189
src/openai_responder.py Normal file
View File

@@ -0,0 +1,189 @@
import importlib
import importlib.util
import json
import os
import time
from types import SimpleNamespace
def import_lib(file_path, module_name, package_name=None):
    """Dynamically import the Python file at *file_path* as *module_name*.

    Args:
        file_path: Filesystem path to a .py file.
        module_name: Name to register the module under for this import.
        package_name: Optional attribute name; when given, only that
            attribute of the loaded module is returned.

    Returns:
        The loaded module, or ``getattr(module, package_name)`` when
        *package_name* is not None.

    Raises:
        FileNotFoundError: If *file_path* does not exist.
        AttributeError: If *package_name* is not defined by the module.
    """
    # Requires the explicit `import importlib.util` at the top of the file:
    # a bare `import importlib` does not guarantee the `util` submodule is
    # loaded, so the original code could fail with AttributeError.
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    imported_file = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(imported_file)
    if package_name is not None:
        return getattr(imported_file, package_name)
    return imported_file
class OpenAIResponder:
    """Wrapper around an OpenAI client for single chat-completion calls and
    batch-API jobs, tracking the dollar cost of each response.

    Prices are supplied per one million tokens and converted to per-token
    rates. Batch input/output artifacts are written under ``<this dir>/data``.
    """

    def __init__(self,
                 client,
                 model="gpt-4o-mini",
                 price_per_1m_input_tokens=0,
                 price_per_1m_output_tokens=0) -> None:
        # Directory of this source file; data/ artifacts are stored beside it.
        self.file_path = os.path.dirname(__file__)
        self.client = client
        self.model = model
        # Convert "per 1M tokens" pricing to per-token rates.
        self.price_per_input_tokens = price_per_1m_input_tokens / 1000000
        self.price_per_output_tokens = price_per_1m_output_tokens / 1000000
        self.max_search_per_prompt = 1
        # exist_ok avoids the check-then-create race of the original
        # os.path.exists() guard.
        os.makedirs(self.file_path + "/data", exist_ok=True)

    def get_body_to_request(self, messages, temperature=None):
        """Build a chat-completions request body.

        *temperature* now defaults to None (the original had no default, so
        prepare_bodies crashed with TypeError); None means "omit the field
        and use the API default".
        """
        body = {"model": self.model, "messages": messages, "max_tokens": 8000}
        if temperature is not None:  # `is not None` so temperature=0.0 is still sent
            body["temperature"] = temperature
        return body

    def handle_normal_response(self, response):
        """Extract ``(content, price)`` from a completed response object."""
        content = response.choices[0].message.content
        price = self.compute_price(response)
        return content, price

    def run(self, messages, temperature=None):
        """Send one synchronous chat completion.

        Returns:
            ``(content, price)`` when the model finished with reason "stop";
            ``None`` for any other finish reason (length, content filter, ...)
            — callers must check for None.
        """
        body = self.get_body_to_request(messages, temperature)
        response = self.client.chat.completions.create(**body)
        if response.choices[0].finish_reason == "stop":
            return self.handle_normal_response(response)
        return None

    def prepare_bodies(self, all_messages, use_search=False):
        """Build one request body per conversation in *all_messages*.

        *use_search* is accepted for interface compatibility with
        make_batch_request (which passed it and previously crashed with
        TypeError); it is currently unused.
        """
        return [self.get_body_to_request(messages) for messages in all_messages]

    def prepare_requests(self, bodies):
        """Write *bodies* to the batch-API JSONL input file under data/."""
        reqs = [
            {"custom_id": str(i), "method": "POST",
             "url": "/v1/chat/completions", "body": body}
            for i, body in enumerate(bodies)
        ]
        # ensure_ascii=False keeps non-Latin text (e.g. Persian) readable.
        with open(self.file_path + "/data/batchinput.jsonl", "w", encoding="utf-8") as f:
            for entry in reqs:
                f.write(f"{json.dumps(entry, ensure_ascii=False)}\n")

    def batch_request(self):
        """Upload the prepared JSONL file and start a 24h batch job."""
        # Context manager closes the upload handle (the original leaked the
        # open file object).
        with open(self.file_path + "/data/batchinput.jsonl", "rb") as f:
            batch_input_file = self.client.files.create(file=f, purpose="batch")
        batch = self.client.batches.create(
            input_file_id=batch_input_file.id,
            endpoint="/v1/chat/completions",
            completion_window="24h",
            metadata={
                "description": "nightly eval job"
            }
        )
        return batch

    def check_and_get_batch_result(self, batch):
        """Poll *batch* once per second (up to ~24h), then parse its output.

        Transient retrieve errors are tolerated; after more than 10
        consecutive failures a message is printed and polling continues.

        Returns:
            List of response bodies (SimpleNamespace trees), one per line of
            the batch output file.
        """
        num_error = 0
        for _ in range(86400):  # at most 24 hours of 1-second polls
            if batch.status == "completed":
                break
            time.sleep(1)
            try:
                batch = self.client.batches.retrieve(batch.id)
                num_error = 0  # reset the consecutive-failure counter
            except Exception:
                num_error += 1
                if num_error > 10:
                    print("OpenAIResponder: error: batch = self.client.batches.retrieve(batch.id)")
        file_response = self.client.files.content(batch.output_file_id)
        all_response = []
        for line in file_response.text.split("\n"):
            if not line:
                continue
            # object_hook gives attribute-style access to each JSON object.
            parsed = json.loads(line, object_hook=lambda d: SimpleNamespace(**d))
            all_response.append(parsed.response.body)
        # Keep a copy of the raw batch output for debugging.
        file_response.stream_to_file(self.file_path + "/data/batch_output.jsonl")
        return all_response

    def compute_price(self, response):
        """Dollar cost of *response* computed from its token usage counts."""
        prompt_tokens = response.usage.prompt_tokens
        completion_tokens = response.usage.completion_tokens
        price = prompt_tokens * self.price_per_input_tokens
        price += completion_tokens * self.price_per_output_tokens
        return price

    def make_batch_request(self, all_messages, use_search=False):
        """Prepare, upload and start a batch job; return the batch handle."""
        bodies = self.prepare_bodies(all_messages, use_search)
        self.prepare_requests(bodies)
        return self.batch_request()

    def wait_until_the_answer_is_ready(self, batch, all_messages):
        """Block until *batch* finishes; return ``([contents], [prices])``.

        Responses whose finish_reason is not "stop" are skipped, so the
        returned lists may be shorter than *all_messages*.
        """
        all_response = self.check_and_get_batch_result(batch)
        out = {}
        for i, response in enumerate(all_response):
            if response.choices[0].finish_reason == "stop":
                out[i] = self.handle_normal_response(response)
        # Iterate sorted surviving keys: the original indexed range(len(out))
        # and raised KeyError whenever any response had been skipped.
        keys = sorted(out)
        all_content = [out[k][0] for k in keys]
        prices = [out[k][1] for k in keys]
        return all_content, prices

    def run_batch(self, all_messages, use_search=False):
        """End-to-end batch run: submit *all_messages*, wait, return results."""
        batch = self.make_batch_request(all_messages, use_search)
        return self.wait_until_the_answer_is_ready(batch, all_messages)
def main():
    """Smoke test: send one Persian question synchronously and print "finished".

    Requires OPENAI_API_KEY in the environment (raises KeyError otherwise)
    and the `openai` package (imported by the __main__ guard).
    """
    def get_messages(text):
        # Wrap user text in a minimal conversation with an empty system prompt.
        messages = [
            {"role": "system", "content": ""},
            {"role": "user", "content": text}
        ]
        return messages

    # os.environ[...] directly; the original built a throwaway dict copy.
    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    # gpt-4o-mini list price per 1M tokens, doubled — presumably a safety
    # margin; TODO confirm intent.
    price_per_1m_input_tokens = 0.075 * 2
    price_per_1m_output_tokens = 0.300 * 2
    text = "قیمت دلار چنده؟"
    openai_responder = OpenAIResponder(
        client=client,
        model="gpt-4o-mini",
        price_per_1m_input_tokens=price_per_1m_input_tokens,
        price_per_1m_output_tokens=price_per_1m_output_tokens,
    )
    # run() returns None for non-"stop" finishes; this smoke test ignores that.
    all_content, price = openai_responder.run(get_messages(text))
    print("finished")
if __name__ == "__main__":
    # Deferred imports: `openai` and `dotenv` are third-party packages only
    # needed when this file runs as a script, not when imported as a module.
    from openai import OpenAI
    from dotenv import load_dotenv
    # Loads variables (e.g. OPENAI_API_KEY) from a local .env file into
    # os.environ before main() reads them.
    load_dotenv()
    main()