commit 83ad2b88c0b76df53fdd1ae15f432130021ea46d
Author: hediehloo
Date:   Sun Nov 23 13:05:33 2025 +0000

    first commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4c49bd7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.env

diff --git a/src/configuration.py b/src/configuration.py
new file mode 100644
index 0000000..f1705b8
--- /dev/null
+++ b/src/configuration.py
@@ -0,0 +1,35 @@
+class Configuration:
+    def __init__(self):
+        pass
+
+    def get_config_prompt(self, passage, character, language):
+        config_prompt = f"""Given a **Passage** and a **Character**, select the appropriate option for
+three fields: Character, Question_Type, Difficulty, and return the output
+in JSON format.
+First, select the Character who is likely to be interested in the Passage
+from the candidates. Then select the Question_Type that the Character
+might ask about the Passage. Finally, choose the Difficulty of the
+possible question based on the Passage, the Character, and the
+Question_Type.
+Character: Given by input **Character**
+Question_Type:
+- keywords: ...
+- acquire_knowledge: ...
+- summary: ...
+- yes_or_no: ...
+- background: ...
+Difficulty:
+- high_school: ...
+- university: ...
+- phd: ...
+Here are some examples
+
+Now, generate the **output** based on the **Passage** and **Character** from
+the user; the **Passage** will be in {language} and the **Character**
+will be in English.
+Ensure to generate only the JSON output, with content in English.
+**Passage**:
+{passage}
+**Character**:
+{character}"""
+        return config_prompt
\ No newline at end of file
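
A minimal sketch (not part of the commit) of how get_config_prompt might feed a chat call, assuming the OpenAIResponder defined in src/openai_responder.py below; the passage, character, and language values are placeholders:

    import json
    import os
    from openai import OpenAI
    from configuration import Configuration
    from openai_responder import OpenAIResponder

    # Build the instruction prompt for one passage/character pair.
    prompt = Configuration().get_config_prompt(
        passage="...", character="student", language="Persian")
    responder = OpenAIResponder(OpenAI(api_key=os.environ["OPENAI_API_KEY"]))
    content, _price = responder.run([{"role": "user", "content": prompt}])
    # Assumes the model obeys the "only the JSON output" instruction;
    # expected keys: Character, Question_Type, Difficulty.
    parsed = json.loads(content)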
diff --git a/src/openai_responder.py b/src/openai_responder.py
new file mode 100644
index 0000000..abae37c
--- /dev/null
+++ b/src/openai_responder.py
@@ -0,0 +1,179 @@
+import importlib.util
+import json
+import os
+import time
+from types import SimpleNamespace
+
+
+def import_lib(file_path, module_name, package_name=None):
+    """Load a module from an explicit file path; optionally return one attribute of it."""
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    imported_file = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(imported_file)
+    if package_name is not None:
+        return getattr(imported_file, package_name)
+    return imported_file
+
+
+class OpenAIResponder:
+    def __init__(self,
+                 client,
+                 model="gpt-4o-mini",
+                 price_per_1m_input_tokens=0,
+                 price_per_1m_output_tokens=0) -> None:
+        self.file_path = os.path.dirname(__file__)
+        self.client = client
+        self.model = model
+        # Convert USD per 1M tokens into USD per token.
+        self.price_per_input_tokens = price_per_1m_input_tokens / 1_000_000
+        self.price_per_output_tokens = price_per_1m_output_tokens / 1_000_000
+        self.max_search_per_prompt = 1
+
+        if not os.path.exists(self.file_path + "/data"):
+            os.makedirs(self.file_path + "/data")
+
+    def get_body_to_request(self, messages, temperature=None):
+        body = {"model": self.model, "messages": messages, "max_tokens": 8000}
+        if temperature is not None:
+            body["temperature"] = temperature
+        return body
+
+    def handle_normal_response(self, response):
+        content = response.choices[0].message.content
+        price = self.compute_price(response)
+        return content, price
+
+    def run(self, messages, temperature=None):
+        body = self.get_body_to_request(messages, temperature)
+        response = self.client.chat.completions.create(**body)
+        if response.choices[0].finish_reason != "stop":
+            raise RuntimeError(
+                f"unexpected finish_reason: {response.choices[0].finish_reason}")
+        return self.handle_normal_response(response)
+
+    def prepare_bodies(self, all_messages):
+        bodies = []
+        for messages in all_messages:
+            bodies.append(self.get_body_to_request(messages))
+        return bodies
+
+    def prepare_requests(self, bodies):
+        # One JSON object per line in the Batch API request format; custom_id
+        # is echoed back in the output so results can be matched to requests.
+        reqs = []
+        for i, body in enumerate(bodies):
+            reqs.append({"custom_id": str(i), "method": "POST",
+                         "url": "/v1/chat/completions", "body": body})
+
+        with open(self.file_path + "/data/batchinput.jsonl", "w", encoding="utf-8") as f:
+            for entry in reqs:
+                f.write(f"{json.dumps(entry, ensure_ascii=False)}\n")
+
+    def batch_request(self):
+        batch_input_file = self.client.files.create(
+            file=open(self.file_path + "/data/batchinput.jsonl", "rb"),
+            purpose="batch"
+        )
+
+        batch = self.client.batches.create(
+            input_file_id=batch_input_file.id,
+            endpoint="/v1/chat/completions",
+            completion_window="24h",
+            metadata={"description": "nightly eval job"}
+        )
+        return batch
+
+    def check_and_get_batch_result(self, batch):
+        # Poll once per second for up to 24 hours; tolerate transient retrieve
+        # errors, logging only after more than 10 in a row.
+        num_error = 0
+        for _ in range(86400):
+            if batch.status == "completed":
+                break
+            time.sleep(1)
+            try:
+                batch = self.client.batches.retrieve(batch.id)
+                num_error = 0
+            except Exception:
+                num_error += 1
+                if num_error > 10:
+                    print("OpenAIResponder: error: batch = self.client.batches.retrieve(batch.id)")
+
+        file_response = self.client.files.content(batch.output_file_id)
+
+        # Each non-empty line of the output file is one JSON response;
+        # SimpleNamespace gives attribute-style access like the SDK objects.
+        all_response = []
+        for r in file_response.text.split("\n"):
+            if r == "":
+                continue
+            one_response = json.loads(r, object_hook=lambda d: SimpleNamespace(**d))
+            all_response.append(one_response.response.body)
+
+        file_response.stream_to_file(self.file_path + "/data/batch_output.jsonl")
+        return all_response
+
+    def compute_price(self, response):
+        prompt_tokens = response.usage.prompt_tokens
+        completion_tokens = response.usage.completion_tokens
+        price = prompt_tokens * self.price_per_input_tokens
+        price += completion_tokens * self.price_per_output_tokens
+        return price
+
+    def make_batch_request(self, all_messages, use_search=False):
+        # use_search is accepted for interface compatibility but not used yet.
+        bodies = self.prepare_bodies(all_messages)
+        self.prepare_requests(bodies)
+        return self.batch_request()
+
+    def wait_until_the_answer_is_ready(self, batch, all_messages):
+        all_response = self.check_and_get_batch_result(batch)
+
+        out = dict()
+        for i, response in enumerate(all_response):
+            if response.choices[0].finish_reason == "stop":
+                out[i] = self.handle_normal_response(response)
+
+        # Index over the keys actually present so responses skipped above
+        # (finish_reason != "stop") do not cause a KeyError.
+        keys = sorted(out)
+        all_content = [out[k][0] for k in keys]
+        prices = [out[k][1] for k in keys]
+        return all_content, prices
+
+    def run_batch(self, all_messages, use_search=False):
+        batch = self.make_batch_request(all_messages, use_search)
+        all_content, prices = self.wait_until_the_answer_is_ready(batch, all_messages)
+        return all_content, prices
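
A condensed sketch (not part of the commit) of how a caller might drive the four-step batch round-trip the class wraps (write the JSONL, upload and create the batch, poll, read results); the prices and questions are placeholders:

    import os
    from openai import OpenAI
    from openai_responder import OpenAIResponder

    responder = OpenAIResponder(OpenAI(api_key=os.environ["OPENAI_API_KEY"]),
                                price_per_1m_input_tokens=0.15,
                                price_per_1m_output_tokens=0.60)
    all_messages = [[{"role": "user", "content": q}] for q in ("q1", "q2")]
    # Writes data/batchinput.jsonl and submits it to the Batch API.
    batch = responder.make_batch_request(all_messages)
    # Blocks until the batch completes, then returns contents and per-request prices.
    contents, prices = responder.wait_until_the_answer_is_ready(batch, all_messages)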
+
+
+def main():
+    def get_messages(text):
+        return [
+            {"role": "system", "content": ""},
+            {"role": "user", "content": text}
+        ]
+
+    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
+    price_per_1m_input_tokens = 0.075 * 2
+    price_per_1m_output_tokens = 0.300 * 2
+    # text = "شرکت openai چیست؟"  # "What is the OpenAI company?"
+    text2 = "تو چی هستی؟"  # "What are you?"
+    text = "قیمت دلار چنده؟"  # "What is the dollar exchange rate?"
+    all_messages = [get_messages(text)]
+
+    openai_responder = OpenAIResponder(
+        client=client, model="gpt-4o-mini",
+        price_per_1m_input_tokens=price_per_1m_input_tokens,
+        price_per_1m_output_tokens=price_per_1m_output_tokens)
+    all_content, price = openai_responder.run(get_messages(text))
+    # all_content, prices = openai_responder.run_batch(all_messages, use_search=True)
+
+    print("finished")
+
+
+if __name__ == "__main__":
+    from openai import OpenAI
+    from dotenv import load_dotenv
+    load_dotenv()
+    main()
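
As a sanity check on compute_price: at the doubled gpt-4o-mini rates set in main() (0.15 and 0.60 USD per 1M tokens), a response that used 1,000 prompt tokens and 500 completion tokens would cost:

    # 1,000 * 0.15/1e6 + 500 * 0.60/1e6 = 0.00015 + 0.00030 = 0.00045 USD
    price = 1_000 * (0.15 / 1_000_000) + 500 * (0.60 / 1_000_000)
    print(f"${price:.6f}")  # -> $0.000450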