first commit

This commit is contained in:
hediehloo 2025-11-23 13:05:33 +00:00
commit 83ad2b88c0
3 changed files with 229 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
.env

39
src/configuration.py Normal file
View File

@@ -0,0 +1,39 @@
class Configuration:
    """Builds the LLM prompt templates used for question-generation setup."""

    def __init__(self):
        pass

    def get_config_prompt(self, passage, character, language):
        """Return the prompt that asks the model to select Character,
        Question_Type and Difficulty for *passage* and emit JSON.

        Args:
            passage: The source passage, interpolated verbatim; written in
                the language named by *language*.
            character: Candidate character description (English).
            language: Name of the language the passage is written in.

        Returns:
            The fully-interpolated prompt string.
        """
        # NOTE(review): the original template contained two stray lines
        # holding only "," between the examples section and the final
        # instruction; they were removed here as extraction artifacts —
        # confirm no intended content was lost there.
        config_prompt = f"""Given a **Passage** and **Character**, select the appropriate option from
three fields: Character, Question_Type, Difficulty, and return the output
in JSON format.
First, select the Character who are likely to be interested in the Passage
from the candidates. Then select the Question_Type that the Character
might ask about the Passage; Finally, choose the Difficulty of the
possible question based on the Passage, the Character, and the
Question_Type.
Character: Given by input **Character**
Question_Type:
- keywords: ...
- acquire_knowledge: ...
- summary: ...
- yes_or_no: ...
- background: ...
Difficulty:
- high_school: ...
- university: ...
- phd: ...
Here are some examples
<Example1> <Example2> <Example3>
Now, generate the **output** based on the **Passage** and **Character** from
user, the **Passage** will be in {language} language and the **Character**
will be in English.
Ensure to generate only the JSON output with content in English.
**Passage**:
{passage}
**Character**:
{character}"""
        return config_prompt

189
src/openai_responder.py Normal file
View File

@@ -0,0 +1,189 @@
import importlib
import importlib.util
import json
import os
import time
from types import SimpleNamespace
def import_lib(file_path, module_name, package_name=None):
    """Dynamically import the Python file at *file_path* as *module_name*.

    Args:
        file_path: Filesystem path to a .py file.
        module_name: Name to register the module under for this import.
        package_name: Optional attribute name; when given, only that
            attribute of the loaded module is returned.

    Returns:
        The loaded module, or ``getattr(module, package_name)`` when
        *package_name* is not None.

    Raises:
        FileNotFoundError: If *file_path* does not exist.
        AttributeError: If *package_name* is not defined by the module.
    """
    # Requires the explicit `import importlib.util` at the top of the file:
    # a bare `import importlib` does not guarantee the `util` submodule is
    # loaded, so the original code could fail with AttributeError.
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    imported_file = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(imported_file)
    if package_name is not None:
        return getattr(imported_file, package_name)
    return imported_file
class OpenAIResponder:
    """Wrapper around an OpenAI client for single chat-completion calls and
    batch-API jobs, tracking the dollar cost of each response.

    Prices are supplied per one million tokens and converted to per-token
    rates. Batch input/output artifacts are written under ``<this dir>/data``.
    """

    def __init__(self,
                 client,
                 model="gpt-4o-mini",
                 price_per_1m_input_tokens=0,
                 price_per_1m_output_tokens=0) -> None:
        # Directory of this source file; data/ artifacts are stored beside it.
        self.file_path = os.path.dirname(__file__)
        self.client = client
        self.model = model
        # Convert "per 1M tokens" pricing to per-token rates.
        self.price_per_input_tokens = price_per_1m_input_tokens / 1000000
        self.price_per_output_tokens = price_per_1m_output_tokens / 1000000
        self.max_search_per_prompt = 1
        # exist_ok avoids the check-then-create race of the original
        # os.path.exists() guard.
        os.makedirs(self.file_path + "/data", exist_ok=True)

    def get_body_to_request(self, messages, temperature=None):
        """Build a chat-completions request body.

        *temperature* now defaults to None (the original had no default, so
        prepare_bodies crashed with TypeError); None means "omit the field
        and use the API default".
        """
        body = {"model": self.model, "messages": messages, "max_tokens": 8000}
        if temperature is not None:  # `is not None` so temperature=0.0 is still sent
            body["temperature"] = temperature
        return body

    def handle_normal_response(self, response):
        """Extract ``(content, price)`` from a completed response object."""
        content = response.choices[0].message.content
        price = self.compute_price(response)
        return content, price

    def run(self, messages, temperature=None):
        """Send one synchronous chat completion.

        Returns:
            ``(content, price)`` when the model finished with reason "stop";
            ``None`` for any other finish reason (length, content filter, ...)
            — callers must check for None.
        """
        body = self.get_body_to_request(messages, temperature)
        response = self.client.chat.completions.create(**body)
        if response.choices[0].finish_reason == "stop":
            return self.handle_normal_response(response)
        return None

    def prepare_bodies(self, all_messages, use_search=False):
        """Build one request body per conversation in *all_messages*.

        *use_search* is accepted for interface compatibility with
        make_batch_request (which passed it and previously crashed with
        TypeError); it is currently unused.
        """
        return [self.get_body_to_request(messages) for messages in all_messages]

    def prepare_requests(self, bodies):
        """Write *bodies* to the batch-API JSONL input file under data/."""
        reqs = [
            {"custom_id": str(i), "method": "POST",
             "url": "/v1/chat/completions", "body": body}
            for i, body in enumerate(bodies)
        ]
        # ensure_ascii=False keeps non-Latin text (e.g. Persian) readable.
        with open(self.file_path + "/data/batchinput.jsonl", "w", encoding="utf-8") as f:
            for entry in reqs:
                f.write(f"{json.dumps(entry, ensure_ascii=False)}\n")

    def batch_request(self):
        """Upload the prepared JSONL file and start a 24h batch job."""
        # Context manager closes the upload handle (the original leaked the
        # open file object).
        with open(self.file_path + "/data/batchinput.jsonl", "rb") as f:
            batch_input_file = self.client.files.create(file=f, purpose="batch")
        batch = self.client.batches.create(
            input_file_id=batch_input_file.id,
            endpoint="/v1/chat/completions",
            completion_window="24h",
            metadata={
                "description": "nightly eval job"
            }
        )
        return batch

    def check_and_get_batch_result(self, batch):
        """Poll *batch* once per second (up to ~24h), then parse its output.

        Transient retrieve errors are tolerated; after more than 10
        consecutive failures a message is printed and polling continues.

        Returns:
            List of response bodies (SimpleNamespace trees), one per line of
            the batch output file.
        """
        num_error = 0
        for _ in range(86400):  # at most 24 hours of 1-second polls
            if batch.status == "completed":
                break
            time.sleep(1)
            try:
                batch = self.client.batches.retrieve(batch.id)
                num_error = 0  # reset the consecutive-failure counter
            except Exception:
                num_error += 1
                if num_error > 10:
                    print("OpenAIResponder: error: batch = self.client.batches.retrieve(batch.id)")
        file_response = self.client.files.content(batch.output_file_id)
        all_response = []
        for line in file_response.text.split("\n"):
            if not line:
                continue
            # object_hook gives attribute-style access to each JSON object.
            parsed = json.loads(line, object_hook=lambda d: SimpleNamespace(**d))
            all_response.append(parsed.response.body)
        # Keep a copy of the raw batch output for debugging.
        file_response.stream_to_file(self.file_path + "/data/batch_output.jsonl")
        return all_response

    def compute_price(self, response):
        """Dollar cost of *response* computed from its token usage counts."""
        prompt_tokens = response.usage.prompt_tokens
        completion_tokens = response.usage.completion_tokens
        price = prompt_tokens * self.price_per_input_tokens
        price += completion_tokens * self.price_per_output_tokens
        return price

    def make_batch_request(self, all_messages, use_search=False):
        """Prepare, upload and start a batch job; return the batch handle."""
        bodies = self.prepare_bodies(all_messages, use_search)
        self.prepare_requests(bodies)
        return self.batch_request()

    def wait_until_the_answer_is_ready(self, batch, all_messages):
        """Block until *batch* finishes; return ``([contents], [prices])``.

        Responses whose finish_reason is not "stop" are skipped, so the
        returned lists may be shorter than *all_messages*.
        """
        all_response = self.check_and_get_batch_result(batch)
        out = {}
        for i, response in enumerate(all_response):
            if response.choices[0].finish_reason == "stop":
                out[i] = self.handle_normal_response(response)
        # Iterate sorted surviving keys: the original indexed range(len(out))
        # and raised KeyError whenever any response had been skipped.
        keys = sorted(out)
        all_content = [out[k][0] for k in keys]
        prices = [out[k][1] for k in keys]
        return all_content, prices

    def run_batch(self, all_messages, use_search=False):
        """End-to-end batch run: submit *all_messages*, wait, return results."""
        batch = self.make_batch_request(all_messages, use_search)
        return self.wait_until_the_answer_is_ready(batch, all_messages)
def main():
    """Smoke test: send one Persian question synchronously and print "finished".

    Requires OPENAI_API_KEY in the environment (raises KeyError otherwise)
    and the `openai` package (imported by the __main__ guard).
    """
    def get_messages(text):
        # Wrap user text in a minimal conversation with an empty system prompt.
        messages = [
            {"role": "system", "content": ""},
            {"role": "user", "content": text}
        ]
        return messages

    # os.environ[...] directly; the original built a throwaway dict copy.
    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    # gpt-4o-mini list price per 1M tokens, doubled — presumably a safety
    # margin; TODO confirm intent.
    price_per_1m_input_tokens = 0.075 * 2
    price_per_1m_output_tokens = 0.300 * 2
    text = "قیمت دلار چنده؟"
    openai_responder = OpenAIResponder(
        client=client,
        model="gpt-4o-mini",
        price_per_1m_input_tokens=price_per_1m_input_tokens,
        price_per_1m_output_tokens=price_per_1m_output_tokens,
    )
    # run() returns None for non-"stop" finishes; this smoke test ignores that.
    all_content, price = openai_responder.run(get_messages(text))
    print("finished")
if __name__ == "__main__":
    # Deferred imports: `openai` and `dotenv` are third-party packages only
    # needed when this file runs as a script, not when imported as a module.
    from openai import OpenAI
    from dotenv import load_dotenv
    # Loads variables (e.g. OPENAI_API_KEY) from a local .env file into
    # os.environ before main() reads them.
    load_dotenv()
    main()