first commit
This commit is contained in:
commit
83ad2b88c0
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
.env
|
||||||
39
src/configuration.py
Normal file
39
src/configuration.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
|
||||||
|
|
||||||
|
class Configuration:
    """Builds the instruction prompt used to classify a passage.

    The prompt asks a model to pick, for a given passage and candidate
    characters, the interested Character, a Question_Type, and a Difficulty,
    returned as JSON.
    """

    def __init__(self):
        # No configuration state is needed yet; kept for interface stability.
        pass

    def get_config_prompt(self, passage, character, language):
        """Return the full prompt string for one passage.

        Parameters:
            passage: the passage text (written in `language`).
            character: candidate character description(s), in English.
            language: name of the language the passage is written in.

        Returns:
            The prompt string with `passage`, `character` and `language`
            interpolated.
        """
        # NOTE(review): the original listing contained ",→" line-wrap
        # artifacts from extraction; they were layout markers, not prompt
        # content, and are omitted here.
        config_prompt = f"""Given a **Passage** and **Character**, select the appropriate option from
three fields: Character, Question_Type, Difficulty, and return the output
in JSON format.
First, select the Character who are likely to be interested in the Passage
from the candidates. Then select the Question_Type that the Character
might ask about the Passage; Finally, choose the Difficulty of the
possible question based on the Passage, the Character, and the
Question_Type.
Character: Given by input **Character**
Question_Type:
- keywords: ...
- acquire_knowledge: ...
- summary: ...
- yes_or_no: ...
- background: ...
Difficulty:
- high_school: ...
- university: ...
- phd: ...
Here are some examples
<Example1> <Example2> <Example3>
Now, generate the **output** based on the **Passage** and **Character** from
user, the **Passage** will be in {language} language and the **Character**
will be in English.
Ensure to generate only the JSON output with content in English.
**Passage**:
{passage}
**Character**:
{character}"""
        return config_prompt
|
||||||
189
src/openai_responder.py
Normal file
189
src/openai_responder.py
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
import importlib
import importlib.util
import json
import os
import time
from types import SimpleNamespace
|
||||||
|
|
||||||
|
def import_lib(file_path, module_name, package_name=None):
    """Dynamically import a Python module from an arbitrary file path.

    Parameters:
        file_path: path to the ``.py`` file to load.
        module_name: name to register the loaded module under.
        package_name: optional attribute name; when given, that attribute of
            the loaded module is returned instead of the module itself.

    Returns:
        The loaded module, or ``getattr(module, package_name)``.

    Raises:
        AttributeError: if ``package_name`` is given but not defined in the
            loaded module.
    """
    # BUG FIX: this function needs `importlib.util`, but the file only did
    # `import importlib`, which does not guarantee the `util` submodule is
    # bound.  `import importlib.util` is now added at the top of the file.
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    imported_file = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(imported_file)
    if package_name is not None:
        return getattr(imported_file, package_name)
    return imported_file
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAIResponder:
    """Wrapper around an OpenAI-style client for single and batch chat calls.

    Tracks per-token pricing (derived from per-1M-token prices) and writes
    batch request/response files under ``<this file's dir>/data/``.
    """

    def __init__(self,
                 client,
                 model="gpt-4o-mini",
                 price_per_1m_input_tokens=0,
                 price_per_1m_output_tokens=0) -> None:
        """Store the client/model and derive per-token prices.

        Parameters:
            client: an OpenAI-compatible client object.
            model: model name sent with every request.
            price_per_1m_input_tokens: USD per 1M prompt tokens.
            price_per_1m_output_tokens: USD per 1M completion tokens.
        """
        self.file_path = os.path.dirname(__file__)
        self.client = client
        self.model = model
        # Convert per-1M-token prices to per-token prices once, up front.
        self.price_per_input_tokens = price_per_1m_input_tokens / 1000000
        self.price_per_output_tokens = price_per_1m_output_tokens / 1000000
        self.max_search_per_prompt = 1

        # Batch input/output files live next to this module.
        if not os.path.exists(self.file_path + "/data"):
            os.makedirs(self.file_path + "/data")

    def get_body_to_request(self, messages, temperature=None):
        """Build the request body for one chat-completion call.

        BUG FIX: ``temperature`` now defaults to None so that
        ``prepare_bodies()`` can call this without a temperature (the
        original call raised TypeError on the missing argument).
        """
        body = {"model": self.model, "messages": messages, "max_tokens": 8000}
        if temperature is not None:
            body["temperature"] = temperature
        return body

    def handle_normal_response(self, response):
        """Extract (content, price) from a completed response."""
        content = response.choices[0].message.content
        price = self.compute_price(response)
        return content, price

    def run(self, messages, temperature=None):
        """Run one synchronous chat completion; return (content, price).

        Raises:
            RuntimeError: if the model stopped for any reason other than
                ``"stop"`` (e.g. length/content_filter).  The original code
                returned undefined names in that case (NameError).
        """
        body = self.get_body_to_request(messages, temperature)
        response = self.client.chat.completions.create(**body)

        if response.choices[0].finish_reason == "stop":
            content, price = self.handle_normal_response(response)
            return content, price
        raise RuntimeError(
            "OpenAIResponder.run: unexpected finish_reason "
            f"{response.choices[0].finish_reason!r}")

    def prepare_bodies(self, all_messages, use_search=False):
        """Build one request body per conversation in `all_messages`.

        BUG FIX: accepts ``use_search`` (currently unused) because
        ``make_batch_request()`` passes it; the original signature made that
        call raise TypeError.
        """
        bodies = []
        for messages in all_messages:
            bodies.append(self.get_body_to_request(messages))
        return bodies

    def prepare_requests(self, bodies):
        """Write the batch-API input file, one JSON request per line.

        ``custom_id`` is the request's index, so results can be re-ordered.
        """
        reqs = []
        for i, body in enumerate(bodies):
            req = {"custom_id": str(i), "method": "POST",
                   "url": "/v1/chat/completions", "body": body}
            reqs.append(req)

        with open(self.file_path + "/data/batchinput.jsonl", "w", encoding="utf-8") as f:
            for entry in reqs:
                f.write(f"{json.dumps(entry, ensure_ascii=False)}\n")

    def batch_request(self):
        """Upload the prepared input file and create a 24h batch job."""
        # BUG FIX: the input file is now opened in a `with` block; the
        # original leaked the file handle passed to files.create().
        with open(self.file_path + "/data/batchinput.jsonl", "rb") as f:
            batch_input_file = self.client.files.create(file=f, purpose="batch")

        batch = self.client.batches.create(
            input_file_id=batch_input_file.id,
            endpoint="/v1/chat/completions",
            completion_window="24h",
            metadata={
                "description": "nightly eval job"
            }
        )
        return batch

    def check_and_get_batch_result(self, batch):
        """Poll the batch (up to ~24h, 1s interval) and return response bodies.

        Transient retrieve errors are tolerated; after 10 consecutive
        failures a message is printed but polling continues.
        """
        num_error = 0
        for _ in range(86400):
            if batch.status == "completed":
                break
            time.sleep(1)
            try:
                batch = self.client.batches.retrieve(batch.id)
                num_error = 0
            except Exception as _:
                num_error += 1
                if num_error > 10:
                    print("OpenAIResponder: error: batch = self.client.batches.retrieve(batch.id)")
                continue

        file_response = self.client.files.content(batch.output_file_id)

        # Each non-empty line is one JSON result; parse into attribute-style
        # objects so callers can use response.choices[0]... like the live API.
        all_response = []
        for raw_line in file_response.text.split("\n"):
            if raw_line == "":
                continue
            one_response = json.loads(raw_line, object_hook=lambda d: SimpleNamespace(**d))
            all_response.append(one_response.response.body)

        file_response.stream_to_file(self.file_path + "/data/batch_output.jsonl")

        return all_response

    def compute_price(self, response):
        """Return the USD cost of one response from its token usage."""
        prompt_tokens = response.usage.prompt_tokens
        completion_tokens = response.usage.completion_tokens
        price = prompt_tokens * self.price_per_input_tokens
        price += completion_tokens * self.price_per_output_tokens
        return price

    def make_batch_request(self, all_messages, use_search=False):
        """Prepare bodies, write the input file, and submit the batch."""
        bodies = self.prepare_bodies(all_messages, use_search)
        self.prepare_requests(bodies)
        batch = self.batch_request()
        return batch

    def wait_until_the_answer_is_ready(self, batch, all_messages):
        """Block until the batch finishes; return (contents, prices) lists.

        Responses whose finish_reason is not "stop" are skipped.
        """
        all_response = self.check_and_get_batch_result(batch)

        out = dict()
        for i, response in enumerate(all_response):
            if response.choices[0].finish_reason == "stop":
                content, price = self.handle_normal_response(response)
                out[i] = (content, price)

        # BUG FIX: iterate the keys actually present, in order.  The original
        # `out[i] for i in range(len(out))` raised KeyError whenever any
        # response was skipped (indices then had gaps).
        results = [out[k] for k in sorted(out)]
        all_content = [content for content, _ in results]
        prices = [price for _, price in results]
        return all_content, prices

    def run_batch(self, all_messages, use_search=False):
        """Submit a batch for `all_messages` and wait for its results."""
        batch = self.make_batch_request(all_messages, use_search)
        all_content, prices = self.wait_until_the_answer_is_ready(batch, all_messages)
        return all_content, prices
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Manual smoke test: send one chat request and print when done.

    Requires OPENAI_API_KEY in the environment (loaded via dotenv in the
    __main__ guard) and network access.
    """
    def get_messages(text):
        return [
            {"role": "system", "content": ""},
            {"role": "user", "content": text},
        ]

    client = OpenAI(api_key=dict(os.environ)["OPENAI_API_KEY"])

    # gpt-4o-mini list prices, doubled (USD per 1M tokens).
    price_per_1m_input_tokens = 0.075 * 2
    price_per_1m_output_tokens = 0.300 * 2

    # text = "شرکت openai چیست؟"
    text2 = "تو چی هستی؟"
    text = "قیمت دلار چنده؟"
    all_messages = [get_messages(text)]

    openai_responder = OpenAIResponder(
        client=client,
        model="gpt-4o-mini",
        price_per_1m_input_tokens=price_per_1m_input_tokens,
        price_per_1m_output_tokens=price_per_1m_output_tokens,
    )
    all_content, price = openai_responder.run(get_messages(text))
    # all_content, prices = openai_responder.run_batch(all_messages, use_search=True)

    print("finished")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Third-party imports live inside the guard so importing this module
    # does not require openai/dotenv to be installed.
    from openai import OpenAI
    from dotenv import load_dotenv

    # Pull OPENAI_API_KEY (see .env) into the environment before main() runs.
    load_dotenv()
    main()
|
||||||
Loading…
x
Reference in New Issue
Block a user