first commit
commit 83ad2b88c0
1 .gitignore vendored Normal file
@@ -0,0 +1 @@
.env
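The ignored .env file is where load_dotenv() in src/openai_responder.py (below) looks for the API key; an illustrative layout, with a placeholder value:

# .env (example only; this file is deliberately untracked)
OPENAI_API_KEY=your-key-here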
39 src/configuration.py Normal file

@@ -0,0 +1,39 @@
class Configuration:
    def __init__(self):
        pass

    def get_config_prompt(self, passage, character, language):
        config_prompt = f"""Given a **Passage** and **Character**, select the appropriate option from
three fields: Character, Question_Type, Difficulty, and return the output
in JSON format.
First, select the Character who is likely to be interested in the Passage
from the candidates. Then select the Question_Type that the Character
might ask about the Passage. Finally, choose the Difficulty of the
possible question based on the Passage, the Character, and the
Question_Type.
Character: Given by input **Character**
Question_Type:
- keywords: ...
- acquire_knowledge: ...
- summary: ...
- yes_or_no: ...
- background: ...
Difficulty:
- high_school: ...
- university: ...
- phd: ...
Here are some examples:
<Example1> <Example2> <Example3>
Now, generate the **output** based on the **Passage** and **Character** from
the user; the **Passage** will be in {language} language and the **Character**
will be in English.
Ensure to generate only the JSON output with content in English.
**Passage**:
{passage}
**Character**:
{character}"""
        return config_prompt
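A usage sketch (illustrative, not part of the commit): get_config_prompt only formats the template, so its output can be wrapped in the same message structure that get_messages() builds in src/openai_responder.py below; the passage and character values here are placeholders.

# Illustrative only: plug the formatted prompt into a chat message list.
config = Configuration()
prompt = config.get_config_prompt(passage="...", character="university student", language="English")
messages = [{"role": "system", "content": ""},
            {"role": "user", "content": prompt}]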
189 src/openai_responder.py Normal file

@@ -0,0 +1,189 @@
import json
import importlib.util
import os
import time
from types import SimpleNamespace


def import_lib(file_path, module_name, package_name=None):
    # Load a module directly from a file path; optionally return a single
    # attribute (package_name) from it instead of the whole module.
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    imported_file = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(imported_file)
    if package_name is not None:
        return getattr(imported_file, package_name)
    else:
        return imported_file


class OpenAIResponder:
    def __init__(self,
                 client,
                 model="gpt-4o-mini",
                 price_per_1m_input_tokens=0,
                 price_per_1m_output_tokens=0) -> None:
        self.file_path = os.path.dirname(__file__)
        self.client = client
        self.model = model
        self.price_per_input_tokens = price_per_1m_input_tokens / 1000000
        self.price_per_output_tokens = price_per_1m_output_tokens / 1000000
        self.max_search_per_prompt = 1

        if not os.path.exists(self.file_path + "/data"):
            os.makedirs(self.file_path + "/data")

    def get_body_to_request(self, messages, temperature=None):
        body = {"model": self.model, "messages": messages, "max_tokens": 8000}
        if temperature is not None:
            body["temperature"] = temperature
        return body

    def handle_normal_response(self, response):
        content = response.choices[0].message.content
        price = self.compute_price(response)
        return content, price

    def run(self, messages, temperature=None):
        # Single synchronous request; returns (content, price) when the model
        # stopped normally, and None otherwise.
        body = self.get_body_to_request(messages, temperature)
        response = self.client.chat.completions.create(**body)
        if response.choices[0].finish_reason == "stop":
            content, price = self.handle_normal_response(response)
            return content, price

    def prepare_bodies(self, all_messages):
        bodies = []
        for messages in all_messages:
            bodies.append(self.get_body_to_request(messages))
        return bodies

    def prepare_requests(self, bodies):
        reqs = []
        for i, body in enumerate(bodies):
            req = {"custom_id": str(i), "method": "POST", "url": "/v1/chat/completions", "body": body}
            reqs.append(req)

        with open(self.file_path + "/data/batchinput.jsonl", "w", encoding="utf-8") as f:
            for entry in reqs:
                f.write(f"{json.dumps(entry, ensure_ascii=False)}\n")

        # with open(self.file_path + "/data/batchinput.jsonl", "w") as f:
        #     for entry in reqs:
        #         f.write(f"{json.dumps(entry)}\n")
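    # For reference, each line written to batchinput.jsonl is one request
    # object of the form (illustrative values, shapes taken from the code above):
    # {"custom_id": "0", "method": "POST", "url": "/v1/chat/completions",
    #  "body": {"model": "gpt-4o-mini", "messages": [...], "max_tokens": 8000}}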
    def batch_request(self):
        batch_input_file = self.client.files.create(
            file=open(self.file_path + "/data/batchinput.jsonl", "rb"),
            purpose="batch"
        )
        batch_input_file_id = batch_input_file.id

        batch = self.client.batches.create(
            input_file_id=batch_input_file_id,
            endpoint="/v1/chat/completions",
            completion_window="24h",
            metadata={
                "description": "nightly eval job"
            }
        )

        return batch

    def check_and_get_batch_result(self, batch):
        # Poll for up to 24 hours (86400 one-second sleeps), tolerating up to
        # 10 consecutive retrieve errors before logging and continuing.
        num_error = 0
        for _ in range(86400):
            if batch.status == "completed":
                break
            time.sleep(1)
            try:
                batch = self.client.batches.retrieve(batch.id)
                num_error = 0
            except Exception:
                num_error += 1
                if num_error > 10:
                    print("OpenAIResponder: error: batch = self.client.batches.retrieve(batch.id)")
                continue

        file_response = self.client.files.content(batch.output_file_id)
        # file_response = self.client.files.content(batch.error_file_id)
        # a = json.loads(file_response.text)

        list_of_text_response = file_response.text.split("\n")
        all_response = []
        for r in list_of_text_response:
            if r == "":
                continue
            one_response = json.loads(r, object_hook=lambda d: SimpleNamespace(**d))
            all_response += [one_response.response.body]

        file_response.stream_to_file(self.file_path + "/data/batch_output.jsonl")

        return all_response

    def compute_price(self, response):
        prompt_tokens = response.usage.prompt_tokens
        completion_tokens = response.usage.completion_tokens
        price = prompt_tokens * self.price_per_input_tokens
        price += completion_tokens * self.price_per_output_tokens
        return price

    def make_batch_request(self, all_messages, use_search=False):
        # use_search is accepted for API symmetry but not used yet.
        bodies = self.prepare_bodies(all_messages)
        self.prepare_requests(bodies)
        batch = self.batch_request()
        return batch

    def wait_until_the_answer_is_ready(self, batch, all_messages):
        all_response = self.check_and_get_batch_result(batch)

        out = dict()
        for i, response in enumerate(all_response):
            if response.choices[0].finish_reason == "stop":
                content, price = self.handle_normal_response(response)
                out[i] = (content, price)

        all_content = [out[i][0] for i in range(len(out))]
        prices = [out[i][1] for i in range(len(out))]
        return all_content, prices

    def run_batch(self, all_messages, use_search=False):
        batch = self.make_batch_request(all_messages, use_search)
        all_content, prices = self.wait_until_the_answer_is_ready(batch, all_messages)
        return all_content, prices


def main():
    def get_messages(text):
        messages = [
            {"role": "system", "content": ""},
            {"role": "user", "content": text}
        ]
        return messages

    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    price_per_1m_input_tokens = 0.075 * 2
    price_per_1m_output_tokens = 0.300 * 2
    # text = "What is the OpenAI company?"
    text2 = "What are you?"
    text = "What is the price of the dollar?"
    all_messages = [get_messages(text)]

    openai_responder = OpenAIResponder(client=client, model="gpt-4o-mini",
                                       price_per_1m_input_tokens=price_per_1m_input_tokens,
                                       price_per_1m_output_tokens=price_per_1m_output_tokens)
    all_content, price = openai_responder.run(get_messages(text))
    # all_content, prices = openai_responder.run_batch(all_messages, use_search=True)

    print("finished")


if __name__ == "__main__":
    from openai import OpenAI
    from dotenv import load_dotenv
    load_dotenv()
    main()
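For orientation (not part of the commit): the batch path chains prepare_bodies -> prepare_requests -> batch_request -> check_and_get_batch_result, and run_batch wraps the whole flow. A minimal sketch of that path, assuming OPENAI_API_KEY is set in the environment (e.g. via the .env file above) and that this module is importable as openai_responder:

# Illustrative batch usage; the model name and per-1M-token prices mirror
# the gpt-4o-mini example values used in main() above.
from openai import OpenAI
from openai_responder import OpenAIResponder

client = OpenAI()  # reads OPENAI_API_KEY from the environment
responder = OpenAIResponder(client=client, model="gpt-4o-mini",
                            price_per_1m_input_tokens=0.15,
                            price_per_1m_output_tokens=0.60)
all_messages = [
    [{"role": "user", "content": "First question"}],
    [{"role": "user", "content": "Second question"}],
]
# Blocks (polling for up to 24 hours) until the batch completes, then
# returns the per-request contents and prices.
all_content, prices = responder.run_batch(all_messages)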