import os

import numpy as np
import requests
from dotenv import load_dotenv
from hazm import Normalizer

# Load variables from a local .env file (if any) into the process environment
# so that EMBEDDING_PASS can be read when a TextEmbedder is constructed.
load_dotenv()


class TextEmbedder:
    """Client that normalizes texts and embeds them via a remote HTTP endpoint.

    The endpoint is expected to accept a JSON body with ``model`` and
    ``input`` keys and to answer with a JSON object whose ``data`` list holds
    one ``{"embedding": [...]}`` entry per returned vector.
    """

    def __init__(
        self,
        model_name="BAAI/bge-m3",
        *,
        api_url="http://78.38.161.78:3094/v1/embeddings",
        timeout=60.0,
    ):
        """Configure the embedding client.

        Args:
            model_name: Model identifier sent in the request payload.
            api_url: URL of the embeddings endpoint that receives the POST.
            timeout: Seconds to wait for the server before ``requests``
                raises a timeout error. Pass ``None`` to wait indefinitely.
        """
        self.model_name = model_name
        self.api_url = api_url
        self.timeout = timeout
        # The bearer token is read from the environment once, at construction
        # time. NOTE(review): if EMBEDDING_PASS is unset this sends the
        # literal "Bearer None" -- confirm whether that should fail fast.
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {os.getenv('EMBEDDING_PASS')}",
        }
        self.normalizer = Normalizer()

    def preprocess_embedder(self, text: str):
        """Return ``text`` prepared for embedding.

        Newline characters are replaced with ``"."`` and the result is passed
        through the hazm ``Normalizer``.
        """
        text = text.replace("\n", ".")
        return self.normalizer.normalize(text)

    def embed_texts(self, texts: list[str]) -> list[np.ndarray]:
        """Embed ``texts`` with the configured model.

        Args:
            texts: Strings to embed. An empty input returns ``[]`` without
                contacting the server.

        Returns:
            One ``float32`` NumPy array per entry of the response's ``data``
            list, in the order the server returned them.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.RequestException: On connection failures or timeouts.
        """
        # Truthiness (rather than ``== []``) also short-circuits other empty
        # sequences, avoiding a pointless round trip.
        if not texts:
            return []

        cleaned = [self.preprocess_embedder(text) for text in texts]
        payload = {
            "model": self.model_name,
            "input": cleaned,
        }

        # Always bound the wait: without a timeout a stalled server would
        # block the caller forever.
        response = requests.post(
            self.api_url,
            headers=self.headers,
            json=payload,
            timeout=self.timeout,
        )
        # Surface HTTP errors explicitly instead of failing later with an
        # opaque KeyError on the missing "data" field.
        response.raise_for_status()

        return [
            np.array(item["embedding"], dtype=np.float32)
            for item in response.json()["data"]
        ]