From 316f6f7388b4c961a074282073e6aacf62233931 Mon Sep 17 00:00:00 2001 From: azhidev Date: Wed, 15 Oct 2025 10:51:30 +0330 Subject: [PATCH] deploy --- __init__.py | 2 +- asr/README.md | 125 ++++ asr/__init__.py | 1 + asr/decode_stream.py | 80 +++ asr/run.py | 164 +++++ asr/service.py | 610 ++++++++++++++++++ requirements.txt | 31 +- setup.py | 30 +- venv310/LICENSE.txt | 25 + venv310/bin/Activate.ps1 | 247 +++++++ venv310/bin/activate | 69 ++ venv310/bin/activate.csh | 26 + venv310/bin/activate.fish | 69 ++ venv310/bin/dotenv | 8 + venv310/bin/f2py | 8 + venv310/bin/numpy-config | 8 + venv310/bin/perf_analyzer | 8 + venv310/bin/pip | 8 + venv310/bin/pip3 | 8 + venv310/bin/pip3.10 | 8 + venv310/bin/python | 1 + venv310/bin/python3 | 1 + venv310/bin/python3.10 | 1 + venv310/bin/websockets | 8 + .../site/python3.10/greenlet/greenlet.h | 164 +++++ venv310/lib64 | 1 + venv310/pyvenv.cfg | 3 + xl/README.md | 35 - xl/__init__.py | 1 - xl/main.py | 67 -- 30 files changed, 1694 insertions(+), 123 deletions(-) create mode 100644 asr/README.md create mode 100644 asr/__init__.py create mode 100644 asr/decode_stream.py create mode 100644 asr/run.py create mode 100644 asr/service.py create mode 100644 venv310/LICENSE.txt create mode 100644 venv310/bin/Activate.ps1 create mode 100644 venv310/bin/activate create mode 100644 venv310/bin/activate.csh create mode 100644 venv310/bin/activate.fish create mode 100755 venv310/bin/dotenv create mode 100755 venv310/bin/f2py create mode 100755 venv310/bin/numpy-config create mode 100755 venv310/bin/perf_analyzer create mode 100755 venv310/bin/pip create mode 100755 venv310/bin/pip3 create mode 100755 venv310/bin/pip3.10 create mode 120000 venv310/bin/python create mode 120000 venv310/bin/python3 create mode 120000 venv310/bin/python3.10 create mode 100755 venv310/bin/websockets create mode 100644 venv310/include/site/python3.10/greenlet/greenlet.h create mode 120000 venv310/lib64 create mode 100644 venv310/pyvenv.cfg delete mode 100644 xl/README.md delete mode 100644 xl/__init__.py delete mode 100644 xl/main.py diff --git a/__init__.py b/__init__.py index 82b5aa8..b98c1ea 100644 --- a/__init__.py +++ b/__init__.py @@ -1 +1 @@ -from .xl import * \ No newline at end of file +from .asr import * diff --git a/asr/README.md b/asr/README.md new file mode 100644 index 0000000..e3a24d0 --- /dev/null +++ b/asr/README.md @@ -0,0 +1,125 @@ +# Triton ASR Client + +Async Python client for sending audio to a Triton ASR service using **SSE** or **WebSocket** streaming. +It yields real-time transcription results as JSON-like events. 
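+
+In SSE mode each yielded event is a ready-to-send `data: {...}\n\n` string (see "Example Output"
+below). If you prefer plain dictionaries, one frame can be unpacked with a small helper such as
+this sketch (`parse_sse_event` is a hypothetical name, not part of the client):
+
+```python
+import json
+
+def parse_sse_event(evt: str) -> dict:
+    # evt looks like: 'data: {"time": ..., "text": "...", "is_final": false}\n\n'
+    return json.loads(evt.removeprefix("data: ").strip())
+```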
+ +--- + +## Features +- 🚀 Async SSE & WebSocket streaming modes +- 🎧 Streams audio bytes directly +- 🧩 Yields parsed dict events (partial/final text, status, errors) +- 🔧 Simple integration with Triton Inference Server + +--- + +## Requirements +- Python 3.10+ +- `tritonclient[grpc]`, `tritonclient[http]`, `websockets` + +Install: +```bash +pip install -r requirements.txt +``` + +--- + +## Environment +Set your Triton endpoint (default: `localhost:8001`): + +```bash +export TRITON_URL="your.triton.server:8001" +``` + +--- + +## Example — SSE Mode +```python +import asyncio +from .service import TritonGrpcClient +import json, os +from pathlib import Path + +TRITON_URL = os.getenv("TRITON_URL", "localhost:8001") +client = TritonGrpcClient(triton_url=TRITON_URL) + +async def asr_sse_mode(client, audio_path: Path): + print("=== SSE MODE ===") + raw = audio_path.read_bytes() + try: + async for s in client.event_stream_from_bytes(raw=raw, filename=audio_path.name): + print("[SSE]", json.dumps(s, ensure_ascii=False)) + except Exception as exc: + print("SSE error:", exc) + +asyncio.run(asr_sse_mode(client, Path("/test_audio_file.mp3"))) +``` + +--- + +## Example — WebSocket Mode +```python +import asyncio +from .service import TritonGrpcClient +import json, os +from pathlib import Path + +TRITON_URL = os.getenv("TRITON_URL", "localhost:8001") +client = TritonGrpcClient(triton_url=TRITON_URL) + +async def asr_ws_mode(client, audio_path: Path): + print("=== WS MODE ===") + raw = audio_path.read_bytes() + try: + async for s in client.event_stream_via_ws(raw=raw, filename=audio_path.name): + print("[WS]", json.dumps(s, ensure_ascii=False)) + except Exception as exc: + print("WS error:", exc) + +asyncio.run(asr_ws_mode(client, Path("/test_audio_file.mp3"))) +``` + +--- + +## Example Output +Below is a real example of SSE output while streaming an MP3 file: + +``` +[BG] Entering stream_transcript loop +[SSE] OUT: "data: {"time": 1760512211.4691734, "text": "سلام وقت", "is_final": false}\n\n" +[SSE] OUT: "data: {"time": 1760512211.55668, "text": "به‌خیر", "is_final": false}\n\n" +... 
+[SSE] OUT: "data: {"time": 1760512226.2526345, "text": "سلام وقت‌به‌خیر امروز درباره طراحی جدید صحبت می‌کنیم", "is_final": true}\n\n" +``` + +Each event contains: +- `time`: event timestamp +- `text`: recognized speech fragment +- `is_final`: indicates final transcript segment + +--- + +## API Overview +| Method | Description | +|---------|-------------| +| `TritonGrpcClient(triton_url)` | Create a client connected to Triton | +| `event_stream_from_bytes(raw, filename)` | SSE-based audio streaming | +| `event_stream_via_ws(raw, filename)` | WebSocket-based audio streaming | + +Each stream yields dict events like: +```python +{ + "time": 1760512211.47, + "text": "hello world", + "is_final": True +} +``` + +--- + +## Troubleshooting +- **Connection refused** → Check `TRITON_URL` and Triton server status +- **Bad event data** → Verify model/gateway returns valid JSON events +- **WS handshake failed** → Ensure the server supports WebSocket + +--- diff --git a/asr/__init__.py b/asr/__init__.py new file mode 100644 index 0000000..2762695 --- /dev/null +++ b/asr/__init__.py @@ -0,0 +1 @@ +from .service import TritonGrpcClient diff --git a/asr/decode_stream.py b/asr/decode_stream.py new file mode 100644 index 0000000..e0342d2 --- /dev/null +++ b/asr/decode_stream.py @@ -0,0 +1,80 @@ +# decode_stream.py +import time +import threading +import av +from av.audio.resampler import AudioResampler +import asyncio + + +class HLSReader: + def __init__( + self, + url: str, + *, + sample_rate: int = 48_000, + channels: int = 1, + chunk_ms: int = 20, + ): + self.url = url + self.sample_rate = sample_rate + self.channels = channels + self.chunk_ms = chunk_ms + + self._stop_evt = threading.Event() + self._thread: threading.Thread | None = None + self._loop: asyncio.AbstractEventLoop | None = None + self._queue: "asyncio.Queue[bytes] | None" = None + + # derived + self._bytes_per_sample = 2 # s16 + self._samples_per_chunk = int(self.sample_rate * (self.chunk_ms / 1000.0)) + self._chunk_bytes = self._samples_per_chunk * self.channels * self._bytes_per_sample + + def start(self, loop: asyncio.AbstractEventLoop, out_queue: "asyncio.Queue[bytes]") -> None: + """Begin reading in a daemon thread, pushing chunks onto out_queue (on 'loop').""" + self._loop = loop + self._queue = out_queue + self._thread = threading.Thread(target=self._run, name=f"hls-{id(self):x}", daemon=True) + self._thread.start() + + def stop(self) -> None: + self._stop_evt.set() + + # ---------- internal ---------- + def _run(self) -> None: + assert self._loop and self._queue + while not self._stop_evt.is_set(): + try: + container = av.open(self.url, mode="r") + audio_stream = next(s for s in container.streams if s.type == "audio") + + resampler = AudioResampler( + format="s16", + layout="mono" if self.channels == 1 else "stereo", + rate=self.sample_rate, + ) + + buf = bytearray() + for packet in container.demux(audio_stream): + if self._stop_evt.is_set(): + break + for frame in packet.decode(): + for out in resampler.resample(frame): + buf.extend(out.planes[0].to_bytes()) + while len(buf) >= self._chunk_bytes: + chunk = bytes(buf[: self._chunk_bytes]) + del buf[: self._chunk_bytes] + # push into asyncio queue on the event loop thread + self._loop.call_soon_threadsafe(self._queue.put_nowait, chunk) + + # Some HLS variants EOF → reopen + try: + container.close() + except Exception: + pass + if not self._stop_evt.is_set(): + time.sleep(0.5) + + except Exception: + # transient network/playlist issues → backoff & retry + time.sleep(1.0) diff --git 
a/asr/run.py b/asr/run.py new file mode 100644 index 0000000..85ec3ad --- /dev/null +++ b/asr/run.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +""" +run.py — demo runner for TritonGrpcClient service methods. + +Usage: + python run.py # auto-find an audio file in cwd and run both modes + python run.py /path/to/file # run both modes on a specific file + python run.py --mode sse # run only SSE-mode + python run.py --mode ws # run only WebSocket-mode +""" + +import argparse +import asyncio +import json +import os +import sys +from pathlib import Path +from typing import List + +# Adjust import to where your client class lives +# e.g. from triton_client import TritonGrpcClient +from .service import TritonGrpcClient + +# Small helper to find an audio file if none provided +AUDIO_EXTS = [".wav", ".mp3", ".m4a", ".flac", ".ogg", ".aac"] + + +def find_first_audio_in_cwd() -> Path | None: + cwd = Path.cwd() + for p in cwd.iterdir(): + if p.suffix.lower() in AUDIO_EXTS and p.is_file(): + return p + return None + + +# ---------- Fake WebSocket for exercising websocket_stream_from_websocket ---------- +class FakeWebSocket: + """ + Minimal fake WebSocket implementing the methods used by websocket_stream_from_websocket: + - accept() + - receive() -> dict with "bytes" or "text" + - send_json(obj) + - close() + It streams the provided bytes in small binary frames, then a JSON text frame {"event":"end"}. + """ + + def __init__(self, data: bytes, frame_size: int = 16 * 1024): + self._data = data + self._frame_size = frame_size + self._offset = 0 + self._sent_end = False + self.sent_messages: List[dict] = [] + self.closed = False + + async def accept(self): + # server expects to optionally call accept; nothing to do + print("[FakeWebSocket] accept() called") + + async def receive(self): + """ + Return one frame at a time: + - {"bytes": b"..."} while data remains + - then {"text": json.dumps({"event":"end"})} + After that, sleep forever (server won't call receive again in your code). 
+ """ + if self._offset < len(self._data): + end = min(len(self._data), self._offset + self._frame_size) + chunk = self._data[self._offset : end] + self._offset = end + # mimic the WebSocket dict shape used in your code + return {"bytes": chunk} + if not self._sent_end: + self._sent_end = True + return {"text": json.dumps({"event": "end"})} + # Block a bit — server should have stopped receiving after 'end' + await asyncio.sleep(3600) + return {} + + async def send_json(self, obj): + # server sends results here; capture & print + self.sent_messages.append(obj) + print("[FakeWebSocket] send_json:", json.dumps(obj, ensure_ascii=False)) + + async def close(self): + self.closed = True + print("[FakeWebSocket] close() called") + + +# ---------- Demo runners ---------- +async def asr_sse_mode(client: TritonGrpcClient, audio_path: Path): + print("\n=== SSE MODE (event_stream_from_bytes) ===") + raw = audio_path.read_bytes() + # event_stream_from_bytes returns an async generator — iterate it + try: + async for s in client.event_stream_from_bytes(raw=raw, filename=audio_path.name): + # s is already a dict-like object emitted by your SSE generator + print("[SSE] OUT:", json.dumps(s, ensure_ascii=False)) + except Exception as exc: + print("[SSE] Exception while streaming SSE:", exc) + + +async def asr_ws_mode(client: TritonGrpcClient, audio_path: Path): + print("\n=== WEBSOCKET MODE (websocket_stream_from_websocket) ===") + raw = audio_path.read_bytes() + fake_ws = FakeWebSocket(raw, frame_size=16 * 1024) + + # Run the server-side websocket handler. It will call fake_ws.receive() and fake_ws.send_json() + try: + await client.websocket_stream_from_websocket(fake_ws, filename=audio_path.name) + except Exception as exc: + print("[WS] Exception while running websocket_stream_from_websocket:", exc) + finally: + print("[WS] Collected messages from server (send_json calls):") + for i, m in enumerate(fake_ws.sent_messages, 1): + print(f" [{i}] {json.dumps(m, ensure_ascii=False)}") + + +async def main_async(audio_path: Path, modes: List[str]): + client = TritonGrpcClient() # init; if your client needs args, adjust here + + if "sse" in modes: + await asr_sse_mode(client, audio_path) + + if "ws" in modes: + await asr_ws_mode(client, audio_path) + + +def parse_args(): + p = argparse.ArgumentParser() + p.add_argument("file", nargs="?", help="audio file path (optional). If omitted, searches cwd.") + p.add_argument( + "--mode", + choices=["sse", "ws", "both"], + default="both", + help="Which method(s) to run against the service", + ) + return p.parse_args() + + +def main(): + args = parse_args() + if args.file: + audio_path = Path(args.file) + if not audio_path.exists(): + print("Audio file does not exist:", audio_path) + sys.exit(2) + else: + found = find_first_audio_in_cwd() + if not found: + print("No audio file found in cwd. 
Place an audio file (wav/mp3/m4a/flac/ogg) here or pass a path.") + sys.exit(2) + audio_path = found + + modes = ["sse", "ws"] if args.mode == "both" else [args.mode] + print(f"Using audio file: {audio_path} — running modes: {modes}") + + try: + asyncio.run(main_async(audio_path, modes)) + except KeyboardInterrupt: + print("Interrupted.") + + +if __name__ == "__main__": + main() diff --git a/asr/service.py b/asr/service.py new file mode 100644 index 0000000..702d155 --- /dev/null +++ b/asr/service.py @@ -0,0 +1,610 @@ +# clients/triton_grpc_client.py +from __future__ import annotations + +import os, uuid, time, json, asyncio, tempfile +from pathlib import Path +from contextlib import asynccontextmanager, suppress +from typing import AsyncGenerator, List, Tuple, Optional, Callable, Awaitable, TYPE_CHECKING + +import numpy as np +from pydub import AudioSegment +import tritonclient.grpc.aio as grpcclient +from tritonclient.utils import np_to_triton_dtype +import websockets +from dotenv import load_dotenv + +load_dotenv() + +# if TYPE_CHECKING: +# from fastapi import WebSocket +# from starlette.websockets import WebSocketDisconnect + +# ---- constants (same as your code) ---- +SAMPLE_RATE_HZ = 16_000 +INPUT_WAV_TENSOR = "WAV" +INPUT_LEN_TENSOR = "WAV_LENS" +OUTPUT_TEXT_TENSOR = "TRANSCRIPTS" +MODEL_NAME = "transducer" +ZERO_PAD_REQUEST_CONTENT = True +TRITON_URL = os.getenv("TRITON_URL") + + +class _WebsocketsAdapter: + """ + Adapter to make a `websockets` WebSocketServerProtocol behave like + a Starlette/FastAPI WebSocket for the shape your code expects: + - accept() + - receive() -> dict with "bytes" or "text" + - send_json(obj) + - close() + """ + + def __init__(self, ws): + self._ws = ws + self._is_server_protocol = True # semantic flag + + async def accept(self): + # websockets server does its own accept when created; no-op here + return + + async def receive(self): + # websockets.recv() returns bytes or str + data = await self._ws.recv() + if isinstance(data, bytes): + return {"bytes": data} + else: + return {"text": data} + + async def send_json(self, obj): + import json + + await self._ws.send(json.dumps(obj, ensure_ascii=False)) + + async def close(self): + await self._ws.close() + + +class TritonGrpcClient: + def __init__(self, triton_url: str): + self.triton_url = triton_url + + """ + Owns only: audio -> chunking -> Triton -> queue -> streaming. + Keeps producing even if the HTTP/WS request disconnects. + MinIO/DB/file finalize stays outside via callbacks. 
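+
+        Typical entry points (usage sketch; assumes a reachable Triton gRPC endpoint
+        and an audio file on disk, and the names below are illustrative):
+
+            client = TritonGrpcClient(triton_url="localhost:8001")
+
+            # SSE-style: an async generator of ready-to-send "data: {...}\n\n" strings
+            async for evt in client.event_stream_from_bytes(
+                raw=open("sample.mp3", "rb").read(), filename="sample.mp3"
+            ):
+                ...
+
+            # WebSocket-style: hand over an accepted FastAPI/Starlette WebSocket
+            # (or a `websockets` server connection); results are pushed via send_json()
+            await client.websocket_stream_from_websocket(websocket, filename="live")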
+ """ + + # ---------- SSE: return async generator ---------- + def event_stream_from_bytes( + self, + *, + raw: bytes, + filename: str, + content_type: Optional[str] = None, + on_final_text: Optional[Callable[[str], Awaitable[None]]] = None, + on_error: Optional[Callable[[str], Awaitable[None]]] = None, + ) -> AsyncGenerator[str, None]: + async def _gen() -> AsyncGenerator[str, None]: + if not raw: + raise ValueError("Uploaded file is empty") + + tmp_path = self._write_temp_file(raw, filename) + queue: asyncio.Queue[dict[str, str | None]] = asyncio.Queue() + drop_event = asyncio.Event() # set on disconnect → stop emitting only + + # spawn producer and DO NOT await it here; it lives independently + asyncio.create_task( + self._produce_transcripts(tmp_path, queue, on_final_text, on_error, drop_event), + name=f"triton-producer-{uuid.uuid4().hex[:8]}", + ) + + print("[SSE] Client connected", flush=True) + try: + while True: + msg = await queue.get() + if msg.get("event") == "done": + print("[SSE] Job finished → closing stream", flush=True) + break + yield f"data: {json.dumps(msg, ensure_ascii=False)}\n\n" + except asyncio.CancelledError: + # request closed; keep producer alive, just stop emitting + print("[SSE] Client disconnected (background continues)", flush=True) + drop_event.set() + return + finally: + print("[SSE] event_stream END", flush=True) + + return _gen() + + # ---------- WebSocket: same producer, push over WS ---------- + @asynccontextmanager + async def _open_triton(self): + client = grpcclient.InferenceServerClient(self.triton_url, verbose=False) + try: + yield client + finally: + with suppress(Exception): + await client.close() + + async def _produce_transcripts( + self, + tmp_path: Path, + queue: "asyncio.Queue[dict[str, str | None]]", + on_final_text: Optional[Callable[[str], Awaitable[None]]], + on_error: Optional[Callable[[str], Awaitable[None]]], + drop_event: asyncio.Event, + ) -> None: + print("[BG] Started producer", flush=True) + last_msg: dict[str, str] | None = None + + try: + print("[BG] Entering stream_transcript loop", flush=True) + async for last_msg in self._stream_transcript(str(tmp_path)): + if not drop_event.is_set(): + await queue.put(last_msg) + + print("[BG] stream_transcript finished", flush=True) + final_text = (last_msg or {}).get("text", "").strip() + + if on_final_text: + with suppress(Exception): + await on_final_text(final_text) + + except Exception as exc: + print(f"[BG] EXCEPTION: {exc!r}", flush=True) + if on_error: + with suppress(Exception): + await on_error(str(exc)) + if not drop_event.is_set(): + await queue.put({"event": "error", "detail": str(exc)}) + finally: + print("[BG] Cleaning up temp file", flush=True) + with suppress(FileNotFoundError): + tmp_path.unlink(missing_ok=True) + + if not drop_event.is_set(): + await queue.put({"event": "done"}) + print("[BG] producer END", flush=True) + + # Replace your existing websocket_stream_from_websocket with this version + async def websocket_stream_from_websocket( + self, + websocket, + *, + filename: str = "stream", + content_type: Optional[str] = None, + on_final_text: Optional[Callable[..., Awaitable[None]]] = None, + on_error: Optional[Callable[[str], Awaitable[None]]] = None, + accept_in_client: bool = False, + ) -> None: + """ + Supports both: + - Starlette/FastAPI WebSocket objects (they expose .receive(), .send_json(), .accept()) + - `websockets` WebSocketServerProtocol (they expose .recv(), .send(), .close()) + The adapter wraps the latter so the rest of your existing logic can stay 
unchanged. + """ + # Lazy import here so file-level imports need not change + import json + import uuid + import time + import asyncio + import tempfile + from contextlib import suppress + + # If this is a websockets (WebSocketServerProtocol) instance, wrap it + # Heuristic: Starlette has .receive(); websockets has .recv() + if not hasattr(websocket, "receive") and hasattr(websocket, "recv"): + websocket = _WebsocketsAdapter(websocket) + + # If caller requested server-side accept for ASGI websockets, call it. + # For websockets adapter accept() is a no-op. + if accept_in_client: + # In FastAPI typical pattern is server calls accept(); keep that behavior + await websocket.accept() + + # We'll append all raw bytes to a temp file so the endpoint can upload later in the finalizer + src_suffix = Path(filename).suffix or ".bin" + src_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}{src_suffix}" + f_src = src_path.open("wb") + + # Background pipeline control + queue: asyncio.Queue[dict[str, str | None]] = asyncio.Queue() + drop_event = asyncio.Event() # if client disconnects, stop SENDING but keep producing + + async def _call_final_cb(final_text: str) -> None: + if not on_final_text: + return + try: + # Try callback(final_text, source_audio_path) first, fallback to (final_text) + if on_final_text.__code__.co_argcount >= 2: + await on_final_text(final_text, src_path) + else: + await on_final_text(final_text) + except Exception: + pass + + async def recv_frames_and_transcode(proc): + """ + Read frames using the (possibly-adapted) `websocket.receive()` and write to ffmpeg stdin; + close stdin when we get {"event":"end"} or disconnect. + """ + try: + while True: + msg = await websocket.receive() + if "bytes" in msg and msg["bytes"] is not None: + chunk = msg["bytes"] + f_src.write(chunk) + proc.stdin.write(chunk) # type: ignore[attr-defined] + await proc.stdin.drain() # type: ignore[attr-defined] + elif "text" in msg and msg["text"] is not None: + try: + payload = json.loads(msg["text"]) + if payload.get("event") == "end": + break + except Exception: + # ignore non-JSON text frames + pass + else: + # ignore pings/other control frames + await asyncio.sleep(0) + except Exception: + # If underlying connection closed abruptly, just stop receiving + pass + finally: + with suppress(Exception): + f_src.flush() + f_src.close() + with suppress(Exception): + # Some implementations use close(); others have no stdin to close + proc.stdin.close() # type: ignore[attr-defined] + + async def transcribe_from_ffmpeg_stdout(proc): + """ + Read float32 PCM from ffmpeg stdout, chunk on-the-fly, call Triton per chunk, + and push partial/final messages to the queue. 
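+
+            Chunk sizes come from the model config via _get_chunk_sizes(): the first
+            request uses `chunk_size_first` seconds of audio, later requests use
+            `chunk_size` seconds (both converted to sample counts at 16 kHz). A final
+            zero-length request with sequence_end=True flushes the Triton sequence.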
+ """ + # Triton session + chunk sizes + async with self._open_triton() as client: + first_sz, chunk_sz = await self._get_chunk_sizes(client, MODEL_NAME) + seq_id = uuid.uuid4().int & 0x7FFF_FFFF_FFFF_FFFF + full_tx = "" + have_sent_any = False + need = first_sz # samples needed for the next chunk + + # PCM sample buffer (float32) + buf = np.empty(0, dtype=np.float32) + + async def infer_one(raw: np.ndarray, eff_len: int, is_first: bool, is_last: bool): + nonlocal full_tx + wav_np = raw[None, :] # (1, T) + len_np = np.array([[eff_len]], np.int32) + inp_wav = grpcclient.InferInput(INPUT_WAV_TENSOR, wav_np.shape, np_to_triton_dtype(np.float32)) + inp_len = grpcclient.InferInput(INPUT_LEN_TENSOR, len_np.shape, np_to_triton_dtype(np.int32)) + inp_wav.set_data_from_numpy(wav_np) + inp_len.set_data_from_numpy(len_np) + outs = [grpcclient.InferRequestedOutput(OUTPUT_TEXT_TENSOR)] + + resp = await client.infer( + MODEL_NAME, + inputs=[inp_wav, inp_len], + outputs=outs, + sequence_id=seq_id, + sequence_start=is_first, + sequence_end=is_last, + ) + txt = b" ".join(resp.as_numpy(OUTPUT_TEXT_TENSOR)).decode().strip() + if not txt: + return None + delta = txt[len(full_tx) :] if txt.startswith(full_tx) else txt + full_tx = txt + return delta or None + + # Read ffmpeg stdout in bytes; convert to float32 + try: + while True: + chunk = await proc.stdout.read(8192 * 4) # 8192 samples (float32) + if not chunk: + break + # append decoded samples + new = np.frombuffer(chunk, dtype=np.float32) + if new.size == 0: + continue + if buf.size == 0: + buf = new + else: + buf = np.concatenate((buf, new), axis=0) + + # while enough samples for the next piece, send it + while buf.size >= need: + take = need + piece = buf[:take] + buf = buf[take:] + is_first = not have_sent_any + have_sent_any = True + + delta = await infer_one(piece, take, is_first=is_first, is_last=False) + if delta and not drop_event.is_set(): + await queue.put({"time": time.time(), "text": delta, "is_final": False}) + + # after first, normal chunk size + need = chunk_sz + + finally: + # End of stream: flush any remainder and a zero-length piece if configured + if buf.size > 0: + # pad to chunk_sz for model framing, but eff_len is the real (short) length + eff = int(buf.size) + pad = np.zeros(chunk_sz, dtype=np.float32) + pad[:eff] = buf[:eff] + delta = await infer_one(pad, eff, is_first=not have_sent_any, is_last=False) + if delta and not drop_event.is_set(): + await queue.put({"time": time.time(), "text": delta, "is_final": False}) + have_sent_any = True + + if ZERO_PAD_REQUEST_CONTENT: + # zero-length "flush" chunk + zero = np.zeros(0, dtype=np.float32) + await infer_one(zero, 0, is_first=not have_sent_any, is_last=True) + else: + # If not sending the explicit flush, still mark last by sending empty with last=True + zero = np.zeros(0, dtype=np.float32) + await infer_one(zero, 0, is_first=False, is_last=True) + + # Emit final full transcript as a message, like your file-based path + if not drop_event.is_set(): + await queue.put({"time": time.time(), "text": full_tx.strip(), "is_final": True}) + + # Call user finalizer + await _call_final_cb(full_tx.strip()) + + # signal done to sender + if not drop_event.is_set(): + await queue.put({"event": "done"}) + + async def send_messages(): + HEARTBEAT_SECS = 10 + print("[WS] Client connected", flush=True) + try: + while True: + try: + # wait for a real message up to HEARTBEAT_SECS + msg = await asyncio.wait_for(queue.get(), timeout=HEARTBEAT_SECS) + except asyncio.TimeoutError: + # no real message → send 
heartbeat
+                        hb = {"event": "heartbeat", "t": time.time()}
+                        await websocket.send_json(hb)
+                        continue
+
+                    if msg.get("event") == "done":
+                        print("[WS] Job finished → closing socket", flush=True)
+                        break
+
+                    await websocket.send_json(msg)
+
+            except asyncio.CancelledError:
+                print("[WS] Client cancelled (background continues)", flush=True)
+                drop_event.set()
+            except Exception:
+                # Covers abrupt client disconnects (e.g. Starlette's WebSocketDisconnect,
+                # which this module does not import) and send failures.
+                print("[WS] Client disconnected (background continues)", flush=True)
+                drop_event.set()
+            finally:
+                with suppress(Exception):
+                    await websocket.close()
+                print("[WS] send_messages END", flush=True)
+
+        # ----- Start ffmpeg (stdin bytes -> stdout float32 PCM @ 16k mono)
+        proc = await asyncio.create_subprocess_exec(
+            "ffmpeg",
+            "-hide_banner",
+            "-loglevel",
+            "error",
+            "-i",
+            "pipe:0",
+            "-ac",
+            "1",
+            "-ar",
+            str(SAMPLE_RATE_HZ),
+            "-f",
+            "f32le",
+            "pipe:1",
+            stdin=asyncio.subprocess.PIPE,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+
+        async def drain_ffmpeg_stderr(proc):
+            try:
+                while True:
+                    line = await proc.stderr.readline()
+                    if not line:
+                        break
+                    # You can log it if needed:
+                    # print(f"[ffmpeg] {line.decode().rstrip()}", flush=True)
+                    await asyncio.sleep(0)
+            except asyncio.CancelledError:
+                pass
+
+        stderr_task = asyncio.create_task(drain_ffmpeg_stderr(proc), name="ffmpeg-stderr")
+
+        # Run 3 tasks concurrently:
+        # - recv frames & feed ffmpeg
+        # - read pcm & call Triton
+        # - send queue messages to client
+        # Note: recv_frames_and_transcode, transcribe_from_ffmpeg_stdout and send_messages
+        # are the nested coroutines defined above; the adapter makes receive()/send_json()
+        # work for both Starlette and `websockets` connections.
+ tasks = [ + asyncio.create_task(recv_frames_and_transcode(proc), name="ws-recv"), + asyncio.create_task(transcribe_from_ffmpeg_stdout(proc), name="ws-triton"), + asyncio.create_task(send_messages(), name="ws-send"), + stderr_task, + ] + + try: + await asyncio.gather(*tasks) + finally: + for t in tasks: + if not t.done(): + t.cancel() + with suppress(Exception): + proc.terminate() + with suppress(Exception): + await proc.wait() + with suppress(Exception): + if not f_src.closed: + f_src.close() + + @staticmethod + def _write_temp_file(raw: bytes, filename: str) -> Path: + tmp = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}{Path(filename).suffix}" + tmp.write_bytes(raw) + print(f"[REQ] Temp file written at {tmp}", flush=True) + return tmp + + @staticmethod + def _pcm_int16_to_float32(x: np.ndarray) -> np.ndarray: + return (x.astype(np.float32) / 32768.0).clip(-1.0, 1.0) + + def _load_audio(self, path: str, target_sr: int = SAMPLE_RATE_HZ): + audio = AudioSegment.from_file(path) + audio = audio.set_channels(1).set_frame_rate(target_sr).set_sample_width(2) + pcm_int16 = np.frombuffer(audio.raw_data, dtype=np.int16) + return pcm_int16, target_sr + + async def _get_chunk_sizes(self, client: grpcclient.InferenceServerClient, model: str) -> Tuple[int, int]: + try: + cfg = await client.get_model_config(model, as_json=True) + params = {p["key"]: p["value"]["string_param"] for p in cfg.get("parameters", [])} + first_sec = float(params.get("chunk_size_first", params.get("chunk_size", "0.465"))) + norm_sec = float(params.get("chunk_size", "0.32")) + except Exception: + first_sec, norm_sec = 0.465, 0.32 + return int(first_sec * SAMPLE_RATE_HZ), int(norm_sec * SAMPLE_RATE_HZ) + + async def _stream_transcript(self, path: str) -> AsyncGenerator[dict, None]: + """ + Read audio from `path`, chunk it and stream to Triton, yielding partial/final messages. + + Yields dicts of the form: + {"time": , "text": "", "is_final": False} + and finally: + {"time": , "text": "", "is_final": True} + """ + import time + import uuid + from contextlib import suppress + + # Load audio (int16 PCM) and convert to float32 in [-1, 1] + pcm_int16, sr = self._load_audio(path, target_sr=SAMPLE_RATE_HZ) + if pcm_int16.size == 0: + return + + wav = self._pcm_int16_to_float32(pcm_int16) # float32 1-D numpy array + + # Triton client + chunk sizes + async with self._open_triton() as client: + first_sz, chunk_sz = await self._get_chunk_sizes(client, MODEL_NAME) + seq_id = uuid.uuid4().int & 0x7FFF_FFFF_FFFF_FFFF + + full_tx = "" # accumulated full transcript text + have_sent_any = False + + async def infer_one(raw: np.ndarray, eff_len: int, is_first: bool, is_last: bool): + """ + Send one request to Triton and return the delta text (or None). 
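+
+                The model returns the full transcript accumulated so far for this
+                sequence; only the newly appended suffix (the "delta") is returned,
+                so callers can emit incremental text. If the new transcript does not
+                extend the previous one, the whole text is returned instead.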
+ raw: 1-D float32 numpy array length == raw.shape[0] (should be >= eff_len; padded if needed) + eff_len: number of valid samples in raw (int) + """ + nonlocal full_tx + wav_np = raw[None, :] # shape (1, T) + len_np = np.array([[eff_len]], np.int32) + inp_wav = grpcclient.InferInput(INPUT_WAV_TENSOR, wav_np.shape, np_to_triton_dtype(np.float32)) + inp_len = grpcclient.InferInput(INPUT_LEN_TENSOR, len_np.shape, np_to_triton_dtype(np.int32)) + inp_wav.set_data_from_numpy(wav_np) + inp_len.set_data_from_numpy(len_np) + outs = [grpcclient.InferRequestedOutput(OUTPUT_TEXT_TENSOR)] + + resp = await client.infer( + MODEL_NAME, + inputs=[inp_wav, inp_len], + outputs=outs, + sequence_id=seq_id, + sequence_start=is_first, + sequence_end=is_last, + ) + # join parts if model returned multiple tensors; decode bytes -> str + txt = b" ".join(resp.as_numpy(OUTPUT_TEXT_TENSOR)).decode().strip() + if not txt: + return None + delta = txt[len(full_tx) :] if txt.startswith(full_tx) else txt + full_tx = txt + return delta or None + + # iterate over wav in chunks + T = wav.shape[0] + offset = 0 + need = first_sz + # If first_sz is 0 for some reason, fall back to chunk_sz + if need <= 0: + need = chunk_sz + + try: + while offset < T: + take = min(need, T - offset) + piece = wav[offset : offset + take] + + # If piece is shorter than need (shouldn't happen except maybe for first), + # pad to `need` as model may expect framing; eff_len tracks real length. + eff = int(piece.size) + if eff < need: + pad = np.zeros(need, dtype=np.float32) + pad[:eff] = piece + to_send = pad + else: + to_send = piece + + is_first = not have_sent_any + have_sent_any = True + + # Not last unless remaining after this is zero + is_last = False + + delta = await infer_one(to_send, eff, is_first=is_first, is_last=is_last) + if delta: + yield {"time": time.time(), "text": delta, "is_final": False} + + offset += take + need = chunk_sz # after first, normal chunk size + + # End of stream: if there is leftover silence/padding behavior handled above. + # After all real chunks sent, optionally flush remainder (zero-length) as last=True. + if ZERO_PAD_REQUEST_CONTENT: + # send an explicit flush chunk (zero-length) with sequence_end=True + zero = np.zeros(0, dtype=np.float32) + # When sending zero-length, eff_len = 0. Mark is_first True only if never sent anything. 
+ await infer_one(zero, 0, is_first=(not have_sent_any), is_last=True) + else: + # send empty with last=True but not as first + zero = np.zeros(0, dtype=np.float32) + await infer_one(zero, 0, is_first=False, is_last=True) + + # Emit final full transcript + yield {"time": time.time(), "text": full_tx.strip(), "is_final": True} + + finally: + # nothing specific to clean in this generator; Triton client closed by context manager + with suppress(Exception): + pass diff --git a/requirements.txt b/requirements.txt index 24d5b06..a365e65 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,29 @@ -et_xmlfile==2.0.0 -openpyxl==3.1.5 +aiohappyeyeballs==2.6.1 +aiohttp==3.13.0 +aiosignal==1.4.0 +async-timeout==5.0.1 +attrs==25.4.0 +Brotli==1.1.0 +certifi==2025.10.5 +frozenlist==1.8.0 +gevent==25.9.1 +geventhttpclient==2.3.4 +greenlet==3.2.4 +grpcio==1.67.1 +idna==3.11 +multidict==6.7.0 +numpy==2.2.6 +packaging==25.0 +perf-analyzer==2.59.1 +propcache==0.4.1 +protobuf==5.29.5 +pydub==0.25.1 +python-dotenv==1.1.1 +python-rapidjson==1.21 +tritonclient==2.61.0 +typing_extensions==4.15.0 +urllib3==2.5.0 +websockets==15.0.1 +yarl==1.22.0 +zope.event==6.0 +zope.interface==8.0.1 diff --git a/setup.py b/setup.py index 32ed44b..510db5d 100644 --- a/setup.py +++ b/setup.py @@ -1,27 +1,23 @@ -# setup.py - from setuptools import setup, find_packages -import os -# Read requirements from requirements.txt with open("requirements.txt") as f: requirements = f.read().splitlines() setup( - name='bi_utils', - version='0.1.0', - description='A Bi Utils Code', + name="asr_client", + version="0.1.0", + description="extract text from audio file", long_description_content_type="text/markdown", - author='BI', - author_email='BI@bi.ir', - url='https://git.d.aiengines.ir/bi/bi_utils', # Replace with your repository URL + author="BI", + author_email="BI@bi.ir", + url="https://git.d.aiengines.ir/bi/asr_triton_client.git", packages=find_packages(), - install_requires=requirements, # Use the requirements from the file - include_package_data=True, # Ensure non-code files are included if necessary + install_requires=requirements, + include_package_data=True, classifiers=[ - 'Programming Language :: Python :: 3', - 'License :: OSI Approved :: MIT License', # Change if using a different license - 'Operating System :: OS Independent', + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", ], - python_requires='>=3.7', -) \ No newline at end of file + python_requires=">=3.10", +) diff --git a/venv310/LICENSE.txt b/venv310/LICENSE.txt new file mode 100644 index 0000000..b35a534 --- /dev/null +++ b/venv310/LICENSE.txt @@ -0,0 +1,25 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/venv310/bin/Activate.ps1 b/venv310/bin/Activate.ps1 new file mode 100644 index 0000000..b49d77b --- /dev/null +++ b/venv310/bin/Activate.ps1 @@ -0,0 +1,247 @@ +<# +.Synopsis +Activate a Python virtual environment for the current PowerShell session. + +.Description +Pushes the python executable for a virtual environment to the front of the +$Env:PATH environment variable and sets the prompt to signify that you are +in a Python virtual environment. Makes use of the command line switches as +well as the `pyvenv.cfg` file values present in the virtual environment. + +.Parameter VenvDir +Path to the directory that contains the virtual environment to activate. The +default value for this is the parent of the directory that the Activate.ps1 +script is located within. + +.Parameter Prompt +The prompt prefix to display when this virtual environment is activated. By +default, this prompt is the name of the virtual environment folder (VenvDir) +surrounded by parentheses and followed by a single space (ie. '(.venv) '). + +.Example +Activate.ps1 +Activates the Python virtual environment that contains the Activate.ps1 script. + +.Example +Activate.ps1 -Verbose +Activates the Python virtual environment that contains the Activate.ps1 script, +and shows extra information about the activation as it executes. + +.Example +Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv +Activates the Python virtual environment located in the specified location. + +.Example +Activate.ps1 -Prompt "MyPython" +Activates the Python virtual environment that contains the Activate.ps1 script, +and prefixes the current prompt with the specified string (surrounded in +parentheses) while the virtual environment is active. + +.Notes +On Windows, it may be required to enable this Activate.ps1 script by setting the +execution policy for the user. You can do this by issuing the following PowerShell +command: + +PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser + +For more information on Execution Policies: +https://go.microsoft.com/fwlink/?LinkID=135170 + +#> +Param( + [Parameter(Mandatory = $false)] + [String] + $VenvDir, + [Parameter(Mandatory = $false)] + [String] + $Prompt +) + +<# Function declarations --------------------------------------------------- #> + +<# +.Synopsis +Remove all shell session elements added by the Activate script, including the +addition of the virtual environment's Python executable from the beginning of +the PATH variable. + +.Parameter NonDestructive +If present, do not remove this function from the global namespace for the +session. 
+ +#> +function global:deactivate ([switch]$NonDestructive) { + # Revert to original values + + # The prior prompt: + if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) { + Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt + Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT + } + + # The prior PYTHONHOME: + if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) { + Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME + Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME + } + + # The prior PATH: + if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) { + Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH + Remove-Item -Path Env:_OLD_VIRTUAL_PATH + } + + # Just remove the VIRTUAL_ENV altogether: + if (Test-Path -Path Env:VIRTUAL_ENV) { + Remove-Item -Path env:VIRTUAL_ENV + } + + # Just remove VIRTUAL_ENV_PROMPT altogether. + if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) { + Remove-Item -Path env:VIRTUAL_ENV_PROMPT + } + + # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether: + if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) { + Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force + } + + # Leave deactivate function in the global namespace if requested: + if (-not $NonDestructive) { + Remove-Item -Path function:deactivate + } +} + +<# +.Description +Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the +given folder, and returns them in a map. + +For each line in the pyvenv.cfg file, if that line can be parsed into exactly +two strings separated by `=` (with any amount of whitespace surrounding the =) +then it is considered a `key = value` line. The left hand string is the key, +the right hand is the value. + +If the value starts with a `'` or a `"` then the first and last character is +stripped from the value before being captured. + +.Parameter ConfigDir +Path to the directory that contains the `pyvenv.cfg` file. +#> +function Get-PyVenvConfig( + [String] + $ConfigDir +) { + Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg" + + # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue). + $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue + + # An empty map will be returned if no config file is found. + $pyvenvConfig = @{ } + + if ($pyvenvConfigPath) { + + Write-Verbose "File exists, parse `key = value` lines" + $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath + + $pyvenvConfigContent | ForEach-Object { + $keyval = $PSItem -split "\s*=\s*", 2 + if ($keyval[0] -and $keyval[1]) { + $val = $keyval[1] + + # Remove extraneous quotations around a string value. 
+ if ("'""".Contains($val.Substring(0, 1))) { + $val = $val.Substring(1, $val.Length - 2) + } + + $pyvenvConfig[$keyval[0]] = $val + Write-Verbose "Adding Key: '$($keyval[0])'='$val'" + } + } + } + return $pyvenvConfig +} + + +<# Begin Activate script --------------------------------------------------- #> + +# Determine the containing directory of this script +$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition +$VenvExecDir = Get-Item -Path $VenvExecPath + +Write-Verbose "Activation script is located in path: '$VenvExecPath'" +Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)" +Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)" + +# Set values required in priority: CmdLine, ConfigFile, Default +# First, get the location of the virtual environment, it might not be +# VenvExecDir if specified on the command line. +if ($VenvDir) { + Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values" +} +else { + Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir." + $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/") + Write-Verbose "VenvDir=$VenvDir" +} + +# Next, read the `pyvenv.cfg` file to determine any required value such +# as `prompt`. +$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir + +# Next, set the prompt from the command line, or the config file, or +# just use the name of the virtual environment folder. +if ($Prompt) { + Write-Verbose "Prompt specified as argument, using '$Prompt'" +} +else { + Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value" + if ($pyvenvCfg -and $pyvenvCfg['prompt']) { + Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'" + $Prompt = $pyvenvCfg['prompt']; + } + else { + Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)" + Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'" + $Prompt = Split-Path -Path $venvDir -Leaf + } +} + +Write-Verbose "Prompt = '$Prompt'" +Write-Verbose "VenvDir='$VenvDir'" + +# Deactivate any currently active virtual environment, but leave the +# deactivate function in place. +deactivate -nondestructive + +# Now set the environment variable VIRTUAL_ENV, used by many tools to determine +# that there is an activated venv. 
+$env:VIRTUAL_ENV = $VenvDir + +if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) { + + Write-Verbose "Setting prompt to '$Prompt'" + + # Set the prompt to include the env name + # Make sure _OLD_VIRTUAL_PROMPT is global + function global:_OLD_VIRTUAL_PROMPT { "" } + Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT + New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt + + function global:prompt { + Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) " + _OLD_VIRTUAL_PROMPT + } + $env:VIRTUAL_ENV_PROMPT = $Prompt +} + +# Clear PYTHONHOME +if (Test-Path -Path Env:PYTHONHOME) { + Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME + Remove-Item -Path Env:PYTHONHOME +} + +# Add the venv to the PATH +Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH +$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH" diff --git a/venv310/bin/activate b/venv310/bin/activate new file mode 100644 index 0000000..3fbe35d --- /dev/null +++ b/venv310/bin/activate @@ -0,0 +1,69 @@ +# This file must be used with "source bin/activate" *from bash* +# you cannot run it directly + +deactivate () { + # reset old environment variables + if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then + PATH="${_OLD_VIRTUAL_PATH:-}" + export PATH + unset _OLD_VIRTUAL_PATH + fi + if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then + PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}" + export PYTHONHOME + unset _OLD_VIRTUAL_PYTHONHOME + fi + + # This should detect bash and zsh, which have a hash command that must + # be called to get it to forget past commands. Without forgetting + # past commands the $PATH changes we made may not be respected + if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then + hash -r 2> /dev/null + fi + + if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then + PS1="${_OLD_VIRTUAL_PS1:-}" + export PS1 + unset _OLD_VIRTUAL_PS1 + fi + + unset VIRTUAL_ENV + unset VIRTUAL_ENV_PROMPT + if [ ! "${1:-}" = "nondestructive" ] ; then + # Self destruct! + unset -f deactivate + fi +} + +# unset irrelevant variables +deactivate nondestructive + +VIRTUAL_ENV=/home/azhidev/bagher/packages/asr_triton_client/venv310 +export VIRTUAL_ENV + +_OLD_VIRTUAL_PATH="$PATH" +PATH="$VIRTUAL_ENV/"bin":$PATH" +export PATH + +# unset PYTHONHOME if set +# this will fail if PYTHONHOME is set to the empty string (which is bad anyway) +# could use `if (set -u; : $PYTHONHOME) ;` in bash +if [ -n "${PYTHONHOME:-}" ] ; then + _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}" + unset PYTHONHOME +fi + +if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then + _OLD_VIRTUAL_PS1="${PS1:-}" + PS1='(venv310) '"${PS1:-}" + export PS1 + VIRTUAL_ENV_PROMPT='(venv310) ' + export VIRTUAL_ENV_PROMPT +fi + +# This should detect bash and zsh, which have a hash command that must +# be called to get it to forget past commands. Without forgetting +# past commands the $PATH changes we made may not be respected +if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then + hash -r 2> /dev/null +fi diff --git a/venv310/bin/activate.csh b/venv310/bin/activate.csh new file mode 100644 index 0000000..c7dae4e --- /dev/null +++ b/venv310/bin/activate.csh @@ -0,0 +1,26 @@ +# This file must be used with "source bin/activate.csh" *from csh*. +# You cannot run it directly. +# Created by Davide Di Blasi . 
+# Ported to Python 3.3 venv by Andrew Svetlov + +alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate' + +# Unset irrelevant variables. +deactivate nondestructive + +setenv VIRTUAL_ENV /home/azhidev/bagher/packages/asr_triton_client/venv310 + +set _OLD_VIRTUAL_PATH="$PATH" +setenv PATH "$VIRTUAL_ENV/"bin":$PATH" + + +set _OLD_VIRTUAL_PROMPT="$prompt" + +if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then + set prompt = '(venv310) '"$prompt" + setenv VIRTUAL_ENV_PROMPT '(venv310) ' +endif + +alias pydoc python -m pydoc + +rehash diff --git a/venv310/bin/activate.fish b/venv310/bin/activate.fish new file mode 100644 index 0000000..0edb751 --- /dev/null +++ b/venv310/bin/activate.fish @@ -0,0 +1,69 @@ +# This file must be used with "source /bin/activate.fish" *from fish* +# (https://fishshell.com/); you cannot run it directly. + +function deactivate -d "Exit virtual environment and return to normal shell environment" + # reset old environment variables + if test -n "$_OLD_VIRTUAL_PATH" + set -gx PATH $_OLD_VIRTUAL_PATH + set -e _OLD_VIRTUAL_PATH + end + if test -n "$_OLD_VIRTUAL_PYTHONHOME" + set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME + set -e _OLD_VIRTUAL_PYTHONHOME + end + + if test -n "$_OLD_FISH_PROMPT_OVERRIDE" + set -e _OLD_FISH_PROMPT_OVERRIDE + # prevents error when using nested fish instances (Issue #93858) + if functions -q _old_fish_prompt + functions -e fish_prompt + functions -c _old_fish_prompt fish_prompt + functions -e _old_fish_prompt + end + end + + set -e VIRTUAL_ENV + set -e VIRTUAL_ENV_PROMPT + if test "$argv[1]" != "nondestructive" + # Self-destruct! + functions -e deactivate + end +end + +# Unset irrelevant variables. +deactivate nondestructive + +set -gx VIRTUAL_ENV /home/azhidev/bagher/packages/asr_triton_client/venv310 + +set -gx _OLD_VIRTUAL_PATH $PATH +set -gx PATH "$VIRTUAL_ENV/"bin $PATH + +# Unset PYTHONHOME if set. +if set -q PYTHONHOME + set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME + set -e PYTHONHOME +end + +if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" + # fish uses a function instead of an env var to generate the prompt. + + # Save the current fish_prompt function as the function _old_fish_prompt. + functions -c fish_prompt _old_fish_prompt + + # With the original prompt function renamed, we can override with our own. + function fish_prompt + # Save the return status of the last command. + set -l old_status $status + + # Output the venv prompt; color taken from the blue of the Python logo. + printf "%s%s%s" (set_color 4B8BBE) '(venv310) ' (set_color normal) + + # Restore the return status of the previous command. + echo "exit $old_status" | . + # Output the original/"old" prompt. 
+ _old_fish_prompt + end + + set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV" + set -gx VIRTUAL_ENV_PROMPT '(venv310) ' +end diff --git a/venv310/bin/dotenv b/venv310/bin/dotenv new file mode 100755 index 0000000..e0bd01d --- /dev/null +++ b/venv310/bin/dotenv @@ -0,0 +1,8 @@ +#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from dotenv.__main__ import cli +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(cli()) diff --git a/venv310/bin/f2py b/venv310/bin/f2py new file mode 100755 index 0000000..91d0eec --- /dev/null +++ b/venv310/bin/f2py @@ -0,0 +1,8 @@ +#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from numpy.f2py.f2py2e import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/venv310/bin/numpy-config b/venv310/bin/numpy-config new file mode 100755 index 0000000..2fbc565 --- /dev/null +++ b/venv310/bin/numpy-config @@ -0,0 +1,8 @@ +#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from numpy._configtool import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/venv310/bin/perf_analyzer b/venv310/bin/perf_analyzer new file mode 100755 index 0000000..3f49834 --- /dev/null +++ b/venv310/bin/perf_analyzer @@ -0,0 +1,8 @@ +#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from perf_analyzer.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/venv310/bin/pip b/venv310/bin/pip new file mode 100755 index 0000000..89730d4 --- /dev/null +++ b/venv310/bin/pip @@ -0,0 +1,8 @@ +#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/venv310/bin/pip3 b/venv310/bin/pip3 new file mode 100755 index 0000000..89730d4 --- /dev/null +++ b/venv310/bin/pip3 @@ -0,0 +1,8 @@ +#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/venv310/bin/pip3.10 b/venv310/bin/pip3.10 new file mode 100755 index 0000000..89730d4 --- /dev/null +++ b/venv310/bin/pip3.10 @@ -0,0 +1,8 @@ +#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/venv310/bin/python b/venv310/bin/python new file mode 120000 index 0000000..c3cc991 --- /dev/null +++ b/venv310/bin/python @@ -0,0 +1 @@ +python3.10 \ No newline at end of file diff --git a/venv310/bin/python3 b/venv310/bin/python3 new file mode 120000 index 0000000..c3cc991 --- /dev/null +++ b/venv310/bin/python3 @@ -0,0 +1 @@ +python3.10 \ No newline at end of file diff --git a/venv310/bin/python3.10 
b/venv310/bin/python3.10 new file mode 120000 index 0000000..5202249 --- /dev/null +++ b/venv310/bin/python3.10 @@ -0,0 +1 @@ +/usr/bin/python3.10 \ No newline at end of file diff --git a/venv310/bin/websockets b/venv310/bin/websockets new file mode 100755 index 0000000..90acaa1 --- /dev/null +++ b/venv310/bin/websockets @@ -0,0 +1,8 @@ +#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10 +# -*- coding: utf-8 -*- +import re +import sys +from websockets.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/venv310/include/site/python3.10/greenlet/greenlet.h b/venv310/include/site/python3.10/greenlet/greenlet.h new file mode 100644 index 0000000..d02a16e --- /dev/null +++ b/venv310/include/site/python3.10/greenlet/greenlet.h @@ -0,0 +1,164 @@ +/* -*- indent-tabs-mode: nil; tab-width: 4; -*- */ + +/* Greenlet object interface */ + +#ifndef Py_GREENLETOBJECT_H +#define Py_GREENLETOBJECT_H + + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* This is deprecated and undocumented. It does not change. */ +#define GREENLET_VERSION "1.0.0" + +#ifndef GREENLET_MODULE +#define implementation_ptr_t void* +#endif + +typedef struct _greenlet { + PyObject_HEAD + PyObject* weakreflist; + PyObject* dict; + implementation_ptr_t pimpl; +} PyGreenlet; + +#define PyGreenlet_Check(op) (op && PyObject_TypeCheck(op, &PyGreenlet_Type)) + + +/* C API functions */ + +/* Total number of symbols that are exported */ +#define PyGreenlet_API_pointers 12 + +#define PyGreenlet_Type_NUM 0 +#define PyExc_GreenletError_NUM 1 +#define PyExc_GreenletExit_NUM 2 + +#define PyGreenlet_New_NUM 3 +#define PyGreenlet_GetCurrent_NUM 4 +#define PyGreenlet_Throw_NUM 5 +#define PyGreenlet_Switch_NUM 6 +#define PyGreenlet_SetParent_NUM 7 + +#define PyGreenlet_MAIN_NUM 8 +#define PyGreenlet_STARTED_NUM 9 +#define PyGreenlet_ACTIVE_NUM 10 +#define PyGreenlet_GET_PARENT_NUM 11 + +#ifndef GREENLET_MODULE +/* This section is used by modules that uses the greenlet C API */ +static void** _PyGreenlet_API = NULL; + +# define PyGreenlet_Type \ + (*(PyTypeObject*)_PyGreenlet_API[PyGreenlet_Type_NUM]) + +# define PyExc_GreenletError \ + ((PyObject*)_PyGreenlet_API[PyExc_GreenletError_NUM]) + +# define PyExc_GreenletExit \ + ((PyObject*)_PyGreenlet_API[PyExc_GreenletExit_NUM]) + +/* + * PyGreenlet_New(PyObject *args) + * + * greenlet.greenlet(run, parent=None) + */ +# define PyGreenlet_New \ + (*(PyGreenlet * (*)(PyObject * run, PyGreenlet * parent)) \ + _PyGreenlet_API[PyGreenlet_New_NUM]) + +/* + * PyGreenlet_GetCurrent(void) + * + * greenlet.getcurrent() + */ +# define PyGreenlet_GetCurrent \ + (*(PyGreenlet * (*)(void)) _PyGreenlet_API[PyGreenlet_GetCurrent_NUM]) + +/* + * PyGreenlet_Throw( + * PyGreenlet *greenlet, + * PyObject *typ, + * PyObject *val, + * PyObject *tb) + * + * g.throw(...) 
+ */ +# define PyGreenlet_Throw \ + (*(PyObject * (*)(PyGreenlet * self, \ + PyObject * typ, \ + PyObject * val, \ + PyObject * tb)) \ + _PyGreenlet_API[PyGreenlet_Throw_NUM]) + +/* + * PyGreenlet_Switch(PyGreenlet *greenlet, PyObject *args) + * + * g.switch(*args, **kwargs) + */ +# define PyGreenlet_Switch \ + (*(PyObject * \ + (*)(PyGreenlet * greenlet, PyObject * args, PyObject * kwargs)) \ + _PyGreenlet_API[PyGreenlet_Switch_NUM]) + +/* + * PyGreenlet_SetParent(PyObject *greenlet, PyObject *new_parent) + * + * g.parent = new_parent + */ +# define PyGreenlet_SetParent \ + (*(int (*)(PyGreenlet * greenlet, PyGreenlet * nparent)) \ + _PyGreenlet_API[PyGreenlet_SetParent_NUM]) + +/* + * PyGreenlet_GetParent(PyObject* greenlet) + * + * return greenlet.parent; + * + * This could return NULL even if there is no exception active. + * If it does not return NULL, you are responsible for decrementing the + * reference count. + */ +# define PyGreenlet_GetParent \ + (*(PyGreenlet* (*)(PyGreenlet*)) \ + _PyGreenlet_API[PyGreenlet_GET_PARENT_NUM]) + +/* + * deprecated, undocumented alias. + */ +# define PyGreenlet_GET_PARENT PyGreenlet_GetParent + +# define PyGreenlet_MAIN \ + (*(int (*)(PyGreenlet*)) \ + _PyGreenlet_API[PyGreenlet_MAIN_NUM]) + +# define PyGreenlet_STARTED \ + (*(int (*)(PyGreenlet*)) \ + _PyGreenlet_API[PyGreenlet_STARTED_NUM]) + +# define PyGreenlet_ACTIVE \ + (*(int (*)(PyGreenlet*)) \ + _PyGreenlet_API[PyGreenlet_ACTIVE_NUM]) + + + + +/* Macro that imports greenlet and initializes C API */ +/* NOTE: This has actually moved to ``greenlet._greenlet._C_API``, but we + keep the older definition to be sure older code that might have a copy of + the header still works. */ +# define PyGreenlet_Import() \ + { \ + _PyGreenlet_API = (void**)PyCapsule_Import("greenlet._C_API", 0); \ + } + +#endif /* GREENLET_MODULE */ + +#ifdef __cplusplus +} +#endif +#endif /* !Py_GREENLETOBJECT_H */ diff --git a/venv310/lib64 b/venv310/lib64 new file mode 120000 index 0000000..7951405 --- /dev/null +++ b/venv310/lib64 @@ -0,0 +1 @@ +lib \ No newline at end of file diff --git a/venv310/pyvenv.cfg b/venv310/pyvenv.cfg new file mode 100644 index 0000000..13f3678 --- /dev/null +++ b/venv310/pyvenv.cfg @@ -0,0 +1,3 @@ +home = /usr/bin +include-system-site-packages = false +version = 3.10.18 diff --git a/xl/README.md b/xl/README.md deleted file mode 100644 index 4198c14..0000000 --- a/xl/README.md +++ /dev/null @@ -1,35 +0,0 @@ -installation : - -```bash -pip install git+{reponame} -``` - - -example read : - - -``` python -from xl import XL - -file_path = "./1.xlsx" -sheet_name = "Sheet" -with XL(file_path) as excel_to_dict: - data = excel_to_dict.sheet_to_dict(sheet_name) -print(data) -``` - - -example write : - - -``` python -from xl import XL - -file_path = "./1.xlsx" -data = [ - {"Name":"Alice", "Age":30}, - {"Name":"Bob", "Age":25}, - {"Name":"Charlie", "Age":35}, -] -XL(file_path).json_to_sheet(data) -``` \ No newline at end of file diff --git a/xl/__init__.py b/xl/__init__.py deleted file mode 100644 index 7f4821e..0000000 --- a/xl/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .main import XL \ No newline at end of file diff --git a/xl/main.py b/xl/main.py deleted file mode 100644 index 1c7ada6..0000000 --- a/xl/main.py +++ /dev/null @@ -1,67 +0,0 @@ -import openpyxl - -class XL: - def __init__(self, file_path: str): - """ - Initialize the class with the path to the Excel file. 
- """ - self.file_path = file_path - self.workbook = None - - def __enter__(self): - """ - Open the workbook when entering the context. - """ - self.workbook = openpyxl.load_workbook(self.file_path) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """ - Close the workbook when exiting the context. - """ - if self.workbook: - self.workbook.close() - - def sheet_to_dict(self, sheet_name: str): - """ - Convert a sheet's data to a dictionary, removing rows with None values. - """ - if not self.workbook: - raise ValueError("Workbook is not loaded. Ensure you use 'with' to open the file.") - - if sheet_name not in self.workbook.sheetnames: - raise ValueError(f"Sheet '{sheet_name}' not found in the workbook.") - - sheet = self.workbook[sheet_name] - headers = [cell.value for cell in sheet[1] if cell.value is not None] - - data_list = [] - for row in sheet.iter_rows(min_row=2, values_only=True): - row_dict = {headers[i]: cell for i, cell in enumerate(row) if i < len(headers)} - # Remove keys with None values - row_dict = {key: value for key, value in row_dict.items() if value is not None} - if row_dict: # Only include non-empty rows - data_list.append(row_dict) - - return data_list - - def json_to_sheet(self, data:list[dict]): - """ - Convert a json to a sheet. - """ - workbook = openpyxl.Workbook() - sheet = workbook.active - - headers = [] - for item in data: - for key in item: - if key not in headers: - headers.append(key) - - for col_index, header in enumerate(headers): - sheet.cell(row=1, column=col_index+1, value=header) - - for row_index, row_data in enumerate(data): - for col_index, key in enumerate(headers): - sheet.cell(row=row_index+2, column=col_index+1, value=row_data.get(key, "")) - workbook.save(self.file_path)