Compare commits
No commits in common. "316f6f7388b4c961a074282073e6aacf62233931" and "24b71963e404203ae4589b5aed81f0e38b191616" have entirely different histories.
316f6f7388...24b71963e4
requirements.txt (new file)
@@ -0,0 +1,2 @@
et_xmlfile==2.0.0
openpyxl==3.1.5
.gitignore (vendored)
@@ -26,6 +26,8 @@ share/python-wheels/
 .installed.cfg
 *.egg
 MANIFEST
+requirements.txt
+setup.py
 ultralytics.egg-info

 # PyInstaller
@ -1 +0,0 @@
|
|||||||
from .asr import *
|
|
||||||
asr/README.md
@@ -1,125 +0,0 @@
# Triton ASR Client

Async Python client for sending audio to a Triton ASR service using **SSE** or **WebSocket** streaming.
It yields real-time transcription results as JSON-like events.

---

## Features
- 🚀 Async SSE & WebSocket streaming modes
- 🎧 Streams audio bytes directly
- 🧩 Yields parsed dict events (partial/final text, status, errors)
- 🔧 Simple integration with Triton Inference Server

---

## Requirements
- Python 3.10+
- `tritonclient[grpc]`, `tritonclient[http]`, `websockets`

Install:
```bash
pip install -r requirements.txt
```

---

## Environment
Set your Triton endpoint (default: `localhost:8001`):

```bash
export TRITON_URL="your.triton.server:8001"
```

---

## Example — SSE Mode
```python
import asyncio
from .service import TritonGrpcClient
import json, os
from pathlib import Path

TRITON_URL = os.getenv("TRITON_URL", "localhost:8001")
client = TritonGrpcClient(triton_url=TRITON_URL)

async def asr_sse_mode(client, audio_path: Path):
    print("=== SSE MODE ===")
    raw = audio_path.read_bytes()
    try:
        async for s in client.event_stream_from_bytes(raw=raw, filename=audio_path.name):
            print("[SSE]", json.dumps(s, ensure_ascii=False))
    except Exception as exc:
        print("SSE error:", exc)

asyncio.run(asr_sse_mode(client, Path("/test_audio_file.mp3")))
```

---

## Example — WebSocket Mode
```python
import asyncio
from .service import TritonGrpcClient
import json, os
from pathlib import Path

TRITON_URL = os.getenv("TRITON_URL", "localhost:8001")
client = TritonGrpcClient(triton_url=TRITON_URL)

async def asr_ws_mode(client, audio_path: Path):
    print("=== WS MODE ===")
    raw = audio_path.read_bytes()
    try:
        async for s in client.event_stream_via_ws(raw=raw, filename=audio_path.name):
            print("[WS]", json.dumps(s, ensure_ascii=False))
    except Exception as exc:
        print("WS error:", exc)

asyncio.run(asr_ws_mode(client, Path("/test_audio_file.mp3")))
```

---

## Example Output
Below is a real example of SSE output while streaming an MP3 file:

```
[BG] Entering stream_transcript loop
[SSE] OUT: "data: {"time": 1760512211.4691734, "text": "سلام وقت", "is_final": false}\n\n"
[SSE] OUT: "data: {"time": 1760512211.55668, "text": "بهخیر", "is_final": false}\n\n"
...
[SSE] OUT: "data: {"time": 1760512226.2526345, "text": "سلام وقتبهخیر امروز درباره طراحی جدید صحبت میکنیم", "is_final": true}\n\n"
```

Each event contains:
- `time`: event timestamp
- `text`: recognized speech fragment
- `is_final`: indicates final transcript segment

---

## API Overview
| Method | Description |
|---------|-------------|
| `TritonGrpcClient(triton_url)` | Create a client connected to Triton |
| `event_stream_from_bytes(raw, filename)` | SSE-based audio streaming |
| `event_stream_via_ws(raw, filename)` | WebSocket-based audio streaming |

Each stream yields dict events like:
```python
{
    "time": 1760512211.47,
    "text": "hello world",
    "is_final": True
}
```

---

## Troubleshooting
- **Connection refused** → Check `TRITON_URL` and Triton server status
- **Bad event data** → Verify model/gateway returns valid JSON events
- **WS handshake failed** → Ensure the server supports WebSocket

---
@@ -1 +0,0 @@
from .service import TritonGrpcClient
@@ -1,80 +0,0 @@
# decode_stream.py
import time
import threading
import av
from av.audio.resampler import AudioResampler
import asyncio


class HLSReader:
    def __init__(
        self,
        url: str,
        *,
        sample_rate: int = 48_000,
        channels: int = 1,
        chunk_ms: int = 20,
    ):
        self.url = url
        self.sample_rate = sample_rate
        self.channels = channels
        self.chunk_ms = chunk_ms

        self._stop_evt = threading.Event()
        self._thread: threading.Thread | None = None
        self._loop: asyncio.AbstractEventLoop | None = None
        self._queue: "asyncio.Queue[bytes] | None" = None

        # derived
        self._bytes_per_sample = 2  # s16
        self._samples_per_chunk = int(self.sample_rate * (self.chunk_ms / 1000.0))
        self._chunk_bytes = self._samples_per_chunk * self.channels * self._bytes_per_sample

    def start(self, loop: asyncio.AbstractEventLoop, out_queue: "asyncio.Queue[bytes]") -> None:
        """Begin reading in a daemon thread, pushing chunks onto out_queue (on 'loop')."""
        self._loop = loop
        self._queue = out_queue
        self._thread = threading.Thread(target=self._run, name=f"hls-{id(self):x}", daemon=True)
        self._thread.start()

    def stop(self) -> None:
        self._stop_evt.set()

    # ---------- internal ----------
    def _run(self) -> None:
        assert self._loop and self._queue
        while not self._stop_evt.is_set():
            try:
                container = av.open(self.url, mode="r")
                audio_stream = next(s for s in container.streams if s.type == "audio")

                resampler = AudioResampler(
                    format="s16",
                    layout="mono" if self.channels == 1 else "stereo",
                    rate=self.sample_rate,
                )

                buf = bytearray()
                for packet in container.demux(audio_stream):
                    if self._stop_evt.is_set():
                        break
                    for frame in packet.decode():
                        for out in resampler.resample(frame):
                            buf.extend(out.planes[0].to_bytes())
                            while len(buf) >= self._chunk_bytes:
                                chunk = bytes(buf[: self._chunk_bytes])
                                del buf[: self._chunk_bytes]
                                # push into asyncio queue on the event loop thread
                                self._loop.call_soon_threadsafe(self._queue.put_nowait, chunk)

                # Some HLS variants EOF → reopen
                try:
                    container.close()
                except Exception:
                    pass
                if not self._stop_evt.is_set():
                    time.sleep(0.5)

            except Exception:
                # transient network/playlist issues → backoff & retry
                time.sleep(1.0)
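Aside (not part of the compare view): a minimal sketch of how the deleted `HLSReader` is meant to be driven. `start()` takes the running event loop plus an `asyncio.Queue`, and the reader then pushes fixed-size s16 PCM chunks onto that queue from its daemon thread. The module name `decode_stream` (taken from the file's header comment) and the HLS URL are assumptions for illustration.

```python
import asyncio

from decode_stream import HLSReader  # assumed module name, per the file's "# decode_stream.py" header


async def consume(url: str) -> None:
    queue: asyncio.Queue[bytes] = asyncio.Queue()
    reader = HLSReader(url, sample_rate=48_000, channels=1, chunk_ms=20)
    reader.start(asyncio.get_running_loop(), queue)  # chunks arrive on `queue`
    try:
        while True:
            chunk = await queue.get()  # 20 ms of mono 16-bit PCM (1920 bytes at 48 kHz)
            print(f"got {len(chunk)} bytes")
    finally:
        reader.stop()


# asyncio.run(consume("https://example.com/live/stream.m3u8"))  # hypothetical HLS URL
```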
asr/run.py
@@ -1,164 +0,0 @@
#!/usr/bin/env python3
"""
run.py — demo runner for TritonGrpcClient service methods.

Usage:
  python run.py                  # auto-find an audio file in cwd and run both modes
  python run.py /path/to/file    # run both modes on a specific file
  python run.py --mode sse       # run only SSE-mode
  python run.py --mode ws        # run only WebSocket-mode
"""

import argparse
import asyncio
import json
import os
import sys
from pathlib import Path
from typing import List

# Adjust import to where your client class lives
# e.g. from triton_client import TritonGrpcClient
from .service import TritonGrpcClient

# Small helper to find an audio file if none provided
AUDIO_EXTS = [".wav", ".mp3", ".m4a", ".flac", ".ogg", ".aac"]


def find_first_audio_in_cwd() -> Path | None:
    cwd = Path.cwd()
    for p in cwd.iterdir():
        if p.suffix.lower() in AUDIO_EXTS and p.is_file():
            return p
    return None


# ---------- Fake WebSocket for exercising websocket_stream_from_websocket ----------
class FakeWebSocket:
    """
    Minimal fake WebSocket implementing the methods used by websocket_stream_from_websocket:
      - accept()
      - receive() -> dict with "bytes" or "text"
      - send_json(obj)
      - close()
    It streams the provided bytes in small binary frames, then a JSON text frame {"event":"end"}.
    """

    def __init__(self, data: bytes, frame_size: int = 16 * 1024):
        self._data = data
        self._frame_size = frame_size
        self._offset = 0
        self._sent_end = False
        self.sent_messages: List[dict] = []
        self.closed = False

    async def accept(self):
        # server expects to optionally call accept; nothing to do
        print("[FakeWebSocket] accept() called")

    async def receive(self):
        """
        Return one frame at a time:
          - {"bytes": b"..."} while data remains
          - then {"text": json.dumps({"event":"end"})}
        After that, sleep forever (server won't call receive again in your code).
        """
        if self._offset < len(self._data):
            end = min(len(self._data), self._offset + self._frame_size)
            chunk = self._data[self._offset : end]
            self._offset = end
            # mimic the WebSocket dict shape used in your code
            return {"bytes": chunk}
        if not self._sent_end:
            self._sent_end = True
            return {"text": json.dumps({"event": "end"})}
        # Block a bit — server should have stopped receiving after 'end'
        await asyncio.sleep(3600)
        return {}

    async def send_json(self, obj):
        # server sends results here; capture & print
        self.sent_messages.append(obj)
        print("[FakeWebSocket] send_json:", json.dumps(obj, ensure_ascii=False))

    async def close(self):
        self.closed = True
        print("[FakeWebSocket] close() called")


# ---------- Demo runners ----------
async def asr_sse_mode(client: TritonGrpcClient, audio_path: Path):
    print("\n=== SSE MODE (event_stream_from_bytes) ===")
    raw = audio_path.read_bytes()
    # event_stream_from_bytes returns an async generator — iterate it
    try:
        async for s in client.event_stream_from_bytes(raw=raw, filename=audio_path.name):
            # s is already a dict-like object emitted by your SSE generator
            print("[SSE] OUT:", json.dumps(s, ensure_ascii=False))
    except Exception as exc:
        print("[SSE] Exception while streaming SSE:", exc)


async def asr_ws_mode(client: TritonGrpcClient, audio_path: Path):
    print("\n=== WEBSOCKET MODE (websocket_stream_from_websocket) ===")
    raw = audio_path.read_bytes()
    fake_ws = FakeWebSocket(raw, frame_size=16 * 1024)

    # Run the server-side websocket handler. It will call fake_ws.receive() and fake_ws.send_json()
    try:
        await client.websocket_stream_from_websocket(fake_ws, filename=audio_path.name)
    except Exception as exc:
        print("[WS] Exception while running websocket_stream_from_websocket:", exc)
    finally:
        print("[WS] Collected messages from server (send_json calls):")
        for i, m in enumerate(fake_ws.sent_messages, 1):
            print(f" [{i}] {json.dumps(m, ensure_ascii=False)}")


async def main_async(audio_path: Path, modes: List[str]):
    client = TritonGrpcClient()  # init; if your client needs args, adjust here

    if "sse" in modes:
        await asr_sse_mode(client, audio_path)

    if "ws" in modes:
        await asr_ws_mode(client, audio_path)


def parse_args():
    p = argparse.ArgumentParser()
    p.add_argument("file", nargs="?", help="audio file path (optional). If omitted, searches cwd.")
    p.add_argument(
        "--mode",
        choices=["sse", "ws", "both"],
        default="both",
        help="Which method(s) to run against the service",
    )
    return p.parse_args()


def main():
    args = parse_args()
    if args.file:
        audio_path = Path(args.file)
        if not audio_path.exists():
            print("Audio file does not exist:", audio_path)
            sys.exit(2)
    else:
        found = find_first_audio_in_cwd()
        if not found:
            print("No audio file found in cwd. Place an audio file (wav/mp3/m4a/flac/ogg) here or pass a path.")
            sys.exit(2)
        audio_path = found

    modes = ["sse", "ws"] if args.mode == "both" else [args.mode]
    print(f"Using audio file: {audio_path} — running modes: {modes}")

    try:
        asyncio.run(main_async(audio_path, modes))
    except KeyboardInterrupt:
        print("Interrupted.")


if __name__ == "__main__":
    main()
asr/service.py
@@ -1,610 +0,0 @@
# clients/triton_grpc_client.py
from __future__ import annotations

import os, uuid, time, json, asyncio, tempfile
from pathlib import Path
from contextlib import asynccontextmanager, suppress
from typing import AsyncGenerator, List, Tuple, Optional, Callable, Awaitable, TYPE_CHECKING

import numpy as np
from pydub import AudioSegment
import tritonclient.grpc.aio as grpcclient
from tritonclient.utils import np_to_triton_dtype
import websockets
from dotenv import load_dotenv

load_dotenv()

# if TYPE_CHECKING:
#     from fastapi import WebSocket
#     from starlette.websockets import WebSocketDisconnect

# ---- constants (same as your code) ----
SAMPLE_RATE_HZ = 16_000
INPUT_WAV_TENSOR = "WAV"
INPUT_LEN_TENSOR = "WAV_LENS"
OUTPUT_TEXT_TENSOR = "TRANSCRIPTS"
MODEL_NAME = "transducer"
ZERO_PAD_REQUEST_CONTENT = True
TRITON_URL = os.getenv("TRITON_URL")


class _WebsocketsAdapter:
    """
    Adapter to make a `websockets` WebSocketServerProtocol behave like
    a Starlette/FastAPI WebSocket for the shape your code expects:
      - accept()
      - receive() -> dict with "bytes" or "text"
      - send_json(obj)
      - close()
    """

    def __init__(self, ws):
        self._ws = ws
        self._is_server_protocol = True  # semantic flag

    async def accept(self):
        # websockets server does its own accept when created; no-op here
        return

    async def receive(self):
        # websockets.recv() returns bytes or str
        data = await self._ws.recv()
        if isinstance(data, bytes):
            return {"bytes": data}
        else:
            return {"text": data}

    async def send_json(self, obj):
        import json

        await self._ws.send(json.dumps(obj, ensure_ascii=False))

    async def close(self):
        await self._ws.close()


class TritonGrpcClient:
    def __init__(self, triton_url: str):
        self.triton_url = triton_url

    """
    Owns only: audio -> chunking -> Triton -> queue -> streaming.
    Keeps producing even if the HTTP/WS request disconnects.
    MinIO/DB/file finalize stays outside via callbacks.
    """

    # ---------- SSE: return async generator ----------
    def event_stream_from_bytes(
        self,
        *,
        raw: bytes,
        filename: str,
        content_type: Optional[str] = None,
        on_final_text: Optional[Callable[[str], Awaitable[None]]] = None,
        on_error: Optional[Callable[[str], Awaitable[None]]] = None,
    ) -> AsyncGenerator[str, None]:
        async def _gen() -> AsyncGenerator[str, None]:
            if not raw:
                raise ValueError("Uploaded file is empty")

            tmp_path = self._write_temp_file(raw, filename)
            queue: asyncio.Queue[dict[str, str | None]] = asyncio.Queue()
            drop_event = asyncio.Event()  # set on disconnect → stop emitting only

            # spawn producer and DO NOT await it here; it lives independently
            asyncio.create_task(
                self._produce_transcripts(tmp_path, queue, on_final_text, on_error, drop_event),
                name=f"triton-producer-{uuid.uuid4().hex[:8]}",
            )

            print("[SSE] Client connected", flush=True)
            try:
                while True:
                    msg = await queue.get()
                    if msg.get("event") == "done":
                        print("[SSE] Job finished → closing stream", flush=True)
                        break
                    yield f"data: {json.dumps(msg, ensure_ascii=False)}\n\n"
            except asyncio.CancelledError:
                # request closed; keep producer alive, just stop emitting
                print("[SSE] Client disconnected (background continues)", flush=True)
                drop_event.set()
                return
            finally:
                print("[SSE] event_stream END", flush=True)

        return _gen()

    # ---------- WebSocket: same producer, push over WS ----------
    @asynccontextmanager
    async def _open_triton(self):
        client = grpcclient.InferenceServerClient(self.triton_url, verbose=False)
        try:
            yield client
        finally:
            with suppress(Exception):
                await client.close()

    async def _produce_transcripts(
        self,
        tmp_path: Path,
        queue: "asyncio.Queue[dict[str, str | None]]",
        on_final_text: Optional[Callable[[str], Awaitable[None]]],
        on_error: Optional[Callable[[str], Awaitable[None]]],
        drop_event: asyncio.Event,
    ) -> None:
        print("[BG] Started producer", flush=True)
        last_msg: dict[str, str] | None = None

        try:
            print("[BG] Entering stream_transcript loop", flush=True)
            async for last_msg in self._stream_transcript(str(tmp_path)):
                if not drop_event.is_set():
                    await queue.put(last_msg)

            print("[BG] stream_transcript finished", flush=True)
            final_text = (last_msg or {}).get("text", "").strip()

            if on_final_text:
                with suppress(Exception):
                    await on_final_text(final_text)

        except Exception as exc:
            print(f"[BG] EXCEPTION: {exc!r}", flush=True)
            if on_error:
                with suppress(Exception):
                    await on_error(str(exc))
            if not drop_event.is_set():
                await queue.put({"event": "error", "detail": str(exc)})
        finally:
            print("[BG] Cleaning up temp file", flush=True)
            with suppress(FileNotFoundError):
                tmp_path.unlink(missing_ok=True)

            if not drop_event.is_set():
                await queue.put({"event": "done"})
            print("[BG] producer END", flush=True)

    # Replace your existing websocket_stream_from_websocket with this version
    async def websocket_stream_from_websocket(
        self,
        websocket,
        *,
        filename: str = "stream",
        content_type: Optional[str] = None,
        on_final_text: Optional[Callable[..., Awaitable[None]]] = None,
        on_error: Optional[Callable[[str], Awaitable[None]]] = None,
        accept_in_client: bool = False,
    ) -> None:
        """
        Supports both:
          - Starlette/FastAPI WebSocket objects (they expose .receive(), .send_json(), .accept())
          - `websockets` WebSocketServerProtocol (they expose .recv(), .send(), .close())

        The adapter wraps the latter so the rest of your existing logic can stay unchanged.
        """
        # Lazy import here so file-level imports need not change
        import json
        import uuid
        import time
        import asyncio
        import tempfile
        from contextlib import suppress

        # If this is a websockets (WebSocketServerProtocol) instance, wrap it
        # Heuristic: Starlette has .receive(); websockets has .recv()
        if not hasattr(websocket, "receive") and hasattr(websocket, "recv"):
            websocket = _WebsocketsAdapter(websocket)

        # If caller requested server-side accept for ASGI websockets, call it.
        # For websockets adapter accept() is a no-op.
        if accept_in_client:
            # In FastAPI typical pattern is server calls accept(); keep that behavior
            await websocket.accept()

        # We'll append all raw bytes to a temp file so the endpoint can upload later in the finalizer
        src_suffix = Path(filename).suffix or ".bin"
        src_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}{src_suffix}"
        f_src = src_path.open("wb")

        # Background pipeline control
        queue: asyncio.Queue[dict[str, str | None]] = asyncio.Queue()
        drop_event = asyncio.Event()  # if client disconnects, stop SENDING but keep producing

        async def _call_final_cb(final_text: str) -> None:
            if not on_final_text:
                return
            try:
                # Try callback(final_text, source_audio_path) first, fallback to (final_text)
                if on_final_text.__code__.co_argcount >= 2:
                    await on_final_text(final_text, src_path)
                else:
                    await on_final_text(final_text)
            except Exception:
                pass

        async def recv_frames_and_transcode(proc):
            """
            Read frames using the (possibly-adapted) `websocket.receive()` and write to ffmpeg stdin;
            close stdin when we get {"event":"end"} or disconnect.
            """
            try:
                while True:
                    msg = await websocket.receive()
                    if "bytes" in msg and msg["bytes"] is not None:
                        chunk = msg["bytes"]
                        f_src.write(chunk)
                        proc.stdin.write(chunk)  # type: ignore[attr-defined]
                        await proc.stdin.drain()  # type: ignore[attr-defined]
                    elif "text" in msg and msg["text"] is not None:
                        try:
                            payload = json.loads(msg["text"])
                            if payload.get("event") == "end":
                                break
                        except Exception:
                            # ignore non-JSON text frames
                            pass
                    else:
                        # ignore pings/other control frames
                        await asyncio.sleep(0)
            except Exception:
                # If underlying connection closed abruptly, just stop receiving
                pass
            finally:
                with suppress(Exception):
                    f_src.flush()
                    f_src.close()
                with suppress(Exception):
                    # Some implementations use close(); others have no stdin to close
                    proc.stdin.close()  # type: ignore[attr-defined]

        async def transcribe_from_ffmpeg_stdout(proc):
            """
            Read float32 PCM from ffmpeg stdout, chunk on-the-fly, call Triton per chunk,
            and push partial/final messages to the queue.
            """
            # Triton session + chunk sizes
            async with self._open_triton() as client:
                first_sz, chunk_sz = await self._get_chunk_sizes(client, MODEL_NAME)
                seq_id = uuid.uuid4().int & 0x7FFF_FFFF_FFFF_FFFF
                full_tx = ""
                have_sent_any = False
                need = first_sz  # samples needed for the next chunk

                # PCM sample buffer (float32)
                buf = np.empty(0, dtype=np.float32)

                async def infer_one(raw: np.ndarray, eff_len: int, is_first: bool, is_last: bool):
                    nonlocal full_tx
                    wav_np = raw[None, :]  # (1, T)
                    len_np = np.array([[eff_len]], np.int32)
                    inp_wav = grpcclient.InferInput(INPUT_WAV_TENSOR, wav_np.shape, np_to_triton_dtype(np.float32))
                    inp_len = grpcclient.InferInput(INPUT_LEN_TENSOR, len_np.shape, np_to_triton_dtype(np.int32))
                    inp_wav.set_data_from_numpy(wav_np)
                    inp_len.set_data_from_numpy(len_np)
                    outs = [grpcclient.InferRequestedOutput(OUTPUT_TEXT_TENSOR)]

                    resp = await client.infer(
                        MODEL_NAME,
                        inputs=[inp_wav, inp_len],
                        outputs=outs,
                        sequence_id=seq_id,
                        sequence_start=is_first,
                        sequence_end=is_last,
                    )
                    txt = b" ".join(resp.as_numpy(OUTPUT_TEXT_TENSOR)).decode().strip()
                    if not txt:
                        return None
                    delta = txt[len(full_tx) :] if txt.startswith(full_tx) else txt
                    full_tx = txt
                    return delta or None

                # Read ffmpeg stdout in bytes; convert to float32
                try:
                    while True:
                        chunk = await proc.stdout.read(8192 * 4)  # 8192 samples (float32)
                        if not chunk:
                            break
                        # append decoded samples
                        new = np.frombuffer(chunk, dtype=np.float32)
                        if new.size == 0:
                            continue
                        if buf.size == 0:
                            buf = new
                        else:
                            buf = np.concatenate((buf, new), axis=0)

                        # while enough samples for the next piece, send it
                        while buf.size >= need:
                            take = need
                            piece = buf[:take]
                            buf = buf[take:]
                            is_first = not have_sent_any
                            have_sent_any = True

                            delta = await infer_one(piece, take, is_first=is_first, is_last=False)
                            if delta and not drop_event.is_set():
                                await queue.put({"time": time.time(), "text": delta, "is_final": False})

                            # after first, normal chunk size
                            need = chunk_sz

                finally:
                    # End of stream: flush any remainder and a zero-length piece if configured
                    if buf.size > 0:
                        # pad to chunk_sz for model framing, but eff_len is the real (short) length
                        eff = int(buf.size)
                        pad = np.zeros(chunk_sz, dtype=np.float32)
                        pad[:eff] = buf[:eff]
                        delta = await infer_one(pad, eff, is_first=not have_sent_any, is_last=False)
                        if delta and not drop_event.is_set():
                            await queue.put({"time": time.time(), "text": delta, "is_final": False})
                        have_sent_any = True

                    if ZERO_PAD_REQUEST_CONTENT:
                        # zero-length "flush" chunk
                        zero = np.zeros(0, dtype=np.float32)
                        await infer_one(zero, 0, is_first=not have_sent_any, is_last=True)
                    else:
                        # If not sending the explicit flush, still mark last by sending empty with last=True
                        zero = np.zeros(0, dtype=np.float32)
                        await infer_one(zero, 0, is_first=False, is_last=True)

                    # Emit final full transcript as a message, like your file-based path
                    if not drop_event.is_set():
                        await queue.put({"time": time.time(), "text": full_tx.strip(), "is_final": True})

                    # Call user finalizer
                    await _call_final_cb(full_tx.strip())

                    # signal done to sender
                    if not drop_event.is_set():
                        await queue.put({"event": "done"})

        async def send_messages():
            HEARTBEAT_SECS = 10
            print("[WS] Client connected", flush=True)
            try:
                while True:
                    try:
                        # wait for a real message up to HEARTBEAT_SECS
                        msg = await asyncio.wait_for(queue.get(), timeout=HEARTBEAT_SECS)
                    except asyncio.TimeoutError:
                        # no real message → send heartbeat
                        hb = {"event": "heartbeat", "t": time.time()}
                        await websocket.send_json(hb)
                        continue

                    if msg.get("event") == "done":
                        print("[WS] Job finished → closing socket", flush=True)
                        break

                    await websocket.send_json(msg)

            except WebSocketDisconnect:
                print("[WS] Client disconnected (background continues)", flush=True)
                drop_event.set()
            except asyncio.CancelledError:
                print("[WS] Client cancelled (background continues)", flush=True)
                drop_event.set()
            finally:
                with suppress(Exception):
                    await websocket.close()
                print("[WS] send_messages END", flush=True)

        # The remainder of this function is unchanged from your original implementation:
        # - transcribe_from_ffmpeg_stdout(proc)
        # - send_messages()
        # - ffmpeg process startup
        # - task creation + gather + cleanup
        #
        # To keep this patch minimal I re-use your original nested function bodies as-is.
        # Copy the original nested functions transcribe_from_ffmpeg_stdout and send_messages
        # exactly as you had them (they will reference `websocket`, `queue`, `drop_event`,
        # `_open_triton`, etc.). Below I paste them unchanged but with no modifications so
        # you can drop them in place.
        #
        # ---- paste your original nested functions here exactly (no changes) ----

        # ----- Start ffmpeg (stdin bytes -> stdout float32 PCM @ 16k mono)
        proc = await asyncio.create_subprocess_exec(
            "ffmpeg",
            "-hide_banner",
            "-loglevel",
            "error",
            "-i",
            "pipe:0",
            "-ac",
            "1",
            "-ar",
            str(SAMPLE_RATE_HZ),
            "-f",
            "f32le",
            "pipe:1",
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

        async def drain_ffmpeg_stderr(proc):
            try:
                while True:
                    line = await proc.stderr.readline()
                    if not line:
                        break
                    # You can log it if needed:
                    # print(f"[ffmpeg] {line.decode().rstrip()}", flush=True)
                    await asyncio.sleep(0)
            except asyncio.CancelledError:
                pass

        stderr_task = asyncio.create_task(drain_ffmpeg_stderr(proc), name="ffmpeg-stderr")

        # Run 3 tasks concurrently:
        # - recv frames & feed ffmpeg
        # - read pcm & call Triton
        # - send queue messages to client
        # Note: `transcribe_from_ffmpeg_stdout` and `send_messages` are your original nested functions.
        # Keep them unchanged and ensure they reference `websocket` only for send_json/receive (adapter handles it).
        tasks = [
            asyncio.create_task(recv_frames_and_transcode(proc), name="ws-recv"),
            asyncio.create_task(transcribe_from_ffmpeg_stdout(proc), name="ws-triton"),
            asyncio.create_task(send_messages(), name="ws-send"),
            stderr_task,
        ]

        try:
            await asyncio.gather(*tasks)
        finally:
            for t in tasks:
                if not t.done():
                    t.cancel()
            with suppress(Exception):
                proc.terminate()
            with suppress(Exception):
                await proc.wait()
            with suppress(Exception):
                if not f_src.closed:
                    f_src.close()

    @staticmethod
    def _write_temp_file(raw: bytes, filename: str) -> Path:
        tmp = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}{Path(filename).suffix}"
        tmp.write_bytes(raw)
        print(f"[REQ] Temp file written at {tmp}", flush=True)
        return tmp

    @staticmethod
    def _pcm_int16_to_float32(x: np.ndarray) -> np.ndarray:
        return (x.astype(np.float32) / 32768.0).clip(-1.0, 1.0)

    def _load_audio(self, path: str, target_sr: int = SAMPLE_RATE_HZ):
        audio = AudioSegment.from_file(path)
        audio = audio.set_channels(1).set_frame_rate(target_sr).set_sample_width(2)
        pcm_int16 = np.frombuffer(audio.raw_data, dtype=np.int16)
        return pcm_int16, target_sr

    async def _get_chunk_sizes(self, client: grpcclient.InferenceServerClient, model: str) -> Tuple[int, int]:
        try:
            cfg = await client.get_model_config(model, as_json=True)
            params = {p["key"]: p["value"]["string_param"] for p in cfg.get("parameters", [])}
            first_sec = float(params.get("chunk_size_first", params.get("chunk_size", "0.465")))
            norm_sec = float(params.get("chunk_size", "0.32"))
        except Exception:
            first_sec, norm_sec = 0.465, 0.32
        return int(first_sec * SAMPLE_RATE_HZ), int(norm_sec * SAMPLE_RATE_HZ)

    async def _stream_transcript(self, path: str) -> AsyncGenerator[dict, None]:
        """
        Read audio from `path`, chunk it and stream to Triton, yielding partial/final messages.

        Yields dicts of the form:
            {"time": <unix-time>, "text": "<delta text>", "is_final": False}
        and finally:
            {"time": <unix-time>, "text": "<full transcript>", "is_final": True}
        """
        import time
        import uuid
        from contextlib import suppress

        # Load audio (int16 PCM) and convert to float32 in [-1, 1]
        pcm_int16, sr = self._load_audio(path, target_sr=SAMPLE_RATE_HZ)
        if pcm_int16.size == 0:
            return

        wav = self._pcm_int16_to_float32(pcm_int16)  # float32 1-D numpy array

        # Triton client + chunk sizes
        async with self._open_triton() as client:
            first_sz, chunk_sz = await self._get_chunk_sizes(client, MODEL_NAME)
            seq_id = uuid.uuid4().int & 0x7FFF_FFFF_FFFF_FFFF

            full_tx = ""  # accumulated full transcript text
            have_sent_any = False

            async def infer_one(raw: np.ndarray, eff_len: int, is_first: bool, is_last: bool):
                """
                Send one request to Triton and return the delta text (or None).
                raw: 1-D float32 numpy array length == raw.shape[0] (should be >= eff_len; padded if needed)
                eff_len: number of valid samples in raw (int)
                """
                nonlocal full_tx
                wav_np = raw[None, :]  # shape (1, T)
                len_np = np.array([[eff_len]], np.int32)
                inp_wav = grpcclient.InferInput(INPUT_WAV_TENSOR, wav_np.shape, np_to_triton_dtype(np.float32))
                inp_len = grpcclient.InferInput(INPUT_LEN_TENSOR, len_np.shape, np_to_triton_dtype(np.int32))
                inp_wav.set_data_from_numpy(wav_np)
                inp_len.set_data_from_numpy(len_np)
                outs = [grpcclient.InferRequestedOutput(OUTPUT_TEXT_TENSOR)]

                resp = await client.infer(
                    MODEL_NAME,
                    inputs=[inp_wav, inp_len],
                    outputs=outs,
                    sequence_id=seq_id,
                    sequence_start=is_first,
                    sequence_end=is_last,
                )
                # join parts if model returned multiple tensors; decode bytes -> str
                txt = b" ".join(resp.as_numpy(OUTPUT_TEXT_TENSOR)).decode().strip()
                if not txt:
                    return None
                delta = txt[len(full_tx) :] if txt.startswith(full_tx) else txt
                full_tx = txt
                return delta or None

            # iterate over wav in chunks
            T = wav.shape[0]
            offset = 0
            need = first_sz
            # If first_sz is 0 for some reason, fall back to chunk_sz
            if need <= 0:
                need = chunk_sz

            try:
                while offset < T:
                    take = min(need, T - offset)
                    piece = wav[offset : offset + take]

                    # If piece is shorter than need (shouldn't happen except maybe for first),
                    # pad to `need` as model may expect framing; eff_len tracks real length.
                    eff = int(piece.size)
                    if eff < need:
                        pad = np.zeros(need, dtype=np.float32)
                        pad[:eff] = piece
                        to_send = pad
                    else:
                        to_send = piece

                    is_first = not have_sent_any
                    have_sent_any = True

                    # Not last unless remaining after this is zero
                    is_last = False

                    delta = await infer_one(to_send, eff, is_first=is_first, is_last=is_last)
                    if delta:
                        yield {"time": time.time(), "text": delta, "is_final": False}

                    offset += take
                    need = chunk_sz  # after first, normal chunk size

                # End of stream: if there is leftover silence/padding behavior handled above.
                # After all real chunks sent, optionally flush remainder (zero-length) as last=True.
                if ZERO_PAD_REQUEST_CONTENT:
                    # send an explicit flush chunk (zero-length) with sequence_end=True
                    zero = np.zeros(0, dtype=np.float32)
                    # When sending zero-length, eff_len = 0. Mark is_first True only if never sent anything.
                    await infer_one(zero, 0, is_first=(not have_sent_any), is_last=True)
                else:
                    # send empty with last=True but not as first
                    zero = np.zeros(0, dtype=np.float32)
                    await infer_one(zero, 0, is_first=False, is_last=True)

                # Emit final full transcript
                yield {"time": time.time(), "text": full_tx.strip(), "is_final": True}

            finally:
                # nothing specific to clean in this generator; Triton client closed by context manager
                with suppress(Exception):
                    pass
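Aside (not part of the compare view): because `event_stream_from_bytes` above returns an async generator of ready-made `data: {...}\n\n` strings, it can be plugged straight into an SSE response. The FastAPI wiring below is a hypothetical sketch (FastAPI is not among the deleted requirements); only the `TritonGrpcClient` calls come from the deleted `asr/service.py`.

```python
import os

from fastapi import FastAPI, File, UploadFile
from fastapi.responses import StreamingResponse

from asr.service import TritonGrpcClient  # module removed by this compare

app = FastAPI()
client = TritonGrpcClient(triton_url=os.getenv("TRITON_URL", "localhost:8001"))


@app.post("/transcribe/sse")
async def transcribe_sse(file: UploadFile = File(...)):
    raw = await file.read()
    # The generator already yields SSE-framed "data: ...\n\n" strings.
    stream = client.event_stream_from_bytes(raw=raw, filename=file.filename or "upload.bin")
    return StreamingResponse(stream, media_type="text/event-stream")
```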
@@ -1,29 +0,0 @@
aiohappyeyeballs==2.6.1
aiohttp==3.13.0
aiosignal==1.4.0
async-timeout==5.0.1
attrs==25.4.0
Brotli==1.1.0
certifi==2025.10.5
frozenlist==1.8.0
gevent==25.9.1
geventhttpclient==2.3.4
greenlet==3.2.4
grpcio==1.67.1
idna==3.11
multidict==6.7.0
numpy==2.2.6
packaging==25.0
perf-analyzer==2.59.1
propcache==0.4.1
protobuf==5.29.5
pydub==0.25.1
python-dotenv==1.1.1
python-rapidjson==1.21
tritonclient==2.61.0
typing_extensions==4.15.0
urllib3==2.5.0
websockets==15.0.1
yarl==1.22.0
zope.event==6.0
zope.interface==8.0.1
setup.py
@@ -1,23 +0,0 @@
from setuptools import setup, find_packages

with open("requirements.txt") as f:
    requirements = f.read().splitlines()

setup(
    name="asr_client",
    version="0.1.0",
    description="extract text from audio file",
    long_description_content_type="text/markdown",
    author="BI",
    author_email="BI@bi.ir",
    url="https://git.d.aiengines.ir/bi/asr_triton_client.git",
    packages=find_packages(),
    install_requires=requirements,
    include_package_data=True,
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.10",
)
@@ -1,25 +0,0 @@
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -1,247 +0,0 @@
<#
.Synopsis
Activate a Python virtual environment for the current PowerShell session.

.Description
Pushes the python executable for a virtual environment to the front of the
$Env:PATH environment variable and sets the prompt to signify that you are
in a Python virtual environment. Makes use of the command line switches as
well as the `pyvenv.cfg` file values present in the virtual environment.

.Parameter VenvDir
Path to the directory that contains the virtual environment to activate. The
default value for this is the parent of the directory that the Activate.ps1
script is located within.

.Parameter Prompt
The prompt prefix to display when this virtual environment is activated. By
default, this prompt is the name of the virtual environment folder (VenvDir)
surrounded by parentheses and followed by a single space (ie. '(.venv) ').

.Example
Activate.ps1
Activates the Python virtual environment that contains the Activate.ps1 script.

.Example
Activate.ps1 -Verbose
Activates the Python virtual environment that contains the Activate.ps1 script,
and shows extra information about the activation as it executes.

.Example
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
Activates the Python virtual environment located in the specified location.

.Example
Activate.ps1 -Prompt "MyPython"
Activates the Python virtual environment that contains the Activate.ps1 script,
and prefixes the current prompt with the specified string (surrounded in
parentheses) while the virtual environment is active.

.Notes
On Windows, it may be required to enable this Activate.ps1 script by setting the
execution policy for the user. You can do this by issuing the following PowerShell
command:

PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser

For more information on Execution Policies:
https://go.microsoft.com/fwlink/?LinkID=135170

#>
Param(
    [Parameter(Mandatory = $false)]
    [String]
    $VenvDir,
    [Parameter(Mandatory = $false)]
    [String]
    $Prompt
)

<# Function declarations --------------------------------------------------- #>

<#
.Synopsis
Remove all shell session elements added by the Activate script, including the
addition of the virtual environment's Python executable from the beginning of
the PATH variable.

.Parameter NonDestructive
If present, do not remove this function from the global namespace for the
session.

#>
function global:deactivate ([switch]$NonDestructive) {
    # Revert to original values

    # The prior prompt:
    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
    }

    # The prior PYTHONHOME:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
        Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
    }

    # The prior PATH:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
        Remove-Item -Path Env:_OLD_VIRTUAL_PATH
    }

    # Just remove the VIRTUAL_ENV altogether:
    if (Test-Path -Path Env:VIRTUAL_ENV) {
        Remove-Item -Path env:VIRTUAL_ENV
    }

    # Just remove VIRTUAL_ENV_PROMPT altogether.
    if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
        Remove-Item -Path env:VIRTUAL_ENV_PROMPT
    }

    # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
    }

    # Leave deactivate function in the global namespace if requested:
    if (-not $NonDestructive) {
        Remove-Item -Path function:deactivate
    }
}

<#
.Description
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
given folder, and returns them in a map.

For each line in the pyvenv.cfg file, if that line can be parsed into exactly
two strings separated by `=` (with any amount of whitespace surrounding the =)
then it is considered a `key = value` line. The left hand string is the key,
the right hand is the value.

If the value starts with a `'` or a `"` then the first and last character is
stripped from the value before being captured.

.Parameter ConfigDir
Path to the directory that contains the `pyvenv.cfg` file.
#>
function Get-PyVenvConfig(
    [String]
    $ConfigDir
) {
    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"

    # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
    $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue

    # An empty map will be returned if no config file is found.
    $pyvenvConfig = @{ }

    if ($pyvenvConfigPath) {

        Write-Verbose "File exists, parse `key = value` lines"
        $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath

        $pyvenvConfigContent | ForEach-Object {
            $keyval = $PSItem -split "\s*=\s*", 2
            if ($keyval[0] -and $keyval[1]) {
                $val = $keyval[1]

                # Remove extraneous quotations around a string value.
                if ("'""".Contains($val.Substring(0, 1))) {
                    $val = $val.Substring(1, $val.Length - 2)
                }

                $pyvenvConfig[$keyval[0]] = $val
                Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
            }
        }
    }
    return $pyvenvConfig
}


<# Begin Activate script --------------------------------------------------- #>

# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath

Write-Verbose "Activation script is located in path: '$VenvExecPath'"
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"

# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
    Write-Verbose "VenvDir=$VenvDir"
}

# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir

# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
    Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
        Write-Verbose "  Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
        $Prompt = $pyvenvCfg['prompt'];
    }
    else {
        Write-Verbose "  Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
        Write-Verbose "  Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
        $Prompt = Split-Path -Path $venvDir -Leaf
    }
}

Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"

# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive

# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir

if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {

    Write-Verbose "Setting prompt to '$Prompt'"

    # Set the prompt to include the env name
    # Make sure _OLD_VIRTUAL_PROMPT is global
    function global:_OLD_VIRTUAL_PROMPT { "" }
    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt

    function global:prompt {
        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
        _OLD_VIRTUAL_PROMPT
    }
    $env:VIRTUAL_ENV_PROMPT = $Prompt
}

# Clear PYTHONHOME
if (Test-Path -Path Env:PYTHONHOME) {
    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
    Remove-Item -Path Env:PYTHONHOME
}

# Add the venv to the PATH
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
@@ -1,69 +0,0 @@
# This file must be used with "source bin/activate" *from bash*
# you cannot run it directly

deactivate () {
    # reset old environment variables
    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
        PATH="${_OLD_VIRTUAL_PATH:-}"
        export PATH
        unset _OLD_VIRTUAL_PATH
    fi
    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
        export PYTHONHOME
        unset _OLD_VIRTUAL_PYTHONHOME
    fi

    # This should detect bash and zsh, which have a hash command that must
    # be called to get it to forget past commands. Without forgetting
    # past commands the $PATH changes we made may not be respected
    if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
        hash -r 2> /dev/null
    fi

    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
        PS1="${_OLD_VIRTUAL_PS1:-}"
        export PS1
        unset _OLD_VIRTUAL_PS1
    fi

    unset VIRTUAL_ENV
    unset VIRTUAL_ENV_PROMPT
    if [ ! "${1:-}" = "nondestructive" ] ; then
        # Self destruct!
        unset -f deactivate
    fi
}

# unset irrelevant variables
deactivate nondestructive

VIRTUAL_ENV=/home/azhidev/bagher/packages/asr_triton_client/venv310
export VIRTUAL_ENV

_OLD_VIRTUAL_PATH="$PATH"
PATH="$VIRTUAL_ENV/"bin":$PATH"
export PATH

# unset PYTHONHOME if set
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
# could use `if (set -u; : $PYTHONHOME) ;` in bash
if [ -n "${PYTHONHOME:-}" ] ; then
    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
    unset PYTHONHOME
fi

if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
    _OLD_VIRTUAL_PS1="${PS1:-}"
    PS1='(venv310) '"${PS1:-}"
    export PS1
    VIRTUAL_ENV_PROMPT='(venv310) '
    export VIRTUAL_ENV_PROMPT
fi

# This should detect bash and zsh, which have a hash command that must
# be called to get it to forget past commands. Without forgetting
# past commands the $PATH changes we made may not be respected
if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
    hash -r 2> /dev/null
fi
@ -1,26 +0,0 @@
# This file must be used with "source bin/activate.csh" *from csh*.
# You cannot run it directly.
# Created by Davide Di Blasi <davidedb@gmail.com>.
# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>

alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'

# Unset irrelevant variables.
deactivate nondestructive

setenv VIRTUAL_ENV /home/azhidev/bagher/packages/asr_triton_client/venv310

set _OLD_VIRTUAL_PATH="$PATH"
setenv PATH "$VIRTUAL_ENV/"bin":$PATH"


set _OLD_VIRTUAL_PROMPT="$prompt"

if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
    set prompt = '(venv310) '"$prompt"
    setenv VIRTUAL_ENV_PROMPT '(venv310) '
endif

alias pydoc python -m pydoc

rehash
@ -1,69 +0,0 @@
# This file must be used with "source <venv>/bin/activate.fish" *from fish*
# (https://fishshell.com/); you cannot run it directly.

function deactivate -d "Exit virtual environment and return to normal shell environment"
    # reset old environment variables
    if test -n "$_OLD_VIRTUAL_PATH"
        set -gx PATH $_OLD_VIRTUAL_PATH
        set -e _OLD_VIRTUAL_PATH
    end
    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
        set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
        set -e _OLD_VIRTUAL_PYTHONHOME
    end

    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
        set -e _OLD_FISH_PROMPT_OVERRIDE
        # prevents error when using nested fish instances (Issue #93858)
        if functions -q _old_fish_prompt
            functions -e fish_prompt
            functions -c _old_fish_prompt fish_prompt
            functions -e _old_fish_prompt
        end
    end

    set -e VIRTUAL_ENV
    set -e VIRTUAL_ENV_PROMPT
    if test "$argv[1]" != "nondestructive"
        # Self-destruct!
        functions -e deactivate
    end
end

# Unset irrelevant variables.
deactivate nondestructive

set -gx VIRTUAL_ENV /home/azhidev/bagher/packages/asr_triton_client/venv310

set -gx _OLD_VIRTUAL_PATH $PATH
set -gx PATH "$VIRTUAL_ENV/"bin $PATH

# Unset PYTHONHOME if set.
if set -q PYTHONHOME
    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
    set -e PYTHONHOME
end

if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
    # fish uses a function instead of an env var to generate the prompt.

    # Save the current fish_prompt function as the function _old_fish_prompt.
    functions -c fish_prompt _old_fish_prompt

    # With the original prompt function renamed, we can override with our own.
    function fish_prompt
        # Save the return status of the last command.
        set -l old_status $status

        # Output the venv prompt; color taken from the blue of the Python logo.
        printf "%s%s%s" (set_color 4B8BBE) '(venv310) ' (set_color normal)

        # Restore the return status of the previous command.
        echo "exit $old_status" | .
        # Output the original/"old" prompt.
        _old_fish_prompt
    end

    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
    set -gx VIRTUAL_ENV_PROMPT '(venv310) '
end
@ -1,8 +0,0 @@
#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10
# -*- coding: utf-8 -*-
import re
import sys
from dotenv.__main__ import cli
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(cli())
@ -1,8 +0,0 @@
#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10
# -*- coding: utf-8 -*-
import re
import sys
from numpy.f2py.f2py2e import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
@ -1,8 +0,0 @@
#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10
# -*- coding: utf-8 -*-
import re
import sys
from numpy._configtool import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
@ -1,8 +0,0 @@
#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10
# -*- coding: utf-8 -*-
import re
import sys
from perf_analyzer.cli import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
@ -1,8 +0,0 @@
#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
@ -1,8 +0,0 @@
#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
@ -1,8 +0,0 @@
#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
@ -1 +0,0 @@
python3.10
@ -1 +0,0 @@
python3.10
@ -1 +0,0 @@
/usr/bin/python3.10
@ -1,8 +0,0 @@
#!/home/azhidev/bagher/packages/asr_triton_client/venv310/bin/python3.10
# -*- coding: utf-8 -*-
import re
import sys
from websockets.cli import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
@ -1,164 +0,0 @@
/* -*- indent-tabs-mode: nil; tab-width: 4; -*- */

/* Greenlet object interface */

#ifndef Py_GREENLETOBJECT_H
#define Py_GREENLETOBJECT_H


#include <Python.h>

#ifdef __cplusplus
extern "C" {
#endif

/* This is deprecated and undocumented. It does not change. */
#define GREENLET_VERSION "1.0.0"

#ifndef GREENLET_MODULE
#define implementation_ptr_t void*
#endif

typedef struct _greenlet {
    PyObject_HEAD
    PyObject* weakreflist;
    PyObject* dict;
    implementation_ptr_t pimpl;
} PyGreenlet;

#define PyGreenlet_Check(op) (op && PyObject_TypeCheck(op, &PyGreenlet_Type))


/* C API functions */

/* Total number of symbols that are exported */
#define PyGreenlet_API_pointers 12

#define PyGreenlet_Type_NUM 0
#define PyExc_GreenletError_NUM 1
#define PyExc_GreenletExit_NUM 2

#define PyGreenlet_New_NUM 3
#define PyGreenlet_GetCurrent_NUM 4
#define PyGreenlet_Throw_NUM 5
#define PyGreenlet_Switch_NUM 6
#define PyGreenlet_SetParent_NUM 7

#define PyGreenlet_MAIN_NUM 8
#define PyGreenlet_STARTED_NUM 9
#define PyGreenlet_ACTIVE_NUM 10
#define PyGreenlet_GET_PARENT_NUM 11

#ifndef GREENLET_MODULE
/* This section is used by modules that uses the greenlet C API */
static void** _PyGreenlet_API = NULL;

# define PyGreenlet_Type \
    (*(PyTypeObject*)_PyGreenlet_API[PyGreenlet_Type_NUM])

# define PyExc_GreenletError \
    ((PyObject*)_PyGreenlet_API[PyExc_GreenletError_NUM])

# define PyExc_GreenletExit \
    ((PyObject*)_PyGreenlet_API[PyExc_GreenletExit_NUM])

/*
 * PyGreenlet_New(PyObject *args)
 *
 * greenlet.greenlet(run, parent=None)
 */
# define PyGreenlet_New \
    (*(PyGreenlet * (*)(PyObject * run, PyGreenlet * parent)) \
        _PyGreenlet_API[PyGreenlet_New_NUM])

/*
 * PyGreenlet_GetCurrent(void)
 *
 * greenlet.getcurrent()
 */
# define PyGreenlet_GetCurrent \
    (*(PyGreenlet * (*)(void)) _PyGreenlet_API[PyGreenlet_GetCurrent_NUM])

/*
 * PyGreenlet_Throw(
 *     PyGreenlet *greenlet,
 *     PyObject *typ,
 *     PyObject *val,
 *     PyObject *tb)
 *
 * g.throw(...)
 */
# define PyGreenlet_Throw \
    (*(PyObject * (*)(PyGreenlet * self, \
                      PyObject * typ, \
                      PyObject * val, \
                      PyObject * tb)) \
        _PyGreenlet_API[PyGreenlet_Throw_NUM])

/*
 * PyGreenlet_Switch(PyGreenlet *greenlet, PyObject *args)
 *
 * g.switch(*args, **kwargs)
 */
# define PyGreenlet_Switch \
    (*(PyObject * \
       (*)(PyGreenlet * greenlet, PyObject * args, PyObject * kwargs)) \
        _PyGreenlet_API[PyGreenlet_Switch_NUM])

/*
 * PyGreenlet_SetParent(PyObject *greenlet, PyObject *new_parent)
 *
 * g.parent = new_parent
 */
# define PyGreenlet_SetParent \
    (*(int (*)(PyGreenlet * greenlet, PyGreenlet * nparent)) \
        _PyGreenlet_API[PyGreenlet_SetParent_NUM])

/*
 * PyGreenlet_GetParent(PyObject* greenlet)
 *
 * return greenlet.parent;
 *
 * This could return NULL even if there is no exception active.
 * If it does not return NULL, you are responsible for decrementing the
 * reference count.
 */
# define PyGreenlet_GetParent \
    (*(PyGreenlet* (*)(PyGreenlet*)) \
        _PyGreenlet_API[PyGreenlet_GET_PARENT_NUM])

/*
 * deprecated, undocumented alias.
 */
# define PyGreenlet_GET_PARENT PyGreenlet_GetParent

# define PyGreenlet_MAIN \
    (*(int (*)(PyGreenlet*)) \
        _PyGreenlet_API[PyGreenlet_MAIN_NUM])

# define PyGreenlet_STARTED \
    (*(int (*)(PyGreenlet*)) \
        _PyGreenlet_API[PyGreenlet_STARTED_NUM])

# define PyGreenlet_ACTIVE \
    (*(int (*)(PyGreenlet*)) \
        _PyGreenlet_API[PyGreenlet_ACTIVE_NUM])


/* Macro that imports greenlet and initializes C API */
/* NOTE: This has actually moved to ``greenlet._greenlet._C_API``, but we
   keep the older definition to be sure older code that might have a copy of
   the header still works. */
# define PyGreenlet_Import() \
    { \
        _PyGreenlet_API = (void**)PyCapsule_Import("greenlet._C_API", 0); \
    }

#endif /* GREENLET_MODULE */

#ifdef __cplusplus
}
#endif
#endif /* !Py_GREENLETOBJECT_H */
@ -1 +0,0 @@
lib
@ -1,3 +0,0 @@
home = /usr/bin
include-system-site-packages = false
version = 3.10.18
10
xl/README.md
Normal file
@ -0,0 +1,10 @@
Example usage — `sheet_to_dict` returns a list of dictionaries, one per non-empty row, keyed by the header row:

```python
from xl import XL

file_path = "./1.xlsx"
sheet_name = "Sheet"
with XL(file_path) as excel_to_dict:
    data = excel_to_dict.sheet_to_dict(sheet_name)
    print(data)
```
1
xl/__init__.py
Normal file
@ -0,0 +1 @@
from .main import XL
46
xl/main.py
Normal file
@ -0,0 +1,46 @@
import openpyxl


class XL:
    def __init__(self, file_path: str):
        """
        Initialize the class with the path to the Excel file.
        """
        self.file_path = file_path
        self.workbook = None

    def __enter__(self):
        """
        Open the workbook when entering the context.
        """
        self.workbook = openpyxl.load_workbook(self.file_path)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Close the workbook when exiting the context.
        """
        if self.workbook:
            self.workbook.close()

    def sheet_to_dict(self, sheet_name: str):
        """
        Convert a sheet's data to a dictionary, removing rows with None values.
        """
        if not self.workbook:
            raise ValueError("Workbook is not loaded. Ensure you use 'with' to open the file.")

        if sheet_name not in self.workbook.sheetnames:
            raise ValueError(f"Sheet '{sheet_name}' not found in the workbook.")

        sheet = self.workbook[sheet_name]
        headers = [cell.value for cell in sheet[1] if cell.value is not None]

        data_list = []
        for row in sheet.iter_rows(min_row=2, values_only=True):
            row_dict = {headers[i]: cell for i, cell in enumerate(row) if i < len(headers)}
            # Remove keys with None values
            row_dict = {key: value for key, value in row_dict.items() if value is not None}
            if row_dict:  # Only include non-empty rows
                data_list.append(row_dict)

        return data_list
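For a quick sanity check of the new `xl` helper, here is a minimal sketch that builds a tiny workbook with openpyxl and reads it back through `XL.sheet_to_dict`. The file name `demo.xlsx` and the sample rows are illustrative, and it assumes the `xl` package from this commit is on the import path.

```python
# Sketch: exercise the new XL helper against a tiny workbook (names are illustrative).
import openpyxl
from xl import XL

wb = openpyxl.Workbook()
ws = wb.active                   # a fresh workbook's default sheet is named "Sheet"
ws.append(["name", "age"])       # row 1 becomes the header row
ws.append(["alice", 30])
ws.append(["bob", None])         # None cells are dropped per row by sheet_to_dict
wb.save("demo.xlsx")

with XL("demo.xlsx") as xl_file:
    print(xl_file.sheet_to_dict("Sheet"))
# -> [{'name': 'alice', 'age': 30}, {'name': 'bob'}]
```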