refactor: rename canto-backend → backend, canto-frontend → frontend
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
0
backend/utils/__init__.py
Normal file
0
backend/utils/__init__.py
Normal file
113
backend/utils/audio.py
Normal file
113
backend/utils/audio.py
Normal file
@@ -0,0 +1,113 @@
|
||||
import base64
|
||||
import io
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
from scipy import signal
|
||||
|
||||
|
||||
def validate_ref_audio(audio_data: bytes, max_size_mb: int = 10) -> bool:
    """Check that reference audio is decodable, within the size limit,
    and between 1 and 30 seconds long.

    Returns False (never raises) on any failure, including undecodable
    or zero-rate input.
    """
    # Size check first: cheap, and avoids decoding oversized uploads.
    if len(audio_data) > max_size_mb * 1024 * 1024:
        return False

    try:
        samples, rate = sf.read(io.BytesIO(audio_data))
        seconds = len(samples) / rate
    except Exception:
        # Any decode failure means the clip is unusable as a reference.
        return False

    return 1.0 <= seconds <= 30.0
|
||||
|
||||
|
||||
def process_ref_audio(audio_data: bytes) -> tuple[np.ndarray, int]:
    """Decode reference audio to mono float32 at 24 kHz.

    Multi-channel input is downmixed by averaging channels; any other
    sample rate is resampled to 24000 Hz.
    """
    waveform, source_rate = sf.read(io.BytesIO(audio_data))

    # Downmix to mono by averaging across channels.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)

    target_rate = 24000
    if source_rate != target_rate:
        waveform = resample_audio(waveform, source_rate, target_rate)

    return waveform.astype(np.float32), target_rate
|
||||
|
||||
|
||||
def resample_audio(audio_array: np.ndarray, orig_sr: int, target_sr: int = 24000) -> np.ndarray:
    """Resample a waveform using FFT-based resampling (scipy.signal.resample).

    Returns the input unchanged when the rates already match; otherwise a
    float32 array whose length is scaled by target_sr / orig_sr (truncated).
    """
    if orig_sr == target_sr:
        return audio_array

    # Output length scales with the sample-rate ratio.
    out_len = int(len(audio_array) * target_sr / orig_sr)
    return signal.resample(audio_array, out_len).astype(np.float32)
|
||||
|
||||
|
||||
def extract_audio_features(audio_array: np.ndarray, sample_rate: int) -> dict:
    """Summarize a waveform: duration (s), sample rate, sample count, RMS energy."""
    n = len(audio_array)
    rms = float(np.sqrt(np.mean(np.square(audio_array))))
    return {
        'duration': n / sample_rate,
        'sample_rate': int(sample_rate),
        'num_samples': n,
        'rms_energy': rms,
    }
|
||||
|
||||
|
||||
def encode_audio_to_base64(audio_array: np.ndarray, sample_rate: int) -> str:
    """Serialize a waveform as an in-memory WAV file and base64-encode it."""
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, audio_array, sample_rate, format='WAV')
    # getvalue() returns the full buffer contents regardless of position.
    return base64.b64encode(wav_buffer.getvalue()).decode('utf-8')
|
||||
|
||||
|
||||
def decode_base64_to_audio(base64_string: str) -> tuple[np.ndarray, int]:
    """Inverse of encode_audio_to_base64: decode base64 WAV bytes to (samples, rate)."""
    raw = base64.b64decode(base64_string)
    samples, rate = sf.read(io.BytesIO(raw))
    return samples, rate
|
||||
|
||||
|
||||
def validate_audio_format(audio_data: bytes) -> bool:
    """Return True iff soundfile can decode the given bytes."""
    try:
        sf.read(io.BytesIO(audio_data))
    except Exception:
        # Undecodable or unsupported container/codec.
        return False
    return True
|
||||
|
||||
|
||||
def get_audio_duration(audio_array: np.ndarray, sample_rate: int) -> float:
    """Duration of the waveform in seconds (samples / rate)."""
    num_samples = len(audio_array)
    return num_samples / sample_rate
|
||||
|
||||
|
||||
def save_audio_file(
    audio_array: np.ndarray,
    sample_rate: int,
    output_path: str | Path
) -> str:
    """Write a waveform to disk as 16-bit PCM WAV, creating parent dirs.

    2-D input with fewer rows than columns is treated as channel-major
    and transposed to (samples, channels) for soundfile. Returns the
    destination path as a string.
    """
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Coerce list-like input to an ndarray before shape handling.
    if not isinstance(audio_array, np.ndarray):
        audio_array = np.array(audio_array, dtype=np.float32)

    if audio_array.ndim == 2:
        # Heuristic: fewer rows than columns means (channels, samples);
        # soundfile expects (samples, channels).
        if audio_array.shape[0] < audio_array.shape[1]:
            audio_array = audio_array.T
    elif audio_array.ndim != 1:
        raise ValueError(f"Unexpected audio array shape: {audio_array.shape}")

    sf.write(str(destination), audio_array.astype(np.float32), sample_rate,
             format='WAV', subtype='PCM_16')
    return str(destination)
|
||||
80
backend/utils/metrics.py
Normal file
80
backend/utils/metrics.py
Normal file
@@ -0,0 +1,80 @@
|
||||
import threading
|
||||
from typing import Dict
|
||||
from pathlib import Path
|
||||
from sqlalchemy.orm import Session
|
||||
from db.models import VoiceCache
|
||||
|
||||
|
||||
class CacheMetrics:
    """Thread-safe in-process counters for voice-cache hits and misses.

    Counters are guarded by a lock; get_stats() combines them with
    persistent state from the database and on-disk cache files.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self.cache_hits = 0
        self.cache_misses = 0
        # Per-user counters, keyed by user id.
        self._user_hits: Dict[int, int] = {}
        self._user_misses: Dict[int, int] = {}

    def record_hit(self, user_id: int):
        """Count one cache hit, globally and for this user."""
        with self._lock:
            self.cache_hits += 1
            self._user_hits[user_id] = self._user_hits.get(user_id, 0) + 1

    def record_miss(self, user_id: int):
        """Count one cache miss, globally and for this user."""
        with self._lock:
            self.cache_misses += 1
            self._user_misses[user_id] = self._user_misses.get(user_id, 0) + 1

    def get_stats(self, db: Session, cache_dir: str) -> dict:
        """Return global and per-user cache statistics.

        Fix: snapshot the in-memory counters under the lock, then run
        the DB queries and filesystem walk WITHOUT holding it, so slow
        I/O cannot block record_hit()/record_miss() callers. The stats
        therefore reflect the counters at snapshot time.
        """
        with self._lock:
            hits = self.cache_hits
            misses = self.cache_misses
            user_hits = dict(self._user_hits)
            user_misses = dict(self._user_misses)

        total_requests = hits + misses
        hit_rate = hits / total_requests if total_requests > 0 else 0.0

        total_entries = db.query(VoiceCache).count()

        # Sum sizes of pickled cache entries on disk.
        total_size_bytes = 0
        cache_path = Path(cache_dir)
        if cache_path.exists():
            for cache_file in cache_path.glob("*.pkl"):
                total_size_bytes += cache_file.stat().st_size
        total_size_mb = total_size_bytes / (1024 * 1024)

        user_stats = []
        # Union of key views: every user seen in either counter.
        for user_id in user_hits.keys() | user_misses.keys():
            u_hits = user_hits.get(user_id, 0)
            u_misses = user_misses.get(user_id, 0)
            total = u_hits + u_misses

            # NOTE(review): one DB query per user (N+1); acceptable while
            # the active-user set is small — confirm before scaling up.
            user_cache_count = db.query(VoiceCache).filter(
                VoiceCache.user_id == user_id
            ).count()

            user_stats.append({
                'user_id': user_id,
                'hits': u_hits,
                'misses': u_misses,
                'hit_rate': u_hits / total if total > 0 else 0.0,
                'cache_entries': user_cache_count
            })

        return {
            'global': {
                'total_requests': total_requests,
                'cache_hits': hits,
                'cache_misses': misses,
                'hit_rate': hit_rate,
                'total_entries': total_entries,
                'total_size_mb': total_size_mb
            },
            'users': user_stats
        }

    def reset(self):
        """Zero all counters, global and per-user."""
        with self._lock:
            self.cache_hits = 0
            self.cache_misses = 0
            self._user_hits.clear()
            self._user_misses.clear()


# Module-level singleton shared across the application.
cache_metrics = CacheMetrics()
|
||||
106
backend/utils/validation.py
Normal file
106
backend/utils/validation.py
Normal file
@@ -0,0 +1,106 @@
|
||||
from typing import List, Dict
|
||||
|
||||
# Languages accepted by validate_language(); matching there is
# case-insensitive and these spellings are the canonical forms returned.
SUPPORTED_LANGUAGES = [
    "Chinese", "English", "Japanese", "Korean", "German",
    "French", "Russian", "Portuguese", "Spanish", "Italian",
    "Auto", "Cantonese"
]

# Built-in speaker presets accepted by validate_speaker().
SUPPORTED_SPEAKERS = [
    "Vivian", "Serena", "Uncle_Fu", "Dylan", "Eric",
    "Ryan", "Aiden", "Ono_Anna", "Sohee"
]

# Human-readable description for each supported speaker, keyed by the
# canonical speaker name above.
SPEAKER_DESCRIPTIONS = {
    "Vivian": "Female, professional and clear",
    "Serena": "Female, gentle and warm",
    "Uncle_Fu": "Male, mature and authoritative",
    "Dylan": "Male, young and energetic",
    "Eric": "Male, calm and steady",
    "Ryan": "Male, friendly and casual",
    "Aiden": "Male, deep and resonant",
    "Ono_Anna": "Female, cute and lively",
    "Sohee": "Female, soft and melodious"
}

# NOTE(review): the "local" backend's lists are currently identical copies
# of the global ones — presumably kept separate so another backend could
# diverge later; confirm before collapsing them.
LOCAL_SPEAKERS = SUPPORTED_SPEAKERS.copy()

LOCAL_SPEAKER_DESCRIPTIONS = SPEAKER_DESCRIPTIONS.copy()
|
||||
|
||||
|
||||
def validate_language(language: str) -> str:
    """Normalize a language name to its canonical spelling.

    Matching is case-insensitive (and whitespace-insensitive at the
    edges) against SUPPORTED_LANGUAGES. Raises ValueError otherwise.
    """
    wanted = language.strip().lower()
    for candidate in SUPPORTED_LANGUAGES:
        if candidate.lower() == wanted:
            return candidate

    raise ValueError(
        f"Unsupported language: {language}. "
        f"Supported languages: {', '.join(SUPPORTED_LANGUAGES)}"
    )
|
||||
|
||||
|
||||
def validate_speaker(speaker: str, backend: str = "local") -> str:
    """Normalize a speaker name to its canonical spelling (case-insensitive).

    NOTE(review): `backend` is currently unused — every backend validates
    against LOCAL_SPEAKERS, which mirrors the global list. Raises
    ValueError for unknown speakers.
    """
    wanted = speaker.strip().lower()
    for candidate in LOCAL_SPEAKERS:
        if candidate.lower() == wanted:
            return candidate

    raise ValueError(
        f"Unsupported speaker: {speaker}. "
        f"Supported speakers: {', '.join(LOCAL_SPEAKERS)}"
    )
|
||||
|
||||
|
||||
def validate_text_length(text: str, max_length: int = 1000) -> str:
    """Validate TTS input text and return it stripped of edge whitespace.

    Raises ValueError when the text is empty/whitespace-only, or when the
    RAW (pre-strip) length exceeds max_length.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        raise ValueError("Text cannot be empty")

    raw_length = len(text)
    if raw_length > max_length:
        raise ValueError(
            f"Text length ({raw_length}) exceeds maximum ({max_length})"
        )

    return stripped
|
||||
|
||||
|
||||
def validate_generation_params(params: dict) -> dict:
    """Fill in defaults and range-check TTS generation parameters.

    Unknown keys in `params` are ignored; the returned dict contains
    exactly the five known parameters. Raises ValueError when a value
    falls outside its (inclusive) allowed range.
    """
    # (name, default, lower bound, upper bound) — bounds are inclusive.
    bounds = (
        ('max_new_tokens', 2048, 128, 4096),
        ('temperature', 0.9, 0.1, 2.0),
        ('top_k', 50, 1, 100),
        ('top_p', 1.0, 0.0, 1.0),
        ('repetition_penalty', 1.05, 1.0, 2.0),
    )

    validated = {}
    for name, default, low, high in bounds:
        value = params.get(name, default)
        if not low <= value <= high:
            raise ValueError(f"{name} must be between {low} and {high}")
        validated[name] = value
    return validated
|
||||
|
||||
|
||||
def get_supported_languages() -> List[str]:
    """Return a fresh copy of the supported language names (safe to mutate)."""
    return list(SUPPORTED_LANGUAGES)
|
||||
|
||||
|
||||
def get_supported_speakers(backend: str = "local") -> List[dict]:
    """Return each local speaker as a {'name', 'description'} dict.

    NOTE(review): `backend` is currently unused — only the local speaker
    list exists.
    """
    speakers = []
    for name in LOCAL_SPEAKERS:
        speakers.append({
            "name": name,
            "description": LOCAL_SPEAKER_DESCRIPTIONS.get(name, "")
        })
    return speakers
|
||||
Reference in New Issue
Block a user