refactor: rename backend/frontend dirs and remove NovelWriter submodule

- Rename qwen3-tts-backend → canto-backend
- Rename qwen3-tts-frontend → canto-frontend
- Remove NovelWriter embedded repo

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 18:03:29 +08:00
parent 777a7ec006
commit 2fa9c1fcb6
346 changed files with 548 additions and 585 deletions
--- a/canto-backend/indextts/s2mel/dac/utils/init.py
+++ b/canto-backend/indextts/s2mel/dac/utils/init.py
@@ -0,0 +1,123 @@
+from pathlib import Path
+
+import argbind
+from audiotools import ml
+
+import indextts.s2mel.dac as dac
+
+# Module-level aliases: `DAC` is the codec model class taken from the local
+# `dac` package, `Accelerator` is re-exported from `audiotools.ml`.
+DAC = dac.model.DAC
+Accelerator = ml.Accelerator
+
+# Release tag that the "latest" alias resolves to,
+# keyed by (model_type, model_bitrate).
+__MODEL_LATEST_TAGS__ = {
+    ("44khz", "8kbps"): "0.0.1",
+    ("24khz", "8kbps"): "0.0.4",
+    ("16khz", "8kbps"): "0.0.5",
+    ("44khz", "16kbps"): "1.0.0",
+}
+
+# Download URL of the weights file for each known release,
+# keyed by (model_type, tag, model_bitrate).
+__MODEL_URLS__ = {
+    ("44khz", "0.0.1", "8kbps"): (
+        "https://github.com/descriptinc/descript-audio-codec/releases/download/0.0.1/weights.pth"
+    ),
+    ("24khz", "0.0.4", "8kbps"): (
+        "https://github.com/descriptinc/descript-audio-codec/releases/download/0.0.4/weights_24khz.pth"
+    ),
+    ("16khz", "0.0.5", "8kbps"): (
+        "https://github.com/descriptinc/descript-audio-codec/releases/download/0.0.5/weights_16khz.pth"
+    ),
+    ("44khz", "1.0.0", "16kbps"): (
+        "https://github.com/descriptinc/descript-audio-codec/releases/download/1.0.0/weights_44khz_16kbps.pth"
+    ),
+}
+
+
+@argbind.bind(group="download", positional=True, without_prefix=True)
+def download(
+    model_type: str = "44khz", model_bitrate: str = "8kbps", tag: str = "latest"
+):
+    """
+    Return the local path of a weights file, downloading it if it is not cached.
+
+    Parameters
+    ----------
+    model_type : str
+        The type of model to download. Must be one of "44khz", "24khz", or "16khz". Defaults to "44khz".
+        Matched case-insensitively.
+    model_bitrate: str
+        Bitrate of the model. Must be one of "8kbps", or "16kbps". Defaults to "8kbps".
+        Only 44khz model supports 16kbps. Matched case-insensitively.
+    tag : str
+        The tag of the model to download. Defaults to "latest", which is
+        resolved through ``__MODEL_LATEST_TAGS__``.
+
+    Returns
+    -------
+    Path
+        Path of the cached weights file under ``~/.cache/descript/dac``.
+
+    Raises
+    ------
+    AssertionError
+        If ``model_type`` or ``model_bitrate`` is not one of the accepted values.
+    ValueError
+        If no release is known for the requested combination, or if the
+        server answers with a status code other than 200.
+    """
+    model_type = model_type.lower()
+    # Normalise the bitrate too, consistently with model_type and tag.
+    model_bitrate = model_bitrate.lower()
+    tag = tag.lower()
+
+    assert model_type in [
+        "44khz",
+        "24khz",
+        "16khz",
+    ], "model_type must be one of '44khz', '24khz', or '16khz'"
+
+    assert model_bitrate in [
+        "8kbps",
+        "16kbps",
+    ], "model_bitrate must be one of '8kbps', or '16kbps'"
+
+    if tag == "latest":
+        # Unsupported pairs (e.g. 24khz at 16kbps) have no "latest" entry.
+        # Leave the tag untouched in that case so the URL lookup below reports
+        # the problem as a ValueError instead of leaking a bare KeyError.
+        tag = __MODEL_LATEST_TAGS__.get((model_type, model_bitrate), tag)
+
+    download_link = __MODEL_URLS__.get((model_type, tag, model_bitrate))
+
+    if download_link is None:
+        raise ValueError(
+            f"Could not find model with tag {tag}, model type {model_type} "
+            f"and bitrate {model_bitrate}"
+        )
+
+    local_path = (
+        Path.home()
+        / ".cache"
+        / "descript"
+        / "dac"
+        / f"weights_{model_type}_{model_bitrate}_{tag}.pth"
+    )
+    if not local_path.exists():
+        local_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Imported lazily so `requests` is only needed on a cache miss.
+        import requests
+
+        # (connect, read) timeouts: without them a stalled connection would
+        # block forever. The read timeout applies between received chunks,
+        # not to the whole transfer, so large files are still fine.
+        response = requests.get(download_link, timeout=(10, 60))
+
+        if response.status_code != 200:
+            raise ValueError(
+                f"Could not download model. Received response code {response.status_code}"
+            )
+
+        # Write next to the final location and rename afterwards, so an
+        # interrupted write never leaves a truncated file that would pass the
+        # `exists()` check on the next call.
+        partial_path = local_path.with_name(local_path.name + ".part")
+        partial_path.write_bytes(response.content)
+        partial_path.replace(local_path)
+
+    return local_path
+
+
+def load_model(
+    model_type: str = "44khz",
+    model_bitrate: str = "8kbps",
+    tag: str = "latest",
+    load_path: str = None,
+):
+    """
+    Load a DAC model from a weights file.
+
+    Parameters
+    ----------
+    model_type : str
+        Forwarded to `download` when `load_path` is empty. Defaults to "44khz".
+    model_bitrate : str
+        Forwarded to `download` when `load_path` is empty. Defaults to "8kbps".
+    tag : str
+        Forwarded to `download` when `load_path` is empty. Defaults to "latest".
+    load_path : str, optional
+        Path to a weights file. When falsy (None or ""), the path returned by
+        `download` is used instead.
+
+    Returns
+    -------
+    The object returned by ``DAC.load`` for the selected weights file.
+    """
+    # Any falsy value (None, "") means "no explicit weights file given".
+    weights_file = load_path or download(
+        model_type=model_type, model_bitrate=model_bitrate, tag=tag
+    )
+    return DAC.load(weights_file)
--- a/canto-backend/indextts/s2mel/dac/utils/decode.py
+++ b/canto-backend/indextts/s2mel/dac/utils/decode.py
@@ -0,0 +1,95 @@
+import warnings
+from pathlib import Path
+
+import argbind
+import numpy as np
+import torch
+from audiotools import AudioSignal
+from tqdm import tqdm
+
+from dac import DACFile
+from dac.utils import load_model
+
+# Ignore every UserWarning for the rest of the process (presumably to keep
+# the command-line output uncluttered).
+warnings.filterwarnings("ignore", category=UserWarning)
+
+
+@argbind.bind(group="decode", positional=True, without_prefix=True)
+@torch.inference_mode()
+@torch.no_grad()
+def decode(
+    input: str,
+    output: str = "",
+    weights_path: str = "",
+    model_tag: str = "latest",
+    model_bitrate: str = "8kbps",
+    device: str = "cuda",
+    model_type: str = "44khz",
+    verbose: bool = False,
+):
+    """Reconstruct .wav audio from .dac files.
+
+    Parameters
+    ----------
+    input : str
+        Path to a .dac file, or to a directory that is searched recursively
+        for .dac files.
+    output : str, optional
+        Path to output directory, by default "". The directory is created if
+        needed. If `input` is a directory, the directory sub-tree relative to
+        `input` is re-created in `output`.
+    weights_path : str, optional
+        Path to weights file, by default "". If not specified, the weights
+        file is obtained through `load_model` using `model_tag`,
+        `model_bitrate` and `model_type`.
+    model_tag : str, optional
+        Tag of the model to use, by default "latest". Ignored if `weights_path` is specified.
+    model_bitrate: str
+        Bitrate of the model. Must be one of "8kbps", or "16kbps". Defaults to "8kbps".
+    device : str, optional
+        Device the model is moved to, by default "cuda".
+    model_type : str, optional
+        The type of model to use. Must be one of "44khz", "24khz", or "16khz". Defaults to "44khz". Ignored if `weights_path` is specified.
+    verbose : bool, optional
+        Forwarded to the model's `decompress` call, by default False.
+    """
+    generator = load_model(
+        model_type=model_type,
+        model_bitrate=model_bitrate,
+        tag=model_tag,
+        load_path=weights_path,
+    )
+    generator.to(device)
+    generator.eval()
+
+    # Collect the work list: every .dac file below `input`, plus `input`
+    # itself when it is a single .dac file.
+    source = Path(input)
+    dac_files = list(source.glob("**/*.dac"))
+    if source.suffix == ".dac":
+        dac_files.append(source)
+
+    out_root = Path(output)
+    out_root.mkdir(parents=True, exist_ok=True)
+
+    for dac_file in tqdm(dac_files, desc="Decoding files"):
+        artifact = DACFile.load(dac_file)
+
+        # Reconstruct audio from the stored codes
+        recons = generator.decompress(artifact, verbose=verbose)
+
+        relative_path = dac_file.relative_to(source)
+        if relative_path.name:
+            # Directory input: mirror the sub-tree below `input`.
+            output_dir = out_root / relative_path.parent
+        else:
+            # `input` was the file itself, so the relative path is empty;
+            # write directly into the output directory under the file's name.
+            output_dir = out_root
+            relative_path = dac_file
+
+        output_path = output_dir / relative_path.with_suffix(".wav").name
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        recons.write(output_path)
+
+
+# Script entry point: parse the command line with argbind, then call `decode`
+# with no explicit arguments inside the argbind scope (its parameters are
+# presumably filled in from the parsed arguments by the `argbind.bind`
+# decorator).
+if __name__ == "__main__":
+    args = argbind.parse_args()
+    with argbind.scope(args):
+        decode()
--- a/canto-backend/indextts/s2mel/dac/utils/encode.py
+++ b/canto-backend/indextts/s2mel/dac/utils/encode.py
@@ -0,0 +1,94 @@
+import math
+import warnings
+from pathlib import Path
+
+import argbind
+import numpy as np
+import torch
+from audiotools import AudioSignal
+from audiotools.core import util
+from tqdm import tqdm
+
+from dac.utils import load_model
+
+# Ignore every UserWarning for the rest of the process (presumably to keep
+# the command-line output uncluttered).
+warnings.filterwarnings("ignore", category=UserWarning)
+
+
+@argbind.bind(group="encode", positional=True, without_prefix=True)
+@torch.inference_mode()
+@torch.no_grad()
+def encode(
+    input: str,
+    output: str = "",
+    weights_path: str = "",
+    model_tag: str = "latest",
+    model_bitrate: str = "8kbps",
+    n_quantizers: int = None,
+    device: str = "cuda",
+    model_type: str = "44khz",
+    win_duration: float = 5.0,
+    verbose: bool = False,
+):
+    """Compress the audio files found under the input path into .dac files.
+
+    Parameters
+    ----------
+    input : str
+        Path to input audio file or directory
+    output : str, optional
+        Path to output directory, by default "". The directory is created if
+        needed. If `input` is a directory, the directory sub-tree relative to
+        `input` is re-created in `output`.
+    weights_path : str, optional
+        Path to weights file, by default "". If not specified, the weights
+        file is obtained through `load_model` using `model_tag`,
+        `model_bitrate` and `model_type`.
+    model_tag : str, optional
+        Tag of the model to use, by default "latest". Ignored if `weights_path` is specified.
+    model_bitrate: str
+        Bitrate of the model. Must be one of "8kbps", or "16kbps". Defaults to "8kbps".
+    n_quantizers : int, optional
+        Number of quantizers to use, by default None. If not specified, all the quantizers will be used and the model will compress at maximum bitrate.
+    device : str, optional
+        Device the model is moved to, by default "cuda"
+    model_type : str, optional
+        The type of model to use. Must be one of "44khz", "24khz", or "16khz". Defaults to "44khz". Ignored if `weights_path` is specified.
+    win_duration : float, optional
+        Passed positionally to the model's `compress` call, by default 5.0
+        (presumably the processing window length in seconds -- confirm
+        against the model implementation).
+    verbose : bool, optional
+        Forwarded to the model's `compress` call, by default False.
+    """
+    generator = load_model(
+        model_type=model_type,
+        model_bitrate=model_bitrate,
+        tag=model_tag,
+        load_path=weights_path,
+    )
+    generator.to(device)
+    generator.eval()
+
+    # Collect every audio file reachable from the input path
+    source = Path(input)
+    audio_files = util.find_audio(source)
+
+    out_root = Path(output)
+    out_root.mkdir(parents=True, exist_ok=True)
+
+    for audio_file in tqdm(audio_files, desc="Encoding files"):
+        signal = AudioSignal(audio_file)
+
+        # Encode audio to .dac format
+        artifact = generator.compress(
+            signal, win_duration, verbose=verbose, n_quantizers=n_quantizers
+        )
+
+        relative_path = audio_file.relative_to(source)
+        if relative_path.name:
+            # Directory input: mirror the sub-tree below `input`.
+            output_dir = out_root / relative_path.parent
+        else:
+            # `input` was the file itself, so the relative path is empty;
+            # write directly into the output directory under the file's name.
+            output_dir = out_root
+            relative_path = audio_file
+
+        output_path = output_dir / relative_path.with_suffix(".dac").name
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        artifact.save(output_path)
+
+
+# Script entry point: parse the command line with argbind, then call `encode`
+# with no explicit arguments inside the argbind scope (its parameters are
+# presumably filled in from the parsed arguments by the `argbind.bind`
+# decorator).
+if __name__ == "__main__":
+    args = argbind.parse_args()
+    with argbind.scope(args):
+        encode()