Text-to-Speech with Orpheus TTS models
Hello world
!pip install torchaudio snac openai soundfile &> /dev/null
from IPython.display import Audio, display
import torch
import openai
import snac
import re
import torchaudio
# Matches Orpheus audio token markers of the form "<custom_token_123>";
# group 1 captures the numeric token id as a string. Not referenced in
# this chunk — presumably used elsewhere in the notebook to extract
# token ids from generated text; verify against the generation cell.
AUDIO_TOKENS_REGEX = re.compile(r"<custom_token_(\d+)>")
# Convert tokens into audio data
def convert_to_audio(audio_ids, model):
    """Decode a flat sequence of SNAC audio codes into a waveform tensor.

    Orpheus emits audio tokens in frames of 7 codes that interleave the
    three SNAC codebook levels. This splits each frame back into the three
    per-level code sequences SNAC expects and runs the decoder.

    Args:
        audio_ids: Flat sequence of integer code ids; length must be a
            positive multiple of 7 (one level-0, two level-1, and four
            level-2 codes per frame). Ids are assumed to already be
            offset-adjusted into SNAC's codebook range — TODO confirm
            against the caller that strips the <custom_token_N> markers.
        model: SNAC model exposing ``decode(codes)`` where ``codes`` is a
            list of three ``(1, T_level)`` integer tensors.

    Returns:
        ``audio_hat[0]``: the decoded audio for the first batch element.

    Raises:
        ValueError: If ``audio_ids`` is empty or its length is not a
            multiple of 7 (previously surfaced as an opaque reshape error).
    """
    if len(audio_ids) == 0 or len(audio_ids) % 7 != 0:
        raise ValueError(
            f"audio_ids length must be a positive multiple of 7, got {len(audio_ids)}"
        )
    # One row per frame; columns are the 7 interleaved codebook entries.
    frames = torch.tensor(audio_ids, dtype=torch.int32).reshape(-1, 7)

    # Level 0: one coarse code per frame.
    codes_0 = frames[:, 0].unsqueeze(0)
    # Level 1: columns 1 and 4, re-interleaved per frame via transpose+flatten.
    codes_1 = torch.stack((frames[:, 1], frames[:, 4])).t().flatten().unsqueeze(0)
    # Level 2: columns 2, 3, 5 and 6, re-interleaved per frame the same way.
    codes_2 = (
        torch.stack((frames[:, 2], frames[:, 3], frames[:, 5], frames[:, 6]))
        .t()
        .flatten()
        .unsqueeze(0)
    )

    with torch.inference_mode():
        audio_hat = model.decode([codes_0, codes_1, codes_2])
    return audio_hat[0]