Skip to content

Text to speech classes

tts.tts_xtts.Mouth_xtts

Mouth_xtts(model_id='tts_models/en/jenny/jenny', device='cpu', player=sd, speaker=None, wait=True, logger=None)

Bases: BaseMouth

Methods:

Name Description
run_tts

Attributes:

Name Type Description
model
device
speaker
Source code in openvoicechat/tts/tts_xtts.py
def __init__(
    self,
    model_id="tts_models/en/jenny/jenny",
    device="cpu",
    player=sd,
    speaker=None,
    wait=True,
    logger=None,
):
    """Set up a Coqui-TTS backed mouth.

    Loads the model named by *model_id*, moves it to *device*, and
    initializes the base class with the model's native output sample
    rate so playback matches what the synthesizer emits.
    """
    from TTS.api import TTS

    # The model must exist before it can be moved to the requested
    # device or queried for its output sample rate.
    tts_model = TTS(model_id)
    tts_model.to(device)

    self.model = tts_model
    self.device = device
    self.speaker = speaker

    native_rate = self.model.synthesizer.output_sample_rate
    super().__init__(
        sample_rate=native_rate,
        player=player,
        wait=wait,
        logger=logger,
    )

model instance-attribute

model = TTS(model_id)

device instance-attribute

device = device

speaker instance-attribute

speaker = speaker

run_tts

run_tts(text)
Source code in openvoicechat/tts/tts_xtts.py
def run_tts(self, text):
    """Synthesize *text* with the loaded Coqui model.

    Multilingual models are asked for English output; monolingual
    models get no language hint. Sentence splitting is disabled so the
    caller controls chunking. Returns the waveform as a numpy array.
    """
    language = "en" if self.model.is_multi_lingual else None
    samples = self.model.tts(
        text=text,
        split_sentences=False,
        speaker=self.speaker,
        language=language,
    )
    return np.array(samples)

tts.tts_elevenlabs.Mouth_elevenlabs

Mouth_elevenlabs(model_id='eleven_turbo_v2', voice_id='IKne3meq5aSn9XLyUdCD', api_key='', player=sd, wait=True, logger=None)

Bases: BaseMouth

Methods:

Name Description
run_tts

Attributes:

Name Type Description
model_id
voice_id
api_key
Source code in openvoicechat/tts/tts_elevenlabs.py
def __init__(
    self,
    model_id="eleven_turbo_v2",
    voice_id="IKne3meq5aSn9XLyUdCD",
    api_key="",
    player=sd,
    wait=True,
    logger=None,
):
    """Configure an ElevenLabs-backed mouth.

    When *api_key* is the empty string, the key is read from the
    ELEVENLABS_API_KEY environment variable (after loading .env).
    The sample rate is fixed at 44100 Hz, the rate the decoded
    ElevenLabs MP3 responses play back at.
    """
    self.model_id = model_id
    self.voice_id = voice_id

    # Only the empty string triggers the environment fallback; an
    # explicitly passed key (even None) is used as-is.
    if api_key == "":
        load_dotenv()
        api_key = os.getenv("ELEVENLABS_API_KEY")
    self.api_key = api_key

    super().__init__(sample_rate=44100, player=player, wait=wait, logger=logger)

model_id instance-attribute

model_id = model_id

voice_id instance-attribute

voice_id = voice_id

api_key instance-attribute

api_key = api_key

run_tts

run_tts(text)
Source code in openvoicechat/tts/tts_elevenlabs.py
def run_tts(self, text):
    """Synthesize *text* via the ElevenLabs HTTP API.

    Posts the text to the latency-optimized text-to-speech endpoint,
    decodes the returned MP3 with pydub, and returns the PCM samples
    as a numpy array.

    Raises:
        requests.HTTPError: if the API rejects the request (e.g. an
            invalid API key or exhausted quota).
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{self.voice_id}?optimize_streaming_latency=4"
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": f"{self.api_key}",
    }

    data = {
        "text": text,
        "model_id": self.model_id,
        "voice_settings": {"stability": 0.5, "similarity_boost": 0.5},
    }

    response = requests.post(url, json=data, headers=headers)
    # Fail loudly on HTTP errors; without this, pydub later dies with
    # an opaque decode error when handed the (non-audio) error body.
    response.raise_for_status()

    audio_segment = AudioSegment.from_file(
        io.BytesIO(response.content), format="mp3"
    )

    samples = np.array(audio_segment.get_array_of_samples())

    return samples

tts.tts_hf.Mouth_hf

Mouth_hf(model_id='kakao-enterprise/vits-vctk', device='cpu', forward_params={'speaker_id': 10}, player=sd, wait=True, logger=None)

Bases: BaseMouth

Methods:

Name Description
run_tts

Attributes:

Name Type Description
pipe
device
forward_params
Source code in openvoicechat/tts/tts_hf.py
def __init__(
    self,
    model_id="kakao-enterprise/vits-vctk",
    device="cpu",
    forward_params=None,
    player=sd,
    wait=True,
    logger=None,
):
    """Set up a Hugging Face pipeline-backed mouth.

    Builds a "text-to-speech" pipeline for *model_id* on *device* and
    initializes the base class with the pipeline's sampling rate.

    forward_params defaults to {"speaker_id": 10}; the None sentinel
    replaces the previous mutable-dict default, which was shared
    across every instance (and across calls) and could be mutated
    through self.forward_params.
    """
    from transformers import pipeline

    if forward_params is None:
        forward_params = {"speaker_id": 10}

    self.pipe = pipeline("text-to-speech", model=model_id, device=device)
    self.device = device
    self.forward_params = forward_params
    super().__init__(
        sample_rate=self.pipe.sampling_rate, player=player, wait=wait, logger=logger
    )

pipe instance-attribute

pipe = pipeline('text-to-speech', model=model_id, device=device)

device instance-attribute

device = device

forward_params instance-attribute

forward_params = forward_params

run_tts

run_tts(text)
Source code in openvoicechat/tts/tts_hf.py
def run_tts(self, text):
    """Run the Hugging Face text-to-speech pipeline on *text*.

    Updates self.sample_rate from the pipeline's reported rate (which
    may differ from the rate set at construction) and returns the
    first waveform of the output.
    """
    with torch.no_grad():
        result = self.pipe(text, forward_params=self.forward_params)
        # Keep playback in sync with the rate this model actually emits.
        self.sample_rate = result["sampling_rate"]
        return result["audio"][0]

tts.tts_piper.Mouth_piper

Mouth_piper(device='cpu', model_path='models/en_US-ryan-high.onnx', config_path='models/en_en_US_ryan_high_en_US-ryan-high.onnx.json', player=sd, wait=True, logger=None)

Bases: BaseMouth

Methods:

Name Description
run_tts

Attributes:

Name Type Description
model
Source code in openvoicechat/tts/tts_piper.py
def __init__(
    self,
    device="cpu",
    model_path="models/en_US-ryan-high.onnx",
    config_path="models/en_en_US_ryan_high_en_US-ryan-high.onnx.json",
    player=sd,
    wait=True,
    logger=None,
):
    """Load a Piper ONNX voice and wire it into the base mouth.

    CUDA inference is enabled only when *device* is exactly "cuda";
    any other value runs on CPU. The base class is initialized with
    the sample rate declared in the voice's config.
    """
    import piper

    use_cuda = device == "cuda"
    self.model = piper.PiperVoice.load(
        model_path=model_path,
        config_path=config_path,
        use_cuda=use_cuda,
    )
    super().__init__(
        sample_rate=self.model.config.sample_rate,
        player=player,
        wait=wait,
        logger=logger,
    )

model instance-attribute

model = load(model_path=model_path, config_path=config_path, use_cuda=True if device == 'cuda' else False)

run_tts

run_tts(text)
Source code in openvoicechat/tts/tts_piper.py
def run_tts(self, text):
    """Synthesize *text* with Piper and return the samples as int16.

    Collects the streamed raw-audio chunks with a single b"".join
    instead of repeated bytes concatenation, which is quadratic in
    the number of chunks.
    """
    audio = b"".join(self.model.synthesize_stream_raw(text))
    # The raw stream is interpreted as 16-bit PCM (dtype=np.int16).
    return np.frombuffer(audio, dtype=np.int16)