Speech-to-text classes

stt.stt_deepgram.Ear_deepgram

Ear_deepgram(silence_seconds=2, api_key='', listener=None, logger=None)

Bases: BaseEar

Methods:

Name               Description
transcribe_stream  Streams queued audio to Deepgram's realtime websocket API and puts transcripts on a queue.

Attributes:

Name     Type  Description
api_key  str   Deepgram API key used to authorize the websocket connection.
Source code in openvoicechat/stt/stt_deepgram.py
def __init__(self, silence_seconds=2, api_key="", listener=None, logger=None):
    # stream=True selects BaseEar's streaming path, which drives transcribe_stream
    super().__init__(silence_seconds, stream=True, listener=listener, logger=logger)
    self.api_key = api_key

api_key instance-attribute

api_key = api_key

transcribe_stream

transcribe_stream(audio_queue, transcription_queue)

Reads raw audio chunks from audio_queue, relays them to Deepgram over a websocket, and puts each non-empty transcript on transcription_queue. A None on audio_queue closes the stream; a None on transcription_queue signals that transcription has ended.
Source code in openvoicechat/stt/stt_deepgram.py
def transcribe_stream(self, audio_queue, transcription_queue):
    # Deepgram authorizes the websocket via a token header
    extra_headers = {"Authorization": "token " + self.api_key}

    async def f():
        # the query string must match the audio pushed on audio_queue:
        # 16 kHz, mono, 16-bit linear PCM, transcribed with the nova-2 model
        async with websockets.connect(
            "wss://api.deepgram.com/v1/listen?encoding=linear16&sample_rate=16000"
            "&channels=1&model=nova-2",
            extra_headers=extra_headers,
        ) as ws:

            async def sender(ws):  # sends audio to the websocket
                try:
                    while True:
                        data = audio_queue.get()
                        if data is None:  # sentinel: ask Deepgram to close the stream
                            await ws.send(json.dumps({"type": "CloseStream"}))
                            break
                        await ws.send(data)
                except Exception as e:
                    print("Error while sending: ", str(e))
                    raise

            async def receiver(ws):  # turns Deepgram responses into transcripts
                async for msg in ws:
                    msg = json.loads(msg)
                    if "channel" not in msg:
                        # non-result message (e.g. closing metadata): end of stream
                        transcription_queue.put(None)
                        break
                    transcript = msg["channel"]["alternatives"][0]["transcript"]

                    if transcript:  # skip empty results
                        transcription_queue.put(transcript)

            await asyncio.gather(sender(ws), receiver(ws))

    asyncio.run(f())
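
A minimal usage sketch for the queue protocol above, driving transcribe_stream by hand in a background thread with standard queue.Queue objects. The file name is hypothetical, and in normal use BaseEar wires these queues itself:

import os
import wave
from queue import Queue
from threading import Thread

from openvoicechat.stt.stt_deepgram import Ear_deepgram

ear = Ear_deepgram(api_key=os.environ["DEEPGRAM_API_KEY"])
audio_queue, transcription_queue = Queue(), Queue()

# transcribe_stream blocks (it calls asyncio.run), so run it in a worker thread
Thread(
    target=ear.transcribe_stream,
    args=(audio_queue, transcription_queue),
    daemon=True,
).start()

# hypothetical clip; must match the websocket URL: 16 kHz, mono, linear16 PCM
with wave.open("clip_16k_mono.wav", "rb") as w:
    audio_queue.put(w.readframes(w.getnframes()))
audio_queue.put(None)  # sentinel: sender forwards CloseStream to Deepgram

while (text := transcription_queue.get()) is not None:
    print(text)  # receiver puts None once the stream has ended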

stt.stt_hf.Ear_hf

Ear_hf(model_id='openai/whisper-base.en', device='cpu', silence_seconds=2, generate_kwargs=None, listener=None, listen_interruptions=True, logger=None)

Bases: BaseEar

Methods:

Name        Description
transcribe  Runs an audio clip through the ASR pipeline and returns the stripped transcript.

Attributes:

Name             Type  Description
pipe                   Hugging Face automatic-speech-recognition pipeline.
device                 Device the pipeline runs on (e.g. 'cpu' or 'cuda').
generate_kwargs        Keyword arguments forwarded to the pipeline call in transcribe.
Source code in openvoicechat/stt/stt_hf.py
def __init__(
    self,
    model_id="openai/whisper-base.en",
    device="cpu",
    silence_seconds=2,
    generate_kwargs=None,
    listener=None,
    listen_interruptions=True,
    logger=None,
):
    super().__init__(
        silence_seconds,
        listener=listener,
        listen_interruptions=listen_interruptions,
        logger=logger,
    )
    # imported lazily so transformers is only required when Ear_hf is used
    from transformers import pipeline

    self.pipe = pipeline(
        "automatic-speech-recognition", model=model_id, device=device
    )
    self.device = device
    self.generate_kwargs = generate_kwargs

pipe instance-attribute

pipe = pipeline('automatic-speech-recognition', model=model_id, device=device)

device instance-attribute

device = device

generate_kwargs instance-attribute

generate_kwargs = generate_kwargs
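
generate_kwargs is forwarded unchanged to the pipeline call in transcribe below. A hedged construction example, assuming a Whisper-style model that accepts standard generation options such as max_new_tokens:

from openvoicechat.stt.stt_hf import Ear_hf

ear = Ear_hf(
    model_id="openai/whisper-base.en",
    device="cpu",
    generate_kwargs={"max_new_tokens": 128},  # illustrative cap on decoded tokens
)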

transcribe

transcribe(audio)

Runs the audio through the ASR pipeline under torch.no_grad() and returns the transcript with surrounding whitespace stripped.
Source code in openvoicechat/stt/stt_hf.py
def transcribe(self, audio):
    # imported lazily, mirroring the lazy transformers import in __init__
    from torch import no_grad

    with no_grad():  # inference only: skip gradient tracking
        transcription = self.pipe(audio, generate_kwargs=self.generate_kwargs)
    return transcription["text"].strip()
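
A short sketch of calling transcribe directly, reusing the Ear_hf instance constructed above and assuming (as the Hugging Face ASR pipeline expects) that audio is a mono float32 NumPy array at the model's sampling rate; a dict of the form {'sampling_rate': 16000, 'raw': audio} is also accepted by the pipeline:

import numpy as np

# one second of silence at 16 kHz, used as a placeholder signal
audio = np.zeros(16000, dtype=np.float32)
print(ear.transcribe(audio))  # transcript text, likely empty for silence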