@geppetto-app/geppetto

v0.3.2

Published

6 months ago

A TypeScript client for the Geppetto API

Downloads

0High
0Medium
0Low

cjpais

geppetto

This library is a simple wrapper of the geppetto.app API.

It currently supports speaking (text to speech), seeing (describing and answering questions about images), and hearing (transcribing audio). More modalities will be added in the future.

Installation

npm install @geppetto-app/geppetto

Usage

Speak

There are two primary ways to use the library for speech: sync and streaming. Sync returns the entire audio file as a buffer, while streaming returns a readable stream of the audio file. Streaming is useful in applications where you want the lowest possible latency between generation and playback.

Speech Examples

Writing to a file

import fs from 'fs';
import { Geppetto } from '@geppetto-app/geppetto';

const geppetto = new Geppetto();

// Synthesizes one sentence and saves the returned audio to output.mp3.
async function main() {
    // Without `stream: true` the call resolves with the entire audio file as a buffer.
    const response = await geppetto.speak({
        text: "I'm a real boy!"
    });

    fs.writeFileSync('output.mp3', response);
}

// Report a failed request instead of leaving the promise rejection unhandled.
main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

Streaming Speech

import fs from 'fs';
import { Geppetto } from '@geppetto-app/geppetto';

const geppetto = new Geppetto();

// Streams synthesized speech and saves the collected audio to output.mp3.
async function main() {
    // With `stream: true` the call resolves with a stream that can be iterated chunk by chunk.
    const response = await geppetto.speak({
        text: "I'm a real boy!",
        stream: true
    });

    const chunks = [];
    for await (const chunk of response) {
        // do something with each chunk as it arrives; this example just collects
        // them in memory so they can be written to a file in one go
        chunks.push(chunk);
    }

    fs.writeFileSync('output.mp3', Buffer.concat(chunks));
}

// Report a failed request instead of leaving the promise rejection unhandled.
main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

Speech Params

/**
 * Options accepted by `geppetto.speak()`.
 */
export interface SpeakOptions {
  /** The text to speak. */
  text: string;

  /** The voice to speak in (default is semaine). */
  voice?: GeppettoSpeakers; // "semaine" | "ryan"

  /** The rate of speech (default is 1). */
  speed?: number;

  /** The pitch of speech (default is 1). */
  pitch?: number; 

  /**
   * The audio format to output in (default is mp3).
   * pcm is output in 16000hz 16bit signed little endian (s16le).
   */
  format?: GeppettoSpeakFormats; // "mp3" | "wav" | "ogg" | "pcm"

  /**
   * How much to delay speech between punctuation (default is 150).
   * NOTE(review): the unit is not stated here; presumably milliseconds. Confirm.
   */
  sentenceSilence?: number;

  /** Whether the response should be streamed (true) or returned in full (sync). */
  stream?: boolean;
}

See

See lets you send an image to the geppetto API and get back a description of the image. It can also answer questions about images.

The responses can be streamed.

See Examples

Getting a description of an image

import { Geppetto } from '@geppetto-app/geppetto';

// Ask for a description of a base64 encoded image and print the result.
const client = new Geppetto();

const description = await client.see({ image: "<base64 encoded image>" });

console.log(description);

Getting a description with details about generation

import { Geppetto } from '@geppetto-app/geppetto';

// Request a description together with details about the generation.
const client = new Geppetto();

const detailed = await client.see({
    image: "<base64 encoded image>",
    verbose: true,
});

console.log(detailed);

Streaming response

import { Geppetto } from '@geppetto-app/geppetto';

const geppetto = new Geppetto();

// With `stream: true` the result is iterated chunk by chunk instead of
// arriving as a single response.
const stream = await geppetto.see({
    image: "<base64 encoded image>",
    stream: true,
});

// Print each chunk as it arrives and accumulate its content into the full message.
let message = "";
for await (const chunk of stream) {
    console.log(chunk);
    message += chunk.content;
}

console.log(message);

See Params

/**
 * Options accepted by `geppetto.see()`.
 */
type SeeOptions = {
    /** REQUIRED: the base64 encoded image. */
    image: string;

    /** The question to ask about the image (default is 'describe this image'). */
    prompt?: string | undefined;

    /** The system prompt to use (default is ""). */
    system_prompt?: string | undefined;

    /** Whether the response should be streamed or sync. */
    stream?: boolean | undefined;

    /** The temperature of the model's generation. */
    temperature?: number | undefined;

    /** The maximum number of tokens to generate. */
    max_tokens?: number | undefined;

    /**
     * NOTE(review): not documented here; presumably penalizes tokens that have
     * already appeared in the output. Confirm against the API docs.
     */
    presence_penalty?: number | undefined;

    /**
     * NOTE(review): not documented here; presumably penalizes tokens in
     * proportion to how often they have appeared. Confirm against the API docs.
     */
    frequency_penalty?: number | undefined;

    /** The cumulative probability of tokens to generate. */
    top_p?: number | undefined;

    /** Whether the response should give more verbose information. */
    verbose?: boolean | undefined;
}

See Return Types

Default

/**
 * Default return shape of a See request (neither verbose nor streaming).
 */
type SeeResponse = {
    /** The generated content. */
    content: string;
}

Verbose

/**
 * Return shape of a See request made with `verbose: true`: the generated
 * content plus details about the generation.
 */
type SeeResponseVerbose = {
    /** The generated content. */
    content: string;

    /** The model used to generate the content. */
    model: string;

    /** Timings about the generation. */
    timings: {
        /** How long it took to generate the content (presumably milliseconds, per the `_ms` suffix). */
        predicted_ms: number;

        /** How many tokens were predicted. */
        predicted_n: number;

        /** How many tokens were predicted per second. */
        predicted_per_second: number;

        /** How long it took to process the prompt (presumably milliseconds, per the `_ms` suffix). */
        prompt_ms: number;
    };
    /** How many tokens were cached. */
    tokens_cached: number;

    /** How many tokens were evaluated. */
    tokens_evaluated: number;

    /** How many tokens were predicted. */
    tokens_predicted: number;
}

Streaming

/**
 * Shape of each chunk yielded when a See request is made with `stream: true`.
 */
type SeeStreamingResponse = {
    /** The generated content carried by this chunk. */
    content: string;

    /** Whether the generation is complete. */
    stop: boolean;
}

Hear

Hear lets you send audio to the geppetto API and get back a transcription of the audio.

Hear Examples

Transcribing audio from a file

import fs from 'fs';
import { Geppetto } from '@geppetto-app/geppetto';

// Load the audio file into memory, send it for transcription, and print the text.
const client = new Geppetto();
const audio = fs.readFileSync("<path to audio file>");

const transcription = await client.hear({ file: audio });

console.log(transcription.text);

Hear Params

/**
 * Options accepted by `geppetto.hear()`.
 */
type HearOptions = {
    /** REQUIRED: the audio file to transcribe. Max size: 25MB. */
    file: Buffer | File;

    /** Model to use for transcription. */
    model?: "whisper-tiny";

    /** Language to transcribe in (ISO-639-1 format). */
    language?: string;

    /** What prompt to use for the transcription. */
    prompt?: string;

    /** What format to respond in. */
    response_format?: "json" | "text" | "srt" | "vtt" | "verbose_json";

    /** What temperature to use for the model. */
    temperature?: number;

    /** The increment of temperature, between 0 and 1. */
    temperature_inc?: number;
}

Hear Return Types

JSON

/**
 * Return shape of a Hear request in the JSON response format.
 */
type HearResponse = {
    /** The transcription of the audio. */
    text: string;
}

Verbose JSON

/**
 * Return shape of a Hear request in the verbose JSON response format
 * (presumably selected with `response_format: "verbose_json"`; confirm).
 *
 * NOTE(review): the field descriptions below are inferred from the field
 * names only; verify them against the API documentation.
 */
type HearResponseVerbose = {
    /** The transcription of the audio. */
    text: string;
    /** Presumably the language of the audio. */
    language: string;
    /** Which task was performed. */
    task: "transcribe" | "translate";
    /** Presumably the duration of the audio; unit not stated here. */
    duration: number;
    /** Presumably the transcription broken into consecutive segments. */
    segments: {
        /** Presumably the text of this segment. */
        text: string;
        temperature: number;
        id: number;
        /** Presumably where the segment starts and ends in the audio; unit not stated here. */
        start: number;
        end: number;
        tokens: number[];
        /** Per-word details for this segment. */
        words: HearResponseWord[];
        avg_logprob: number;
    }[];
}

/**
 * A single word entry inside a verbose Hear response segment.
 *
 * NOTE(review): the field descriptions below are inferred from the field
 * names only; verify them against the API documentation.
 */
type HearResponseWord = {
    /** Presumably the text of the word. */
    word: string;
    /** Presumably where the word starts and ends in the audio; unit not stated here. */
    start: number;
    end: number;
    /** NOTE(review): meaning not documented here; confirm before relying on it. */
    t_dtw: number;
    /** Presumably the model's confidence for this word. */
    probability: number;
}