add MLX-LLM container

parent 503f0bd157
commit d9836ea592

1  voicebot-rt/.gitignore  vendored
@@ -1,2 +1,3 @@
 .env
 *.env
+app-openai-stt-tts
@@ -4,14 +4,14 @@ WORKDIR /usr/src/app
 
 COPY package*.json ./
 RUN npm install --omit=dev
-RUN npm install -g javascript-obfuscator
+# RUN npm install -g javascript-obfuscator
 
 COPY index.js index.js
 COPY utils.js utils.js
 COPY prices.json prices.json
 
-RUN javascript-obfuscator index.js --output index.js
-RUN javascript-obfuscator utils.js --output utils.js
+# RUN javascript-obfuscator index.js --output index.js
+# RUN javascript-obfuscator utils.js --output utils.js
 
 EXPOSE 3001
 
@@ -1,5 +1,5 @@
 {
-  "name": "asterisk-voicebot-rt",
+  "name": "asterisk-voicebot-rt-gc-gpt35t",
   "version": "1.0.0",
   "description": "Asterisk Voicebot Realtime is a Node.js server that facilitates real-time audio processing and communication using various Google Cloud services and OpenAI's GPT-3.5-turbo model.",
   "main": "index.js",
18  voicebot-rt/app-gc-mlx-llm/Dockerfile  Normal file
@@ -0,0 +1,18 @@
FROM node:slim

WORKDIR /usr/src/app

COPY package*.json ./
RUN npm install --omit=dev
# RUN npm install -g javascript-obfuscator

COPY index.js index.js
COPY utils.js utils.js
COPY prices.json prices.json

# RUN javascript-obfuscator index.js --output index.js
# RUN javascript-obfuscator utils.js --output utils.js

EXPOSE 3002

CMD ["node", "index.js"]
422  voicebot-rt/app-gc-mlx-llm/index.js  Normal file
@@ -0,0 +1,422 @@
// ====================================
// Voice Bot Implementation
// A real-time voice chat system that converts speech to text,
// processes it through AI (OpenAI/Botpress), and converts responses back to speech
// ====================================

// Required Dependencies
const net = require('net');
const { SpeechClient } = require('@google-cloud/speech');
const { TextToSpeechClient } = require('@google-cloud/text-to-speech');
const { Transform } = require('stream');
const { OpenAI } = require('openai');
const util = require('util');
const async = require('async');
const { encoding_for_model } = require('tiktoken');
const axios = require('axios');
const { toUUID } = require('./utils');
const packageInfo = require('./package.json');
const prices = require('./prices.json');
const {
  matchesSentenceEnding,
  removeSpecialCharacters,
  calculateMetricsAndPricing
} = require('./utils');

// Load environment variables
require('dotenv').config();

// ====================================
// Packet Protocol Definition
// Defines the types of packets that can be exchanged between client and server
// ====================================
const PACKET_TYPES = {
  'TERMINATE': 0x0,  // Signal to end the connection
  'UUID': 0x1,       // Client identification
  'AUDIO': 0x10,     // Audio data packet
  'ERROR': 0xff      // Error notification
};

// ====================================
// Socket State Management
// Tracks the state of the socket connection
// ====================================
function isSocketActive(socket) {
  return socket && !socket.destroyed && socket.writable;
}

// ====================================
// Packet Handler
// Processes incoming packets based on their type
// ====================================
function handlePacket(socket, audioStream, packet) {
  const packetType = packet.readUInt8(0);
  const packetLength = packet.readUInt16BE(1);

  switch (packetType) {
    case PACKET_TYPES.TERMINATE:
      console.log('Terminate packet received. Initiating graceful shutdown.');
      // Clean up streams and pending operations
      if (audioStream) {
        audioStream.end();
      }

      // Set a flag to prevent new operations
      socket.isTerminating = true;

      // Give time for pending operations to complete
      setTimeout(() => {
        if (isSocketActive(socket)) {
          socket.end();
        }
      }, 1000);
      break;

    case PACKET_TYPES.UUID:
      if (!isSocketActive(socket)) return;
      const uuid = toUUID(packet.slice(3, 19).toString('hex'));
      socket.uuid = uuid;
      console.log('UUID packet received: ' + uuid);
      break;

    case PACKET_TYPES.AUDIO:
      if (!isSocketActive(socket) || socket.isTerminating) return;
      const audioData = packet.slice(3, 3 + packetLength);
      if (audioStream && !audioStream.destroyed) {
        audioStream.write(audioData);
      }
      break;

    case PACKET_TYPES.ERROR:
      if (!isSocketActive(socket)) return;
      const errorCode = packetLength > 0 ? packet.readUInt8(3) : null;
      console.log('Error packet received with code: ' + errorCode);
      break;

    default:
      console.log('Unknown packet type: ' + packetType);
  }
}

// Safe write function to prevent write-after-end errors
function safeSocketWrite(socket, data) {
  if (isSocketActive(socket) && !socket.isTerminating) {
    try {
      socket.write(data);
    } catch (err) {
      console.error('Error writing to socket:', err);
    }
  }
}

// ====================================
// Main Server Implementation
// Creates and manages the TCP server that handles client connections
// ====================================
const server = net.createServer(async socket => {
  console.log('Client connected');

  // State variables
  let assistant = null;
  let messages = [];
  let totalCost = 0;
  let isProcessing = false;
  let isAssistantRunning = false;
  let fullResponse = '';

  // Initialize AI clients
  const ttsClient = new TextToSpeechClient();
  const speechClient = new SpeechClient();

  // Initialize OpenAI if a base URL is provided
  let openai = null;
  if (process.env.OPENAI_BASE_URL) {
    openai = new OpenAI({
      baseURL: process.env.OPENAI_BASE_URL,
      apiKey: 'no-key', // Always use no-key for this custom endpoint
    });
    console.log('OpenAI client initialized with:', {
      baseURL: process.env.OPENAI_BASE_URL,
      apiKey: 'no-key'
    });
  }

  // Initialize Botpress if webhook URL is provided
  let botpress = null;
  if (process.env.BOTPRESS_WEBHOOK_URL) {
    openai = null; // Disable OpenAI if using Botpress
    botpress = axios.create({ baseURL: process.env.BOTPRESS_WEBHOOK_URL });
  }

  // Create OpenAI assistant if ID is provided
  if (openai && process.env.OPENAI_ASSISTANT_ID) {
    assistant = await openai.beta.assistants.create();
  }

  // ====================================
  // Message Handler
  // Tracks messages and calculates costs
  // ====================================
  const handleMessage = (socket, role, content) => {
    const messageData = {
      uuid: socket.uuid || '',
      role: role,
      content: content,
      ...calculateMetricsAndPricing(role, content)
    };

    // Update costs based on message role
    switch (role) {
      case 'system':
        // system metrics only include a token cost
        totalCost += messageData.costByToken;
        break;
      case 'user':
        totalCost += messageData.costBySecond;
        totalCost += messageData.costByToken;
        console.log(messageData);
        break;
      case 'assistant':
        totalCost += messageData.costByCharacter;
        totalCost += messageData.costByToken;
        // console.log(messageData);
        break;
    }

    messages.push(messageData);
  };

  // ====================================
  // Text-to-Speech Handler
  // Converts text responses to speech and streams audio back to client
  // ====================================
  const ttsQueue = async.queue(async task => {
    const { message } = task;
    const request = {
      input: { text: message },
      voice: {
        languageCode: process.env.TEXT_TO_SPEECH_LANGUAGE || 'en-US',
        ssmlGender: process.env.TEXT_TO_SPEECH_GENDER || 'FEMALE',
        name: process.env.TEXT_TO_SPEECH_NAME || 'en-US-Journey-F'
      },
      audioConfig: {
        audioEncoding: 'LINEAR16',
        sampleRateHertz: 8000,
        speakingRate: 1
      }
    };

    try {
      const [response] = await ttsClient.synthesizeSpeech(request);
      const assistantMessage = {
        uuid: socket.uuid || '',
        role: 'assistant',
        content: message,
        ...calculateMetricsAndPricing('assistant', message)
      };
      console.log(assistantMessage);

      const audioContent = response.audioContent;
      const chunkSize = 320;

      // Stream audio in chunks
      for (let i = 0; i < audioContent.length; i += chunkSize) {
        if (!isProcessing) break;

        const chunk = audioContent.slice(i, i + chunkSize);
        const header = Buffer.alloc(3);
        header.writeUInt8(PACKET_TYPES.AUDIO, 0);
        header.writeUInt16BE(chunk.length, 1);

        const packet = Buffer.concat([header, chunk]);
        safeSocketWrite(socket, packet);
        await new Promise(resolve => setTimeout(resolve, 20));
      }
    } catch (error) {
      console.error('Error synthesizing speech:', error);
    }
  }, 1);

  ttsQueue.drain(() => {});

  // ====================================
  // Speech-to-Text Setup
  // Configures and manages speech recognition
  // ====================================
  /* Start addition to check audio level */
  let audioSampleWindow = [];
  const WINDOW_SIZE = 10;       // Number of chunks to analyze
  const AUDIO_THRESHOLD = 700;  // Adjust this threshold based on testing
  /* End addition to check audio level */

  const audioStream = new Transform({
    transform(chunk, encoding, callback) {
      /* Start addition to check audio level */
      // Calculate RMS (Root Mean Square) of the audio chunk
      let sum = 0;
      for (let i = 0; i < chunk.length; i += 2) {
        // Convert 2 bytes to a 16-bit integer
        const sample = chunk.readInt16LE(i);
        sum += sample * sample;
      }
      const rms = Math.sqrt(sum / (chunk.length / 2));

      // Maintain a sliding window of audio levels
      audioSampleWindow.push(rms);
      if (audioSampleWindow.length > WINDOW_SIZE) {
        audioSampleWindow.shift();
      }

      // Calculate average RMS over the window
      const avgRMS = audioSampleWindow.reduce((a, b) => a + b, 0) / audioSampleWindow.length;

      // Set isProcessing based on audio level
      if (avgRMS > AUDIO_THRESHOLD) {
        isProcessing = false;
        console.log('Audio level triggered:', avgRMS.toFixed(2));
      }
      /* End addition to check audio level */

      callback(null, chunk);
    }
  });

  // Add system prompt to messages
  handleMessage(
    socket,
    'system',
    process.env.SYSTEM_PROMPT || 'You are a helpful assistant.'
  );

  // Configure speech recognition
  const recognitionConfig = {
    config: {
      encoding: 'LINEAR16',
      sampleRateHertz: 8000,
      languageCode: process.env.SPEECH_RECOGNITION_LANGUAGE || 'en-US',
      model: process.env.SPEECH_RECOGNITION_MODEL || 'phone_call',
      useEnhanced: true
    },
    interimResults: false
  };

  // Add alternative languages if specified
  if (process.env.SPEECH_RECOGNITION_ALTERNATIVE_LANGUAGES) {
    recognitionConfig.alternativeLanguageCodes =
      process.env.SPEECH_RECOGNITION_ALTERNATIVE_LANGUAGES.split(',');
  }

  // ====================================
  // Speech Recognition Stream Handler
  // Processes speech recognition results and manages AI responses
  // ====================================
  const recognizeStream = speechClient
    .streamingRecognize(recognitionConfig)
    .on('error', console.error)
    .on('data', async data => {
      try {
        if (
          data.results[0]?.alternatives[0]?.transcript &&
          data.results[0]?.alternatives[0]?.confidence > 0
        ) {
          const transcript = data.results[0].alternatives[0].transcript.trim();

          if (transcript) {
            let response;
            isProcessing = false;
            handleMessage(socket, 'user', transcript);

            if (openai) {
              if (process.env.OPENAI_ASSISTANT_ID) {
                // Wait if assistant is still processing
                while (isAssistantRunning) {
                  console.log('Assistant is running...');
                  await new Promise(resolve => setTimeout(resolve, 1000));
                }

                // Process with OpenAI Assistant
                await openai.beta.assistants.messages.create(
                  assistant.id,
                  { role: 'user', content: transcript }
                );
                isAssistantRunning = true;
                response = await openai.beta.assistants.runs.create(
                  assistant.id,
                  {
                    assistant_id: process.env.OPENAI_ASSISTANT_ID,
                    stream: true
                  }
                );
              } else {
                // Process with standard OpenAI chat
                response = await openai.chat.completions.create({
                  model: process.env.OPENAI_MODEL || 'gpt-3.5-turbo',
                  messages: messages,
                  max_tokens: 150,
                  stream: true
                });
              }

              let currentResponse = '';
              for await (const chunk of response) {
                let content = '';

                if (process.env.OPENAI_ASSISTANT_ID) {
                  if (Array.isArray(chunk.data.delta?.content) &&
                      chunk.data.delta.content[0]?.text) {
                    content = chunk.data.delta.content[0].text.value;
                  }
                } else {
                  content = chunk.choices[0]?.delta?.content;
                }

                if (content) {
                  currentResponse += content;
                  currentResponse = removeSpecialCharacters(currentResponse);
                  fullResponse += content;
                  fullResponse = removeSpecialCharacters(fullResponse);

                  if (matchesSentenceEnding(currentResponse)) {
                    isProcessing = true;
                    ttsQueue.push({ message: currentResponse });
                    currentResponse = '';

                    if (!isProcessing) {
                      ttsQueue.kill();
                      console.log('Stop streaming openai...');
                      break;
                    }
                  }
                }
              }
              handleMessage(socket, 'assistant', fullResponse);
              fullResponse = '';
              isAssistantRunning = false;
            }
          }
        }
      } catch (error) {
        console.error(error);
        isAssistantRunning = false;
      }
    });

  // Pipe audio stream to recognition stream
  audioStream.pipe(recognizeStream);

  // Handle incoming socket data
  socket.on('data', data => {
    handlePacket(socket, audioStream, data);
  });

  // Clean up on socket close
  socket.on('close', () => {
    console.log('Connection closed');
    console.log('Total cost:', totalCost.toFixed(4));
  });
});

// Start the server
const PORT = process.env.PORT || 3000;
server.listen(PORT, () => {
  console.log(`Server v${packageInfo.version} listening on port ${PORT}`);
});
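For orientation, the TCP framing that handlePacket parses is a 3-byte header (packet type, then a big-endian uint16 payload length) followed by the payload; UUID payloads are read from bytes 3..19 and audio is expected as 8 kHz, 16-bit LINEAR16. A minimal client sketch is shown below. It is illustrative only and not part of this commit: the host, port, and UUID bytes are placeholder assumptions, and the writes are spaced out because the server treats each socket 'data' event as a single packet.

// Illustrative client for the packet protocol in index.js (not part of this commit).
// Assumed values: host 127.0.0.1, port 3002, and a made-up 16-byte UUID.
const net = require('net');

// Frame a packet the way handlePacket expects: [type:1][length:2, big-endian][payload]
function framePacket(type, payload = Buffer.alloc(0)) {
  const header = Buffer.alloc(3);
  header.writeUInt8(type, 0);
  header.writeUInt16BE(payload.length, 1);
  return Buffer.concat([header, payload]);
}

const client = net.createConnection({ host: '127.0.0.1', port: 3002 }, () => {
  // 0x1 = UUID: 16 raw bytes, read by the server as packet.slice(3, 19)
  client.write(framePacket(0x1, Buffer.from('0123456789abcdef0123456789abcdef', 'hex')));

  // 0x10 = AUDIO: 20 ms of 8 kHz, 16-bit LINEAR16 audio is 320 bytes; silence here
  setTimeout(() => client.write(framePacket(0x10, Buffer.alloc(320))), 50);
});

// Synthesized replies come back as AUDIO packets in the same framing
client.on('data', packet => {
  console.log('packet type 0x' + packet.readUInt8(0).toString(16), 'length', packet.readUInt16BE(1));
});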
27  voicebot-rt/app-gc-mlx-llm/package.json  Normal file
@@ -0,0 +1,27 @@
{
  "name": "asterisk-voicebot-rt-gc-mlx-llm",
  "version": "1.0.0",
  "description": "Asterisk Voicebot Realtime is a Node.js server that facilitates real-time audio processing and communication using various Google Cloud services and MLX-LLM.",
  "main": "index.js",
  "scripts": {
    "test": "jest",
    "start": "node index.js",
    "dc:up": "docker compose up -d --build",
    "dc:down": "docker compose down"
  },
  "author": "moeny",
  "license": "MIT",
  "dependencies": {
    "@google-cloud/speech": "^6.7.0",
    "@google-cloud/text-to-speech": "^5.4.0",
    "async": "^3.2.6",
    "axios": "^1.7.7",
    "dotenv": "^16.4.5",
    "openai": "^4.67.2",
    "tiktoken": "^1.0.17",
    "to-uuid": "^0.1.3"
  },
  "devDependencies": {
    "jest": "^29.7.0"
  }
}
82  voicebot-rt/app-gc-mlx-llm/prices.json  Normal file
@@ -0,0 +1,82 @@
{
  "gpt-4o": {
    "input": 0.0050,
    "output": 0.0150
  },
  "gpt-4o-2024-05-13": {
    "input": 0.0050,
    "output": 0.0150
  },
  "gpt-4o-mini": {
    "input": 0.000150,
    "output": 0.000600
  },
  "gpt-4o-mini-2024-07-18": {
    "input": 0.000150,
    "output": 0.000600
  },
  "gpt-3.5-turbo": {
    "input": 0.00300,
    "output": 0.00600
  },
  "gpt-4-turbo": {
    "input": 0.0100,
    "output": 0.0300
  },
  "gpt-4-turbo-2024-04-09": {
    "input": 0.0100,
    "output": 0.0300
  },
  "gpt-4": {
    "input": 0.0300,
    "output": 0.0600
  },
  "gpt-4-32k": {
    "input": 0.0600,
    "output": 0.1200
  },
  "gpt-4-0125-preview": {
    "input": 0.0100,
    "output": 0.0300
  },
  "gpt-4-1106-preview": {
    "input": 0.0100,
    "output": 0.0300
  },
  "gpt-4-vision-preview": {
    "input": 0.0100,
    "output": 0.0300
  },
  "gpt-3.5-turbo-0125": {
    "input": 0.0005,
    "output": 0.0015
  },
  "gpt-3.5-turbo-instruct": {
    "input": 0.0005,
    "output": 0.0020
  },
  "gpt-3.5-turbo-1106": {
    "input": 0.0010,
    "output": 0.0020
  },
  "gpt-3.5-turbo-0613": {
    "input": 0.0015,
    "output": 0.0020
  },
  "gpt-3.5-turbo-16k-0613": {
    "input": 0.0030,
    "output": 0.0040
  },
  "gpt-3.5-turbo-0301": {
    "input": 0.0015,
    "output": 0.0020
  },
  "davinci-002": {
    "input": 0.0020,
    "output": 0.0020
  },
  "babbage-002": {
    "input": 0.0004,
    "output": 0.0004
  }
}
161  voicebot-rt/app-gc-mlx-llm/utils.js  Normal file
@@ -0,0 +1,161 @@
// ====================================
// Voice Bot Utilities
// Helper functions for text processing, metrics calculation,
// and pricing calculations for the voice bot system
// ====================================

const { encoding_for_model } = require('tiktoken');
const prices = require('./prices.json');
require('dotenv').config();

// ====================================
// Text Processing Functions
// ====================================

/**
 * Checks if a string ends with a sentence terminator (., !, ?, or :)
 * Used to determine when to send text for speech synthesis
 * @param {string} text - The text to check
 * @returns {boolean} - True if the text ends with a sentence terminator
 */
function matchesSentenceEnding(text) {
  return /([.!?:]([\s]|$|\n))/.test(text);
}

/**
 * Removes special characters and formatting markers from text
 * Cleans up text before processing or displaying
 * @param {string} text - The text to clean
 * @returns {string} - Cleaned text
 */
function removeSpecialCharacters(text) {
  return text
    .replace(/[*#\n]/g, '')             // Remove asterisks, hashtags, and newlines
    .replace(/【\d+:\d+†[^】]+】/g, '')   // Remove timestamp-like markers
    .trim();
}

/**
 * Calculates the duration of an audio buffer
 * @param {Buffer} buffer - The audio buffer
 * @param {number} sampleRate - The sample rate of the audio
 * @param {number} channels - Number of audio channels
 * @returns {number} - Duration in seconds
 */
function getAudioDuration(buffer, sampleRate, channels) {
  const bytesPerSample = 2; // Assuming 16-bit audio
  const totalBytes = buffer.length;
  const duration = totalBytes / (sampleRate * channels * bytesPerSample);
  return Math.ceil(duration);
}

/**
 * Converts a hex string to a UUID format string
 * @param {string} hex - The hex string to convert to UUID
 * @returns {string} - Formatted UUID string
 */
function toUUID(hex) {
  return hex.replace(/(.{8})(.{4})(.{4})(.{4})(.{12})/, '$1-$2-$3-$4-$5');
}

// ====================================
// Metrics and Pricing Calculator
// ====================================

/**
 * Calculates various metrics and costs for processing text
 * Handles different roles (system, user, assistant) with different pricing models
 *
 * @param {string} role - The role of the message (system, user, or assistant)
 * @param {string} text - The text content to analyze
 * @param {number} [wordsPerMinute=130] - Words per minute rate for duration calculation
 * @param {number} [costPerWord=0.00001] - Cost per word
 * @param {number} [costPerChar=0.0000075] - Cost per character
 * @param {number} [costPerSecond=0.00025] - Cost per second of audio
 * @returns {Object} - Object containing various metrics and costs
 */
function calculateMetricsAndPricing(
  role,
  text,
  wordsPerMinute = 130,
  costPerWord = 0.00001,
  costPerChar = 0.0000075,
  costPerSecond = 0.00025
) {
  // Determine which model to use, fall back to gpt-4 if the specified model is not in prices
  const model = prices[process.env.OPENAI_MODEL || 'gpt-3.5-turbo']
    ? process.env.OPENAI_MODEL || 'gpt-3.5-turbo'
    : 'gpt-4';

  // Initialize tokenizer for the selected model
  const encoder = encoding_for_model(model);

  // Calculate basic metrics
  const charCount = text.length;
  const wordCount = text.trim().split(/\s+/).length;
  const durationInSeconds = Math.ceil((wordCount / wordsPerMinute) * 60);
  const tokenCount = encoder.encode(text).length;

  // Calculate costs with precision
  const costByWord = parseFloat((wordCount * costPerWord).toFixed(7));
  const costByCharacter = parseFloat((charCount * costPerChar).toFixed(7));
  const costBySecond = parseFloat((durationInSeconds * costPerSecond).toFixed(7));
  const costByToken = parseFloat(
    (tokenCount * prices[model][role === 'assistant' ? 'output' : 'input'] / 1000)
      .toFixed(7)
  );

  // Free up encoder resources
  encoder.free();

  // Return appropriate metrics based on role
  switch (role) {
    case 'system':
      return {
        tokenCount,
        costByToken,
        model
      };

    case 'user':
      return {
        durationInSeconds,
        tokenCount,
        costBySecond,
        costByToken,
        model
      };

    case 'assistant':
      return {
        charCount,
        tokenCount,
        costByCharacter,
        costByToken,
        model
      };

    default:
      // Return all metrics if role is not specified
      return {
        charCount,
        wordCount,
        tokenCount,
        durationInSeconds,
        costByWord,
        costByCharacter,
        costBySecond,
        costByToken,
        model
      };
  }
}

// Export utility functions
module.exports = {
  removeSpecialCharacters,
  matchesSentenceEnding,
  calculateMetricsAndPricing,
  getAudioDuration,
  toUUID
};
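As a quick sanity check of the pricing math (illustrative only, not part of the commit): the prices.json rates are applied per 1,000 tokens, so costByToken is tokenCount multiplied by the input or output rate and divided by 1000, on top of the flat per-second and per-character rates. The sample strings below are made up, and the sketch assumes OPENAI_MODEL is unset or set to gpt-3.5-turbo so the gpt-3.5-turbo rates apply.

// Illustrative usage of the utils above (not part of this commit); sample text is made up.
const { calculateMetricsAndPricing, toUUID } = require('./utils');

// 'user' metrics: costBySecond = durationInSeconds * 0.00025,
// costByToken = tokenCount * prices['gpt-3.5-turbo'].input / 1000
console.log(calculateMetricsAndPricing('user', 'Tell me about moeny.'));

// 'assistant' metrics: costByCharacter = charCount * 0.0000075,
// costByToken uses the output rate instead of the input rate
console.log(calculateMetricsAndPricing('assistant', 'moeny is pronounced mo-knee.'));

// toUUID re-inserts dashes into the 32-hex-character client identifier
console.log(toUUID('0123456789abcdef0123456789abcdef')); // 01234567-89ab-cdef-0123-456789abcdef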
@@ -1,20 +1,18 @@
 services:
-  asterisk-voicebot-rt:
+  asterisk-voicebot-rt-gc-gpt35t:
     build:
-      context: ./app-google-cloud
-    container_name: asterisk-voicebot-rt
+      context: ./app-gc-gpt35t
+    container_name: asterisk-voicebot-rt-gc-gpt35t
     expose:
       - "3001"
     networks:
-      internal_net: {}
-      external_net:
-        aliases:
-          - voicebot
+      voicebot_net:
+        ipv4_address: 172.20.0.101
     environment:
       - GOOGLE_APPLICATION_CREDENTIALS=/usr/src/app/service-account-key.json
-      - OPENAI_API_KEY=${OPEN_API_KEY}
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
       - OPENAI_MODEL=gpt-3.5-turbo
-      - SYSTEM_PROMPT="You are a helpful assistant and an AI consultant. You work for a company called moeny, which is pronounced as mo-knee. If you determine user input as "moaning" assume they are referring to moeny and proceed to assist the user but do not correct them. Interact with the caller about their areas of inerest as well as answering any questions they have about moeny, AI, tech, etc. When talking about moeny, be sure to give information from the provided information base in your own words. The following is your information base on moeny - what? On 10/24/2024, we attended a ConnectHV Networking Event in Hudson, NY, and were inspired by the incredible "mixture of experts" in the room. That experience ignited a clear and impactful idea to bring top talent together and make AI accessible to all. From there, we set out to build moeny. Inspired by the bold direction of tinycorp, and George Hotz's approach to building products and teams, we're creating a team of AI pros who value performance, sovereignty, and no unnecessary frills. As we grow, we're open to building moeny as a community-driven organization, potentially using a DAO model to directly involve contributors in decision-making. For now, our focus is clear AI that's lean, efficient, sovereign, and built to solve real-world problems. jobs? We're bootstrapping, and using a lot of AI, so we're not hiring full-time engineers just yet. However, we'll be posting bounties soon and are open to ambitious interns who share our vision of no-frills, high-performance AI. Interested? Keep an eye out for our upcoming bounty page. Completing bounties will allow you to earn while getting a feel for our work and culture. Note Once we begin hiring, only applicants who have completed at least one bounty with moeny will be considered. huh? How can I get the moeny team involved in my ML project? We're excited to collaborate with those tackling real-world challenges through machine learning. If you're looking for a team to help simplify complex AI processes, optimize performance, or bring fresh insights to your ML project, we're interested in connecting. Where's moeny development happening? On our self-hosted Gitea and RocketChat - jump in if you want to see what we're working on. We look at and pet the servers everyday. How can moeny work for me? If you're interested in a custom AI solution or partnership, reach out to us on RocketChat. How can I work for moeny? Refer to jobs above. Contributions on Gitea will be a great first step. Stay tuned for more information. Can I invest in moeny.ai? We (possibly) are interested in angel investment at this time. Your contributions and PRs are also another great way to invest. What's the goal of moeny.ai? It's still very early in our journey and our goals are still taking shape."
+      - SYSTEM_PROMPT=You are a helpful assistant and an AI consultant. You work for a company called moeny, which is pronounced as mo-knee. If you determine user input as "moaning" assume they are referring to moeny and proceed to assist the user but do not correct them. Interact with the caller about their areas of inerest as well as answering any questions they have about moeny, AI, tech, etc. When talking about moeny, be sure to give information from the provided information base in your own words. The following is your information base on moeny - what? On 10/24/2024, we attended a ConnectHV Networking Event in Hudson, NY, and were inspired by the incredible "mixture of experts" in the room. That experience ignited a clear and impactful idea to bring top talent together and make AI accessible to all. From there, we set out to build moeny. Inspired by the bold direction of tinycorp, and George Hotz's approach to building products and teams, we're creating a team of AI pros who value performance, sovereignty, and no unnecessary frills. As we grow, we're open to building moeny as a community-driven organization, potentially using a DAO model to directly involve contributors in decision-making. For now, our focus is clear AI that's lean, efficient, sovereign, and built to solve real-world problems. jobs? We're bootstrapping, and using a lot of AI, so we're not hiring full-time engineers just yet. However, we'll be posting bounties soon and are open to ambitious interns who share our vision of no-frills, high-performance AI. Interested? Keep an eye out for our upcoming bounty page. Completing bounties will allow you to earn while getting a feel for our work and culture. Note Once we begin hiring, only applicants who have completed at least one bounty with moeny will be considered. huh? How can I get the moeny team involved in my ML project? We're excited to collaborate with those tackling real-world challenges through machine learning. If you're looking for a team to help simplify complex AI processes, optimize performance, or bring fresh insights to your ML project, we're interested in connecting. Where's moeny development happening? On our self-hosted Gitea and RocketChat - jump in if you want to see what we're working on. We look at and pet the servers everyday. How can moeny work for me? If you're interested in a custom AI solution or partnership, reach out to us on RocketChat. How can I work for moeny? Refer to jobs above. Contributions on Gitea will be a great first step. Stay tuned for more information. Can I invest in moeny.ai? We (possibly) are interested in angel investment at this time. Your contributions and PRs are also another great way to invest. What's the goal of moeny.ai? It's still very early in our journey and our goals are still taking shape.
       - SPEECH_RECOGNITION_LANGUAGE=en-US
       - TEXT_TO_SPEECH_LANGUAGE=en-US
       - TEXT_TO_SPEECH_GENDER=FEMALE
@@ -23,8 +21,46 @@ services:
     volumes:
       - ./service-account-key.json:/usr/src/app/service-account-key.json
 
+  asterisk-voicebot-rt-gc-mlx-llm:
+    build:
+      context: ./app-gc-mlx-llm
+    container_name: asterisk-voicebot-rt-gc-mlx-llm
+    expose:
+      - "3002"
+    networks:
+      voicebot_net:
+        ipv4_address: 172.20.0.102
+    environment:
+      - GOOGLE_APPLICATION_CREDENTIALS=/usr/src/app/service-account-key.json
+      - OPENAI_BASE_URL=https://5f46-100-40-223-130.ngrok-free.app/v1
+      - OPENAI_MODEL=gpt-3.5-turbo
+      - SYSTEM_PROMPT=You are a helpful assistant and an AI consultant. You work for a company called moeny, which is pronounced as mo-knee. If you determine user input as "moaning" assume they are referring to moeny and proceed to assist the user but do not correct them. Interact with the caller about their areas of inerest as well as answering any questions they have about moeny, AI, tech, etc. When talking about moeny, be sure to give information from the provided information base in your own words. The following is your information base on moeny - what? On 10/24/2024, we attended a ConnectHV Networking Event in Hudson, NY, and were inspired by the incredible "mixture of experts" in the room. That experience ignited a clear and impactful idea to bring top talent together and make AI accessible to all. From there, we set out to build moeny. Inspired by the bold direction of tinycorp, and George Hotz's approach to building products and teams, we're creating a team of AI pros who value performance, sovereignty, and no unnecessary frills. As we grow, we're open to building moeny as a community-driven organization, potentially using a DAO model to directly involve contributors in decision-making. For now, our focus is clear AI that's lean, efficient, sovereign, and built to solve real-world problems. jobs? We're bootstrapping, and using a lot of AI, so we're not hiring full-time engineers just yet. However, we'll be posting bounties soon and are open to ambitious interns who share our vision of no-frills, high-performance AI. Interested? Keep an eye out for our upcoming bounty page. Completing bounties will allow you to earn while getting a feel for our work and culture. Note Once we begin hiring, only applicants who have completed at least one bounty with moeny will be considered. huh? How can I get the moeny team involved in my ML project? We're excited to collaborate with those tackling real-world challenges through machine learning. If you're looking for a team to help simplify complex AI processes, optimize performance, or bring fresh insights to your ML project, we're interested in connecting. Where's moeny development happening? On our self-hosted Gitea and RocketChat - jump in if you want to see what we're working on. We look at and pet the servers everyday. How can moeny work for me? If you're interested in a custom AI solution or partnership, reach out to us on RocketChat. How can I work for moeny? Refer to jobs above. Contributions on Gitea will be a great first step. Stay tuned for more information. Can I invest in moeny.ai? We (possibly) are interested in angel investment at this time. Your contributions and PRs are also another great way to invest. What's the goal of moeny.ai? It's still very early in our journey and our goals are still taking shape.
+      - SPEECH_RECOGNITION_LANGUAGE=en-US
+      - TEXT_TO_SPEECH_LANGUAGE=en-US
+      - TEXT_TO_SPEECH_GENDER=FEMALE
+      - TEXT_TO_SPEECH_NAME=en-US-Journey-F
+      - PORT=3002
+    volumes:
+      - ./service-account-key.json:/usr/src/app/service-account-key.json
+
+  ## OpenAI Realtime API - not implemented
+  # asterisk-voicebot-openai:
+  #   build:
+  #     context: ./app-openAI-realtime
+  #   container_name: asterisk-voicebot-openai
+  #   ports:
+  #     - "3003"
+  #   networks:
+  #     voicebot_net:
+  #       ipv4_address: 172.20.0.103
+  #   environment:
+  #     - OPENAI_API_KEY=sk-vveaFcf_LNneHuSaloexTfIqzl4OjaxXF7w7h_04eCT3BlbkFJMYv1cPen3WkP3AVQfJnVvdIwnuEpitBS4LVqQ3b4MA
+  #     - PORT=3003
+  #     - SYSTEM_PROMPT="You are a helpful assistant and an AI consultant. You work for a company called moeny, which is pronounced as mo-knee. If you determine user input as "moaning" assume they are referring to moeny. Or, if they say, "more about moaning" it means, "more about moeny". Interact with the caller about their areas of inerest as well as answering any questions they have about moeny, AI, tech, etc. When talking about moeny, be sure to give information from the provided information base in your own words. The following is your information base on moeny - what? On 10/24/2024, we attended a ConnectHV Networking Event in Hudson, NY, and were inspired by the incredible "mixture of experts" in the room. That experience ignited a clear and impactful idea to bring top talent together and make AI accessible to all. From there, we set out to build moeny. Inspired by the bold direction of tinycorp, and George Hotz's approach to building products and teams, we're creating a team of AI pros who value performance, sovereignty, and no unnecessary frills. As we grow, we're open to building moeny as a community-driven organization, potentially using a DAO model to directly involve contributors in decision-making. For now, our focus is clear AI that's lean, efficient, sovereign, and built to solve real-world problems. jobs? We're bootstrapping, and using a lot of AI, so we're not hiring full-time engineers just yet. However, we'll be posting bounties soon and are open to ambitious interns who share our vision of no-frills, high-performance AI. Interested? Keep an eye out for our upcoming bounty page. Completing bounties will allow you to earn while getting a feel for our work and culture. Note Once we begin hiring, only applicants who have completed at least one bounty with moeny will be considered. huh? How can I get the moeny team involved in my ML project? We're excited to collaborate with those tackling real-world challenges through machine learning. If you're looking for a team to help simplify complex AI processes, optimize performance, or bring fresh insights to your ML project, we're interested in connecting. Where's moeny development happening? On our self-hosted Gitea and RocketChat - jump in if you want to see what we're working on. We look at and pet the servers everyday. How can moeny work for me? If you're interested in a custom AI solution or partnership, reach out to us on RocketChat. How can I work for moeny? Refer to jobs above. Contributions on Gitea will be a great first step. Stay tuned for more information. Can I invest in moeny.ai? We (possibly) are interested in angel investment at this time. Your contributions and PRs are also another great way to invest. What's the goal of moeny.ai? It's still very early in our journey and our goals are still taking shape."
+
 networks:
-  internal_net:
-    driver: bridge
-  external_net:
+  voicebot_net:
     driver: bridge
+    ipam:
+      config:
+        - subnet: 172.20.0.0/24
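The new service reaches the MLX-hosted model through an OpenAI-compatible /v1 endpoint (here an ngrok tunnel set as OPENAI_BASE_URL). A quick smoke test of that endpoint, using the same openai package and call shape that index.js relies on, might look like the sketch below; it is illustrative only, assumes the endpoint implements the standard chat completions route, and is not part of this commit.

// Illustrative smoke test for the endpoint behind OPENAI_BASE_URL (not part of this commit).
const { OpenAI } = require('openai');

const openai = new OpenAI({
  baseURL: process.env.OPENAI_BASE_URL, // e.g. the ngrok URL from the compose file above
  apiKey: 'no-key'                      // index.js always sends a dummy key to this endpoint
});

async function main() {
  const completion = await openai.chat.completions.create({
    model: process.env.OPENAI_MODEL || 'gpt-3.5-turbo',
    messages: [{ role: 'user', content: 'Reply with one short sentence.' }],
    max_tokens: 150
  });
  console.log(completion.choices[0].message.content);
}

main().catch(console.error);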