From e777e80845e2386dfe3d6d67fe5f7e24838c0a61 Mon Sep 17 00:00:00 2001 From: moeny-matt Date: Tue, 17 Dec 2024 14:44:23 -0500 Subject: [PATCH] add MLX-LLM --- extensions.conf | 12 +- voicebot-rt/.gitignore | 1 + .../Dockerfile | 6 +- .../index.js | 0 .../package.json | 2 +- .../prices.json | 0 .../utils.js | 0 voicebot-rt/app-gc-mlx-llm/Dockerfile | 18 + voicebot-rt/app-gc-mlx-llm/index.js | 422 ++++++++++++++++++ voicebot-rt/app-gc-mlx-llm/package.json | 27 ++ voicebot-rt/app-gc-mlx-llm/prices.json | 82 ++++ voicebot-rt/app-gc-mlx-llm/utils.js | 161 +++++++ voicebot-rt/docker-compose.yaml | 60 ++- 13 files changed, 769 insertions(+), 22 deletions(-) rename voicebot-rt/{app-google-cloud => app-gc-gpt35t}/Dockerfile (57%) rename voicebot-rt/{app-google-cloud => app-gc-gpt35t}/index.js (100%) rename voicebot-rt/{app-google-cloud => app-gc-gpt35t}/package.json (94%) rename voicebot-rt/{app-google-cloud => app-gc-gpt35t}/prices.json (100%) rename voicebot-rt/{app-google-cloud => app-gc-gpt35t}/utils.js (100%) create mode 100644 voicebot-rt/app-gc-mlx-llm/Dockerfile create mode 100644 voicebot-rt/app-gc-mlx-llm/index.js create mode 100644 voicebot-rt/app-gc-mlx-llm/package.json create mode 100644 voicebot-rt/app-gc-mlx-llm/prices.json create mode 100644 voicebot-rt/app-gc-mlx-llm/utils.js diff --git a/extensions.conf b/extensions.conf index 000cbf6..a5b6345 100644 --- a/extensions.conf +++ b/extensions.conf @@ -6,19 +6,19 @@ writeprotect=no ; (888)88-MOENY exten => 18888866369,1,Answer() same => n,agi(googletts.agi,"Thank you for calling Moeny! How may I help you?",en) - same => n,Goto(gc-ChatGPT-35T) + same => n,Goto(from-trunk,15598255224,gc-gpt35t) same => n,Hangup() ; (559)TALK-2-AI - Google Cloud w/ChatGPT 3.5 Turbo exten => 15598255224,1,Answer() - same => n,agi(googletts.agi,"Thank you for calling Moeny! Powered by Google Cloud with OpenAI ChatGPT 3.5 Turbo. 
How may I help you?",en) - same => n(gc-ChatGPT-35T),AudioSocket(66b7feb0-8938-11ee-abd7-0242ac151234,172.20.0.101:3001) + same => n,agi(googletts.agi,"Ready to talk to AI?",en) + same => n(gc-gpt35t),AudioSocket(66b7feb0-8938-11ee-abd7-0242ac151234,172.20.0.101:3001) same => n,Hangup() -; (845)610-6555 - OpenAI Realtime API +; (845)610-6555 - Google Cloud w/MLX LLM (via ngrok) exten => 18456106555,1,Answer() - same => n,agi(googletts.agi,"Thank you for calling Moeny! Powered by OpenAI's Realtime API. How may I help you?",en) - same => n(OAI-rt-API),AudioSocket(66b7feb0-8938-11ee-abd7-0242ac151234,172.20.0.102:3002) + same => n,agi(googletts.agi,"Ready to talk to AI?",en) + same => n(gc-mlx-llm),AudioSocket(66b7feb0-8938-11ee-abd7-0242ac151234,172.20.0.102:3002) same => n,Hangup() [from-internal] diff --git a/voicebot-rt/.gitignore b/voicebot-rt/.gitignore index 78b3df5..9dfa586 100644 --- a/voicebot-rt/.gitignore +++ b/voicebot-rt/.gitignore @@ -1,2 +1,3 @@ .env *.env +app-openai-stt-tts \ No newline at end of file diff --git a/voicebot-rt/app-google-cloud/Dockerfile b/voicebot-rt/app-gc-gpt35t/Dockerfile similarity index 57% rename from voicebot-rt/app-google-cloud/Dockerfile rename to voicebot-rt/app-gc-gpt35t/Dockerfile index ff3b075..613846b 100644 --- a/voicebot-rt/app-google-cloud/Dockerfile +++ b/voicebot-rt/app-gc-gpt35t/Dockerfile @@ -4,14 +4,14 @@ WORKDIR /usr/src/app COPY package*.json ./ RUN npm install --omit=dev -RUN npm install -g javascript-obfuscator +# RUN npm install -g javascript-obfuscator COPY index.js index.js COPY utils.js utils.js COPY prices.json prices.json -RUN javascript-obfuscator index.js --output index.js -RUN javascript-obfuscator utils.js --output utils.js +# RUN javascript-obfuscator index.js --output index.js +# RUN javascript-obfuscator utils.js --output utils.js EXPOSE 3001 diff --git a/voicebot-rt/app-google-cloud/index.js b/voicebot-rt/app-gc-gpt35t/index.js similarity index 100% rename from 
voicebot-rt/app-google-cloud/index.js rename to voicebot-rt/app-gc-gpt35t/index.js diff --git a/voicebot-rt/app-google-cloud/package.json b/voicebot-rt/app-gc-gpt35t/package.json similarity index 94% rename from voicebot-rt/app-google-cloud/package.json rename to voicebot-rt/app-gc-gpt35t/package.json index dc9b3fd..d870d56 100644 --- a/voicebot-rt/app-google-cloud/package.json +++ b/voicebot-rt/app-gc-gpt35t/package.json @@ -1,5 +1,5 @@ { - "name": "asterisk-voicebot-rt", + "name": "asterisk-voicebot-rt-gc-gpt35t", "version": "1.0.0", "description": "Asterisk Voicebot Realtime is a Node.js server that facilitates real-time audio processing and communication using various Google Cloud services and OpenAI's GPT-3.5-turbo model.", "main": "index.js", diff --git a/voicebot-rt/app-google-cloud/prices.json b/voicebot-rt/app-gc-gpt35t/prices.json similarity index 100% rename from voicebot-rt/app-google-cloud/prices.json rename to voicebot-rt/app-gc-gpt35t/prices.json diff --git a/voicebot-rt/app-google-cloud/utils.js b/voicebot-rt/app-gc-gpt35t/utils.js similarity index 100% rename from voicebot-rt/app-google-cloud/utils.js rename to voicebot-rt/app-gc-gpt35t/utils.js diff --git a/voicebot-rt/app-gc-mlx-llm/Dockerfile b/voicebot-rt/app-gc-mlx-llm/Dockerfile new file mode 100644 index 0000000..69c4af8 --- /dev/null +++ b/voicebot-rt/app-gc-mlx-llm/Dockerfile @@ -0,0 +1,18 @@ +FROM node:slim + +WORKDIR /usr/src/app + +COPY package*.json ./ +RUN npm install --omit=dev +# RUN npm install -g javascript-obfuscator + +COPY index.js index.js +COPY utils.js utils.js +COPY prices.json prices.json + +# RUN javascript-obfuscator index.js --output index.js +# RUN javascript-obfuscator utils.js --output utils.js + +EXPOSE 3002 + +CMD ["node", "index.js"] diff --git a/voicebot-rt/app-gc-mlx-llm/index.js b/voicebot-rt/app-gc-mlx-llm/index.js new file mode 100644 index 0000000..0a5c437 --- /dev/null +++ b/voicebot-rt/app-gc-mlx-llm/index.js @@ -0,0 +1,422 @@ +// 
==================================== +// Voice Bot Implementation +// A real-time voice chat system that converts speech to text, +// processes it through AI (OpenAI/Botpress), and converts responses back to speech +// ==================================== + +// Required Dependencies +const net = require('net'); +const { SpeechClient } = require('@google-cloud/speech'); +const { TextToSpeechClient } = require('@google-cloud/text-to-speech'); +const { Transform } = require('stream'); +const { OpenAI } = require('openai'); +const util = require('util'); +const async = require('async'); +const { encoding_for_model } = require('tiktoken'); +const axios = require('axios'); +const { toUUID } = require('./utils'); +const packageInfo = require('./package.json'); +const prices = require('./prices.json'); +const { + matchesSentenceEnding, + removeSpecialCharacters, + calculateMetricsAndPricing +} = require('./utils'); + +// Load environment variables +require('dotenv').config(); + +// ==================================== +// Packet Protocol Definition +// Defines the types of packets that can be exchanged between client and server +// ==================================== +const PACKET_TYPES = { + 'TERMINATE': 0x0, // Signal to end the connection + 'UUID': 0x1, // Client identification + 'AUDIO': 0x10, // Audio data packet + 'ERROR': 0xff // Error notification +}; + +// ==================================== +// Socket State Management +// Tracks the state of the socket connection +// ==================================== +function isSocketActive(socket) { + return socket && !socket.destroyed && socket.writable; +} + +// ==================================== +// Packet Handler +// Processes incoming packets based on their type +// ==================================== +function handlePacket(socket, audioStream, packet) { + const packetType = packet.readUInt8(0); + const packetLength = packet.readUInt16BE(1); + + switch (packetType) { + case PACKET_TYPES.TERMINATE: + 
console.log('Terminate packet received. Initiating graceful shutdown.'); + // Clean up streams and pending operations + if (audioStream) { + audioStream.end(); + } + + // Set a flag to prevent new operations + socket.isTerminating = true; + + // Give time for pending operations to complete + setTimeout(() => { + if (isSocketActive(socket)) { + socket.end(); + } + }, 1000); + break; + + case PACKET_TYPES.UUID: + if (!isSocketActive(socket)) return; + const uuid = toUUID(packet.slice(3, 19).toString('hex')); + socket.uuid = uuid; + console.log('UUID packet received: ' + uuid); + break; + + case PACKET_TYPES.AUDIO: + if (!isSocketActive(socket) || socket.isTerminating) return; + const audioData = packet.slice(3, 3 + packetLength); + if (audioStream && !audioStream.destroyed) { + audioStream.write(audioData); + } + break; + + case PACKET_TYPES.ERROR: + if (!isSocketActive(socket)) return; + const errorCode = packetLength > 0 ? packet.readUInt8(3) : null; + console.log('Error packet received with code: ' + errorCode); + break; + + default: + console.log('Unknown packet type: ' + packetType); + } +} + +// Safe write function to prevent write-after-end errors +function safeSocketWrite(socket, data) { + if (isSocketActive(socket) && !socket.isTerminating) { + try { + socket.write(data); + } catch (err) { + console.error('Error writing to socket:', err); + } + } +} + +// ==================================== +// Main Server Implementation +// Creates and manages the TCP server that handles client connections +// ==================================== +const server = net.createServer(async socket => { + console.log('Client connected'); + + // State variables + let assistant = null; + let messages = []; + let totalCost = 0; + let isProcessing = false; + let isAssistantRunning = false; + let fullResponse = ''; + + // Initialize AI clients + const ttsClient = new TextToSpeechClient(); + const speechClient = new SpeechClient(); + + // Initialize OpenAI if API key is provided + let 
openai = null; + if (process.env.OPENAI_BASE_URL) { + openai = new OpenAI({ + baseURL: process.env.OPENAI_BASE_URL, + apiKey: 'no-key', // Always use no-key for this custom endpoint + }); + console.log('OpenAI client initialized with:', { + baseURL: process.env.OPENAI_BASE_URL, + apiKey: 'no-key' + }); + } + + // Initialize Botpress if webhook URL is provided + let botpress = null; + if (process.env.BOTPRESS_WEBHOOK_URL) { + openai = null; // Disable OpenAI if using Botpress + botpress = axios.create({ baseURL: process.env.BOTPRESS_WEBHOOK_URL }); + } + + // Create OpenAI assistant if ID is provided + if (openai && process.env.OPENAI_ASSISTANT_ID) { + assistant = await openai.beta.assistants.create(); + } + + // ==================================== + // Message Handler + // Tracks messages and calculates costs + // ==================================== + const handleMessage = (socket, role, content) => { + const messageData = { + uuid: socket.uuid || '', + role: role, + content: content, + ...calculateMetricsAndPricing(role, content) + }; + + // Update costs based on message role + switch (role) { + case 'system': + totalCost += messageData.costByCharacter; + break; + case 'user': + totalCost += messageData.costBySecond; + totalCost += messageData.costByToken; + console.log(messageData); + break; + case 'assistant': + totalCost += messageData.costByCharacter; + totalCost += messageData.costByToken; + // console.log(messageData); + break; + } + + messages.push(messageData); + }; + + // ==================================== + // Text-to-Speech Handler + // Converts text responses to speech and streams audio back to client + // ==================================== + const ttsQueue = async.queue(async task => { + const { message } = task; + const request = { + input: { text: message }, + voice: { + languageCode: process.env.TEXT_TO_SPEECH_LANGUAGE || 'en-US', + ssmlGender: process.env.TEXT_TO_SPEECH_GENDER || 'FEMALE', + name: process.env.TEXT_TO_SPEECH_NAME || 'en-US-Journey-F' 
}, + audioConfig: { + audioEncoding: 'LINEAR16', + sampleRateHertz: 8000, + speakingRate: 1 + } + }; + + try { + const [response] = await ttsClient.synthesizeSpeech(request); + const assistantMessage = { + uuid: socket.uuid || '', + role: 'assistant', + content: message, + ...calculateMetricsAndPricing('assistant', message) + }; + console.log(assistantMessage); + + const audioContent = response.audioContent; + const chunkSize = 320; + + // Stream audio in chunks + for (let i = 0; i < audioContent.length; i += chunkSize) { + if (!isProcessing) break; + + const chunk = audioContent.slice(i, i + chunkSize); + const header = Buffer.alloc(3); + header.writeUInt8(PACKET_TYPES.AUDIO, 0); + header.writeUInt16BE(chunk.length, 1); + + const packet = Buffer.concat([header, chunk]); + safeSocketWrite(socket, packet); + await new Promise(resolve => setTimeout(resolve, 20)); + } + } catch (error) { + console.error('Error synthesizing speech:', error); + } + }, 1); + + ttsQueue.drain(() => {}); + + // ==================================== + // Speech-to-Text Setup + // Configures and manages speech recognition + // ==================================== + /* Start addition to check audio level */ + let audioSampleWindow = []; + const WINDOW_SIZE = 10; // Number of chunks to analyze + const AUDIO_THRESHOLD = 700; // Adjust this threshold based on testing + /* End addition to check audio level */ + + const audioStream = new Transform({ + transform(chunk, encoding, callback) { + /* Start addition to check audio level */ + // Calculate RMS (Root Mean Square) of the audio chunk + let sum = 0; + for (let i = 0; i < chunk.length; i += 2) { + // Convert 2 bytes to a 16-bit integer + const sample = chunk.readInt16LE(i); + sum += sample * sample; + } + const rms = Math.sqrt(sum / (chunk.length / 2)); + + // Maintain a sliding window of audio levels + audioSampleWindow.push(rms); + if (audioSampleWindow.length > WINDOW_SIZE) { + audioSampleWindow.shift(); + } + + // Calculate average RMS over 
the window + const avgRMS = audioSampleWindow.reduce((a, b) => a + b, 0) / audioSampleWindow.length; + + // Set isProcessing based on audio level + if (avgRMS > AUDIO_THRESHOLD) { + isProcessing = false; + console.log('Audio level triggered:', avgRMS.toFixed(2)); + } + /* End addition to check audio level */ + + callback(null, chunk); + } + }); + + // Add system prompt to messages + handleMessage( + socket, + 'system', + process.env.SYSTEM_PROMPT || 'You are a helpful assistant.' + ); + + // Configure speech recognition + const recognitionConfig = { + config: { + encoding: 'LINEAR16', + sampleRateHertz: 8000, + languageCode: process.env.SPEECH_RECOGNITION_LANGUAGE || 'en-US', + model: process.env.SPEECH_RECOGNITION_MODEL || 'phone_call', + useEnhanced: true + }, + interimResults: false + }; + + // Add alternative languages if specified + if (process.env.SPEECH_RECOGNITION_ALTERNATIVE_LANGUAGES) { + recognitionConfig.alternativeLanguageCodes = + process.env.SPEECH_RECOGNITION_ALTERNATIVE_LANGUAGES.split(','); + } + + // ==================================== + // Speech Recognition Stream Handler + // Processes speech recognition results and manages AI responses + // ==================================== + const recognizeStream = speechClient + .streamingRecognize(recognitionConfig) + .on('error', console.error) + .on('data', async data => { + try { + if ( + data.results[0]?.alternatives[0]?.transcript && + data.results[0]?.alternatives[0]?.confidence > 0 + ) { + const transcript = data.results[0].alternatives[0].transcript.trim(); + + if (transcript) { + let response; + isProcessing = false; + handleMessage(socket, 'user', transcript); + + if (openai) { + if (process.env.OPENAI_ASSISTANT_ID) { + // Wait if assistant is still processing + while (isAssistantRunning) { + console.log('Assistant is running...'); + await new Promise(resolve => setTimeout(resolve, 1000)); + } + + // Process with OpenAI Assistant + await openai.beta.assistants.messages.create( + assistant.id, 
+ { role: 'user', content: transcript } + ); + isAssistantRunning = true; + response = await openai.beta.assistants.runs.create( + assistant.id, + { + assistant_id: process.env.OPENAI_ASSISTANT_ID, + stream: true + } + ); + } else { + // Process with standard OpenAI chat + response = await openai.chat.completions.create({ + model: process.env.OPENAI_MODEL || 'gpt-3.5-turbo', + messages: messages, + max_tokens: 150, + stream: true + }); + } + + let currentResponse = ''; + for await (const chunk of response) { + let content = ''; + + if (process.env.OPENAI_ASSISTANT_ID) { + if (Array.isArray(chunk.data.delta?.content) && + chunk.data.delta.content[0]?.text) { + content = chunk.data.delta.content[0].text.value; + } + } else { + content = chunk.choices[0]?.delta?.content; + } + + if (content) { + currentResponse += content; + currentResponse = removeSpecialCharacters(currentResponse); + fullResponse += content; + fullResponse = removeSpecialCharacters(fullResponse); + + if (matchesSentenceEnding(currentResponse)) { + isProcessing = true; + ttsQueue.push({ message: currentResponse }); + currentResponse = ''; + + if (!isProcessing) { + ttsQueue.kill(); + console.log('Stop streaming openai...'); + break; + } + } + } + } + handleMessage(socket, 'assistant', fullResponse); + fullResponse = ''; + isAssistantRunning = false; + } + } + } + } catch (error) { + console.error(error); + isAssistantRunning = false; + } + }); + + // Pipe audio stream to recognition stream + audioStream.pipe(recognizeStream); + + // Handle incoming socket data + socket.on('data', data => { + handlePacket(socket, audioStream, data); + }); + + // Clean up on socket close + socket.on('close', () => { + console.log('Connection closed'); + console.log('Total cost:', totalCost.toFixed(4)); + }); +}); + +// Start the server +const PORT = process.env.PORT || 3000; +server.listen(PORT, () => { + console.log(`Server v${packageInfo.version} listening on port ${PORT}`); +}); diff --git 
a/voicebot-rt/app-gc-mlx-llm/package.json b/voicebot-rt/app-gc-mlx-llm/package.json new file mode 100644 index 0000000..cc367eb --- /dev/null +++ b/voicebot-rt/app-gc-mlx-llm/package.json @@ -0,0 +1,27 @@ +{ + "name": "asterisk-voicebot-rt-gc-mlx-llm", + "version": "1.0.0", + "description": "Asterisk Voicebot Realtime is a Node.js server that facilitates real-time audio processing and communication using various Google Cloud services and MLX-LLM.", + "main": "index.js", + "scripts": { + "test": "jest", + "start": "node index.js", + "dc:up": "docker compose up -d --build", + "dc:down": "docker compose down" + }, + "author": "moeny", + "license": "MIT", + "dependencies": { + "@google-cloud/speech": "^6.7.0", + "@google-cloud/text-to-speech": "^5.4.0", + "async": "^3.2.6", + "axios": "^1.7.7", + "dotenv": "^16.4.5", + "openai": "^4.67.2", + "tiktoken": "^1.0.17", + "to-uuid": "^0.1.3" + }, + "devDependencies": { + "jest": "^29.7.0" + } +} diff --git a/voicebot-rt/app-gc-mlx-llm/prices.json b/voicebot-rt/app-gc-mlx-llm/prices.json new file mode 100644 index 0000000..519351a --- /dev/null +++ b/voicebot-rt/app-gc-mlx-llm/prices.json @@ -0,0 +1,82 @@ +{ + "gpt-4o": { + "input": 0.0050, + "output": 0.0150 + }, + "gpt-4o-2024-05-13": { + "input": 0.0050, + "output": 0.0150 + }, + "gpt-4o-mini": { + "input": 0.000150, + "output": 0.000600 + }, + "gpt-4o-mini-2024-07-18": { + "input": 0.000150, + "output": 0.000600 + }, + "gpt-3.5-turbo": { + "input": 0.00300, + "output": 0.00600 + }, + "gpt-4-turbo": { + "input": 0.0100, + "output": 0.0300 + }, + "gpt-4-turbo-2024-04-09": { + "input": 0.0100, + "output": 0.0300 + }, + "gpt-4": { + "input": 0.0300, + "output": 0.0600 + }, + "gpt-4-32k": { + "input": 0.0600, + "output": 0.1200 + }, + "gpt-4-0125-preview": { + "input": 0.0100, + "output": 0.0300 + }, + "gpt-4-1106-preview": { + "input": 0.0100, + "output": 0.0300 + }, + "gpt-4-vision-preview": { + "input": 0.0100, + "output": 0.0300 + }, + "gpt-3.5-turbo-0125": { + "input": 
0.0005, + "output": 0.0015 + }, + "gpt-3.5-turbo-instruct": { + "input": 0.0005, + "output": 0.0020 + }, + "gpt-3.5-turbo-1106": { + "input": 0.0010, + "output": 0.0020 + }, + "gpt-3.5-turbo-0613": { + "input": 0.0015, + "output": 0.0020 + }, + "gpt-3.5-turbo-16k-0613": { + "input": 0.0030, + "output": 0.0040 + }, + "gpt-3.5-turbo-0301": { + "input": 0.0015, + "output": 0.0020 + }, + "davinci-002": { + "input": 0.0020, + "output": 0.0020 + }, + "babbage-002": { + "input": 0.0004, + "output": 0.0004 + } +} diff --git a/voicebot-rt/app-gc-mlx-llm/utils.js b/voicebot-rt/app-gc-mlx-llm/utils.js new file mode 100644 index 0000000..ebb8955 --- /dev/null +++ b/voicebot-rt/app-gc-mlx-llm/utils.js @@ -0,0 +1,161 @@ +// ==================================== +// Voice Bot Utilities +// Helper functions for text processing, metrics calculation, +// and pricing calculations for the voice bot system +// ==================================== + +const { encoding_for_model } = require('tiktoken'); +const prices = require('./prices.json'); +require('dotenv').config(); + +// ==================================== +// Text Processing Functions +// ==================================== + +/** + * Checks if a string ends with a sentence terminator (., !, ?, or :) + * Used to determine when to send text for speech synthesis + * @param {string} text - The text to check + * @returns {boolean} - True if the text ends with a sentence terminator + */ +function matchesSentenceEnding(text) { + return /([.!?:]([\s]|$|\n))/.test(text); +} + +/** + * Removes special characters and formatting markers from text + * Cleans up text before processing or displaying + * @param {string} text - The text to clean + * @returns {string} - Cleaned text + */ +function removeSpecialCharacters(text) { + return text + .replace(/[*#\n]/g, '') // Remove asterisks, hashtags, and newlines + .replace(/【\d+:\d+†[^】]+】/g, '') // Remove timestamp-like markers + .trim(); +} + +/** + * Calculates the duration of an audio buffer 
+ * @param {Buffer} buffer - The audio buffer + * @param {number} sampleRate - The sample rate of the audio + * @param {number} channels - Number of audio channels + * @returns {number} - Duration in seconds + */ +function getAudioDuration(buffer, sampleRate, channels) { + const bytesPerSample = 2; // Assuming 16-bit audio + const totalBytes = buffer.length; + const duration = totalBytes / (sampleRate * channels * bytesPerSample); + return Math.ceil(duration); +} + +/** + * Converts a hex string to a UUID format string + * @param {string} hex - The hex string to convert to UUID + * @returns {string} - Formatted UUID string + */ +function toUUID(hex) { + return hex.replace(/(.{8})(.{4})(.{4})(.{4})(.{12})/, '$1-$2-$3-$4-$5'); +} + +// ==================================== +// Metrics and Pricing Calculator +// ==================================== + +/** + * Calculates various metrics and costs for processing text + * Handles different roles (system, user, assistant) with different pricing models + * + * @param {string} role - The role of the message (system, user, or assistant) + * @param {string} text - The text content to analyze + * @param {number} [wordsPerMinute=130] - Words per minute rate for duration calculation + * @param {number} [costPerWord=0.00001] - Cost per word + * @param {number} [costPerChar=0.0000075] - Cost per character + * @param {number} [costPerSecond=0.00025] - Cost per second of audio + * @returns {Object} - Object containing various metrics and costs + */ +function calculateMetricsAndPricing( + role, + text, + wordsPerMinute = 130, + costPerWord = 0.00001, + costPerChar = 0.0000075, + costPerSecond = 0.00025 +) { + // Determine which model to use, fallback to gpt-3.5-turbo if specified model not in prices + const model = prices[process.env.OPENAI_MODEL || 'gpt-3.5-turbo'] + ? 
process.env.OPENAI_MODEL || 'gpt-3.5-turbo' + : 'gpt-3.5-turbo'; + + // Initialize tokenizer for the selected model + const encoder = encoding_for_model(model); + + // Calculate basic metrics + const charCount = text.length; + const wordCount = text.trim().split(/\s+/).length; + const durationInSeconds = Math.ceil((wordCount / wordsPerMinute) * 60); + const tokenCount = encoder.encode(text).length; + + // Calculate costs with precision + const costByWord = parseFloat((wordCount * costPerWord).toFixed(7)); + const costByCharacter = parseFloat((charCount * costPerChar).toFixed(7)); + const costBySecond = parseFloat((durationInSeconds * costPerSecond).toFixed(7)); + const costByToken = parseFloat( + (tokenCount * prices[model][role === 'assistant' ? 'output' : 'input'] / 1000) + .toFixed(7) + ); + + // Free up encoder resources + encoder.free(); + + // Return appropriate metrics based on role + switch (role) { + case 'system': + return { + tokenCount, + costByToken, + model + }; + + case 'user': + return { + durationInSeconds, + tokenCount, + costBySecond, + costByToken, + model + }; + + case 'assistant': + return { + charCount, + tokenCount, + costByCharacter, + costByToken, + model + }; + + default: + // Return all metrics if role is not specified + return { + charCount, + wordCount, + tokenCount, + durationInSeconds, + costByWord, + costByCharacter, + costBySecond, + costByToken, + model + }; + } +} + +// Export utility functions +module.exports = { + removeSpecialCharacters, + matchesSentenceEnding, + calculateMetricsAndPricing, + getAudioDuration, + toUUID +}; diff --git a/voicebot-rt/docker-compose.yaml b/voicebot-rt/docker-compose.yaml index 4734e3c..9f6121b 100644 --- a/voicebot-rt/docker-compose.yaml +++ b/voicebot-rt/docker-compose.yaml @@ -1,20 +1,18 @@ services: - asterisk-voicebot-rt: + asterisk-voicebot-rt-gc-gpt35t: build: - context: ./app-google-cloud - container_name: asterisk-voicebot-rt + context: ./app-gc-gpt35t + container_name: 
asterisk-voicebot-rt-gc-gpt35t expose: - "3001" networks: - internal_net: {} - external_net: - aliases: - - voicebot + voicebot_net: + ipv4_address: 172.20.0.101 environment: - GOOGLE_APPLICATION_CREDENTIALS=/usr/src/app/service-account-key.json - - OPENAI_API_KEY=${OPEN_API_KEY} + - OPENAI_API_KEY=${OPENAI_API_KEY} - OPENAI_MODEL=gpt-3.5-turbo - - SYSTEM_PROMPT="You are a helpful assistant and an AI consultant. You work for a company called moeny, which is pronounced as mo-knee. If you determine user input as "moaning" assume they are referring to moeny and proceed to assist the user but do not correct them. Interact with the caller about their areas of inerest as well as answering any questions they have about moeny, AI, tech, etc. When talking about moeny, be sure to give information from the provided information base in your own words. The following is your information base on moeny - what? On 10/24/2024, we attended a ConnectHV Networking Event in Hudson, NY, and were inspired by the incredible "mixture of experts" in the room. That experience ignited a clear and impactful idea to bring top talent together and make AI accessible to all. From there, we set out to build moeny. Inspired by the bold direction of tinycorp, and George Hotz's approach to building products and teams, we're creating a team of AI pros who value performance, sovereignty, and no unnecessary frills. As we grow, we're open to building moeny as a community-driven organization, potentially using a DAO model to directly involve contributors in decision-making. For now, our focus is clear AI that's lean, efficient, sovereign, and built to solve real-world problems. jobs? We're bootstrapping, and using a lot of AI, so we're not hiring full-time engineers just yet. However, we'll be posting bounties soon and are open to ambitious interns who share our vision of no-frills, high-performance AI. Interested? Keep an eye out for our upcoming bounty page. 
Completing bounties will allow you to earn while getting a feel for our work and culture. Note Once we begin hiring, only applicants who have completed at least one bounty with moeny will be considered. huh? How can I get the moeny team involved in my ML project? We're excited to collaborate with those tackling real-world challenges through machine learning. If you're looking for a team to help simplify complex AI processes, optimize performance, or bring fresh insights to your ML project, we're interested in connecting. Where's moeny development happening? On our self-hosted Gitea and RocketChat - jump in if you want to see what we're working on. We look at and pet the servers everyday. How can moeny work for me? If you're interested in a custom AI solution or partnership, reach out to us on RocketChat. How can I work for moeny? Refer to jobs above. Contributions on Gitea will be a great first step. Stay tuned for more information. Can I invest in moeny.ai? We (possibly) are interested in angel investment at this time. Your contributions and PRs are also another great way to invest. What's the goal of moeny.ai? It's still very early in our journey and our goals are still taking shape." + - SYSTEM_PROMPT=You are a helpful assistant and an AI consultant. You work for a company called moeny, which is pronounced as mo-knee. If you determine user input as "moaning" assume they are referring to moeny and proceed to assist the user but do not correct them. Interact with the caller about their areas of interest as well as answering any questions they have about moeny, AI, tech, etc. When talking about moeny, be sure to give information from the provided information base in your own words. The following is your information base on moeny - what? On 10/24/2024, we attended a ConnectHV Networking Event in Hudson, NY, and were inspired by the incredible "mixture of experts" in the room. 
That experience ignited a clear and impactful idea to bring top talent together and make AI accessible to all. From there, we set out to build moeny. Inspired by the bold direction of tinycorp, and George Hotz's approach to building products and teams, we're creating a team of AI pros who value performance, sovereignty, and no unnecessary frills. As we grow, we're open to building moeny as a community-driven organization, potentially using a DAO model to directly involve contributors in decision-making. For now, our focus is clear AI that's lean, efficient, sovereign, and built to solve real-world problems. jobs? We're bootstrapping, and using a lot of AI, so we're not hiring full-time engineers just yet. However, we'll be posting bounties soon and are open to ambitious interns who share our vision of no-frills, high-performance AI. Interested? Keep an eye out for our upcoming bounty page. Completing bounties will allow you to earn while getting a feel for our work and culture. Note Once we begin hiring, only applicants who have completed at least one bounty with moeny will be considered. huh? How can I get the moeny team involved in my ML project? We're excited to collaborate with those tackling real-world challenges through machine learning. If you're looking for a team to help simplify complex AI processes, optimize performance, or bring fresh insights to your ML project, we're interested in connecting. Where's moeny development happening? On our self-hosted Gitea and RocketChat - jump in if you want to see what we're working on. We look at and pet the servers everyday. How can moeny work for me? If you're interested in a custom AI solution or partnership, reach out to us on RocketChat. How can I work for moeny? Refer to jobs above. Contributions on Gitea will be a great first step. Stay tuned for more information. Can I invest in moeny.ai? We (possibly) are interested in angel investment at this time. 
Your contributions and PRs are also another great way to invest. What's the goal of moeny.ai? It's still very early in our journey and our goals are still taking shape. - SPEECH_RECOGNITION_LANGUAGE=en-US - TEXT_TO_SPEECH_LANGUAGE=en-US - TEXT_TO_SPEECH_GENDER=FEMALE @@ -23,8 +21,46 @@ services: volumes: - ./service-account-key.json:/usr/src/app/service-account-key.json + asterisk-voicebot-rt-gc-mlx-llm: + build: + context: ./app-gc-mlx-llm + container_name: asterisk-voicebot-rt-gc-mlx-llm + expose: + - "3002" + networks: + voicebot_net: + ipv4_address: 172.20.0.102 + environment: + - GOOGLE_APPLICATION_CREDENTIALS=/usr/src/app/service-account-key.json + - OPENAI_BASE_URL=https://5f46-100-40-223-130.ngrok-free.app/v1 + - OPENAI_MODEL=gpt-3.5-turbo + - SYSTEM_PROMPT=You are a helpful assistant and an AI consultant. You work for a company called moeny, which is pronounced as mo-knee. If you determine user input as "moaning" assume they are referring to moeny and proceed to assist the user but do not correct them. Interact with the caller about their areas of interest as well as answering any questions they have about moeny, AI, tech, etc. When talking about moeny, be sure to give information from the provided information base in your own words. The following is your information base on moeny - what? On 10/24/2024, we attended a ConnectHV Networking Event in Hudson, NY, and were inspired by the incredible "mixture of experts" in the room. That experience ignited a clear and impactful idea to bring top talent together and make AI accessible to all. From there, we set out to build moeny. Inspired by the bold direction of tinycorp, and George Hotz's approach to building products and teams, we're creating a team of AI pros who value performance, sovereignty, and no unnecessary frills. As we grow, we're open to building moeny as a community-driven organization, potentially using a DAO model to directly involve contributors in decision-making. 
For now, our focus is clear AI that's lean, efficient, sovereign, and built to solve real-world problems. jobs? We're bootstrapping, and using a lot of AI, so we're not hiring full-time engineers just yet. However, we'll be posting bounties soon and are open to ambitious interns who share our vision of no-frills, high-performance AI. Interested? Keep an eye out for our upcoming bounty page. Completing bounties will allow you to earn while getting a feel for our work and culture. Note: Once we begin hiring, only applicants who have completed at least one bounty with moeny will be considered. huh? How can I get the moeny team involved in my ML project? We're excited to collaborate with those tackling real-world challenges through machine learning. If you're looking for a team to help simplify complex AI processes, optimize performance, or bring fresh insights to your ML project, we're interested in connecting. Where's moeny development happening? On our self-hosted Gitea and RocketChat - jump in if you want to see what we're working on. We look at and pet the servers every day. How can moeny work for me? If you're interested in a custom AI solution or partnership, reach out to us on RocketChat. How can I work for moeny? Refer to jobs above. Contributions on Gitea will be a great first step. Stay tuned for more information. Can I invest in moeny.ai? We (possibly) are interested in angel investment at this time. Your contributions and PRs are also another great way to invest. What's the goal of moeny.ai? It's still very early in our journey and our goals are still taking shape. 
+ - SPEECH_RECOGNITION_LANGUAGE=en-US + - TEXT_TO_SPEECH_LANGUAGE=en-US + - TEXT_TO_SPEECH_GENDER=FEMALE + - TEXT_TO_SPEECH_NAME=en-US-Journey-F + - PORT=3002 + volumes: + - ./service-account-key.json:/usr/src/app/service-account-key.json + + ## OpenAI Realtime API - not implemented + # asterisk-voicebot-openai: + # build: + # context: ./app-openAI-realtime + # container_name: asterisk-voicebot-openai + # ports: + # - "3003" + # networks: + # voicebot_net: + # ipv4_address: 172.20.0.103 + # environment: + # - OPENAI_API_KEY=${OPENAI_API_KEY} + # - PORT=3003 + # - SYSTEM_PROMPT="You are a helpful assistant and an AI consultant. You work for a company called moeny, which is pronounced as mo-knee. If you determine user input as "moaning" assume they are referring to moeny. Or, if they say, "more about moaning" it means, "more about moeny". Interact with the caller about their areas of interest as well as answering any questions they have about moeny, AI, tech, etc. When talking about moeny, be sure to give information from the provided information base in your own words. The following is your information base on moeny - what? On 10/24/2024, we attended a ConnectHV Networking Event in Hudson, NY, and were inspired by the incredible "mixture of experts" in the room. That experience ignited a clear and impactful idea to bring top talent together and make AI accessible to all. From there, we set out to build moeny. Inspired by the bold direction of tinycorp, and George Hotz's approach to building products and teams, we're creating a team of AI pros who value performance, sovereignty, and no unnecessary frills. As we grow, we're open to building moeny as a community-driven organization, potentially using a DAO model to directly involve contributors in decision-making. For now, our focus is clear AI that's lean, efficient, sovereign, and built to solve real-world problems. jobs? We're bootstrapping, and using a lot of AI, so we're not hiring full-time engineers just yet. 
However, we'll be posting bounties soon and are open to ambitious interns who share our vision of no-frills, high-performance AI. Interested? Keep an eye out for our upcoming bounty page. Completing bounties will allow you to earn while getting a feel for our work and culture. Note: Once we begin hiring, only applicants who have completed at least one bounty with moeny will be considered. huh? How can I get the moeny team involved in my ML project? We're excited to collaborate with those tackling real-world challenges through machine learning. If you're looking for a team to help simplify complex AI processes, optimize performance, or bring fresh insights to your ML project, we're interested in connecting. Where's moeny development happening? On our self-hosted Gitea and RocketChat - jump in if you want to see what we're working on. We look at and pet the servers every day. How can moeny work for me? If you're interested in a custom AI solution or partnership, reach out to us on RocketChat. How can I work for moeny? Refer to jobs above. Contributions on Gitea will be a great first step. Stay tuned for more information. Can I invest in moeny.ai? We (possibly) are interested in angel investment at this time. Your contributions and PRs are also another great way to invest. What's the goal of moeny.ai? It's still very early in our journey and our goals are still taking shape." + networks: - internal_net: - driver: bridge - external_net: + voicebot_net: driver: bridge + ipam: + config: + - subnet: 172.20.0.0/24