windsurf/converter/app/api/process/route.ts

83 lines
3.0 KiB
TypeScript
Raw Normal View History

2024-11-20 23:27:41 +00:00
import { NextRequest, NextResponse } from 'next/server';
import OpenAI from 'openai';
import { z } from 'zod';
import { zodResponseFormat } from 'openai/helpers/zod';
// Module-level OpenAI client, created once when this module loads and used by
// the POST handler in this file.
// NOTE(review): constructed with no options, so it relies on the SDK's default
// configuration — presumably the API key is read from the environment
// (e.g. OPENAI_API_KEY); confirm for the deployed environment.
const openai = new OpenAI();
/**
 * Schema for a single extracted entry: an invoice line item or a
 * bank-statement transaction.
 *
 * The `describe` texts are attached to each field as its description, so
 * their wording is part of the runtime contract and must not be edited
 * casually.
 */
const ItemSchema = z.object({
  item: z
    .string()
    .describe('name of item, transaction description, or entry'),
  unit_price: z
    .number()
    .describe('unit price, transaction amount, or individual cost'),
  quantity: z
    .number()
    .describe('quantity, count of items, or number of transactions (use 1 for single transactions)'),
  sum: z
    .number()
    .describe('total amount for this item, transaction total, or entry amount'),
});
/**
 * Top-level schema for the data extracted from one document: the issuing
 * organization, its address, the overall total, and the list of entries
 * (each validated by `ItemSchema`).
 */
const ExtractionSchema = z.object({
  company: z
    .string()
    .describe('name of company, bank, or organization'),
  address: z
    .string()
    .describe('address, location, or branch information'),
  total_sum: z
    .number()
    .describe('total amount, sum of transactions, total paid, or final balance'),
  items: z
    .array(ItemSchema)
    .describe('list of items, transactions, or entries'),
});
/**
 * System prompt telling the model how to map invoices and bank statements
 * onto the extraction schema. Built from lines joined with '\n' so the text
 * sent to the model is unaffected by source-code indentation.
 */
const SYSTEM_PROMPT = [
  'You are an expert at extracting structured data from documents.',
  'You will be given text from a PDF document (likely an invoice or bank statement)',
  'and should extract the required information into the given structure.',
  'For bank statements:',
  '- Treat transaction descriptions as items',
  '- Use transaction amounts as unit prices',
  '- Use 1 as quantity for single transactions',
  '- Total amount can be total deposits, total checks, or final balance',
  'For invoices:',
  '- Extract traditional invoice fields',
  '- Calculate missing values if possible (e.g., sum = unit_price * quantity)',
  'For any document type:',
  '- Look for organization names in headers or footers',
  '- Find address or location information',
  '- Identify itemized lists or transactions',
  '- Look for total amounts or summaries',
  'Be precise with numerical values, extract them as numbers, not strings.',
  'If a value is missing but can be calculated, calculate it.',
  'Ensure all required fields are filled with meaningful values.',
].join('\n');

/**
 * Handles `POST` requests: extracts structured document data from raw text.
 *
 * Expects a JSON body with a non-empty string `text` property; any other body
 * properties are ignored. The text is sent to the model together with
 * `SYSTEM_PROMPT`, and the reply is parsed against `ExtractionSchema`.
 *
 * Responses:
 * - 200: the parsed extraction result.
 * - 400: the body is not valid JSON, or `text` is missing, not a string, or blank.
 * - 422: the model refused to process the document.
 * - 502: the model returned no parsed result.
 * - 500: any other failure while calling the model.
 *
 * @param request - Incoming request whose JSON body carries the document text.
 * @returns A JSON response as described above.
 */
export async function POST(request: NextRequest) {
  // Parse the body on its own so malformed JSON is reported as a client
  // error instead of falling through to the generic 500 handler.
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return NextResponse.json(
      { error: 'Invalid JSON body' },
      { status: 400 }
    );
  }

  // The body may legally be any JSON value (null, array, number, ...), so
  // narrow before reading `text` rather than destructuring blindly.
  const text =
    typeof body === 'object' && body !== null && 'text' in body
      ? body.text
      : undefined;

  if (typeof text !== 'string' || text.trim() === '') {
    return NextResponse.json(
      { error: 'No text provided' },
      { status: 400 }
    );
  }

  try {
    const completion = await openai.beta.chat.completions.parse({
      model: 'gpt-4o-2024-08-06',
      messages: [
        { role: 'system', content: SYSTEM_PROMPT },
        { role: 'user', content: text },
      ],
      response_format: zodResponseFormat(ExtractionSchema, 'document_extraction'),
    });

    const message = completion.choices[0]?.message;

    // With structured outputs the model can refuse; `parsed` is then null.
    // Surface that explicitly instead of answering 200 with a `null` body.
    if (message?.refusal) {
      console.error('Model refused to process text:', message.refusal);
      return NextResponse.json(
        { error: 'Model refused to process the document' },
        { status: 422 }
      );
    }

    if (!message?.parsed) {
      console.error('Model returned no parsed result');
      return NextResponse.json(
        { error: 'No structured data returned' },
        { status: 502 }
      );
    }

    return NextResponse.json(message.parsed);
  } catch (error: unknown) {
    console.error('Error processing text:', error);
    return NextResponse.json(
      { error: 'Error processing text' },
      { status: 500 }
    );
  }
}