// Page-level flag constants. They are declared before main.inc.php is loaded,
// and each one is only set when it has not already been defined.
// NOTE(review): judging by their names they turn off token renewal, menu, HTML
// and AJAX library loading and the page header/footer - confirm in main.inc.php.
if (!defined('NOTOKENRENEWAL')) {
	define('NOTOKENRENEWAL', 1);
}
if (!defined('NOREQUIREMENU')) {
	define('NOREQUIREMENU', '1');
}
if (!defined('NOREQUIREHTML')) {
	define('NOREQUIREHTML', '1');
}
if (!defined('NOREQUIREAJAX')) {
	define('NOREQUIREAJAX', '1');
}
if (!defined('NOHEADERNOFOOTER')) {
	define('NOHEADERNOFOOTER', '1');
}
44require_once
'../../main.inc.php';
52require_once DOL_DOCUMENT_ROOT.
'/core/class/html.form.class.php';
53require_once DOL_DOCUMENT_ROOT.
'/core/lib/files.lib.php';
54require_once DOL_DOCUMENT_ROOT.
'/ai/class/ai.class.php';
58 $langs->setDefaultLang(
GETPOST(
'lang',
'aZ09'));
// Load the translation domains used by this page.
$langs->loadLangs(array("main", "other", "exports"));

// Request parameters, both read through GETPOST() with the 'aZ09' check.
$action = GETPOST('action', 'aZ09');
$modulepart = GETPOST('modulepart', 'aZ09');

// Directory the file to process is read from: the "import" sub-directory of
// the directory configured in $conf->user->dir_temp.
$upload_dir = $conf->user->dir_temp.'/import';
74if (preg_match(
'/^upload_page-([a-z_]+)-uid(\d+)-/', $file, $reg)) {
75 $modulepart = $reg[1];
77 if ($reg[2] != $user->id) {
78 accessforbidden(
'User id found in filename to process does not match current user id');
// Rebuild the original file name and extract the numeric ids embedded in the
// uploaded name as "-uid<n>", "-thiid<n>", "-pid<n>", "-erid<n>" and "-salid<n>".
// Every capture uses (\d+) so the whole run of digits is kept: a character
// class such as [\d+] captures a single character only (one digit or a
// literal "+"), which truncates any id of two digits or more.
$originalfilename = $file;
$uid = $thiid = $pid = $erid = $salid = 0;
if (preg_match('/-uid(\d+)/', $file, $reg)) {
	$uid = (int) $reg[1];
	$originalfilename = preg_replace('/-uid\d+/', '', $originalfilename);
}
if (preg_match('/-thiid(\d+)/', $file, $reg)) {
	$thiid = (int) $reg[1];
	$originalfilename = preg_replace('/-thiid\d+/', '', $originalfilename);
}
if (preg_match('/-pid(\d+)/', $file, $reg)) {
	$pid = (int) $reg[1];
	$originalfilename = preg_replace('/-pid\d+/', '', $originalfilename);
}
if (preg_match('/-erid(\d+)/', $file, $reg)) {
	$erid = (int) $reg[1];
	$originalfilename = preg_replace('/-erid\d+/', '', $originalfilename);
}
if (preg_match('/-salid(\d+)/', $file, $reg)) {
	$salid = (int) $reg[1];
	$originalfilename = preg_replace('/-salid\d+/', '', $originalfilename);
}
// Finally drop the leading "upload_page-<modulepart>-" prefix.
$originalfilename = preg_replace('/^upload_page-[a-z_]+-/', '', $originalfilename);
// Processing strategy. The code below has one branch for 'converttotext' and
// one for 'thread'; only the value assigned here is executed.
$METHOD = 'converttotext';

// Filled in further down according to $modulepart.
$docformat = $doctypelabel = $prompt = '';

// Full path of the file to analyse.
$fullpathoffile = $upload_dir.'/'.$file;
141if ($modulepart ==
'invoice_supplier') {
143 $doctypelabel =
'invoice';
146 $prompt =
'Analyze the contents of this '.$docformat.
' document of an '.$doctypelabel.
' and convert all readable text and numerical information into a structured JSON format. Follow these guidelines:
148 1. **Hierarchical Structure:** Group related information into logical categories like "document_info", "vendor or issuer", "recipient", "items", "payment_info" and "other" depending on the content.
149 2. **Flexible Field Identification:** Identify and organize common fields that are found such as:
150 - "invoice number", "invoice reference"
151 - "date", "issue date", "due date", "transaction or invoice date"
152 - "vendor name", "vendor vat number", "vendor professional id (siret, siren, ...), "vendor address", "vendor phone", "vendor email",
153 - "items", "products", "services", "product or service label", "product or service ref", "product or service ref"
154 - "totals", "summary", "amounts", "balance"
155 - "notes," "comments," "messages," "terms"
156 3. **Handle Tables and Lists:** If the document contains tables, represent each row as an object in a list with fields like "no", "ref", "description", "quantity", "vat rate", "unit price", "total excluding tax", "total including tax".
157 4. **Normalize Dates:** If dates are present, format them in ISO format (YYYY-MM-DD) whenever possible.
158 5. **Ignore Background Noise:** Exclude background noise, decorative elements, and irrelevant symbols that do not contribute to the data content.
159 6. **Preserve Context:** If the image contains sections, headings, or grouping indicators, use them to create logical hierarchies in the JSON structure.
160 7. **General Usability:** Format the text to be suitable for further processing, analysis, or database import.
162 **Example JSON Structure:**
166 "document_ref": "<document ref or number>",
172 "address": "<address>",
173 "phone": "<phone number>",
175 "vatnumber": "<vat number>",
176 "profid": "<professional id>"
180 "address": "<address>",
181 "phone": "<phone number>",
183 "vatnumber": "<vat number>",
184 "profid": "<professional id>"
191 "description": "<description>",
192 "quantity": "<quantity>",
193 "vatrate": "<vat rate>",
194 "totalinctax": "<total including tax>"
195 "totalexcltax": "<total excluding tax>"
199 "subtotal": "<subtotal>",
200 "tax": "<amount of tax>",
201 "total": "<total to pay>"
205 "method": "<check or cash or card or direct_debit or credit_transfer or other>",
206 "details": "Detail of the payment mode"
211 "method": "<check or cash or card or direct_debit or credit_transfer or other>",
212 "amount": "<detail of the payment>",
213 "note":"<other information on payment done>"
216 "notes": "<optional text>"
224if ($METHOD ==
'converttotext') {
225 $result =
dolDocToText($fullpathoffile,
'',
'fulltext');
226 if (empty($result[
'error'])) {
227 $fileContent = $result[
'content'];
231 $prompt =
'This is the content of the document:'.
"\n\n".substr($fileContent, 0, 12000).
"\n\nQuestion: ".$prompt;
233 $result = $ai->generateContent($prompt,
'auto',
'docparsing',
'');
236 if (is_array($result)) {
237 if ($result[
'error']) {
239 $errors[] = $result[
'error'];
241 if ($result[
'curl_error_no']) {
243 $errors[] = $result[
'curl_error_no'];
249 $errors[] =
'Failed to convert document into TXT';
254if ($METHOD ==
'thread') {
265 "purpose" =>
"assistants",
266 "file" =>
new CURLFile($fullpathoffile)
269 $result = $ai->generateContent($payload,
'auto',
'file',
'');
271 if (is_array($result)) {
272 if ($result[
'error']) {
274 $errors[] = $result[
'error'];
276 if ($result[
'curl_error_no']) {
278 $errors[] = $result[
'curl_error_no'];
281 $fileId = json_decode($result,
true)[
'id'];
288 "name" =>
"PDF Analyzer",
289 "instructions" =>
"Analyze PDF and answer precisely",
291 [
"type" =>
"file_search"]
295 $result = $ai->generateContent($payload,
'auto',
'assistant',
'', array(
'OpenAI-Beta',
'assistants=v2'));
297 if (is_array($result)) {
298 if ($result[
'error']) {
300 $errors[] = $result[
'error'];
302 if ($result[
'curl_error_no']) {
304 $errors[] = $result[
'curl_error_no'];
307 $assistantId = json_decode($result,
true)[
'id'];
316 $result = $ai->generateContent($payload,
'auto',
'thread',
'', array(
'OpenAI-Beta',
'assistants=v2'));
318 if (is_array($result)) {
319 if ($result[
'error']) {
321 $errors[] = $result[
'error'];
323 if ($result[
'curl_error_no']) {
325 $errors[] = $result[
'curl_error_no'];
328 $threadId = json_decode($result,
true)[
'id'];
339 "type" =>
"input_text",
345 "file_id" => $fileId,
346 "tools" => [[
"type" =>
"file_search"]]
350 $moreendpoint = $threadId.
'/messages';
352 $result = $ai->generateContent($payload,
'auto',
'thread',
'', array(
'OpenAI-Beta',
'assistants=v2'), $moreendpoint);
354 if (is_array($result)) {
355 if ($result[
'error']) {
357 $errors[] = $result[
'error'];
359 if ($result[
'curl_error_no']) {
361 $errors[] = $result[
'curl_error_no'];
370 [
"assistant_id" => $assistantId]
372 $moreendpoint = $threadId.
'/runs';
374 $result = $ai->generateContent($payload,
'auto',
'thread',
'', array(
'OpenAI-Beta',
'assistants=v2'), $moreendpoint);
376 if (is_array($result)) {
377 if ($result[
'error']) {
379 $errors[] = $result[
'error'];
381 if ($result[
'curl_error_no']) {
383 $errors[] = $result[
'curl_error_no'];
386 $runId = json_decode($result,
true)[
'id'];
397 $moreendpoint = $threadId.
'/runs/'.$runId;
399 $result = $ai->generateContent($payload,
'auto',
'thread',
'', array(
'OpenAI-Beta',
'assistants=v2'), $moreendpoint);
401 if (is_array($result)) {
402 if ($result[
'error']) {
404 $errors[] = $result[
'error'];
406 if ($result[
'curl_error_no']) {
408 $errors[] = $result[
'curl_error_no'];
411 $status =
'completed';
413 $status = json_decode($result,
true)[
'status'];
415 }
while ($status !==
"completed");
422 $moreendpoint = $threadId.
'/messages';
424 $result = $ai->generateContent($prompt,
'auto',
'thread',
'', array(
'OpenAI-Beta',
'assistants=v2'));
426 if (is_array($result)) {
427 if ($result[
'error']) {
429 $errors[] = $result[
'error'];
431 if ($result[
'curl_error_no']) {
433 $errors[] = $result[
'curl_error_no'];
447if (!empty($errors)) {
448 http_response_code(500);
450 print json_encode(array(
'errors' => $errors));
452 $data = json_decode((
string) $answer,
true);
456 $errors[] =
'Failed to decode answer';
457 print
'Failed to decode answer';
dolDocToText($filetoprocess, $useFullTextIndexation='pdftotext', $options='html')
GETPOST($paramname, $check='alphanohtml', $method=0, $filter=null, $options=null, $noreplace=0)
Return value of a param into GET or POST supervariable.
top_httphead($contenttype='text/html', $forcenocache=0)
Show HTTP header.
accessforbidden($message='', $printheader=1, $printfooter=1, $showonlymessage=0, $params=null)
Show a message to say access is forbidden and stop program.