dolibarr 22.0.5
geturl.lib.php
Go to the documentation of this file.
1<?php
2/* Copyright (C) 2008-2020 Laurent Destailleur <eldy@users.sourceforge.net>
3 * Copyright (C) 2024 MDW <mdeweerd@users.noreply.github.com>
4 * Copyright (C) 2025 Frédéric France <frederic.france@free.fr>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <https://www.gnu.org/licenses/>.
18 * or see https://www.gnu.org/
19 */
20
/**
 * Function to get a content from an URL (use proxy if proxy defined).
 *
 * Redirections are never delegated to curl: each hop is followed manually (5 hops at most) so
 * that the host of every hop goes through the same anti-SSRF checks as the initial URL.
 *
 * @param	string			$url				URL to call
 * @param	string			$postorget			'GET' (default), 'POST', 'PUT', 'PATCH', 'HEAD', 'DELETE', or 'POSTALREADYFORMATED', 'PUTALREADYFORMATED', 'PATCHALREADYFORMATED' to send $param as it is. Any other value is handled like 'GET'.
 * @param	string|array	$param				Parameters of the request (string like x=a&y=z, or raw body for the xxxALREADYFORMATED modes)
 * @param	int				$followlocation		0=Do not follow redirections, 1=Follow 301, 302, 303 and 307 redirections
 * @param	string[]		$addheaders			Array of HTTP headers to add to the request
 * @param	string[]		$allowedschemes		List of allowed schemes among 'http', 'https', 'ftp', 'ftps'
 * @param	int				$localurl			0=Only external URLs are allowed, 1=Only local URLs are allowed. The value is forwarded to isIPAllowed().
 * @param	int				$ssl_verifypeer		-1=Use the value of $dolibarr_main_prod, 0=Do not verify the peer certificate, 1=Verify it. Always forced to 0 when MAIN_CURL_DISABLE_VERIFYPEER is set.
 * @param	int				$timeoutconnect		Connection timeout in seconds (0=use MAIN_USE_CONNECT_TIMEOUT, 5 by default)
 * @param	int				$timeoutresponse	Response timeout in seconds (0=use MAIN_USE_RESPONSE_TIMEOUT, 30 by default)
 * @return	array								Array with keys 'http_code', 'content', 'curl_error_no' and 'curl_error_msg'. When curl reports no error, all the fields returned by curl_getinfo() are also present.
 */
function getURLContent($url, $postorget = 'GET', $param = '', $followlocation = 1, $addheaders = array(), $allowedschemes = array('http', 'https'), $localurl = 0, $ssl_verifypeer = -1, $timeoutconnect = 0, $timeoutresponse = 0)
{
	global $conf;

	// Proxy setup
	$USE_PROXY = !getDolGlobalString('MAIN_PROXY_USE') ? 0 : $conf->global->MAIN_PROXY_USE;
	$PROXY_HOST = !getDolGlobalString('MAIN_PROXY_HOST') ? 0 : $conf->global->MAIN_PROXY_HOST;
	$PROXY_PORT = !getDolGlobalString('MAIN_PROXY_PORT') ? 0 : $conf->global->MAIN_PROXY_PORT;
	$PROXY_USER = !getDolGlobalString('MAIN_PROXY_USER') ? 0 : $conf->global->MAIN_PROXY_USER;
	$PROXY_PASS = !getDolGlobalString('MAIN_PROXY_PASS') ? 0 : $conf->global->MAIN_PROXY_PASS;

	dol_syslog("getURLContent postorget=".$postorget." URL=".$url." json_encode(param)=".json_encode($param));

	if (!function_exists('curl_init')) {
		return array('http_code' => 500, 'content' => '', 'curl_error_no' => 1, 'curl_error_msg' => 'PHP curl library must be installed');
	}

	// Setting the curl parameters
	$ch = curl_init();

	curl_setopt($ch, CURLOPT_VERBOSE, 1);
	curl_setopt($ch, CURLOPT_USERAGENT, 'Dolibarr geturl function'); // set the Dolibarr user agent name

	// We use @ here because this may return warning if safe mode is on or open_basedir is on (following location is forbidden when safe mode is on).
	// We force value to false so we will manage redirection ourself later.
	@curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);

	if (is_array($addheaders) && count($addheaders)) {
		curl_setopt($ch, CURLOPT_HTTPHEADER, $addheaders);
	}
	curl_setopt($ch, CURLINFO_HEADER_OUT, true); // To be able to retrieve request header and log it

	// By default use the TLS version decided by PHP.
	// You can force, if supported, a version with a number (6 for TLS 1.2) or the name of a CURL_SSLVERSION_xxx constant.
	$sslversionsetup = getDolGlobalString('MAIN_CURL_SSLVERSION');
	if ($sslversionsetup) {
		if (is_numeric($sslversionsetup)) {
			curl_setopt($ch, CURLOPT_SSLVERSION, (int) getDolGlobalInt('MAIN_CURL_SSLVERSION'));
		} elseif (defined($sslversionsetup)) {
			curl_setopt($ch, CURLOPT_SSLVERSION, (int) constant($sslversionsetup));
		} else {
			// constant() on an unknown name throws an Error on PHP 8, so we keep the default version instead of crashing.
			dol_syslog("getURLContent MAIN_CURL_SSLVERSION is not a number nor a known constant, default TLS version is kept", LOG_WARNING);
		}
	}

	// Turning on or off the check of the ssl target certificate
	if ($ssl_verifypeer < 0) {
		global $dolibarr_main_prod;
		$ssl_verifypeer = ($dolibarr_main_prod ? true : false);
	}
	if (getDolGlobalString('MAIN_CURL_DISABLE_VERIFYPEER')) {
		$ssl_verifypeer = 0;
	}

	curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, ($ssl_verifypeer ? true : false));
	// Valid values for CURLOPT_SSL_VERIFYHOST are 2 (check the host name) and 0 (no check). Value 1 must not be used.
	curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, ($ssl_verifypeer ? 2 : 0));

	// Restrict use to some protocols only
	$protocols = 0;
	if (is_array($allowedschemes)) {
		foreach ($allowedschemes as $allowedscheme) {
			if ($allowedscheme == 'http') {
				$protocols |= CURLPROTO_HTTP;
			} elseif ($allowedscheme == 'https') {
				$protocols |= CURLPROTO_HTTPS;
			} elseif ($allowedscheme == 'ftp') {
				$protocols |= CURLPROTO_FTP;
			} elseif ($allowedscheme == 'ftps') {
				$protocols |= CURLPROTO_FTPS;
			}
		}
	}

	curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeoutconnect ? $timeoutconnect : getDolGlobalInt('MAIN_USE_CONNECT_TIMEOUT', 5));
	curl_setopt($ch, CURLOPT_TIMEOUT, $timeoutresponse ? $timeoutresponse : getDolGlobalInt('MAIN_USE_RESPONSE_TIMEOUT', 30));

	// Limit size of downloaded files (the setup value is multiplied by 1024 before being given to curl)
	$maxsize = getDolGlobalInt('MAIN_SECURITY_MAXFILESIZE_DOWNLOADED');
	if ($maxsize && defined('CURLOPT_MAXFILESIZE_LARGE')) {
		curl_setopt($ch, CURLOPT_MAXFILESIZE_LARGE, $maxsize * 1024); // @phan-suppress-current-line PhanTypeMismatchArgumentNullableInternal
	}
	if ($maxsize && defined('CURLOPT_MAXFILESIZE')) {
		curl_setopt($ch, CURLOPT_MAXFILESIZE, $maxsize * 1024);
	}

	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // We want response
	if ($postorget == 'POST') {
		curl_setopt($ch, CURLOPT_POST, 1); // POST
		curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // Setting param x=a&y=z as POST fields
	} elseif ($postorget == 'POSTALREADYFORMATED') {
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST'); // HTTP request is 'POST' but param string is taken as it is
		curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of post, like a xml string
	} elseif ($postorget == 'PUT') {
		$array_param = null;
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
		if (!is_array($param)) {
			parse_str($param, $array_param); // @phan-suppress-current-line PhanPluginConstantVariableNull
		} else {
			dol_syslog("parameter param must be a string", LOG_WARNING);
			$array_param = $param;
		}
		curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($array_param)); // Setting param x=a&y=z as PUT fields
	} elseif ($postorget == 'PUTALREADYFORMATED') {
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
		curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of request, like a xml string
	} elseif ($postorget == 'PATCH') {
		$array_param = null;
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PATCH'); // RFC 5789
		if (!is_array($param)) {
			parse_str($param, $array_param); // @phan-suppress-current-line PhanPluginConstantVariableNull
		} else {
			dol_syslog("parameter param must be a string", LOG_WARNING);
			$array_param = $param;
		}
		curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($array_param));
	} elseif ($postorget == 'PATCHALREADYFORMATED') {
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PATCH'); // RFC 5789
		curl_setopt($ch, CURLOPT_POSTFIELDS, $param);
	} elseif ($postorget == 'HEAD') {
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'HEAD'); // HTTP request is 'HEAD'
		curl_setopt($ch, CURLOPT_NOBODY, true);
	} elseif ($postorget == 'DELETE') {
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'DELETE'); // HTTP request is 'DELETE'
	} else {
		curl_setopt($ch, CURLOPT_POST, 0); // GET
	}

	// Use the proxy if enabled in setup
	if ($USE_PROXY) {
		// The proxy password must never be written into the log
		dol_syslog("getURLContent set proxy to ".$PROXY_HOST.":".$PROXY_PORT." - ".$PROXY_USER.":".($PROXY_PASS ? '*****' : ''));
		curl_setopt($ch, CURLOPT_PROXY, $PROXY_HOST.":".$PROXY_PORT);
		if ($PROXY_USER) {
			curl_setopt($ch, CURLOPT_PROXYUSERPWD, $PROXY_USER.":".$PROXY_PASS);
		}
	}

	$newUrl = $url;
	$maxRedirection = 5;
	$info = array();
	$response = '';

	do {
		if ($maxRedirection < 1) {
			return array('http_code' => 400, 'content' => 'Maximum number of redirections reached', 'curl_error_no' => 1, 'curl_error_msg' => 'Maximum number of redirections reached');
		}

		curl_setopt($ch, CURLOPT_URL, $newUrl);

		// Parse $newUrl. Without a host we can't run the anti-SSRF checks, so such an URL (malformed URL,
		// relative URL, empty redirect target...) is refused instead of being given to curl.
		$newUrlArray = parse_url((string) $newUrl);
		if (!is_array($newUrlArray) || empty($newUrlArray['host'])) {
			$errormsg = 'Error bad URL (no hostname found in URL)';
			return array('http_code' => 400, 'content' => $errormsg, 'curl_error_no' => 1, 'curl_error_msg' => $errormsg);
		}
		$hosttocheck = str_replace(array('[', ']'), '', $newUrlArray['host']); // Remove brackets of IPv6

		// Deny some reserved host names
		if (in_array($hosttocheck, array('metadata.google.internal'))) {
			$errormsg = 'Error bad hostname '.$hosttocheck.' (Used by Google metadata). This value for hostname is not allowed.';
			return array('http_code' => 400, 'content' => $errormsg, 'curl_error_no' => 1, 'curl_error_msg' => $errormsg);
		}

		// Clean host name $hosttocheck to convert it into an IP $iptocheck
		if (in_array($hosttocheck, array('localhost', 'localhost.domain'))) {
			$iptocheck = '127.0.0.1';
		} elseif (in_array($hosttocheck, array('ip6-localhost', 'ip6-loopback'))) {
			$iptocheck = '::1';
		} else {
			// Resolve $hosttocheck to get the IP $iptocheck
			if (function_exists('gethostbyname')) {
				$iptocheck = gethostbyname($hosttocheck);
			} else {
				$iptocheck = $hosttocheck;
			}
			// TODO Resolve ip v6
		}

		// Check $iptocheck is an IP (v4 or v6), if not clear value.
		if (!filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4 | FILTER_FLAG_IPV6)) { // This is not an IP, we clean data
			$iptocheck = '0';
		}

		if ($iptocheck) {
			$tmpresult = isIPAllowed($iptocheck, $localurl);
			if ($tmpresult) {
				return array('http_code' => 400, 'content' => $tmpresult, 'curl_error_no' => 1, 'curl_error_msg' => $tmpresult);
			}

			// Set CURLOPT_CONNECT_TO so curl will not try another resolution that may give a different result. Possible only on PHP v7+
			if (defined('CURLOPT_CONNECT_TO')) {
				// Format is HOST:PORT:CONNECT-TO-HOST:CONNECT-TO-PORT. An empty PORT means "any port" and an empty
				// CONNECT-TO-PORT means "same port as the request". The ports must stay empty (and not be 0) when
				// the URL has no explicit port, otherwise the entry never matches the request and is ignored by curl.
				$porttouse = empty($newUrlArray['port']) ? '' : (string) ((int) $newUrlArray['port']);
				// A numerical IPv6 address must be written between brackets
				$connecttohost = (strpos($iptocheck, ':') !== false) ? '['.$iptocheck.']' : $iptocheck;
				$connect_to = array($newUrlArray['host'].':'.$porttouse.':'.$connecttohost.':'.$porttouse);
				curl_setopt($ch, CURLOPT_CONNECT_TO, $connect_to);
			}
		}

		// Moving these just before the curl_exec option really limits
		// on windows PHP 7.4.
		curl_setopt($ch, CURLOPT_PROTOCOLS, $protocols);
		curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, $protocols);

		// Getting response from server
		$response = curl_exec($ch); // return false on error, result on success

		$info = curl_getinfo($ch); // Reading of request must be done after sending request
		$http_code = $info['http_code'];

		if ($followlocation && ($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307)) {
			$newUrl = $info['redirect_url'];
			$maxRedirection--;
			// TODO Use $info['local_ip'] and $info['primary_ip'] ?
			continue; // Jump to the loop condition: $http_code is the redirect code, so we loop on the new URL
		}

		$http_code = 0; // Not a redirection to follow: leave the loop
	} while ($http_code);

	$request = curl_getinfo($ch, CURLINFO_HEADER_OUT); // Reading of request must be done after sending request

	dol_syslog("getURLContent request=".$request);
	if (getDolGlobalInt('MAIN_CURL_DEBUG')) {
		// This may contains binary data, so we don't output response by default.
		dol_syslog("getURLContent request=".$request, LOG_DEBUG, 0, '_curl');
		dol_syslog("getURLContent response =".$response, LOG_DEBUG, 0, '_curl');
	}
	dol_syslog("getURLContent response size=".strlen((string) $response)); // This may contains binary data, so we don't output it

	$rep = array();
	if (curl_errno($ch)) {
		// Error reported by curl: return only the 4 standard keys
		$rep['content'] = $response ? (string) $response : '';
		$rep['http_code'] = 0;
		$rep['curl_error_no'] = curl_errno($ch);
		$rep['curl_error_msg'] = curl_error($ch);

		dol_syslog("getURLContent response array is ".implode(',', $rep));
	} else {
		// Return all fields found into $info.
		$rep = $info;

		dol_syslog("getURLContent http_code=".$rep['http_code']);

		// Add more keys to $rep
		$rep['content'] = $response ? (string) $response : '';
		$rep['curl_error_no'] = 0;
		$rep['curl_error_msg'] = '';
	}

	// Closing the curl handle
	curl_close($ch);

	// We must exclude the phpstan warning, because the fields found in the result of curl_getinfo may not all be defined in the description of this method.
	// @phpstan-ignore-next-line
	return $rep;
}
329
/**
 * Is IP allowed.
 *
 * @param	string	$iptocheck	IP (v4 or v6) to check
 * @param	int		$localurl	0=Only external IPs are allowed, 1=Only local IPs are allowed. With any other value, only the common check on metadata servers is done.
 * @return	string				Error message if the IP is refused, '' if the IP is allowed
 */
function isIPAllowed($iptocheck, $localurl)
{
	if ($localurl == 0) { // Only external url allowed (dangerous, may allow to get malware)
		if (!filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
			// Deny ips like 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 0.0.0.0/8, 169.254.0.0/16, 127.0.0.0/8 and 240.0.0.0/4, ::1/128, ::/128, ::ffff:0:0/96, fe80::/10...
			return 'Error bad hostname IP (private or reserved range). Must be an external URL.';
		}
		if (!empty($_SERVER["SERVER_ADDR"]) && $iptocheck == $_SERVER["SERVER_ADDR"]) {
			return 'Error bad hostname IP (IP is a local IP). Must be an external URL.';
		}
		$serveriplist = getDolGlobalString('MAIN_SECURITY_ANTI_SSRF_SERVER_IP');
		// Entries are trimmed so a list written "ip1, ip2" is understood like "ip1,ip2"
		if ($serveriplist && in_array((string) $iptocheck, array_map('trim', explode(',', $serveriplist)), true)) {
			return 'Error bad hostname IP (IP is a local IP defined into MAIN_SECURITY_ANTI_SSRF_SERVER_IP). Must be an external URL.';
		}
	}
	if ($localurl == 1) { // Only local url allowed (dangerous, may allow to get metadata on server or make internal port scanning)
		// Deny ips NOT like 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 0.0.0.0/8, 169.254.0.0/16, 127.0.0.0/8 and 240.0.0.0/4, ::1/128, ::/128, ::ffff:0:0/96, fe80::/10...
		if (filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
			return 'Error bad hostname '.$iptocheck.'. Must be a local URL.';
		}
		$serveriplist = getDolGlobalString('MAIN_SECURITY_ANTI_SSRF_SERVER_IP');
		// When a list of server IPs is defined, a local IP is accepted only if it is in this list
		if ($serveriplist && !in_array((string) $iptocheck, array_map('trim', explode(',', $serveriplist)), true)) {
			return 'Error bad hostname IP (IP is not a local IP defined into list MAIN_SECURITY_ANTI_SSRF_SERVER_IP). Must be a local URL in allowed list.';
		}
	}

	// Common check on ip (local and external)
	// See list on https://tagmerge.com/gist/a7b9d57ff8ec11d63642f8778609a0b8
	// Evasive hostnames that are not IPs are excluded earlier by the test on IP v4/v6 validity.
	$arrayofmetadataserver = array(
		'100.100.100.200' => 'Alibaba',
		'192.0.0.192' => 'Oracle',
		'192.80.8.124' => 'Packet',
		'100.88.222.5' => 'Tencent cloud',
	);
	if (isset($arrayofmetadataserver[(string) $iptocheck])) {
		return 'Error bad hostname IP (Used by '.$arrayofmetadataserver[(string) $iptocheck].' metadata server). This IP is forbidden.';
	}

	return '';
}
384
/**
 * Function get second level domain name.
 * For example: https://www.abc.mydomain.com/dir/page.html => 'mydomain' with mode 0,
 * 'mydomain.com' with mode 1, 'abc.mydomain.com' with mode 2.
 * When the TLD is a known TLD on 2 levels (like 'co.uk'), one more level is kept so
 * https://www.mydomain.co.uk/dir/page.html => 'mydomain' with mode 0, 'mydomain.co.uk' with mode 1.
 *
 * @param	string	$url	Full URL, host name or email
 * @param	int		$mode	0=return the domain name without the TLD, 1=return the domain name with the TLD, 2=return the domain name with the TLD and one sub level
 * @return	string			Domain name extracted from $url
 */
function getDomainFromURL($url, $mode = 0)
{
	$arrayof2levetopdomain = array(
		'co.at', 'or.at', 'gv.at',
		'avocat.fr', 'aeroport.fr', 'veterinaire.fr',
		'com.ng', 'gov.ng', 'gov.ua', 'com.ua', 'in.ua', 'org.ua', 'edu.ua', 'net.ua',
		'net.uk', 'org.uk', 'gov.uk', 'co.uk',
		'com.mx'
	);

	// Isolate the host part first
	$tmpdomain = preg_replace('/^https?:\/\/[^:]+:[^@]+@/i', '', $url); // Remove http(s)://login:pass@ in https://login:pass@mydomain.com/path, so we now got mydomain.com/path
	$tmpdomain = preg_replace('/^https?:\/\//i', '', $tmpdomain); // Remove http(s)://
	$tmpdomain = preg_replace('/\/.*$/i', '', $tmpdomain); // Remove part after /
	$tmpdomain = preg_replace('/^[^@]+@/i', '', $tmpdomain); // Remove part1@ in part1@part2 (for emails)

	// Set if tld is on 2 levels. The test must be done on the host only (without the port), not on the
	// full URL, otherwise any URL with a path (where the last label is like 'uk/dir/page') is never detected.
	$tldon2level = 0;
	$hostfortld = strtolower((string) preg_replace('/:\d+$/', '', (string) $tmpdomain));
	$parts = array_reverse(explode('.', $hostfortld));
	if (!empty($parts[1]) && in_array($parts[1].'.'.$parts[0], $arrayof2levetopdomain)) {
		$tldon2level = 1;
	}

	// With a TLD on 2 levels, we must keep one more level
	if ($tldon2level && $mode > 0) {
		$mode++;
	}

	if ($mode == 3) {
		$tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)\.([^\.]+)\.([^\.]+)$/', '\1.\2.\3.\4', $tmpdomain);
	} elseif ($mode == 2) {
		$tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)\.([^\.]+)$/', '\1.\2.\3', $tmpdomain); // Remove part 'www.' before 'abc.mydomain.com'
	} elseif ($mode == 1) {
		$tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)$/', '\1.\2', $tmpdomain); // Remove part 'www.abc.' before 'mydomain.com'
	}

	if (empty($mode)) {
		if ($tldon2level) {
			$tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)\.([^\.]+)$/', '\1.\2.\3', $tmpdomain); // Remove part 'www.abc.' before 'mydomain.co.uk'
			$tmpdomain = preg_replace('/\.[^\.]+\.[^\.]+$/', '', $tmpdomain); // Remove TLD (.com.mx, .co.uk, ...)
		} else {
			$tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)$/', '\1.\2', $tmpdomain); // Remove part 'www.abc.' before 'mydomain.com'
			$tmpdomain = preg_replace('/\.[^\.]+$/', '', $tmpdomain); // Remove TLD (.com, .net, ...)
		}
	}

	return $tmpdomain;
}
439
/**
 * Return the root part (scheme://host[:port]) of a long URL.
 * For example: https://www.abc.mydomain.com/dir/page.html => 'https://www.abc.mydomain.com'.
 * A string that does not start with a 'scheme://' prefix is returned unchanged.
 *
 * @param	string	$url	Full URL
 * @return	string			Root of the URL
 */
function getRootURLFromURL($url)
{
	// Capture "scheme://" followed by everything up to the first "/", and drop the rest
	$rootpattern = '/^([a-z]*:\/\/[^\/]*).*/i';
	$rooturl = preg_replace($rootpattern, '$1', $url);

	return $rooturl;
}
453
/**
 * Function to remove comments from HTML content.
 *
 * @param	string	$content	Content
 * @return	string				Content with every <!-- ... --> block removed
 */
function removeHtmlComment($content)
{
	// Lazy match up to the first "-->" with the s modifier, so a comment containing hyphens, a comment
	// on several lines or an empty comment is removed too (a body restricted to [^\-]+ missed them).
	$content = preg_replace('/<!--.*?-->/s', '', $content);
	return $content;
}
getDolGlobalInt($key, $default=0)
Return a Dolibarr global constant int value.
getDolGlobalString($key, $default='')
Return a Dolibarr global constant string value.
dol_syslog($message, $level=LOG_INFO, $ident=0, $suffixinfilename='', $restricttologhandler='', $logcontext=null)
Write log message into outputs.
getDomainFromURL($url, $mode=0)
Function get second level domain name.
isIPAllowed($iptocheck, $localurl)
Is IP allowed.
getRootURLFromURL($url)
Function to get the root URL from a long URL. For example: https://www.abc.mydomain.com/dir/page....
getURLContent($url, $postorget='GET', $param='', $followlocation=1, $addheaders=array(), $allowedschemes=array('http', 'https'), $localurl=0, $ssl_verifypeer=-1, $timeoutconnect=0, $timeoutresponse=0)
Function to get a content from an URL (use proxy if proxy defined).
removeHtmlComment($content)
Function to remove comments from HTML content.
global $conf
The following vars must be defined: $type2label $form $conf, $lang, The following vars may also be de...
Definition member.php:79