dolibarr  20.0.0-beta
geturl.lib.php
Go to the documentation of this file.
1 <?php
2 /* Copyright (C) 2008-2020 Laurent Destailleur <eldy@users.sourceforge.net>
3  * Copyright (C) 2024 MDW <mdeweerd@users.noreply.github.com>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <https://www.gnu.org/licenses/>.
17  * or see https://www.gnu.org/
18  */
19 
/**
 * Get the content of a URL with cURL (through the proxy when MAIN_PROXY_USE is set).
 *
 * Redirections are never delegated to cURL: they are followed by hand so that every hop goes
 * through the same host name / IP checks (reserved host names, isIPAllowed()) before the request
 * is sent. At most 5 requests are sent for one call.
 *
 * @param   string          $url                URL to call
 * @param   string          $postorget          'GET', 'POST', 'POSTALREADYFORMATED', 'PUT', 'PUTALREADYFORMATED', 'HEAD' or 'DELETE'. Any other value is sent as a GET.
 * @param   string|array    $param              Parameters of the request. For 'PUT' a query string (x=a&y=z) is expected; an array is accepted but logged as a warning.
 * @param   int             $followlocation     0=do not follow redirects, 1=follow 301, 302, 303 and 307 redirects
 * @param   string[]        $addheaders         HTTP header lines to add to the request
 * @param   string[]        $allowedschemes     Allowed schemes among 'http', 'https', 'ftp', 'ftps'
 * @param   int             $localurl           Passed to isIPAllowed(): 0=only external URLs, 1=only local URLs, other value=no range restriction
 * @param   int             $ssl_verifypeer     Negative=verify the certificate only when $dolibarr_main_prod is set, 0=do not verify, 1=verify. Forced to 0 when MAIN_CURL_DISABLE_VERIFYPEER is set.
 * @return  array                               Without cURL error: the curl_getinfo() values (including 'http_code') plus 'content', 'curl_error_no' ('') and 'curl_error_msg' ('').
 *                                              With a cURL error: only 'content', 'curl_error_no' and 'curl_error_msg'.
 *                                              When the URL is refused by the checks: 'http_code' = 400 and 'content' = the error message.
 */
function getURLContent($url, $postorget = 'GET', $param = '', $followlocation = 1, $addheaders = array(), $allowedschemes = array('http', 'https'), $localurl = 0, $ssl_verifypeer = -1)
{
    global $conf;

    $USE_PROXY = !getDolGlobalString('MAIN_PROXY_USE') ? 0 : $conf->global->MAIN_PROXY_USE;
    $PROXY_HOST = !getDolGlobalString('MAIN_PROXY_HOST') ? 0 : $conf->global->MAIN_PROXY_HOST;
    $PROXY_PORT = !getDolGlobalString('MAIN_PROXY_PORT') ? 0 : $conf->global->MAIN_PROXY_PORT;
    $PROXY_USER = !getDolGlobalString('MAIN_PROXY_USER') ? 0 : $conf->global->MAIN_PROXY_USER;
    $PROXY_PASS = !getDolGlobalString('MAIN_PROXY_PASS') ? 0 : $conf->global->MAIN_PROXY_PASS;

    dol_syslog("getURLContent postorget=".$postorget." URL=".$url." param=".$param);

    // Setting the curl parameters.
    $ch = curl_init();

    curl_setopt($ch, CURLOPT_VERBOSE, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Dolibarr geturl function');

    // We use @ here because this may return warning if safe mode is on or open_basedir is on (following location is forbidden when safe mode is on).
    // We force value to false so we will manage redirection ourself later.
    @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);

    if (is_array($addheaders) && count($addheaders)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $addheaders);
    }
    curl_setopt($ch, CURLINFO_HEADER_OUT, true); // To be able to retrieve request header and log it

    // By default use the TLS version decided by PHP.
    // You can force, if supported a version like TLSv1 or TLSv1.2
    if (getDolGlobalString('MAIN_CURL_SSLVERSION')) {
        curl_setopt($ch, CURLOPT_SSLVERSION, $conf->global->MAIN_CURL_SSLVERSION);
    }
    //curl_setopt($ch, CURLOPT_SSLVERSION, 6); for tls 1.2

    // Turning on or off the check of the ssl target certificate
    if ($ssl_verifypeer < 0) {
        global $dolibarr_main_prod;
        $ssl_verifypeer = ($dolibarr_main_prod ? true : false);
    }
    if (getDolGlobalString('MAIN_CURL_DISABLE_VERIFYPEER')) {
        $ssl_verifypeer = 0;
    }

    // Peer and host verification are switched together.
    // CURLOPT_SSL_VERIFYHOST only accepts 0 (no check) or 2 (check the name): the value 1/true is rejected by PHP with a notice.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, ($ssl_verifypeer ? true : false));
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, ($ssl_verifypeer ? 2 : 0));

    // Restrict use to some protocols only
    $protocols = 0;
    $redir_list = array();
    if (is_array($allowedschemes)) {
        foreach ($allowedschemes as $allowedscheme) {
            if ($allowedscheme == 'http') {
                $protocols |= CURLPROTO_HTTP;
                $redir_list["HTTP"] = 1;
            } elseif ($allowedscheme == 'https') {
                $protocols |= CURLPROTO_HTTPS;
                $redir_list["HTTPS"] = 1;
            } elseif ($allowedscheme == 'ftp') {
                $protocols |= CURLPROTO_FTP;
                $redir_list["FTP"] = 1;
            } elseif ($allowedscheme == 'ftps') {
                $protocols |= CURLPROTO_FTPS;
                $redir_list["FTPS"] = 1;
            }
        }
    }

    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, getDolGlobalInt('MAIN_USE_CONNECT_TIMEOUT', 5));
    curl_setopt($ch, CURLOPT_TIMEOUT, getDolGlobalInt('MAIN_USE_RESPONSE_TIMEOUT', 30));

    // Limit the size of downloaded files when MAIN_SECURITY_MAXFILESIZE_DOWNLOADED is set
    $maxsize = getDolGlobalInt('MAIN_SECURITY_MAXFILESIZE_DOWNLOADED');
    if ($maxsize && defined('CURLOPT_MAXFILESIZE_LARGE')) {
        curl_setopt($ch, CURLOPT_MAXFILESIZE_LARGE, $maxsize);
    }
    if ($maxsize && defined('CURLOPT_MAXFILESIZE')) {
        curl_setopt($ch, CURLOPT_MAXFILESIZE, $maxsize);
    }

    //curl_setopt($ch, CURLOPT_SAFE_UPLOAD, true); // PHP 5.5
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // We want response
    if ($postorget == 'POST') {
        curl_setopt($ch, CURLOPT_POST, 1); // POST
        curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // Setting param x=a&y=z as POST fields
    } elseif ($postorget == 'POSTALREADYFORMATED') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST'); // HTTP request is 'POST' but param string is taken as it is
        curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of post, like a xml string
    } elseif ($postorget == 'PUT') {
        $array_param = null;
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
        if (!is_array($param)) {
            parse_str($param, $array_param);
        } else {
            dol_syslog("parameter param must be a string", LOG_WARNING);
            $array_param = $param;
        }
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($array_param)); // Setting param x=a&y=z as PUT fields
    } elseif ($postorget == 'PUTALREADYFORMATED') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
        curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of post, like a xml string
    } elseif ($postorget == 'HEAD') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'HEAD'); // HTTP request is 'HEAD'
        curl_setopt($ch, CURLOPT_NOBODY, true);
    } elseif ($postorget == 'DELETE') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'DELETE'); // HTTP request is 'DELETE'
    } else {
        curl_setopt($ch, CURLOPT_POST, 0); // GET
    }

    // Use the proxy if MAIN_PROXY_USE was set at the beginning of this function.
    if ($USE_PROXY) {
        // The proxy password is masked: it must never be written in clear text into the logs.
        dol_syslog("getURLContent set proxy to ".$PROXY_HOST.":".$PROXY_PORT." - ".$PROXY_USER.":".($PROXY_PASS ? '*****' : ''));
        //curl_setopt ($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP); // Curl 7.10
        curl_setopt($ch, CURLOPT_PROXY, $PROXY_HOST.":".$PROXY_PORT);
        if ($PROXY_USER) {
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $PROXY_USER.":".$PROXY_PASS);
        }
    }

    $newUrl = $url;
    $maxRedirection = 5;
    $info = array();
    $response = '';

    do {
        if ($maxRedirection < 1) {
            break;
        }

        curl_setopt($ch, CURLOPT_URL, $newUrl);

        // Parse $newUrl. parse_url() returns false on a seriously malformed URL and may return no 'host' key
        // (for example when a redirect response gave an empty redirect_url), so this is checked before use.
        $newUrlArray = parse_url($newUrl);
        if (!is_array($newUrlArray) || empty($newUrlArray['host'])) {
            $info['http_code'] = 400;
            $info['content'] = 'Error bad URL. The host name can not be extracted from the URL.';
            $response = ''; // Do not let the body of a previous hop replace the error message
            break;
        }
        $hosttocheck = $newUrlArray['host'];
        $hosttocheck = str_replace(array('[', ']'), '', $hosttocheck); // Remove brackets of IPv6

        // Deny some reserved host names
        if (in_array($hosttocheck, array('metadata.google.internal'))) {
            $info['http_code'] = 400;
            $info['content'] = 'Error bad hostname '.$hosttocheck.' (Used by Google metadata). This value for hostname is not allowed.';
            $response = ''; // Do not let the body of a previous hop replace the error message
            break;
        }

        // Clean host name $hosttocheck to convert it into an IP $iptocheck
        if (in_array($hosttocheck, array('localhost', 'localhost.domain'))) {
            $iptocheck = '127.0.0.1';
        } elseif (in_array($hosttocheck, array('ip6-localhost', 'ip6-loopback'))) {
            $iptocheck = '::1';
        } else {
            // Resolve $hosttocheck to get the IP $iptocheck
            if (function_exists('gethostbyname')) {
                $iptocheck = gethostbyname($hosttocheck);
            } else {
                $iptocheck = $hosttocheck;
            }
            // TODO Resolve ip v6
        }

        // Check $iptocheck is an IP (v4 or v6), if not clear value.
        if (!filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4 | FILTER_FLAG_IPV6)) { // This is not an IP, we clean data
            $iptocheck = '0';
        }

        if ($iptocheck) {
            $tmpresult = isIPAllowed($iptocheck, $localurl);
            if ($tmpresult) {
                $info['http_code'] = 400;
                $info['content'] = $tmpresult;
                $response = ''; // Do not let the body of a previous hop replace the error message
                break;
            }
        }

        if ($iptocheck) {
            // Set CURLOPT_CONNECT_TO so curl will not try another resolution that may give a different result. Possible only on PHP v7+
            // NOTE(review): when the URL has no explicit port, '%d' renders the empty string as 0 (entry 'host:0:ip:0').
            // Confirm that libcurl still applies the entry in that case.
            if (defined('CURLOPT_CONNECT_TO')) {
                $connect_to = array(sprintf("%s:%d:%s:%d", $newUrlArray['host'], empty($newUrlArray['port']) ? '' : $newUrlArray['port'], $iptocheck, empty($newUrlArray['port']) ? '' : $newUrlArray['port']));
                curl_setopt($ch, CURLOPT_CONNECT_TO, $connect_to);
            }
        }

        // Moving these just before the curl_exec option really limits
        // on windows PHP 7.4.
        curl_setopt($ch, CURLOPT_PROTOCOLS, $protocols);
        curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, $protocols);
        /* CURLOPT_REDIR_PROTOCOLS_STR is available from curl 7.85.0
        if (version_compare(PHP_VERSION, '8.3.0', '>=') && version_compare(curl_version()['version'], '7.85.0', '>=')) {
            curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS_STR, implode(",", array_keys($redir_list)));
        }
        */

        // Getting response from server
        $response = curl_exec($ch);

        $info = curl_getinfo($ch); // Reading of request must be done after sending request
        $http_code = $info['http_code'];

        if ($followlocation && ($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307)) {
            // Loop again on the target of the redirection so it goes through the same checks
            $newUrl = $info['redirect_url'];
            $maxRedirection--;
            // TODO Use $info['local_ip'] and $info['primary_ip'] ?
            continue;
        }

        $http_code = 0; // Not a redirection we must follow: this ends the loop
    } while ($http_code);

    $request = curl_getinfo($ch, CURLINFO_HEADER_OUT); // Reading of request must be done after sending request

    dol_syslog("getURLContent request=".$request);
    if (getDolGlobalInt('MAIN_CURL_DEBUG')) {
        // This may contains binary data, so we don't output response by default.
        dol_syslog("getURLContent request=".$request, LOG_DEBUG, 0, '_curl');
        dol_syslog("getURLContent response =".$response, LOG_DEBUG, 0, '_curl');
    }
    dol_syslog("getURLContent response size=".strlen($response)); // This may contains binary data, so we don't output it

    $rep = array();
    if (curl_errno($ch)) {
        // Add keys to $rep
        $rep['content'] = $response;

        // Report the curl error to the caller
        $rep['curl_error_no'] = curl_errno($ch);
        $rep['curl_error_msg'] = curl_error($ch);

        dol_syslog("getURLContent response array is ".implode(',', $rep));
    } else {
        // Add keys to $rep
        $rep = $info;
        dol_syslog("getURLContent http_code=".$rep['http_code']);

        // Add more keys to $rep
        if ($response) {
            $rep['content'] = $response;
        }
        $rep['curl_error_no'] = '';
        $rep['curl_error_msg'] = '';
    }

    // Closing the curl handle
    curl_close($ch);

    return $rep;
}
297 
/**
 * Check whether an IP address is allowed to be called.
 *
 * @param   string  $iptocheck      IP address to check (v4 or v6)
 * @param   int     $localurl       0=only external IPs are allowed, 1=only local IPs are allowed,
 *                                  any other value=no restriction on the range (only the common check on metadata servers is done)
 * @return  string                  Error message if the IP is refused, '' if the IP is allowed
 */
function isIPAllowed($iptocheck, $localurl)
{
    if ($localurl == 0) { // Only external url allowed (dangerous, may allow to get malware)
        if (!filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
            // Deny ips like 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 0.0.0.0/8, 169.254.0.0/16, 127.0.0.0/8 et 240.0.0.0/4, ::1/128, ::/128, ::ffff:0:0/96, fe80::/10...
            $errormsg = 'Error bad hostname IP (private or reserved range). Must be an external URL.';
            return $errormsg;
        }
        if (!empty($_SERVER["SERVER_ADDR"]) && $iptocheck == $_SERVER["SERVER_ADDR"]) {
            $errormsg = 'Error bad hostname IP (IP is a local IP). Must be an external URL.';
            return $errormsg;
        }
        // The list is comma separated; entries are trimmed so that "ip1, ip2" is matched like "ip1,ip2"
        $serveripconf = getDolGlobalString('MAIN_SECURITY_ANTI_SSRF_SERVER_IP');
        if ($serveripconf && in_array((string) $iptocheck, array_map('trim', explode(',', $serveripconf)), true)) {
            $errormsg = 'Error bad hostname IP (IP is a local IP defined into MAIN_SECURITY_ANTI_SSRF_SERVER_IP). Must be an external URL.';
            return $errormsg;
        }
    }
    if ($localurl == 1) { // Only local url allowed (dangerous, may allow to get metadata on server or make internal port scanning)
        // Deny ips NOT like 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 0.0.0.0/8, 169.254.0.0/16, 127.0.0.0/8 et 240.0.0.0/4, ::1/128, ::/128, ::ffff:0:0/96, fe80::/10...
        if (filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
            $errormsg = 'Error bad hostname '.$iptocheck.'. Must be a local URL.';
            return $errormsg;
        }
        // When a list of server IPs is defined, a local IP must also be part of this list
        $serveripconf = getDolGlobalString('MAIN_SECURITY_ANTI_SSRF_SERVER_IP');
        if ($serveripconf && !in_array((string) $iptocheck, array_map('trim', explode(',', $serveripconf)), true)) {
            $errormsg = 'Error bad hostname IP (IP is not a local IP defined into list MAIN_SECURITY_ANTI_SSRF_SERVER_IP). Must be a local URL in allowed list.';
            return $errormsg;
        }
    }

    // Common check on ip (local and external)
    // See list on https://tagmerge.com/gist/a7b9d57ff8ec11d63642f8778609a0b8
    // Evasive URLs that are not IPs are already excluded by the test on IP v4/v6 validity.
    $arrayofmetadataserver = array(
        '100.100.100.200' => 'Alibaba',
        '192.0.0.192' => 'Oracle',
        '192.80.8.124' => 'Packet',
        '100.88.222.5' => 'Tencent cloud',
    );
    foreach ($arrayofmetadataserver as $ipofmetadataserver => $nameofmetadataserver) {
        if ($iptocheck == $ipofmetadataserver) {
            $errormsg = 'Error bad hostname IP (Used by '.$nameofmetadataserver.' metadata server). This IP is forbidden.';
            return $errormsg;
        }
    }

    return '';
}
352 
/**
 * Get the domain name from a URL.
 * For example, with https://www.abc.mydomain.com/dir/page.html:
 * mode 0 returns 'mydomain', mode 1 returns 'mydomain.com', mode 2 returns 'abc.mydomain.com'.
 * When the host ends with a known TLD on 2 levels (like .co.uk), the TLD counts as one level:
 * https://www.abc.mydomain.co.uk/dir/page.html gives 'mydomain', 'mydomain.co.uk' and 'abc.mydomain.co.uk'.
 *
 * @param   string  $url    Full URL (http or https) or just a host name
 * @param   int     $mode   0=return 'mydomain', 1=return 'mydomain.com', 2=return 'abc.mydomain.com'
 * @return  string          Domain name
 */
function getDomainFromURL($url, $mode = 0)
{
    $arrayof2levetopdomain = array(
        'co.at', 'or.at', 'gv.at',
        'avocat.fr', 'aeroport.fr', 'veterinaire.fr',
        'com.ng', 'gov.ng', 'gov.ua', 'com.ua', 'in.ua', 'org.ua', 'edu.ua', 'net.ua',
        'net.uk', 'org.uk', 'gov.uk', 'co.uk',
        'com.mx'
    );

    // Keep only the host: the scheme and the path must not take part in the detection of the TLD
    $tmpdomain = preg_replace('/^https?:\/\//i', '', $url); // Remove http(s)://
    $tmpdomain = preg_replace('/\/.*$/i', '', $tmpdomain); // Remove part after /

    // Set if tld is on 2 levels
    $tldon2level = 0;
    $parts = array_reverse(explode('.', $tmpdomain));
    if (!empty($parts[1]) && in_array($parts[1].'.'.$parts[0], $arrayof2levetopdomain, true)) {
        $tldon2level = 1;
    }

    // A TLD on 2 levels needs one more level to be kept
    if ($tldon2level && $mode > 0) {
        $mode++;
    }

    if ($mode == 3) {
        $tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)\.([^\.]+)\.([^\.]+)$/', '\1.\2.\3.\4', $tmpdomain);
    } elseif ($mode == 2) {
        $tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)\.([^\.]+)$/', '\1.\2.\3', $tmpdomain); // Remove part 'www.' before 'abc.mydomain.com'
    } elseif ($mode == 1) {
        $tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)$/', '\1.\2', $tmpdomain); // Remove part 'www.abc.' before 'mydomain.com'
    }

    if (empty($mode)) {
        if ($tldon2level) {
            $tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)\.([^\.]+)$/', '\1.\2.\3', $tmpdomain); // Remove part 'www.abc.' before 'mydomain.co.uk'
            $tmpdomain = preg_replace('/\.[^\.]+\.[^\.]+$/', '', $tmpdomain); // Remove TLD (.com.mx, .co.uk, ...)
        } else {
            $tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)$/', '\1.\2', $tmpdomain); // Remove part 'www.abc.' before 'mydomain.com'
            $tmpdomain = preg_replace('/\.[^\.]+$/', '', $tmpdomain); // Remove TLD (.com, .net, ...)
        }
    }

    return $tmpdomain;
}
404 
/**
 * Get the root URL (scheme and host part, without the path) from a long URL.
 * For example: https://www.abc.mydomain.com/dir/page.html returns 'https://www.abc.mydomain.com'.
 * A string that does not start with a 'scheme://' prefix is returned unchanged.
 *
 * @param   string  $url    Full URL
 * @return  string          Root URL
 */
function getRootURLFromURL($url)
{
    // Capture 'scheme://' followed by everything up to the first '/', and drop the rest of the line
    $rootpattern = '/^([a-z]*:\/\/[^\/]*).*/i';
    $rooturl = preg_replace($rootpattern, '$1', $url);

    return $rooturl;
}
418 
/**
 * Remove the HTML comments from a content.
 * Every '<!-- ... -->' sequence is removed, including comments on several lines
 * and comments that contain the character '-'.
 *
 * @param   string  $content    Text content
 * @return  string              Text without HTML comments (the content unchanged if the regex engine fails)
 */
function removeHtmlComment($content)
{
    // Non greedy match so each comment ends at its own '-->'; modifier s so a comment can span several lines
    $cleaned = preg_replace('/<!--.*?-->/s', '', $content);

    // preg_replace returns null on error: in such a case, keep the original content instead of losing it
    return ($cleaned === null ? $content : $cleaned);
}
getDolGlobalInt($key, $default=0)
Return a Dolibarr global constant int value.
getDolGlobalString($key, $default='')
Return dolibarr global constant string value.
dol_syslog($message, $level=LOG_INFO, $ident=0, $suffixinfilename='', $restricttologhandler='', $logcontext=null)
Write log message into outputs.
getDomainFromURL($url, $mode=0)
Function to get the second-level domain name.
Definition: geturl.lib.php:361
isIPAllowed($iptocheck, $localurl)
Is IP allowed.
Definition: geturl.lib.php:305
getRootURLFromURL($url)
Function to get the root URL from a long URL. For example: https://www.abc.mydomain.com/dir/page....
Definition: geturl.lib.php:414
getURLContent($url, $postorget='GET', $param='', $followlocation=1, $addheaders=array(), $allowedschemes=array('http', 'https'), $localurl=0, $ssl_verifypeer=-1)
Function to get a content from an URL (use proxy if proxy defined).
Definition: geturl.lib.php:42
removeHtmlComment($content)
Function to remove comments from HTML content.
Definition: geturl.lib.php:425