dolibarr 19.0.4
geturl.lib.php
Go to the documentation of this file.
1<?php
2/* Copyright (C) 2008-2020 Laurent Destailleur <eldy@users.sourceforge.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 * or see https://www.gnu.org/
17 */
18
/**
 * Get the content of a URL with cURL (through the proxy when MAIN_PROXY_USE is set).
 *
 * cURL never follows redirections by itself here: every hop (the initial URL and each
 * redirect target) is resolved to an IP, validated with isIPAllowed(), and only then
 * requested. At most 5 redirections are followed.
 *
 * @param   string          $url                URL to call.
 * @param   string          $postorget          'GET' (default), 'POST', 'POSTALREADYFORMATED', 'PUT', 'PUTALREADYFORMATED', 'HEAD' or 'DELETE'.
 * @param   string|array    $param              Parameters of the request (x=a&y=z) or raw body for the *ALREADYFORMATED methods.
 * @param   int             $followlocation     0=Do not follow redirections, 1=Follow 301/302/303/307 redirections.
 * @param   string[]        $addheaders         Extra HTTP headers to send.
 * @param   string[]        $allowedschemes     Allowed schemes, among 'http' and 'https'.
 * @param   int             $localurl           0=Only external URLs allowed, 1=Only local URLs allowed, any other value=No local/external restriction.
 * @param   int             $ssl_verifypeer     Negative=Decided by $dolibarr_main_prod, 0=No certificate check, 1=Check certificate. MAIN_CURL_DISABLE_VERIFYPEER always disables the check.
 * @return  array                               On cURL error: keys 'content', 'curl_error_no', 'curl_error_msg'.
 *                                              Otherwise: the keys of curl_getinfo() (including 'http_code') plus 'content' (when not empty) and empty 'curl_error_no'/'curl_error_msg'.
 *                                              When the host is refused: 'http_code' is 400 and 'content' holds the reason.
 */
function getURLContent($url, $postorget = 'GET', $param = '', $followlocation = 1, $addheaders = array(), $allowedschemes = array('http', 'https'), $localurl = 0, $ssl_verifypeer = -1)
{
    global $conf, $dolibarr_main_prod;

    $USE_PROXY = !getDolGlobalString('MAIN_PROXY_USE') ? 0 : $conf->global->MAIN_PROXY_USE;
    $PROXY_HOST = !getDolGlobalString('MAIN_PROXY_HOST') ? 0 : $conf->global->MAIN_PROXY_HOST;
    $PROXY_PORT = !getDolGlobalString('MAIN_PROXY_PORT') ? 0 : $conf->global->MAIN_PROXY_PORT;
    $PROXY_USER = !getDolGlobalString('MAIN_PROXY_USER') ? 0 : $conf->global->MAIN_PROXY_USER;
    $PROXY_PASS = !getDolGlobalString('MAIN_PROXY_PASS') ? 0 : $conf->global->MAIN_PROXY_PASS;

    dol_syslog("getURLContent postorget=".$postorget." URL=".$url." param=".$param);

    // Setting the curl parameters.
    $ch = curl_init();

    curl_setopt($ch, CURLOPT_VERBOSE, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Dolibarr geturl function');

    // We use @ here because this may return a warning if safe mode or open_basedir is on (following location is forbidden in that case).
    // We force the value to false because redirections are managed by the loop below, so each target can be validated.
    @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);

    if (is_array($addheaders) && count($addheaders)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $addheaders);
    }
    curl_setopt($ch, CURLINFO_HEADER_OUT, true); // To be able to retrieve the request header and log it

    // By default use the TLS version decided by PHP.
    // You can force, if supported, a version like TLSv1 or TLSv1.2 (6 is for TLS 1.2).
    if (getDolGlobalString('MAIN_CURL_SSLVERSION')) {
        curl_setopt($ch, CURLOPT_SSLVERSION, $conf->global->MAIN_CURL_SSLVERSION);
    }

    // Turning on or off the check of the target certificate
    if ($ssl_verifypeer < 0) {
        $ssl_verifypeer = ($dolibarr_main_prod ? true : false);
    }
    if (getDolGlobalString('MAIN_CURL_DISABLE_VERIFYPEER')) {
        $ssl_verifypeer = 0;
    }

    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, ($ssl_verifypeer ? true : false));
    // CURLOPT_SSL_VERIFYHOST takes 0 or 2. The value 1 (what "true" is converted to) must not be used.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, ($ssl_verifypeer ? 2 : 0));

    // Restrict use to some protocols only
    $protocols = 0;
    if (is_array($allowedschemes)) {
        foreach ($allowedschemes as $allowedscheme) {
            if ($allowedscheme == 'http') {
                $protocols |= CURLPROTO_HTTP;
            }
            if ($allowedscheme == 'https') {
                $protocols |= CURLPROTO_HTTPS;
            }
        }
        curl_setopt($ch, CURLOPT_PROTOCOLS, $protocols);
        curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, $protocols);
    }

    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, !getDolGlobalString('MAIN_USE_CONNECT_TIMEOUT') ? 5 : $conf->global->MAIN_USE_CONNECT_TIMEOUT);
    curl_setopt($ch, CURLOPT_TIMEOUT, !getDolGlobalString('MAIN_USE_RESPONSE_TIMEOUT') ? 30 : $conf->global->MAIN_USE_RESPONSE_TIMEOUT);

    // Limit the size of downloaded files when MAIN_SECURITY_MAXFILESIZE_DOWNLOADED is set
    $maxsize = getDolGlobalInt('MAIN_SECURITY_MAXFILESIZE_DOWNLOADED');
    if ($maxsize && defined('CURLOPT_MAXFILESIZE_LARGE')) {
        curl_setopt($ch, CURLOPT_MAXFILESIZE_LARGE, $maxsize);
    }
    if ($maxsize && defined('CURLOPT_MAXFILESIZE')) {
        curl_setopt($ch, CURLOPT_MAXFILESIZE, $maxsize);
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // We want the response returned by curl_exec
    if ($postorget == 'POST') {
        curl_setopt($ch, CURLOPT_POST, 1); // POST
        curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // Setting param x=a&y=z as POST fields
    } elseif ($postorget == 'POSTALREADYFORMATED') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST'); // HTTP request is 'POST' but param string is taken as it is
        curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of post, like a xml string
    } elseif ($postorget == 'PUT') {
        $array_param = null;
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
        if (!is_array($param)) {
            parse_str($param, $array_param);
        } else {
            dol_syslog("parameter param must be a string", LOG_WARNING);
            $array_param = $param;
        }
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($array_param)); // Setting param x=a&y=z as PUT fields
    } elseif ($postorget == 'PUTALREADYFORMATED') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
        curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of put, like a xml string
    } elseif ($postorget == 'HEAD') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'HEAD'); // HTTP request is 'HEAD'
        curl_setopt($ch, CURLOPT_NOBODY, true);
    } elseif ($postorget == 'DELETE') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'DELETE'); // HTTP request is 'DELETE'
    } else {
        curl_setopt($ch, CURLOPT_POST, 0); // GET
    }

    if ($USE_PROXY) {
        // Never write the proxy password into the log, only whether one is defined
        dol_syslog("getURLContent set proxy to ".$PROXY_HOST.":".$PROXY_PORT." - ".$PROXY_USER.":".($PROXY_PASS ? '*****' : ''));
        curl_setopt($ch, CURLOPT_PROXY, $PROXY_HOST.":".$PROXY_PORT);
        if ($PROXY_USER) {
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $PROXY_USER.":".$PROXY_PASS);
        }
    }

    $newUrl = $url;
    $maxRedirection = 5;
    $info = array();
    $response = '';

    while ($maxRedirection >= 1) {
        curl_setopt($ch, CURLOPT_URL, $newUrl);

        // Parse $newUrl. The host may be missing (malformed URL, empty redirect target): use '' so curl reports the error itself.
        $newUrlArray = parse_url($newUrl);
        $hostinurl = (is_array($newUrlArray) && isset($newUrlArray['host'])) ? $newUrlArray['host'] : '';
        $hosttocheck = str_replace(array('[', ']'), '', $hostinurl); // Remove brackets of IPv6

        // Deny some reserved host names
        if (in_array($hosttocheck, array('metadata.google.internal'))) {
            $info['http_code'] = 400;
            $info['content'] = 'Error bad hostname '.$hosttocheck.' (Used by Google metadata). This value for hostname is not allowed.';
            $response = ''; // Do not let the body of a previous hop replace the error message
            break;
        }

        // Clean host name $hosttocheck to convert it into an IP $iptocheck
        if (in_array($hosttocheck, array('localhost', 'localhost.domain'))) {
            $iptocheck = '127.0.0.1';
        } elseif (in_array($hosttocheck, array('ip6-localhost', 'ip6-loopback'))) {
            $iptocheck = '::1';
        } else {
            // Resolve $hosttocheck to get the IP $iptocheck
            if (function_exists('gethostbyname')) {
                $iptocheck = gethostbyname($hosttocheck);
            } else {
                $iptocheck = $hosttocheck;
            }
            // TODO Resolve ip v6
        }

        // Check $iptocheck is an IP (v4 or v6), if not clear value.
        if (!filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4 | FILTER_FLAG_IPV6)) {
            $iptocheck = '0';
        }

        if ($iptocheck) {
            $tmpresult = isIPAllowed($iptocheck, $localurl);
            if ($tmpresult) {
                $info['http_code'] = 400;
                $info['content'] = $tmpresult;
                $response = ''; // Do not let the body of a previous hop replace the error message
                break;
            }

            // Set CURLOPT_CONNECT_TO so curl connects to the IP that was just validated instead of doing
            // another DNS resolution that may give a different result. Possible only on PHP v7+.
            if (defined('CURLOPT_CONNECT_TO')) {
                // An empty port means "any port" on the match side and "same port" on the target side.
                // It must stay empty: a "0" would never match the real port and the entry would be ignored.
                $portinurl = empty($newUrlArray['port']) ? '' : (int) $newUrlArray['port'];
                // An IPv6 target must be written between brackets
                $connecthost = (strpos($iptocheck, ':') !== false) ? '['.$iptocheck.']' : $iptocheck;
                $connect_to = array($hostinurl.':'.$portinurl.':'.$connecthost.':'.$portinurl);
                curl_setopt($ch, CURLOPT_CONNECT_TO, $connect_to);
            }
        }

        // Getting response from server
        $response = curl_exec($ch);

        $info = curl_getinfo($ch); // Reading of request info must be done after sending request
        $http_code = $info['http_code'];

        if ($followlocation && ($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307)) {
            // Loop on the redirect target so it goes through the same checks as the initial URL
            $newUrl = $info['redirect_url'];
            $maxRedirection--;
            // TODO Use $info['local_ip'] and $info['primary_ip'] ?
            continue;
        }

        break;
    }

    $request = curl_getinfo($ch, CURLINFO_HEADER_OUT); // Reading of request must be done after sending request

    dol_syslog("getURLContent request=".$request);
    if (getDolGlobalInt('MAIN_CURL_DEBUG')) {
        // The response may contain binary data, so it is only logged when debug is explicitly enabled.
        dol_syslog("getURLContent request=".$request, LOG_DEBUG, 0, '_curl');
        dol_syslog("getURLContent response =".$response, LOG_DEBUG, 0, '_curl');
    }
    dol_syslog("getURLContent response size=".strlen($response)); // This may contain binary data, so we don't output it

    $rep = array();
    if (curl_errno($ch)) {
        $rep['content'] = $response;

        // Report the curl error to the caller
        $rep['curl_error_no'] = curl_errno($ch);
        $rep['curl_error_msg'] = curl_error($ch);

        dol_syslog("getURLContent response array is ".join(',', $rep));
    } else {
        // Start from the curl info (http_code, header_size, ...)
        $rep = $info;
        dol_syslog("getURLContent http_code=".$rep['http_code']);

        if ($response) {
            $rep['content'] = $response;
        }
        $rep['curl_error_no'] = '';
        $rep['curl_error_msg'] = '';
    }

    // Closing the curl handle
    curl_close($ch);

    return $rep;
}
280
/**
 * Check whether an IP may be contacted, as an anti-SSRF protection.
 *
 * @param   string  $iptocheck      IP (v4 or v6) to check.
 * @param   int     $localurl       0=Only external IPs are allowed, 1=Only local IPs are allowed,
 *                                  any other value=No local/external restriction. In every case the
 *                                  known cloud metadata server IPs are refused.
 * @return  string                  Error message if the IP is refused, '' if the IP is allowed.
 */
function isIPAllowed($iptocheck, $localurl)
{
    $iptocheck = (string) $iptocheck;

    if ($localurl == 0) { // Only external url allowed (dangerous, may allow to get malware)
        if (!filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
            // Deny ips like 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 0.0.0.0/8, 169.254.0.0/16, 127.0.0.0/8 and 240.0.0.0/4, ::1/128, ::/128, ::ffff:0:0/96, fe80::/10...
            return 'Error bad hostname IP (private or reserved range). Must be an external URL.';
        }
        if (!empty($_SERVER["SERVER_ADDR"]) && $iptocheck == $_SERVER["SERVER_ADDR"]) {
            return 'Error bad hostname IP (IP is a local IP). Must be an external URL.';
        }
        $serveriplist = getDolGlobalString('MAIN_SECURITY_ANTI_SSRF_SERVER_IP');
        // Entries are trimmed so a list written "ip1, ip2" is matched on every entry
        if ($serveriplist && in_array($iptocheck, array_map('trim', explode(',', $serveriplist)), true)) {
            return 'Error bad hostname IP (IP is a local IP defined into MAIN_SECURITY_ANTI_SSRF_SERVER_IP). Must be an external URL.';
        }
    }
    if ($localurl == 1) { // Only local url allowed (dangerous, may allow to get metadata on server or make internal port scanning)
        // Deny ips NOT like 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 0.0.0.0/8, 169.254.0.0/16, 127.0.0.0/8 and 240.0.0.0/4, ::1/128, ::/128, ::ffff:0:0/96, fe80::/10...
        if (filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
            return 'Error bad hostname '.$iptocheck.'. Must be a local URL.';
        }
        $serveriplist = getDolGlobalString('MAIN_SECURITY_ANTI_SSRF_SERVER_IP');
        // When a list is defined, only the local IPs of the list are accepted
        if ($serveriplist && !in_array($iptocheck, array_map('trim', explode(',', $serveriplist)), true)) {
            return 'Error bad hostname IP (IP is not a local IP defined into list MAIN_SECURITY_ANTI_SSRF_SERVER_IP). Must be a local URL in allowed list.';
        }
    }

    // Common check on ip (local and external)
    // See list on https://tagmerge.com/gist/a7b9d57ff8ec11d63642f8778609a0b8
    // Evasive notations that are not a plain IP were already excluded by the caller's IP v4/v6 validity test.
    $arrayofmetadataserver = array(
        '100.100.100.200' => 'Alibaba',
        '192.0.0.192' => 'Oracle',
        '192.80.8.124' => 'Packet',
        '100.88.222.5' => 'Tencent cloud',
    );
    if (isset($arrayofmetadataserver[$iptocheck])) {
        return 'Error bad hostname IP (Used by '.$arrayofmetadataserver[$iptocheck].' metadata server). This IP is forbidden.';
    }

    return '';
}
337
/**
 * Extract a domain name from a URL.
 * For example with https://www.abc.mydomain.com/dir/page.html:
 * mode 0 returns 'mydomain', mode 1 returns 'mydomain.com', mode 2 returns 'abc.mydomain.com'.
 * When the TLD is a known two-level one (co.uk, com.mx, ...), it counts as a single level,
 * so https://www.mydomain.co.uk/ gives 'mydomain' (mode 0) or 'mydomain.co.uk' (mode 1).
 *
 * @param   string  $url    Full URL or host name.
 * @param   int     $mode   0=Domain name without TLD, 1=Domain name with TLD, 2=Domain name with TLD and one sub-domain level.
 * @return  string          The extracted domain.
 */
function getDomainFromURL($url, $mode = 0)
{
    $arrayof2levetopdomain = array(
        'co.at', 'or.at', 'gv.at',
        'avocat.fr', 'aeroport.fr', 'veterinaire.fr',
        'com.ng', 'gov.ng', 'gov.ua', 'com.ua', 'in.ua', 'org.ua', 'edu.ua', 'net.ua',
        'net.uk', 'org.uk', 'gov.uk', 'co.uk',
        'com.mx'
    );

    // Keep only the host part
    $tmpdomain = preg_replace('/^https?:\/\//i', '', $url); // Remove http(s)://
    $tmpdomain = preg_replace('/\/.*$/i', '', $tmpdomain); // Remove part after /

    // Set if tld is on 2 levels. This must be tested on the host only: on the full URL, the last
    // dot-separated parts would come from the path (like 'html') and the TLD would never be found.
    $tldon2level = 0;
    $parts = array_reverse(explode('.', $tmpdomain));
    if (!empty($parts[1]) && in_array($parts[1].'.'.$parts[0], $arrayof2levetopdomain, true)) {
        $tldon2level = 1;
    }

    // A 2 levels tld means one more level must be kept
    if ($tldon2level && $mode > 0) {
        $mode++;
    }

    if ($mode == 3) {
        $tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)\.([^\.]+)\.([^\.]+)$/', '\1.\2.\3.\4', $tmpdomain);
    } elseif ($mode == 2) {
        $tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)\.([^\.]+)$/', '\1.\2.\3', $tmpdomain); // Remove part 'www.' before 'abc.mydomain.com'
    } elseif ($mode == 1) {
        $tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)$/', '\1.\2', $tmpdomain); // Remove part 'www.abc.' before 'mydomain.com'
    }

    if (empty($mode)) {
        if ($tldon2level) {
            $tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)\.([^\.]+)$/', '\1.\2.\3', $tmpdomain); // Remove part 'www.abc.' before 'mydomain.co.uk'
            $tmpdomain = preg_replace('/\.[^\.]+\.[^\.]+$/', '', $tmpdomain); // Remove TLD (.com.mx, .co.uk, ...)
        } else {
            $tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)$/', '\1.\2', $tmpdomain); // Remove part 'www.abc.' before 'mydomain.com'
            $tmpdomain = preg_replace('/\.[^\.]+$/', '', $tmpdomain); // Remove TLD (.com, .net, ...)
        }
    }

    return $tmpdomain;
}
389
/**
 * Return the root of a URL: the http(s):// prefix when present, followed by the host,
 * with everything from the first '/' after the host removed.
 * For example: https://www.abc.mydomain.com/dir/page.html returns 'https://www.abc.mydomain.com'.
 *
 * @param   string  $url    Full URL.
 * @return  string          Root URL (scheme prefix if any + host).
 */
function getRootURLFromURL($url)
{
    // Capture the scheme prefix exactly as written in the URL (case is kept)
    $matches = null;
    $scheme = preg_match('/^(https?:\/\/)/i', $url, $matches) ? $matches[1] : '';

    // Drop the scheme, then drop the path
    $withoutscheme = preg_replace('/^https?:\/\//i', '', $url);
    $hostonly = preg_replace('/\/.*$/i', '', $withoutscheme);

    return $scheme.$hostonly;
}
411
/**
 * Remove the comments (<!-- ... -->) from an HTML content.
 *
 * @param   string  $content    HTML content.
 * @return  string              Content without HTML comments (the content is returned unchanged if the regex engine fails).
 */
function removeHtmlComment($content)
{
    // A comment runs up to the first '-->'. Inside it, accept any char that is not a hyphen, or a hyphen
    // that does not start '-->', so comments containing hyphens, new lines or nothing at all are removed too.
    // Possessive quantifiers keep the match linear, with no backtracking on large contents.
    $cleaned = preg_replace('/<!--(?:[^-]++|-(?!->))*+-->/', '', $content);

    return ($cleaned === null) ? $content : $cleaned;
}
getDolGlobalInt($key, $default=0)
Return a Dolibarr global constant int value.
getDolGlobalString($key, $default='')
Return dolibarr global constant string value.
dol_syslog($message, $level=LOG_INFO, $ident=0, $suffixinfilename='', $restricttologhandler='', $logcontext=null)
Write log message into outputs.
getDomainFromURL($url, $mode=0)
Get the second-level domain name from a URL.
isIPAllowed($iptocheck, $localurl)
Is IP allowed.
getRootURLFromURL($url)
Return the root URL (scheme and host) of a long URL. For example: https://www.abc.mydomain.com/dir/page....
getURLContent($url, $postorget='GET', $param='', $followlocation=1, $addheaders=array(), $allowedschemes=array('http', 'https'), $localurl=0, $ssl_verifypeer=-1)
Function to get a content from an URL (use proxy if proxy defined).
removeHtmlComment($content)
Function to remove comments from HTML content.