// Fragment of parser(): every statement in this listing is prefixed with its
// original line number and the numbering has gaps, so lines (including braces)
// are missing. The notes below describe only what the visible lines show.
//
// Visible flow:
//   1. load the files/geturl helper libraries;
//   2. build a cache file path from $cachedir and a hash of the feed URL;
//   3. take the feed text from the cache file or from getURLContent();
//   4. parse it with simplexml_load_string() and/or the xml_parser_* API;
//   5. optionally write a cache file;
//   6. copy feed-level fields and items into $this->_* properties.
//
// $urlRSS     feed URL; stored in $this->_urlRSS and passed to getURLContent().
// $maxNb      not used in any visible line - TODO confirm against the full method.
// $cachedelay compared with the cache file date as ($nowgmt - $cachedelay).
// $cachedir   directory prefix of the cache file; caching is skipped when empty.
// Return value: not visible in this listing.
283 public function parser($urlRSS, $maxNb = 0, $cachedelay = 60, $cachedir =
'')
285 include_once DOL_DOCUMENT_ROOT.
'/core/lib/files.lib.php';
286 include_once DOL_DOCUMENT_ROOT.
'/core/lib/geturl.lib.php';
// Error code set for a rejected URL (the guarding condition is not visible here).
293 $this->error =
"ErrorBadUrl";
297 $this->_urlRSS = $urlRSS;
// Cache file name is $cachedir.'/'.dol_hash(url, '3').
298 $newpathofdestfile = $cachedir.
'/'.
dol_hash($this->_urlRSS,
'3');
// The cache is considered only when both a positive delay and a directory are given.
306 if ($cachedelay > 0 && $cachedir) {
// Cached copy is accepted when its date is not older than now minus the delay.
308 if ($filedate >= ($nowgmt - $cachedelay)) {
312 $this->_lastfetchdate = $filedate;
314 dol_syslog(get_class($this).
"::parser cache file ".$newpathofdestfile.
" is not found or older than now - cachedelay (".$nowgmt.
" - ".$cachedelay.
") so we can't use it.");
319 if ($foundintocache) {
320 $str = file_get_contents($newpathofdestfile);
// Remote fetch; array('http', 'https') is presumably the list of allowed
// URL schemes - confirm against getURLContent().
323 $result =
getURLContent($this->_urlRSS,
'GET',
'', 1, array(), array(
'http',
'https'), 0);
325 if (!empty($result[
'content'])) {
326 $str = $result[
'content'];
327 } elseif (!empty($result[
'curl_error_msg'])) {
328 $this->error =
'Error retrieving URL '.$this->_urlRSS.
' - '.$result[
'curl_error_msg'];
// Uses $e, so this assignment sits in a catch block whose header is not visible.
332 $this->error =
'Error retrieving URL '.$this->_urlRSS.
' - '.$e->getMessage();
337 if ($str !==
false) {
341 libxml_use_internal_errors(
false);
// Entity loader is disabled explicitly only for libxml older than 2.9.0.
342 if (LIBXML_VERSION < 20900) {
346 libxml_disable_entity_loader(
true);
// LIBXML_NOCDATA merges CDATA sections into text nodes.
349 $rss = simplexml_load_string($str,
"SimpleXMLElement", LIBXML_NOCDATA);
// Second parsing path based on the xml_parser_* API; the condition choosing
// between the two paths is not visible in this listing.
351 if (!function_exists(
'xml_parser_create')) {
352 $this->error =
'Function xml_parser_create are not supported by your PHP';
358 $xmlparser = xml_parser_create(
null);
360 xml_parser_set_option($xmlparser, XML_OPTION_CASE_FOLDING, 0);
361 xml_parser_set_option($xmlparser, XML_OPTION_SKIP_WHITE, 1);
362 xml_parser_set_option($xmlparser, XML_OPTION_TARGET_ENCODING,
"UTF-8");
// NOTE(review): in the visible ordering this validity check comes after the
// xml_parser_set_option() calls above have already used $xmlparser.
365 if (!is_resource($xmlparser) && !is_object($xmlparser)) {
366 $this->error =
"ErrorFailedToCreateParser";
// $this receives the parser callbacks registered by name below.
371 xml_set_object($xmlparser, $this);
373 xml_set_element_handler($xmlparser,
'feed_start_element',
'feed_end_element');
375 xml_set_character_data_handler($xmlparser,
'feed_cdata');
// Whole document is passed in one call; third argument (is_final) is false.
377 $status = xml_parse($xmlparser, $str,
false);
379 xml_parser_free($xmlparser);
// Cache file is written only when the content did not come from the cache.
392 if (empty($foundintocache) && $cachedir) {
393 dol_syslog(get_class($this).
"::parser cache file ".$newpathofdestfile.
" is saved onto disk.");
397 $fp = fopen($newpathofdestfile,
'w');
403 $this->_lastfetchdate = $nowgmt;
// NOTE(review): this failure is printed directly, whereas other failures in
// the visible lines are stored in $this->error.
405 print
'Error, failed to open file '.$newpathofdestfile.
' for write';
// Format defaults to 'rss' and becomes 'atom' when no channel is present.
411 if (empty($rss->_format)) {
412 $rss->_format =
'rss';
413 if (empty($rss->channel)) {
414 $rss->_format =
'atom';
421 if ($rss->_format ==
'rss') {
// RSS feed-level fields, object-property access ($rss->channel->...).
424 if (!empty($rss->channel->language)) {
425 $this->_language =
sanitizeVal((
string) $rss->channel->language);
427 if (!empty($rss->channel->generator)) {
428 $this->_generator =
sanitizeVal((
string) $rss->channel->generator);
430 if (!empty($rss->channel->copyright)) {
431 $this->_copyright =
sanitizeVal((
string) $rss->channel->copyright);
433 if (!empty($rss->channel->lastbuilddate)) {
434 $this->_lastbuilddate =
sanitizeVal((
string) $rss->channel->lastbuilddate);
436 if (!empty($rss->channel->image->url[0])) {
437 $this->_imageurl =
sanitizeVal((
string) $rss->channel->image->url[0]);
439 if (!empty($rss->channel->link)) {
440 $this->_link =
sanitizeVal((
string) $rss->channel->link);
442 if (!empty($rss->channel->title)) {
443 $this->_title =
sanitizeVal((
string) $rss->channel->title);
445 if (!empty($rss->channel->description)) {
446 $this->_description =
sanitizeVal((
string) $rss->channel->description);
// Same RSS fields, array access ($rss->channel['...']); presumably the
// xml_parser_* result shape - confirm. The branch header is not visible.
450 if (!empty($rss->channel[
'language'])) {
451 $this->_language =
sanitizeVal((
string) $rss->channel[
'language']);
453 if (!empty($rss->channel[
'generator'])) {
454 $this->_generator =
sanitizeVal((
string) $rss->channel[
'generator']);
456 if (!empty($rss->channel[
'copyright'])) {
457 $this->_copyright =
sanitizeVal((
string) $rss->channel[
'copyright']);
459 if (!empty($rss->channel[
'lastbuilddate'])) {
460 $this->_lastbuilddate =
sanitizeVal((
string) $rss->channel[
'lastbuilddate']);
// Unlike its neighbours, the image URL is read from $rss->image, not $rss->channel.
462 if (!empty($rss->image[
'url'])) {
463 $this->_imageurl =
sanitizeVal((
string) $rss->image[
'url']);
465 if (!empty($rss->channel[
'link'])) {
466 $this->_link =
sanitizeVal((
string) $rss->channel[
'link']);
468 if (!empty($rss->channel[
'title'])) {
469 $this->_title =
sanitizeVal((
string) $rss->channel[
'title']);
471 if (!empty($rss->channel[
'description'])) {
472 $this->_description =
sanitizeVal((
string) $rss->channel[
'description']);
// Item list source depends on the parsing path (selecting condition not visible).
477 $items = $rss->channel->item;
479 $items = $rss->items;
482 } elseif ($rss->_format ==
'atom') {
// Atom feed-level fields, object-property access.
485 if (!empty($rss->generator)) {
486 $this->_generator =
sanitizeVal((
string) $rss->generator);
// NOTE(review): the condition tests $rss->lastbuilddate but the value read is
// $rss->modified; the array-based atom branch below tests and reads 'modified'.
488 if (!empty($rss->lastbuilddate)) {
489 $this->_lastbuilddate =
sanitizeVal((
string) $rss->modified);
491 if (!empty($rss->link->href)) {
492 $this->_link =
sanitizeVal((
string) $rss->link->href);
// Body of the title branch is not visible in this listing.
494 if (!empty($rss->title)) {
497 if (!empty($rss->description)) {
498 $this->_description =
sanitizeVal((
string) $rss->description);
// Atom feed-level fields, array access.
502 if (!empty($rss->channel[
'generator'])) {
503 $this->_generator =
sanitizeVal((
string) $rss->channel[
'generator']);
506 if (!empty($rss->channel[
'modified'])) {
507 $this->_lastbuilddate =
sanitizeVal((
string) $rss->channel[
'modified']);
510 if (!empty($rss->channel[
'link'])) {
511 $this->_link =
sanitizeVal((
string) $rss->channel[
'link']);
513 if (!empty($rss->channel[
'title'])) {
514 $this->_title =
sanitizeVal((
string) $rss->channel[
'title']);
518 if (!empty($rss->channel)) {
// $tmprss is assigned on a line that is not visible here.
524 $items = $tmprss[
'entry'];
527 $items = $rss->items;
// NOTE(review): items are processed only when $items is a PHP array. A value
// taken from a SimpleXMLElement property (e.g. $rss->channel->item above) is
// an object, not an array - confirm that a conversion happens on a hidden line.
535 if (is_array($items)) {
536 foreach ($items as $item) {
538 if ($rss->_format ==
'rss') {
// RSS item, object-property access.
542 $itemDescription =
sanitizeVal((
string) $item->description);
543 $itemPubDate =
sanitizeVal((
string) $item->pubDate);
// RSS item, array access; note the lowercase 'pubdate' key in this form.
549 $itemDescription =
sanitizeVal((
string) $item[
'description']);
550 $itemPubDate =
sanitizeVal((
string) $item[
'pubdate']);
552 $itemAuthor =
sanitizeVal((
string) ($item[
'author'] ??
''));
556 $itemCategory = array();
// NOTE(review): same is_array() concern as for $items when $item is a
// SimpleXMLElement - categories would then never be collected; confirm.
557 if (!empty($item->category) && is_array($item->category)) {
558 foreach ($item->category as $cat) {
559 $itemCategory[] = (
string) $cat;
562 } elseif ($rss->_format ==
'atom') {
563 $itemLink = (isset($item[
'link']) ?
sanitizeVal((
string) $item[
'link']) :
'');
566 $itemPubDate =
sanitizeVal((
string) $item[
'created']);
// Falls back to 'author_name' when 'author' is falsy; neither key is
// guarded with isset() in the visible code.
568 $itemAuthor =
sanitizeVal((
string) ($item[
'author'] ? $item[
'author'] : $item[
'author_name']));
569 $itemCategory = array();
// Fallback for an unrecognised format: empty values, and the error code is
// printed rather than stored in $this->error.
573 $itemDescription =
'';
577 $itemCategory = array();
578 print
'ErrorBadFeedFormat';
// One normalised record per item; the line updating $i is not visible here.
582 $this->_rssarray[$i] = array(
584 'title' => $itemTitle,
585 'description' => $itemDescription,
586 'pubDate' => $itemPubDate,
587 'category' => $itemCategory,
589 'author' => $itemAuthor
603 $this->error =
'ErrorFailedToLoadRSSFile';
// Fragment of an element-start handler; its header is not visible in this
// listing. Presumably the 'feed_start_element' callback registered in
// parser() - confirm. It reads $element and $attrs and updates parser state
// on $this.
// Element name and attribute keys are normalised to lower case.
622 $el = $element = strtolower($element);
623 $attrs = array_change_key_case($attrs, CASE_LOWER);
// Truthiness test on strpos(): a ':' at offset 0 yields 0 and therefore does
// not enter this branch.
627 if (strpos($element,
':')) {
628 list($ns, $el) = explode(
':', $element, 2);
// Any prefix other than 'rdf' becomes the current namespace.
630 if ($ns and $ns !=
'rdf') {
631 $this->current_namespace = $ns;
// Feed format is decided once, while $this->_format is still empty. The
// first condition of this chain (original line 636) is not visible.
635 if (empty($this->_format)) {
637 $this->_format =
'rss';
638 $this->feed_version =
'1.0';
639 } elseif ($el ==
'rss') {
640 $this->_format =
'rss';
// NOTE(review): $attrs['version'] is read without an isset() guard in the
// visible lines, here and in the 'feed' branch below.
641 $this->feed_version = $attrs[
'version'];
642 } elseif ($el ==
'feed') {
643 $this->_format =
'atom';
644 $this->feed_version = $attrs[
'version'];
645 $this->inchannel =
true;
// Context flags (inchannel / initem / intextinput / inimage / incontent) are
// switched on here as the matching element opens.
650 if ($el ==
'channel') {
651 $this->inchannel =
true;
652 } elseif ($el ==
'item' || $el ==
'entry') {
653 $this->initem =
true;
654 if (isset($attrs[
'rdf:about'])) {
655 $this->current_item[
'about'] = $attrs[
'rdf:about'];
657 } elseif ($this->_format ==
'rss' && $this->current_namespace ==
'' && $el ==
'textinput') {
660 $this->intextinput =
true;
661 } elseif ($this->_format ==
'rss' && $this->current_namespace ==
'' && $el ==
'image') {
662 $this->inimage =
true;
663 } elseif ($this->_format ==
'atom' && in_array($el, $this->_CONTENT_CONSTRUCTS)) {
// An atom 'content' element is tracked under the name 'atom_content'.
666 if ($el ==
'content') {
667 $el =
'atom_content';
670 $this->incontent = $el;
671 } elseif ($this->_format ==
'atom' && $this->incontent) {
// Attributes are flattened to one space-separated string through the
// 'rss_map_attrs' callable, which is defined outside this listing.
674 $attrs_str = implode(
' ', array_map(
'rss_map_attrs', array_keys($attrs), array_values($attrs)));
678 array_unshift($this->stack, $el);
679 } elseif ($this->_format ==
'atom' && $el ==
'link') {
// Atom <link>: the target key depends on the 'rel' attribute. The bodies of
// the first two branches are not visible here.
683 if (isset($attrs[
'rel']) && $attrs[
'rel'] ==
'alternate') {
685 } elseif (!isset($attrs[
'rel'])) {
688 $link_el =
'link_'.$attrs[
'rel'];
// $attrs['href'] is read without an isset() guard in the visible lines.
691 $this->
append($link_el, $attrs[
'href']);
694 array_unshift($this->stack, $el);
// Fragment of an element-end handler; its header is not visible in this
// listing. Presumably the 'feed_end_element' callback registered in
// parser() - confirm.
729 $el = strtolower($el);
// Closing an item/entry stores the collected item and resets the accumulator.
731 if ($el ==
'item' or $el ==
'entry') {
732 $this->items[] = $this->current_item;
733 $this->current_item = array();
734 $this->initem =
false;
735 } elseif ($this->_format ==
'rss' and $this->current_namespace ==
'' and $el ==
'textinput') {
736 $this->intextinput =
false;
737 } elseif ($this->_format ==
'rss' and $this->current_namespace ==
'' and $el ==
'image') {
738 $this->inimage =
false;
739 } elseif ($this->_format ==
'atom' and in_array($el, $this->_CONTENT_CONSTRUCTS)) {
740 $this->incontent =
false;
741 } elseif ($el ==
'channel' or $el ==
'feed') {
742 $this->inchannel =
false;
743 } elseif ($this->_format ==
'atom' and $this->incontent) {
// Inside atom content: compares the closing name with the top of
// $this->stack; the branch bodies are only partly visible here.
746 if ($this->stack[0] == $el) {
752 array_shift($this->stack);
754 array_shift($this->stack);
// Namespace is reset to false; the comparisons with '' above use ==, and in
// PHP false == '' evaluates to true.
757 $this->current_namespace =
false;
// Fragment of a character-data handler; its header is not visible in this
// listing. Presumably the 'feed_cdata' callback registered in parser() -
// confirm. It appends $text, through $this->concat(), to the container that
// matches the current context flags. $el is set on a line not visible here.
// Namespaced element: text is stored under [namespace][element].
806 if (!empty($this->current_namespace)) {
807 if (!empty($this->initem)) {
808 $this->
concat($this->current_item[$this->current_namespace][$el], $text);
809 } elseif (!empty($this->inchannel)) {
810 $this->
concat($this->channel[$this->current_namespace][$el], $text);
811 } elseif (!empty($this->intextinput)) {
812 $this->
concat($this->textinput[$this->current_namespace][$el], $text);
813 } elseif (!empty($this->inimage)) {
814 $this->
concat($this->image[$this->current_namespace][$el], $text);
// Non-namespaced element: text is stored under [element]. The channel check
// comes last here, whereas the namespaced chain above checks it second.
817 if (!empty($this->initem)) {
819 $this->
concat($this->current_item[$el], $text);
820 } elseif (!empty($this->intextinput)) {
822 $this->
concat($this->textinput[$el], $text);
823 } elseif (!empty($this->inimage)) {
825 $this->
concat($this->image[$el], $text);
826 } elseif (!empty($this->inchannel)) {
827 $this->
concat($this->channel[$el], $text);