?iť?
Current Path : /home/s/c/g/scgforma/www/soctest/htdocs/core/class/ |
Current File : /home/s/c/g/scgforma/www/soctest/htdocs/core/class/rssparser.class.php |
<?php
/* Copyright (C) 2011-2012 Laurent Destailleur <eldy@users.sourceforge.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * \file    htdocs/core/class/rssparser.class.php
 * \ingroup core
 * \brief   File of class to parse RSS feeds
 */

/**
 * Class to parse RSS and Atom feeds.
 *
 * Two parsing back-ends are supported, selected by the global option
 * EXTERNALRSS_USE_SIMPLEXML: SimpleXML, or the event based XML parser whose
 * callbacks are the feed_*() methods of this class.
 */
class RssParser
{
    /**
     * @var DoliDB Database handler.
     */
    public $db;

    /**
     * @var string Error code (or message)
     */
    public $error = '';

    private $_format = '';
    private $_urlRSS;
    private $_language;
    private $_generator;
    private $_copyright;
    private $_lastbuilddate;
    private $_imageurl;
    private $_link;
    private $_title;
    private $_description;
    private $_lastfetchdate; // Last successful fetch
    private $_rssarray = array();

    // For parsing with xmlparser
    public $stack = array(); // parser stack
    private $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright');

    // State filled by the feed_*() callbacks. These are declared explicitly (and kept
    // public, as they were when created dynamically) to avoid dynamic property creation.
    public $items = array();           // Items/entries collected by the xml parser
    public $current_item = array();    // Item/entry currently being read
    public $channel = array();         // Channel/feed level fields
    public $textinput = array();       // Fields of the RSS <textinput> element
    public $image = array();           // Fields of the RSS <image> element
    public $feed_version = '';         // Value of the "version" attribute of the root element
    public $current_namespace = false; // Namespace prefix of the element being read, false if none
    public $inchannel = false;
    public $initem = false;
    public $intextinput = false;
    public $inimage = false;
    public $incontent = false;         // Name of the Atom content construct being read, false if none

    /**
     * Constructor
     *
     * @param DoliDB $db Database handler
     */
    public function __construct($db)
    {
        $this->db = $db;
    }

    /**
     * getFormat
     *
     * @return string 'rss', 'atom' or '' when nothing was parsed yet
     */
    public function getFormat()
    {
        return $this->_format;
    }

    /**
     * getUrlRss
     *
     * @return string
     */
    public function getUrlRss()
    {
        return $this->_urlRSS;
    }

    /**
     * getLanguage
     *
     * @return string
     */
    public function getLanguage()
    {
        return $this->_language;
    }

    /**
     * getGenerator
     *
     * @return string
     */
    public function getGenerator()
    {
        return $this->_generator;
    }

    /**
     * getCopyright
     *
     * @return string
     */
    public function getCopyright()
    {
        return $this->_copyright;
    }

    /**
     * getLastBuildDate
     *
     * @return string
     */
    public function getLastBuildDate()
    {
        return $this->_lastbuilddate;
    }

    /**
     * getImageUrl
     *
     * @return string
     */
    public function getImageUrl()
    {
        return $this->_imageurl;
    }

    /**
     * getLink
     *
     * @return string
     */
    public function getLink()
    {
        return $this->_link;
    }

    /**
     * getTitle
     *
     * @return string
     */
    public function getTitle()
    {
        return $this->_title;
    }

    /**
     * getDescription
     *
     * @return string
     */
    public function getDescription()
    {
        return $this->_description;
    }

    /**
     * getLastFetchDate
     *
     * @return int Date of last successful fetch (value of dol_now() or mtime of the cache file)
     */
    public function getLastFetchDate()
    {
        return $this->_lastfetchdate;
    }

    /**
     * getItems
     *
     * @return array Array of records, each one with keys link, title, description, pubDate, category, id, author
     */
    public function getItems()
    {
        return $this->_rssarray;
    }

    /**
     * Parse rss URL
     *
     * The feed content is read from the cache file when it is recent enough, otherwise
     * it is downloaded and (when $cachedir is set) written to the cache.
     *
     * @param  string $urlRSS     Url to parse
     * @param  int    $maxNb      Max nb of records to get (0 for no limit)
     * @param  int    $cachedelay 0=No cache, nb of seconds we accept cache files (cachedir must also be defined)
     * @param  string $cachedir   Directory where to save cache file
     * @return int                <0 if KO, >0 if OK
     */
    public function parser($urlRSS, $maxNb = 0, $cachedelay = 60, $cachedir = '')
    {
        global $conf;

        include_once DOL_DOCUMENT_ROOT.'/core/lib/files.lib.php';

        $rss = '';
        $str = ''; // This will contain content of feed
        $usesimplexml = ! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML);

        // Check parameters
        if (! dol_is_url($urlRSS)) {
            $this->error = "ErrorBadUrl";
            return -1;
        }

        $this->_urlRSS = $urlRSS;
        $newpathofdestfile = $cachedir.'/'.dol_hash($this->_urlRSS, 3); // Force md5 hash (does not contains special chars)
        $newmask = '0644';

        $nowgmt = dol_now();

        // Search into cache
        $foundintocache = 0;
        if ($cachedelay > 0 && $cachedir) {
            $filedate = dol_filemtime($newpathofdestfile);
            if ($filedate >= ($nowgmt - $cachedelay)) {
                $foundintocache = 1;
                $this->_lastfetchdate = $filedate;
            } else {
                dol_syslog(get_class($this)."::parser cache file ".$newpathofdestfile." is not found or older than now - cachedelay (".$nowgmt." - ".$cachedelay.") so we can't use it.");
            }
        }

        // Load file into $str
        if ($foundintocache) {
            // Cache file found and is not too old
            $str = file_get_contents($newpathofdestfile);
        } else {
            try {
                ini_set("user_agent", "Dolibarr ERP-CRM RSS reader");
                // Only override the timeouts when the option is really configured
                if (! empty($conf->global->MAIN_USE_RESPONSE_TIMEOUT)) {
                    ini_set("max_execution_time", $conf->global->MAIN_USE_RESPONSE_TIMEOUT);
                    ini_set("default_socket_timeout", $conf->global->MAIN_USE_RESPONSE_TIMEOUT);
                }

                $opts = array('http' => array('method' => "GET"));
                if (! empty($conf->global->MAIN_USE_CONNECT_TIMEOUT)) {
                    $opts['http']['timeout'] = $conf->global->MAIN_USE_CONNECT_TIMEOUT;
                }
                if (! empty($conf->global->MAIN_PROXY_USE)) {
                    $opts['http']['proxy'] = 'tcp://'.$conf->global->MAIN_PROXY_HOST.':'.$conf->global->MAIN_PROXY_PORT;
                }

                $context = stream_context_create($opts);
                $str = file_get_contents($this->_urlRSS, false, $context);
            } catch (Exception $e) {
                print 'Error retrieving URL '.$this->_urlRSS.' - '.$e->getMessage();
            }
        }

        if ($str !== false) {
            // Convert $str into xml
            if ($usesimplexml) {
                libxml_use_internal_errors(false);
                $rss = simplexml_load_string($str, "SimpleXMLElement", LIBXML_NOCDATA);
            } else {
                $xmlparser = xml_parser_create('');
                // xml_parser_create() returns a resource before PHP 8 and an XMLParser object since PHP 8
                if (! is_resource($xmlparser) && ! is_object($xmlparser)) {
                    $this->error = "ErrorFailedToCreateParser";
                    return -1;
                }

                xml_set_element_handler($xmlparser, array($this, 'feed_start_element'), array($this, 'feed_end_element'));
                xml_set_character_data_handler($xmlparser, array($this, 'feed_cdata'));
                xml_parse($xmlparser, $str);
                if (is_resource($xmlparser)) {
                    xml_parser_free($xmlparser); // Only resources need an explicit free
                }
                $rss = $this;
            }
        }

        // If $rss not loaded
        if (! $rss) {
            $this->error = 'ErrorFailedToLoadRSSFile';
            return -1;
        }

        // Save file into cache
        if (empty($foundintocache) && $cachedir) {
            dol_syslog(get_class($this)."::parser cache file ".$newpathofdestfile." is saved onto disk.");
            if (! dol_is_dir($cachedir)) {
                dol_mkdir($cachedir);
            }
            $fp = fopen($newpathofdestfile, 'w');
            if ($fp) {
                fwrite($fp, $str);
                fclose($fp);
                if (! empty($conf->global->MAIN_UMASK)) {
                    $newmask = $conf->global->MAIN_UMASK;
                }
                @chmod($newpathofdestfile, octdec($newmask));

                $this->_lastfetchdate = $nowgmt;
            } else {
                print 'Error, failed to open file '.$newpathofdestfile.' for write';
            }
        }

        unset($str); // Free memory

        // The xml parser callbacks detect the format from the root element. With SimpleXML
        // (or if detection failed) presence of a channel means rss, otherwise atom.
        // The format is kept in a local variable so nothing is written into the SimpleXML document.
        $format = $usesimplexml ? '' : $this->_format;
        if (empty($format)) {
            $format = empty($rss->channel) ? 'atom' : 'rss';
        }
        $this->_format = $format;

        $items = array();

        // Save description entries
        if ($format == 'rss') {
            if ($usesimplexml) {
                if (! empty($rss->channel->language)) {
                    $this->_language = (string) $rss->channel->language;
                }
                if (! empty($rss->channel->generator)) {
                    $this->_generator = (string) $rss->channel->generator;
                }
                if (! empty($rss->channel->copyright)) {
                    $this->_copyright = (string) $rss->channel->copyright;
                }
                // SimpleXML element names are case sensitive: the RSS element is lastBuildDate
                if (! empty($rss->channel->lastBuildDate)) {
                    $this->_lastbuilddate = (string) $rss->channel->lastBuildDate;
                }
                if (! empty($rss->channel->image->url[0])) {
                    $this->_imageurl = (string) $rss->channel->image->url[0];
                }
                if (! empty($rss->channel->link)) {
                    $this->_link = (string) $rss->channel->link;
                }
                if (! empty($rss->channel->title)) {
                    $this->_title = (string) $rss->channel->title;
                }
                if (! empty($rss->channel->description)) {
                    $this->_description = (string) $rss->channel->description;
                }

                $items = $rss->channel->item; // With simplexml (a Traversable, not an array)
            } else {
                // Keys are lower case because feed_start_element() lowercases element names
                if (! empty($this->channel['language'])) {
                    $this->_language = (string) $this->channel['language'];
                }
                if (! empty($this->channel['generator'])) {
                    $this->_generator = (string) $this->channel['generator'];
                }
                if (! empty($this->channel['copyright'])) {
                    $this->_copyright = (string) $this->channel['copyright'];
                }
                if (! empty($this->channel['lastbuilddate'])) {
                    $this->_lastbuilddate = (string) $this->channel['lastbuilddate'];
                }
                if (! empty($this->image['url'])) {
                    $this->_imageurl = (string) $this->image['url'];
                }
                if (! empty($this->channel['link'])) {
                    $this->_link = (string) $this->channel['link'];
                }
                if (! empty($this->channel['title'])) {
                    $this->_title = (string) $this->channel['title'];
                }
                if (! empty($this->channel['description'])) {
                    $this->_description = (string) $this->channel['description'];
                }

                $items = $this->items; // With xmlparse
            }
        } else {
            // Atom
            if ($usesimplexml) {
                if (! empty($rss->generator)) {
                    $this->_generator = (string) $rss->generator;
                }
                if (! empty($rss->modified)) {
                    $this->_lastbuilddate = (string) $rss->modified;
                }
                // href is an attribute of <link>, not a child element
                if (isset($rss->link['href']) && (string) $rss->link['href'] !== '') {
                    $this->_link = (string) $rss->link['href'];
                }
                if (! empty($rss->title)) {
                    $this->_title = (string) $rss->title;
                }
                if (! empty($rss->description)) {
                    $this->_description = (string) $rss->description;
                }

                $tmprss = xml2php($rss);
                $items = (is_array($tmprss) && isset($tmprss['entry'])) ? $tmprss['entry'] : array();
                // xml2php() returns a single <entry> as the entry itself and several as a list
                if (is_array($items) && ! empty($items) && ! isset($items[0])) {
                    $items = array($items);
                }
            } else {
                if (! empty($this->channel['generator'])) {
                    $this->_generator = (string) $this->channel['generator'];
                }
                if (! empty($this->channel['modified'])) {
                    $this->_lastbuilddate = (string) $this->channel['modified'];
                }
                if (! empty($this->channel['link'])) {
                    $this->_link = (string) $this->channel['link'];
                }
                if (! empty($this->channel['title'])) {
                    $this->_title = (string) $this->channel['title'];
                }

                $items = $this->items; // With xmlparse
            }
        }

        $i = 0;

        // Loop on each record
        if (is_array($items) || $items instanceof Traversable) {
            foreach ($items as $item) {
                $itemCategory = array();

                if ($format == 'rss') {
                    if ($usesimplexml) {
                        $itemLink = (string) $item->link;
                        $itemTitle = (string) $item->title;
                        $itemDescription = (string) $item->description;
                        $itemPubDate = (string) $item->pubDate;
                        $itemId = '';
                        $itemAuthor = '';

                        // Loop on each category
                        foreach ($item->category as $cat) {
                            $itemCategory[] = (string) $cat;
                        }
                    } else {
                        $itemLink = $this->getArrayValue($item, 'link');
                        $itemTitle = $this->getArrayValue($item, 'title');
                        $itemDescription = $this->getArrayValue($item, 'description');
                        $itemPubDate = $this->getArrayValue($item, 'pubdate');
                        $itemId = $this->getArrayValue($item, 'guid');
                        $itemAuthor = $this->getArrayValue($item, 'author');
                    }
                } else {
                    // Atom: in both modes $item is an array (built by xml2php() or by the xml parser callbacks)
                    $itemLink = $this->getAtomLink($item);
                    $itemTitle = $this->getArrayValue($item, 'title');
                    $itemDescription = $this->getArrayValue($item, 'summary');
                    $itemPubDate = $this->getArrayValue($item, 'created');
                    $itemId = $this->getArrayValue($item, 'id');
                    $itemAuthor = $this->getAtomAuthor($item);
                }

                // Add record to result array
                $this->_rssarray[$i] = array(
                    'link' => $itemLink,
                    'title' => $itemTitle,
                    'description' => $itemDescription,
                    'pubDate' => $itemPubDate,
                    'category' => $itemCategory,
                    'id' => $itemId,
                    'author' => $itemAuthor
                );

                $i++;

                // $maxNb = 0 means no limit
                if ($maxNb > 0 && $i >= $maxNb) {
                    break; // We get all records we want
                }
            }
        }

        return 1;
    }

    /**
     * Return a scalar entry of an item array as a string
     *
     * @param  mixed  $item Item (expected to be an array)
     * @param  string $key  Key to read
     * @return string       Value, or '' if $item is not an array, the key is missing or the value is not scalar
     */
    private function getArrayValue($item, $key)
    {
        if (is_array($item) && isset($item[$key]) && is_scalar($item[$key])) {
            return (string) $item[$key];
        }
        return '';
    }

    /**
     * Return the link of an Atom entry
     *
     * The xml parser callbacks store the link as a string. xml2php() stores one array of
     * attributes per link element (or a list of such arrays when there are several links).
     *
     * @param  mixed $item Entry (expected to be an array)
     * @return string      Url of link, '' if not found
     */
    private function getAtomLink($item)
    {
        if (! is_array($item) || ! isset($item['link'])) {
            return '';
        }

        $link = $item['link'];
        if (is_string($link)) {
            return $link;
        }
        if (! is_array($link)) {
            return '';
        }
        if (isset($link['href'])) {
            return (string) $link['href'];
        }

        // Several link elements: prefer the alternate one (no rel means alternate), else the first with a href
        $first = '';
        foreach ($link as $candidate) {
            if (! is_array($candidate) || ! isset($candidate['href'])) {
                continue;
            }
            if (! isset($candidate['rel']) || $candidate['rel'] == 'alternate') {
                return (string) $candidate['href'];
            }
            if ($first === '') {
                $first = (string) $candidate['href'];
            }
        }
        return $first;
    }

    /**
     * Return the author of an Atom entry
     *
     * Key 'author' is used when it contains a non blank text (or an array with a 'name' entry),
     * otherwise key 'author_name' (filled by the xml parser callbacks for <author><name>) is used.
     *
     * @param  mixed $item Entry (expected to be an array)
     * @return string      Author, '' if not found
     */
    private function getAtomAuthor($item)
    {
        if (! is_array($item)) {
            return '';
        }

        $author = isset($item['author']) ? $item['author'] : '';
        if (is_array($author)) {
            $author = $this->getArrayValue($author, 'name');
        } elseif (! is_scalar($author)) {
            $author = '';
        }
        $author = (string) $author;

        if (trim($author) === '') {
            $author = $this->getArrayValue($item, 'author_name');
        }
        return $author;
    }

    // phpcs:disable PEAR.NamingConventions.ValidFunctionName.ScopeNotCamelCaps
    /**
     * Triggered when opened tag is found
     *
     * NOTE(review): $attrs is kept by reference for interface compatibility. When called by the
     * xml extension, recent PHP versions appear to emit a warning for by-reference handler
     * parameters - confirm and consider removing the reference.
     *
     * @param  mixed  $p       Parser (not used)
     * @param  string $element Tag
     * @param  array  $attrs   Attributes of tags (keys are converted to lower case)
     * @return void
     */
    public function feed_start_element($p, $element, &$attrs)
    {
        // phpcs:enable
        $el = $element = strtolower($element);
        $attrs = array_change_key_case($attrs, CASE_LOWER);

        // check for a namespace, and split if found
        $ns = false;
        if (strpos($element, ':')) {
            list($ns, $el) = explode(':', $element, 2);
        }
        if ($ns and $ns != 'rdf') {
            $this->current_namespace = $ns;
        }

        // if feed type isn't set, then this is first element of feed: identify feed from root element
        if (empty($this->_format)) {
            if ($el == 'rdf') {
                $this->_format = 'rss';
                $this->feed_version = '1.0';
            } elseif ($el == 'rss') {
                $this->_format = 'rss';
                $this->feed_version = isset($attrs['version']) ? $attrs['version'] : '';
            } elseif ($el == 'feed') {
                $this->_format = 'atom';
                $this->feed_version = isset($attrs['version']) ? $attrs['version'] : '';
                $this->inchannel = true;
            }
            return;
        }

        if ($el == 'channel') {
            $this->inchannel = true;
        } elseif ($el == 'item' or $el == 'entry') {
            $this->initem = true;
            if (isset($attrs['rdf:about'])) {
                $this->current_item['about'] = $attrs['rdf:about'];
            }
        } elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'textinput') {
            // if we're in the default namespace of an RSS feed, record textinput or image fields
            $this->intextinput = true;
        } elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'image') {
            $this->inimage = true;
        } elseif ($this->_format == 'atom' and in_array($el, $this->_CONTENT_CONSTRUCTS)) {
            // handle atom content constructs
            // avoid clashing w/ RSS mod_content
            if ($el == 'content') {
                $el = 'atom_content';
            }

            $this->incontent = $el;
        } elseif ($this->_format == 'atom' and $this->incontent) {
            // if inside an Atom content construct (e.g. content or summary) field treat tags as text
            // if tags are inlined, then flatten
            $pairs = array();
            foreach ($attrs as $attrname => $attrvalue) {
                // map_attrs() is not defined in this file: use it when available, else build key="value"
                $pairs[] = function_exists('map_attrs') ? map_attrs($attrname, $attrvalue) : $attrname.'="'.$attrvalue.'"';
            }
            $attrs_str = join(' ', $pairs);

            $this->append_content("<$element $attrs_str>");

            array_unshift($this->stack, $el);
        } elseif ($this->_format == 'atom' and $el == 'link') {
            // Atom supports many links per containing element. Links with rel='alternate'
            // (or without rel, which means alternate) are treated as RSS's simple link element.
            if (! isset($attrs['rel']) || $attrs['rel'] == 'alternate') {
                $link_el = 'link';
            } else {
                $link_el = 'link_'.$attrs['rel'];
            }

            $this->append($link_el, isset($attrs['href']) ? $attrs['href'] : '');
        } else {
            // set stack[0] to current element
            array_unshift($this->stack, $el);
        }
    }

    // phpcs:disable PEAR.NamingConventions.ValidFunctionName.ScopeNotCamelCaps
    /**
     * Triggered when CDATA is found
     *
     * @param  mixed  $p    Parser (not used)
     * @param  string $text Text found
     * @return void
     */
    public function feed_cdata($p, $text)
    {
        // phpcs:enable
        if ($this->_format == 'atom' and $this->incontent) {
            $this->append_content($text);
        } else {
            // Field name is the path of open elements, outermost first, joined with '_'
            $current_el = join('_', array_reverse($this->stack));
            $this->append($current_el, $text);
        }
    }

    // phpcs:disable PEAR.NamingConventions.ValidFunctionName.ScopeNotCamelCaps
    /**
     * Triggered when closed tag is found
     *
     * @param  mixed  $p  Parser (not used)
     * @param  string $el Tag
     * @return void
     */
    public function feed_end_element($p, $el)
    {
        // phpcs:enable
        $el = strtolower($el);

        if ($el == 'item' or $el == 'entry') {
            $this->items[] = $this->current_item;
            $this->current_item = array();
            $this->initem = false;
        } elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'textinput') {
            $this->intextinput = false;
        } elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'image') {
            $this->inimage = false;
        } elseif ($this->_format == 'atom' and in_array($el, $this->_CONTENT_CONSTRUCTS)) {
            $this->incontent = false;
        } elseif ($el == 'channel' or $el == 'feed') {
            $this->inchannel = false;
        } elseif ($this->_format == 'atom' and $this->incontent) {
            // balance tags properly
            if (isset($this->stack[0]) && $this->stack[0] == $el) {
                $this->append_content("</$el>");
            } else {
                $this->append_content("<$el />");
            }

            array_shift($this->stack);
        } else {
            array_shift($this->stack);
        }

        $this->current_namespace = false;
    }

    /**
     * To concat 2 string with no warning if an operand is not defined
     *
     * @param  string $str1 String modified in place ($str2 is appended to it)
     * @param  string $str2 String to append
     * @return void
     */
    public function concat(&$str1, $str2 = "")
    {
        if (! isset($str1)) {
            $str1 = "";
        }
        $str1 .= $str2;
    }

    // phpcs:disable PEAR.NamingConventions.ValidFunctionName.ScopeNotCamelCaps
    /**
     * Append text to the Atom content construct currently read (its name is in $this->incontent),
     * in the current item if we are inside an item, else in the channel.
     *
     * @param  string $text Text
     * @return void
     */
    public function append_content($text)
    {
        // phpcs:enable
        if ($this->initem) {
            $this->concat($this->current_item[$this->incontent], $text);
        } elseif ($this->inchannel) {
            $this->concat($this->channel[$this->incontent], $text);
        }
    }

    /**
     * smart append - field and namespace aware
     *
     * @param  string $el   Name of field
     * @param  string $text Text to append to the field
     * @return void
     */
    public function append($el, $text)
    {
        if (! $el) {
            return;
        }

        if ($this->current_namespace) {
            if ($this->initem) {
                $this->concat($this->current_item[$this->current_namespace][$el], $text);
            } elseif ($this->inchannel) {
                $this->concat($this->channel[$this->current_namespace][$el], $text);
            } elseif ($this->intextinput) {
                $this->concat($this->textinput[$this->current_namespace][$el], $text);
            } elseif ($this->inimage) {
                $this->concat($this->image[$this->current_namespace][$el], $text);
            }
        } else {
            if ($this->initem) {
                $this->concat($this->current_item[$el], $text);
            } elseif ($this->intextinput) {
                $this->concat($this->textinput[$el], $text);
            } elseif ($this->inimage) {
                $this->concat($this->image[$el], $text);
            } elseif ($this->inchannel) {
                $this->concat($this->channel[$el], $text);
            }
        }
    }
}


/**
 * Function to convert an XML object into an array
 *
 * Each child element becomes an entry keyed by its name. A name found several times becomes
 * an indexed list of its occurrences (other names are not affected). An element without
 * children is returned as its text; if such an element has attributes and no text, it is
 * returned as the array of its attributes. Attributes of elements having children are added
 * to the array of the element.
 *
 * @param  SimpleXMLElement $xml Xml
 * @return array|string          Array of children, or text of element if it has no child
 */
function xml2php($xml)
{
    $nbchildren = 0;
    $array = array();
    $islist = array(); // Names already converted into an indexed list

    foreach ($xml->children() as $key => $value) {
        $child = xml2php($value);

        // To deal with the attributes
        foreach ($value->attributes() as $ak => $av) {
            if ($child === '') {
                $child = array(); // Empty leaf: store attributes in an array
            }
            if (is_array($child)) {
                $child[$ak] = (string) $av;
            }
            // Leaf with text: the text is kept as is (a string can't hold attributes)
        }

        if (! array_key_exists($key, $array)) {
            // Add a simple element
            $array[$key] = $child;
        } elseif (empty($islist[$key])) {
            // Second occurrence of this name: create an indexed array
            $array[$key] = array($array[$key], $child);
            $islist[$key] = true;
        } else {
            // Add an element in an existing array
            $array[$key][] = $child;
        }

        $nbchildren++;
    }

    if ($nbchildren == 0) {
        return (string) $xml;
    }

    return $array;
}