php Sunra-PhpSimple-HtmlDomParser类(方法)实例源码

下面列出了php Sunra-PhpSimple-HtmlDomParser 类(方法)源码代码实例,从而了解它的用法。

作者:hiss    项目:concrete5-am   
/**
 * Rewrites a content block's searchable content for AMP output.
 *
 * Each <concrete-picture> placeholder whose file can be loaded is replaced by
 * the string form of an AmpImg, each <img> is replaced by an <amp-img>
 * element carrying the same alt/src/height/width, and every inline style
 * attribute is removed. The result is handed to LinkAbstractor::translateFrom().
 *
 * @param ContentBlockController $controller Block providing the searchable content.
 * @return mixed The value returned by LinkAbstractor::translateFrom().
 */
public static function content(ContentBlockController $controller)
 {
     $markup = $controller->getSearchableContent();
     $parser = new HtmlDomParser();
     $document = $parser->str_get_html($markup, true, true, DEFAULT_TARGET_CHARSET, false);
     if (!is_object($document)) {
         // Nothing could be parsed: translate the content as-is.
         return LinkAbstractor::translateFrom($markup);
     }
     foreach ($document->find('concrete-picture') as $placeholder) {
         $file = File::getByID($placeholder->fid);
         if (!is_object($file)) {
             // Unknown file ID: leave the placeholder untouched.
             continue;
         }
         $ampPicture = new AmpImg($file);
         $ampPicture->alt($placeholder->alt);
         $placeholder->outertext = (string) $ampPicture;
     }
     foreach ($document->find('img') as $image) {
         $ampImage = new Element('amp-img');
         $ampImage->alt($image->alt);
         $ampImage->src($image->src);
         $ampImage->height($image->height);
         $ampImage->width($image->width);
         $image->outertext = (string) $ampImage;
     }
     foreach ($document->find('*[style]') as $styled) {
         $styled->removeAttribute('style');
     }
     $markup = (string) $document->restore_noise($document);
     return LinkAbstractor::translateFrom($markup);
 }

作者:cek    项目:concrete5-   
/**
 * Builds an instance from the first top-level node of an HTML fragment.
 *
 * Only the node's tag name and class attribute are copied onto the new
 * Element that is passed to the constructor.
 *
 * @param string $html HTML fragment to parse.
 * @return static
 */
public static function fromHtml($html)
 {
     $parser = new HtmlDomParser();
     $topLevelNodes = $parser->str_get_html($html)->childNodes();
     $firstNode = $topLevelNodes[0];
     $wrapper = new Element($firstNode->tag);
     $wrapper->class($firstNode->class);
     return new static($wrapper);
 }

作者:digideski    项目:concrete   
/**
 * Builds an instance from the first top-level node of an HTML fragment.
 *
 * The node's tag name and every attribute reported by getAllAttributes()
 * are copied onto the new Element that is passed to the constructor.
 *
 * @param string $html HTML fragment to parse.
 * @return static
 */
public static function fromHtml($html)
 {
     $parser = new HtmlDomParser();
     $topLevelNodes = $parser->str_get_html($html)->childNodes();
     $firstNode = $topLevelNodes[0];
     $wrapper = new Element($firstNode->tag);
     foreach ($firstNode->getAllAttributes() as $attribute => $attributeValue) {
         $wrapper->setAttribute($attribute, $attributeValue);
     }
     return new static($wrapper);
 }

作者:hdk001    项目:concrete5-   
/**
 * Returns an Element mirroring the grid framework's row container.
 *
 * The framework's row start and row end HTML are concatenated and parsed;
 * the first top-level node supplies the tag, id and class of the result.
 *
 * @return Element|null Null when the layout has no grid framework object.
 */
public function getLayoutContainerHtmlObject()
 {
     $framework = $this->layout->getThemeGridFrameworkObject();
     if (!is_object($framework)) {
         return null;
     }
     $parser = new HtmlDomParser();
     $rowHtml = $framework->getPageThemeGridFrameworkRowStartHTML() . $framework->getPageThemeGridFrameworkRowEndHTML();
     $topLevelNodes = $parser->str_get_html($rowHtml)->childNodes();
     $rowNode = $topLevelNodes[0];
     $container = new Element($rowNode->tag);
     $container->id($rowNode->id);
     $container->class($rowNode->class);
     return $container;
 }

作者:KartikPadmanabha    项目:markdown-resum   
/**
 * Renders the resume to HTML and converts it to a PDF with wkhtmltopdf.
 *
 * Reads the "source" and "destination" arguments and the "template" option,
 * writes a temporary HTML file into the destination directory, runs
 * wkhtmltopdf on it and removes the temporary file afterwards.
 *
 * @param InputInterface $input Command input.
 * @param OutputInterface $output Command output.
 * @return bool False when wkhtmltopdf is not available, true otherwise.
 */
protected function execute(InputInterface $input, OutputInterface $output)
 {
     $this->app = $this->getApplication();
     $source = $input->getArgument('source');
     $destination = rtrim($input->getArgument('destination'), DIRECTORY_SEPARATOR);
     $template = $input->getOption('template');
     $pdfSource = join(DIRECTORY_SEPARATOR, array($destination, '.tmp_pdf_source.html'));
     $destFilename = join(DIRECTORY_SEPARATOR, array($destination, pathinfo($source, PATHINFO_FILENAME) . '.pdf'));
     // Make sure we've got our converter available
     exec('wkhtmltopdf -V', $results, $returnVal);
     if ($returnVal) {
         $output->writeln("\n<error>Error:</error> Unable to locate wkhtmltopdf.\n" . "  Please make sure that it is installed and available in " . "your path. \n  For installation help, please read: " . "https://github.com/pdfkit/pdfkit/wiki/Installing-WKHTMLTOPDF \n\n", $this->app->outputFormat);
         return false;
     }
     $rendered = $this->generateHtml($source, $template, false);
     // The pdf needs some extra css rules, and so we'll add them here
     // to our html document
     $simpleDom = HtmlDomParser::str_get_html($rendered);
     if ($simpleDom) {
         $body = $simpleDom->find('body', 0);
         // A template without a <body> tag is rendered unchanged.
         if ($body) {
             $body->class = $body->class . ' pdf';
         }
         $rendered = (string) $simpleDom;
     }
     // Save to a temp destination for the pdf renderer to use
     file_put_contents($pdfSource, $rendered);
     // Process the document with wkhtmltopdf. Both paths come from user
     // supplied arguments, so quote them: spaces or shell metacharacters
     // would otherwise break (or hijack) the command line.
     exec('wkhtmltopdf ' . escapeshellarg($pdfSource) . ' ' . escapeshellarg($destFilename));
     // Unlink the temporary file
     unlink($pdfSource);
     $output->writeln(sprintf("Wrote pdf resume to: <info>%s</info>", $destFilename), $this->app->outputFormat);
     return true;
 }

作者:AgenceStrati    项目:migrato   
/**
  * Returns the inner HTML of every element of $text matching $selector.
  *
  * @param string $text HTML to search.
  * @param string $selector Selector understood by the DOM parser's find().
  * @return array Inner HTML strings in document order; empty when $text
  *               cannot be parsed (e.g. it is an empty string).
  */
 public static function exec($text, $selector)
 {
     $document = HtmlDomParser::str_get_html($text);
     // str_get_html() yields false instead of a document for empty or
     // oversized input; there is nothing to match in that case.
     if (!$document) {
         return array();
     }
     return array_map(function ($element) {
         return $element->innertext;
     }, $document->find($selector));
 }

作者:Nemanja    项目:meteoalar   
/**
 * Imports the component list from the SEPA page, using a per-day file cache.
 *
 * The page is downloaded once per day into $this->cachePath and parsed for
 * <label class="komponentelabela"> elements; the first child supplies the
 * id (its "value" attribute) and the second child the display name.
 *
 * Terminates the process with exit(1) when the page cannot be fetched.
 *
 * @return array List of ['sepa_id' => int, 'name' => string] entries.
 */
public function doImport()
 {
     $this->cachePath .= date("Y-m-d") . '/';
     if (is_readable($this->cachePath) === false) {
         mkdir($this->cachePath, 0755, true);
     }
     $this->cacheFile = md5($this->sepaUrl . $this->sepaPage . '-' . date("Y-m-d")) . '.dat';
     $this->client = new Client(['base_uri' => $this->sepaUrl, 'timeout' => 5.0]);
     if (file_exists($this->cachePath . $this->cacheFile) === false) {
         $response = $this->client->get($this->sepaPage);
         if ($response->getStatusCode() > 204) {
             echo "Can't fetch list \n";
             exit(1);
         }
         // getBody() presumably returns a stream object (Guzzle-style
         // client); cast once so the cache file and the parser both get
         // a plain string.
         $content = (string) $response->getBody();
         file_put_contents($this->cachePath . $this->cacheFile, $content);
     } else {
         $content = file_get_contents($this->cachePath . $this->cacheFile);
     }
     $components = [];
     $html = HtmlDomParser::str_get_html($content);
     if (!$html) {
         // Empty or unparsable page: no components to report.
         return $components;
     }
     foreach ($html->find('label[class=komponentelabela]') as $label) {
         if (count($label->children())) {
             $valueNode = $label->children(0);
             // A label may have a single child; fall back to an empty name
             // instead of dereferencing a missing node.
             $nameNode = $label->children(1);
             $name = $nameNode ? $nameNode->plaintext : '';
             $components[] = ['sepa_id' => intval($valueNode->attr['value']), 'name' => trim($name)];
         }
     }
     return $components;
 }

作者:jinseoko    项目:as   
/**
 * Scrapes the danawa sales page and groups promotion texts by brand and model.
 *
 * For every div.salesCont block the brand name is read from the alt
 * attribute reached through the preceding sibling; each "dl.base dt" entry
 * contributes a model name (with "new"-style prefixes removed) and the list
 * of promotion items found in the element following it.
 *
 * @return array Map of brand name => (model name => list of promotion strings).
 */
private function aggregatePromoDataFromWeb()
 {
     $page = $this->getPage('http://auto.danawa.com/newcar/?Work=sales');
     $dom = HtmlDomParser::str_get_html($page);
     $promotions = [];
     foreach ($dom->find('div.salesCont') as $brandBlock) {
         $brand = $brandBlock->prev_sibling()->children(0)->children(0)->alt;
         $models = [];
         foreach ($brandBlock->find('dl.base dt') as $term) {
             // Strip marketing prefixes so variants collapse onto one model name.
             $prefixes = ['올 뉴 ', '더 뉴 ', '신형 ', 'All New ', 'All-New ', 'The New ', 'New '];
             $modelName = str_replace($prefixes, '', $term->children(0)->plaintext);
             // Drop non-breaking spaces and circled footnote digits.
             $footnotes = ['&nbsp;', '①', '②', '③', '④', '⑤', '⑥', '⑦', '⑧'];
             $rawPromo = str_replace($footnotes, '', $term->next_sibling()->children(0)->innertext);
             // Mark item boundaries with '|' and normalise a few spellings.
             $normalized = str_replace([', <span', '.+ ', '정용', '지급 시', '무이자할부'], ['|<span', ' + ', '적용', '지급시', '무이자 할부'], $rawPromo);
             $pieces = explode('|', $normalized);
             $promoItems = array_map('trim', array_map('strip_tags', $pieces));
             if (!array_key_exists($modelName, $models)) {
                 $models[$modelName] = $promoItems;
                 continue;
             }
             // Model already seen for this brand: append the new items.
             foreach ($promoItems as $promoItem) {
                 $models[$modelName][] = $promoItem;
             }
         }
         $promotions[$brand] = $models;
     }
     return $promotions;
 }

作者:KartikPadmanabha    项目:markdown-resum   
/**
 * Renders a Markdown resume into a full HTML document using a template.
 *
 * @param string $source Path of the Markdown source file.
 * @param string $template Template name; the application's default template
 *                         is used when this is empty.
 * @param mixed $refresh Passed to the template as "refresh_rate"; its
 *                       boolean value is passed as "reload".
 * @return string Rendered HTML document.
 * @throws \Exception When the source file or the template's index.html is missing.
 */
protected function generateHtml($source, $template, $refresh)
 {
     // The source file has to exist before anything else happens
     if (!file_exists($source)) {
         throw new \Exception("Unable to open source file: {$source}");
     }
     // Fall back to the default template when none was requested
     if (!$template) {
         $template = $this->app->defaultTemplate;
     }
     $templatePath = implode(DIRECTORY_SEPARATOR, array($this->app->templatePath, basename($template)));
     $templateIndexPath = implode(DIRECTORY_SEPARATOR, array($templatePath, 'index.html'));
     if (!file_exists($templateIndexPath)) {
         throw new \Exception("Unable to open template file: {$templateIndexPath}");
     }
     $style = $this->generateContent($templatePath, 'css');
     $links = $this->generateContent($templatePath, 'links');
     $templateContent = file_get_contents($templateIndexPath);
     $markdown = file_get_contents($source);
     // Markdown first, then SmartyPants to tidy up the punctuation
     $resumeHtml = SmartyPants::defaultTransform(MarkdownExtra::defaultTransform($markdown));
     // The document title is "<first h1> | <first h2>" of the resume
     $resumeDom = HtmlDomParser::str_get_html($resumeHtml);
     $heading = $resumeDom->find('h1', 0)->innertext;
     $subHeading = $resumeDom->find('h2', 0)->innertext;
     $title = sprintf('%s | %s', $heading, $subHeading);
     // Fill the Mustache template with the rendered pieces
     $context = array('title' => $title, 'style' => $style, 'links' => $links, 'resume' => $resumeHtml, 'reload' => (bool) $refresh, 'refresh_rate' => $refresh);
     $engine = new \Mustache_Engine();
     return $engine->render($templateContent, $context);
 }

作者:Nemanja    项目:meteoalar   
/**
 * Imports the station list from the SEPA page, using the file cache.
 *
 * Only table rows with exactly seven cells are considered, and only those
 * whose fourth cell contains a "stanica=<digits>" reference with a positive id.
 *
 * Terminates the process with exit(1) when the page cannot be fetched.
 *
 * @return array List of ['eoi_code', 'name', 'network', 'type', 'sepa_id'] entries.
 */
public function doImport()
 {
     if (file_exists($this->cachePath . $this->cacheFile) === false) {
         $response = $this->client->get($this->sepaPage);
         if ($response->getStatusCode() > 204) {
             echo "Can't fetch list \n";
             exit(1);
         }
         // getBody() presumably returns a stream object (Guzzle-style
         // client); cast once so the cache file and the parser both get
         // a plain string.
         $content = (string) $response->getBody();
         file_put_contents($this->cachePath . $this->cacheFile, $content);
     } else {
         $content = file_get_contents($this->cachePath . $this->cacheFile);
     }
     $nodes = [];
     $html = HtmlDomParser::str_get_html($content);
     if (!$html) {
         // Empty or unparsable page: no stations to report.
         return $nodes;
     }
     foreach ($html->find('tr') as $row) {
         if (count($row->children()) === 7) {
             $code = $row->children(1)->plaintext;
             // The fourth cell holds both the station name and the link
             // carrying the SEPA cms ID.
             $nameCell = $row->children(3);
             $name = trim($nameCell->plaintext);
             $network = $row->children(4)->plaintext;
             $type = $row->children(5)->plaintext;
             //get sepa cms ID
             preg_match("/stanica=([\\d]+)/", $nameCell->innertext, $matches);
             if (isset($matches[1]) === true && is_numeric($matches[1]) && intval($matches[1]) > 0) {
                 $nodes[] = ['eoi_code' => trim($code), 'name' => trim($name), 'network' => trim($network), 'type' => trim($type), 'sepa_id' => intval($matches[1])];
             }
         }
     }
     return $nodes;
 }

作者:jinseoko    项目:as   
/**
 * Scrapes the mobile danawa model page and stores every trim it lists.
 *
 * Each <li> inside a ul.modelPrice list yields one Trim::updateOrCreate()
 * call. Fuel, engine and mileage are read from three <div> values found
 * inside an HTML comment of the list item; the trim name is taken from the
 * item's label and normalised through a fixed chain of replacements.
 *
 * @param mixed $id Model id appended to the page URL and stored as danawa_id.
 * @param mixed $brand Stored as the "brand" attribute.
 * @param mixed $model Stored as the "model" attribute.
 */
private function aggregateTrimData($id, $brand, $model)
 {
     $url = 'http://m.auto.danawa.com/auto/?Work=model&Model=' . $id;
     $response = $this->getPage($url);
     $html = HtmlDomParser::str_get_html($response);
     $elements = $html->find('ul.modelPrice');
     for ($i = 0; $i < count($elements); $i++) {
         $lists = $elements[$i]->find('li');
         foreach ($lists as $item) {
             $string = trim($item->innertext);
             // Take the contents of the first HTML comment in the item.
             preg_match('/.*?<!--(.*?)-->.*/msU', $string, $comments);
             // Remove all whitespace so the three <div> values sit back to back.
             $words = preg_replace('/\\s+/', '', trim($comments[1]));
             preg_match('/<div>([^<]+)<\\/div><div>([^<]+)<\\/div><div>([^<]+)<\\/div>/', $words, $tags);
             $fuel = $tags[1];
             $engine = $tags[2];
             $mileage = $tags[3];
             // Normalise the trim name step by step; the order matters because
             // later steps operate on the output of earlier ones.
             $trim1 = trim($item->find('div.name span label', 0)->plaintext);
             $trim2 = str_replace('&nbsp;', '', $trim1);
             // Translate a few Korean terms to English.
             $trim3 = str_replace(['럭셔리', '프리미엄', '인승'], ['Luxury', 'Premium', '-seater'], $trim2);
             $trim4 = str_replace(['디젤', 'Diessel'], 'Diesel', $trim3);
             // Keep printable ASCII only (drops any remaining non-ASCII text).
             $trim5 = preg_replace('/[^\\x20-\\x7e]/', '', $trim4);
             // Remove parentheses left empty by the previous step.
             $trim6 = str_replace(['( )', '()'], '', $trim5);
             $trim7 = str_replace(['(Without 360 Camera)', '(Without 360 camera/Sunroof'], ['Without 360 Camera', 'Without 360 Camera/Sunroof'], $trim6);
             // Unify capitalisation and spelling of common keywords.
             $trim8 = str_replace(['LIMITED', 'LIMTED', 'hybrid', 'convertible', ' roof', 'EXECUTIVE', 'SUPREME', 'F-SPORT', 'F SPORT', 'F Sport', 'pkg', 'road', 'style', 'w/'], ['Limited', 'Limited', 'Hybrid', 'Convertible', ' Roof', 'Executive', 'Supreme', 'F-Sport', 'F-Sport', 'F-Sport', 'PKG', 'Road', 'Style', 'With '], $trim7);
             // Collapse runs of whitespace into single spaces.
             $trim9 = preg_replace('/\\s+/', ' ', $trim8);
             $trim = str_replace(['3.8 AWD & (A/T)'], ['3.8 AWD (A/T)'], $trim9);
             // Price: strip thousands separators before converting to int.
             $price = intval(str_replace(',', '', trim($item->find('div.price span', 0)->plaintext)));
             // NOTE(review): the returned model is assigned but never used.
             $vehicle = Trim::updateOrCreate(['danawa_id' => $id, 'brand' => $brand, 'model' => $model, 'trim' => $trim, 'fuel' => $fuel, 'engine' => $engine, 'mileage' => $mileage, 'price' => $price]);
         }
     }
 }

作者:sebastien-fauve    项目:Amazon-Mws-Repricin   
/**
 * Looks up a seller's display name on the amazon.de seller page.
 *
 * The name is read from the first li.aagLegalRow of ul.aagLegalData inside
 * the .amabot_right column, or, when that list is absent, from the column's
 * first <p>. In both cases the text of the element's first child is blanked
 * before the plain text is taken.
 *
 * The requested URL is remembered in $this->lastUrl.
 *
 * @param string $sellerId Seller identifier.
 * @param string $marketPlace Marketplace identifier.
 * @return string|null Seller name, or null when the page cannot be fetched
 *                     or does not contain the expected markup.
 */
public function getSellerName($sellerId, $marketPlace = "A1PA6795UKMFR9")
 {
     // Encode both values so unexpected characters cannot corrupt the query string.
     $url = "http://www.amazon.de/gp/aag/main?ie=UTF8&marketplaceID=" . rawurlencode($marketPlace) . "&orderID=&seller=" . rawurlencode($sellerId);
     $this->lastUrl = $url;
     $html = file_get_contents($url);
     if ($html === false) {
         return null;
     }
     $dom = HtmlDomParser::str_get_html($html);
     if (!$dom) {
         return null;
     }
     $rightColumn = $dom->find('.amabot_right', 0);
     if (!$rightColumn) {
         return null;
     }
     $legalList = $rightColumn->find('ul.aagLegalData', 0);
     if ($legalList != null) {
         $legalRow = $legalList->find('li.aagLegalRow', 0);
         if ($legalRow == null) {
             return null;
         }
         // Blank the first child so only the name remains in the row's
         // text; guarded like the <p> fallback below.
         $rowLabel = $legalRow->children(0);
         if ($rowLabel) {
             $rowLabel->innertext = "";
         }
         return $legalRow->plaintext;
     }
     $paragraph = $rightColumn->find('p', 0);
     if (!$paragraph) {
         return null;
     }
     $paragraphLabel = $paragraph->children(0);
     if ($paragraphLabel) {
         $paragraphLabel->innertext = "";
     }
     return $paragraph->plaintext;
 }

作者:undowndin    项目:cnBeta   
/**
 * Downloads the article page for $this->id and fills $this->article.
 *
 * Populated keys: id, title, date, source, sourceLink (only when the source
 * is a link), intro, topicId, topicTitle, topicImage, content and author.
 *
 * @throws ErrorException When the page cannot be parsed or has no title node.
 */
public function loadArticle()
 {
     $this->html = file_get_contents(cbURL($this->id));
     $dom = HtmlDomParser::str_get_html($this->html);
     $this->article['id'] = $this->id;
     // A failed download or empty page yields no document at all; treat it
     // exactly like a page without a title.
     $titleNode = $dom ? $dom->find('h2#news_title', 0) : null;
     if (!$titleNode) {
         throw new ErrorException('文章找不到了...');
     }
     $this->article['title'] = $titleNode->plaintext;
     $this->article['date'] = $dom->find('div.title_bar span.date', 0)->plaintext;
     $this->article['source'] = trim($dom->find('div.title_bar span.where', 0)->plaintext);
     $this->article['source'] = str_replace('稿源:', '', $this->article['source']);
     $sourceLinkNode = $dom->find('div.title_bar span.where a', 0);
     if ($sourceLinkNode) {
         // Reuse the node found above instead of querying the DOM again.
         $this->article['sourceLink'] = $sourceLinkNode->href;
     }
     $this->article['intro'] = trim($dom->find('div.introduction', 0)->plaintext);
     $topicURL = $dom->find('div.introduction div a', 0)->href;
     preg_match('/topics\\/(\\d+)\\.htm/', $topicURL, $matches);
     // No topic id in the link: store 0, which is what the cast of a
     // missing match produced before, without reading an undefined offset.
     $this->article['topicId'] = isset($matches[1]) ? (int) $matches[1] : 0;
     $topicImageNode = $dom->find('div.introduction div a img', 0);
     $this->article['topicTitle'] = $topicImageNode->title;
     $this->article['topicImage'] = $topicImageNode->src;
     $content = $dom->find('section.article_content div.content', 0)->innertext;
     $content = String::tidy($content);
     $this->article['content'] = str_replace(' class="f_center"', '', $content);
     $this->article['author'] = trim($dom->find('span.author', 0)->plaintext, "[] ");
 }

作者:Sywooc    项目:find-parse   
/**
 * Extracts the digit groups of the price shown on a page.
 *
 * The text of the first "div.pricelabel strong" element is stripped of
 * spaces and scanned for runs of digits.
 *
 * @param string $url Location of the page to load.
 * @return array|string List of digit strings found in the price text, or
 *                      the string "0" when the page cannot be loaded, has no
 *                      price element, or the price contains no digits.
 */
public static function getPrice($url)
 {
     $parser = new HtmlDomParser();
     $dom = $parser->file_get_html($url);
     if (!$dom) {
         // The page could not be loaded or parsed.
         return "0";
     }
     $priceNode = $dom->find('div.pricelabel strong', 0);
     $price = $priceNode ? $priceNode->plaintext : null;
     unset($dom);
     if (empty($price)) {
         return "0";
     }
     preg_match_all("/(\\d+)/", str_replace(" ", "", $price), $matches);
     if (empty($matches[0])) {
         return "0";
     }
     return $matches[0];
 }

作者:cek    项目:concrete5-   
/**
 * Returns an Element mirroring the preset's container markup.
 *
 * The tag and class of the first top-level node of
 * $this->arrayPreset['container'] are used; a plain <div> Element is
 * returned when the markup cannot be parsed or has no such node.
 *
 * @return Element
 */
public function getPresetContainerHtmlObject()
 {
     $parser = new HtmlDomParser();
     $parsed = $parser->str_get_html($this->arrayPreset['container']);
     if (is_object($parsed)) {
         $topLevelNodes = $parsed->childNodes();
         $containerNode = $topLevelNodes[0];
         if (is_object($containerNode)) {
             $container = new Element($containerNode->tag);
             $container->class($containerNode->class);
             return $container;
         }
     }
     // Fallback when no usable container node was found.
     return new Element('div');
 }

作者:pseci    项目:versionsca   
/**
  * Execute the "missing" command
  *
  * Downloads the PHP 5 changelog, collects CVE ids mentioned per version
  * section and, for those not already present in checks.json, looks up the
  * score and summary and prints the resulting records as JSON.
  *
  * @param  InputInterface $input Input object
  * @param  OutputInterface $output Output object
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $this->verbose = $input->getOption('verbose');
     $changelog = file_get_contents('http://php.net/ChangeLog-5.php');
     // Get our current checks
     $json = json_decode(file_get_contents(__DIR__ . '/../../../Psecio/Versionscan/checks.json'));
     $checksList = [];
     foreach ($json->checks as $check) {
         if (!in_array($check->cveid, $checksList)) {
             $checksList[] = $check->cveid;
         }
     }
     // Parse the changelog into versions
     preg_match_all('#<section class="version" id="([0-9\\.]+)">(.+?)</section>#ms', $changelog, $matches);
     $fixVersions = [];
     foreach ($matches[0] as $index => $match) {
         $versionId = $matches[1][$index];
         // see if we have any CVEs
         if (strstr($match, 'CVE') === false) {
             continue;
         }
         // Extract our CVEs
         preg_match_all('/CVE-[0-9]+-[0-9]+/', $match, $cveList);
         // Throttling: only the first CVE of each version is looked up.
         // NOTE(review): the original TODO mentions a limit of five, but
         // the slice keeps one — confirm which is intended.
         $cveList[0] = array_slice($cveList[0], 0, 1);
         foreach ($cveList[0] as $cveId) {
             if (in_array($cveId, $checksList) === true) {
                 continue;
             }
             $cveDetail = $this->getCveDetail($cveId, $output);
             if ($cveDetail === false) {
                 continue;
             }
             $dom = HtmlDomParser::str_get_html($cveDetail);
             if (!$dom) {
                 // An empty or unparsable detail page is skipped just like
                 // a failed lookup.
                 continue;
             }
             $cveScore = $dom->find('div.cvssbox')[0]->plaintext;
             $cveSummary = explode("\n", trim($dom->find('div.cvedetailssummary')[0]->plaintext))[0];
             $output->writeLn('(' . $cveScore . ') fixed in ' . $versionId);
             if (!isset($fixVersions[$cveId])) {
                 $fixVersions[$cveId] = ['threat' => $cveScore, 'cveid' => $cveId, 'summary' => trim($cveSummary), 'fixVersions' => ['base' => []]];
             }
             $fixVersions[$cveId]['fixVersions']['base'][] = $versionId;
         }
     }
     if (empty($fixVersions)) {
         $output->writeLn('No missing versions/CVEs detected');
     } else {
         $jsonOutput = json_encode(array_values($fixVersions), JSON_PRETTY_PRINT);
         echo $jsonOutput . "\n\n";
     }
     if ($this->verbose === true) {
         $output->writeLn('Missing records found: ' . count($fixVersions));
     }
 }

作者:petu    项目:ipad-slide   
/**
 * Returns the src attribute of the first <img> found in an HTML fragment.
 *
 * @param string $html HTML fragment to search.
 * @param string $default Value returned when the fragment cannot be parsed
 *                        (e.g. it is empty) or contains no image.
 * @return mixed The first image's src attribute, or $default.
 */
public static function getImgSrcFromHtml($html, $default = '')
 {
     $dom = HtmlDomParser::str_get_html($html);
     if (!$dom) {
         // str_get_html() yields false for empty or oversized input.
         return $default;
     }
     $images = $dom->find('img');
     if ($images) {
         return $images[0]->src;
     }
     return $default;
 }

作者:lehoikm    项目:Larave-4.   
/**
 * Extracts the article markup (summary, body and credit) from a page.
 *
 * The page is fetched through lay_noi_dung() and the outer HTML of the
 * first .the-article-summary, .the-article-body and .the-article-credit
 * elements is concatenated in that order.
 *
 * @param string $url Address of the article page.
 * @return string Concatenated outer HTML of the three sections.
 */
public function boc_tach_noi_dung_bai_viet($url)
 {
     $page = $this->lay_noi_dung($url);
     $dom = HtmlDomParser::str_get_html($page);
     $sections = array('.the-article-summary', '.the-article-body', '.the-article-credit');
     $article = '';
     foreach ($sections as $selector) {
         $article .= $dom->find($selector, 0)->outertext;
     }
     return $article;
 }

作者:BePsvP    项目:CCU-Plu   
/**
  * Parses the score list.
  *
  * Rows are taken from the first nested table ("table table"), skipping
  * rows whose bgcolor is "#4d6eb2". The name is the first cell; the value
  * is the third cell for four-cell rows and the second cell otherwise.
  *
  * @param string $content HTML containing the score table.
  * @return array List of ['name' => string, 'value' => string] entries.
  */
 protected function parseLists($content)
 {
     $scoreTable = HtmlDomParser::str_get_html($content)->find('table table', 0);
     $dataRows = $scoreTable->find('tr[bgcolor!="#4d6eb2"]');
     $scores = [];
     foreach ($dataRows as $dataRow) {
         if (count($dataRow->children) === 4) {
             $valueColumn = 2;
         } else {
             $valueColumn = 1;
         }
         $label = trim($dataRow->children(0)->plaintext);
         $score = trim($dataRow->children($valueColumn)->plaintext);
         $scores[] = ['name' => $label, 'value' => $score];
     }
     return $scores;
 }

作者:shortlist-digita    项目:agreable-catfish-importer-plugi   
/**
 * Collects the contents of every <loc> element of a sitemap.
 *
 * @param string $sitemapLocation Path or URL of the sitemap to load.
 * @return array List of the inner text of each <loc> element, in document order.
 */
protected static function getUrlsFromSitemap($sitemapLocation)
 {
     $document = HtmlDomParser::file_get_html($sitemapLocation);
     $locations = $document->find('loc');
     $collected = [];
     foreach ($locations as $location) {
         array_push($collected, $location->innertext);
     }
     return $collected;
 }


问题


面经


文章

微信
公众号

扫码关注公众号