作者:hiss
项目:concrete5-am
/**
 * Rewrites a content block's searchable HTML for AMP output.
 *
 * <concrete-picture> placeholders whose file can be loaded are replaced by
 * AmpImg tags, <img> tags are replaced by <amp-img> elements, and inline
 * style attributes are removed. The markup is finally passed through
 * LinkAbstractor::translateFrom().
 *
 * @param ContentBlockController $controller Block whose searchable content is rewritten.
 *
 * @return mixed The value returned by LinkAbstractor::translateFrom().
 */
public static function content(ContentBlockController $controller)
{
    $markup = $controller->getSearchableContent();
    $parser = new HtmlDomParser();
    $document = $parser->str_get_html($markup, true, true, DEFAULT_TARGET_CHARSET, false);

    // Without a parsed document the markup is only run through the link translation.
    if (!is_object($document)) {
        return LinkAbstractor::translateFrom($markup);
    }

    // Placeholders referencing a file that cannot be loaded are left untouched.
    foreach ($document->find('concrete-picture') as $pictureNode) {
        $file = File::getByID($pictureNode->fid);
        if (!is_object($file)) {
            continue;
        }
        $ampPicture = new AmpImg($file);
        $ampPicture->alt($pictureNode->alt);
        $pictureNode->outertext = (string) $ampPicture;
    }

    // Plain images keep their alt, src, height and width on the replacement element.
    foreach ($document->find('img') as $imageNode) {
        $ampImage = new Element('amp-img');
        $ampImage->alt($imageNode->alt);
        $ampImage->src($imageNode->src);
        $ampImage->height($imageNode->height);
        $ampImage->width($imageNode->width);
        $imageNode->outertext = (string) $ampImage;
    }

    foreach ($document->find('*[style]') as $styledNode) {
        $styledNode->removeAttribute('style');
    }

    $markup = (string) $document->restore_noise($document);

    return LinkAbstractor::translateFrom($markup);
}
作者:cek
项目:concrete5-
/**
 * Creates an instance from an HTML snippet.
 *
 * Only the first top-level node of the snippet is used; its tag name and
 * class attribute are copied onto a new Element that is handed to the
 * constructor.
 *
 * @param string $html Markup whose first top-level node describes the element.
 *
 * @return static
 */
public static function fromHtml($html)
{
    $parser = new HtmlDomParser();
    $topLevelNodes = $parser->str_get_html($html)->childNodes();
    $first = $topLevelNodes[0];

    $wrapper = new Element($first->tag);
    $wrapper->class($first->class);

    return new static($wrapper);
}
作者:digideski
项目:concrete
/**
 * Creates an instance from an HTML snippet.
 *
 * The first top-level node of the snippet is mirrored as a new Element with
 * the same tag name and every attribute returned by getAllAttributes().
 *
 * @param string $html Markup whose first top-level node describes the element.
 *
 * @return static
 */
public static function fromHtml($html)
{
    $parser = new HtmlDomParser();
    $topLevelNodes = $parser->str_get_html($html)->childNodes();
    $sourceNode = $topLevelNodes[0];

    $copy = new Element($sourceNode->tag);
    foreach ($sourceNode->getAllAttributes() as $attributeName => $attributeValue) {
        $copy->setAttribute($attributeName, $attributeValue);
    }

    return new static($copy);
}
作者:hdk001
项目:concrete5-
/**
 * Builds the container element for this layout from its grid framework.
 *
 * The framework's row start and row end HTML are concatenated and parsed;
 * the tag name, id and class of the first resulting node are copied onto a
 * new Element.
 *
 * @return Element|null The container element, or null when the layout has no grid framework object.
 */
public function getLayoutContainerHtmlObject()
{
    $framework = $this->layout->getThemeGridFrameworkObject();
    if (!is_object($framework)) {
        return null;
    }

    $parser = new HtmlDomParser();
    $rowMarkup = $framework->getPageThemeGridFrameworkRowStartHTML() . $framework->getPageThemeGridFrameworkRowEndHTML();
    $topLevelNodes = $parser->str_get_html($rowMarkup)->childNodes();
    $rowNode = $topLevelNodes[0];

    $container = new Element($rowNode->tag);
    $container->id($rowNode->id);
    $container->class($rowNode->class);

    return $container;
}
作者:KartikPadmanabha
项目:markdown-resum
/**
 * Renders the resume to HTML and converts it to a PDF with wkhtmltopdf.
 *
 * The rendered HTML is written to a temporary file inside the destination
 * directory, handed to wkhtmltopdf and removed again afterwards. The PDF is
 * named after the source file.
 *
 * @param InputInterface  $input  Provides the "source" and "destination" arguments and the "template" option.
 * @param OutputInterface $output Receives the error or success message.
 *
 * @return bool False when wkhtmltopdf is not available, true otherwise.
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $this->app = $this->getApplication();
    $source = $input->getArgument('source');
    $destination = rtrim($input->getArgument('destination'), DIRECTORY_SEPARATOR);
    $template = $input->getOption('template');
    $pdfSource = join(DIRECTORY_SEPARATOR, array($destination, '.tmp_pdf_source.html'));
    $destFilename = join(DIRECTORY_SEPARATOR, array($destination, pathinfo($source, PATHINFO_FILENAME) . '.pdf'));

    // Make sure we've got our converter available
    exec('wkhtmltopdf -V', $results, $returnVal);
    if ($returnVal) {
        $output->writeln("\n<error>Error:</error> Unable to locate wkhtmltopdf.\n" . " Please make sure that it is installed and available in " . "your path. \n For installation help, please read: " . "https://github.com/pdfkit/pdfkit/wiki/Installing-WKHTMLTOPDF \n\n", $this->app->outputFormat);
        return false;
    }

    $rendered = $this->generateHtml($source, $template, false);

    // The pdf needs some extra css rules, and so we'll add them here
    // to our html document. A template without a <body> (or markup the
    // parser cannot load) is written out unchanged instead of failing.
    $simpleDom = HtmlDomParser::str_get_html($rendered);
    $body = is_object($simpleDom) ? $simpleDom->find('body', 0) : null;
    if ($body) {
        $body->class = $body->class . ' pdf';
        $rendered = (string) $simpleDom;
    }

    // Save to a temp destination for the pdf renderer to use
    file_put_contents($pdfSource, $rendered);

    // Process the document with wkhtmltopdf. Both paths are quoted so that
    // spaces or shell metacharacters in them reach the tool as one argument each.
    exec('wkhtmltopdf ' . escapeshellarg($pdfSource) . ' ' . escapeshellarg($destFilename));

    // Unlink the temporary file
    unlink($pdfSource);

    $output->writeln(sprintf("Wrote pdf resume to: <info>%s</info>", $destFilename), $this->app->outputFormat);
    return true;
}
作者:AgenceStrati
项目:migrato
/**
 * Returns the inner HTML of every element in $text that matches $selector.
 *
 * @param string $text     HTML markup to search.
 * @param string $selector Selector passed to the parsed document's find().
 * @return array Inner HTML of each match, in the order find() returns them;
 *               empty when $text cannot be parsed into a document.
 */
public static function exec($text, $selector)
{
    $document = HtmlDomParser::str_get_html($text);

    // The parser does not always hand back a document object (presumably
    // for empty input - confirm against the parser); there is then nothing
    // to match, so honour the documented array return type.
    if (!is_object($document)) {
        return array();
    }

    return array_map(function ($element) {
        return $element->innertext;
    }, $document->find($selector));
}
作者:Nemanja
项目:meteoalar
/**
 * Loads the component list page and extracts the components from it.
 *
 * The page is read from a per-day cache file when one exists; otherwise it
 * is fetched over HTTP and stored in the cache. The script terminates with
 * exit code 1 when the HTTP status is greater than 204.
 *
 * Note that each call appends the current date to $this->cachePath.
 *
 * @return array List of ['sepa_id' => int, 'name' => string] entries.
 */
public function doImport()
{
    $this->cachePath .= date("Y-m-d") . '/';
    if (!is_readable($this->cachePath)) {
        mkdir($this->cachePath, 0755, true);
    }

    $this->cacheFile = md5($this->sepaUrl . $this->sepaPage . '-' . date("Y-m-d")) . '.dat';
    $this->client = new Client(['base_uri' => $this->sepaUrl, 'timeout' => 5.0]);

    $cacheTarget = $this->cachePath . $this->cacheFile;
    if (file_exists($cacheTarget)) {
        $content = file_get_contents($cacheTarget);
    } else {
        $response = $this->client->get($this->sepaPage);
        if ($response->getStatusCode() > 204) {
            echo "Can't fetch list \n";
            exit(1);
        }
        $content = $response->getBody();
        file_put_contents($cacheTarget, $content);
    }

    $components = [];
    $html = HtmlDomParser::str_get_html($content);
    foreach ($html->find('label[class=komponentelabela]') as $label) {
        // Labels without child nodes carry no component data.
        if (!count($label->children())) {
            continue;
        }
        // First child: its "value" attribute is the id; second child: the display name.
        $sepaId = $label->children(0)->attr['value'];
        $name = $label->children(1)->plaintext;
        $components[] = ['sepa_id' => intval($sepaId), 'name' => trim($name)];
    }

    return $components;
}
作者:jinseoko
项目:as
/**
 * Scrapes the Danawa new-car sales page and collects promotion texts.
 *
 * @return array Map of brand name => (model name => list of promotion item strings).
 */
private function aggregatePromoDataFromWeb()
{
$response = $this->getPage('http://auto.danawa.com/newcar/?Work=sales');
$html = HtmlDomParser::str_get_html($response);
// One div.salesCont is processed per brand (see the assignment to $arrData below).
$elements = $html->find('div.salesCont');
$arrData = [];
foreach ($elements as $element) {
// Brand name: alt attribute of the first grandchild of the preceding sibling
// (presumably the brand logo image - confirm against the page markup).
$brandName = $element->prev_sibling()->children(0)->children(0)->alt;
$subElements = $element->find('dl.base dt');
$arrModels = [];
foreach ($subElements as $subElement) {
// Strip the listed marketing prefixes (Korean and English "new"-style wording) from the model name.
$model = str_replace(['올 뉴 ', '더 뉴 ', '신형 ', 'All New ', 'All-New ', 'The New ', 'New '], '', $subElement->children(0)->plaintext);
// Promotion HTML comes from the first child of the element following the <dt>; the listed characters (including circled numerals) are removed.
$origString = str_replace([' ', '①', '②', '③', '④', '⑤', '⑥', '⑦', '⑧'], '', $subElement->next_sibling()->children(0)->innertext);
// Turn ", <span" into the "|" item separator and normalise a few spellings before splitting.
$tempString = str_replace([', <span', '.+ ', '정용', '지급 시', '무이자할부'], ['|<span', ' + ', '적용', '지급시', '무이자 할부'], $origString);
// Split into items, then strip tags and surrounding whitespace from each.
$items = array_map('trim', array_map('strip_tags', explode('|', $tempString)));
// A model that occurs more than once within a brand accumulates the items of every occurrence.
if (array_key_exists($model, $arrModels)) {
foreach ($items as $item) {
array_push($arrModels[$model], $item);
}
} else {
$arrModels[$model] = $items;
}
}
$arrData[$brandName] = $arrModels;
}
return $arrData;
}
作者:KartikPadmanabha
项目:markdown-resum
/**
 * Renders a Markdown resume into a complete HTML document.
 *
 * The Markdown source is converted with MarkdownExtra and SmartyPants and
 * then placed, together with the template's css and links content, into the
 * template's index.html via Mustache. The document title is built from the
 * first h1 and h2 of the converted resume.
 *
 * @param string $source   Path of the Markdown source file.
 * @param string $template Template name; the application's default template is used when empty.
 * @param mixed  $refresh  Passed to the template as "refresh_rate" and, cast to bool, as "reload".
 *
 * @return string The rendered HTML document.
 *
 * @throws \Exception When the source file or the template's index.html does not exist.
 */
protected function generateHtml($source, $template, $refresh)
{
    // Check that the source file is sane
    if (!file_exists($source)) {
        throw new \Exception("Unable to open source file: {$source}");
    }

    // Fall back to the default template when none was requested
    $template = $template ?: $this->app->defaultTemplate;
    $templatePath = implode(DIRECTORY_SEPARATOR, array($this->app->templatePath, basename($template)));
    $templateIndexPath = implode(DIRECTORY_SEPARATOR, array($templatePath, 'index.html'));
    if (!file_exists($templateIndexPath)) {
        throw new \Exception("Unable to open template file: {$templateIndexPath}");
    }

    $style = $this->generateContent($templatePath, 'css');
    $links = $this->generateContent($templatePath, 'links');
    $templateContent = file_get_contents($templateIndexPath);

    // Process with Markdown, and then use SmartyPants to clean up punctuation.
    $resumeHtml = SmartyPants::defaultTransform(
        MarkdownExtra::defaultTransform(file_get_contents($source))
    );

    // Construct the title for the html document from the h1 and h2 tags
    $simpleDom = HtmlDomParser::str_get_html($resumeHtml);
    $title = sprintf('%s | %s', $simpleDom->find('h1', 0)->innertext, $simpleDom->find('h2', 0)->innertext);

    // Render the Markdown into an html file with Mustache Templates
    $viewData = array(
        'title' => $title,
        'style' => $style,
        'links' => $links,
        'resume' => $resumeHtml,
        'reload' => (bool) $refresh,
        'refresh_rate' => $refresh,
    );
    $engine = new \Mustache_Engine();

    return $engine->render($templateContent, $viewData);
}
作者:Nemanja
项目:meteoalar
/**
 * Loads the station list page and extracts the stations from its table rows.
 *
 * The page is read from the cache file when it exists; otherwise it is
 * fetched over HTTP and stored in the cache. The script terminates with
 * exit code 1 when the HTTP status is greater than 204.
 *
 * Only rows with exactly seven cells whose fourth cell contains a positive
 * numeric "stanica=<id>" reference are returned.
 *
 * @return array List of ['eoi_code', 'name', 'network', 'type', 'sepa_id'] entries.
 */
public function doImport()
{
    $cacheTarget = $this->cachePath . $this->cacheFile;
    if (file_exists($cacheTarget)) {
        $content = file_get_contents($cacheTarget);
    } else {
        $response = $this->client->get($this->sepaPage);
        if ($response->getStatusCode() > 204) {
            echo "Can't fetch list \n";
            exit(1);
        }
        $content = $response->getBody();
        file_put_contents($cacheTarget, $content);
    }

    $nodes = [];
    $html = HtmlDomParser::str_get_html($content);
    foreach ($html->find('tr') as $row) {
        if (count($row->children()) !== 7) {
            continue;
        }

        // get sepa cms ID from the markup of the name cell
        preg_match("/stanica=([\\d]+)/", $row->children(3)->innertext, $matches);
        if (!isset($matches[1]) || !is_numeric($matches[1]) || intval($matches[1]) <= 0) {
            continue;
        }

        $nodes[] = [
            'eoi_code' => trim($row->children(1)->plaintext),
            'name' => trim($row->children(3)->plaintext),
            'network' => trim($row->children(4)->plaintext),
            'type' => trim($row->children(5)->plaintext),
            'sepa_id' => intval($matches[1]),
        ];
    }

    return $nodes;
}
作者:jinseoko
项目:as
/**
 * Scrapes the trims of one model from the mobile Danawa page and stores each via Trim::updateOrCreate().
 *
 * Nothing is returned.
 *
 * @param mixed $id    Model identifier appended to the page URL and stored as "danawa_id".
 * @param mixed $brand Stored as "brand".
 * @param mixed $model Stored as "model".
 */
private function aggregateTrimData($id, $brand, $model)
{
$url = 'http://m.auto.danawa.com/auto/?Work=model&Model=' . $id;
$response = $this->getPage($url);
$html = HtmlDomParser::str_get_html($response);
$elements = $html->find('ul.modelPrice');
for ($i = 0; $i < count($elements); $i++) {
$lists = $elements[$i]->find('li');
foreach ($lists as $item) {
$string = trim($item->innertext);
// Fuel, engine and mileage are read from an HTML comment inside the item:
// take the comment body, drop all whitespace, then match three consecutive <div> values.
preg_match('/.*?<!--(.*?)-->.*/msU', $string, $comments);
$words = preg_replace('/\\s+/', '', trim($comments[1]));
preg_match('/<div>([^<]+)<\\/div><div>([^<]+)<\\/div><div>([^<]+)<\\/div>/', $words, $tags);
$fuel = $tags[1];
$engine = $tags[2];
$mileage = $tags[3];
// Trim name normalisation; each step works on the result of the previous one, so the order matters.
$trim1 = trim($item->find('div.name span label', 0)->plaintext);
$trim2 = str_replace(' ', '', $trim1);
// Replace Korean words with their English equivalents.
$trim3 = str_replace(['럭셔리', '프리미엄', '인승'], ['Luxury', 'Premium', '-seater'], $trim2);
$trim4 = str_replace(['디젤', 'Diessel'], 'Diesel', $trim3);
// Drop every character outside printable ASCII (0x20-0x7e).
$trim5 = preg_replace('/[^\\x20-\\x7e]/', '', $trim4);
// Remove parentheses left empty by the previous step.
$trim6 = str_replace(['( )', '()'], '', $trim5);
$trim7 = str_replace(['(Without 360 Camera)', '(Without 360 camera/Sunroof'], ['Without 360 Camera', 'Without 360 Camera/Sunroof'], $trim6);
// Unify capitalisation and spelling of the listed keywords.
$trim8 = str_replace(['LIMITED', 'LIMTED', 'hybrid', 'convertible', ' roof', 'EXECUTIVE', 'SUPREME', 'F-SPORT', 'F SPORT', 'F Sport', 'pkg', 'road', 'style', 'w/'], ['Limited', 'Limited', 'Hybrid', 'Convertible', ' Roof', 'Executive', 'Supreme', 'F-Sport', 'F-Sport', 'F-Sport', 'PKG', 'Road', 'Style', 'With '], $trim7);
// Collapse runs of whitespace into a single space.
$trim9 = preg_replace('/\\s+/', ' ', $trim8);
$trim = str_replace(['3.8 AWD & (A/T)'], ['3.8 AWD (A/T)'], $trim9);
// Price: remove the commas and convert to an integer.
$price = intval(str_replace(',', '', trim($item->find('div.price span', 0)->plaintext)));
// The value returned by updateOrCreate() is not used afterwards.
$vehicle = Trim::updateOrCreate(['danawa_id' => $id, 'brand' => $brand, 'model' => $model, 'trim' => $trim, 'fuel' => $fuel, 'engine' => $engine, 'mileage' => $mileage, 'price' => $price]);
}
}
}
作者:sebastien-fauve
项目:Amazon-Mws-Repricin
/**
 * Scrapes a seller's name from the amazon.de seller information page.
 *
 * The name is read from the first "li.aagLegalRow" of the "ul.aagLegalData"
 * list in the ".amabot_right" column, or, when that list is absent, from the
 * column's first paragraph. In both cases the first child element of the
 * chosen node is blanked before its plain text is taken (presumably a label
 * preceding the name - confirm against the page markup).
 *
 * The requested URL is stored in $this->lastUrl.
 *
 * @param string $sellerId    Value of the "seller" query parameter.
 * @param string $marketPlace Value of the "marketplaceID" query parameter.
 *
 * @return string|null The extracted text, or null when the page cannot be
 *                     fetched or parsed, or lacks the expected elements.
 */
public function getSellerName($sellerId, $marketPlace = "A1PA6795UKMFR9")
{
    $url = "http://www.amazon.de/gp/aag/main?ie=UTF8&marketplaceID=" . $marketPlace . "&orderID=&seller=" . $sellerId;
    $this->lastUrl = $url;

    $html = file_get_contents($url);
    if ($html === false) {
        return null;
    }

    $dom = HtmlDomParser::str_get_html($html);
    if (!is_object($dom)) {
        return null;
    }

    // Every lookup below may come back empty when the page layout differs;
    // treat that as "name not found" instead of dereferencing null.
    $rightColumn = $dom->find('.amabot_right', 0);
    if (!$rightColumn) {
        return null;
    }

    $legalList = $rightColumn->find('ul.aagLegalData', 0);
    if ($legalList) {
        $nameNode = $legalList->find('li.aagLegalRow', 0);
    } else {
        $nameNode = $rightColumn->find('p', 0);
    }
    if (!$nameNode) {
        return null;
    }

    // Blank the leading child element so only the remaining text is returned.
    $leadingChild = $nameNode->children(0);
    if ($leadingChild) {
        $leadingChild->innertext = "";
    }

    return $nameNode->plaintext;
}
作者:undowndin
项目:cnBeta
/**
 * Downloads the article page for $this->id and fills $this->article.
 *
 * The raw page is kept in $this->html. The keys id, title, date, source,
 * sourceLink (only when the source is linked), intro, topicId, topicTitle,
 * topicImage, content and author are populated from the page.
 *
 * @throws ErrorException When the page has no "h2#news_title" element.
 */
public function loadArticle()
{
    $this->html = file_get_contents(cbURL($this->id));
    $dom = HtmlDomParser::str_get_html($this->html);
    $this->article['id'] = $this->id;

    // A page without a title heading is treated as a missing article.
    $titleNode = $dom->find('h2#news_title', 0);
    if (!$titleNode) {
        throw new ErrorException('文章找不到了...');
    }
    $this->article['title'] = $titleNode->plaintext;
    $this->article['date'] = $dom->find('div.title_bar span.date', 0)->plaintext;

    // The source text has its leading label removed.
    $sourceText = trim($dom->find('div.title_bar span.where', 0)->plaintext);
    $this->article['source'] = str_replace('稿源:', '', $sourceText);

    $sourceLinkNode = $dom->find('div.title_bar span.where a', 0);
    if ($sourceLinkNode) {
        $this->article['sourceLink'] = $sourceLinkNode->href;
    }

    $this->article['intro'] = trim($dom->find('div.introduction', 0)->plaintext);

    // Topic id comes from the topic link's URL, title and image from the linked <img>.
    $topicURL = $dom->find('div.introduction div a', 0)->href;
    preg_match('/topics\\/(\\d+)\\.htm/', $topicURL, $matches);
    $this->article['topicId'] = (int) $matches[1];
    $topicImageNode = $dom->find('div.introduction div a img', 0);
    $this->article['topicTitle'] = $topicImageNode->title;
    $this->article['topicImage'] = $topicImageNode->src;

    $body = String::tidy($dom->find('section.article_content div.content', 0)->innertext);
    $this->article['content'] = str_replace(' class="f_center"', '', $body);

    $this->article['author'] = trim($dom->find('span.author', 0)->plaintext, "[] ");
}
作者:Sywooc
项目:find-parse
/**
 * Loads $url and extracts the digit groups of the price in "div.pricelabel strong".
 *
 * @param string $url Address handed to the parser's file_get_html().
 *
 * @return array|string List of the digit runs found in the price text after
 *                      removing its spaces, or the string "0" when the page
 *                      cannot be loaded, has no price element, or the price
 *                      contains no digits.
 */
public static function getPrice($url)
{
    $parser = new HtmlDomParser();
    $dom = $parser->file_get_html($url);

    // A page that cannot be loaded has no price; use the same fallback as a missing price.
    if (!is_object($dom)) {
        return "0";
    }

    $priceNode = $dom->find('div.pricelabel strong', 0);
    $priceText = $priceNode ? $priceNode->plaintext : null;
    unset($dom);

    if (empty($priceText)) {
        return "0";
    }

    preg_match_all("/(\\d+)/", str_replace(" ", "", $priceText), $matches);

    // $matches[0] holds every complete digit run; callers receive that list as-is.
    return empty($matches[0]) ? "0" : $matches[0];
}
作者:cek
项目:concrete5-
/**
 * Builds the container element described by the preset's "container" markup.
 *
 * The tag name and class of the markup's first top-level node are copied
 * onto a new Element. When the markup cannot be parsed or has no such node,
 * a plain <div> element is returned instead.
 *
 * @return Element
 */
public function getPresetContainerHtmlObject()
{
    $parser = new HtmlDomParser();
    $document = $parser->str_get_html($this->arrayPreset['container']);

    if (is_object($document)) {
        $topLevelNodes = $document->childNodes();
        $first = $topLevelNodes[0];
        if (is_object($first)) {
            $container = new Element($first->tag);
            $container->class($first->class);

            return $container;
        }
    }

    return new Element('div');
}
作者:pseci
项目:versionsca
/**
 * Execute the "missing" command
 *
 * Scans the PHP 5 changelog for CVE identifiers that are not yet present in
 * the bundled checks.json, looks each one up through getCveDetail() and
 * prints the resulting check definitions as pretty-printed JSON.
 *
 * @param InputInterface $input Input object
 * @param OutputInterface $output Output object
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $this->verbose = $input->getOption('verbose');
    $changelog = file_get_contents('http://php.net/ChangeLog-5.php');

    // Collect the CVE ids that already have a check defined
    $existing = json_decode(file_get_contents(__DIR__ . '/../../../Psecio/Versionscan/checks.json'));
    $checksList = [];
    foreach ($existing->checks as $check) {
        if (in_array($check->cveid, $checksList)) {
            continue;
        }
        $checksList[] = $check->cveid;
    }

    // Parse the changelog into versions
    preg_match_all('#<section class="version" id="([0-9\\.]+)">(.+?)</section>#ms', $changelog, $sections);

    $fixVersions = [];
    foreach ($sections[0] as $index => $sectionHtml) {
        $versionId = $sections[1][$index];

        // Skip releases that do not mention any CVE
        if (strstr($sectionHtml, 'CVE') === false) {
            continue;
        }

        // Extract our CVEs
        preg_match_all('/CVE-[0-9]+-[0-9]+/', $sectionHtml, $found);

        // @TODO limit it down to just five for throttling's sake
        // (currently only the first CVE of each version is looked up)
        $candidates = array_slice($found[0], 0, 1);

        foreach ($candidates as $cveId) {
            if (in_array($cveId, $checksList) === true) {
                continue;
            }

            $cveDetail = $this->getCveDetail($cveId, $output);
            if ($cveDetail === false) {
                continue;
            }

            $dom = HtmlDomParser::str_get_html($cveDetail);
            $cveScore = $dom->find('div.cvssbox')[0]->plaintext;
            // Only the first line of the summary block is kept
            $summaryLines = explode("\n", trim($dom->find('div.cvedetailssummary')[0]->plaintext));
            $cveSummary = $summaryLines[0];

            $output->writeLn('(' . $cveScore . ') fixed in ' . $versionId);

            if (!isset($fixVersions[$cveId])) {
                $fixVersions[$cveId] = [
                    'threat' => $cveScore,
                    'cveid' => $cveId,
                    'summary' => trim($cveSummary),
                    'fixVersions' => ['base' => []],
                ];
            }
            $fixVersions[$cveId]['fixVersions']['base'][] = $versionId;
        }
    }

    if (!empty($fixVersions)) {
        echo json_encode(array_values($fixVersions), JSON_PRETTY_PRINT) . "\n\n";
    } else {
        $output->writeLn('No missing versions/CVEs detected');
    }

    if ($this->verbose === true) {
        $output->writeLn('Missing records found: ' . count($fixVersions));
    }
}
作者:petu
项目:ipad-slide
/**
 * Returns the src attribute of the first <img> found in an HTML fragment.
 *
 * @param string $html    Markup to search.
 * @param string $default Value returned when the markup cannot be parsed or contains no <img>.
 *
 * @return mixed The first image's src value, or $default.
 */
public static function getImgSrcFromHtml($html, $default = '')
{
    $dom = HtmlDomParser::str_get_html($html);

    // The parser does not always hand back a document object (presumably
    // for empty markup - confirm against the parser); fall back to the
    // default instead of calling find() on a non-object.
    if (!is_object($dom)) {
        return $default;
    }

    $firstImage = $dom->find('img', 0);

    return $firstImage ? $firstImage->src : $default;
}
作者:lehoikm
项目:Larave-4.
/**
 * Extracts the article markup (summary, body and credit sections) from a page.
 *
 * The page content is obtained through lay_noi_dung(). The outer HTML of the
 * first ".the-article-summary", ".the-article-body" and ".the-article-credit"
 * elements is concatenated in that order; sections missing from the page are
 * skipped.
 *
 * @param string $url Address passed to lay_noi_dung().
 *
 * @return string The concatenated markup; empty when the page cannot be parsed or has none of the sections.
 */
public function boc_tach_noi_dung_bai_viet($url)
{
    $content = $this->lay_noi_dung($url);
    $html = HtmlDomParser::str_get_html($content);

    $noi_dung = '';
    if (!is_object($html)) {
        return $noi_dung;
    }

    $selectors = array('.the-article-summary', '.the-article-body', '.the-article-credit');
    foreach ($selectors as $selector) {
        $section = $html->find($selector, 0);
        // Not every page needs to have all three sections.
        if ($section) {
            $noi_dung .= $section->outertext;
        }
    }

    return $noi_dung;
}
作者:BePsvP
项目:CCU-Plu
/**
 * Parse the grade list.
 *
 * Reads the rows of the first nested table, leaving out rows whose bgcolor
 * is "#4d6eb2", and returns the name and value cell text of each.
 *
 * @param string $content
 * @return array List of ['name' => string, 'value' => string] entries.
 */
protected function parseLists($content)
{
    $table = HtmlDomParser::str_get_html($content)->find('table table', 0);

    $result = [];
    foreach ($table->find('tr[bgcolor!="#4d6eb2"]') as $row) {
        // The value sits in the third cell of four-cell rows, otherwise in the second.
        $valueIndex = (4 === count($row->children)) ? 2 : 1;
        $result[] = [
            'name' => trim($row->children(0)->plaintext),
            'value' => trim($row->children($valueIndex)->plaintext),
        ];
    }

    return $result;
}
作者:shortlist-digita
项目:agreable-catfish-importer-plugi
/**
 * Collects the content of every <loc> element of a sitemap.
 *
 * @param string $sitemapLocation Location handed to the parser's file_get_html().
 *
 * @return array List of the <loc> elements' inner text, in document order as returned by find().
 */
protected static function getUrlsFromSitemap($sitemapLocation)
{
    $document = HtmlDomParser::file_get_html($sitemapLocation);

    $locations = [];
    foreach ($document->find('loc') as $locationNode) {
        array_push($locations, $locationNode->innertext);
    }

    return $locations;
}