Goutte 怎么获取 a 标签里面的 URL?或者有没有好用的 PHP 爬虫库可以推荐?谢谢
<code><?php
require('./Vendor/autoload.php');
use Goutte\Client;
/**
 * Scrapes the news listing page of www.ningshan.gov.cn (Category_90) with
 * Goutte and accumulates the results in the public $_news array.
 *
 * Usage: construct the object (this performs the HTTP request), then call
 * getDate() and getTitle() to fill $_news.
 */
class Spider
{
    /** @var Client Goutte client used to issue the HTTP request. */
    private $_client;

    /** @var object Crawler returned by the client's request() call for the listing page. */
    private $_crawler;

    /**
     * Collected data, one parallel list per field. The 'content' list is
     * declared here but never filled by this class.
     *
     * @var array
     */
    public $_news = [
        'title'   => [],
        'link'    => [],
        'content' => [],
        'source'  => [],
        'date'    => [],
    ];

    /**
     * Creates the client and fetches the listing page.
     *
     * @throws \Exception with code 1 when the client or the request fails;
     *                    the original exception is attached as "previous".
     */
    public function __construct()
    {
        try {
            $this->_client = new Client();
            $this->_crawler = $this->_client->request('GET', 'http://www.ningshan.gov.cn/Category_90/Index.aspx');
            // Timeout configuration left disabled by the original author:
            // $client->getClient()->setDefaultOption('config/curl/'.CURLOPT_TIMEOUT, 10);
        } catch (\Exception $e) {
            // Keep the message and code callers already see, but chain the
            // original exception so its type and stack trace are not lost.
            throw new \Exception($e->getMessage(), 1, $e);
        }
    }

    /**
     * Appends the text of every `div#list > ul > li > span` element to
     * $_news['date'].
     *
     * NOTE(review): getTitle() skips some anchors while this method skips
     * nothing, so the 'date' and 'title' lists may differ in length —
     * confirm against the page markup.
     */
    public function getDate()
    {
        $this->_crawler->filter('div#list>ul>li>span')->each(function ($node) {
            $this->_news['date'][] = $node->text();
        });
    }

    /**
     * Appends the title and URL of every `div#list > ul > li > a` element to
     * $_news, skipping anchors whose text is exactly '宁陕要闻'. Each kept
     * entry is tagged with the source '宁陕要闻'.
     */
    public function getTitle()
    {
        $this->_crawler->filter('div#list>ul>li>a')->each(function ($node) {
            $text = $node->text();
            if ($text === '宁陕要闻') {
                return;
            }

            $this->_news['title'][] = $text;
            // link() returns a Link object; getUri() is a METHOD and must be
            // called with parentheses to obtain the URL string. Storing the
            // string (not the object) is what makes the link appear in the
            // JSON output.
            $this->_news['link'][] = $node->link()->getUri();
            $this->_news['source'][] = '宁陕要闻';
        });
    }
}
//-----------------------------------
// Run the scraper: gather dates and titles, then print everything as JSON.
try {
    $scraper = new Spider();
    $scraper->getDate();
    $scraper->getTitle();
    // JSON_UNESCAPED_UNICODE keeps the Chinese text readable in the output.
    $payload = json_encode($scraper->_news, JSON_UNESCAPED_UNICODE);
    echo $payload;
} catch (Exception $failure) {
    // On any failure, print only the exception message.
    echo $failure->getMessage();
}
</code>Goutte 怎么获取 a 标签里面的 URL?或者有没有好用的 PHP 爬虫库可以推荐?谢谢
<code><?php
require('./Vendor/autoload.php');
use Goutte\Client;
/**
 * Scrapes the news listing page of www.ningshan.gov.cn (Category_90) with
 * Goutte and accumulates the results in the public $_news array.
 *
 * Usage: construct the object (this performs the HTTP request), then call
 * getDate() and getTitle() to fill $_news.
 */
class Spider
{
    /** @var Client Goutte client used to issue the HTTP request. */
    private $_client;

    /** @var object Crawler returned by the client's request() call for the listing page. */
    private $_crawler;

    /**
     * Collected data, one parallel list per field. The 'content' list is
     * declared here but never filled by this class.
     *
     * @var array
     */
    public $_news = [
        'title'   => [],
        'link'    => [],
        'content' => [],
        'source'  => [],
        'date'    => [],
    ];

    /**
     * Creates the client and fetches the listing page.
     *
     * @throws \Exception with code 1 when the client or the request fails;
     *                    the original exception is attached as "previous".
     */
    public function __construct()
    {
        try {
            $this->_client = new Client();
            $this->_crawler = $this->_client->request('GET', 'http://www.ningshan.gov.cn/Category_90/Index.aspx');
            // Timeout configuration left disabled by the original author:
            // $client->getClient()->setDefaultOption('config/curl/'.CURLOPT_TIMEOUT, 10);
        } catch (\Exception $e) {
            // Keep the message and code callers already see, but chain the
            // original exception so its type and stack trace are not lost.
            throw new \Exception($e->getMessage(), 1, $e);
        }
    }

    /**
     * Appends the text of every `div#list > ul > li > span` element to
     * $_news['date'].
     *
     * NOTE(review): getTitle() skips some anchors while this method skips
     * nothing, so the 'date' and 'title' lists may differ in length —
     * confirm against the page markup.
     */
    public function getDate()
    {
        $this->_crawler->filter('div#list>ul>li>span')->each(function ($node) {
            $this->_news['date'][] = $node->text();
        });
    }

    /**
     * Appends the title and URL of every `div#list > ul > li > a` element to
     * $_news, skipping anchors whose text is exactly '宁陕要闻'. Each kept
     * entry is tagged with the source '宁陕要闻'.
     */
    public function getTitle()
    {
        $this->_crawler->filter('div#list>ul>li>a')->each(function ($node) {
            $text = $node->text();
            if ($text === '宁陕要闻') {
                return;
            }

            $this->_news['title'][] = $text;
            // link() returns a Link object; getUri() is a METHOD and must be
            // called with parentheses to obtain the URL string. Storing the
            // string (not the object) is what makes the link appear in the
            // JSON output.
            $this->_news['link'][] = $node->link()->getUri();
            $this->_news['source'][] = '宁陕要闻';
        });
    }
}
//-----------------------------------
// Run the scraper: gather dates and titles, then print everything as JSON.
try {
    $scraper = new Spider();
    $scraper->getDate();
    $scraper->getTitle();
    // JSON_UNESCAPED_UNICODE keeps the Chinese text readable in the output.
    $payload = json_encode($scraper->_news, JSON_UNESCAPED_UNICODE);
    echo $payload;
} catch (Exception $failure) {
    // On any failure, print only the exception message.
    echo $failure->getMessage();
}
</code>
刚查到的示例:
<code>$crawler = $client->request('GET', 'http://www.symfony.com/blog/');
// Example: obtain the URL behind an <a> tag. $client is created outside this
// snippet (presumably a Goutte\Client — confirm against the surrounding code).
// selectLink() picks the anchor by its visible text; link() wraps it in a
// link object.
$link = $crawler->selectLink('Security Advisories')->link();
// getUri() is called as a method (with parentheses) and its result is printed.
print_r($link->getUri());</code>手册:http://symfony.com/doc/curren...
GIT:https://github.com/FriendsOfP...
采集类参考:http://flc.ren/2016/06/528.html
立即学习“PHP免费学习笔记(深入)”;
PHP怎么学习?PHP怎么入门?PHP在哪学?PHP怎么学才快?不用担心,这里为大家提供了PHP速学教程(入门到精通),有需要的小伙伴保存下载就能学习啦!
Copyright 2014-2025 https://www.php.cn/ All Rights Reserved | php.cn | 湘ICP备2023035733号