php生成网站地图
发表于:2024-04-24 00:27:21浏览:164次
引言
如今,随着互联网的发展,越来越多的网站需要提供网站地图(sitemap.xml),帮助搜索引擎爬虫发现并收录网站页面,同时也方便用户浏览和搜索网站内容。本文将介绍如何使用PHP实现一个简单的网站地图生成类(既可以生成sitemap.xml,也可以把链接主动提交到百度),并通过代码示例帮助读者更好地理解。
示例
use app\common\utils\Sitemap;

// Crawl the whole site and write every <a> link that was found to sitemap.xml
$sitemap = new Sitemap("http://www.dazijie.com", Sitemap::TYPE_ALL_XML);
$sitemap->build();

// Write only the <a> links found on one page to sitemap.xml
$sitemap = new Sitemap("https://www.dazijie.com/news", Sitemap::TYPE_ONE_XML);
$sitemap->build();

// Write hand-picked links to a brand-new sitemap.xml (no sitemap.xml exists yet)
$sitemap = new Sitemap("https://www.dazijie.com/news", Sitemap::TYPE_DIY_XML);
$sitemap->addItem("https://www.dazijie.com/news", "0.88", "daily");
$sitemap->addItem("https://www.dazijie.com/cases", "0.95", "monthly");
$sitemap->build();

// Append hand-picked links to a sitemap.xml that already exists
$sitemap = new Sitemap("https://www.dazijie.com/news", Sitemap::TYPE_APPEND_XML);
$sitemap->addItem("https://www.dazijie.com/news", "0.88", "daily");
$sitemap->addItem("https://www.dazijie.com/cases", "0.95", "monthly");
$sitemap->build();

// Crawl the whole site and submit every <a> link that was found to the search engine
$sitemap = new Sitemap("https://www.dazijie.com", Sitemap::TYPE_ALL_POST, ['baidu_token' => 'xxxx']);
$sitemap->build();

// Submit only the <a> links found on one page to the search engine
$sitemap = new Sitemap("https://www.dazijie.com/news", Sitemap::TYPE_ONE_POST, ['baidu_token' => 'xxxx']);
$sitemap->build();

// Submit a hand-picked link (the URL given to the constructor) to the search engine
$sitemap = new Sitemap("https://www.dazijie.com/news", Sitemap::TYPE_DIY_POST, ['baidu_token' => 'xxxx']);
$sitemap->build();
类
<?php
namespace app\common\utils;
/**
 * Builds a sitemap.xml for a site, or collects a site's links and submits them to Baidu.
 *
 * The working mode is chosen with one of the TYPE_* constants passed to the constructor:
 *  - the *_XML modes create (or extend) the file named by the "savename" option;
 *  - the *_POST modes collect URLs and push them to Baidu when "baidu_token" is set.
 *
 * The crawling modes (TYPE_ALL_*, TYPE_ONE_*) download pages from inside the
 * constructor; build() only writes the file or performs the submission.
 *
 * Failures are recorded instead of thrown; read the last one with getError().
 *
 * Class Sitemap
 * @package app\common\utils
 */
class Sitemap
{
    // Mode: crawl the whole site and write every <a> link to sitemap.xml
    const TYPE_ALL_XML = 10;
    // Mode: write the <a> links of a single page to sitemap.xml
    const TYPE_ONE_XML = 20;
    // Mode: write caller-supplied links to a brand-new sitemap.xml
    const TYPE_DIY_XML = 30;
    // Mode: append caller-supplied links to an existing sitemap.xml
    const TYPE_APPEND_XML = 35;
    // Mode: crawl the whole site and submit every <a> link to the search engine
    const TYPE_ALL_POST = 40;
    // Mode: submit the <a> links of a single page to the search engine
    const TYPE_ONE_POST = 50;
    // Mode: submit caller-supplied links to the search engine
    const TYPE_DIY_POST = 60;

    // Active mode (one of the TYPE_* constants)
    private $type;
    // URL handed to the constructor
    private $url;
    // Last recorded error message, null when nothing failed
    private $error;
    // "scheme://host[:port]" of $url without trailing slash, or false when $url is invalid
    private $domain;
    // URLs collected so far (crawl results and/or caller-supplied links)
    private $data;
    // SimpleXMLElement holding the <urlset>; only set in the *_XML modes
    private $xml;
    // Effective options (defaults merged with the caller's)
    private $options;

    /**
     * Stores the configuration and immediately runs the mode-specific preparation
     * (see init()), which for the crawling modes means fetching pages right away.
     *
     * @param string $url     Site root or page URL to work on
     * @param int    $type    One of the TYPE_* constants
     * @param array  $options Overrides for: "changefreq" (default "weekly"),
     *                        "savename" (default "<cwd>/sitemap.xml"),
     *                        "baidu_token" (default "", i.e. no submission)
     */
    public function __construct($url = '', $type = self::TYPE_ALL_XML, $options = [])
    {
        $this->data = [];
        $this->url = $url;
        $this->type = $type;
        $this->domain = $this->checkDomain($url);
        $this->options = array_merge([
            // Update frequency written for crawled pages; one of:
            // "always", "hourly", "daily", "weekly", "monthly", "yearly", "never"
            'changefreq' => 'weekly',
            // Path of the XML file to write (or to append to)
            'savename' => getcwd() . '/sitemap.xml',
            // Token for the Baidu link-submission API
            'baidu_token' => '',
        ], $options);
        $this->init();
    }

    /**
     * Prepares the internal state for the selected mode.
     */
    private function init()
    {
        switch ($this->type) {
            case self::TYPE_ALL_XML:
            case self::TYPE_ONE_XML:
                $this->newXml();
                $this->make($this->url);
                break;
            case self::TYPE_DIY_XML:
                $this->newXml();
                break;
            case self::TYPE_APPEND_XML:
                $this->readXml();
                break;
            case self::TYPE_ALL_POST:
            case self::TYPE_ONE_POST:
                $this->hrefList($this->url);
                break;
            case self::TYPE_DIY_POST:
                $this->setData($this->url);
                break;
        }
    }

    /**
     * Starts an empty <urlset> document.
     *
     * @return $this
     */
    private function newXml()
    {
        $this->xml = new \SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"></urlset>');
        return $this;
    }

    /**
     * Loads the existing sitemap named by the "savename" option.
     *
     * When the file is missing or cannot be parsed, the problem is recorded with
     * setError() and an empty document is started instead, so that later addItem()
     * calls do not crash on a non-object.
     *
     * @return $this
     */
    private function readXml()
    {
        $file = $this->options['savename'];
        // Collect libxml parse errors internally instead of emitting PHP warnings.
        $previous = libxml_use_internal_errors(true);
        $loaded = is_file($file) ? simplexml_load_file($file) : false;
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if (!($loaded instanceof \SimpleXMLElement)) {
            $this->setError('无法读取已有的sitemap.xml,已改为新建。');
            return $this->newXml();
        }
        $this->xml = $loaded;
        return $this;
    }

    /**
     * Crawls $url and adds the page itself plus every collected link to the XML.
     *
     * The priority of a link drops with the depth of its path: the site root gets
     * 1.00, one path segment 0.90, two or more segments 0.80. A trailing slash does
     * not count as an extra segment.
     *
     * @param string $url
     * @return $this
     */
    private function make($url)
    {
        $this->addItem($url, '1.00');

        $priorities = ['1.00', '0.90', '0.80'];
        $deepest = count($priorities) - 1;

        foreach ($this->hrefList($url) as $href) {
            $path = parse_url($href, PHP_URL_PATH);
            $depth = 0;
            if (is_string($path)) {
                $trimmed = trim($path, '/');
                $depth = $trimmed === '' ? 0 : count(explode('/', $trimmed));
            }
            $this->addItem($href, $priorities[min($depth, $deepest)], $this->options['changefreq']);
        }
        return $this;
    }

    /**
     * Adds one URL.
     *
     * In the *_XML modes a <url> entry is appended to the document. In the *_POST
     * modes there is no XML document, so the URL is queued for submission instead and
     * $priority, $changefreq and $lastmod are ignored.
     *
     * @param string $loc        Required. Permanent page URL, e.g. http://xxx.xx.com/article/100.html
     * @param string $priority   Priority relative to other pages, 0-1, higher means more important, e.g. 0.97, 1.00, 0.80
     * @param string $changefreq Optional. One of "always", "hourly", "daily", "weekly", "monthly", "yearly", "never"
     * @param string $lastmod    Optional. Last modification date, e.g. 2005-06-04
     * @return $this
     */
    public function addItem(string $loc, $priority = '0.80', string $changefreq = '', string $lastmod = '')
    {
        if (!($this->xml instanceof \SimpleXMLElement)) {
            $this->setData($loc);
            return $this;
        }

        $entry = $this->xml->addChild('url');
        // addChild() does not escape "&", so the value has to be escaped beforehand.
        $entry->addChild('loc', htmlspecialchars($loc));
        if ($lastmod !== '') {
            $entry->addChild('lastmod', $lastmod);
        }
        if ($changefreq !== '') {
            $entry->addChild('changefreq', $changefreq);
        }
        $entry->addChild('priority', (string) $priority);
        return $this;
    }

    /**
     * Produces the result for the selected mode: writes the XML file, or submits
     * the collected URLs. Problems are available through getError().
     *
     * @return $this
     */
    public function build()
    {
        switch ($this->type) {
            case self::TYPE_ALL_XML:
            case self::TYPE_ONE_XML:
            case self::TYPE_DIY_XML:
            case self::TYPE_APPEND_XML:
                $this->buildXml();
                break;
            case self::TYPE_ALL_POST:
            case self::TYPE_ONE_POST:
            case self::TYPE_DIY_POST:
                $this->buildPostSpider();
                break;
            default:
                break;
        }
        return $this;
    }

    /**
     * Serialises the document and writes it to the "savename" path, creating the
     * target directory when necessary.
     *
     * @return bool|string The written path, or false (with an error recorded) on failure
     */
    private function buildXml()
    {
        $content = ($this->xml instanceof \SimpleXMLElement) ? $this->xml->asXML() : false;
        if (!$content) {
            return $this->setError('无法保存XML,内容为空。');
        }

        $savename = $this->options['savename'];
        $dir = dirname($savename);
        // The trailing is_dir() covers the case where the directory appeared in the meantime.
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            return $this->setError('无法创建目录:' . $dir);
        }
        if (file_put_contents($savename, $content) === false) {
            return $this->setError('无法写入文件:' . $savename);
        }
        return $savename;
    }

    /**
     * Submits the collected URLs to the search engine.
     *
     * Nothing is sent when no "baidu_token" is configured (this is not an error) or
     * when no URL was collected.
     *
     * @return bool False (with an error recorded) when there was nothing to send or the request failed
     */
    private function buildPostSpider()
    {
        if (empty($this->options['baidu_token'])) {
            return true;
        }
        $urls = array_values(array_unique($this->getData()));
        if (empty($urls)) {
            return $this->setError('没有可提交的链接');
        }
        $response = $this->postBaidu($urls);
        if (empty($response['status'])) {
            return $this->setError($response['msg']);
        }
        return true;
    }

    /**
     * Downloads $url and collects the same-site links of its <a> tags.
     *
     * In the TYPE_ALL_* modes every newly found link is crawled recursively; a link
     * is only followed the first time it is seen, which bounds the recursion.
     * Pages that cannot be fetched are skipped silently.
     *
     * @param string $url
     * @return array All URLs collected so far
     */
    private function hrefList($url)
    {
        // Without a valid base there is nothing to resolve links against.
        if (!is_string($this->domain) || !is_string($url) || $url === '') {
            return $this->getData();
        }

        // Deliberate best effort: an unreachable page must not abort the crawl.
        $html = @file_get_contents($url);
        if ($html === false || $html === '') {
            // DOMDocument::loadHTML() throws a ValueError on empty input in PHP 8.
            return $this->getData();
        }

        $dom = new \DOMDocument();
        // Real-world HTML is rarely valid; keep libxml quiet while parsing.
        $previous = libxml_use_internal_errors(true);
        $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        // Loose comparison on purpose: callers may pass the type as a numeric string.
        $recursive = $this->type == self::TYPE_ALL_XML || $this->type == self::TYPE_ALL_POST;
        $scheme = (string) parse_url($this->domain, PHP_URL_SCHEME);

        foreach ($dom->getElementsByTagName('a') as $link) {
            $href = $this->trimall($link->getAttribute('href'));
            // A fragment addresses a position inside a page, not a different page.
            $hashAt = strpos($href, '#');
            if ($hashAt !== false) {
                $href = substr($href, 0, $hashAt);
            }
            if ($this->isStart($href, '//')) {
                // Protocol-relative link: it names its own host, only the scheme is inherited.
                $href = $scheme . ':' . $href;
            } elseif ($this->isStart($href, '/')) {
                $href = $this->domain . $href;
            }
            if ($href === '' || !$this->isNext($href)) {
                continue;
            }
            $this->setData($href);
            if ($recursive) {
                $this->hrefList($href);
            }
        }
        return $this->getData();
    }

    /**
     * Decides whether a link should be collected.
     *
     * Rejected are: the site root itself, links that do not belong to this site
     * (the host must match exactly, a mere string prefix is not enough) and links
     * that were collected before.
     *
     * @param string $href
     * @return bool
     */
    private function isNext($href)
    {
        $domain = $this->domain;
        if (!is_string($domain) || $domain === '' || !is_string($href)) {
            return false;
        }
        if ($href === '/' || $href === $domain || $href === $domain . '/') {
            return false;
        }
        if (!$this->isStart($href, '/')) {
            if (!$this->isStart($href, $domain)) {
                return false;
            }
            // "https://a.com" must not accept "https://a.com.evil.org/...".
            $boundary = (string) substr($href, strlen($domain), 1);
            if ($boundary !== '' && !in_array($boundary, ['/', '?'], true)) {
                return false;
            }
        }
        return !in_array($href, $this->getData(), true);
    }

    /**
     * Extracts the origin of a URL.
     *
     * @param string $domain Any absolute URL
     * @return bool|string "scheme://host" (plus ":port" when the URL names one) without
     *                     trailing slash, or false (with an error recorded) when the URL
     *                     has no scheme or host
     */
    private function checkDomain(string $domain)
    {
        $parts = parse_url($domain);
        if (is_array($parts) && !empty($parts['scheme']) && !empty($parts['host'])) {
            $origin = $parts['scheme'] . '://' . $parts['host'];
            if (isset($parts['port'])) {
                $origin .= ':' . $parts['port'];
            }
            return $origin;
        }
        return $this->setError('无效域名');
    }

    /**
     * Checks whether a string contains any of the given substrings.
     * Empty needles are ignored.
     *
     * @param string $str
     * @param string|array $needles
     * @return bool
     */
    public function inStr(string $str, $needles): bool
    {
        foreach ((array) $needles as $needle) {
            if ('' != $needle && mb_strpos($str, $needle) !== false) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether a string ends with any of the given substrings.
     *
     * @param string $str
     * @param string|array $needles
     * @return bool
     */
    public function isEnd(string $str, $needles): bool
    {
        foreach ((array) $needles as $needle) {
            $needle = (string) $needle;
            // A negative offset takes the tail of $str that is as long as the needle.
            $tail = mb_substr($str, -mb_strlen($needle, 'UTF-8'), null, 'UTF-8');
            if ($needle === $tail) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether a string starts with any of the given substrings.
     * Empty needles are ignored.
     *
     * @param string $str
     * @param string|array $needles
     * @return bool
     */
    public function isStart(string $str, $needles): bool
    {
        foreach ((array) $needles as $needle) {
            if ('' != $needle && mb_strpos($str, $needle) === 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes every blank character (spaces, tabs, line breaks) from a string.
     *
     * @param string|array $str
     * @return array|string|string[]
     */
    public function trimall($str)
    {
        // NOTE(review): the original list held two entries that both render as a plain
        // space; the second was presumably a full-width or non-breaking space, so those
        // two are spelled out with escapes here. Confirm against the upstream source.
        $blanks = [' ', "\u{3000}", "\u{00A0}", "\t", "\n", "\r"];
        return str_replace($blanks, '', $str);
    }

    /**
     * Records an error message.
     *
     * @param string $value
     * @return bool Always false, so callers can write "return $this->setError(...)"
     */
    private function setError($value)
    {
        $this->error = $value;
        return false;
    }

    /**
     * Returns the most recently recorded error message.
     *
     * @return mixed The message, or null when no error was recorded
     */
    public function getError()
    {
        return $this->error;
    }

    /**
     * Adds one URL, or a list of URLs, to the collected data.
     *
     * @param string|array $value
     * @return bool Always true
     */
    private function setData($value)
    {
        if (is_array($value)) {
            $this->data = array_merge($this->data, $value);
        } else {
            $this->data[] = $value;
        }
        return true;
    }

    /**
     * Returns the URLs collected so far.
     *
     * @return array
     */
    public function getData()
    {
        return $this->data;
    }

    /**
     * Submits URLs to Baidu, one URL per line in a text/plain body.
     * Submission console: https://ziyuan.baidu.com/linksubmit/index
     *
     * @param array $urls
     * @return array Result of post(): ['status' => 0|1, 'msg' => string, 'data' => mixed]
     */
    private function postBaidu($urls)
    {
        $token = $this->options['baidu_token'] ?? '';
        // The "site" parameter is the origin without its scheme.
        $site = str_replace(['http://', 'https://'], '', (string) $this->domain);
        $endpoint = 'http://data.zz.baidu.com/urls?site=' . urlencode($site) . '&token=' . urlencode((string) $token);
        return $this->post($endpoint, implode("\n", $urls), [
            'httpheader' => ['Content-Type: text/plain'],
        ]);
    }

    /**
     * Sends an HTTP POST request with cURL.
     *
     * Recognised $options keys: "file_path" / "file_field" (upload a file instead of
     * $data), "timeout" (seconds), "httpheader" (list of header lines), "useragent"
     * (truthy: send a desktop Chrome user agent), "proxy" (host:port), "cookie",
     * "userpwd" (user:password), "ssl" (truthy: see the warning in the body) with
     * "ssl_cert" and "ssl_key".
     *
     * @param string $url
     * @param string|array $data Request body
     * @param array $options
     * @return array ['status' => 1, 'msg' => 'success', 'data' => <response body>] on success,
     *               ['status' => 0, 'msg' => <cURL error>, 'data' => null] on failure
     */
    private function post($url, $data, $options = [])
    {
        $ch = curl_init();
        if ($ch === false) {
            return ['status' => 0, 'msg' => 'curl_init failed', 'data' => null];
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_POST, true);
        // Return the response as a string instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

        // File upload replaces the body, e.g. /path/to/your/file.txt
        if (!empty($options['file_path'])) {
            $field = $options['file_field'] ?? 'file';
            $data = [$field => curl_file_create($options['file_path'])];
        }
        if (!empty($data)) {
            curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
        }
        // Timeout in seconds, e.g. 3
        if (!empty($options['timeout'])) {
            curl_setopt($ch, CURLOPT_TIMEOUT, $options['timeout']);
        }
        // Header lines, e.g. ['Content-Type: text/plain']
        if (!empty($options['httpheader'])) {
            curl_setopt($ch, CURLOPT_HTTPHEADER, $options['httpheader']);
        }
        if (!empty($options['useragent'])) {
            curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36");
        }
        // Proxy, e.g. proxy.example.com:8080
        if (!empty($options['proxy'])) {
            curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, true);
            curl_setopt($ch, CURLOPT_PROXY, $options['proxy']);
        }
        // Cookie header, e.g. username=John Doe; usertype=member
        if (!empty($options['cookie'])) {
            curl_setopt($ch, CURLOPT_COOKIE, $options['cookie']);
        }
        // HTTP authentication, e.g. username:password
        if (!empty($options['userpwd'])) {
            curl_setopt($ch, CURLOPT_USERPWD, $options['userpwd']);
        }
        if (!empty($options['ssl'])) {
            // Client certificate, e.g. /path/to/your/certificate.crt
            if (!empty($options['ssl_cert'])) {
                curl_setopt($ch, CURLOPT_SSLCERT, $options['ssl_cert']);
            }
            // Private key of the client certificate, e.g. /path/to/your/private_key.pem
            if (!empty($options['ssl_key'])) {
                curl_setopt($ch, CURLOPT_SSLKEY, $options['ssl_key']);
            }
            // WARNING: the "ssl" option switches OFF verification of the server's
            // certificate and host name (meant for self-signed certificates). Do not
            // enable it for endpoints reached over untrusted networks.
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
        }

        $result = curl_exec($ch);
        $error = curl_error($ch);
        curl_close($ch);

        if ($error) {
            return ['status' => 0, 'msg' => $error, 'data' => null];
        }
        return ['status' => 1, 'msg' => 'success', 'data' => $result];
    }
}
栏目分类全部>