您的当前位置:首页>全部文章>文章详情

PHP生成网站地图

发表于:2024-04-24 00:27:21 | 浏览:164次 | TAG: #seo #搜索引擎 #收录 #爬虫 #sitemap

引言

如今,随着互联网的发展,越来越多的网站需要提供网站地图(sitemap)功能,帮助用户和搜索引擎更好地浏览、抓取和收录网站内容。本文将介绍如何使用PHP实现一个简单的网站地图功能,并通过代码示例帮助读者更好地理解。

示例

// Crawl the whole site starting at the given URL and write the collected <a> links to sitemap.xml.
// The crawl runs inside the constructor; build() writes the file to the 'savename' option
// (default: getcwd() . '/sitemap.xml').
$sitemap = new \app\common\utils\Sitemap("http://www.dazijie.com",\app\common\utils\Sitemap::TYPE_ALL_XML);
$sitemap->build();

// Collect only the <a> links found on one page and write them to sitemap.xml.
$sitemap = new \app\common\utils\Sitemap("https://www.dazijie.com/news",\app\common\utils\Sitemap::TYPE_ONE_XML);
$sitemap->build();

// Write hand-picked links to a brand-new sitemap.xml (no crawling is done in this mode).
// addItem() arguments: URL, priority, change frequency.
$sitemap = new \app\common\utils\Sitemap("https://www.dazijie.com/news",\app\common\utils\Sitemap::TYPE_DIY_XML);
$sitemap->addItem("https://www.dazijie.com/news","0.88","daily");
$sitemap->addItem("https://www.dazijie.com/cases","0.95","monthly");
$sitemap->build();

// Append links to an existing sitemap.xml (the file named by the 'savename' option is loaded first).
$sitemap = new \app\common\utils\Sitemap("https://www.dazijie.com/news",\app\common\utils\Sitemap::TYPE_APPEND_XML);
$sitemap->addItem("https://www.dazijie.com/news","0.88","daily");
$sitemap->addItem("https://www.dazijie.com/cases","0.95","monthly");
$sitemap->build();

// Crawl the whole site and submit the collected links to the search engine.
// Nothing is submitted unless the 'baidu_token' option is non-empty.
$sitemap = new \app\common\utils\Sitemap("https://www.dazijie.com",\app\common\utils\Sitemap::TYPE_ALL_POST,['baidu_token'=>'xxxx']);
$sitemap->build();

// Collect the <a> links of one page and submit them to the search engine.
$sitemap = new \app\common\utils\Sitemap("https://www.dazijie.com/news",\app\common\utils\Sitemap::TYPE_ONE_POST,['baidu_token'=>'xxxx']);
$sitemap->build();

// Submit a custom link to the search engine: in this mode the URL passed to the
// constructor is itself the link that gets submitted.
$sitemap = new \app\common\utils\Sitemap("https://www.dazijie.com/news",\app\common\utils\Sitemap::TYPE_DIY_POST,['baidu_token'=>'xxxx']);
$sitemap->build();

<?php
namespace app\common\utils;
/**
 * Generates a sitemap.xml file and can submit the collected URLs to a search engine (Baidu).
 *
 * Work is split in two phases:
 *  - the constructor prepares the data for the chosen mode (crawling pages, creating or
 *    loading the XML document, or seeding the URL list);
 *  - build() either writes the XML document to disk or posts the URL list to the search engine.
 *
 * Failures never throw; the last failure message is available through getError().
 *
 * Class Sitemap
 * @package app\common\utils
 */
class Sitemap {
    // Mode: crawl the whole site and write every same-site <a> link to sitemap.xml
    const TYPE_ALL_XML = 10;
    // Mode: collect the <a> links of a single page and write them to sitemap.xml
    const TYPE_ONE_XML = 20;
    // Mode: write caller-supplied links to a new sitemap.xml
    const TYPE_DIY_XML = 30;
    // Mode: append caller-supplied links to an existing sitemap.xml
    const TYPE_APPEND_XML = 35;
    // Mode: crawl the whole site and submit the links to the search engine
    const TYPE_ALL_POST = 40;
    // Mode: collect the <a> links of a single page and submit them to the search engine
    const TYPE_ONE_POST = 50;
    // Mode: submit caller-supplied links to the search engine
    const TYPE_DIY_POST = 60;
    // Mode selected for this instance (one of the TYPE_* constants)
    private $type;
    // URL passed to the constructor
    private $url;
    // Last error message, or null when nothing failed
    private $error;
    // "scheme://host[:port]" of the constructor URL (no trailing slash), or false when invalid
    private $domain;
    // Collected URLs (crawl results and/or links queued for submission)
    private $data;
    // SimpleXMLElement holding the <urlset> document; null in the POST modes
    private $xml;
    // Effective options (defaults merged with the caller's overrides)
    private $options;
    /**
     * Prepares the instance for the chosen mode. In the crawling modes the HTTP
     * requests are performed here, not in build().
     *
     * @param string $url     Start URL (crawl modes) or the link itself (TYPE_DIY_POST)
     * @param int    $type    One of the TYPE_* constants
     * @param array  $options Overrides for: 'changefreq', 'savename', 'baidu_token'
     */
    public function __construct($url = '',$type = self::TYPE_ALL_XML,$options = [])
    {
        $this->data = [];
        $this->url = $url;
        $this->type = $type;
        $this->domain = $this->checkDomain((string) $url);
        $this->options = array_merge([
            // Change frequency for crawled pages: "always", "hourly", "daily", "weekly", "monthly", "yearly", "never"
            'changefreq'=>'weekly',
            // Path the XML file is written to / read from
            'savename'=>getcwd() . '/sitemap.xml',
            // Baidu link-submission token; submission is skipped when empty
            'baidu_token'=>'',
        ],(array) $options);
        $this->init();
    }

    /**
     * Runs the mode-specific preparation step.
     */
    private function init()
    {
        switch ($this->type) {
            case self::TYPE_ALL_XML:
            case self::TYPE_ONE_XML:
                $this->newXml();
                $this->make($this->url);
                break;
            case self::TYPE_DIY_XML:
                $this->newXml();
                break;
            case self::TYPE_APPEND_XML:
                $this->readXml();
                break;
            case self::TYPE_ALL_POST:
            case self::TYPE_ONE_POST:
                $this->hrefList($this->url);
                break;
            case self::TYPE_DIY_POST:
                $this->setData($this->url);
                break;
            default:
                // Unknown mode: record it instead of failing later with a null XML object.
                $this->setError('未知的类型');
                break;
        }
    }

    /**
     * Creates an empty <urlset> document.
     * @return $this
     */
    private function newXml()
    {
        $this->xml = new \SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"></urlset>');
        return $this;
    }

    /**
     * Loads the existing XML file named by the 'savename' option.
     *
     * A missing file is treated as "nothing to append to yet" and an empty document is
     * created. A file that exists but cannot be parsed is left untouched: the error is
     * recorded and no XML object is set, so a later build() will not overwrite it.
     *
     * @return $this
     */
    private function readXml()
    {
        $file = $this->options['savename'];
        if (!is_string($file) || !is_file($file)) {
            return $this->newXml();
        }
        // Collect libxml parse errors internally instead of emitting PHP warnings.
        $previous = libxml_use_internal_errors(true);
        $xml = simplexml_load_file($file);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        if ($xml === false) {
            $this->setError('无法解析已有的XML文件:' . $file);
            return $this;
        }
        $this->xml = $xml;
        return $this;
    }
    /**
     * Crawls starting at $url and adds one <url> entry per discovered link.
     *
     * The start URL gets priority 1.00; other links get a priority derived from the
     * number of non-empty path segments (0 => 1.00, 1 => 0.90, 2 or more => 0.80).
     *
     * @param $url
     * @return $this
     */
    private function make($url)
    {
        $this->addItem($url,'1.00');
        $priority = [
            0=>'1.00',
            1=>'0.90',
            2=>'0.80'
        ];
        $deepest = count($priority) - 1;
        foreach ($this->hrefList($url) as $href) {
            // The start URL was already written above; do not list it twice.
            if ($href === $url) {
                continue;
            }
            $depth = 0;
            $path = parse_url($href, PHP_URL_PATH);
            if (is_string($path)) {
                // Count real segments only, so "/news" and "/news/" rank the same.
                $segments = array_filter(explode('/', $path), function ($segment) {
                    return $segment !== '';
                });
                $depth = min(count($segments), $deepest);
            }
            $this->addItem($href,$priority[$depth],$this->options['changefreq']);
        }
        return $this;
    }

    /**
     * Adds a URL.
     *
     * In the XML modes a <url> element is appended to the document. In the POST modes
     * (where no XML document exists) the URL is queued for submission instead, and
     * $priority / $changefreq / $lastmod are ignored.
     *
     * @param string $loc Required. Permanent page URL, e.g. http://xxx.xx.com/article/100.html
     * @param string $priority Priority relative to other pages, 0-1, e.g. 0.97, 1.00, 0.80
     * @param string $changefreq Optional. One of "always", "hourly", "daily", "weekly", "monthly", "yearly", "never"
     * @param string $lastmod Optional. Last modification date, e.g. 2005-06-04
     * @return $this
     */
    public function addItem(string $loc,$priority = '0.80',string $changefreq = '',string $lastmod = '')
    {
        if (!$this->xml instanceof \SimpleXMLElement) {
            if ($this->isPostType()) {
                if ($loc !== '' && !in_array($loc, $this->data, true)) {
                    $this->setData($loc);
                }
            } else {
                $this->setError('XML对象未初始化,无法添加链接');
            }
            return $this;
        }
        // Element order follows the sitemap schema: loc, lastmod, changefreq, priority.
        $url = $this->xml->addChild('url');
        $url->addChild('loc',$this->xmlText($loc));
        if ($lastmod !== '') {
            $url->addChild('lastmod',$this->xmlText($lastmod));
        }
        if ($changefreq !== '') {
            $url->addChild('changefreq',$this->xmlText($changefreq));
        }
        $url->addChild('priority',$this->xmlText((string) $priority));
        return $this;
    }

    /**
     * Escapes a value for SimpleXMLElement::addChild(), which does not escape "&" itself.
     * @param string $value
     * @return string
     */
    private function xmlText($value)
    {
        return htmlspecialchars($value, ENT_XML1 | ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Tells whether the current mode submits to the search engine rather than writing XML.
     * @return bool
     */
    private function isPostType()
    {
        // Loose comparison on purpose: the mode may have been passed as a numeric string.
        return in_array($this->type, [self::TYPE_ALL_POST, self::TYPE_ONE_POST, self::TYPE_DIY_POST]);
    }

    /**
     * Executes the mode's output step: writes the XML file or submits the URLs.
     * Failures are reported through getError().
     * @return $this
     */
    public function build()
    {
        switch ($this->type) {
            case self::TYPE_ALL_XML:
            case self::TYPE_ONE_XML:
            case self::TYPE_DIY_XML:
            case self::TYPE_APPEND_XML:
                $this->buildXml();
                break;
            case self::TYPE_ALL_POST:
            case self::TYPE_ONE_POST:
            case self::TYPE_DIY_POST:
                $this->buildPostSpider();
                break;
            default:
                break;
        }
        return $this;
    }

    /**
     * Serialises the XML document and writes it to the 'savename' path,
     * creating the target directory when needed.
     * @return bool|string The written path, or false on failure (see getError())
     */
    private function buildXml()
    {
        if (!$this->xml instanceof \SimpleXMLElement) {
            return $this->setError('无法保存XML,内容为空。');
        }
        $xml_string = $this->xml->asXML();
        if (!$xml_string) {
            return $this->setError('无法保存XML,内容为空。');
        }
        $savename = $this->options['savename'];
        $dir = dirname($savename);
        // Re-check is_dir() after a failed mkdir(): another process may have created it meanwhile.
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            return $this->setError('无法创建目录:' . $dir);
        }
        if (file_put_contents($savename, $xml_string) === false) {
            return $this->setError('无法写入文件:' . $savename);
        }
        return $savename;
    }

    /**
     * Submits the collected URLs to the search engine.
     * Does nothing (and returns true) when no 'baidu_token' is configured.
     * @return bool false when the submission failed (see getError())
     */
    private function buildPostSpider()
    {
        if (empty($this->options['baidu_token'])) {
            return true;
        }
        $urls = array_values(array_filter($this->getData(), function ($url) {
            return is_string($url) && $url !== '';
        }));
        if (!$urls) {
            return $this->setError('没有可提交的链接');
        }
        $response = $this->postBaidu($urls);
        if (empty($response['status'])) {
            return $this->setError('提交百度失败:' . $response['msg']);
        }
        // NOTE(review): Baidu is expected to answer with JSON carrying an "error" key on
        // rejection (e.g. invalid token) — confirm against the current API documentation.
        $body = json_decode((string) $response['data'], true);
        if (is_array($body) && isset($body['error'])) {
            return $this->setError('提交百度失败:' . ($body['message'] ?? $body['error']));
        }
        return true;
    }

    /**
     * Collects same-site <a> links reachable from $url.
     *
     * In the whole-site modes every newly found link is visited too. The crawl is
     * iterative (a work queue, breadth-first) so that only one DOM is alive at a time
     * and deep sites cannot exhaust the call stack.
     *
     * @param $url
     * @return array|mixed All URLs collected so far
     */
    private function hrefList($url)
    {
        // Without a valid http(s) origin nothing is fetched; this also keeps
        // file_get_contents() away from local paths.
        if (empty($this->domain)) {
            return $this->getData();
        }
        // Loose comparison on purpose: the mode may have been passed as a numeric string.
        $crawlSite = $this->type == self::TYPE_ALL_XML || $this->type == self::TYPE_ALL_POST;
        $pending = [$url];
        for ($i = 0; $i < count($pending); $i++) {
            foreach ($this->fetchHrefs($pending[$i]) as $raw) {
                $href = $this->normalizeHref($raw);
                if ($href === '' || !$this->isNext($href)) {
                    continue;
                }
                $this->setData($href);
                if ($crawlSite) {
                    $pending[] = $href;
                }
            }
        }
        return $this->getData();
    }

    /**
     * Downloads a page and returns the raw href attribute of every <a> element.
     * Unreachable or empty pages yield an empty list.
     * @param string $url
     * @return array
     */
    private function fetchHrefs($url)
    {
        // "@" silences the warning for dead links; the failure is handled just below.
        $html = @file_get_contents($url);
        if ($html === false || trim($html) === '') {
            // DOMDocument::loadHTML() rejects an empty source on PHP 8.
            return [];
        }
        $dom = new \DOMDocument();
        // Real-world HTML is rarely valid; keep libxml's complaints out of the output.
        $previous = libxml_use_internal_errors(true);
        $loaded = $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        if (!$loaded) {
            return [];
        }
        $hrefs = [];
        foreach ($dom->getElementsByTagName('a') as $link) {
            $hrefs[] = $link->getAttribute('href');
        }
        return $hrefs;
    }

    /**
     * Turns a raw href into an absolute URL candidate: whitespace and the "#fragment"
     * are removed, "//host/..." gets the site's scheme and "/path" gets the site origin.
     * Other values are returned unchanged.
     * @param string $href
     * @return string Empty string when nothing usable remains
     */
    private function normalizeHref($href)
    {
        $href = $this->trimall((string) $href);
        $hashAt = strpos($href, '#');
        if ($hashAt !== false) {
            $href = (string) substr($href, 0, $hashAt);
        }
        if ($href === '') {
            return '';
        }
        if (strncmp($href, '//', 2) === 0) {
            // Protocol-relative link: it names its own host, so only the scheme is added.
            return parse_url($this->domain, PHP_URL_SCHEME) . ':' . $href;
        }
        if ($href[0] === '/') {
            return $this->domain . $href;
        }
        return $href;
    }

    /**
     * Decides whether a link should be added to the collected data: it must belong to
     * this site, must not be the site root and must not have been collected already.
     * @param $href
     * @return bool
     */
    private function isNext($href)
    {
        $domain = (string) $this->domain;
        if ($domain === '' ||
            $href === '/' ||
            $href === $domain ||
            $href === $domain.'/'
        ) {
            return false;
        }
        if (!$this->isStart($href,'/') && !$this->isSameSite($href)) {
            return false;
        }
        return !in_array($href,$this->getData(),true);
    }

    /**
     * Checks that $href starts with the site origin and that the origin is followed by a
     * URL delimiter, so "https://a.com.evil.com" is not mistaken for "https://a.com".
     * @param string $href
     * @return bool
     */
    private function isSameSite($href)
    {
        $domain = (string) $this->domain;
        $length = strlen($domain);
        if ($length === 0 || strncmp($href, $domain, $length) !== 0) {
            return false;
        }
        // substr() yields '' (PHP 8) or false (PHP 7) when the href is exactly the origin.
        $next = substr($href, $length, 1);
        return $next === '' || $next === false || in_array($next, ['/', '?', '#'], true);
    }

    /**
     * Validates the URL and extracts its origin. Only http and https are accepted.
     * @param string $domain
     * @return bool|string "scheme://host[:port]" without trailing slash, or false when invalid
     */
    private function checkDomain(string $domain)
    {
        $res = parse_url($domain);
        if (is_array($res) &&
            !empty($res['scheme']) &&
            !empty($res['host']) &&
            in_array(strtolower($res['scheme']), ['http', 'https'], true)
        ) {
            $origin = $res['scheme'].'://'.$res['host'];
            // Keep a non-default port, otherwise relative links resolve to the wrong server.
            if (isset($res['port'])) {
                $origin .= ':'.$res['port'];
            }
            return $origin;
        }
        return $this->setError('无效域名');
    }

    /**
     * Checks whether the string contains any of the given substrings.
     * Empty needles are ignored.
     * @param string       $str
     * @param string|array $needles
     * @return bool
     */
    public function inStr(string $str, $needles): bool
    {
        foreach ((array) $needles as $needle) {
            $needle = (string) $needle;
            if ($needle !== '' && mb_strpos($str, $needle) !== false) {
                return true;
            }
        }
        return false;
    }


    /**
     * Checks whether the string ends with any of the given substrings.
     * Empty needles are ignored.
     *
     * @param  string       $str
     * @param  string|array $needles
     * @return bool
     */
    public function isEnd(string $str, $needles): bool
    {
        foreach ((array) $needles as $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }
            // A negative offset takes the last N characters of the string.
            if ($needle === mb_substr($str, -mb_strlen($needle, 'UTF-8'), null, 'UTF-8')) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether the string starts with any of the given substrings.
     * Empty needles are ignored.
     *
     * @param  string       $str
     * @param  string|array $needles
     * @return bool
     */
    public function isStart(string $str, $needles): bool
    {
        foreach ((array) $needles as $needle) {
            $needle = (string) $needle;
            if ($needle !== '' && strncmp($str, $needle, strlen($needle)) === 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes all whitespace from a string: ASCII space, ideographic space (U+3000),
     * no-break space (U+00A0), tab, line feed and carriage return.
     * @param $str
     * @return array|string|string[]
     */
    public function trimall($str)
    {
        $match = [' ', "\u{3000}", "\u{00A0}", "\t", "\n", "\r"];
        return str_replace($match,'',$str);
    }

    /**
     * Records an error message.
     * @param $value
     * @return bool Always false, so callers can write "return $this->setError(...)"
     */
    private function setError($value)
    {
        $this->error = $value;
        return false;
    }

    /**
     * Returns the last recorded error message.
     * @return mixed null when no error was recorded
     */
    public function getError()
    {
        return $this->error;
    }

    /**
     * Appends one URL, or merges a list of URLs, into the collected data.
     * @param $value
     * @return bool Always true
     */
    private function setData($value)
    {
        if (is_array($value)) {
            $this->data = array_merge($this->data,$value);
        }
        else {
            $this->data[] = $value;
        }
        return true;
    }

    /**
     * Returns the collected URLs.
     * @return array
     */
    public function getData()
    {
        return $this->data;
    }

    /**
     * Submits URLs to Baidu, one URL per line in a text/plain body.
     * Baidu link submission: https://ziyuan.baidu.com/linksubmit/index
     * @param $urls
     * @return array Result of post()
     */
    private function postBaidu($urls)
    {
        $token = (string) ($this->options['baidu_token'] ?? '');
        $site = str_replace(['http://', 'https://'], '', (string) $this->domain);
        // The token is URL-encoded as well, so special characters cannot break the query string.
        $url = 'http://data.zz.baidu.com/urls?site=' . urlencode($site) . '&token=' . urlencode($token);
        return $this->post($url,implode("\n", $urls),[
            'httpheader'=>['Content-Type: text/plain'],
        ]);
    }

    /**
     * Sends an HTTP POST request with cURL.
     *
     * Supported options: 'file_path' / 'file_field' (upload a file instead of $data),
     * 'timeout' (seconds, default 30), 'httpheader', 'useragent', 'proxy', 'cookie',
     * 'userpwd', 'ssl' with 'ssl_cert' / 'ssl_key'.
     *
     * @param $url
     * @param $data
     * @param array $options
     * @return array ['status' => 1|0, 'msg' => string, 'data' => response body or null]
     */
    private function post($url,$data,$options = [])
    {
        $ch = curl_init();
        if ($ch === false) {
            return ['status'=>0,'msg'=>'curl init failed','data'=>null];
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_POST, true);
        // File upload, e.g. /path/to/your/file.txt (replaces $data)
        if (!empty($options['file_path'])) {
            $field = $options['file_field'] ?? 'file';
            $data = [$field=>curl_file_create($options['file_path'])];
        }
        if (!empty($data)) {
            curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
        }
        // Always bound the request time so an unresponsive server cannot hang the caller.
        curl_setopt($ch, CURLOPT_TIMEOUT, !empty($options['timeout']) ? $options['timeout'] : 30);
        // Headers, e.g. ['Content-Type: text/plain']
        if (!empty($options['httpheader'])) {
            curl_setopt($ch, CURLOPT_HTTPHEADER, $options['httpheader']);
        }
        // Send a browser-like user agent when the option is truthy
        if (!empty($options['useragent'])) {
            curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36");
        }
        // Proxy, e.g. proxy.example.com:8080
        if (!empty($options['proxy'])) {
            curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, true);
            curl_setopt($ch, CURLOPT_PROXY, $options['proxy']);
        }
        // Cookie header, e.g. username=John Doe; usertype=member
        if (!empty($options['cookie'])) {
            curl_setopt($ch, CURLOPT_COOKIE, $options['cookie']);
        }
        // HTTP authentication, e.g. username:password
        if (!empty($options['userpwd'])) {
            curl_setopt($ch, CURLOPT_USERPWD, $options['userpwd']);
        }
        if (!empty($options['ssl'])) {
            // Client certificate, e.g. /path/to/your/certificate.crt
            if (!empty($options['ssl_cert'])) {
                curl_setopt($ch, CURLOPT_SSLCERT, $options['ssl_cert']);
            }
            // Private key of the client certificate, e.g. /path/to/your/private_key.pem
            if (!empty($options['ssl_key'])) {
                curl_setopt($ch, CURLOPT_SSLKEY, $options['ssl_key']);
            }
            // SECURITY: the 'ssl' option turns OFF peer and host verification (meant for
            // self-signed servers). Do not enable it for requests to untrusted networks.
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
        }
        // Return the response body instead of printing it
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $result = curl_exec($ch);
        $error = curl_error($ch);
        curl_close($ch);
        if ($result === false || $error) {
            return ['status'=>0,'msg'=>$error !== '' ? $error : 'request failed','data'=>null];
        }
        return ['status'=>1,'msg'=>'success','data'=>$result];
    }
}