SitemapBuilder.php
<?php
namespace Phad;
/**
 * A simple sitemap builder that writes to disk immediately so memory use stays low.
 *
 * Entries are appended to `<urlset>` files inside the storage directory as soon as
 * they are added; a file is finalized (closing tag written, handle released) by
 * close(), by make_sitemap(), or when the builder is destroyed.
 */
class SitemapBuilder {

    /** Not read or written by any method of this class. */
    public $cache_dir;

    /** Directory the sitemap xml files are written to (set by the constructor). */
    public $dir;

    /** Open file handles, keyed by sitemap file name (including the `.xml` suffix). */
    public $handles = [];

    /** Sitemap handlers that modify sitemap data. Not referenced by any method of this class. */
    public $handlers = [];

    /**
     * A router object that is used to parse the url patterns.
     *
     * It must provide `decode_pattern($pattern)`, whose result is passed to fill_pattern()
     * and is read there as an array with a `pattern` string and a `params` list.
     */
    public $router;

    /**
     * A pdo instance for performing queries. Defaults to null so the property can be
     * inspected safely before a connection has been assigned.
     */
    public ?\PDO $pdo = null;

    /** When true, get_results() throws if a query cannot be prepared instead of returning false. */
    public bool $throw_on_query_failure = false;

    /** The website to prefix all <loc> paths with, such as https://example.com */
    public string $host = '';

    /**
     * @param $storageDir directory to write your xml files to. Created (recursively) if missing.
     */
    public function __construct($storageDir){
        $this->dir = $storageDir;
        if (!is_dir($this->dir)){
            // 0755: the owner needs the execute bit to create files inside the directory.
            // A failure here only raises mkdir()'s warning; it surfaces as an exception
            // in handle() the first time a file actually has to be written.
            mkdir($this->dir, 0755, true);
        }
    }

    /**
     * Get an existing file handle or open a new one. A newly opened file is truncated
     * and receives the xml declaration plus the opening `<urlset>` tag.
     *
     * @param $name file name inside the storage directory
     * @return resource a file handle for use with fwrite()
     * @throws \RuntimeException if the file cannot be opened for writing
     */
    protected function handle($name){
        if (isset($this->handles[$name])){
            return $this->handles[$name];
        }

        $path = $this->dir.'/'.$name;
        $handle = fopen($path, 'w+');
        if ($handle === false){
            throw new \RuntimeException("Could not open sitemap file '{$path}' for writing.");
        }
        $this->handles[$name] = $handle;

        // write the opening for a sitemap xml file
        $sitemapOpen = '<?xml version="1.0" encoding="UTF-8"?>'."\n"
            .'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
        fwrite($handle, $sitemapOpen);

        return $handle;
    }

    /**
     * Finalize one sitemap file: write the closing `</urlset>` tag and release its handle.
     * Does nothing if no handle is open under the given name.
     *
     * @param $name the name of the sitemap, with or without .xml
     */
    public function close($name){
        if (!isset($this->handles[$name])){
            $name = $name.'.xml';
        }
        $handle = $this->handles[$name] ?? null;
        if ($handle === null){
            return;
        }
        unset($this->handles[$name]);

        fwrite($handle, "\n</urlset>");
        fclose($handle);
    }

    /**
     * Writes a new entry to the target xml file.
     *
     * Keys are written in sorted order. Null values, arrays and values that are blank
     * after trimming are skipped; the key `last_mod` is written as `lastmod`. Values are
     * xml-escaped (already-encoded entities are left alone) so urls containing `&`
     * still produce a well-formed document.
     *
     * @param $sitemapName the name of the sitemap, with or without .xml
     * @param $entry `key=>value` array to write as `<key>value</key>`
     */
    public function addEntry($sitemapName, $entry){
        $h = $this->handle($this->normalizeName($sitemapName));
        ksort($entry);
        $ident = ' ';

        fwrite($h, "\n$ident<url>");
        foreach ($entry as $key=>$value){
            if ($value === null || is_array($value)){
                continue;
            }
            // Strict emptiness check: a numeric 0 / 0.0 (e.g. a priority) is a real value.
            $text = (string)$value;
            if (trim($text) === ''){
                continue;
            }
            if ($key == 'last_mod'){
                $key = 'lastmod';
            }
            $escaped = htmlspecialchars($text, ENT_XML1 | ENT_QUOTES, 'UTF-8', false);
            fwrite($h, "\n$ident <$key>$escaped</$key>");
        }
        fwrite($h, "\n$ident</url>");
    }

    /**
     * Parse the stored xml file and return one array per `<url>` element, mapping each
     * child node name to its trimmed text.
     *
     * @param $sitemapName the name of the sitemap, with or without .xml
     * @return array list of `nodeName => text` arrays
     * @throws \Exception if the sitemap file does not exist
     */
    public function get_stored_entries($sitemapName){
        $file = $this->dir.'/'.$this->normalizeName($sitemapName);
        if (!is_file($file)){
            throw new \Exception("There is no file '{$file}'. Cannot lookup entries.");
        }

        $doc = new \Taeluf\PHTML(file_get_contents($file));
        $all = [];
        foreach ($doc->xpath('//url') as $entry){
            // Start from a clean array for every <url>, otherwise fields of an earlier
            // entry would leak into later entries that do not define them.
            $fields = [];
            foreach ($entry->children as $cn){
                if ($cn->nodeName == '#text'){
                    continue;
                }
                $fields[$cn->nodeName] = trim($cn->innerText);
            }
            $all[] = $fields;
        }
        return $all;
    }

    /**
     * Finalize every sitemap file that is still open.
     */
    public function __destruct(){
        foreach (array_keys($this->handles) as $name){
            $this->close($name);
        }
    }

    /**
     * Build the sitemap entries described by one sitemap definition.
     *
     * Sample sitemap data:
     *
     *     [
     *         'sql' => 'SELECT slug FROM blog',
     *         'filter'=> 'ns:filter_name',
     *         'priority' => '0.8',
     *         'last_mod' => '1354',
     *         'changefreq' => 'daily',
     *         'pattern' => '/blog/{slug}',
     *     ]
     *
     * Without `sql` a single entry is returned whose `loc` is the host-prefixed pattern.
     * With `sql`, one entry is returned per result row: `loc` is the pattern filled with
     * the row's columns, and every other field uses the row's column of the same name
     * when it is set, falling back to the value from $sitemap_data.
     *
     * NOTE(review): only `sql`, `handler` and `pattern` are stripped from the entry
     * fields, so any other key (such as the sample's `filter`) is carried into the
     * entries — confirm whether that is intended.
     *
     * @param $sitemap_data array sitemap definition, must contain `pattern`
     * @return array list of entries, each a `key=>value` array including `loc`
     */
    public function build_entries(array $sitemap_data){
        $base = array_merge(
            [
                'priority'=>null,
                'last_mod'=>null,
                'changefreq'=>null,
            ],
            $sitemap_data,
        );
        unset($base['sql'], $base['handler'], $base['pattern']);

        $pattern = $this->host.$sitemap_data['pattern'];
        $parsed = $this->parse_pattern($pattern);

        // return one entry if no sql given
        if (!isset($sitemap_data['sql'])){
            $entry = $base;
            $entry['loc'] = $pattern;
            return [$entry];
        }

        $results = $this->get_results($sitemap_data['sql']);
        if ($results === false){
            // The query could not be prepared and throwing is disabled: no rows, no entries.
            return [];
        }

        $entries = [];
        foreach ($results as $row){
            $entry = [];
            // row columns override the defaults from the sitemap definition
            foreach ($base as $key=>$default){
                $entry[$key] = $row[$key] ?? $default;
            }
            $entry['loc'] = $this->fill_pattern($parsed, $row);
            $entries[] = $entry;
        }
        return $entries;
    }

    /**
     * Run a query and fetch all rows as associative arrays.
     *
     * @param $sql the query to run (executed without bound parameters)
     * @return array|false the rows, or false if the query could not be prepared and
     *         `$throw_on_query_failure` is false
     * @throws \RuntimeException if no PDO instance has been assigned
     * @throws \PDOException if the query could not be prepared and `$throw_on_query_failure` is true
     */
    public function get_results($sql){
        $pdo = $this->pdo;
        if ($pdo === null){
            throw new \RuntimeException("No PDO instance is set on the SitemapBuilder. Cannot run query.");
        }

        $stmt = $pdo->prepare($sql);
        if ($stmt === false){
            if ($this->throw_on_query_failure){
                // Carry the driver's message in the exception rather than printing it.
                $info = $pdo->errorInfo();
                $detail = $info[2] ?? 'no driver error message available';
                throw new \PDOException("Could not prepare query... ".$detail);
            }
            return false;
        }

        $stmt->execute();
        return $stmt->fetchAll(\PDO::FETCH_ASSOC);
    }

    /**
     * Decode a url pattern with the configured router.
     *
     * @param $pattern url pattern such as `/blog/{slug}`
     * @return mixed whatever the router's `decode_pattern()` returns
     */
    public function parse_pattern($pattern){
        return $this->router->decode_pattern($pattern);
    }

    /**
     * Replace every `{param}` placeholder of a parsed pattern with its value.
     *
     * @param $parsed array with the `pattern` string and the list of `params` names
     * @param $values array `param=>value`; a missing or null value becomes an empty string
     * @return string the filled-in pattern
     */
    public function fill_pattern($parsed, array $values){
        $url = $parsed['pattern'];
        foreach ($parsed['params'] as $param){
            $url = str_replace('{'.$param.'}', (string)($values[$param] ?? ''), $url);
        }
        return $url;
    }

    /**
     * Build all entries for the given definitions, write them to one sitemap file and
     * finalize it. The file is only created once the first entry is written.
     *
     * @param $sitemap_data_list array from @see(get_sitemap_list())
     * @param $sitemap_name the name of the sitemap, with or without .xml
     * @return string sitemap file name (like sitemap.xml)
     */
    public function make_sitemap(array $sitemap_data_list, $sitemap_name='sitemap'){
        // Normalize once so a name passed with `.xml` is not returned as `name.xml.xml`.
        $file_name = $this->normalizeName($sitemap_name);
        foreach ($sitemap_data_list as $sitemap_data){
            foreach ($this->build_entries($sitemap_data) as $entry){
                $this->addEntry($file_name, $entry);
            }
        }
        $this->close($file_name);
        return $file_name;
    }

    /**
     * Build all entries for the given definitions without writing anything to disk.
     *
     * @param $sitemap_data_list array from @see(get_sitemap_list())
     * @return array of sitemap entries
     */
    public function get_sitemap_as_array(array $sitemap_data_list){
        $all_entries = [];
        foreach ($sitemap_data_list as $sitemap_data){
            foreach ($this->build_entries($sitemap_data) as $entry){
                $all_entries[] = $entry;
            }
        }
        return $all_entries;
    }

    /**
     * Collect the sitemap definitions of every item. Items whose `sitemap_data()` does
     * not return an array are skipped.
     *
     * @param $items @see(Phad::get_all_items())
     * @param $phad a phad instance
     * @return an array to pass to @see(this::make_sitemap())
     */
    public function get_sitemap_list($items, $phad){
        $sm_list = [];
        foreach ($items as $v){
            $item = $phad->item($v, []);
            $sm = $item->sitemap_data();
            if (!is_array($sm)){
                continue;
            }
            $sm_list = array_merge($sm_list, array_values($sm));
        }
        return $sm_list;
    }

    /**
     * Append the `.xml` suffix to a sitemap name unless it is already present.
     */
    private function normalizeName($sitemapName){
        if (substr($sitemapName, -4) != '.xml'){
            $sitemapName .= '.xml';
        }
        return $sitemapName;
    }
}