class Doc {
    /** @var \DOMDocument the DOM document returned by docFromHTML() */
    public $doc;
    /** @var mixed the HTML that was passed to the constructor */
    protected $sourceHtml;
    /** @var object the constructor's $options array cast to an object */
    protected $opt;
    /** @var array the entity objects returned by loadEntities() */
    protected $entities;
    // protected $event;
    /** @var string|null default destination folder for uploadFile(); set by setImagePaths() */
    protected $imageDestination;
    /** @var string|null url prefix for images; set by setImagePaths() */
    public $imageUrlPrefix;
    /**
     * Result of isHTMLDoc() for the source HTML, assigned in the constructor and read by __toString().
     * Declared explicitly (and public, matching the visibility it had as a dynamic property)
     * because dynamic property creation is deprecated as of PHP 8.2.
     *
     * @var bool|null
     */
    public $isHTMLDoc;
/**
 * Build the document wrapper from an HTML string.
 *
 * Stores the source HTML and options, records whether the source is a full
 * HTML document, builds the DOM via docFromHTML() and loads the entities.
 *
 * @param mixed $html    the HTML to load
 * @param array $options options; 'hideXmlErrors' is read here, and the whole array
 *                       is also handed to every entity created by loadEntities()
 */
public function __construct($html,array $options=[]){
    $this->sourceHtml = $html;
    $this->opt = (object)$options;
    $this->isHTMLDoc = $this->isHTMLDoc($html);
    // Fall back to docFromHTML()'s own default (true) when the option is not supplied;
    // reading the missing property directly raised a warning and passed null instead.
    $this->doc = $this->docFromHTML($html,$this->opt->hideXmlErrors ?? true);
    $this->entities = $this->loadEntities();
}
/**
 * Return an array of images from the page in the format:
 * [index] => [
 *     'index' => ...,
 *     'url'   => ...,
 *     'alt'   => ...,
 * ]
 * Scrapes the <img> tags of the document.
 *
 * NOTE(review): 'url' is taken verbatim from the src attribute and 'alt' from the
 * alt attribute (empty string when absent) - confirm this matches what callers expect.
 *
 * @return array
 */
public function getImages(){
    $xPath = new \DOMXpath($this->doc);
    $lin = $xPath->query('//img');
    $images = [];
    foreach ($lin as $index=>$in){
        $img = [
            'index' => $index,
            'url'   => $in->getAttribute('src'),
            'alt'   => $in->getAttribute('alt'),
        ];
        $images[$index] = $img;
    }
    return $images;
}
/**
 * Adds a <link rel="canonical" href=""> to the <head>.
 * The url is passed through fullUrl(), which prepends the current scheme://DOMAIN.
 * Does nothing when the document has no <head>.
 *
 * @param string $url
 * @return void
 */
public function putCanonicalUrl(string $url) {
    $head = $this->queryTagFirst('head');
    if ($head === null) {
        // Not a full html document: there is nowhere to put the link.
        return;
    }
    $link = $this->doc->createElement('link');
    $link->rel = "canonical";
    $link->href = static::fullUrl($url);
    // The element was previously built but never attached to the document.
    $head->appendChild($link);
}
/**
 * Inserts an og:image tag into the <head> of the document, if it is a full html doc.
 * Does nothing when the document has no <head>.
 *
 * @param array $img in format ['url'=>absoluteurl, 'alt'=>string]. See getImages()
 * @return void
 */
public function putMetaImage(array $img){
    $doc = $this->doc;
    $head = $this->queryTagFirst('head');
    if ($head === null) {
        // Not a full html document: there is nowhere to put the tag.
        return;
    }
    $meta = $doc->createElement('meta');
    $meta->property = "og:image";
    $meta->content = $img['url'];
    // The element was previously built but never attached to the document.
    $head->appendChild($meta);
}
/**
 * Set the <title>, <meta og:title>, and <meta twitter:title> with the given title.
 * The title is trimmed first. Does nothing when the document has no <head>.
 *
 * NOTE(review): the new nodes are appended to <head>; an already existing <title>
 * is not removed - confirm whether replacement is expected.
 *
 * @param string $titleText
 * @return void
 */
public function putMetaTitle(string $titleText){
    $titleText = trim($titleText);
    $head = $this->queryTagFirst('head');
    if ($head === null) {
        // Not a full html document: there is nowhere to put the tags.
        return;
    }

    $title = $this->doc->createElement('title');
    $title->innerHTML = $titleText;

    $ogtitle = $this->doc->createElement('meta');
    $ogtitle->property = "og:title";
    $ogtitle->content = $titleText;

    $tweettitle = $this->doc->createElement('meta');
    $tweettitle->name = "twitter:title";
    $tweettitle->content = $titleText;

    // The elements were previously built but never attached to the document.
    $head->appendChild($title);
    $head->appendChild($ogtitle);
    $head->appendChild($tweettitle);
}
/**
 * Add <meta og:description> and <meta name="description"> tags with the given text.
 * The text is trimmed first. Does nothing when the document has no <head>.
 *
 * @param string $descriptionText
 * @return void
 */
public function putMetaDescription(string $descriptionText){
    $descriptionText = trim($descriptionText);
    $head = $this->queryTagFirst('head');
    if ($head === null) {
        // Not a full html document: there is nowhere to put the tags.
        return;
    }

    $ogdescript = $this->doc->createElement('meta');
    $ogdescript->property = "og:description";
    $ogdescript->content = $descriptionText;

    $descript = $this->doc->createElement('meta');
    $descript->name = "description";
    $descript->content = $descriptionText;

    // The elements were previously built but never attached to the document.
    $head->appendChild($ogdescript);
    $head->appendChild($descript);
}
/**
 * Append a text node containing $text as the last child of $parentNode.
 *
 * @param \DOMNode $parentNode node that receives the text
 * @param mixed    $text       text content for the new text node
 * @return void
 */
public function appendText(\DOMNode $parentNode, $text){
    $parentNode->appendChild($this->doc->createTextNode($text));
}
/**
 * Append $newNode as the last child of $parentNode.
 *
 * @param \DOMNode $parentNode node that receives the child
 * @param \DOMNode $newNode    node to append
 * @return void
 */
public function appendNode(\DOMNode $parentNode,\DOMNode $newNode){
    $parentNode->appendChild($newNode);
    // $parentNode->appendChild($this->doc->createTextNode("\n"));
}
/**
 * Look up the first element in the document with the given tag name.
 * Builds the xpath expression '//' followed by the tag name and delegates to xpathFirst().
 *
 * @param string $tagName tag name to search for
 * @return mixed whatever xpathFirst() returns for that expression
 */
public function queryTagFirst($tagName)
{
    $expression = '//' . $tagName;

    return $this->xpathFirst($expression);
}
/**
 * Execute an xpath query on the document with the given refnode (or from the root if null).
 *
 * @param string   $xPathStr the xpath expression
 * @param \DomNode $refNode  optional context node for relative expressions
 * @return array list of matching nodes, indexed from 0; empty when nothing matches
 *               or when the query itself fails
 */
public function xpath(string $xPathStr,\DomNode $refNode=null): array{
    $xPath = new \DOMXpath($this->doc);
    $list = $xPath->query($xPathStr,$refNode);
    if ($list === false) {
        // query() returns false for a malformed expression or an invalid context node.
        return [];
    }
    // Second argument false: discard keys so the result is always a 0-indexed list.
    return iterator_to_array($list, false);
}
/**
 * Run xpath() with the given arguments and return only the first match.
 *
 * @param mixed ...$args forwarded unchanged to xpath()
 * @return \DOMNode|null the first matching node, or null when nothing matches
 */
public function xpathFirst(...$args){
    $all = $this->xpath(...$args);
    // `?? null` yields the same null as before for an empty result,
    // without the undefined-offset warning.
    return $all[0] ?? null;
}
/**
 * Tell whether the given markup contains the literal string '<html>'.
 *
 * @param string $html markup to inspect
 * @return bool true when '<html>' occurs anywhere in $html, false otherwise
 */
protected function isHTMLDoc($html)
{
    return strpos($html, '<html>') !== false;
}
// public function setEvent($event){
// $this->event = $event;
// // var_dump($event);
// // exit;
// }
/**
 * Iterates over every entity loaded for this document.
 *
 * NOTE(review): the body of the loop is not visible in this excerpt, so what is
 * done with each entity (and how $removeAPIData is used) must be confirmed
 * against the full file.
 *
 * @param bool $removeAPIData defaults to true; not referenced in the visible lines
 */
public function autoFill($removeAPIData=true){
foreach ($this->entities as $entity){
/**
 * Iterates over the loaded entities and appends $popupDialog to the document body.
 *
 * NOTE(review): the body of the entity loop is not visible in this excerpt, and it is
 * not clear from the visible lines where the loop ends - confirm against the full file.
 *
 * @param mixed $popupDialog markup concatenated onto the end of the body's innerHTML
 */
public function enableEditMode($popupDialog){
foreach ($this->entities as $entity){
// Find the <body> element of the document.
$bodyXPath = new \DOMXpath($this->doc);
$bodies = $bodyXPath->query('//body');
$body = $bodies[0];
// No <body> found: fall back to the first child node of the document.
if ($body==null)$body = $this->doc->childNodes[0];
// var_dump($body);
// exit;
// Re-assign innerHTML with the popup markup appended after the existing content.
$body->innerHTML = $body->innerHTML . $popupDialog;
/**
 * Collects the \Doc\FormEntity instances among the loaded entities and requires
 * that there is exactly one of them.
 *
 * NOTE(review): nothing after the two count checks is visible in this excerpt, so how
 * $data is applied to the form must be confirmed against the full file.
 *
 * @param mixed $data not referenced in the visible lines
 * @throws \Exception when more than one form entity, or none, is present
 */
public function submit($data){
$forms = [];
// Keep only the entities that are forms.
foreach ($this->entities as $entity){
if ($entity instanceof \Doc\FormEntity){
$forms[] = $entity;
// Exactly one form is required.
if (count($forms)>1)throw new \Exception("We can only submit one form at a time.");
else if (count($forms)==0)throw new \Exception("We don't know what form to submit.");
/**
 * Prepend a hidden <input name="id"> to the given form, unless the document
 * already contains one.
 *
 * NOTE(review): the existence check searches the whole document ('//input...'),
 * not just $form - confirm that is intended.
 *
 * @param mixed $form element whose innerHTML is rewritten; it must support the
 *                    innerHTML property
 * @return void
 */
protected function insertHiddenIdInput($form){
    $xPath = new \DOMXPath($this->doc);
    $inputs = $xPath->query('//input[@name="id"][@type="hidden"]');
    // Use ->length rather than count(): DOMNodeList only became Countable in PHP 7.2.
    if ($inputs->length != 0) return;
    $form->innerHTML = "\n".'<input type="hidden" name="id" />'."\n".$form->innerHTML;
}
/**
 * Build an entity object for every element in the document that has a `table` attribute.
 *
 * <form> elements (tag name compared case-insensitively) become \Doc\FormEntity,
 * every other element becomes \Doc\Entity. Each entity receives the node, this
 * Doc instance and the options cast back to an array.
 *
 * @return array the created entities, in the order the query returned their nodes
 */
protected function loadEntities()
{
    $found = [];
    $query = new \DOMXPath($this->doc);

    foreach ($query->query('//*[@table]') as $node) {
        $isForm = strtolower($node->tagName) == 'form';
        $found[] = $isForm
            ? new \Doc\FormEntity($node, $this, (array)$this->opt)
            : new \Doc\Entity($node, $this, (array)$this->opt);
    }

    return $found;
}
/**
 * Creates the DOM document used by this class.
 *
 * The visible lines create a DomDocument, register 'JSLikeHTMLElement' as the class
 * used for DOMElement nodes, wrap the markup in a <root> element and return the document.
 *
 * NOTE(review): no statement that loads $html into $domDoc, and no use of
 * $hideXmlErrors, is visible in this excerpt - confirm against the full file.
 *
 * @param mixed $html          markup to build the document from
 * @param bool  $hideXmlErrors defaults to true; not referenced in the visible lines
 * @return \DomDocument
 */
protected function docFromHTML($html,$hideXmlErrors=true){
$domDoc = new \DomDocument();
// $domDoc->docType = 'html';
// $domDoc->docType->removeAttribute('public');
// Elements created by this document are instances of JSLikeHTMLElement.
$domDoc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
// echo $html;
// exit;
// Wrap the markup in a <root> element; __toString() later cuts the output between <root> and </root>.
$html = '<root>'.$html.'</root>';
// echo $domDoc->saveHTML($domDoc->childNodes[0]);
// exit;
return $domDoc;
/**
 * Give $array[$key] a default value when it is missing or null.
 *
 * @param array $array        modified in place (passed by reference)
 * @param mixed $key          key to check
 * @param mixed $defaultValue value stored when the key is unset or null
 * @return void
 */
protected function default(&$array, $key, $defaultValue)
{
    if (!isset($array[$key])) {
        $array[$key] = $defaultValue;
    }
}
/**
 * Store the folder and the url prefix used for images.
 *
 * @param mixed $destinationFolder saved as the image destination (the default
 *                                 folder used by uploadFile())
 * @param mixed $urlPrefix         saved in the public $imageUrlPrefix property
 * @return void
 */
public function setImagePaths($destinationFolder, $urlPrefix)
{
    $this->imageUrlPrefix = $urlPrefix;
    $this->imageDestination = $destinationFolder;
}
// public function innerBodyHTML(){
// // this was in my old FormsPlus code...
// $output = $this->doc->saveHtml($this->doc->childNodes[1]->childNodes[0]);
// $output = substr($output,strlen('<body>'),-strlen('</body>'));
// return $output;
// }
/**
 * Serialize the document back to an HTML string.
 *
 * Takes the saved HTML, keeps only what lies between the first '<root>' and the
 * last '</root>', and, when the source was flagged as a full HTML document,
 * wraps that content in a doctype and <html> tags.
 *
 * @return string
 */
public function __toString()
{
    $markup = $this->doc->saveHtml();

    // 6 is the length of the opening '<root>' tag.
    $from = strpos($markup, '<root>') + 6;
    $to = strrpos($markup, '</root>');
    $inner = substr($markup, $from, $to - $from);

    if (!$this->isHTMLDoc) {
        return $inner;
    }

    return "<!DOCTYPE html>\n<html>\n{$inner}\n</html>";
}
/**
 * Decode a string of the form "key:value;key2:value2" into an associative array.
 *
 * Entries are separated by ';' and each entry is split on its FIRST ':' only, so a
 * value may itself contain colons (e.g. "url:http://example.com"). Entries with an
 * empty key are skipped; an entry without ':' gets the value null.
 *
 * @param string $dataStr the encoded data string
 * @return array map of key => value (string or null)
 */
static public function decodeDataString($dataStr){
    $data = [];
    foreach (explode(';',$dataStr) as $e){
        // Limit 2: without it everything after a second ':' in the value was dropped.
        $parts = explode(':',$e,2);
        $key = $parts[0];
        if ($key==null)continue;
        $data[$key] = $parts[1] ?? null;
    }
    return $data;
}
/**
 * Validate one uploaded file (an entry of $_FILES) and move it into the destination folder.
 *
 * The stored file is named after the sha1 of its content plus the original extension.
 *
 * NOTE(review): the extension is taken from the client-supplied file name and compared
 * case-sensitively against $validExts; the file content itself is not inspected.
 *
 * @param mixed       $file              a single $_FILES entry (name, tmp_name, size, error)
 * @param string|null $destinationFolder target folder; defaults to the folder given to setImagePaths()
 * @param array       $validExts         allowed file extensions
 * @param int|float   $maxMB             maximum accepted size in megabytes
 * @return string|false the stored file name, or false when $file is not a usable upload entry
 * @throws \RuntimeException when the upload reported an error, is too large, has a
 *                           disallowed extension, or could not be moved
 */
public function uploadFile($file, $destinationFolder=null, $validExts = ['jpg', 'png','pdf'], $maxMB = 15)
{
    if ($destinationFolder==null)$destinationFolder = $this->imageDestination;

    // Nothing (usable) was uploaded: not an error, just no file.
    if (!is_array($file) || $file == []
        || $file['size'] == 0
        || $file['name'] == ''
        || $file['tmp_name'] == ''
        || !is_int($file['error'])) {
        return false;
    }

    // Undefined | multiple files | $_FILES corruption: treat as invalid.
    if (!isset($file['error']) ||
        is_array($file['error'])
    ) {
        throw new \RuntimeException('Invalid parameters.');
    }

    // Map PHP's upload error code onto an exception.
    switch ($file['error']) {
        case UPLOAD_ERR_OK:
            break;
        case UPLOAD_ERR_NO_FILE:
            throw new \RuntimeException('No file sent.');
        case UPLOAD_ERR_INI_SIZE:
        case UPLOAD_ERR_FORM_SIZE:
            throw new \RuntimeException('Exceeded filesize limit.');
        default:
            throw new \RuntimeException('Unknown errors.');
    }

    // You should also check filesize here.
    if ($file['size'] > ($maxMB * 1024 * 1024)) {
        throw new \RuntimeException('Exceeded filesize limit.');
    }

    $ext = pathinfo($file['name'], PATHINFO_EXTENSION);
    if (!in_array($ext, $validExts)) {
        throw new \RuntimeException('Invalid file format.');
    }

    if (!file_exists($destinationFolder)) {
        mkdir($destinationFolder, 0775, true);
    }

    // Content hash as file name: identical uploads map to the same stored file.
    $fileName = sha1_file($file['tmp_name']) . '.' . $ext;
    if (!move_uploaded_file(
        $file['tmp_name'],
        $destinationFolder . '/' . $fileName
    )) {
        throw new \RuntimeException('Failed to move uploaded file.');
    }

    return $fileName;
}
/**
 * Return a domain-relative url ('/theurl/') with https?:// and the current host prepended.
 *
 * A url that already carries both a scheme and a host is returned unchanged.
 * The scheme is https when $_SERVER reports a secure request (HTTPS=on, or the
 * X-Forwarded-Proto / X-Forwarded-SSL headers set by a proxy), http otherwise.
 *
 * @param string $value the url to complete
 * @return string the absolute url
 */
static public function fullUrl($value){
    // parse_url() returns false for seriously malformed urls; treat that as "no parts".
    $p = parse_url($value);
    if (!is_array($p)) {
        $p = [];
    }
    $scheme = $p['scheme'] ?? null;
    $host = $p['host'] ?? null;//$_SERVER['HTTP_HOST'];
    if ($scheme !== null && $host !== null) {
        // Already absolute: prefixing again would produce a broken url.
        return $value;
    }

    $isSecure = false;
    if (isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == 'on') {
        $isSecure = true;
    } elseif (!empty($_SERVER['HTTP_X_FORWARDED_PROTO']) && $_SERVER['HTTP_X_FORWARDED_PROTO'] == 'https' || !empty($_SERVER['HTTP_X_FORWARDED_SSL']) && $_SERVER['HTTP_X_FORWARDED_SSL'] == 'on') {
        $isSecure = true;
    }
    $scheme = $isSecure ? 'https://' : 'http://';

    return $scheme.$_SERVER['HTTP_HOST'].$value;
}