Doc.php

<?php

class Doc {

    /** @var \DOMDocument Parsed document built by docFromHTML(); public so callers can create and query nodes. */
    public $doc;
    /** @var string The HTML string exactly as it was passed to the constructor. */
    protected $sourceHtml;
    /** @var object Constructor options cast to an object (always carries hideXmlErrors). */
    protected $opt;
    /** @var array Entity wrappers produced by loadEntities(). */
    protected $entities;
    // protected $event;
    /** @var string|null Default destination folder used by uploadFile(); set through setImagePaths(). */
    protected $imageDestination;
    /** @var string|null URL prefix stored by setImagePaths(). */
    public $imageUrlPrefix;
    /**
     * @var bool Whether the source looked like a full HTML document; read by __toString().
     *
     * Declared explicitly because assigning an undeclared property is deprecated
     * since PHP 8.2. Kept public: the implicit property it replaces was public too.
     */
    public $isHTMLDoc;

    /**
     * Parse the given HTML and collect the entities it declares.
     *
     * @param  string $html    markup to load (full document or fragment)
     * @param  array  $options supported here: 'hideXmlErrors' (bool, defaults to true);
     *                         the whole array is also forwarded to every entity
     */
    public function __construct($html,array $options=[]){
        $this->sourceHtml = $html;

        $this->default($options,'hideXmlErrors',true);
        $this->opt = (object)$options;
        // Must be decided from the raw source: the parsed DOM is wrapped in <root>.
        $this->isHTMLDoc = $this->isHTMLDoc($html);
        $this->doc = $this->docFromHTML($html,$this->opt->hideXmlErrors);
        $this->entities = $this->loadEntities();
    }
    /**
     * Collect every <img> element of the document.
     *
     * Each entry is keyed by the element's position in the query result and
     * has the shape:
     *   ['index' => int, 'url' => ..., 'alt' => ...]
     *
     * The url/alt values are read as `src` / `alt` properties of the node;
     * presumably the element class registered in docFromHTML() resolves them
     * to the attributes of the same name - TODO confirm.
     *
     * @return array
     */
    public function getImages(){
        $found = [];
        $finder = new \DOMXpath($this->doc);
        foreach ($finder->query('//img') as $position => $imgNode){
            $found[$position] = [
                'index' => $position,
                'url'   => $imgNode->src,
                'alt'   => $imgNode->alt,
            ];
        }
        return $found;
    }
    
    /**
     * Append <link rel="canonical" href="..."> to the first <head> element.
     *
     * The url is passed through static::fullUrl() before it is stored, so a
     * domain-relative path is expanded to an absolute URL.
     *
     * NOTE(review): when the document has no <head>, the lookup yields null
     * and appendNode() rejects it through its \DOMNode type hint.
     *
     * @param  string $url
     * @return void
     */
    public function putCanonicalUrl(string $url) {
        $canonical = $this->doc->createElement('link');
        $canonical->rel = "canonical";
        $canonical->href = static::fullUrl($url);

        $headNode = $this->queryTagFirst('head');
        $this->appendNode($headNode, $canonical);
        $this->appendText($headNode, "\n");
    }
    /**
     * Append <meta property="og:image" content="..."> to the first <head> element.
     *
     * Only $img['url'] is used here; the array has the same shape as the
     * entries returned by getImages().
     *
     * NOTE(review): when the document has no <head>, the lookup yields null
     * and appendNode() rejects it through its \DOMNode type hint.
     *
     * @param  array $img ['url' => absolute url, 'alt' => string]
     * @return void
     */
    public function putMetaImage(array $img){
        $headNode = $this->queryTagFirst('head');

        $ogImage = $this->doc->createElement('meta');
        $ogImage->property = "og:image";
        $ogImage->content = $img['url'];

        $this->appendNode($headNode, $ogImage);
        $this->appendText($headNode, "\n");
    }
        
    /**
     * Append a <title>, a <meta property="og:title"> and a
     * <meta name="twitter:title"> to the first <head> element, all carrying
     * the trimmed title text.
     *
     * The nodes are always appended; an existing <title> is not looked up or
     * replaced by this method.
     *
     * @param  string $titleText
     * @return void
     */
    public function putMetaTitle(string $titleText){
        $text = trim($titleText);
        $headNode = $this->queryTagFirst('head');

        $titleNode = $this->doc->createElement('title');
        $titleNode->innerHTML = $text;

        $ogNode = $this->doc->createElement('meta');
        $ogNode->property = "og:title";
        $ogNode->content = $text;

        $twitterNode = $this->doc->createElement('meta');
        $twitterNode->name = "twitter:title";
        $twitterNode->content = $text;

        // Same insertion order as the tags are listed above.
        foreach ([$titleNode, $ogNode, $twitterNode] as $node){
            $this->appendNode($headNode, $node);
        }
        $this->appendText($headNode, "\n");
    }
    /**
     * Append <meta property="og:description"> and <meta name="description">
     * to the first <head> element, both carrying the trimmed description.
     *
     * @param  string $descriptionText
     * @return void
     */
    public function putMetaDescription(string $descriptionText){
        $text = trim($descriptionText);
        $headNode = $this->queryTagFirst('head');

        $ogNode = $this->doc->createElement('meta');
        $ogNode->property = "og:description";
        $ogNode->content = $text;

        $plainNode = $this->doc->createElement('meta');
        $plainNode->name = "description";
        $plainNode->content = $text;

        // The og: variant goes in first, then the plain description.
        foreach ([$ogNode, $plainNode] as $node){
            $this->appendNode($headNode, $node);
        }
        $this->appendText($headNode, "\n");
    }
    /**
     * Append a text node holding $text as the last child of $parentNode.
     *
     * @param  \DOMNode $parentNode node that receives the text
     * @param  string   $text       raw text content
     * @return void
     */
    public function appendText(\DOMNode $parentNode, $text){
        $textNode = $this->doc->createTextNode($text);
        $parentNode->appendChild($textNode);
    }

    /**
     * Append $newNode to $parentNode, preceded by a newline text node so that
     * each appended node starts on its own line in the serialized output.
     *
     * @param  \DOMNode $parentNode node that receives the child
     * @param  \DOMNode $newNode    node to append
     * @return void
     */
    public function appendNode(\DOMNode $parentNode,\DOMNode $newNode){
        $lineBreak = $this->doc->createTextNode("\n");
        $parentNode->appendChild($lineBreak);
        $parentNode->appendChild($newNode);
    }
    
    /**
     * Return the first element with the given tag name, searched anywhere in
     * the document (XPath `//tagName`).
     *
     * @param  string $tagName
     * @return mixed whatever xpathFirst() yields for that expression
     */
    public function queryTagFirst($tagName){
        $expression = '//'.$tagName;
        return $this->xpathFirst($expression);
    }
    /**
     * Execute an XPath query on the document, relative to the given reference
     * node (or from the document root when it is null).
     *
     * @param  string        $xPathStr XPath expression
     * @param  \DOMNode|null $refNode  optional context node
     * @return array list of matching nodes in document order; empty when
     *               nothing matches or the expression could not be evaluated
     */
    public function xpath(string $xPathStr,\DomNode $refNode=null): array{
        $xPath = new \DOMXpath($this->doc);
        $list = $xPath->query($xPathStr,$refNode);
        // query() returns false for a malformed expression; iterating that
        // boolean would only add a second warning on top of libxml's own.
        if ($list === false){
            return [];
        }
        // Drop the list's own keys so callers always get a 0-based array.
        return iterator_to_array($list, false);
    }
    /**
     * Run xpath() with the same arguments and return only the first match.
     *
     * @param  mixed ...$args forwarded unchanged to xpath()
     * @return \DOMNode|null the first matching node, or null when there is none
     */
    public function xpathFirst(...$args){
        $all = $this->xpath(...$args);
        // An empty result is a normal outcome: return null explicitly rather
        // than reading a missing offset (which raises a warning).
        return $all[0] ?? null;
    }

    /**
     * Tell whether the markup contains an opening <html> tag, i.e. whether it
     * is a full document rather than a fragment.
     *
     * The tag is matched case-insensitively and may carry attributes, so
     * `<html lang="en">` and `<HTML>` count as well as a bare `<html>`.
     *
     * @param  string $html
     * @return bool
     */
    protected function isHTMLDoc($html){
        // `\s[^>]*` allows attributes while still rejecting names that merely
        // start with "html" (e.g. <htmlx>).
        return preg_match('/<html(?:\s[^>]*)?>/i', (string)$html) === 1;
    }
    // public function setEvent($event){
        // $this->event = $event;
        // // var_dump($event);
        // // exit;
    // }
    /**
     * Ask every loaded entity to fill itself.
     *
     * @param  bool $removeAPIData passed straight through to each entity's fillSelf()
     * @return void
     */
    public function autoFill($removeAPIData=true){
        foreach ($this->entities as $fillable){
            $fillable->fillSelf($removeAPIData);
        }
    }
    /**
     * Switch every entity into edit mode and append the popup dialog markup
     * to the end of the <body> (or, when there is no <body>, to the first
     * child node of the document).
     *
     * @param  string $popupDialog markup handed to each entity and appended to the page
     * @return void
     */
    public function enableEditMode($popupDialog){
        foreach ($this->entities as $editable){
            $editable->enableEditMode($popupDialog);
        }

        $finder = new \DOMXpath($this->doc);
        $container = $finder->query('//body')[0];
        if ($container==null){
            // No <body>: fall back to the document's first child.
            $container = $this->doc->childNodes[0];
        }
        $container->innerHTML = $container->innerHTML . $popupDialog;
    }

    /**
     * Fill all entities (keeping their API data) and submit the single form
     * entity of the document with the given data.
     *
     * @param  mixed $data passed unchanged to the form entity's submit()
     * @return void
     * @throws \Exception when the document holds no form entity, or more than one
     */
    public function submit($data){
        $this->autoFill(false);

        $forms = array_values(array_filter(
            $this->entities,
            function ($candidate){
                return $candidate instanceof \Doc\FormEntity;
            }
        ));

        $formCount = count($forms);
        if ($formCount>1){
            throw new \Exception("We can only submit one form at a time.");
        }
        if ($formCount==0){
            throw new \Exception("We don't know what form to submit.");
        }

        $forms[0]->submit($data);
    }


    /**
     * Make sure the given form contains an `<input type="hidden" name="id">`,
     * prepending one to the form's markup when it has none.
     *
     * The lookup is scoped to $form: a hidden id input that belongs to some
     * other form in the document must not stop this form from getting its own.
     *
     * @param  \DOMNode $form form element to inspect and, if needed, modify
     * @return void
     */
    protected function insertHiddenIdInput($form){
        $xPath = new \DOMXPath($this->doc);
        // Leading "." keeps the search inside $form instead of the whole document.
        $inputs = $xPath->query('.//input[@name="id"][@type="hidden"]', $form);
        // ->length instead of count(): DOMNodeList is only Countable from PHP 7.2 on.
        if ($inputs !== false && $inputs->length > 0) return;
        $form->innerHTML = "\n".'<input type="hidden" name="id" />'."\n".$form->innerHTML;
    }
    /**
     * Wrap every element that carries a `table` attribute in an entity object.
     *
     * <form> elements become \Doc\FormEntity, everything else \Doc\Entity.
     * Each entity receives the node, this Doc and the options as an array.
     *
     * @return array entity objects in document order
     */
    protected function loadEntities(){
        $finder = new \DOMXPath($this->doc);
        $wrapped = [];

        foreach ($finder->query('//*[@table]') as $element){
            $isForm = strtolower($element->tagName)=='form';
            $wrapped[] = $isForm
                ? new \Doc\FormEntity($element,$this,(array)$this->opt)
                : new \Doc\Entity($element,$this,(array)$this->opt);
        }
        return $wrapped;
    }

    /**
     * Parse an HTML string into a DOMDocument whose elements are instances of
     * JSLikeHTMLElement.
     *
     * The markup is wrapped in a synthetic <root> element and loaded without
     * an implied html/body or a default doctype; __toString() strips the
     * wrapper again.
     *
     * @param  string $html          markup to parse
     * @param  bool   $hideXmlErrors collect libxml parse errors internally instead of emitting them
     * @return \DOMDocument
     */
    protected function docFromHTML($html,$hideXmlErrors=true){
        // Remember the process-wide libxml setting so it can be put back
        // afterwards instead of being forced to false.
        $previous = libxml_use_internal_errors($hideXmlErrors);
        $domDoc = new \DOMDocument();
        $domDoc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
        $html = '<root>'.$html.'</root>';
        $domDoc->loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
        if ($hideXmlErrors){
            // Suppressed errors stay buffered inside libxml until cleared;
            // nobody reads them here, so release them.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);
        return $domDoc;
    }

    /**
     * Give $array[$key] a default value when it is missing or null.
     *
     * @param  array  $array        modified in place
     * @param  string $key          option name
     * @param  mixed  $defaultValue value stored when the key is not set
     * @return void
     */
    protected function default(&$array,$key,$defaultValue){
        if (!isset($array[$key])){
            $array[$key] = $defaultValue;
        }
    }

    /**
     * Store the image upload folder and the URL prefix for images.
     *
     * The folder becomes the default destination of uploadFile().
     *
     * @param  string $destinationFolder filesystem folder for uploads
     * @param  string $urlPrefix         stored in the public $imageUrlPrefix property
     * @return void
     */
    public function setImagePaths($destinationFolder,$urlPrefix){
        $this->imageUrlPrefix = $urlPrefix;
        $this->imageDestination = $destinationFolder;
    }
    // public function innerBodyHTML(){
        // // this was in my old FormsPlus code...
        // $output = $this->doc->saveHtml($this->doc->childNodes[1]->childNodes[0]);
        // $output = substr($output,strlen('<body>'),-strlen('</body>'));
        // return $output;
    // }

    /**
     * Serialize the document back to HTML without the synthetic <root>
     * wrapper added by docFromHTML().
     *
     * When the source was a full HTML document the result is wrapped in
     * `<!DOCTYPE html>` and `<html>...</html>`.
     *
     * @return string
     */
    public function __toString()
    {
        $serialized = (string)$this->doc->saveHTML();
        // Fallback: if the wrapper cannot be located, return the markup as
        // serialized instead of slicing it at an arbitrary offset.
        $final = $serialized;

        // Match the opening wrapper tag even if it carries attributes.
        if (preg_match('/<root(?:\s[^>]*)?>/', $serialized, $open, PREG_OFFSET_CAPTURE) === 1){
            $start = $open[0][1] + strlen($open[0][0]);
            $end = strrpos($serialized,'</root>');
            if ($end === false || $end < $start){
                // Closing tag missing: keep everything after the opening tag.
                $end = strlen($serialized);
            }
            $final = (string)substr($serialized,$start,$end-$start);
        }

        if ($this->isHTMLDoc){
            $final = "<!DOCTYPE html>\n<html>\n{$final}\n</html>";
        }
        return $final;
    }

    /**
     * Decode a `key:value;key2:value2` string into an associative array.
     *
     * Entries are separated by `;`. Within an entry only the FIRST `:` splits
     * key from value, so values may themselves contain colons
     * (e.g. `url:http://example.com`). An entry without a colon maps its key
     * to null; entries with an empty key are skipped.
     *
     * @param  string $dataStr
     * @return array key => value (string or null)
     */
    static public function decodeDataString($dataStr){
        $data = [];
        foreach (explode(';',$dataStr) as $entry){
            // Limit of 2 keeps any further ':' inside the value.
            $parts = explode(':',$entry,2);
            $key = $parts[0];
            if ($key === '')continue;
            $data[$key] = $parts[1] ?? null;
        }
        return $data;
    }

    /**
     * Validate one entry of $_FILES and move it into the destination folder
     * under a content-derived name (`sha1-of-content.ext`).
     *
     * @param  mixed       $file              one $_FILES entry (name, tmp_name, size, error)
     * @param  string|null $destinationFolder target folder; defaults to the folder given to setImagePaths()
     * @param  array       $validExts         accepted file extensions, compared case-insensitively
     * @param  int|float   $maxMB             maximum accepted size in megabytes
     * @return string|false the stored file name (without folder), or false when
     *                      $file does not describe an uploaded file at all
     * @throws \RuntimeException on upload errors, oversized files, a rejected
     *                           extension, or when the file cannot be stored
     */
    public function uploadFile($file, $destinationFolder=null, $validExts = ['jpg', 'png','pdf'], $maxMB = 15)
    {
        if ($destinationFolder==null)$destinationFolder = $this->imageDestination;

        // "Nothing was uploaded" is reported as false rather than an exception.
        // The ?? defaults make a partially filled array take the same path
        // without raising undefined-index notices.
        if (!is_array($file)
            || ($file['size'] ?? 0) == 0
            || ($file['name'] ?? '') == ''
            || ($file['tmp_name'] ?? '') == ''
            || !is_int($file['error'] ?? null)) {
            return false;
        }

        switch ($file['error']) {
            case UPLOAD_ERR_OK:
                break;
            case UPLOAD_ERR_NO_FILE:
                throw new \RuntimeException('No file sent.');
            case UPLOAD_ERR_INI_SIZE:
            case UPLOAD_ERR_FORM_SIZE:
                throw new \RuntimeException('Exceeded filesize limit.');
            default:
                throw new \RuntimeException('Unknown errors.');
        }

        if ($file['size'] > ($maxMB * 1024 * 1024)) {
            throw new \RuntimeException('Exceeded filesize limit.');
        }

        // Compare extensions case-insensitively ("photo.JPG" is a jpg) and
        // strictly, so loose type juggling can never produce a match.
        $ext = strtolower(pathinfo($file['name'], PATHINFO_EXTENSION));
        $allowed = array_map('strtolower', $validExts);
        if (!in_array($ext, $allowed, true)) {
            throw new \RuntimeException('Invalid file format.');
        }

        // The second is_dir() covers a concurrent request creating the folder
        // between the check and the mkdir() call.
        if (!is_dir($destinationFolder)
            && !mkdir($destinationFolder, 0775, true)
            && !is_dir($destinationFolder)) {
            throw new \RuntimeException('Failed to create destination folder.');
        }

        $hash = sha1_file($file['tmp_name']);
        if ($hash === false) {
            throw new \RuntimeException('Failed to read uploaded file.');
        }

        $fileName = $hash . '.' . $ext;
        if (!move_uploaded_file($file['tmp_name'], $destinationFolder . '/' . $fileName)) {
            throw new \RuntimeException('Failed to move uploaded file.');
        }

        return $fileName;
    }

        /**
     * Turn a domain-relative url ('/theurl/') into an absolute one by
     * prepending the current request's scheme and host.
     *
     * - '/path'       => 'http(s)://HTTP_HOST/path'
     * - '//host/path' => 'http(s)://host/path' (protocol-relative: only the scheme is added)
     * - anything else is returned unchanged
     *
     * The scheme is https when $_SERVER['HTTPS'] is 'on', or when the
     * X-Forwarded-Proto / X-Forwarded-SSL headers report https / on.
     * When no HTTP_HOST is available the value is returned unchanged rather
     * than producing a host-less 'http:///path'.
     *
     * @param  string $value
     * @return string
     */
    static public function fullUrl($value){
        if (substr((string)$value,0,1) != '/'){
            return $value;
        }

        $isSecure = (isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == 'on')
            || (!empty($_SERVER['HTTP_X_FORWARDED_PROTO']) && $_SERVER['HTTP_X_FORWARDED_PROTO'] == 'https')
            || (!empty($_SERVER['HTTP_X_FORWARDED_SSL']) && $_SERVER['HTTP_X_FORWARDED_SSL'] == 'on');
        $scheme = $isSecure ? 'https' : 'http';

        if (substr($value,0,2) == '//'){
            // Already carries its own host; it only lacks the scheme.
            return $scheme.':'.$value;
        }

        $host = $_SERVER['HTTP_HOST'] ?? '';
        if ($host === ''){
            return $value;
        }
        return $scheme.'://'.$host.$value;
    }
}