SeoExtractor.php

<?php

namespace Lia\Addon;

/**
 * Addon that derives SEO metadata (title, description, image) from the HTML
 * content of a resolved response and forwards it to the Liaison instance via
 * its seoTitle() / seoDescription() / seoImage() methods.
 */
class SeoExtractor {

    /**
     * The Liaison instance given to the constructor; receives the seo*() calls.
     *
     * @var object
     */
    protected $lia;

    /**
     * @param object $liaison the Liaison instance this addon reports SEO data to
     */
    public function __construct($liaison){
        $this->lia = $liaison;
    }

    /**
     * Create an extractor, schedule its route_resolved() method on the
     * 'RouteResolved' event and store the extractor on the Liaison instance
     * under the key 'tlf:addon.seo-extractor'.
     *
     * @param object $lia the Liaison instance
     * @return static the newly created extractor
     */
    public static function enable($lia){
        $instance = new static($lia);
        $lia->schedule('RouteResolved',
            [$instance, 'route_resolved']
        );
        $lia->set('tlf:addon.seo-extractor', $instance);
        return $instance;
    }

    /**
     * 'RouteResolved' handler: extract SEO data from the response content and
     * pass whatever was found to the Liaison instance.
     *
     * Does nothing unless the request url ends with '/' and the response has
     * `useTheme` set to a truthy value.
     *
     * @param mixed $route the resolved route (not used here)
     * @param object $response must expose `request->url()`, `useTheme` and `content`
     * @return void
     */
    public function route_resolved($route, $response){
        $url = $response->request->url();
        // Only urls with a trailing slash are processed.
        if (substr($url, -1) !== '/') {
            return;
        }
        if (!$response->useTheme) {
            return;
        }

        $lia = $this->lia;
        $seo = $this->get_seo_content($response->content);

        if (isset($seo['title'])) {
            $lia->seoTitle($seo['title']);
        }
        if (isset($seo['description'])) {
            $lia->seoDescription($seo['description']);
        }
        // 'image' may be present but null (no <img> found); isset() filters that out.
        if (isset($seo['image'])) {
            $lia->seoImage($lia->urlWithDomain($seo['image']->src), $seo['image']->alt);
        }
    }

    /**
     * Extract SEO-relevant content from an HTML string.
     *
     * The result is always an array. It is empty when the document has no <h1>
     * or when parsing throws an \Exception. Otherwise it may contain:
     *  - 'title': `wholeText` of the first child of the first <h1> that exposes
     *    that property (only set when such a child exists)
     *  - 'description': text content of the first <p>, or '' when there is none
     *  - 'image': the first <img> node, or null when there is none
     *
     * @param string $html the HTML to inspect
     * @return array extracted values, keyed as described above
     */
    public function get_seo_content($html){
        $seo = [];

        try {
            $phtml = new \Taeluf\PHTML($html);

            // Without an <h1> nothing is extracted; still return an array so
            // every code path yields the same type.
            $h1 = $phtml->xpath('//h1')[0] ?? null;
            if ($h1 === null) {
                return $seo;
            }

            // Use the first child carrying `wholeText` rather than the heading's
            // full textContent (presumably to leave out nested markup — confirm).
            foreach ($h1->children ?? [] as $child) {
                if (isset($child->wholeText)) {
                    $seo['title'] = $child->wholeText;
                    break;
                }
            }

            $p = $phtml->xpath('//p')[0] ?? null;
            $seo['description'] = $p !== null ? $p->textContent : '';
            $seo['image'] = $phtml->xpath('//img')[0] ?? null;
        } catch (\Exception $e) {
            // Best effort: on a parsing failure return whatever was collected so far.
        }

        return $seo;
    }

}