Grammar.php

<?php

namespace Tlf\Lexer;

/**
 * Base class for a lexer grammar.
 *
 * A grammar holds a set of named directives (`$directives`) and knows how to
 * resolve a directive name (optionally with overrides) into one or more
 * normalized directive objects via `getDirectives()`.
 */
class Grammar {

    /** Counter appended to `_blank` / `+name` directive names so each generated directive gets a unique name */
    protected $blankCount = 0;

    /** The actual array of directives, built during onGrammarAdded() */
    public $directives = [];

    /**
     * the lexer currently running
     */
    protected \Tlf\Lexer $lexer;

    /**
     * Remember the lexer that is running. `$ast` and `$token` are accepted but not used here.
     *
     * @param \Tlf\Lexer $lexer the lexer to store
     * @param mixed $ast unused
     * @param mixed $token unused
     */
    public function lexer_started($lexer, $ast, $token){
        $this->lexer = $lexer;
    }

    /**
     * Set the lexer used to look up other grammars in `getDirectives()`.
     *
     * @param \Tlf\Lexer $lexer the lexer to store
     */
    public function setLexer($lexer){
        $this->lexer = $lexer;
    }

    /**
     * Get a namespace prefix to use for specifying what directive to target
     * @return string the class name, but lowercase, no namespace
     * @override
     */
    public function getNamespace(){
        $parts = explode('\\', get_class($this));
        return strtolower(array_pop($parts));
    }

    /**
     * Get the class to use for any new ASTs where the directive does not specify.
     * @return string a class name.
     *
     * @override to change from `\Tlf\Lexer\Ast`
     */
    public function getAstClass(): string {
        return \Tlf\Lexer\Ast::class;
    }

    /**
     * Resolve a directive name into normalized directive objects.
     *
     * Accepted name forms:
     * - `:name` targets a directive on this grammar
     * - `grammar:name` asks the lexer for `grammar` and resolves `:name` on it
     * - `:name.stop` builds a directive from another directive's `stop` (see getDotDirective())
     * - `:_blank...` or `:+...` that are not defined start from an empty directive;
     *   `_blank` (exactly) and `+...` names get `-<counter>` appended to stay unique
     *
     * Each returned directive has `_name` and `_grammar` set. If the resolved directive
     * has an `is` entry, the directives it points to are returned instead.
     *
     * @param string $directiveName the directive name, in one of the forms above
     * @param array $overrides instructions to apply over the source directive
     * @return array a key=>value array of directives, keyed by directive name.
     * @throws \Exception if the name is malformed or the directive is not available
     */
    public function getDirectives($directiveName, array $overrides=[]){

        // substr() instead of `$directiveName[0]` so an empty name raises the exception below rather than an offset warning
        if (substr($directiveName,0,1)!==':'){
            $parts = explode(':',$directiveName);
            if (count($parts)!=2){
                throw new \Exception("Directive '$directiveName' must contain one colon like `grammar:directive`. leave grammar blank for current grammar.");
            }
            $grammar = $this->lexer->getGrammar($parts[0]);
            $directiveName = ':'.$parts[1];
            if ($grammar!==$this){
                // forward the overrides, otherwise they are silently lost for cross-grammar targets
                return $grammar->getDirectives($directiveName, $overrides);
            }
        }
        // From here on $directiveName always starts with ':'

        $overrides = $this->normalizeDirective($overrides);

        $directiveName = substr($directiveName, 1);
        $sourceDirective = $this->directives[$directiveName] ?? $this->getDotDirective($directiveName);

        if ($sourceDirective===null){
            if (substr($directiveName,0,6)==='_blank'){
                $sourceDirective = [];
                if (strlen($directiveName)==6){
                    $directiveName = $directiveName .'-'.$this->blankCount++;
                }
            } else if (substr($directiveName,0,1)==='+'){
                $sourceDirective = [];
                $directiveName = $directiveName .'-'.$this->blankCount++;
            }
        }
        if ($sourceDirective===null){
            throw new \Exception("Directive '$directiveName' not available on '".get_class($this)."'");
        }

        $sourceDirective = $this->normalizeDirective($sourceDirective);
        $overriddenDirective = $this->getOverriddenDirective($overrides, $sourceDirective);
        $overriddenDirective->_name = $directiveName;
        $overriddenDirective->_grammar = $this;

        if (!isset($overriddenDirective->is)){
            return [$directiveName=>$overriddenDirective];
        }

        return $this->expandDirectiveWithIs($overriddenDirective, (array)$overrides);
    }

    /**
     * Get multiple directives this one is pointing to.
     *
     * Every key of `$directive->is` is resolved with `getDirectives()`, passing
     * `$overridesDirective` as the overrides. Other instructions on `$directive`
     * itself are not validated or applied here.
     *
     * @roadmap(current) Version 1: Load all the targets as source directives, then process overrides with $directive as the overrides
     * @roadmap(next) Version 2: Load all the targets as source directives, using their own array values as overrides over the source, then process that as the source and $directive as the override
     * @roadmap(future, maybe) Version 3: The parent directive (who defines the 'is')... Its key/values are treated as overrides. The 'is' targets are loaded as source directives, and the parent key=>values (except 'is') are applied as the overrides to make a new source directive. Then the 'is' targets' own array values are applied as the overrides to make another new source directive. Then $directive is applied on each to make ANOTHER new source directive. These ones are returned.
     *
     * @param object $directive a directive with an `is` array of `directiveName=>overrides`
     * @param array|object $overridesDirective overrides passed on to every target
     * @return array directives keyed by their `_name`
     * @throws \Exception if an `is` entry carries its own overrides (not supported yet)
     */
    public function expandDirectiveWithIs(object $directive, $overridesDirective = []){
        $out = [];
        foreach ($directive->is as $key=>$override){
            // Non-countable entries must not reach count(): it throws a TypeError on PHP 8.
            $hasOverrides = is_array($override) ? count($override)>0 : $override!==null;
            if ($hasOverrides){
                throw new \Exception("We don't yet process overrides on 'is' entries.");
            }

            $subDirectives = $this->getDirectives($key, (array)$overridesDirective);
            foreach ($subDirectives as $directiveToAdd){
                $out[$directiveToAdd->_name] = $directiveToAdd;
            }
        }

        return $out;
    }

    /**
     * Build a new directive from a source directive and a set of overrides.
     *
     * - Inheritance rules:
     *   - if raw `match` directive is first in src directive, then add it first
     *   - then add instructions from the child directive, in declared order
     *   - then add all other instructions from the source directive, in their declared order.
     *   - If child directive contains any keys found in source directive, then do not copy the value from the source directive. The child simply overwrites (but in the child's declared order)
     *   - Override keys starting with `_` are ignored.
     *   - `null` entries are treated as absent. Other non-array entries are handled as
     *     whole values: the override wins if it has one, otherwise the source value is kept.
     *
     * @param object $overridesDirective The new directive / overrides, but not yet filled by the source
     * @param object $sourceDirective the original directive
     * @return object the merged directive
     */
    public function getOverriddenDirective(object $overridesDirective, object $sourceDirective){
        $newDirective = [];

        // 1. A leading `match` in the source keeps its first position, unless the override redefines it.
        foreach ($sourceDirective as $isn=>$instructionList){
            if (!is_array($instructionList))continue;
            $firstInstruction = array_slice($instructionList,0,1);
            if (isset($firstInstruction['match'])
                &&!isset($overridesDirective->$isn['match'])
            ){
                $newDirective[$isn]['match'] = $firstInstruction['match'];
            }
        }

        // 2. Instructions from the overrides, in their declared order.
        foreach ($overridesDirective as $isn=>$instructionList){
            if (substr((string)$isn,0,1)==='_')continue;
            if ($instructionList===null)continue;
            if (!is_array($instructionList)){
                // not an instruction list, so it replaces the whole entry
                $newDirective[$isn] = $instructionList;
                continue;
            }
            $newDirective[$isn] = ($newDirective[$isn] ?? []) + $instructionList;
        }

        // 3. Everything from the source that the overrides did not define.
        foreach ($sourceDirective as $isn=>$instructionList){
            if ($instructionList===null)continue;
            if (!is_array($instructionList)){
                if (!array_key_exists($isn, $newDirective))$newDirective[$isn] = $instructionList;
                continue;
            }
            // a non-array override already replaced this whole entry
            if (isset($newDirective[$isn])&&!is_array($newDirective[$isn]))continue;
            foreach ($instructionList as $instruction=>$value){
                if (!isset($newDirective[$isn][$instruction]))$newDirective[$isn][$instruction] = $value;
            }
        }

        return (object)$newDirective;
    }

    /**
     * Bring a directive into its canonical object form.
     *
     * - Arrayify instructions that need to be arrayified.
     * - Convert bool value-keys like 'buffer.clear' to 'buffer.clear'=>true
     * - Convert value-keys starting with `then ` to `'then ...'=>[]`
     * - Convert value-keys in `is` to `name=>[]`
     *
     * An object passed in is modified in place; an array is cast to a new object.
     *
     * @param array|object $d the directive to normalize
     * @return object the normalized directive
     */
    public function normalizeDirective($d){
        if (!is_object($d))$d = (object)$d;
        $instructionSetNames = ['start', 'match', 'stop'];
        $arrayify = [
            'match',
        ];
        foreach ($instructionSetNames as $isn){
            if (!isset($d->$isn))continue;
            $in = $d->$isn;
            // a bare value is shorthand for a single `match` instruction
            if (!is_array($in))$in = ['match'=>$in];
            $out = [];
            foreach ($in as $key=>$value){
                if (is_int($key)){
                    // value-key: the value is really the instruction name
                    $out[$value] = substr($value,0,5)=='then ' ? [] : true;
                    continue;
                }
                // convert values to array
                if (in_array($key, $arrayify, true)&&!is_array($value)){
                    $out[$key] = [$value];
                    continue;
                }

                $out[$key] = $value;
            }
            $d->$isn = $out;
        }

        $alternateSetAutoValues = [
            'is'=>[],
        ];

        foreach ($alternateSetAutoValues as $setName=>$autoValue){
            if (!isset($d->$setName))continue;
            // foreach walks a copy of the array, so the entries can be re-keyed in place
            foreach ($d->$setName as $key=>$value){
                if (is_int($key)){
                    $newKey = $value;
                    $newValue = $autoValue;
                } else {
                    $newKey = $key;
                    $newValue = $value;
                }
                unset($d->{$setName}[$key]);
                $d->{$setName}[$newKey] = $newValue;
            }
        }

        return $d;
    }

    /**
     * Get a directive that has a dot-form like `:string.stop` to get a new directive whose `start` is `:string`'s `stop`
     *
     * @param string $key the directive name without the leading colon
     * @return array|null a single directive, or null if `$key` has no dot or the directive before the dot is not defined
     * @throws \Exception if `$key` has more than one dot or the part after the dot is not `stop`
     */
    protected function getDotDirective($key){
        $parts = explode('.',$key);
        if (count($parts)<=1)return null;
        if (count($parts)>2){
            throw new \Exception("We haven't implementented nested directive names, so '$key' won't work yet.");
        }

        [$directiveName, $target] = $parts;

        if ($target!=='stop'){
            throw new \Exception("We don't have special handling for '$target' yet. Caused by directive '$directiveName'");
        }

        $directive = $this->directives[$directiveName] ?? null;
        // Without a base directive there is nothing to derive from; let the caller report it as unavailable.
        if ($directive===null)return null;

        return [
            'start'=>$directive['stop']??null,
            'onStart'=>$directive['onStop']??[],
        ];
    }

    /**
     * Called before any characters are added to the token
     *
     * @usage Set placeholder values
     * @param Tlf\Lexer $lexer the lexer
     * @param Tlf\Lexer\Ast $ast The root ast. Usually a 'file' type ast with 'ext', 'name', and 'path' set
     * @param Tlf\Lexer\Token $token a token with an empty buffer
     *
     * @export(DocBlock.onLexerStart)
     */
    public function onLexerStart(\Tlf\Lexer $lexer,\Tlf\Lexer\Ast $ast,\Tlf\Lexer\Token $token){

    }

    /**
     * Hook with an empty default implementation; override it to react to the lexer ending.
     *
     * @param Tlf\Lexer $lexer the lexer
     * @param Tlf\Lexer\Ast $ast an ast
     * @param Tlf\Lexer\Token|null $token a token, or null
     */
    public function onLexerEnd(\Tlf\Lexer $lexer,\Tlf\Lexer\Ast $ast, ?\Tlf\Lexer\Token $token){
    }

    /**
     * Hook with an empty default implementation; override it to react to this grammar being added.
     *
     * @param Tlf\Lexer $lexer the lexer
     */
    public function onGrammarAdded(\Tlf\Lexer $lexer){
    }

}