Lexer

Convert code or other text into a structured tree (multi-dimensional array).

In This File:

  • create a grammar with directives & handler functions
  • test a grammar
  • a complex directive that builds an AST without php
  • How to write an extension

Usage, Parsing code into an AST

To parse code:

  • instantiate the lexer
  • instantiate the grammar(s) to use
  • setup the starting directive(s)
  • setup the root AST.
  • lex it

Example:
@import(Test.Doc.LexPhpString)

Extending

To Extend the Lexer and process unsupported files, or to process files differently:

  • Create a Grammar class, extending from \Tlf\Lexer\Grammar
  • Write directive tests, starting with very simple ones
  • Write directives for processing
  • Write handler functions (called via `this:methodName`) that support your directives

Create a Grammar

<?php

/**
 * Example grammar (a partial copy of the bash grammar).
 *
 * Declares a set of directives and the handler methods that those directives
 * invoke through `this:methodName` instructions.
 */
class MyGrammar extends \Tlf\Lexer\Grammar {

    /**
     * Directive definitions for this grammar, keyed by directive name.
     * Entries prefixed with `:` (e.g. `:docblock`) refer to other directives in this array.
     */
    protected $my_directives = [
        'root'=>[
            'is'=>[
                // ':comment',
                ':docblock',
                ':function',
            ],
        ],

        'docblock'=>[
            'start'=>[
                'match'=>'##',
            ],
            'stop'=>[
                'match'=>'/(^\s*[^\#])/m',
                'rewind 2',
                'this:handleDocblockEnd',
                'buffer.clear',
                // 'forward 2'
            ]
        ],

        'function'=>[
            'start'=>[
                'match'=>'/(?:function\s+)?([a-zA-Z\_0-9]*)(?:(?:\s*\(\))|\s+)\{/',
                'this:handleFunction',
                'stop',
                'buffer.clear',
            ]
        ],
        // an additional 'comment' directive is below
    ];

    /**
     * @return string the namespace identifying this grammar
     */
    public function getNamespace(){return 'mygrammar';}

    /**
     * Build the active directive list from the directive set(s) declared on this class.
     */
    public function __construct(){
        // Merge every directive set this grammar provides. Only one set exists here;
        // add further arrays as extra arguments to combine them.
        $this->directives = array_merge(
            $this->my_directives,
            // $this->_other_directives,
        );
    }

    /**
     * Hook left intentionally empty in this example.
     * NOTE(review): presumably invoked by the lexer before lexing begins - confirm against \Tlf\Lexer\Grammar.
     */
    public function onLexerStart($lexer,$file,$token){

    }

    /**
     * Handler for the `docblock` directive's `stop` instructions.
     *
     * Strips the leading whitespace and `#` characters from every line of the
     * buffered text, builds a docblock AST from the remaining lines, and stores
     * it on the lexer as the previous 'docblock'.
     *
     * @param mixed $lexer the lexer; receives the docblock via setPrevious()
     * @param mixed $ast unused by this handler
     * @param mixed $token the token whose buffer holds the raw docblock text
     * @param mixed $directive unused by this handler
     */
    public function handleDocblockEnd($lexer, $ast, $token, $directive){
        $block = $token->buffer();
        $clean_input = preg_replace('/^\s*#+/m','',$block);
        $db_grammar = new \Tlf\Lexer\DocblockGrammar();
        // use a distinct local instead of overwriting the $ast parameter
        $docblock_ast = $db_grammar->buildAstWithAttributes(explode("\n",$clean_input));
        $lexer->setPrevious('docblock', $docblock_ast);
    }

    /**
     * Handler for the `function` directive's `start` instructions.
     *
     * Creates a 'function' AST named after the first capture group of the
     * directive's regex, attaches the lexer's previous 'docblock', and adds
     * the new AST to the lexer's head AST.
     *
     * @param mixed $lexer the lexer; provides previous('docblock') and getHead()
     * @param mixed $ast unused by this handler
     * @param mixed $token the token holding the regex match
     * @param mixed $directive unused by this handler
     */
    public function handleFunction($lexer, $ast, $token, $directive){
        $func = new \Tlf\Lexer\Ast('function');
        $func->name = $token->match(1);
        $func->docblock = $lexer->previous('docblock');
        $lexer->getHead()->add('function', $func);
    }
}

Test a Grammar

<?php

/**
 * Directive tests for MyGrammar.
 *
 * Each entry in $my_tests names a starting directive, the input text to lex,
 * and the expected resulting tree.
 */
class MyGrammarTest extends \Tlf\Lexer\Test\Tester {

    /**
     * Test cases keyed by a descriptive name.
     */
    protected $my_tests = [
        'Comments'=>[
            // the 'comment' directive is below and can be added to the `MyGrammar` that is above
            'start'=>'comment', // the directive to start with
            'input'=>"var=\"abc\"\n#I am a comment\nvarb=\"def\"",
            'expect'=>[
                "comments"=>[
                    0=>[
                        'type'=>'comment',
                        'src'=>'#I am a comment',
                        'description'=> "I am a comment",
                    ]
                ],
            ],
        ],
    ];

    /**
     * Run every case in $my_tests against a fresh MyGrammar instance.
     */
    public function testBashDirectives(){
        $myGrammar = new \MyGrammar();
        $grammars = [
            $myGrammar
        ];

        $this->runDirectiveTests($grammars, $this->my_tests);
    }

}

A more complex directive

<?php
// you would put this in your directives class
// Directive set defining a single `comment` directive with `start`, `match` and `stop` sections.
// Each section holds a `match` pattern followed by an ordered list of instructions,
// so the order of the entries matters.
// NOTE(review): `rewind N` / `forward N` presumably move the read position back/forward
// by N characters - confirm against the lexer implementation.
$directives = [
    'comment'=>[
        'start'=>[
            // a '#' followed by any character other than '#'
            'match'=>'/#[^\#]/',
            'rewind 2',
            'buffer.clear',
            'forward 1',
            // you can create & modify ASTs all in the directive code, without php
            'ast.new'=>[
                '_addto'=>'comments',
                '_type'=>'comment',
                'src'=>'_token:buffer',
            ],
            'buffer.clear //again',
        ],

        // `match` gets called for each char after `start`
        'match'=>[
            'match'=>'/@[a-zA-Z0-9]/', // match an @attribute
            'rewind 1',
            'ast.append src',
            'rewind 1 // again',
            'ast.append description',
            'forward 2',
            'buffer.clear',
            'then :+'=>[ // the :+ means that we're defining a new directive rather than referencing an existing one
                'start'=>[
                    // the empty pattern makes this inline directive start immediately
                    'match'=>'',
                    'rewind 1',
                ],
                'stop'=>[
                    // NOTE(review): the original author was unsure why this stop rule is needed.
                    // It uses the same end-of-line pattern as the outer `stop` below - verify before removing.
                    'match'=>'/(\\r|\\n)/',
                    'rewind 1',
                    'ast.append src',
                    'buffer.clear',
                ]
            ],

        ],
        'stop'=>[
            // a carriage return or newline
            'match'=>'/(\\r|\\n)/',
            // NOTE(review): written as 'rewind'=>1 / 'forward'=>1 here but as 'rewind 1' / 'forward 1'
            // strings elsewhere in this array - confirm both forms are accepted.
            'rewind'=>1,
            'ast.append src',
            'ast.append description',
            'forward'=>1,
            'buffer.clear',
        ],
    ]
];