
Convert code or other text into a structured tree (multi-dimensional array).

In This File:

  • create a grammar with directives & handler functions
  • test a grammar
  • a complex directive that builds an AST without PHP
  • How to write an extension

Usage: Parsing code into an AST

To parse code:

  • instantiate the lexer
  • instantiate the grammar(s) to use
  • set up the starting directive(s)
  • set up the root AST
  • lex it

Example:


To extend the lexer to process unsupported files, or to process files differently:

  • Create a Grammar class, extending from \Tlf\Lexer\Grammar
  • Write directive tests, starting with very simple ones
  • Write directives for processing
  • Write helper functions that support your directives

Create a Grammar


/** this is a partial copy of the bash grammar */
class MyGrammar extends \Tlf\Lexer\Grammar {

    protected $my_directives = [
                // ':comment',

                'rewind 2',
                // 'forward 2'

        // an additional 'comment' directive is below

    public function getNamespace(){return 'mygrammar';}

    public function __construct(){
        $this->directives = array_merge(
            // $this->_other_directives,

    public function onLexerStart($lexer,$file,$token){


    public function handleDocblockEnd($lexer, $ast, $token, $directive){
        $block = $token->buffer();
        $clean_input = preg_replace('/^\s*#+/m','',$block);
        $db_grammar = new \Tlf\Lexer\DocblockGrammar();
        $ast = $db_grammar->buildAstWithAttributes(explode("\n",$clean_input));
        $lexer->setPrevious('docblock', $ast);

    public function handleFunction($lexer, $ast, $token, $directive){
        // $func_name = $token->match(1);
        $func = new \Tlf\Lexer\Ast('function');
        $func->name = $token->match(1);
        $func->docblock = $lexer->previous('docblock');
        $lexer->getHead()->add('function', $func);

Test a Grammar


class MyGrammarTest extends \Tlf\Lexer\Test\Tester {

    protected $my_tests = [
            // the 'comment' directive is below and can be added to the `MyGrammar` that is above
            'start'=>'comment', // t
            'input'=>"var=\"abc\"\n#I am a comment\nvarb=\"def\"",
                        'src'=>'#I am a comment',
                        'description'=> "I am a comment",

    public function testBashDirectives(){
        $myGrammar = new \MyGrammar();
        $grammars = [
        // $docGram->buildDirectives();

        $this->runDirectiveTests($grammars, $this->my_tests);


A more complex directive

// you would put this in your directives class
$directives = [
            'rewind 2',
            'forward 1',
            // you can create & modify ASTs all in the directive code, without php
            'buffer.clear //again',

        // `match` gets called for each char after `start`
            'match'=>'/@[a-zA-Z0-9]/', // match an @attribute
            'rewind 1',
            'ast.append src',
            'rewind 1 // again',
            'ast.append description',
            'forward 2',
            'then :+'=>[ // the :+ means that we're defining a new directive rather than referencing an existing one
                    //just immediately start
                    'rewind 1',
                    // i honestly don't know why I have this here.
                    'rewind 1',
                    'ast.append src',

            'ast.append src',
            'ast.append description',