Lexer
Convert code or other text into a structured tree (multi-dimensional array).
In This File:
- Create a grammar with directives & handler functions
- Test a grammar
- A more complex directive that builds an AST without PHP
- How to write an extension
Usage, Parsing code into an AST
To parse code:
- instantiate the lexer
- instantiate the grammar(s) to use
- set up the starting directive(s)
- set up the root AST
- lex it

Example:
@import(Test.Doc.LexPhpString)
Extending
To Extend the Lexer and process unsupported files, or to process files differently:
- Create a Grammar class that extends `\Tlf\Lexer\Grammar`
- Write directive tests, starting with very simple ones
- Write directives for processing
- Write the handler functions that your directives call
Create a Grammar
<?php
/**
 * Example grammar; this is a partial copy of the bash grammar.
 *
 * It declares a `root` directive that delegates to the `docblock` and
 * `function` directives, and two handler methods that those directives call
 * via `this:<methodName>`.
 */
class MyGrammar extends \Tlf\Lexer\Grammar {

    /**
     * Directives declared by this grammar, keyed by directive name.
     * The constructor copies them into `$this->directives`.
     */
    protected $my_directives = [
        'root'=>[
            'is'=>[
                // ':comment',
                ':docblock',
                ':function',
            ],
        ],
        'docblock'=>[
            'start'=>[
                'match'=>'##',
            ],
            'stop'=>[
                // first line whose first non-whitespace character is not '#'
                'match'=>'/(^\s*[^\#])/m',
                'rewind 2',
                'this:handleDocblockEnd',
                'buffer.clear',
                // 'forward 2'
            ]
        ],
        'function'=>[
            'start'=>[
                // capture group 1 is the function name
                'match'=>'/(?:function\s+)?([a-zA-Z\_0-9]*)(?:(?:\s*\(\))|\s+)\{/',
                'this:handleFunction',
                'stop',
                'buffer.clear',
            ]
        ],
        // an additional 'comment' directive is below
    ];

    /**
     * @return string the namespace identifying this grammar
     */
    public function getNamespace(){
        return 'mygrammar';
    }

    /**
     * Register this grammar's directives.
     *
     * `array_merge` is kept so further directive arrays can be appended.
     */
    public function __construct(){
        $this->directives = array_merge(
            $this->my_directives,
            // $this->_other_directives,
        );
    }

    /**
     * Hook with an intentionally empty body; this example needs no setup.
     *
     * @param mixed $lexer
     * @param mixed $file
     * @param mixed $token
     */
    public function onLexerStart($lexer,$file,$token){
    }

    /**
     * Called from the `docblock` directive's `stop` instructions.
     *
     * Strips the leading `#` markers from each buffered line, builds a docblock
     * AST from the remaining text, and stores it on the lexer under the key
     * 'docblock' so `handleFunction` can pick it up.
     *
     * @param mixed $lexer the lexer; receives the docblock via `setPrevious()`
     * @param mixed $ast unused here
     * @param mixed $token the token whose buffer holds the raw docblock text
     * @param mixed $directive unused here
     */
    public function handleDocblockEnd($lexer, $ast, $token, $directive){
        $raw_block = $token->buffer();
        // drop leading whitespace and '#' characters at the start of every line
        $clean_input = preg_replace('/^\s*#+/m','',$raw_block);
        $db_grammar = new \Tlf\Lexer\DocblockGrammar();
        $docblock_ast = $db_grammar->buildAstWithAttributes(explode("\n",$clean_input));
        $lexer->setPrevious('docblock', $docblock_ast);
    }

    /**
     * Called from the `function` directive's `start` instructions.
     *
     * Creates a 'function' AST named after the first capture group of the
     * directive's regex, attaches whatever the lexer has stored under
     * 'docblock', and adds it to the lexer's head AST.
     *
     * @param mixed $lexer the lexer; supplies the previous docblock and the head AST
     * @param mixed $ast unused here
     * @param mixed $token the token holding the regex match
     * @param mixed $directive unused here
     */
    public function handleFunction($lexer, $ast, $token, $directive){
        $func = new \Tlf\Lexer\Ast('function');
        $func->name = $token->match(1);
        $func->docblock = $lexer->previous('docblock');
        $lexer->getHead()->add('function', $func);
    }
}
Test a Grammar
<?php
/**
 * Directive tests for `MyGrammar`.
 */
class MyGrammarTest extends \Tlf\Lexer\Test\Tester {

    /**
     * Test cases keyed by test name, passed to `runDirectiveTests()`.
     *
     * Each case has a 'start' directive name, an 'input' string and an
     * 'expect' array. Presumably 'expect' is compared against the AST the
     * lexer produces — confirm against the Tester class.
     */
    protected $my_tests = [
        'Comments'=>[
            // the 'comment' directive is below and can be added to the `MyGrammar` that is above
            'start'=>'comment',
            'input'=>"var=\"abc\"\n#I am a comment\nvarb=\"def\"",
            'expect'=>[
                "comments"=>[
                    0=>[
                        'type'=>'comment',
                        'src'=>'#I am a comment',
                        'description'=> "I am a comment",
                    ]
                ],
            ],
        ],
    ];

    /**
     * Run every case in `$my_tests` against a fresh `MyGrammar` instance.
     */
    public function testBashDirectives(){
        $myGrammar = new \MyGrammar();
        $grammars = [
            $myGrammar
        ];
        $this->runDirectiveTests($grammars, $this->my_tests);
    }
}
A more complex directive
<?php
// You would put this in your directives class.
// Example 'comment' directive: it creates a new AST of type 'comment' under
// 'comments' and appends to its 'src' and 'description' entries using only
// directive instructions.
// NOTE(review): instruction semantics (rewind/forward/buffer.clear/ast.append)
// are defined by the lexer, not shown here — confirm against the Lexer docs.
$directives = [
'comment'=>[
'start'=>[
// a '#' followed by any character that is not another '#'
'match'=>'/#[^\#]/',
'rewind 2',
'buffer.clear',
'forward 1',
// you can create & modify ASTs all in the directive code, without php
'ast.new'=>[
'_addto'=>'comments',
'_type'=>'comment',
// presumably fills 'src' from the current token buffer — confirm
'src'=>'_token:buffer',
],
'buffer.clear //again',
],
// `match` gets called for each char after `start`
'match'=>[
'match'=>'/@[a-zA-Z0-9]/', // match an @attribute
'rewind 1',
'ast.append src',
'rewind 1 // again',
'ast.append description',
'forward 2',
'buffer.clear',
'then :+'=>[ // the :+ means that we're defining a new directive rather than referencing an existing one
'start'=>[
// just immediately start (empty match pattern)
'match'=>'',
'rewind 1',
],
'stop'=>[
// NOTE(review): the original author was unsure why this stop rule is
// here — confirm it is needed before changing or removing it.
// The pattern matches a carriage return or a newline.
'match'=>'/(\\r|\\n)/',
'rewind 1',
'ast.append src',
'buffer.clear',
]
],
],
'stop'=>[
// end the comment at a carriage return or a newline
'match'=>'/(\\r|\\n)/',
// key=>value form; presumably equivalent to the 'rewind 1' string form used above — confirm
'rewind'=>1,
'ast.append src',
'ast.append description',
// key=>value form; presumably equivalent to a 'forward 1' string instruction — confirm
'forward'=>1,
'buffer.clear',
],
]
];