mangadex/scripts/JBBCode/Parser.php

<?php

namespace JBBCode;

require_once 'ElementNode.php';
require_once 'TextNode.php';
require_once 'DefaultCodeDefinitionSet.php';
require_once 'DocumentElement.php';
require_once 'CodeDefinition.php';
require_once 'CodeDefinitionBuilder.php';
require_once 'CodeDefinitionSet.php';
require_once 'NodeVisitor.php';
require_once 'ParserException.php';
require_once 'Tokenizer.php';
require_once 'visitors/NestLimitVisitor.php';
require_once 'InputValidator.php';

use JBBCode\CodeDefinition;

/**
 * BBCodeParser is the main parser class that constructs and stores the parse tree. Through this class
 * new bbcode definitions can be added, and documents may be parsed and converted to html/bbcode/plaintext, etc.
 *
 * @author jbowens
 */
class Parser
{

    /*
     * States of the character-by-character scanner in parseOptions().
     */

    /* Entered when a space ends the tag name; the following character moves the scanner on to OPTION_STATE_KEY. */
    const OPTION_STATE_DEFAULT = 0;
    /* Initial state: characters are collected as the tag name until '=', a space or the end of the content. */
    const OPTION_STATE_TAGNAME = 1;
    /* Characters are collected as an option key until '=' is seen; spaces are skipped. */
    const OPTION_STATE_KEY = 2;
    /* Characters are collected as an unquoted option value until a space or the end of the content. */
    const OPTION_STATE_VALUE = 3;
    /* Inside a double-quoted option value; ends at the next '"' or the end of the content. */
    const OPTION_STATE_QUOTED_VALUE = 4;
    /* Entered when an unquoted value starts with "javascript:"; characters are collected up to and including the next ';'. */
    const OPTION_STATE_JAVASCRIPT = 5;

    /* The root element of the parse tree. reset() replaces it with a new DocumentElement. */
    protected $treeRoot;

    /* The list of bbcode definitions known to the parser, in the order they were added. */
    protected $bbcodes;

    /* Node id counter used while parsing. reset() sets it to 1 and it is pre-incremented
     * every time a node is created, so the first node created after a reset gets id 2. */
    protected $nextNodeid;

    /**
     * Constructs an instance of the BBCode parser.
     *
     * The new parser holds an empty parse tree (set up by reset()) and has no
     * bbcode definitions registered yet.
     */
    public function __construct()
    {
        /* Create the initial document root and node id counter. */
        $this->reset();
        /* No definitions are known until some are added through the add*() methods. */
        $this->bbcodes = array();
    }

    /**
     * Registers a simple (text-replacement only) bbcode definition.
     *
     * The arguments are handed to a CodeDefinitionBuilder and the definition it
     * builds is registered through addCodeDefinition().
     *
     * @param string  $tagName      the tag name of the code (for example the b in [b])
     * @param string  $replace      the html to use, with {param} and optionally {option} for replacements
     * @param boolean $useOption    whether or not this bbcode uses the secondary {option} replacement
     * @param boolean $parseContent whether or not to parse the content within these elements
     * @param integer $nestLimit    an optional limit of the number of elements of this kind that can be nested within
     *                              each other before the parser stops parsing them.
     * @param InputValidator $optionValidator   optional validator to run {option} through
     * @param InputValidator $bodyValidator     optional validator to run {param} through (only used if $parseContent == false)
     *
     * @return Parser this parser, so calls can be chained
     */
    public function addBBCode($tagName, $replace, $useOption = false, $parseContent = true, $nestLimit = -1,
                              InputValidator $optionValidator = null, InputValidator $bodyValidator = null)
    {
        $definitionBuilder = new CodeDefinitionBuilder($tagName, $replace);
        $definitionBuilder->setUseOption($useOption);
        $definitionBuilder->setParseContent($parseContent);
        $definitionBuilder->setNestLimit($nestLimit);

        /* Both validators are optional; only pass along the ones that were supplied. */
        if (null !== $optionValidator) {
            $definitionBuilder->setOptionValidator($optionValidator);
        }
        if (null !== $bodyValidator) {
            $definitionBuilder->setBodyValidator($bodyValidator);
        }

        $definition = $definitionBuilder->build();
        $this->addCodeDefinition($definition);

        return $this;
    }

    /**
     * Registers a single, possibly custom, bbcode definition with the parser.
     *
     * This is the entry point for definitions that need more than plain text
     * replacement: subclass CodeDefinition, instantiate it and pass it in here.
     *
     * @param CodeDefinition $definition the bbcode definition to add
     *
     * @return Parser this parser, so calls can be chained
     */
    public function addCodeDefinition(CodeDefinition $definition)
    {
        /* Definitions are kept in the order they were registered. */
        $this->bbcodes[] = $definition;

        return $this;
    }

    /**
     * Registers every CodeDefinition contained in the given set.
     *
     * Each definition goes through addCodeDefinition(), in the order the set
     * returns them.
     *
     * @param CodeDefinitionSet $set  the set of definitions to add
     *
     * @return Parser this parser, so calls can be chained
     */
    public function addCodeDefinitionSet(CodeDefinitionSet $set)
    {
        $definitions = $set->getCodeDefinitions();

        foreach ($definitions as $definition) {
            $this->addCodeDefinition($definition);
        }

        return $this;
    }

    /**
     * Renders the current parse tree as plain text by delegating to the root
     * element. Only {param} content is returned; BBCode markup is ignored.
     *
     * @return string a text representation of the parse tree
     */
    public function getAsText()
    {
        $root = $this->treeRoot;

        return $root->getAsText();
    }

    /**
     * Renders the current parse tree back to bbcode by delegating to the root
     * element. The result is identical to the inputted string, except that
     * unclosed tags will be closed.
     *
     * @return string a bbcode representation of the parse tree
     */
    public function getAsBBCode()
    {
        $root = $this->treeRoot;

        return $root->getAsBBCode();
    }

    /**
     * Renders the current parse tree as HTML by delegating to the root element.
     * All BBCode replacements are made; this is generally the method to use to
     * retrieve the parsed bbcode.
     *
     * @return string a parsed html string
     */
    public function getAsHTML()
    {
        $root = $this->treeRoot;

        return $root->getAsHTML();
    }

    /**
     * Hands the given NodeVisitor to the root of the parse tree.
     *
     * @param NodeVisitor $nodeVisitor the visitor to pass to the root element
     *
     * @return Parser this parser, so calls can be chained
     */
    public function accept(NodeVisitor $nodeVisitor)
    {
        $root = $this->treeRoot;
        $root->accept($nodeVisitor);

        return $this;
    }
    /**
     * Builds a new parse tree from a string of bbcode markup, replacing any
     * tree left over from an earlier call.
     *
     * @param string $str the bbcode markup to parse
     *
     * @return Parser this parser, so calls can be chained
     */
    public function parse($str)
    {
        /* Start over with a fresh DocumentElement as the tree root. */
        $this->reset();

        $tokenizer = new Tokenizer($str);
        $current = $this->treeRoot;

        while ($tokenizer->hasNext()) {
            $current = $this->parseStartState($current, $tokenizer);

            $definition = $current->getCodeDefinition();
            if (!$definition || false !== $definition->parseContent()) {
                /* Ordinary element (or the document root): keep parsing normally. */
                continue;
            }

            /* The element we just entered forbids parsing of its contents: swallow
             * everything up to its closing tag as text, then step back out of it. */
            $this->parseAsTextUntilClose($current, $tokenizer);
            $current = $current->getParent();
        }

        /* Nest limits were ignored while parsing. Do an O(n) traversal to remove any
         * elements that are nested beyond their CodeDefinition's nest limit. */
        $this->removeOverNestedElements();

        return $this;
    }

    /**
     * Runs a NestLimitVisitor over the parse tree to remove any elements that
     * are nested beyond their nest limit.
     *
     * This method is deprecated as public API. In a future release its access
     * privileges will be made protected.
     *
     * @deprecated
     */
    public function removeOverNestedElements()
    {
        $this->accept(new \JBBCode\visitors\NestLimitVisitor());
    }

    /**
     * Discards the old parse tree, if any, and prepares for a new parse.
     */
    protected function reset()
    {
        /* The document element is created with nodeid 0, so numbering restarts at 1. */
        $this->nextNodeid = 1;
        /* A brand new root replaces whatever tree was there before. */
        $this->treeRoot = new DocumentElement();
    }

    /**
     * Tells whether a bbcode with the given tag name and option usage has been
     * registered. The requested tag name is lower-cased before it is compared.
     *
     * @param string  $tagName    the bbcode tag name to check
     * @param boolean $usesOption whether or not the bbcode accepts an option
     *
     * @return bool true if the code exists, false otherwise
     */
    public function codeExists($tagName, $usesOption = false)
    {
        $wantedName = strtolower($tagName);

        foreach ($this->bbcodes as $candidate) {
            if ($wantedName != $candidate->getTagName()) {
                continue;
            }
            if ($usesOption == $candidate->usesOption()) {
                return true;
            }
        }

        return false;
    }

    /**
     * Looks up the first registered CodeDefinition whose tag name and option
     * usage match. The requested tag name is lower-cased before it is compared.
     *
     * @param string  $tagName    the tag name of the bbcode being searched for
     * @param boolean $usesOption whether or not the bbcode accepts an option
     *
     * @return CodeDefinition if the bbcode exists, null otherwise
     */
    public function getCode($tagName, $usesOption = false)
    {
        $wantedName = strtolower($tagName);
        $found = null;

        foreach ($this->bbcodes as $candidate) {
            if ($wantedName != $candidate->getTagName() || $candidate->usesOption() != $usesOption) {
                continue;
            }
            /* Definitions are searched in registration order; the first hit wins. */
            $found = $candidate;
            break;
        }

        return $found;
    }

    /**
     * Registers the definitions of a DefaultCodeDefinitionSet, a set of
     * default, standard bbcode definitions commonly used across the web.
     *
     * This method is deprecated. Please use DefaultCodeDefinitionSet and
     * addCodeDefinitionSet() instead.
     *
     * @deprecated
     */
    public function loadDefaultCodes()
    {
        $this->addCodeDefinitionSet(new DefaultCodeDefinitionSet());
    }

    /**
     * Adds text to the given parent.
     *
     * If the parent's last child is already a text node the string is appended
     * to it; otherwise a new TextNode is created, numbered and added as a child.
     *
     * @param ElementNode $parent  the parent of the text node
     * @param string      $string  the text to add
     *
     * @return TextNode the text node that now holds the string
     */
    protected function createTextNode(ElementNode $parent, $string)
    {
        $siblings = $parent->getChildren();
        $lastSibling = count($siblings) ? end($siblings) : null;

        if (null !== $lastSibling && $lastSibling->isTextNode()) {
            /* Merge adjacent text rather than creating a second text node. */
            $lastSibling->setValue($lastSibling->getValue() . $string);
            return $lastSibling;
        }

        $node = new TextNode($string);
        $node->setNodeId(++$this->nextNodeid);
        $parent->addChild($node);

        return $node;
    }

    /**
     * jBBCode parsing logic is loosely modelled after a FSM. While not every function maps
     * to a unique DFSM state, each function handles the logic of one or more FSM states.
     * This one handles the beginning parse state, when we are not currently inside a tag
     * name: it consumes one token and either starts tag parsing or stores it as text.
     *
     * @param ElementNode  $parent the current parent node we're under
     * @param Tokenizer    $tokenizer the tokenizer we're using
     *
     * @return ElementNode the new parent we should use for the next iteration.
     */
    protected function parseStartState(ElementNode $parent, Tokenizer $tokenizer)
    {
        $token = $tokenizer->next();

        if ('[' != $token) {
            /* Plain text: store it and let the main parse loop call us again. */
            $this->createTextNode($parent, $token);
            return $parent;
        }

        return $this->parseTagOpen($parent, $tokenizer);
    }

    /**
     * Handles the tokens that follow a [ which may open a tag.
     *
     * Stray brackets are turned into text. Only when the bracket is followed by
     * a non-bracket token and then directly by a ] is the token handed on to
     * parseTag() as potential tag content.
     *
     * @param ElementNode $parent  the current parent node
     * @param Tokenizer   $tokenizer  the tokenizer we're using
     *
     * @return ElementNode the new parent node
     */
    protected function parseTagOpen(ElementNode $parent, Tokenizer $tokenizer)
    {
        if (!$tokenizer->hasNext()) {
            /* The [ that sent us here was just a trailing [, not the opening
             * of a new tag. Treat it as text. */
            $this->createTextNode($parent, '[');
            return $parent;
        }

        /* Runs of [ are handled iteratively rather than recursively so that a
         * string like [[[[[[[[[...[[[ cannot overflow the stack. */
        for ($token = $tokenizer->next(); '[' == $token; $token = $tokenizer->next()) {
            /* The previous [ was a random bracket; it becomes text. */
            $this->createTextNode($parent, '[');
            if (!$tokenizer->hasNext()) {
                /* ... and so does the current one, since nothing follows it. */
                $this->createTextNode($parent, '[');
                return $parent;
            }
        }

        /* Peek at the token after $token without consuming it. */
        $followedByClose = false;
        if ($tokenizer->hasNext()) {
            $followedByClose = (']' == $tokenizer->next());
            $tokenizer->stepBack();
        }

        if (!$followedByClose) {
            /* No ] directly after the candidate content: all of it is text. */
            $this->createTextNode($parent, '['.$token);
            return $parent;
        }

        /* At this point $token is either ']' or plain text. */
        if (']' != $token) {
            /* Plain text... likely a tag name. */
            return $this->parseTag($parent, $tokenizer, $token);
        }

        /* An empty [] pair is just text. */
        $this->createTextNode($parent, '[');
        $this->createTextNode($parent, ']');
        return $parent;
    }

    /**
     * Splits the text found between [ and ] into a tag name and its options.
     *
     * The content is scanned one character at a time by a small state machine
     * driven by the OPTION_STATE_* constants. Keys and values are collected in
     * two parallel lists and combined into one array at the end. The tag name
     * is always the first key, so [url=x] yields array('url' => 'x') and
     * [quote author=x] yields array('quote' => '', 'author' => 'x').
     *
     * If a quoted value is directly followed by something other than a space or
     * a ], scanning is abandoned and everything after the first '=' becomes the
     * value stored under the tag name.
     *
     * If the collected keys and values cannot be paired up, no options are
     * returned at all.
     *
     * @param string $tagContent the raw text between the brackets
     *
     * @return array a two-element array: the tag name (including a leading '/'
     *               for closing tags) and the array of options keyed by name
     */
    protected function parseOptions($tagContent)
    {
        $buffer = "";
        $tagName = "";
        $state = static::OPTION_STATE_TAGNAME;
        $keys = array();
        $values = array();
        $options = array();

        $len = strlen($tagContent);

        try {
            /* Deliberately runs one step past the last character: on that final
             * step $char is null, which lets a state flush what it has buffered. */
            for ($idx = 0; $idx <= $len; $idx++) {
                $char = $idx < $len ? $tagContent[$idx] : null;
                switch ($state) {
                    case static::OPTION_STATE_TAGNAME:
                        switch ($char) {
                            case '=':
                                /* [tag=value]: the tag name doubles as the first key. */
                                $state = static::OPTION_STATE_VALUE;
                                $tagName = $buffer;
                                $keys[] = $tagName;
                                $buffer = "";
                                break;
                            case ' ':
                                /* [tag key=value]: the tag name is still recorded as a key. */
                                $state = static::OPTION_STATE_DEFAULT;
                                $tagName = $buffer;
                                $buffer = '';
                                $keys[] = $tagName;
                                break;
                            case null:
                                /* End of content: a bare [tag]. */
                                $tagName = $buffer;
                                $buffer = '';
                                $keys[] = $tagName;
                                break;
                            default:
                                $buffer .= $char;
                        }
                        break;

                    case static::OPTION_STATE_DEFAULT:
                        if (' ' === $char) {
                            /* Extra padding after the tag name. Stay in this state so the
                             * space does not become the first character of the next key. */
                            break;
                        }
                        $state = static::OPTION_STATE_KEY;
                        $buffer .= $char;
                        break;

                    case static::OPTION_STATE_VALUE:
                        switch ($char) {
                            case '"':
                                $state = static::OPTION_STATE_QUOTED_VALUE;
                                break;
                            case null: // intentional fall-through
                            case ' ': // key=value<space> delimits to next key
                                $values[] = $buffer;
                                $buffer = "";
                                $state = static::OPTION_STATE_KEY;
                                break;
                            case ":":
                                if ($buffer == "javascript") {
                                    $state = static::OPTION_STATE_JAVASCRIPT;
                                }
                                $buffer .= $char;
                                break;
                            default:
                                $buffer .= $char;
                        }
                        break;

                    case static::OPTION_STATE_JAVASCRIPT:
                        /* NOTE(review): a value that reaches the end of the content in this
                         * state without meeting a ';' is never flushed, so its key ends up
                         * without a value. Left unchanged - confirm whether that is intended. */
                        switch ($char) {
                            case ";":
                                $buffer .= $char;
                                $values[] = $buffer;
                                $buffer = "";
                                $state = static::OPTION_STATE_KEY;
                                break;
                            default:
                                $buffer .= $char;
                        }
                        break;

                    case static::OPTION_STATE_KEY:
                        switch ($char) {
                            case '=':
                                $state = static::OPTION_STATE_VALUE;
                                $keys[] = $buffer;
                                $buffer = '';
                                break;
                            case ' ': // ignore <space>key=value
                                break;
                            default:
                                $buffer .= $char;
                                break;
                        }
                        break;

                    case static::OPTION_STATE_QUOTED_VALUE:
                        switch ($char) {
                            case null:
                            case '"':
                                $state = static::OPTION_STATE_KEY;
                                $values[] = $buffer;
                                $buffer = '';

                                // peek ahead. If the next character is not a space or a closing brace, we have a bad tag and need to abort
                                if (isset($tagContent[$idx+1]) && $tagContent[$idx+1]!=" " && $tagContent[$idx+1]!="]" ) {
                                    throw new ParserException("Badly formed attribute: $tagContent");
                                }
                                break;
                            default:
                                $buffer .= $char;
                                break;
                        }
                        break;

                    default:
                        /* Not reachable with the states above; kept as a safety net. */
                        if (!empty($char)) {
                            $state = static::OPTION_STATE_KEY;
                        }
                }
            }

            if (count($keys) && count($values)) {
                if (count($keys) == (count($values) + 1)) {
                    /* One key more than values: the tag name itself carried no
                     * value, so give it an empty one to line the two lists up. */
                    array_unshift($values, "");
                }

                /* array_combine() needs lists of equal length: it returns false with a
                 * warning on PHP 7 and throws a ValueError on PHP 8 otherwise. When the
                 * lists still do not line up, report no options instead. */
                if (count($keys) == count($values)) {
                    $options = array_combine($keys, $values);
                }
            }
        } catch (ParserException $e) {
            // if we're in this state, then something evidently went wrong. We'll consider everything that came after the tagname to be the attribute for that keyname
            $options[$tagName] = substr($tagContent, strpos($tagContent, "=") + 1);
        }

        return array($tagName, $options);
    }

    /**
     * This is the next step in parsing a tag. It's possible for it to still be invalid at this
     * point but many of the basic invalid tag name conditions have already been handled.
     *
     * Three outcomes are possible:
     *  - the content is not a usable tag: it is stored as text and $parent is returned;
     *  - it closes an open element: the parent of that element is returned;
     *  - it opens a known bbcode: a new ElementNode is added to $parent and returned.
     *
     * @param ElementNode $parent  the current parent element
     * @param Tokenizer   $tokenizer  the tokenizer we're using
     * @param string      $tagContent  the text between the [ and the ], assuming there is actually a ]
     *
     * @return ElementNode the new parent element
     */
    protected function parseTag(ElementNode $parent, Tokenizer $tokenizer, $tagContent)
    {
        $hasClosingBracket = false;
        if ($tokenizer->hasNext()) {
            if (']' == $tokenizer->next()) {
                $hasClosingBracket = true;
            } else {
                /* The token we just read is not ours. Put it back so the main
                 * loop parses it instead of silently losing it. */
                $tokenizer->stepBack();
            }
        }

        if (!$hasClosingBracket) {
            /* This is a malformed tag. Both the previous [ and the tagContent
             * is really just plain text. */
            $this->createTextNode($parent, '[');
            $this->createTextNode($parent, $tagContent);
            return $parent;
        }

        /* This is a well-formed tag consisting of [something] or [/something], but
         * we still need to ensure that 'something' is a valid tag name. Additionally,
         * if it's a closing tag, we need to ensure that there was a previous matching
         * opening tag.
         */
        /* There could be attributes. */
        list($tmpTagName, $options) = $this->parseOptions($tagContent);
        if (!is_array($options)) {
            /* Be defensive about a non-array result so that count() and
             * setAttribute() below always receive an array. */
            $options = array();
        }

        $isClosingTag = '' != $tmpTagName && '/' == $tmpTagName[0];
        $actualTagName = $isClosingTag ? substr($tmpTagName, 1) : $tmpTagName;

        if ($isClosingTag) {
            /* This is attempting to close an open tag. We must verify that there exists an
             * open tag of the same type. A closing tag is also rejected when more than one
             * option was parsed from it (a single option is tolerated). */
            $elToClose = $parent->closestParentOfType($actualTagName);
            if (null == $elToClose || count($options) > 1) {
                /* Closing an unopened tag or carrying options. Treat everything as plain text. */
                $this->createTextNode($parent, '[');
                $this->createTextNode($parent, $tagContent);
                $this->createTextNode($parent, ']');
                return $parent;
            }

            /* We're closing $elToClose. In order to do that, we just need to return
             * $elToClose's parent, since that will change our effective parent to be
             * elToClose's parent. */
            return $elToClose->getParent();
        }

        $hasOptions = !empty($options);

        /* Verify that this is a known bbcode tag name. */
        if ('' == $actualTagName || !$this->codeExists($actualTagName, $hasOptions)) {
            /* This is an invalid tag name! Treat everything we've seen as plain text. */
            $this->createTextNode($parent, '[');
            $this->createTextNode($parent, $tagContent);
            $this->createTextNode($parent, ']');
            return $parent;
        }

        /* If we're here, this is a valid opening tag. Let's make a new node for it. */
        $el = new ElementNode();
        $el->setNodeId(++$this->nextNodeid);
        $el->setCodeDefinition($this->getCode($actualTagName, $hasOptions));
        if ($hasOptions) {
            /* We have attributes we should save. */
            $el->setAttribute($options);
        }
        $parent->addChild($el);
        return $el;
    }

    /**
     * Handles parsing elements whose CodeDefinitions disable parsing of element
     * contents. This function uses a rolling window of 3 tokens until it finds the
     * appropriate closing tag or reaches the end of the token stream. Every token
     * that is not part of that closing tag is added to $parent as text; the three
     * tokens of the closing tag itself are consumed and discarded.
     *
     * @param ElementNode $parent  the element whose contents must not be parsed
     * @param Tokenizer   $tokenizer  the tokenizer we're using
     *
     * @return ElementNode always $parent; stepping back out of the element is left to the caller
     */
    protected function parseAsTextUntilClose(ElementNode $parent, Tokenizer $tokenizer)
    {
        /* Fill the window. If the stream ends before three tokens are available,
         * whatever was read is plain text. */
        if (!$tokenizer->hasNext()) {
            return $parent;
        }
        $prevPrev = $tokenizer->next();
        if (!$tokenizer->hasNext()) {
            $this->createTextNode($parent, $prevPrev);
            return $parent;
        }
        $prev = $tokenizer->next();
        if (!$tokenizer->hasNext()) {
            $this->createTextNode($parent, $prevPrev);
            $this->createTextNode($parent, $prev);
            return $parent;
        }
        $curr = $tokenizer->next();

        /* The middle token of the closing tag we are looking for: [ /tagname ]. */
        $closingName = '/'.$parent->getTagName();

        while ('[' != $prevPrev || $closingName != strtolower($prev) || ']' != $curr) {
            /* The oldest token cannot be part of the closing tag any more. */
            $this->createTextNode($parent, $prevPrev);
            $prevPrev = $prev;
            $prev = $curr;
            if (!$tokenizer->hasNext()) {
                /* Ran out of input without finding the closing tag. */
                $this->createTextNode($parent, $prevPrev);
                $this->createTextNode($parent, $prev);
                return $parent;
            }
            $curr = $tokenizer->next();
        }

        /* The closing tag was found. Return the element like every other exit
         * path does, instead of falling off the end and returning null. */
        return $parent;
    }

}
Release code 2021-03-14 17:31:55 -04:00			`<?php`

			`namespace JBBCode;`

			`require_once 'ElementNode.php';`
			`require_once 'TextNode.php';`
			`require_once 'DefaultCodeDefinitionSet.php';`
			`require_once 'DocumentElement.php';`
			`require_once 'CodeDefinition.php';`
			`require_once 'CodeDefinitionBuilder.php';`
			`require_once 'CodeDefinitionSet.php';`
			`require_once 'NodeVisitor.php';`
			`require_once 'ParserException.php';`
			`require_once 'Tokenizer.php';`
			`require_once 'visitors/NestLimitVisitor.php';`
			`require_once 'InputValidator.php';`

			`use JBBCode\CodeDefinition;`

			`/**`
			`* BBCodeParser is the main parser class that constructs and stores the parse tree. Through this class`
			`* new bbcode definitions can be added, and documents may be parsed and converted to html/bbcode/plaintext, etc.`
			`*`
			`* @author jbowens`
			`*/`
			`class Parser`
			`{`

			`const OPTION_STATE_DEFAULT = 0;`
			`const OPTION_STATE_TAGNAME = 1;`
			`const OPTION_STATE_KEY = 2;`
			`const OPTION_STATE_VALUE = 3;`
			`const OPTION_STATE_QUOTED_VALUE = 4;`
			`const OPTION_STATE_JAVASCRIPT = 5;`

			`/* The root element of the parse tree */`
			`protected $treeRoot;`

			`/* The list of bbcodes to be used by the parser. */`
			`protected $bbcodes;`

			`/* The next node id to use. This is used while parsing. */`
			`protected $nextNodeid;`

			`/**`
			`* Constructs an instance of the BBCode parser`
			`*/`
			`public function __construct()`
			`{`
			`$this->reset();`
			`$this->bbcodes = array();`
			`}`

			`/**`
			`* Adds a simple (text-replacement only) bbcode definition`
			`*`
			`* @param string $tagName the tag name of the code (for example the b in [b])`
			`* @param string $replace the html to use, with {param} and optionally {option} for replacements`
			`* @param boolean $useOption whether or not this bbcode uses the secondary {option} replacement`
			`* @param boolean $parseContent whether or not to parse the content within these elements`
			`* @param integer $nestLimit an optional limit of the number of elements of this kind that can be nested within`
			`* each other before the parser stops parsing them.`
			`* @param InputValidator $optionValidator the validator to run {option} through`
			`* @param BodyValidator $bodyValidator the validator to run {param} through (only used if $parseContent == false)`
			`*`
			`* @return Parser`
			`*/`
			`public function addBBCode($tagName, $replace, $useOption = false, $parseContent = true, $nestLimit = -1,`
			`InputValidator $optionValidator = null, InputValidator $bodyValidator = null)`
			`{`
			`$builder = new CodeDefinitionBuilder($tagName, $replace);`

			`$builder->setUseOption($useOption);`
			`$builder->setParseContent($parseContent);`
			`$builder->setNestLimit($nestLimit);`

			`if ($optionValidator) {`
			`$builder->setOptionValidator($optionValidator);`
			`}`

			`if ($bodyValidator) {`
			`$builder->setBodyValidator($bodyValidator);`
			`}`

			`$this->addCodeDefinition($builder->build());`

			`return $this;`
			`}`

			`/**`
			`* Adds a complex bbcode definition. You may subclass the CodeDefinition class, instantiate a definition of your new`
			`* class and add it to the parser through this method.`
			`*`
			`* @param CodeDefinition $definition the bbcode definition to add`
			`*`
			`* @return Parser`
			`*/`
			`public function addCodeDefinition(CodeDefinition $definition)`
			`{`
			`array_push($this->bbcodes, $definition);`

			`return $this;`
			`}`

			`/**`
			`* Adds a set of CodeDefinitions.`
			`*`
			`* @param CodeDefinitionSet $set the set of definitions to add`
			`*`
			`* @return Parser`
			`*/`
			`public function addCodeDefinitionSet(CodeDefinitionSet $set) {`
			`foreach ($set->getCodeDefinitions() as $def) {`
			`$this->addCodeDefinition($def);`
			`}`

			`return $this;`
			`}`

			`/**`
			`* Returns the entire parse tree as text. Only {param} content is returned. BBCode markup will be ignored.`
			`*`
			`* @return string a text representation of the parse tree`
			`*/`
			`public function getAsText()`
			`{`
			`return $this->treeRoot->getAsText();`
			`}`

			`/**`
			`* Returns the entire parse tree as bbcode. This will be identical to the inputted string, except unclosed tags`
			`* will be closed.`
			`*`
			`* @return string a bbcode representation of the parse tree`
			`*/`
			`public function getAsBBCode()`
			`{`
			`return $this->treeRoot->getAsBBCode();`
			`}`

			`/**`
			`* Returns the entire parse tree as HTML. All BBCode replacements will be made. This is generally the method`
			`* you will want to use to retrieve the parsed bbcode.`
			`*`
			`* @return string a parsed html string`
			`*/`
			`public function getAsHTML()`
			`{`
			`return $this->treeRoot->getAsHTML();`
			`}`

			`/**`
			`* Accepts the given NodeVisitor at the root.`
			`*`
			`* @param NodeVisitor a NodeVisitor`
			`*`
			`* @return Parser`
			`*/`
			`public function accept(NodeVisitor $nodeVisitor)`
			`{`
			`$this->treeRoot->accept($nodeVisitor);`

			`return $this;`
			`}`
			`/**`
			`* Constructs the parse tree from a string of bbcode markup.`
			`*`
			`* @param string $str the bbcode markup to parse`
			`*`
			`* @return Parser`
			`*/`
			`public function parse($str)`
			`{`
			`/* Set the tree root back to a fresh DocumentElement. */`
			`$this->reset();`

			`$parent = $this->treeRoot;`
			`$tokenizer = new Tokenizer($str);`

			`while ($tokenizer->hasNext()) {`
			`$parent = $this->parseStartState($parent, $tokenizer);`
			`if ($parent->getCodeDefinition() && false ===`
			`$parent->getCodeDefinition()->parseContent()) {`
			`/* We're inside an element that does not allow its contents to be parseable. */`
			`$this->parseAsTextUntilClose($parent, $tokenizer);`
			`$parent = $parent->getParent();`
			`}`
			`}`

			`/* We parsed ignoring nest limits. Do an O(n) traversal to remove any elements that`
			`* are nested beyond their CodeDefinition's nest limit. */`
			`$this->removeOverNestedElements();`

			`return $this;`
			`}`

			`/**`
			`* Removes any elements that are nested beyond their nest limit from the parse tree. This`
			`* method is now deprecated. In a future release its access privileges will be made`
			`* protected.`
			`*`
			`* @deprecated`
			`*/`
			`public function removeOverNestedElements()`
			`{`
			`$nestLimitVisitor = new \JBBCode\visitors\NestLimitVisitor();`
			`$this->accept($nestLimitVisitor);`
			`}`

			`/**`
			`* Removes the old parse tree if one exists.`
			`*/`
			`protected function reset()`
			`{`
			`// remove any old tree information`
			`$this->treeRoot = new DocumentElement();`
			`/* The document element is created with nodeid 0. */`
			`$this->nextNodeid = 1;`
			`}`

			`/**`
			`* Determines whether a bbcode exists based on its tag name and whether or not it uses an option`
			`*`
			`* @param string $tagName the bbcode tag name to check`
			`* @param boolean $usesOption whether or not the bbcode accepts an option`
			`*`
			`* @return bool true if the code exists, false otherwise`
			`*/`
			`public function codeExists($tagName, $usesOption = false)`
			`{`
			`foreach ($this->bbcodes as $code) {`
			`if (strtolower($tagName) == $code->getTagName() && $usesOption == $code->usesOption()) {`
			`return true;`
			`}`
			`}`

			`return false;`
			`}`

			`/**`
			`* Returns the CodeDefinition of a bbcode with the matching tag name and usesOption parameter`
			`*`
			`* @param string $tagName the tag name of the bbcode being searched for`
			`* @param boolean $usesOption whether or not the bbcode accepts an option`
			`*`
			`* @return CodeDefinition if the bbcode exists, null otherwise`
			`*/`
			`public function getCode($tagName, $usesOption = false)`
			`{`
			`foreach ($this->bbcodes as $code) {`
			`if (strtolower($tagName) == $code->getTagName() && $code->usesOption() == $usesOption) {`
			`return $code;`
			`}`
			`}`

			`return null;`
			`}`

			/**
			 * Registers the definitions provided by DefaultCodeDefinitionSet.
			 *
			 * Deprecated convenience wrapper: build a DefaultCodeDefinitionSet yourself
			 * and pass it to addCodeDefinitionSet() instead.
			 *
			 * @deprecated
			 */
			public function loadDefaultCodes()
			{
				$this->addCodeDefinitionSet(new DefaultCodeDefinitionSet());
			}

			/**
			 * Adds text beneath the given parent.
			 *
			 * When the parent's last child is already a text node the string is appended
			 * to that node rather than creating a sibling, so consecutive pieces of text
			 * end up in a single node. Otherwise a new TextNode is created, given the next
			 * node id and attached to the parent.
			 *
			 * @param ElementNode $parent the element that should hold the text
			 * @param string      $string the text to add
			 *
			 * @return TextNode the node that now carries the text (reused or newly created)
			 */
			protected function createTextNode(ElementNode $parent, $string)
			{
				$siblings = $parent->getChildren();

				if (count($siblings)) {
					$tail = end($siblings);

					if ($tail->isTextNode()) {
						/* Extend the trailing text node instead of adding another one. */
						$tail->setValue($tail->getValue() . $string);
						return $tail;
					}
				}

				$fresh = new TextNode($string);
				$fresh->setNodeId(++$this->nextNodeid);
				$parent->addChild($fresh);

				return $fresh;
			}

			/**
			 * Entry state of the parser's loosely FSM-shaped logic: we are not inside a
			 * tag. One token is consumed; an opening bracket hands control to
			 * parseTagOpen(), anything else is stored as text under the current parent.
			 *
			 * @param ElementNode $parent    the current parent node we're under
			 * @param Tokenizer   $tokenizer the tokenizer we're using
			 *
			 * @return ElementNode the parent to use for the next iteration
			 */
			protected function parseStartState(ElementNode $parent, Tokenizer $tokenizer)
			{
				$token = $tokenizer->next();

				if ('[' != $token) {
					/* Plain text: record it and let the main parse loop call this
					 * method again with the same parent. */
					$this->createTextNode($parent, $token);
					return $parent;
				}

				return $this->parseTagOpen($parent, $tokenizer);
			}

			/**
			 * Handles the state entered right after an opening bracket has been consumed.
			 *
			 * Runs of stray opening brackets are emitted as text. The token that follows
			 * is only treated as tag content when the token after it is a closing
			 * bracket; in every other case the bracket and the token are emitted as text.
			 *
			 * @param ElementNode $parent    the current parent node
			 * @param Tokenizer   $tokenizer the tokenizer we're using
			 *
			 * @return ElementNode the new parent node
			 */
			protected function parseTagOpen(ElementNode $parent, Tokenizer $tokenizer)
			{
				if (!$tokenizer->hasNext()) {
					/* The bracket that brought us here was the very last token, so it
					 * is just text. */
					$this->createTextNode($parent, '[');
					return $parent;
				}

				/* Iterative on purpose: input such as [[[[[[... must not recurse.
				 * Each extra opening bracket means the previous one was plain text. */
				for ($token = $tokenizer->next(); '[' == $token; $token = $tokenizer->next()) {
					$this->createTextNode($parent, '[');
					if (!$tokenizer->hasNext()) {
						$this->createTextNode($parent, '[');
						return $parent;
					}
				}

				/* Peek one token ahead (and put it back) to see whether the candidate
				 * tag content is immediately followed by a closing bracket. */
				$closesImmediately = false;
				if ($tokenizer->hasNext()) {
					$closesImmediately = (']' == $tokenizer->next());
					$tokenizer->stepBack();
				}

				if (!$closesImmediately) {
					$this->createTextNode($parent, '['.$token);
					return $parent;
				}

				if (']' != $token) {
					/* Plain text between the brackets... likely a tag name. */
					return $this->parseTag($parent, $tokenizer, $token);
				}

				/* An empty pair of brackets is only text. */
				$this->createTextNode($parent, '[');
				$this->createTextNode($parent, ']');
				return $parent;
			}

			/**
			 * Splits the text found between [ and ] into a tag name and its options.
			 *
			 * The content is scanned one byte at a time by a small state machine driven
			 * by the OPTION_STATE_* constants. The tag name is everything up to the first
			 * '=' or space (or the whole content when neither occurs). Keys and values
			 * are collected in two parallel lists and combined at the end; the tag name
			 * is always the first key, and when it carries no value of its own an empty
			 * string is inserted for it.
			 *
			 * If a quoted value is followed by something other than a space or ']', the
			 * attribute is considered badly formed and everything after the first '='
			 * becomes the value stored under the tag name.
			 *
			 * @param string $tagContent the text between the [ and the ]
			 *
			 * @return array a two element list: the tag name (string) and the options
			 *               (array of key => value, empty when no options were found)
			 */
			protected function parseOptions($tagContent)
			{
				$buffer = "";
				$tagName = "";
				$state = static::OPTION_STATE_TAGNAME;
				$keys = array();
				$values = array();
				$options = array();

				$len = strlen($tagContent);
				$done = false;
				$idx = 0;

				try {
					while (!$done) {
						/* One extra pass is made with $char === null so that every state
						 * gets a chance to flush its buffer at the end of the content. */
						$char = $idx < $len ? $tagContent[$idx] : null;
						switch ($state) {
							case static::OPTION_STATE_TAGNAME:
								switch ($char) {
									case '=':
										$state = static::OPTION_STATE_VALUE;
										$tagName = $buffer;
										$keys[] = $tagName;
										$buffer = "";
										break;
									case ' ':
										$state = static::OPTION_STATE_DEFAULT;
										$tagName = $buffer;
										$buffer = '';
										$keys[] = $tagName;
										break;

									case null:
										$tagName = $buffer;
										$buffer = '';
										$keys[] = $tagName;
										break;
									default:
										$buffer .= $char;
								}
								break;

							case static::OPTION_STATE_DEFAULT:
								switch ($char) {
									case ' ':
										// no break here: a space falls through to the default branch
									default:
										$state = static::OPTION_STATE_KEY;
										$buffer .= $char;
								}
								break;

							case static::OPTION_STATE_VALUE:
								switch ($char) {
									case '"':
										$state = static::OPTION_STATE_QUOTED_VALUE;
										break;
									case null: // intentional fall-through
									case ' ': // key=value<space> delimits to next key
										$values[] = $buffer;
										$buffer = "";
										$state = static::OPTION_STATE_KEY;
										break;
									case ":":
										if ($buffer == "javascript") {
											$state = static::OPTION_STATE_JAVASCRIPT;
										}
										$buffer .= $char;
										break;
									default:
										$buffer .= $char;
								}
								break;

							case static::OPTION_STATE_JAVASCRIPT:
								/* Only a ';' ends this state; a value that never reaches
								 * one is not added to $values. */
								switch ($char) {
									case ";":
										$buffer .= $char;
										$values[] = $buffer;
										$buffer = "";
										$state = static::OPTION_STATE_KEY;
										break;
									default:
										$buffer .= $char;
								}
								break;

							case static::OPTION_STATE_KEY:
								switch ($char) {
									case '=':
										$state = static::OPTION_STATE_VALUE;
										$keys[] = $buffer;
										$buffer = '';
										break;
									case ' ': // ignore <space>key=value
										break;
									default:
										$buffer .= $char;
										break;
								}
								break;

							case static::OPTION_STATE_QUOTED_VALUE:
								switch ($char) {
									case null:
									case '"':
										$state = static::OPTION_STATE_KEY;
										$values[] = $buffer;
										$buffer = '';

										// peek ahead. If the next character is not a space or a closing brace, we have a bad tag and need to abort
										if (isset($tagContent[$idx + 1]) && $tagContent[$idx + 1] != " " && $tagContent[$idx + 1] != "]") {
											throw new ParserException("Badly formed attribute: $tagContent");
										}
										break;
									default:
										$buffer .= $char;
										break;
								}
								break;

							default:
								if (!empty($char)) {
									$state = static::OPTION_STATE_KEY;
								}
						}

						if ($idx >= $len) {
							$done = true;
						}
						$idx++;
					}

					if (count($keys) && count($values)) {
						/* One value short means the tag name itself had no value. */
						if (count($keys) == (count($values) + 1)) {
							array_unshift($values, "");
						}

						/* array_combine() needs lists of equal length: it throws a
						 * ValueError on PHP 8 and returns false (with a warning) on
						 * older versions. The counts can still differ here, e.g. for
						 * "a b=1 c=javascript:x" where the last value is never
						 * terminated, so in that case no options are reported. */
						if (count($keys) === count($values)) {
							$options = array_combine($keys, $values);
						}
					}
				} catch (ParserException $e) {
					// if we're in this state, then something evidently went wrong. We'll consider everything that came after the tagname to be the attribute for that keyname
					$options[$tagName] = substr($tagContent, strpos($tagContent, "=") + 1);
				}

				return array($tagName, $options);
			}

			/**
			 * This is the next step in parsing a tag. It's possible for it to still be invalid at this
			 * point but many of the basic invalid tag name conditions have already been handled.
			 *
			 * The token following the tag content must be a closing bracket. If it is not,
			 * the opening bracket and the tag content are emitted as text and the
			 * offending token is handed back to the tokenizer so it is not lost.
			 *
			 * @param ElementNode $parent     the current parent element
			 * @param Tokenizer   $tokenizer  the tokenizer we're using
			 * @param string      $tagContent the text between the [ and the ], assuming there is actually a ]
			 *
			 * @return ElementNode the new parent element
			 */
			protected function parseTag(ElementNode $parent, Tokenizer $tokenizer, $tagContent)
			{
				$hasClosingBracket = false;
				if ($tokenizer->hasNext()) {
					if (']' == $tokenizer->next()) {
						$hasClosingBracket = true;
					} else {
						/* The token is not part of this tag. Step back so the main
						 * parse loop still gets to process it instead of dropping it. */
						$tokenizer->stepBack();
					}
				}

				if (!$hasClosingBracket) {
					/* This is a malformed tag. Both the previous [ and the tagContent
					 * is really just plain text. */
					$this->createTextNode($parent, '[');
					$this->createTextNode($parent, $tagContent);
					return $parent;
				}

				/* This is a well-formed tag consisting of [something] or [/something], but
				 * we still need to ensure that 'something' is a valid tag name. Additionally,
				 * if it's a closing tag, we need to ensure that there was a previous matching
				 * opening tag.
				 */
				/* There could be attributes. */
				list($tmpTagName, $options) = $this->parseOptions($tagContent);

				/* A leading slash marks a closing tag; the real name follows it. */
				$isClosingTag = ('' != $tmpTagName && '/' == $tmpTagName[0]);
				$actualTagName = $isClosingTag ? substr($tmpTagName, 1) : $tmpTagName;

				if ($isClosingTag) {
					/* This is attempting to close an open tag. We must verify that there exists an
					 * open tag of the same type. The tag is also rejected when more than one
					 * option entry was parsed for it. */
					$elToClose = $parent->closestParentOfType($actualTagName);
					if (null == $elToClose || count($options) > 1) {
						/* Closing an unopened tag or carrying options. Treat everything as plain text. */
						$this->createTextNode($parent, '[');
						$this->createTextNode($parent, $tagContent);
						$this->createTextNode($parent, ']');
						return $parent;
					}

					/* We're closing $elToClose. In order to do that, we just need to return
					 * $elToClose's parent, since that will change our effective parent to be
					 * elToClose's parent. */
					return $elToClose->getParent();
				}

				$hasOptions = !empty($options);

				/* Verify that this is a known bbcode tag name. */
				if ('' == $actualTagName || !$this->codeExists($actualTagName, $hasOptions)) {
					/* This is an invalid tag name! Treat everything we've seen as plain text. */
					$this->createTextNode($parent, '[');
					$this->createTextNode($parent, $tagContent);
					$this->createTextNode($parent, ']');
					return $parent;
				}

				/* If we're here, this is a valid opening tag. Let's make a new node for it. */
				$el = new ElementNode();
				$el->setNodeId(++$this->nextNodeid);
				$code = $this->getCode($actualTagName, $hasOptions);
				$el->setCodeDefinition($code);
				if ($hasOptions) {
					/* We have attributes we should save. */
					$el->setAttribute($options);
				}
				$parent->addChild($el);
				return $el;
			}

			/**
			 * Handles parsing elements whose CodeDefinitions disable parsing of element
			 * contents. This function uses a rolling window of 3 tokens until it finds the
			 * appropriate closing tag or reaches the end of the token stream.
			 *
			 * Every token that slides out of the window is stored as text under $parent.
			 * The three tokens that make up the closing tag itself are consumed and not
			 * stored.
			 *
			 * @param ElementNode $parent    the current parent element
			 * @param Tokenizer   $tokenizer the tokenizer we're using
			 *
			 * @return ElementNode $parent when the token stream ended before a closing tag
			 *                     was seen, otherwise $parent's own parent
			 */
			protected function parseAsTextUntilClose(ElementNode $parent, Tokenizer $tokenizer)
			{
				/* $parent's code definition doesn't allow its contents to be parsed. Here we use
				 * a sliding window of three tokens until we find [ /tagname ], signifying the
				 * end of the parent. */
				if (!$tokenizer->hasNext()) {
					return $parent;
				}
				$prevPrev = $tokenizer->next();
				if (!$tokenizer->hasNext()) {
					$this->createTextNode($parent, $prevPrev);
					return $parent;
				}
				$prev = $tokenizer->next();
				if (!$tokenizer->hasNext()) {
					$this->createTextNode($parent, $prevPrev);
					$this->createTextNode($parent, $prev);
					return $parent;
				}
				$curr = $tokenizer->next();

				/* The closing tag text does not change while we scan, so build it once. */
				$closingTag = '/'.$parent->getTagName();

				while ('[' != $prevPrev || $closingTag != strtolower($prev) || ']' != $curr) {
					/* The oldest token cannot be part of the closing tag: keep it as text
					 * and slide the window forward by one. */
					$this->createTextNode($parent, $prevPrev);
					$prevPrev = $prev;
					$prev = $curr;
					if (!$tokenizer->hasNext()) {
						$this->createTextNode($parent, $prevPrev);
						$this->createTextNode($parent, $prev);
						return $parent;
					}
					$curr = $tokenizer->next();
				}

				/* The window now holds [ /tagname ], so $parent is closed. Previously this
				 * path fell off the end of the function and returned null. */
				return $parent->getParent();
			}

			`}`