bbcode 转换 html 实例源码(jbbcode)

bbcode 转换 html 实例源码(jbbcode)


  发布时间:2014-07-21
  PHP语言基础
  发 布 人:crazycode
 相关标签: bbcode HTML





namespace JBBCode;

require_once 'ElementNode.php';
require_once 'TextNode.php';
require_once 'DefaultCodeDefinitionSet.php';
require_once 'DocumentElement.php';
require_once 'CodeDefinition.php';
require_once 'CodeDefinitionBuilder.php';
require_once 'CodeDefinitionSet.php';
require_once 'NodeVisitor.php';
require_once 'ParserException.php';
require_once 'Tokenizer.php';
require_once 'visitors/NestLimitVisitor.php';
require_once 'InputValidator.php';

use JBBCode\CodeDefinition;

/**
 * BBCodeParser is the main parser class that constructs and stores the parse tree. Through this class
 * new bbcode definitions can be added, and documents may be parsed and converted to html/bbcode/plaintext, etc.
 *
 * @author jbowens
 */
class Parser
{
    /*
     * States of the small state machine used by parseOptions() to split the
     * text between '[' and ']' into a tag name and its key/value options.
     */
    const OPTION_STATE_DEFAULT = 0;
    const OPTION_STATE_TAGNAME = 1;
    const OPTION_STATE_KEY = 2;
    const OPTION_STATE_VALUE = 3;
    const OPTION_STATE_QUOTED_VALUE = 4;
    const OPTION_STATE_JAVASCRIPT = 5;

    /* The root element of the parse tree */
    protected $treeRoot;

    /* The list of bbcodes to be used by the parser. */
    protected $bbcodes;

    /* The next node id to use. This is used while parsing. */
    protected $nextNodeid;

    /**
     * Constructs an instance of the BBCode parser
     */
    public function __construct()
    {
        /* Start with an empty tree so the getAs*() methods are safe to call
         * even before parse() has been invoked. */
        $this->reset();
        $this->bbcodes = array();
    }

    /**
     * Adds a simple (text-replacement only) bbcode definition
     *
     * @param string  $tagName      the tag name of the code (for example the b in [b])
     * @param string  $replace      the html to use, with {param} and optionally {option} for replacements
     * @param boolean $useOption    whether or not this bbcode uses the secondary {option} replacement
     * @param boolean $parseContent whether or not to parse the content within these elements
     * @param integer $nestLimit    an optional limit of the number of elements of this kind that can be nested within
     *                              each other before the parser stops parsing them.
     * @param InputValidator $optionValidator   the validator to run {option} through
     * @param InputValidator $bodyValidator     the validator to run {param} through (only used if $parseContent == false)
     *
     * @return Parser
     */
    public function addBBCode($tagName, $replace, $useOption = false, $parseContent = true, $nestLimit = -1,
                              InputValidator $optionValidator = null, InputValidator $bodyValidator = null)
    {
        $builder = new CodeDefinitionBuilder($tagName, $replace);

        /* NOTE(review): the builder setter names below follow the jBBCode
         * CodeDefinitionBuilder API, which is declared outside this file. */
        $builder->setUseOption($useOption);
        $builder->setParseContent($parseContent);
        $builder->setNestLimit($nestLimit);

        if ($optionValidator) {
            $builder->setOptionValidator($optionValidator);
        }

        if ($bodyValidator) {
            $builder->setBodyValidator($bodyValidator);
        }

        $this->addCodeDefinition($builder->build());

        return $this;
    }

    /**
     * Adds a complex bbcode definition. You may subclass the CodeDefinition class, instantiate a definition of your new
     * class and add it to the parser through this method.
     *
     * @param CodeDefinition $definition the bbcode definition to add
     *
     * @return Parser
     */
    public function addCodeDefinition(CodeDefinition $definition)
    {
        $this->bbcodes[] = $definition;

        return $this;
    }

    /**
     * Adds a set of CodeDefinitions.
     *
     * @param CodeDefinitionSet $set  the set of definitions to add
     *
     * @return Parser
     */
    public function addCodeDefinitionSet(CodeDefinitionSet $set)
    {
        foreach ($set->getCodeDefinitions() as $def) {
            $this->addCodeDefinition($def);
        }

        return $this;
    }

    /**
     * Returns the entire parse tree as text. Only {param} content is returned. BBCode markup will be ignored.
     *
     * @return string a text representation of the parse tree
     */
    public function getAsText()
    {
        return $this->treeRoot->getAsText();
    }

    /**
     * Returns the entire parse tree as bbcode. This will be identical to the inputted string, except unclosed tags
     * will be closed.
     *
     * @return string a bbcode representation of the parse tree
     */
    public function getAsBBCode()
    {
        return $this->treeRoot->getAsBBCode();
    }

    /**
     * Returns the entire parse tree as HTML. All BBCode replacements will be made. This is generally the method
     * you will want to use to retrieve the parsed bbcode.
     *
     * @return string a parsed html string
     */
    public function getAsHTML()
    {
        return $this->treeRoot->getAsHTML();
    }

    /**
     * Accepts the given NodeVisitor at the root.
     *
     * @param NodeVisitor $nodeVisitor a NodeVisitor
     *
     * @return Parser
     */
    public function accept(NodeVisitor $nodeVisitor)
    {
        $this->treeRoot->accept($nodeVisitor);

        return $this;
    }

    /**
     * Constructs the parse tree from a string of bbcode markup.
     *
     * @param string $str the bbcode markup to parse
     *
     * @return Parser
     */
    public function parse($str)
    {
        /* Set the tree root back to a fresh DocumentElement. */
        $this->reset();

        $parent = $this->treeRoot;
        $tokenizer = new Tokenizer($str);

        while ($tokenizer->hasNext()) {
            $parent = $this->parseStartState($parent, $tokenizer);
            if ($parent->getCodeDefinition() && false ===
                $parent->getCodeDefinition()->parseContent()) {
                /* We're inside an element that does not allow its contents to be parseable. */
                $this->parseAsTextUntilClose($parent, $tokenizer);
                $parent = $parent->getParent();
            }
        }

        /* We parsed ignoring nest limits. Do an O(n) traversal to remove any elements that
         * are nested beyond their CodeDefinition's nest limit. */
        $this->removeOverNestedElements();

        return $this;
    }

    /**
     * Removes any elements that are nested beyond their nest limit from the parse tree. This
     * method is now deprecated. In a future release its access privileges will be made
     * protected.
     *
     * @deprecated
     */
    public function removeOverNestedElements()
    {
        $nestLimitVisitor = new \JBBCode\visitors\NestLimitVisitor();
        $this->accept($nestLimitVisitor);
    }

    /**
     * Removes the old parse tree if one exists.
     */
    protected function reset()
    {
        // remove any old tree information
        $this->treeRoot = new DocumentElement();
        /* The document element is created with nodeid 0. */
        $this->nextNodeid = 1;
    }

    /**
     * Determines whether a bbcode exists based on its tag name and whether or not it uses an option
     *
     * @param string  $tagName    the bbcode tag name to check
     * @param boolean $usesOption whether or not the bbcode accepts an option
     *
     * @return bool true if the code exists, false otherwise
     */
    public function codeExists($tagName, $usesOption = false)
    {
        foreach ($this->bbcodes as $code) {
            if (strtolower($tagName) == $code->getTagName() && $usesOption == $code->usesOption()) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns the CodeDefinition of a bbcode with the matching tag name and usesOption parameter
     *
     * @param string  $tagName    the tag name of the bbcode being searched for
     * @param boolean $usesOption whether or not the bbcode accepts an option
     *
     * @return CodeDefinition if the bbcode exists, null otherwise
     */
    public function getCode($tagName, $usesOption = false)
    {
        foreach ($this->bbcodes as $code) {
            if (strtolower($tagName) == $code->getTagName() && $code->usesOption() == $usesOption) {
                return $code;
            }
        }

        return null;
    }

    /**
     * Adds a set of default, standard bbcode definitions commonly used across the web.
     * This method is now deprecated. Please use DefaultCodeDefinitionSet and
     * addCodeDefinitionSet() instead.
     *
     * @deprecated
     */
    public function loadDefaultCodes()
    {
        $defaultSet = new DefaultCodeDefinitionSet();
        $this->addCodeDefinitionSet($defaultSet);
    }

    /**
     * Creates a new text node with the given parent and text string. If the
     * parent's last child is already a text node, the string is appended to
     * that node instead of creating a new one.
     *
     * @param ElementNode $parent  the parent of the text node
     * @param string      $string  the text of the text node
     *
     * @return TextNode the newly created (or extended) TextNode
     */
    protected function createTextNode(ElementNode $parent, $string)
    {
        if (count($parent->getChildren())) {
            $children = $parent->getChildren();
            $lastElement = end($children);

            if ($lastElement->isTextNode()) {
                /* Merge adjacent text so the tree never holds two
                 * consecutive text nodes. */
                $lastElement->setValue($lastElement->getValue() . $string);
                return $lastElement;
            }
        }

        $textNode = new TextNode($string);
        $textNode->setNodeId(++$this->nextNodeid);
        $parent->addChild($textNode);
        return $textNode;
    }

    /**
     * jBBCode parsing logic is loosely modelled after a FSM. While not every function maps
     * to a unique DFSM state, each function handles the logic of one or more FSM states.
     * This function handles the beginning parse state when we're not currently in a tag
     * name.
     *
     * @param ElementNode  $parent the current parent node we're under
     * @param Tokenizer    $tokenizer the tokenizer we're using
     *
     * @return ElementNode the new parent we should use for the next iteration.
     */
    protected function parseStartState(ElementNode $parent, Tokenizer $tokenizer)
    {
        $next = $tokenizer->next();

        if ('[' == $next) {
            return $this->parseTagOpen($parent, $tokenizer);
        } else {
            $this->createTextNode($parent, $next);
            /* Drop back into the main parse loop which will call this
             * same method again. */
            return $parent;
        }
    }

    /**
     * This function handles parsing the beginnings of an open tag. When we see a [
     * at an appropriate time, this function is entered.
     *
     * @param ElementNode $parent  the current parent node
     * @param Tokenizer   $tokenizer  the tokenizer we're using
     *
     * @return ElementNode the new parent node
     */
    protected function parseTagOpen(ElementNode $parent, Tokenizer $tokenizer)
    {
        if (!$tokenizer->hasNext()) {
            /* The [ that sent us to this state was just a trailing [, not the
             * opening for a new tag. Treat it as such. */
            $this->createTextNode($parent, '[');
            return $parent;
        }

        $next = $tokenizer->next();

        /* This while loop could be replaced by a recursive call to this same method,
         * which would likely be a lot clearer but I decided to use a while loop to
         * prevent stack overflow with a string like [[[[[[[[[...[[[.
         */
        while ('[' == $next) {
            /* The previous [ was just a random bracket that should be treated as text.
             * Continue until we get a non open bracket. */
            $this->createTextNode($parent, '[');
            if (!$tokenizer->hasNext()) {
                $this->createTextNode($parent, '[');
                return $parent;
            }
            $next = $tokenizer->next();
        }

        if (!$tokenizer->hasNext()) {
            $this->createTextNode($parent, '['.$next);
            return $parent;
        }

        /* Look one token ahead without consuming it: parseTag() expects the
         * closing bracket to still be the tokenizer's next token. */
        $after_next = $tokenizer->next();
        $tokenizer->stepBack();

        if ($after_next != ']') {
            $this->createTextNode($parent, '['.$next);
            return $parent;
        }

        /* At this point $next is either ']' or plain text. */
        if (']' == $next) {
            $this->createTextNode($parent, '[');
            $this->createTextNode($parent, ']');
            return $parent;
        } else {
            /* $next is plain text... likely a tag name. */
            return $this->parseTag($parent, $tokenizer, $next);
        }
    }

    /**
     * Splits the text found between '[' and ']' into the tag name and its options.
     *
     * The content is scanned one character at a time by a small state machine
     * (see the OPTION_STATE_* constants). Keys and values are collected in
     * parallel lists and combined at the end. When the tag has no value of its
     * own (e.g. [quote name="x"]) an empty value is inserted for the tag name so
     * both lists line up.
     *
     * @param string $tagContent the text between the [ and the ]
     *
     * @return array a two element array: the tag name, then an array of options keyed by option name
     */
    protected function parseOptions($tagContent)
    {
        $buffer = "";
        $tagName = "";
        $state = static::OPTION_STATE_TAGNAME;
        $keys = array();
        $values = array();
        $options = array();

        $len = strlen($tagContent);
        $done = false;
        $idx = 0;

        try {
            while (!$done) {
                /* One extra iteration runs with $char === null so that the
                 * buffer pending at end of input gets flushed. */
                $char = $idx < $len ? $tagContent[$idx]:null;
                switch ($state) {
                    case static::OPTION_STATE_TAGNAME:
                        switch ($char) {
                            case '=':
                                $state = static::OPTION_STATE_VALUE;
                                $tagName = $buffer;
                                $keys[] = $tagName;
                                $buffer = "";
                                break;
                            case ' ':
                                $state = static::OPTION_STATE_DEFAULT;
                                $tagName = $buffer;
                                $buffer = '';
                                $keys[] = $tagName;
                                break;

                            case null:
                                $tagName = $buffer;
                                $buffer = '';
                                $keys[] = $tagName;
                                break;
                            default:
                                $buffer .= $char;
                        }
                        break;

                    case static::OPTION_STATE_DEFAULT:
                        switch ($char) {
                            case ' ':
                                // do nothing
                                /* NOTE(review): falls through to default, so a second
                                 * space ends up in the key buffer — confirm intended. */
                            default:
                                $state = static::OPTION_STATE_KEY;
                                $buffer .= $char;
                        }
                        break;

                    case static::OPTION_STATE_VALUE:
                        switch ($char) {
                            case '"':
                                $state = static::OPTION_STATE_QUOTED_VALUE;
                                break;
                            case null: // intentional fall-through
                            case ' ': // key=value<space> delimits to next key
                                $values[] = $buffer;
                                $buffer = "";
                                $state = static::OPTION_STATE_KEY;
                                break;
                            case ":":
                                if ($buffer == "javascript") {
                                    $state = static::OPTION_STATE_JAVASCRIPT;
                                }
                                $buffer .= $char;
                                break;
                            default:
                                $buffer .= $char;
                        }
                        break;

                    case static::OPTION_STATE_JAVASCRIPT:
                        switch ($char) {
                            case ";":
                                $buffer .= $char;
                                $values[] = $buffer;
                                $buffer = "";
                                $state = static::OPTION_STATE_KEY;
                                break;
                            default:
                                $buffer .= $char;
                        }
                        break;

                    case static::OPTION_STATE_KEY:
                        switch ($char) {
                            case '=':
                                $state = static::OPTION_STATE_VALUE;
                                $keys[] = $buffer;
                                $buffer = '';
                                break;
                            case ' ': // ignore <space>key=value
                                break;
                            default:
                                $buffer .= $char;
                                break;
                        }
                        break;

                    case static::OPTION_STATE_QUOTED_VALUE:
                        switch ($char) {
                            case null:
                            case '"':
                                $state = static::OPTION_STATE_KEY;
                                $values[] = $buffer;
                                $buffer = '';

                                // peek ahead. If the next character is not a space or a closing brace, we have a bad tag and need to abort
                                if (isset($tagContent[$idx + 1]) && $tagContent[$idx + 1]!=" " && $tagContent[$idx + 1]!="]" ) {
                                    throw new ParserException("Badly formed attribute: $tagContent");
                                }
                                break;
                            default:
                                $buffer .= $char;
                                break;
                        }
                        break;

                    default:
                        if (!empty($char)) {
                            $state = static::OPTION_STATE_KEY;
                        }
                }

                if ($idx >= $len) {
                    $done = true;
                }
                $idx++;
            }

            if (count($keys) && count($values)) {
                if (count($keys)==(count($values) + 1)) {
                    /* The tag name itself has no value; pad so keys and values align. */
                    array_unshift($values, "");
                }

                $options = array_combine($keys, $values);
            }
        } catch (ParserException $e) {
            // if we're in this state, then something evidently went wrong. We'll consider everything that came after the tagname to be the attribute for that keyname
            $options[$tagName]= substr($tagContent, strpos($tagContent, "=") + 1);
        }

        return array($tagName, $options);
    }

    /**
     * This is the next step in parsing a tag. It's possible for it to still be invalid at this
     * point but many of the basic invalid tag name conditions have already been handled.
     *
     * @param ElementNode $parent  the current parent element
     * @param Tokenizer   $tokenizer  the tokenizer we're using
     * @param string      $tagContent  the text between the [ and the ], assuming there is actually a ]
     *
     * @return ElementNode the new parent element
     */
    protected function parseTag(ElementNode $parent, Tokenizer $tokenizer, $tagContent)
    {
        if (!$tokenizer->hasNext() || ($next = $tokenizer->next()) != ']') {
            /* This is a malformed tag. Both the previous [ and the tagContent
             * is really just plain text. */
            $this->createTextNode($parent, '[');
            $this->createTextNode($parent, $tagContent);
            return $parent;
        }

        /* This is a well-formed tag consisting of [something] or [/something], but
         * we still need to ensure that 'something' is a valid tag name. Additionally,
         * if it's a closing tag, we need to ensure that there was a previous matching
         * opening tag.
         */
        /* There could be attributes. */
        list($tmpTagName, $options) = $this->parseOptions($tagContent);

        $isClosingTag = '' != $tmpTagName && '/' == $tmpTagName[0];
        /* For a closing tag, strip the leading slash to get the real name. */
        $actualTagName = $isClosingTag ? substr($tmpTagName, 1) : $tmpTagName;

        if ($isClosingTag) {
            /* This is attempting to close an open tag. We must verify that there exists an
             * open tag of the same type and that there is no option (options on closing
             * tags don't make any sense). */
            $elToClose = $parent->closestParentOfType($actualTagName);
            if (null == $elToClose || count($options) > 1) {
                /* Closing an unopened tag or has an option. Treat everything as plain text. */
                $this->createTextNode($parent, '[');
                $this->createTextNode($parent, $tagContent);
                $this->createTextNode($parent, ']');
                return $parent;
            } else {
                /* We're closing $elToClose. In order to do that, we just need to return
                 * $elToClose's parent, since that will change our effective parent to be
                 * elToClose's parent. */
                return $elToClose->getParent();
            }
        }

        /* Verify that this is a known bbcode tag name. */
        if ('' == $actualTagName || !$this->codeExists($actualTagName, !empty($options))) {
            /* This is an invalid tag name! Treat everything we've seen as plain text. */
            $this->createTextNode($parent, '[');
            $this->createTextNode($parent, $tagContent);
            $this->createTextNode($parent, ']');
            return $parent;
        }

        /* If we're here, this is a valid opening tag. Let's make a new node for it. */
        $el = new ElementNode();
        $el->setNodeId(++$this->nextNodeid);
        $code = $this->getCode($actualTagName, !empty($options));
        $el->setCodeDefinition($code);
        if (!empty($options)) {
            /* We have an attribute we should save. */
            $el->setAttribute($options);
        }
        $parent->addChild($el);
        return $el;
    }

    /**
     * Handles parsing elements whose CodeDefinitions disable parsing of element
     * contents. This function uses a rolling window of 3 tokens until it finds the
     * appropriate closing tag or reaches the end of the token stream.
     *
     * @param ElementNode $parent  the current parent element
     * @param Tokenizer   $tokenizer  the tokenizer we're using
     *
     * @return ElementNode the element whose content was consumed as text
     */
    protected function parseAsTextUntilClose(ElementNode $parent, Tokenizer $tokenizer)
    {
        /* $parent's code definition doesn't allow its contents to be parsed. Here we use
         * a sliding window of three tokens until we find [ /tagname ], signifying the
         * end of the parent. */
        if (!$tokenizer->hasNext()) {
            return $parent;
        }
        $prevPrev = $tokenizer->next();
        if (!$tokenizer->hasNext()) {
            $this->createTextNode($parent, $prevPrev);
            return $parent;
        }
        $prev = $tokenizer->next();
        if (!$tokenizer->hasNext()) {
            $this->createTextNode($parent, $prevPrev);
            $this->createTextNode($parent, $prev);
            return $parent;
        }
        $curr = $tokenizer->next();
        while ('[' != $prevPrev || '/'.$parent->getTagName() != strtolower($prev) ||
            ']' != $curr) {
            /* The oldest token in the window is definitely content; emit it
             * and slide the window forward by one token. */
            $this->createTextNode($parent, $prevPrev);
            $prevPrev = $prev;
            $prev = $curr;
            if (!$tokenizer->hasNext()) {
                $this->createTextNode($parent, $prevPrev);
                $this->createTextNode($parent, $prev);
                return $parent;
            }
            $curr = $tokenizer->next();
        }

        /* The closing tag was found and consumed; every path now returns the
         * element, as the documented contract promises. */
        return $parent;
    }
}


bbcode 转换 html 实例源码(jbbcode)

