thirdparty/jsdoctoolkit/app/handlers/XMLDOC/XMLParse.js
changeset 3041 c8f47f0b6697
equal deleted inserted replaced
3040:8f9580309846 3041:c8f47f0b6697
       
     1 LOG.inform("XMLDOC.Parser loaded");
       
     2 
       
     3 /**
       
     4  * XML Parser object.  Returns an {@link #XMLDOC.Parser.node} which is
       
     5  * the root element of the parsed document.
       
     6  * <p/>
       
     7  * By default, this parser will only handle well formed XML.  To
       
     8  * allow the parser to handle HTML, set the <tt>XMLDOC.Parser.strictMode</tt>
       
     9  * variable to <tt>false</tt> before calling <tt>XMLDOC.Parser.parse()</tt>.
       
    10  * <p/>
       
    11  * <i>Note: If you pass poorly formed XML, it will cause the parser to throw
       
    12  * an exception.</i>
       
    13  *
       
    14  * @author Brett Fattori (bfattori@fry.com)
       
    15  * @author $Author: micmath $
       
    16  * @version $Revision: 497 $
       
    17  */
       
    18 XMLDOC.Parser = {};
       
    19 
       
    20 /**
       
    21  * Strict mode setting.  Setting this to false allows HTML-style source to
       
    22  * be parsed.  Normally, well formed XML has defined end tags, or empty tags
       
    23  * are properly formed.  Default: <tt>true</tt>
       
    24  * @type Boolean
       
    25  */
       
    26 XMLDOC.Parser.strictMode = true;
       
    27 
       
    28 /**
       
    29  * A node in an XML Document.  Node types are ROOT, ELEMENT, COMMENT, PI, and TEXT.
       
    30  * @param parent {XMLDOC.Parser.node} The parent node
       
    31  * @param name {String} The node name
       
    32  * @param type {String} One of the types
       
    33  */
       
    34 XMLDOC.Parser.node = function(parent, name, type)
       
    35 {
       
    36    this.name = name;
       
    37    this.type = type || "ELEMENT";
       
    38    this.parent = parent;
       
    39    this.charData = "";
       
    40    this.attrs = {};
       
    41    this.nodes = [];
       
    42    this.cPtr = 0;
       
    43 
       
    44    XMLDOC.Parser.node.prototype.getAttributeNames = function() {
       
    45       var a = [];
       
    46       for (var o in this.attrs)
       
    47       {
       
    48          a.push(o);
       
    49       }
       
    50 
       
    51       return a;
       
    52    };
       
    53 
       
    54    XMLDOC.Parser.node.prototype.getAttribute = function(attr) {
       
    55       return this.attrs[attr];
       
    56    };
       
    57 
       
    58    XMLDOC.Parser.node.prototype.setAttribute = function(attr, val) {
       
    59       this.attrs[attr] = val;
       
    60    };
       
    61 
       
    62    XMLDOC.Parser.node.prototype.getChild = function(idx) {
       
    63       return this.nodes[idx];
       
    64    };
       
    65 
       
    66    XMLDOC.Parser.node.prototype.parentNode = function() {
       
    67       return this.parent;
       
    68    };
       
    69 
       
    70    XMLDOC.Parser.node.prototype.firstChild = function() {
       
    71       return this.nodes[0];
       
    72    };
       
    73 
       
    74    XMLDOC.Parser.node.prototype.lastChild = function() {
       
    75       return this.nodes[this.nodes.length - 1];
       
    76    };
       
    77 
       
    78    XMLDOC.Parser.node.prototype.nextSibling = function() {
       
    79       var p = this.parent;
       
    80       if (p && (p.nodes.indexOf(this) + 1 != p.nodes.length))
       
    81       {
       
    82          return p.getChild(p.nodes.indexOf(this) + 1);
       
    83       }
       
    84       return null;
       
    85    };
       
    86 
       
    87    XMLDOC.Parser.node.prototype.prevSibling = function() {
       
    88       var p = this.parent;
       
    89       if (p && (p.nodes.indexOf(this) - 1 >= 0))
       
    90       {
       
    91          return p.getChild(p.nodes.indexOf(this) - 1);
       
    92       }
       
    93       return null;
       
    94    };
       
    95 };
       
    96 
       
    97 /**
       
    98  * Parse an XML Document from the specified source.  The XML should be
       
    99  * well formed, unless strict mode is disabled, then the parser will
       
   100  * handle HTML-style XML documents.
       
   101  * @param src {String} The source to parse
       
   102  */
       
   103 XMLDOC.Parser.parse = function(src)
       
   104 {
       
   105    var A = [];
       
   106 
       
   107    // Normailize whitespace
       
   108    A = src.split("\r\n");
       
   109    src = A.join("\n");
       
   110    A = src.split("\r");
       
   111    src = A.join("\n");
       
   112 
       
   113    // Remove XML and DOCTYPE specifier
       
   114    src.replace(/<\?XML .*\?>/i, "");
       
   115    src.replace(/<!DOCTYPE .*\>/i, "");
       
   116 
       
   117    // The document is the root node and cannot be modified or removed
       
   118    var doc = new XMLDOC.Parser.node(null, "ROOT", "DOCUMENT");
       
   119 
       
   120    // Let's break it down
       
   121    XMLDOC.Parser.eat(doc, src);
       
   122 
       
   123    return doc;
       
   124 };
       
   125 
       
   126 /**
       
   127  * The XML fragment processing routine.  This method is private and should not be called
       
   128  * directly.
       
   129  * @param parentNode {XMLDOC.Parser.node} The node which is the parent of this fragment
       
   130  * @param src {String} The source within the fragment to process
       
   131  * @private
       
   132  */
       
   133 XMLDOC.Parser.eat = function(parentNode, src)
       
   134 {
       
   135    // A simple tag def
       
   136    var reTag = new RegExp("<(!|)(\\?|--|)((.|\\s)*?)\\2>","g");
       
   137 
       
   138    // Special tag types
       
   139    var reCommentTag = /<!--((.|\s)*?)-->/;
       
   140    var rePITag = /<\?((.|\s)*?)\?>/;
       
   141 
       
   142    // A start tag (with potential empty marker)
       
   143    var reStartTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*(\/)?>/;
       
   144 
       
   145    // An empty HTML style tag (not proper XML, but we'll accept it so we can process HTML)
       
   146    var reHTMLEmptyTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*>/;
       
   147 
       
   148    // Fully enclosing tag with nested tags
       
   149    var reEnclosingTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*?)\4)*>((.|\s)*?)<\/\1>/;
       
   150 
       
   151    // Breaks down attributes
       
   152    var reAttributes = new RegExp(" +([\\w_\\-]*)=(\"|')(.*?)\\2","g");
       
   153 
       
   154    // Find us a tag
       
   155    var tag;
       
   156    while ((tag = reTag.exec(src)) != null)
       
   157    {
       
   158       if (tag.index > 0)
       
   159       {
       
   160          // The next tag has some text before it
       
   161          var text = src.substring(0, tag.index).replace(/^[ \t\n]+((.|\n)*?)[ \t\n]+$/, "$1");
       
   162 
       
   163          if (text.length > 0 && (text != "\n"))
       
   164          {
       
   165             var txtnode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
       
   166             txtnode.charData = text;
       
   167 
       
   168             // Append the new text node
       
   169             parentNode.nodes.push(txtnode);
       
   170          }
       
   171 
       
   172          // Reset the lastIndex of reTag
       
   173          reTag.lastIndex -= src.substring(0, tag.index).length;
       
   174 
       
   175          // Eat the text
       
   176          src = src.substring(tag.index);
       
   177       }
       
   178 
       
   179       if (reCommentTag.test(tag[0]))
       
   180       {
       
   181          // Is this a comment?
       
   182          var comment = new XMLDOC.Parser.node(parentNode, "", "COMMENT");
       
   183          comment.charData = reCommentTag.exec(tag[0])[1];
       
   184 
       
   185          // Append the comment
       
   186          parentNode.nodes.push(comment);
       
   187 
       
   188          // Move the lastIndex of reTag
       
   189          reTag.lastIndex -= tag[0].length;
       
   190 
       
   191          // Eat the tag
       
   192          src = src.replace(reCommentTag, "");
       
   193       }
       
   194       else if (rePITag.test(tag[0]))
       
   195       {
       
   196          // Is this a processing instruction?
       
   197          var pi = new XMLDOC.Parser.node(parentNode, "", "PI");
       
   198          pi.charData = rePITag.exec(tag[0])[1];
       
   199 
       
   200          // Append the processing instruction
       
   201          parentNode.nodes.push(pi);
       
   202 
       
   203          // Move the lastIndex of reTag
       
   204          reTag.lastIndex -= tag[0].length;
       
   205 
       
   206          // Eat the tag
       
   207          src = src.replace(rePITag, "");
       
   208       }
       
   209       else if (reStartTag.test(tag[0]))
       
   210       {
       
   211          // Break it down
       
   212          var e = reStartTag.exec(tag[0]);
       
   213          var elem = new XMLDOC.Parser.node(parentNode, e[1], "ELEMENT");
       
   214 
       
   215          // Get attributes from the tag
       
   216          var a;
       
   217          while ((a = reAttributes.exec(e[2])) != null )
       
   218          {
       
   219             elem.attrs[a[1]] = a[3];
       
   220          }
       
   221 
       
   222          // Is this an empty XML-style tag?
       
   223          if (e[6] == "/")
       
   224          {
       
   225             // Append the empty element
       
   226             parentNode.nodes.push(elem);
       
   227 
       
   228             // Move the lastIndex of reTag (include the start tag length)
       
   229             reTag.lastIndex -= e[0].length;
       
   230 
       
   231             // Eat the tag
       
   232             src = src.replace(reStartTag, "");
       
   233          }
       
   234          else
       
   235          {
       
   236             // Check for malformed XML tags
       
   237             var htmlParsed = false;
       
   238             var htmlStartTag = reHTMLEmptyTag.exec(src);
       
   239 
       
   240             // See if there isn't an end tag within this block
       
   241             var reHTMLEndTag = new RegExp("</" + htmlStartTag[1] + ">");
       
   242             var htmlEndTag = reHTMLEndTag.exec(src);
       
   243 
       
   244             if (XMLDOC.Parser.strictMode && htmlEndTag == null)
       
   245             {
       
   246                // Poorly formed XML fails in strict mode
       
   247                var err = new Error("Malformed XML passed to XMLDOC.Parser... Error contains malformed 'src'");
       
   248                err.src = src;
       
   249                throw err;
       
   250             }
       
   251             else if (htmlEndTag == null)
       
   252             {
       
   253                // This is an HTML-style empty tag, store the element for it in non-strict mode
       
   254                parentNode.nodes.push(elem);
       
   255 
       
   256                // Eat the tag
       
   257                src = src.replace(reHTMLEmptyTag, "");
       
   258                htmlParsed = true;
       
   259             }
       
   260 
       
   261             // If we didn't parse HTML-style, it must be an enclosing tag
       
   262             if (!htmlParsed)
       
   263             {
       
   264                var enc = reEnclosingTag.exec(src);
       
   265 
       
   266                // Go deeper into the document
       
   267                XMLDOC.Parser.eat(elem, enc[6]);
       
   268 
       
   269                // Append the new element node
       
   270                parentNode.nodes.push(elem);
       
   271 
       
   272                // Eat the tag
       
   273                src = src.replace(reEnclosingTag, "");
       
   274             }
       
   275          }
       
   276 
       
   277          // Reset the lastIndex of reTag
       
   278          reTag.lastIndex = 0;
       
   279       }
       
   280    }
       
   281 
       
   282    // No tag was found... append the text if there is any
       
   283    src = src.replace(/^[ \t\n]+((.|\n)*?)[ \t\n]+$/, "$1");
       
   284    if (src.length > 0 && (src != "\n"))
       
   285    {
       
   286       var txtNode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
       
   287       txtNode.charData = src;
       
   288 
       
   289       // Append the new text node
       
   290       parentNode.nodes.push(txtNode);
       
   291    }
       
   292 };