|
LOG.inform("XMLDOC.Parser loaded");

/**
 * Namespace for the XML parser. <tt>XMLDOC.Parser.parse()</tt> returns an
 * {@link XMLDOC.Parser.node} that acts as the root of the parsed document.
 * <p/>
 * Out of the box only well formed XML is accepted. To let the parser cope
 * with HTML-style markup, set <tt>XMLDOC.Parser.strictMode</tt> to
 * <tt>false</tt> before calling <tt>XMLDOC.Parser.parse()</tt>.
 * <p/>
 * <i>Note: in strict mode, poorly formed XML makes the parser throw an
 * exception.</i>
 *
 * @author Brett Fattori (bfattori@fry.com)
 * @author $Author: micmath $
 * @version $Revision: 497 $
 */
XMLDOC.Parser = {};

/**
 * Strict mode switch. While <tt>true</tt>, a start tag that has no matching
 * end tag (and is not written as an empty tag) is reported as an error.
 * Set it to <tt>false</tt> to have such tags stored as empty elements, which
 * allows HTML-style source to be parsed. Default: <tt>true</tt>
 * @type Boolean
 */
XMLDOC.Parser.strictMode = true;
|
27 |
|
/**
 * A node in an XML document tree. The parser in this file creates nodes of
 * type DOCUMENT (the root, named "ROOT"), ELEMENT, COMMENT, PI and TEXT.
 * @constructor
 * @param parent {XMLDOC.Parser.node} The parent node (null for the root)
 * @param name {String} The node name
 * @param type {String} One of the types; defaults to "ELEMENT" when omitted
 */
XMLDOC.Parser.node = function(parent, name, type)
{
    this.name = name;
    this.type = type || "ELEMENT";
    this.parent = parent;
    // Character content (used by the parser for TEXT, COMMENT and PI nodes)
    this.charData = "";
    // Attribute name -> attribute value
    this.attrs = {};
    // Child nodes, in document order
    this.nodes = [];
    // Kept for compatibility; not read by any code in this part of the file
    this.cPtr = 0;
};

// The accessors are installed on the prototype once, at load time, rather
// than from inside the constructor. Doing it in the constructor rebuilt every
// function on each instantiation and clobbered any later override.

/**
 * Get the names of all attributes set on this node.
 * @return {Array} The attribute names
 */
XMLDOC.Parser.node.prototype.getAttributeNames = function() {
    var names = [];
    for (var key in this.attrs)
    {
        // Skip anything inherited through an extended Object.prototype
        if (Object.prototype.hasOwnProperty.call(this.attrs, key))
        {
            names.push(key);
        }
    }

    return names;
};

/**
 * Get the value of an attribute.
 * @param attr {String} The attribute name
 * @return {String} The attribute value, or undefined if it is not set
 */
XMLDOC.Parser.node.prototype.getAttribute = function(attr) {
    return this.attrs[attr];
};

/**
 * Set the value of an attribute, creating it if necessary.
 * @param attr {String} The attribute name
 * @param val {String} The attribute value
 */
XMLDOC.Parser.node.prototype.setAttribute = function(attr, val) {
    this.attrs[attr] = val;
};

/**
 * Get the child node at the given position.
 * @param idx {Number} Zero-based index into the child list
 * @return {XMLDOC.Parser.node} The child, or undefined if out of range
 */
XMLDOC.Parser.node.prototype.getChild = function(idx) {
    return this.nodes[idx];
};

/**
 * Get the parent of this node.
 * @return {XMLDOC.Parser.node} The parent node as passed to the constructor
 */
XMLDOC.Parser.node.prototype.parentNode = function() {
    return this.parent;
};

/**
 * Get the first child of this node.
 * @return {XMLDOC.Parser.node} The first child, or undefined if there are none
 */
XMLDOC.Parser.node.prototype.firstChild = function() {
    return this.nodes[0];
};

/**
 * Get the last child of this node.
 * @return {XMLDOC.Parser.node} The last child, or undefined if there are none
 */
XMLDOC.Parser.node.prototype.lastChild = function() {
    return this.nodes[this.nodes.length - 1];
};

/**
 * Get the node that follows this one in its parent's child list.
 * @return {XMLDOC.Parser.node} The next sibling, or null if this node has no
 *    parent, is the last child, or is not present in the parent's child list
 */
XMLDOC.Parser.node.prototype.nextSibling = function() {
    var p = this.parent;
    if (!p)
    {
        return null;
    }

    var idx = p.nodes.indexOf(this);
    // idx === -1 means this node was never appended to its parent; without
    // this check, idx + 1 would be 0 and the parent's first child would be
    // returned by mistake.
    if (idx === -1 || idx + 1 >= p.nodes.length)
    {
        return null;
    }
    return p.getChild(idx + 1);
};

/**
 * Get the node that precedes this one in its parent's child list.
 * @return {XMLDOC.Parser.node} The previous sibling, or null if this node has
 *    no parent, is the first child, or is not present in the parent's child list
 */
XMLDOC.Parser.node.prototype.prevSibling = function() {
    var p = this.parent;
    if (!p)
    {
        return null;
    }

    var idx = p.nodes.indexOf(this);
    if (idx - 1 >= 0)
    {
        return p.getChild(idx - 1);
    }
    return null;
};
|
96 |
|
/**
 * Parse an XML document from the specified source. The XML should be
 * well formed unless strict mode is disabled, in which case the parser
 * also handles HTML-style documents.
 * <p/>
 * Line endings are normalized to "\n", and the first XML declaration and
 * the first DOCTYPE declaration are stripped before parsing, so neither
 * shows up in the resulting tree.
 * @param src {String} The source to parse
 * @return {XMLDOC.Parser.node} The DOCUMENT node (named "ROOT") holding the
 *    parsed content as its children
 */
XMLDOC.Parser.parse = function(src)
{
    // Normalize line endings: "\r\n" and lone "\r" both become "\n"
    src = src.replace(/\r\n?/g, "\n");

    // Remove the XML and DOCTYPE specifiers. String.replace returns a new
    // string, so the result has to be assigned back; previously it was
    // discarded and nothing was removed. The patterns are deliberately not
    // greedy so that markup following the declaration on the same line is
    // left alone. The DOCTYPE pattern allows one bracketed internal subset.
    src = src.replace(/<\?xml .*?\?>/i, "");
    src = src.replace(/<!DOCTYPE [^>\[]*(\[[^\]]*\])?[^>]*>/i, "");

    // The document is the root node and cannot be modified or removed
    var doc = new XMLDOC.Parser.node(null, "ROOT", "DOCUMENT");

    // Let's break it down
    XMLDOC.Parser.eat(doc, src);

    return doc;
};
|
125 |
|
/**
 * The XML fragment processing routine. This method is private and should not be
 * called directly. It repeatedly finds the next tag in <tt>src</tt>, appends a
 * node for it (and for any text in front of it) to <tt>parentNode</tt>, removes
 * the consumed source, and recurses into the content of enclosing tags.
 * @param parentNode {XMLDOC.Parser.node} The node which is the parent of this fragment
 * @param src {String} The source within the fragment to process
 * @throws {Error} When a start tag has no matching end tag in strict mode, or
 *    when an element cannot be matched as an enclosing tag. The offending
 *    source is attached to the error as <tt>src</tt>.
 * @private
 */
XMLDOC.Parser.eat = function(parentNode, src)
{
    // A simple tag def. Built with the RegExp constructor so that every call,
    // including recursive ones, works with its own lastIndex state.
    // [\s\S] matches the same characters as the former (.|\s) alternation but
    // without a backtracking point per character.
    var reTag = new RegExp("<(!|)(\\?|--|)([\\s\\S]*?)\\2>","g");

    // Special tag types
    var reCommentTag = /<!--([\s\S]*?)-->/;
    var rePITag = /<\?([\s\S]*?)\?>/;

    // A start tag (with potential empty marker)
    var reStartTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*(\/)?>/;

    // An empty HTML style tag (not proper XML, but we'll accept it so we can process HTML)
    var reHTMLEmptyTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*>/;

    // Fully enclosing tag with nested tags; group 6 is the enclosed content
    var reEnclosingTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*?)\4)*>([\s\S]*?)<\/\1>/;

    // Breaks down attributes
    var reAttributes = new RegExp(" +([\\w_\\-]*)=(\"|')(.*?)\\2","g");

    // Strips surrounding whitespace from text. Note that it only matches when
    // the text has whitespace on BOTH ends; otherwise the text is kept as-is.
    var reTrimText = /^[ \t\n]+((.|\n)*?)[ \t\n]+$/;

    // Appends a TEXT node for the given raw text unless nothing is left of it
    function appendText(raw)
    {
        var text = raw.replace(reTrimText, "$1");
        if (text.length > 0 && (text != "\n"))
        {
            var txtNode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
            txtNode.charData = text;
            parentNode.nodes.push(txtNode);
        }
    }

    // Builds the error reported for source that cannot be parsed
    function malformed(badSrc)
    {
        var err = new Error("Malformed XML passed to XMLDOC.Parser... Error contains malformed 'src'");
        err.src = badSrc;
        return err;
    }

    // Find us a tag
    var tag;
    var m;
    while ((tag = reTag.exec(src)) !== null)
    {
        if (tag.index > 0)
        {
            // The next tag has some text before it
            var leading = src.substring(0, tag.index);
            appendText(leading);

            // Shift lastIndex to account for the text being removed
            reTag.lastIndex -= leading.length;

            // Eat the text; the tag now sits at the start of src
            src = src.substring(tag.index);
        }

        if ((m = reCommentTag.exec(tag[0])) !== null)
        {
            // Is this a comment?
            var comment = new XMLDOC.Parser.node(parentNode, "", "COMMENT");
            comment.charData = m[1];

            // Append the comment
            parentNode.nodes.push(comment);

            // Move the lastIndex of reTag
            reTag.lastIndex -= tag[0].length;

            // Eat the tag
            src = src.replace(reCommentTag, "");
        }
        else if ((m = rePITag.exec(tag[0])) !== null)
        {
            // Is this a processing instruction?
            var pi = new XMLDOC.Parser.node(parentNode, "", "PI");
            pi.charData = m[1];

            // Append the processing instruction
            parentNode.nodes.push(pi);

            // Move the lastIndex of reTag
            reTag.lastIndex -= tag[0].length;

            // Eat the tag
            src = src.replace(rePITag, "");
        }
        else if ((m = reStartTag.exec(tag[0])) !== null)
        {
            // Break it down
            var elem = new XMLDOC.Parser.node(parentNode, m[1], "ELEMENT");

            // Get attributes from the tag. The whole tag text is scanned
            // instead of capture group 2: a repeated group only retains its
            // last repetition, so attributes were lost whenever a tag mixed
            // quote styles (e.g. a="1" b='2').
            var a;
            reAttributes.lastIndex = 0;
            while ((a = reAttributes.exec(m[0])) !== null)
            {
                elem.attrs[a[1]] = a[3];
            }

            // Is this an empty XML-style tag?
            if (m[6] === "/")
            {
                // Append the empty element
                parentNode.nodes.push(elem);

                // Eat the tag
                src = src.replace(reStartTag, "");
            }
            else
            {
                var htmlStartTag = reHTMLEmptyTag.exec(src);

                // See if there isn't an end tag within this block. The tag
                // name is escaped so that characters such as "." are matched
                // literally, the same way the \1 backreference in
                // reEnclosingTag matches them.
                var tagName = htmlStartTag[1].replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
                var reHTMLEndTag = new RegExp("</" + tagName + ">");
                var htmlEndTag = reHTMLEndTag.exec(src);

                if (htmlEndTag === null)
                {
                    if (XMLDOC.Parser.strictMode)
                    {
                        // Poorly formed XML fails in strict mode
                        throw malformed(src);
                    }

                    // This is an HTML-style empty tag, store the element for it in non-strict mode
                    parentNode.nodes.push(elem);

                    // Eat the tag
                    src = src.replace(reHTMLEmptyTag, "");
                }
                else
                {
                    // An end tag exists, so it must be an enclosing tag
                    var enc = reEnclosingTag.exec(src);
                    if (enc === null)
                    {
                        // Report this the same way as other malformed input
                        // rather than failing on a null dereference
                        throw malformed(src);
                    }

                    // Go deeper into the document
                    XMLDOC.Parser.eat(elem, enc[6]);

                    // Append the new element node
                    parentNode.nodes.push(elem);

                    // Eat the tag
                    src = src.replace(reEnclosingTag, "");
                }
            }

            // The element was removed from the front of src; rescan from the start
            reTag.lastIndex = 0;
        }
        // Otherwise the match is not a tag this parser understands (for
        // example a start tag spanning several lines). It is left in src and
        // scanning continues after it.
    }

    // No tag was found... append the text if there is any
    appendText(src);
};