|
// Create the shared JSDOC namespace object unless an earlier script already defined it.
if (typeof JSDOC == "undefined") {
	JSDOC = {};
}
|
2 |
|
/**
	@class Search a {@link JSDOC.TextStream} for language tokens.

	The properties set here are switches read by the reader methods to decide
	which non-code tokens are added to the token list.
 */
JSDOC.TokenReader = function() {
	/** When true, multi-line comments that open like a doclet are kept as "JSDOC" tokens. */
	this.keepDocs = true;

	/** When true, runs of spaces and newlines are kept as "WHIT" tokens. */
	this.keepWhite = false;

	/** When true, ordinary single-line and multi-line comments are kept as "COMM" tokens. */
	this.keepComments = false;

	/**
		When true, a kept run of spaces is stored as a single " " and a kept run
		of newlines as a single "\n". Declared here so the option is visible;
		false matches the behaviour of leaving it unset.
	 */
	this.collapseWhite = false;
};
|
11 |
|
/**
	Split the text of a stream into a list of tokens.

	At each stream position the reader methods are tried in a fixed order and
	the first one that reports success ends that pass. Comments, strings and
	regular expressions are tried before punctuation, so a "/" that opens one
	of them is examined before read_punc sees it.

	@param {JSDOC.TextStream} stream The text to consume.
	@returns {JSDOC.Token[]} The tokens in source order. The array additionally
		carries two helper methods, last() and lastSym().
 */
JSDOC.TokenReader.prototype.tokenize = function(/**JSDOC.TextStream*/stream) {
	var tokens = [];

	/**@ignore*/
	tokens.last = function() {
		return tokens[tokens.length - 1];
	};

	/**@ignore*/
	tokens.lastSym = function() {
		// most recent token that is neither whitespace nor a comment; undefined if none
		for (var i = tokens.length - 1; i >= 0; i--) {
			var candidate = tokens[i];
			if (!(candidate.is("WHIT") || candidate.is("COMM"))) return candidate;
		}
	};

	while (!stream.look().eof) {
		if (this.read_mlcomment(stream, tokens)) continue;
		if (this.read_slcomment(stream, tokens)) continue;
		if (this.read_dbquote(stream, tokens)) continue;
		if (this.read_snquote(stream, tokens)) continue;
		if (this.read_regx(stream, tokens)) continue;
		if (this.read_numb(stream, tokens)) continue;
		if (this.read_punc(stream, tokens)) continue;
		if (this.read_newline(stream, tokens)) continue;
		if (this.read_space(stream, tokens)) continue;
		if (this.read_word(stream, tokens)) continue;

		// A reader may have consumed everything up to the end of the stream and
		// still reported failure (for example an unterminated string). There is
		// then no character left to report, so do not emit an empty token.
		if (stream.look().eof) break;

		// if execution reaches here then an error has happened
		tokens.push(new JSDOC.Token(stream.next(), "TOKN", "UNKNOWN_TOKEN"));
	}
	return tokens;
};
|
41 |
|
/**
	Read a run of word characters and add it to the token list, as a "KEYW"
	token when JSDOC.Lang.keyword recognises it and as a "NAME" token otherwise.

	@param {JSDOC.TextStream} stream The text being consumed.
	@param {JSDOC.Token[]} tokens The list the new token is appended to.
	@returns {Boolean} Was the token found?
 */
JSDOC.TokenReader.prototype.read_word = function(/**JSDOC.TextStream*/stream, tokens) {
	var word = "";
	while (!stream.look().eof && JSDOC.Lang.isWordChar(stream.look())) {
		word += stream.next();
	}

	// nothing consumed: the stream is not positioned at a word
	if (word === "") return false;

	var keywordName = JSDOC.Lang.keyword(word);
	var token = keywordName
		? new JSDOC.Token(word, "KEYW", keywordName)
		: new JSDOC.Token(word, "NAME", "NAME");
	tokens.push(token);
	return true;
};
|
61 |
|
/**
	Read the longest run of characters that JSDOC.Lang.punc accepts and add it
	to the token list as a "PUNC" token, named with the value JSDOC.Lang.punc
	returns for it.

	@param {JSDOC.TextStream} stream The text being consumed.
	@param {JSDOC.Token[]} tokens The list the new token is appended to.
	@returns {Boolean} Was the token found?
 */
JSDOC.TokenReader.prototype.read_punc = function(/**JSDOC.TextStream*/stream, tokens) {
	var found = "";

	// extend the match one character at a time while the longer text is still punctuation
	while (!stream.look().eof && JSDOC.Lang.punc(found + stream.look())) {
		found += stream.next();
	}

	if (found === "") return false;

	tokens.push(new JSDOC.Token(found, "PUNC", JSDOC.Lang.punc(found)));
	return true;
};
|
80 |
|
/**
	Consume a run of space characters. The run is added to the token list as a
	"WHIT" token named "SPACE" only when keepWhite is set; with collapseWhite
	also set the stored text is a single " ".

	@param {JSDOC.TextStream} stream The text being consumed.
	@param {JSDOC.Token[]} tokens The list the new token is appended to.
	@returns {Boolean} Was the token found?
 */
JSDOC.TokenReader.prototype.read_space = function(/**JSDOC.TextStream*/stream, tokens) {
	var run = "";
	while (!stream.look().eof && JSDOC.Lang.isSpace(stream.look())) {
		run += stream.next();
	}

	if (run === "") return false;

	// the characters are consumed either way; they are only recorded on request
	if (this.keepWhite) {
		var text = this.collapseWhite ? " " : run;
		tokens.push(new JSDOC.Token(text, "WHIT", "SPACE"));
	}
	return true;
};
|
100 |
|
/**
	Consume a run of newline characters. The run is added to the token list as
	a "WHIT" token named "NEWLINE" only when keepWhite is set; with
	collapseWhite also set the stored text is a single "\n".

	@param {JSDOC.TextStream} stream The text being consumed.
	@param {JSDOC.Token[]} tokens The list the new token is appended to.
	@returns {Boolean} Was the token found?
 */
JSDOC.TokenReader.prototype.read_newline = function(/**JSDOC.TextStream*/stream, tokens) {
	var run = "";
	while (!stream.look().eof && JSDOC.Lang.isNewline(stream.look())) {
		run += stream.next();
	}

	if (run === "") return false;

	// the characters are consumed either way; they are only recorded on request
	if (this.keepWhite) {
		var text = this.collapseWhite ? "\n" : run;
		tokens.push(new JSDOC.Token(text, "WHIT", "NEWLINE"));
	}
	return true;
};
|
120 |
|
/**
	Consume a multi-line comment. A comment that opens like a doclet is added
	as a "COMM" token named "JSDOC" when keepDocs is set; otherwise the comment
	is added as "MULTI_LINE_COMM" when keepComments is set. A comment that is
	still open at the end of the stream is treated the same way.

	@param {JSDOC.TextStream} stream The text being consumed.
	@param {JSDOC.Token[]} tokens The list the new token is appended to.
	@returns {Boolean} Was the token found?
 */
JSDOC.TokenReader.prototype.read_mlcomment = function(/**JSDOC.TextStream*/stream, tokens) {
	if (!(stream.look() == "/" && stream.look(1) == "*")) return false;

	var found = stream.next(2);

	// The star of the opener must not double as the star of the terminator, so
	// the three characters slash-star-slash are still an open comment. Only
	// accept the terminator once at least four characters have been read.
	while (
		!stream.look().eof
		&& !(found.length >= 4 && stream.look(-1) == "/" && stream.look(-2) == "*")
	) {
		found += stream.next();
	}

	// to start doclet we allow /** or /*** but not /**/ or /****
	// (after the two stars: one character that is neither star nor slash,
	// or one more star followed by such a character)
	var opensDoclet = /^\/\*\*([^\/*]|\*[^\/*])/.test(found);

	if (opensDoclet && this.keepDocs) {
		tokens.push(new JSDOC.Token(found, "COMM", "JSDOC"));
	}
	else if (this.keepComments) {
		tokens.push(new JSDOC.Token(found, "COMM", "MULTI_LINE_COMM"));
	}
	return true;
};
|
139 |
|
/**
	Consume a single-line comment, up to but not including the next newline.
	Two openers are recognised: a double slash, and the four-character
	HTML-style opener (less-than, exclamation mark, two hyphens). The comment
	is added as a "COMM" token named "SINGLE_LINE_COMM" only when keepComments
	is set.

	@param {JSDOC.TextStream} stream The text being consumed.
	@param {JSDOC.Token[]} tokens The list the new token is appended to.
	@returns {Boolean} Was the token found?
 */
JSDOC.TokenReader.prototype.read_slcomment = function(/**JSDOC.TextStream*/stream, tokens) {
	var found;

	if (stream.look() == "/" && stream.look(1) == "/") {
		found = stream.next(2);
	}
	else if (
		stream.look() == "<"
		&& stream.look(1) == "!"
		&& stream.look(2) == "-"
		&& stream.look(3) == "-"
	) {
		found = stream.next(4);
	}
	else {
		return false;
	}

	// the rest of the line belongs to the comment
	while (!stream.look().eof && !JSDOC.Lang.isNewline(stream.look())) {
		found += stream.next();
	}

	if (this.keepComments) {
		tokens.push(new JSDOC.Token(found, "COMM", "SINGLE_LINE_COMM"));
	}
	return true;
};
|
162 |
|
/**
	Read a double-quoted string, including both quotes, and add it to the token
	list as a "STRN" token named "DOUBLE_QUOTE". A backslash followed by one or
	more newline characters is stored as a backslash and a single "\n".

	If the stream ends before the closing quote, the characters read so far are
	discarded, no token is added and false is returned.

	@param {JSDOC.TextStream} stream The text being consumed.
	@param {JSDOC.Token[]} tokens The list the new token is appended to.
	@returns {Boolean} Was the token found?
 */
JSDOC.TokenReader.prototype.read_dbquote = function(/**JSDOC.TextStream*/stream, tokens) {
	if (stream.look() != "\"") return false;

	// find terminator
	var string = stream.next();

	while (!stream.look().eof) {
		if (stream.look() == "\\") {
			if (JSDOC.Lang.isNewline(stream.look(1))) {
				// line continuation: drop the backslash and every newline character after it
				do {
					stream.next();
				} while (!stream.look().eof && JSDOC.Lang.isNewline(stream.look()));
				string += "\\\n";
			}
			else {
				// escape sequence: take the backslash, then the escaped character
				// separately, so two characters are never requested when the
				// backslash is the last character of the stream
				string += stream.next();
				if (!stream.look().eof) string += stream.next();
			}
		}
		else if (stream.look() == "\"") {
			string += stream.next();
			tokens.push(new JSDOC.Token(string, "STRN", "DOUBLE_QUOTE"));
			return true;
		}
		else {
			string += stream.next();
		}
	}

	return false; // error! unterminated string
};
|
195 |
|
/**
	Read a single-quoted string, including both quotes, and add it to the token
	list as a "STRN" token named "SINGLE_QUOTE".

	If the stream ends before the closing quote, the characters read so far are
	discarded, no token is added and false is returned.

	@param {JSDOC.TextStream} stream The text being consumed.
	@param {JSDOC.Token[]} tokens The list the new token is appended to.
	@returns {Boolean} Was the token found?
 */
JSDOC.TokenReader.prototype.read_snquote = function(/**JSDOC.TextStream*/stream, tokens) {
	if (stream.look() != "'") return false;

	// find terminator
	var string = stream.next();

	while (!stream.look().eof) {
		if (stream.look() == "\\") {
			// escape sequence: take the backslash, then the escaped character
			// separately, so two characters are never requested when the
			// backslash is the last character of the stream
			string += stream.next();
			if (!stream.look().eof) string += stream.next();
		}
		else if (stream.look() == "'") {
			string += stream.next();
			tokens.push(new JSDOC.Token(string, "STRN", "SINGLE_QUOTE"));
			return true;
		}
		else {
			string += stream.next();
		}
	}

	return false; // error! unterminated string
};
|
220 |
|
/**
	Read a numeric literal and add it to the token list as a "NUMB" token.
	Text starting with "0x" is handed to read_hex. Anything else is grown one
	character at a time while JSDOC.Lang.isNumber accepts it, and is named
	"OCTAL" or "DECIMAL".

	@param {JSDOC.TextStream} stream The text being consumed.
	@param {JSDOC.Token[]} tokens The list the new token is appended to.
	@returns {Boolean} Was the token found?
 */
JSDOC.TokenReader.prototype.read_numb = function(/**JSDOC.TextStream*/stream, tokens) {
	if (stream.look() === "0" && stream.look(1) == "x") {
		return this.read_hex(stream, tokens);
	}

	var found = "";

	while (!stream.look().eof && JSDOC.Lang.isNumber(found + stream.look())) {
		found += stream.next();
	}

	if (found === "") return false;

	// Octal only when every character after the leading zero is an octal digit;
	// text such as "078" contains a non-octal digit and is therefore decimal.
	var name = /^0[0-7]+$/.test(found) ? "OCTAL" : "DECIMAL";
	tokens.push(new JSDOC.Token(found, "NUMB", name));
	return true;
};
|
244 /*t: |
|
245 requires("../lib/JSDOC/TextStream.js"); |
|
246 requires("../lib/JSDOC/Token.js"); |
|
247 requires("../lib/JSDOC/Lang.js"); |
|
248 |
|
249 plan(3, "testing JSDOC.TokenReader.prototype.read_numb"); |
|
250 |
|
251 //// setup |
|
252 var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}"; |
|
253 var tr = new JSDOC.TokenReader(); |
|
254 var tokens = tr.tokenize(new JSDOC.TextStream(src)); |
|
255 |
|
256 var hexToken, octToken, decToken; |
|
257 for (var i = 0; i < tokens.length; i++) { |
|
258 if (tokens[i].name == "HEX_DEC") hexToken = tokens[i]; |
|
259 if (tokens[i].name == "OCTAL") octToken = tokens[i]; |
|
260 if (tokens[i].name == "DECIMAL") decToken = tokens[i]; |
|
261 } |
|
262 //// |
|
263 |
|
264 is(decToken.data, "8.0", "decimal number is found in source."); |
|
265 is(hexToken.data, "0x20", "hexdec number is found in source (issue #99)."); |
|
266 is(octToken.data, "0777", "octal number is found in source."); |
|
267 */ |
|
268 |
|
/**
	Read a hexadecimal literal and add it to the token list as a "NUMB" token
	named "HEX_DEC". read_numb calls this once it has seen the two characters
	"0x" at the current position; those two characters are consumed here.

	The literal is grown one character at a time while JSDOC.Lang.isHexDec
	accepts the longer text, so reading stops at the first character that does
	not belong to the number, or at the end of the stream.

	@param {JSDOC.TextStream} stream The text being consumed.
	@param {JSDOC.Token[]} tokens The list the new token is appended to.
	@returns {Boolean} Was the token found?
 */
JSDOC.TokenReader.prototype.read_hex = function(/**JSDOC.TextStream*/stream, tokens) {
	var found = stream.next(2);

	while (!stream.look().eof && JSDOC.Lang.isHexDec(found + stream.look())) {
		found += stream.next();
	}

	// Validate after the loop rather than inside it, so that a literal which
	// runs up to the very end of the stream is still emitted.
	if (JSDOC.Lang.isHexDec(found)) {
		tokens.push(new JSDOC.Token(found, "NUMB", "HEX_DEC"));
		return true;
	}

	// isHexDec rejected the text (presumably a bare prefix with no digits);
	// only the characters already read are lost, not the rest of the stream
	return false;
};
|
286 |
|
/**
	Read a regular expression literal, including its delimiters and trailing
	flags, and add it to the token list as a "REGX" token.

	A "/" is taken as the start of a literal only when the previous significant
	token (tokens.lastSym()) is missing or is not a number, a name, a right
	parenthesis or a right bracket; after any of those the slash is left for
	the other readers.

	If the stream ends before the closing slash, the characters read so far are
	discarded, no token is added and false is returned.

	@param {JSDOC.TextStream} stream The text being consumed.
	@param {JSDOC.Token[]} tokens The list the new token is appended to; must
		provide the lastSym() helper that tokenize attaches.
	@returns {Boolean} Was the token found?
 */
JSDOC.TokenReader.prototype.read_regx = function(/**JSDOC.TextStream*/stream, tokens) {
	if (stream.look() != "/") return false;

	var last = tokens.lastSym(); // undefined when the regex is the first symbol
	if (
		last
		&& (
			last.is("NUMB")
			|| last.is("NAME")
			|| last.is("RIGHT_PAREN")
			|| last.is("RIGHT_BRACKET")
		)
	) {
		return false;
	}

	var regex = stream.next();
	var inClass = false; // true between "[" and the matching "]"

	while (!stream.look().eof) {
		var ch = stream.look();

		if (ch == "\\") {
			// escape sequence: take the backslash, then the escaped character
			// separately, so two characters are never requested when the
			// backslash is the last character of the stream
			regex += stream.next();
			if (!stream.look().eof) regex += stream.next();
		}
		else if (ch == "/" && !inClass) {
			regex += stream.next();

			// flags directly after the closing slash
			while (/[dgimsuvy]/.test(stream.look())) {
				regex += stream.next();
			}

			tokens.push(new JSDOC.Token(regex, "REGX", "REGX"));
			return true;
		}
		else {
			// a slash inside a character class is an ordinary character, so
			// track where classes open and close
			if (ch == "[") inClass = true;
			else if (ch == "]") inClass = false;

			regex += stream.next();
		}
	}

	// error: unterminated regex
	return false;
};