Object
ruby constants for strings (should this be moved somewhere else?)
How the parser advances to the next token.
@return true if not at end of file (EOF).
# File lib/ruby_lexer.rb, line 53
# Advances to the next token: runs yylex and stores its result in +token+.
#
# Raises RuntimeError when yylex yields nil (or false).
# Returns true unless the token just read is RubyLexer::EOF.
def advance
  self.token = next_token = yylex

  raise "yylex returned nil" unless next_token

  RubyLexer::EOF != next_token
end
# File lib/ruby_lexer.rb, line 62
# Reports (through +warning+) that the first argument of a call is ambiguous.
def arg_ambiguous
  warning "Ambiguous first argument. make sure."
end
# File lib/ruby_lexer.rb, line 72
# Pushes +false+ onto both the cond and cmdarg stacks, switches the lexer
# to :expr_beg and records +val+ as the current yacc value.
def expr_beg_push val
  [cond, cmdarg].each { |stack| stack.push false }

  self.lex_state = :expr_beg
  self.yacc_value = val
end
# File lib/ruby_lexer.rb, line 79
# Sets the state that follows an operator token: :expr_arg when the lexer
# was in :expr_fname or :expr_dot, :expr_beg otherwise.
def fix_arg_lex_state
  after_name_or_dot = lex_state == :expr_fname || lex_state == :expr_dot

  self.lex_state = after_name_or_dot ? :expr_arg : :expr_beg
end
# File lib/ruby_lexer.rb, line 87
# Lexes the next piece of a here document.
#
# +here+ is the lex_strterm tuple built by heredoc_identifier:
# [:heredoc, eos, func, last_line], where +eos+ is the terminator text,
# +func+ the STR_FUNC_* bit mask and +last_line+ the remainder of the line
# that introduced the heredoc.
#
# Returns :tSTRING_END when the terminator is next, :tSTRING_DVAR or
# :tSTRING_DBEG at an interpolation start, and :tSTRING_CONTENT otherwise
# (with the text in yacc_value). Calls rb_compile_error when the input
# ends before the terminator is found.
def heredoc here # 63 lines
  _, eos, func, last_line = here

  indent = (func & STR_FUNC_INDENT) != 0
  expand = (func & STR_FUNC_EXPAND) != 0

  # The terminator is plain text: a quoted identifier such as <<"a.b" may
  # contain regexp metacharacters, so it must be escaped before matching.
  eos_pat = Regexp.escape eos
  eos_re  = indent ? /[ \t]*#{eos_pat}(\r?\n|\z)/ : /#{eos_pat}(\r?\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if src.eos?

  if src.beginning_of_line? && src.scan(eos_re) then
    src.unread_many last_line # TODO: figure out how to remove this
    self.yacc_value = eos
    return :tSTRING_END
  end

  self.string_buffer = []

  if expand then
    case
    when src.scan(/#[$@]/) then
      src.pos -= 1 # FIX omg stupid
      self.yacc_value = src.matched
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      self.yacc_value = src.matched
      return :tSTRING_DBEG
    when src.scan(/#/) then
      string_buffer << '#'
    end

    until src.scan(eos_re) do
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if c == RubyLexer::EOF

      if c != "\n" then
        # Stopped before end of line (e.g. at an interpolation): hand back
        # what has been collected so far.
        self.yacc_value = string_buffer.join.delete("\r")
        return :tSTRING_CONTENT
      else
        string_buffer << src.scan(/\n/)
      end

      rb_compile_error err_msg if src.eos?
    end

    # tack on a NL after the heredoc token - FIX NL should not be needed
    src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
  else
    # Non-expanding heredoc: copy whole lines until the terminator is next.
    until src.check(eos_re) do
      string_buffer << src.scan(/.*(\n|\z)/)
      rb_compile_error err_msg if src.eos?
    end
  end

  self.lex_strterm = [:heredoc, eos, func, last_line]
  self.yacc_value = string_buffer.join.delete("\r")

  return :tSTRING_CONTENT
end
# File lib/ruby_lexer.rb, line 152
# Tries to read a here-document identifier (the part following "<<").
#
# Accepts an optional "-" (indented terminator) followed by either a
# quoted identifier ('…', "…" or `…`) or a bare word. On success the rest
# of the current line is cut out of the scanner's string and stored, with
# the identifier and string flags, in lex_strterm.
#
# Returns :tXSTRING_BEG for a backtick identifier, :tSTRING_BEG for the
# other forms, or nil when no identifier follows. Calls rb_compile_error
# for an opening quote without a matching close.
def heredoc_identifier # 51 lines
  term, func = nil, STR_FUNC_BORING
  self.string_buffer = []

  case
  when src.scan(/(-?)(['"`])(.*?)\2/) then
    term = src[2]
    unless src[1].empty? then
      func |= STR_FUNC_INDENT
    end
    func |= case term
            when "\'" then
              STR_SQUOTE
            when '"' then
              STR_DQUOTE
            else
              STR_XQUOTE
            end
    string_buffer << src[3]
  when src.scan(/-?(['"`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  when src.scan(/(-?)(\w+)/) then
    # A bare identifier behaves like a double-quoted one.
    term = '"'
    func |= STR_DQUOTE
    unless src[1].empty? then
      func |= STR_FUNC_INDENT
    end
    string_buffer << src[2]
  else
    return nil
  end

  if src.check(/.*\n/) then
    # TODO: think about storing off the char range instead
    # Replace the remainder of this line with a lone newline and step past
    # it; the removed text is kept in the strterm tuple.
    line = src.string[src.pos, src.matched_size]
    src.string[src.pos, src.matched_size] = "\n"
    src.pos += 1
  else
    line = nil
  end

  self.lex_strterm = [:heredoc, string_buffer.join, func, line]

  if term == '`' then
    self.yacc_value = "`"
    return :tXSTRING_BEG
  else
    self.yacc_value = "\""
    return :tSTRING_BEG
  end
end
# File lib/ruby_lexer.rb, line 213
# Converts the text most recently matched by the scanner into an Integer
# of the given +base+ and stores it in yacc_value.
#
# Calls rb_compile_error when the literal contains "__".
# Returns :tINTEGER.
def int_with_base base
  literal = src.matched

  rb_compile_error "Invalid numeric format" if literal =~ /__/

  self.yacc_value = literal.to_i(base)
  :tINTEGER
end
# File lib/ruby_lexer.rb, line 219
# Sets the lexer state.
#
# +o+ must be a Symbol (e.g. :expr_beg); anything else raises a
# RuntimeError that names the offending value.
def lex_state= o
  raise "lex_state must be a Symbol, got #{o.inspect}" unless Symbol === o
  @lex_state = o
end
# File lib/ruby_lexer.rb, line 225
# Returns the cached line number, asking the scanner for it (and caching
# the answer) when nothing is cached.
def lineno
  @lineno = src.lineno unless @lineno
  @lineno
end
Parse a number from the input stream.
@param c The first character of the number. @return An int constant which represents a token.
# File lib/ruby_lexer.rb, line 235
# Lexes a numeric literal at the scanner's current position.
#
# Sets lex_state to :expr_end, stores the numeric value in yacc_value and
# returns :tINTEGER or :tFLOAT. Calls rb_compile_error for malformed
# literals (bare radix prefix, illegal octal digit, trailing or doubled
# underscore, or anything else that is not a number).
#
# Radix prefixes and the float exponent marker are matched without regard
# to case, so 0XFF, 0B101 and 1E5 are accepted like their lower-case forms.
def parse_number
  self.lex_state = :expr_end

  case
  when src.scan(/[+-]?0[xbd]\b/i) then
    rb_compile_error "Invalid numeric format"
  when src.scan(/[+-]?0x[a-f0-9_]+/i) then
    int_with_base(16)
  when src.scan(/[+-]?0b[01_]+/i) then
    int_with_base(2)
  when src.scan(/[+-]?0d[0-9_]+/i) then
    int_with_base(10)
  when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
    rb_compile_error "Illegal octal digit."
  when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
    int_with_base(8)
  when src.scan(/[+-]?[\d_]+_(e|\.)/i) then
    rb_compile_error "Trailing '_' in number."
  when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
    number = src.matched
    if number =~ /__/ then
      rb_compile_error "Invalid numeric format"
    end
    self.yacc_value = number.to_f
    :tFLOAT
  when src.scan(/[+-]?0\b/) then
    int_with_base(10)
  when src.scan(/[+-]?[\d_]+\b/) then
    int_with_base(10)
  else
    rb_compile_error "Bad number format"
  end
end
# File lib/ruby_lexer.rb, line 269
# Lexes the opening of a %-literal; the "%" itself has already been consumed.
#
# Handles the long-hand forms %Q %q %W %w %x %r %s followed by a delimiter,
# and the short-hand form where the delimiter follows "%" directly (treated
# as %Q). Stores [:strterm, string_type, closing, opening] in lex_strterm
# and the literal's opening text in yacc_value.
#
# Returns the opening token (:tSTRING_BEG, :tWORDS_BEG, :tAWORDS_BEG,
# :tXSTRING_BEG, :tREGEXP_BEG or :tSYMBEG). Calls rb_compile_error for an
# unknown type letter or when the input ends right after the opener.
def parse_quote # 58 lines
  beg, nnd, short_hand, c = nil, nil, false, nil

  # Case-insensitive so the upper-case types (%Q, %W) take the long-hand path.
  if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    c, beg, short_hand = src.matched, src.getch, false
  else                               # Short-hand (e.g. %{, %., %!, etc)
    c, beg, short_hand = 'Q', src.getch, true
  end

  if src.eos? || c == RubyLexer::EOF || beg == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
  nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
  nnd, beg = beg, "\0" if nnd.nil?

  token_type, self.yacc_value = nil, "%#{c}#{beg}"
  token_type, string_type = case c
                            when 'Q' then
                              ch = short_hand ? nnd : c + beg
                              self.yacc_value = "%#{ch}"
                              [:tSTRING_BEG, STR_DQUOTE]
                            when 'q' then
                              [:tSTRING_BEG, STR_SQUOTE]
                            when 'W' then
                              src.scan(/\s*/)
                              [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_AWORDS]
                            when 'w' then
                              src.scan(/\s*/)
                              [:tAWORDS_BEG, STR_SQUOTE | STR_FUNC_AWORDS]
                            when 'x' then
                              [:tXSTRING_BEG, STR_XQUOTE]
                            when 'r' then
                              [:tREGEXP_BEG, STR_REGEXP]
                            when 's' then
                              self.lex_state = :expr_fname
                              [:tSYMBEG, STR_SSYM]
                            end

  rb_compile_error "Bad %string type. Expected [QqWwxrs], found '#{c}'." if
    token_type.nil?

  self.lex_strterm = [:strterm, string_type, nnd, beg]

  return token_type
end
# File lib/ruby_lexer.rb, line 318
# Lexes the next piece of a string-like literal described by +quote+, the
# lex_strterm tuple [:strterm, string_type, term, open].
#
# Returns :tSTRING_END / :tREGEXP_END at the terminator, :tSPACE between
# the elements of a word list, :tSTRING_DVAR / :tSTRING_DBEG at the start
# of an interpolation, and :tSTRING_CONTENT (text in yacc_value) otherwise.
# Calls rb_compile_error when the input ends inside the literal.
def parse_string(quote) # 65 lines
  _, flags, term, paren = quote

  # The string type is cleared (quote[1] = nil, below) once the terminator
  # of a word list has been consumed; the following call ends the literal.
  unless flags
    self.lineno = nil
    return :tSTRING_END
  end

  term_re = Regexp.escape term
  awords  = (flags & STR_FUNC_AWORDS) != 0
  regexp  = (flags & STR_FUNC_REGEXP) != 0
  expand  = (flags & STR_FUNC_EXPAND) != 0

  saw_space = awords && !!src.scan(/\s+/)

  if self.nest == 0 && src.scan(/#{term_re}/)
    if awords
      quote[1] = nil
      return :tSPACE
    end

    self.yacc_value = regexp ? self.regx_options : term
    self.lineno = nil
    return regexp ? :tREGEXP_END : :tSTRING_END
  end

  return :tSPACE if saw_space

  self.string_buffer = []

  if expand
    if src.scan(/#(?=[$@])/)
      return :tSTRING_DVAR
    elsif src.scan(/#[{]/)
      return :tSTRING_DBEG
    elsif src.scan(/#/)
      string_buffer << '#'
    end
  end

  at_eof = tokadd_string(flags, term, paren) == RubyLexer::EOF
  rb_compile_error "unterminated string meets end of file" if at_eof

  self.yacc_value = string_buffer.join

  :tSTRING_CONTENT
end
# File lib/ruby_lexer.rb, line 1232
# Classifies the identifier-like text held in +token+ and updates the
# lexer state to match.
#
# +command_state+ says whether a command may start here; it selects
# between :expr_cmdarg and :expr_arg after a plain identifier.
#
# Returns :tGVAR, :tCVAR, :tIVAR, :tFID, :tCONSTANT, :tIDENTIFIER, or a
# keyword token taken from the Keyword table (including the :kDO variants).
def process_token(command_state)
  # A trailing "!" or "?" is part of the name unless it is followed by "=".
  if token =~ /^\w/ && src.scan(/[\!\?](?!=)/)
    token << src.matched
  end

  result = nil
  last_state = lex_state

  variable_type = case token
                  when /^\$/ then :tGVAR
                  when /^@@/ then :tCVAR
                  when /^@/  then :tIVAR
                  end

  if variable_type
    self.lex_state = :expr_end
    result = variable_type
  else
    if token =~ /[!?]$/
      result = :tFID
    else
      # ident=, not =~ => == or followed by =>
      # TODO test lexing of a=>b vs a==>b
      if lex_state == :expr_fname && src.scan(/=(?:(?![~>=])|(?==>))/)
        result = :tIDENTIFIER
        token << src.matched
      end

      result ||= (token =~ /^[A-Z]/) ? :tCONSTANT : :tIDENTIFIER
    end

    # Reserved words are not recognised directly after a dot.
    keyword = lex_state == :expr_dot ? nil : Keyword.keyword(token)

    if keyword
      state = lex_state
      self.lex_state = keyword.state
      self.yacc_value = token

      if keyword.id0 == :kDO
        self.command_start = true
        return :kDO_COND  if cond.is_in_state
        return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
        return :kDO_BLOCK if state == :expr_endarg
        return :kDO
      end

      return keyword.id0 if state == :expr_beg

      self.lex_state = :expr_beg if keyword.id0 != keyword.id1

      return keyword.id1
    end

    argument_position = [:expr_beg, :expr_mid, :expr_dot,
                         :expr_arg, :expr_cmdarg].include?(lex_state)

    self.lex_state = if argument_position
                       command_state ? :expr_cmdarg : :expr_arg
                     else
                       :expr_end
                     end
  end

  self.yacc_value = token

  # A name the parser environment records as :lvar ends the expression,
  # except directly after a dot.
  if last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar
    self.lex_state = :expr_end
  end

  result
end
# File lib/ruby_lexer.rb, line 379
# Raises SyntaxError with +msg+ extended by the current line number and
# the unread remainder of the current line.
def rb_compile_error msg
  where = ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"

  raise SyntaxError, msg + where
end
# File lib/ruby_lexer.rb, line 384
# Reads one escape sequence from the scanner (the leading backslash has
# already been consumed) and returns the string it denotes.
#
# Supports the single-letter escapes (n t r f v a e b s), octal (\NNN) and
# hex (\xNN) constants, meta (\M-x) and control (\C-x, \cx) forms including
# their nested variants; any other character is returned as itself. Calls
# rb_compile_error for a malformed M/C/c/x/digit sequence or at end of input.
def read_escape # 51 lines
  case
  when src.scan(/\\/) then                  # Backslash
    '\\'
  when src.scan(/n/) then                   # newline
    "\n"
  when src.scan(/t/) then                   # horizontal tab
    "\t"
  when src.scan(/r/) then                   # carriage-return
    "\r"
  when src.scan(/f/) then                   # form-feed
    "\f"
  when src.scan(/v/) then                   # vertical tab
    "\13"
  when src.scan(/a/) then                   # alarm(bell)
    "\007"
  when src.scan(/e/) then                   # escape
    "\033"
  when src.scan(/b/) then                   # backspace
    "\010"
  when src.scan(/s/) then                   # space
    " "
  when src.scan(/[0-7]{1,3}/) then          # octal constant
    src.matched.to_i(8).chr
  when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    src[1].to_i(16).chr
  when src.check(/M-\\[\\MCc]/) then
    src.scan(/M-\\/) # eat it
    c = self.read_escape
    c[0] = (c[0].ord | 0x80).chr # meta: set the high bit
    c
  when src.scan(/M-(.)/) then
    c = src[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.check(/(C-|c)\\[\\MCc]/) then
    src.scan(/(C-|c)\\/) # eat it
    c = self.read_escape
    c[0] = (c[0].ord & 0x9f).chr # control: clear bits 5 and 6
    c
  when src.scan(/C-\?|c\?/) then
    127.chr
  when src.scan(/(C-|c)(.)/) then
    c = src[2]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/[McCx0-9]/) || src.eos? then
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
# File lib/ruby_lexer.rb, line 437
# Scans the option letters that follow a regexp literal.
#
# Returns the recognised letters (i x m o n e s u) joined into a String,
# "" when none follow. Calls rb_compile_error when an unknown lower-case
# letter is present.
def regx_options # 15 lines
  letters = src.scan(/[a-z]+/) ? src.matched.split(//) : []
  known, unknown = letters.partition { |s| s =~ /^[ixmonesu]$/ }

  unless unknown.empty?
    plural = unknown.size > 1 ? "s" : ""
    rb_compile_error("unknown regexp option%s - %s" %
                     [plural, unknown.join.inspect])
  end

  known.join
end
# File lib/ruby_lexer.rb, line 452
# Puts the lexer back into its initial condition: a command may start,
# and the pending string terminator, current token, yacc value, scanner
# and lexer state are all cleared.
def reset
  self.command_start = true

  self.lex_strterm = nil
  self.token       = nil
  self.yacc_value  = nil

  @src       = nil
  @lex_state = nil
end
# File lib/ruby_lexer.rb, line 462
# Installs a new input: wraps the String +src+ in an RPStringScanner.
#
# Raises RuntimeError when +src+ is not a String.
def src= src
  unless String === src
    raise "bad src: #{src.inspect}"
  end

  @src = RPStringScanner.new(src)
end
# File lib/ruby_lexer.rb, line 467
# Copies one backslash escape from the scanner into string_buffer
# unchanged (backslash included). A backslash-newline pair is dropped, and
# a meta/control prefix that is followed by another backslash recurses to
# copy the nested escape too.
#
# Calls rb_compile_error for an incomplete M/C/c/x escape or when no
# escape can be read at all. +term+ is only passed along on recursion.
def tokadd_escape term # 20 lines
  if src.scan(/\\\n/)
    # just ignore
  elsif src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/)
    self.string_buffer << src.matched
  elsif src.scan(/\\([MC]-|c)(?=\\)/)
    self.string_buffer << src.matched
    self.tokadd_escape term
  elsif src.scan(/\\([MC]-|c)(.)/)
    self.string_buffer << src.matched
  elsif src.scan(/\\[McCx]/)
    rb_compile_error "Invalid escape character syntax"
  elsif src.scan(/\\(.)/)
    self.string_buffer << src.matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end
# File lib/ruby_lexer.rb, line 487
# Appends string content to string_buffer until something that needs the
# caller's attention is reached: the terminator at nesting level zero,
# whitespace inside a word list, the start of an interpolation, or the end
# of input.
#
# +func+ is the STR_FUNC_* bit mask, +term+ the closing delimiter and
# +paren+ the opening delimiter (nil, or "\0" when the literal does not
# nest). The scanner is left positioned on the character that stopped the
# scan.
#
# Returns RubyLexer::EOF when the input is exhausted, otherwise the text of
# the last piece handled.
def tokadd_string(func, term, paren) # 105 lines
  awords = (func & STR_FUNC_AWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
  term_re  = Regexp.new(Regexp.escape(term))

  until src.eos? do
    c = nil
    handled = true
    case
    when self.nest == 0 && src.scan(term_re) then
      src.pos -= 1 # leave the terminator for the caller
      break
    when paren_re && src.scan(paren_re) then
      self.nest += 1
    when src.scan(term_re) then
      self.nest -= 1
    when awords && src.scan(/\s/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?=[\$\@\{])/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?!\n)/) then
      # do nothing
    when src.check(/\\/) then
      case
      when awords && src.scan(/\\\n/) then
        string_buffer << "\n"
        next
      when awords && src.scan(/\\\s/) then
        c = ' '
      when expand && src.scan(/\\\n/) then
        next
      when regexp && src.check(/\\/) then
        self.tokadd_escape term
        next
      when expand && src.scan(/\\/) then
        c = self.read_escape
      when src.scan(/\\\n/) then
        # do nothing
      when src.scan(/\\\\/) then
        string_buffer << '\\' if escape
        c = '\\'
      when src.scan(/\\/) then
        unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
          string_buffer << "\\"
        end
      else
        handled = false
      end
    else
      handled = false
    end # case

    unless handled then
      # Plain text: take the longest run free of delimiters, '#', NUL and
      # backslash (and of whitespace in word lists), or else one character.
      t = Regexp.escape term
      x = Regexp.escape(paren) if paren && paren != "\000"
      re = if awords then
             /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
           else
             /[^#{t}#{x}\#\0\\]+|./
           end

      src.scan re
      c = src.matched

      rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
    end # unless handled

    c ||= src.matched
    string_buffer << c
  end # until

  c ||= src.matched
  c = RubyLexer::EOF if src.eos?

  return c
end
# File lib/ruby_lexer.rb, line 573
# Translates the body of a backslash escape (the text after the backslash)
# into the string it denotes.
#
# Known single-letter escapes, "\\", a newline (line continuation, yields
# "") and the delete forms "C-?" / "c?" come from a lookup table; octal,
# hex, meta (M-x) and control (C-x / cx) forms are computed; anything else
# is returned unchanged. Calls rb_compile_error for a malformed
# M/C/c/x/digit sequence.
def unescape s
  r = {
    "a"    => "\007",
    "b"    => "\010",
    "e"    => "\033",
    "f"    => "\f",
    "n"    => "\n",
    "r"    => "\r",
    "s"    => " ",
    "t"    => "\t",
    "v"    => "\13",
    "\\"   => '\\',
    "\n"   => "",
    "C-\?" => 127.chr,
    "c\?"  => 127.chr,
  }[s]

  return r if r

  case s
  when /^[0-7]{1,3}/ then
    $&.to_i(8).chr
  when /^x([0-9a-fA-F]{1,2})/ then
    $1.to_i(16).chr
  when /^M-(.)/ then
    ($1[0].ord | 0x80).chr # meta: set the high bit
  when /^(C-|c)(.)/ then
    ($2[0].ord & 0x9f).chr # control: clear bits 5 and 6
  when /^[McCx0-9]/ then
    rb_compile_error("Invalid escape character syntax")
  else
    s
  end
end
# File lib/ruby_lexer.rb, line 609
# Hook for lexer warnings. The message +s+ is currently discarded.
def warning(s)
  # do nothing for now
  nil
end
Returns the next token. Also sets yy_val if needed.
@return The type of the next token, or RubyLexer::EOF at the end of the input.
# File lib/ruby_lexer.rb, line 618
# Returns the type of the next token and stores its value in yacc_value.
#
# While a string-like literal is open (lex_strterm is set) the work is
# handed to yylex_string. Otherwise whitespace, comments and =begin/=end
# blocks are skipped (comment text is collected in @comments) and the next
# operator, literal opener, number, variable or identifier is recognised,
# with lex_state updated for the token that follows.
#
# Returns RubyLexer::EOF at the end of the input (also on ^D, ^Z, NUL or an
# __END__ line). Calls rb_compile_error on input that cannot start a token.
def yylex # 826 lines
  c = ''
  space_seen = false
  command_state = false
  src = self.src

  self.token = nil
  self.yacc_value = nil

  return yylex_string if lex_strterm

  command_state = self.command_start
  self.command_start = false

  last_state = lex_state

  loop do # START OF CASE
    if src.scan(/\ |\t|\r|\f|\13/) then # white spaces, 13 = '\v
      space_seen = true
      next
    elsif src.check(/[^a-zA-Z]/) then
      if src.scan(/\n|#/) then
        self.lineno = nil
        c = src.matched
        if c == '#' then
          src.unread c # ok

          while src.scan(/\s*#.*(\n+|\z)/) do
            @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
          end

          if src.eos? then
            return RubyLexer::EOF
          end
        end

        # Replace a string of newlines with a single one
        src.scan(/\n+/)

        # In these states a newline does not end the statement.
        if [:expr_beg, :expr_fname,
            :expr_dot, :expr_class].include? lex_state then
          next
        end

        self.command_start = true
        self.lex_state = :expr_beg
        return :tNL
      elsif src.scan(/[\]\)\}]/) then
        cond.lexpop
        cmdarg.lexpop
        self.lex_state = :expr_end
        self.yacc_value = src.matched
        result = {
          ")" => :tRPAREN,
          "]" => :tRBRACK,
          "}" => :tRCURLY
        }[src.matched]
        return result
      elsif src.check(/\./) then
        if src.scan(/\.\.\./) then
          self.lex_state = :expr_beg
          self.yacc_value = "..."
          return :tDOT3
        elsif src.scan(/\.\./) then
          self.lex_state = :expr_beg
          self.yacc_value = ".."
          return :tDOT2
        elsif src.scan(/\.\d/) then
          rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
        elsif src.scan(/\./) then
          self.lex_state = :expr_dot
          self.yacc_value = "."
          return :tDOT
        end
      elsif src.scan(/\,/) then
        self.lex_state = :expr_beg
        self.yacc_value = ","
        return :tCOMMA
      elsif src.scan(/\(/) then
        result = :tLPAREN2
        self.command_start = true
        if lex_state == :expr_beg || lex_state == :expr_mid then
          result = :tLPAREN
        elsif space_seen then
          if lex_state == :expr_cmdarg then
            result = :tLPAREN_ARG
          elsif lex_state == :expr_arg then
            warning("don't put space before argument parentheses")
            result = :tLPAREN2
          end
        end

        self.expr_beg_push "("

        return result
      elsif src.check(/\=/) then
        if src.scan(/\=\=\=/) then
          self.fix_arg_lex_state
          self.yacc_value = "==="
          return :tEQQ
        elsif src.scan(/\=\=/) then
          self.fix_arg_lex_state
          self.yacc_value = "=="
          return :tEQ
        elsif src.scan(/\=~/) then
          self.fix_arg_lex_state
          self.yacc_value = "=~"
          return :tMATCH
        elsif src.scan(/\=>/) then
          self.fix_arg_lex_state
          self.yacc_value = "=>"
          return :tASSOC
        elsif src.scan(/\=/) then
          if src.was_begin_of_line and src.scan(/begin(?=\s)/) then
            # =begin ... =end embedded document: collected as a comment.
            @comments << '=' << src.matched

            unless src.scan(/.*?\n=end\s*(\n|\z)/m) then
              @comments.clear
              rb_compile_error("embedded document meets end of file")
            end

            @comments << src.matched

            next
          else
            self.fix_arg_lex_state
            self.yacc_value = '='
            return :tEQL
          end
        end
      elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/) then
        # Simple double-quoted string without interpolation: strip both
        # quotes and resolve the escapes in one go.
        self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
        self.lex_state = :expr_end
        return :tSTRING
      elsif src.scan(/\"/) then # FALLBACK
        self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
        self.yacc_value = "\""
        return :tSTRING_BEG
      elsif src.scan(/\@\@?\w*/) then
        self.token = src.matched

        rb_compile_error "`#{token}` is not allowed as a variable name" if
          token =~ /\@\d/

        return process_token(command_state)
      elsif src.scan(/\:\:/) then
        if (lex_state == :expr_beg ||
            lex_state == :expr_mid ||
            lex_state == :expr_class ||
            (lex_state.is_argument && space_seen)) then
          self.lex_state = :expr_beg
          self.yacc_value = "::"
          return :tCOLON3
        end

        self.lex_state = :expr_dot
        self.yacc_value = "::"
        return :tCOLON2
      elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
        self.yacc_value = src[1]
        self.lex_state = :expr_end
        return :tSYMBOL
      elsif src.scan(/\:/) then
        # ?: / then / when
        if (lex_state == :expr_end || lex_state == :expr_endarg||
            src.check(/\s/)) then
          self.lex_state = :expr_beg
          self.yacc_value = ":"
          return :tCOLON
        end

        case
        when src.scan(/\'/) then
          self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
        when src.scan(/\"/) then
          self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
        end

        self.lex_state = :expr_fname
        self.yacc_value = ":"
        return :tSYMBEG
      elsif src.check(/[0-9]/) then
        return parse_number
      elsif src.scan(/\[/) then
        result = src.matched

        if lex_state == :expr_fname || lex_state == :expr_dot then
          self.lex_state = :expr_arg
          case
          when src.scan(/\]\=/) then
            self.yacc_value = "[]="
            return :tASET
          when src.scan(/\]/) then
            self.yacc_value = "[]"
            return :tAREF
          else
            rb_compile_error "unexpected '['"
          end
        elsif lex_state == :expr_beg || lex_state == :expr_mid then
          result = :tLBRACK
        elsif lex_state.is_argument && space_seen then
          result = :tLBRACK
        end

        self.expr_beg_push "["

        return result
      elsif src.scan(/\'(\\.|[^\'])*\'/) then
        # Simple single-quoted string: strip both quotes, then unescape
        # \\ and \'.
        self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
        self.lex_state = :expr_end
        return :tSTRING
      elsif src.check(/\|/) then
        if src.scan(/\|\|\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "||"
          return :tOP_ASGN
        elsif src.scan(/\|\|/) then
          self.lex_state = :expr_beg
          self.yacc_value = "||"
          return :tOROP
        elsif src.scan(/\|\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "|"
          return :tOP_ASGN
        elsif src.scan(/\|/) then
          self.fix_arg_lex_state
          self.yacc_value = "|"
          return :tPIPE
        end
      elsif src.scan(/\{/) then
        result = if lex_state.is_argument || lex_state == :expr_end then
                   :tLCURLY      #  block (primary)
                 elsif lex_state == :expr_endarg then
                   :tLBRACE_ARG  #  block (expr)
                 else
                   :tLBRACE      #  hash
                 end

        self.expr_beg_push "{"

        return result
      elsif src.scan(/[+-]/) then
        sign = src.matched
        utype, type = if sign == "+" then
                        [:tUPLUS, :tPLUS]
                      else
                        [:tUMINUS, :tMINUS]
                      end

        if lex_state == :expr_fname || lex_state == :expr_dot then
          self.lex_state = :expr_arg
          if src.scan(/@/) then
            self.yacc_value = "#{sign}@"
            return utype
          else
            self.yacc_value = sign
            return type
          end
        end

        if src.scan(/\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = sign
          return :tOP_ASGN
        end

        if (lex_state == :expr_beg || lex_state == :expr_mid ||
            (lex_state.is_argument && space_seen && !src.check(/\s/))) then
          if lex_state.is_argument then
            arg_ambiguous
          end

          self.lex_state = :expr_beg
          self.yacc_value = sign

          if src.check(/\d/) then
            if utype == :tUPLUS then
              return self.parse_number
            else
              return :tUMINUS_NUM
            end
          end

          return utype
        end

        self.lex_state = :expr_beg
        self.yacc_value = sign
        return type
      elsif src.check(/\*/) then
        if src.scan(/\*\*=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "**"
          return :tOP_ASGN
        elsif src.scan(/\*\*/) then
          self.yacc_value = "**"
          self.fix_arg_lex_state
          return :tPOW
        elsif src.scan(/\*\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "*"
          return :tOP_ASGN
        elsif src.scan(/\*/) then
          result = if lex_state.is_argument && space_seen && src.check(/\S/) then
                     warning("`*' interpreted as argument prefix")
                     :tSTAR
                   elsif lex_state == :expr_beg || lex_state == :expr_mid then
                     :tSTAR
                   else
                     :tSTAR2
                   end
          self.yacc_value = "*"
          self.fix_arg_lex_state

          return result
        end
      elsif src.check(/\!/) then
        if src.scan(/\!\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "!="
          return :tNEQ
        elsif src.scan(/\!~/) then
          self.lex_state = :expr_beg
          self.yacc_value = "!~"
          return :tNMATCH
        elsif src.scan(/\!/) then
          self.lex_state = :expr_beg
          self.yacc_value = "!"
          return :tBANG
        end
      elsif src.check(/\</) then
        if src.scan(/\<\=\>/) then
          self.fix_arg_lex_state
          self.yacc_value = "<=>"
          return :tCMP
        elsif src.scan(/\<\=/) then
          self.fix_arg_lex_state
          self.yacc_value = "<="
          return :tLEQ
        elsif src.scan(/\<\<\=/) then
          self.fix_arg_lex_state
          self.lex_state = :expr_beg
          self.yacc_value = "\<\<"
          return :tOP_ASGN
        elsif src.scan(/\<\</) then
          # "<<" opens a heredoc only where an expression may start.
          if (! [:expr_end,    :expr_dot,
                 :expr_endarg, :expr_class].include?(lex_state) &&
              (!lex_state.is_argument || space_seen)) then
            tok = self.heredoc_identifier
            if tok then
              return tok
            end
          end

          self.fix_arg_lex_state
          self.yacc_value = "\<\<"
          return :tLSHFT
        elsif src.scan(/\</) then
          self.fix_arg_lex_state
          self.yacc_value = "<"
          return :tLT
        end
      elsif src.check(/\>/) then
        if src.scan(/\>\=/) then
          self.fix_arg_lex_state
          self.yacc_value = ">="
          return :tGEQ
        elsif src.scan(/\>\>=/) then
          self.fix_arg_lex_state
          self.lex_state = :expr_beg
          self.yacc_value = ">>"
          return :tOP_ASGN
        elsif src.scan(/\>\>/) then
          self.fix_arg_lex_state
          self.yacc_value = ">>"
          return :tRSHFT
        elsif src.scan(/\>/) then
          self.fix_arg_lex_state
          self.yacc_value = ">"
          return :tGT
        end
      elsif src.scan(/\`/) then
        self.yacc_value = "`"
        case lex_state
        when :expr_fname then
          self.lex_state = :expr_end
          return :tBACK_REF2
        when :expr_dot then
          self.lex_state = if command_state then
                             :expr_cmdarg
                           else
                             :expr_arg
                           end
          return :tBACK_REF2
        end
        self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
        return :tXSTRING_BEG
      elsif src.scan(/\?/) then
        if lex_state == :expr_end || lex_state == :expr_endarg then
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        end

        if src.eos? then
          rb_compile_error "incomplete character syntax"
        end

        if src.check(/\s|\v/) then
          unless lex_state.is_argument then
            c2 = { " " => 's',
                  "\n" => 'n',
                  "\t" => 't',
                  "\v" => 'v',
                  "\r" => 'r',
                  "\f" => 'f' }[src.matched]

            if c2 then
              warning("invalid character syntax; use ?\\" + c2)
            end
          end

          # ternary
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        elsif src.check(/\w(?=\w)/) then # ternary, also
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        end

        # Character literal (?x or ?\n): the token value is its byte code.
        c = if src.scan(/\\/) then
              self.read_escape
            else
              src.getch
            end
        self.lex_state = :expr_end
        self.yacc_value = c[0].ord & 0xff
        return :tINTEGER
      elsif src.check(/\&/) then
        if src.scan(/\&\&\=/) then
          self.yacc_value = "&&"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        elsif src.scan(/\&\&/) then
          self.lex_state = :expr_beg
          self.yacc_value = "&&"
          return :tANDOP
        elsif src.scan(/\&\=/) then
          self.yacc_value = "&"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        elsif src.scan(/&/) then
          result = if lex_state.is_argument && space_seen &&
                       !src.check(/\s/) then
                     warning("`&' interpreted as argument prefix")
                     :tAMPER
                   elsif lex_state == :expr_beg || lex_state == :expr_mid then
                     :tAMPER
                   else
                     :tAMPER2
                   end

          self.fix_arg_lex_state
          self.yacc_value = "&"
          return result
        end
      elsif src.scan(/\//) then
        if lex_state == :expr_beg || lex_state == :expr_mid then
          self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
          self.yacc_value = "/"
          return :tREGEXP_BEG
        end

        if src.scan(/\=/) then
          self.yacc_value = "/"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        end

        if lex_state.is_argument && space_seen then
          unless src.scan(/\s/) then
            arg_ambiguous
            self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
            self.yacc_value = "/"
            return :tREGEXP_BEG
          end
        end

        self.fix_arg_lex_state
        self.yacc_value = "/"

        return :tDIVIDE
      elsif src.scan(/\^=/) then
        self.lex_state = :expr_beg
        self.yacc_value = "^"
        return :tOP_ASGN
      elsif src.scan(/\^/) then
        self.fix_arg_lex_state
        self.yacc_value = "^"
        return :tCARET
      elsif src.scan(/\;/) then
        self.command_start = true
        self.lex_state = :expr_beg
        self.yacc_value = ";"
        return :tSEMI
      elsif src.scan(/\~/) then
        if lex_state == :expr_fname || lex_state == :expr_dot then
          src.scan(/@/)
        end

        self.fix_arg_lex_state
        self.yacc_value = "~"

        return :tTILDE
      elsif src.scan(/\\/) then
        if src.scan(/\n/) then
          # Backslash-newline joins lines and counts as whitespace.
          self.lineno = nil
          space_seen = true
          next
        end
        rb_compile_error "bare backslash only allowed before newline"
      elsif src.scan(/\%/) then
        if lex_state == :expr_beg || lex_state == :expr_mid then
          return parse_quote
        end

        if src.scan(/\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "%"
          return :tOP_ASGN
        end

        if lex_state.is_argument && space_seen && ! src.check(/\s/) then
          return parse_quote
        end

        self.fix_arg_lex_state
        self.yacc_value = "%"

        return :tPERCENT
      elsif src.check(/\$/) then
        if src.scan(/(\$_)(\w+)/) then
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        elsif src.scan(/\$_/) then
          self.lex_state = :expr_end
          self.token = src.matched
          self.yacc_value = src.matched
          return :tGVAR
        elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
          self.lex_state = :expr_end
          self.yacc_value = src.matched
          return :tGVAR
        elsif src.scan(/\$([\&\`\'\+])/) then
          self.lex_state = :expr_end
          # Explicit reference to these vars as symbols...
          if last_state == :expr_fname then
            self.yacc_value = src.matched
            return :tGVAR
          else
            self.yacc_value = src[1].to_sym
            return :tBACK_REF
          end
        elsif src.scan(/\$([1-9]\d*)/) then
          self.lex_state = :expr_end
          if last_state == :expr_fname then
            self.yacc_value = src.matched
            return :tGVAR
          else
            self.yacc_value = src[1].to_i
            return :tNTH_REF
          end
        elsif src.scan(/\$0/) then
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
          self.lex_state = :expr_end
          self.yacc_value = "$"
          return "$"
        elsif src.scan(/\$\w+/)
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        end
      elsif src.check(/\_/) then
        if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
          self.lineno = nil
          return RubyLexer::EOF
        elsif src.scan(/\_\w*/) then
          self.token = src.matched
          return process_token(command_state)
        end
      end
    end # END OF CASE

    if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
      return RubyLexer::EOF
    else # alpha check
      if src.scan(/\W/) then
        rb_compile_error "Invalid char #{src.matched.inspect} in expression"
      end
    end

    self.token = src.matched if self.src.scan(/\w+/)

    return process_token(command_state)
  end
end
# File lib/ruby_lexer.rb, line 1314
# Lexes the next token of the literal currently described by lex_strterm,
# dispatching to heredoc or parse_string according to the tuple's tag.
#
# When the literal ends (:tSTRING_END or :tREGEXP_END) the cached line
# number and lex_strterm are cleared and lex_state becomes :expr_end.
# Returns the token produced.
def yylex_string # 23 lines
  strterm = lex_strterm

  token = strterm[0] == :heredoc ? self.heredoc(strterm) : self.parse_string(strterm)

  if [:tSTRING_END, :tREGEXP_END].include?(token)
    self.lineno      = nil
    self.lex_strterm = nil
    self.lex_state   = :expr_end
  end

  token
end
Disabled; run with --debug to generate this.
Generated with the Darkfish Rdoc Generator 1.1.6.
# File lib/ruby_lexer.rb, line 66
# Returns every comment collected so far as one String and empties the
# collection, so each comment is handed out only once.
def comments
  c = @comments.join
  @comments.clear
  c
end