Class | RDoc::C_Parser |
In: |
parsers/parse_c.rb
|
Parent: | Object |
We attempt to parse C extension files. Basically we look for the standard patterns that you find in extensions: rb_define_class, rb_define_method and so on. We also try to find the corresponding C source for the methods and extract comments, but if we fail we don't worry too much.
The comments associated with a Ruby method are extracted from the C comment block associated with the routine that implements that method, that is to say the method whose name is given in the rb_define_method call. For example, you might write:
/* * Returns a new array that is a one-dimensional flattening of this * array (recursively). That is, for every element that is an array, * extract its elements into the new array. * * s = [ 1, 2, 3 ] #=> [1, 2, 3] * t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]] * a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10] * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] */ static VALUE rb_ary_flatten(ary) VALUE ary; { ary = rb_obj_dup(ary); rb_ary_flatten_bang(ary); return ary; } ... void Init_Array() { ... rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0);
Here RDoc will determine from the rb_define_method line that there's a method called "flatten" in class Array, and will look for the implementation in the method rb_ary_flatten. It will then use the comment from that method in the HTML output. This method must be in the same source file as the rb_define_method.
C classes can be diagrammed (see /tc/dl/ruby/ruby/error.c), and RDoc integrates C and Ruby source into one tree.
The comment blocks may include special directives:
In addition, RDoc assumes by default that the C method implementing a Ruby function is in the same source file as the rb_define_method call. If this isn't the case, add the comment
rb_define_method(....); // in: filename
As an example, we might have an extension that defines multiple classes in its Init_xxx method. We could document them using
/* * Document-class: MyClass * * Encapsulate the writing and reading of the configuration * file. ... */ /* * Document-method: read_value * * call-seq: * cfg.read_value(key) -> value * cfg.read_value(key) { |key| } -> value * * Return the value corresponding to +key+ from the configuration. * In the second form, if the key isn't found, invoke the * block and return its value. */
prepare to parse a C file
# File parsers/parse_c.rb, line 178
#
# Prepare to parse a C file.
#
# Stores the collaborators handed in by the caller, takes a private copy of
# KNOWN_CLASSES, and normalises +body+ (ifdef handling first, then tab
# expansion) before keeping it in @body. Progress output goes to $stderr
# unless the options ask for quiet operation.
def initialize(top_level, file_name, body, options, stats)
  @known_classes = KNOWN_CLASSES.dup
  @options       = options
  @stats         = stats
  @top_level     = top_level
  @classes       = {}
  @file_dir      = File.dirname(file_name)

  # @options has to be assigned before this point: handle_tab_width reads
  # @options.tab_width.
  without_ifdefs = handle_ifdefs_in(body)
  @body          = handle_tab_width(without_ifdefs)

  @progress = $stderr unless @options.quiet
end
# File parsers/parse_c.rb, line 457
#
# Scans @body for rb_define_alias(VAR, "new", "old") calls and records an
# Alias on the class object found for VAR. Each alias found also bumps
# @stats.num_methods.
def do_aliases
  alias_call = %r{rb_define_alias\s*\(\s*(\w+),\s*"([^"]+)",\s*"([^"]+)"\s*\)}m

  @body.scan(alias_call) do |c_var, alias_name, original_name|
    @stats.num_methods += 1

    # Fall back to the raw C variable name when the class is not known.
    owner = find_class(c_var, @known_classes[c_var] || c_var)
    owner.add_alias(Alias.new("", original_name, alias_name, ""))
  end
end
# File parsers/parse_c.rb, line 320
#
# Scans @body for class and module definitions and hands each one to
# handle_class_module. The scans run in this order: rb_define_module,
# rb_define_class, boot_defclass, rb_define_module_under,
# rb_define_class_under.
def do_classes
  # VAR = rb_define_module("Name")
  toplevel_module = /(\w+)\s* = \s*rb_define_module\s*\(\s*"(\w+)"\s*\)/mx
  @body.scan(toplevel_module) do |c_var, name|
    handle_class_module(c_var, "module", name, nil, nil)
  end

  # VAR = rb_define_class("Name", parent)
  # The '.' in the variable pattern lets us handle SWIG-generated files.
  toplevel_class = /([\w\.]+)\s* = \s*rb_define_class\s*
                    \(
                      \s*"(\w+)",
                      \s*(\w+)\s*
                    \)/mx
  @body.scan(toplevel_class) do |c_var, name, superclass|
    handle_class_module(c_var, "class", name, superclass, nil)
  end

  # VAR = boot_defclass("Name", parent) -- a parent of "0" means no parent.
  boot_class = /(\w+)\s*=\s*boot_defclass\s*\(\s*"(\w+?)",\s*(\w+?)\s*\)/
  @body.scan(boot_class) do |c_var, name, superclass|
    superclass = nil if superclass == "0"
    handle_class_module(c_var, "class", name, superclass, nil)
  end

  # VAR = rb_define_module_under(outer, "Name")
  nested_module = /(\w+)\s* = \s*rb_define_module_under\s*
                   \(
                     \s*(\w+),
                     \s*"(\w+)"
                   \s*\)/mx
  @body.scan(nested_module) do |c_var, outer, name|
    handle_class_module(c_var, "module", name, nil, outer)
  end

  # VAR = rb_define_class_under(outer, "Name", parent)
  nested_class = /([\w\.]+)\s* = \s*rb_define_class_under\s*
                  \(
                    \s*(\w+),
                    \s*"(\w+)",
                    \s*(\w+)\s*
                  \s*\)/mx
  @body.scan(nested_class) do |c_var, outer, name, superclass|
    handle_class_module(c_var, "class", name, superclass, outer)
  end
end
# File parsers/parse_c.rb, line 368
#
# Scans @body for rb_define_variable / rb_define_readonly_variable /
# rb_define_const / rb_define_global_const calls and passes each one to
# handle_constants. Definitions with no owner argument, or owned by
# rb_mKernel, are attributed to rb_cObject.
def do_constants
  definition_call = %r{\Wrb_define_
                       (
                         variable          |
                         readonly_variable |
                         const             |
                         global_const      |
                       )
                       \s*\(
                         (?:\s*(\w+),)?
                         \s*"(\w+)",
                         \s*(.*?)\s*\)\s*;
                      }xm

  @body.scan(definition_call) do |type, owner_var, const_name, definition|
    owner_var = "rb_cObject" if owner_var.nil? || owner_var == "rb_mKernel"
    handle_constants(type, owner_var, const_name, definition)
  end
end
Look for includes of the form:
rb_include_module(rb_cArray, rb_mEnumerable);
# File parsers/parse_c.rb, line 723
#
# Scans @body for rb_include_module(class_var, module_var) calls. When the
# class variable is one of the classes collected in @classes, an Include for
# the module (translated through @known_classes when possible) is added to it.
def do_includes
  include_call = /rb_include_module\s*\(\s*(\w+?),\s*(\w+?)\s*\)/

  @body.scan(include_call) do |class_var, module_var|
    target = @classes[class_var]
    next unless target

    module_name = @known_classes[module_var] || module_var
    target.add_include(Include.new(module_name, ""))
  end
end
# File parsers/parse_c.rb, line 390
#
# Scans @body for method-defining calls and forwards what it finds:
#
# * rb_define_{singleton_method,method,module_function,private_method}
#   go to handle_method (with an optional "in <file>" trailing comment
#   naming the source file that holds the implementation);
# * rb_define_attr goes to handle_attr;
# * rb_define_global_function goes to handle_method on rb_mKernel;
# * define_filetest_function is registered both as a method on
#   rb_mFileTest and as a singleton method on rb_cFile.
def do_methods
  method_call = %r{rb_define_
                   (
                     singleton_method |
                     method           |
                     module_function  |
                     private_method
                   )
                   \s*\(\s*([\w\.]+),
                     \s*"([^"]+)",
                     \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                     \s*(-?\w+)\s*\)
                   (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
                  }xm

  # Top-object and weird struct.c dynamic stuff is ignored
  # (it'd be nice to handle argf one day).
  ignored_receivers = %w[ruby_top_self nstr envtbl argf]

  @body.scan(method_call) do |type, owner_var, meth_name, c_function, arity, source_file|
    next if ignored_receivers.include?(owner_var)

    owner_var = "rb_cObject" if owner_var == "rb_mKernel"
    handle_method(type, owner_var, meth_name,
                  c_function, arity, source_file)
  end

  attr_call = %r{rb_define_attr\(
                   \s*([\w\.]+),
                   \s*"([^"]+)",
                   \s*(\d+),
                   \s*(\d+)\s*\);
                }xm

  @body.scan(attr_call) do |owner_var, attr_name, reader_flag, writer_flag|
    handle_attr(owner_var, attr_name,
                reader_flag.to_i != 0,
                writer_flag.to_i != 0)
  end

  global_function_call = %r{rb_define_global_function\s*\(
                              \s*"([^"]+)",
                              \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                              \s*(-?\w+)\s*\)
                            (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
                           }xm

  @body.scan(global_function_call) do |meth_name, c_function, arity, source_file|
    handle_method("method", "rb_mKernel", meth_name,
                  c_function, arity, source_file)
  end

  filetest_call = /define_filetest_function\s*\(
                     \s*"([^"]+)",
                     \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                     \s*(-?\w+)\s*\)/xm

  @body.scan(filetest_call) do |meth_name, c_function, arity|
    handle_method("method", "rb_mFileTest", meth_name, c_function, arity)
    handle_method("singleton_method", "rb_cFile", meth_name, c_function, arity)
  end
end
# File parsers/parse_c.rb, line 568
#
# Returns the comment text documenting the attribute +attr_name+, or an
# empty string when none is found. Two places are searched in @body, in
# order:
#
# 1. a C comment directly above the rb_define_attr call for the attribute
#    (matched case-insensitively);
# 2. the remainder of a comment after a "Document-attr: name" line.
#
# The attribute name is escaped before it is interpolated so that names
# containing regexp metacharacters are matched literally.
def find_attr_comment(attr_name)
  name = Regexp.escape(attr_name)

  if @body =~ %r{((?>/\*.*?\*/\s+))
                 rb_define_attr\((?:\s*(\w+),)?\s*"#{name}"\s*,.*?\)\s*;}xmi
    $1
  elsif @body =~ %r{Document-attr:\s#{name}\s*?\n((?>.*?\*/))}m
    $1
  else
    ''
  end
end
Find the C code corresponding to a Ruby method
# File parsers/parse_c.rb, line 628
#
# Find the C code corresponding to a Ruby method.
#
# +meth_name+ is the name of the C function, +meth_obj+ the method object
# being filled in, and +body+ the C source to search. Returns true when a
# definition or an overriding Document-method comment was found, false
# otherwise (a warning is printed unless +quiet+ is set).
#
# Three shapes are recognised in +body+:
#
# 1. a commented "VALUE meth_name(...) { ... }" definition;
# 2. a commented "#define meth_name other_name" macro, in which case
#    other_name is looked up recursively and the macro's comment is put in
#    front of whatever that lookup produced;
# 3. an uncommented "#define meth_name other_name" macro.
def find_body(meth_name, meth_obj, body, quiet = false)
  case body
  when %r"((?>/\*.*?\*/\s*))(?:static\s+)?VALUE\s+#{meth_name}
          \s*(\([^)]*\))\s*\{.*?^\}"xm
    comment = $1
    body_text = $&

    remove_private_comments(comment) if comment

    # See if we can find the whole body: the first match may have stopped
    # short, so try to extend it up to a brace block that starts on its own
    # line.
    re = Regexp.escape(body_text) + '[^(]*^\{.*?^\}'
    if Regexp.new(re, Regexp::MULTILINE).match(body)
      body_text = $&
    end

    # The comment block may have been overridden with a 'Document-method'
    # block. This happens in the interpreter when multiple methods are
    # vectored through to the same C method but those methods are logically
    # distinct (for example Kernel.hash and Kernel.object_id share the same
    # implementation).
    override_comment = find_override_comment(meth_obj.name)
    comment = override_comment if override_comment

    find_modifiers(comment, meth_obj) if comment

    meth_obj.start_collecting_tokens
    meth_obj.add_token(RubyToken::Token.new(1, 1).set_text(body_text))
    meth_obj.comment = mangle_comment(comment)
  when %r{((?>/\*.*?\*/\s*))^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m
    comment = $1
    find_body($2, meth_obj, body, true)
    find_modifiers(comment, meth_obj)
    # The recursive lookup may have failed and left meth_obj.comment nil;
    # to_s keeps the macro's own comment instead of raising a TypeError.
    meth_obj.comment = mangle_comment(comment) + meth_obj.comment.to_s
  when %r{^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m
    unless find_body($1, meth_obj, body, true)
      warn "No definition for #{meth_name}" unless quiet
      return false
    end
  else
    # No body, but might still have an override comment.
    comment = find_override_comment(meth_obj.name)

    if comment
      find_modifiers(comment, meth_obj)
      meth_obj.comment = mangle_comment(comment)
    else
      warn "No definition for #{meth_name}" unless quiet
      return false
    end
  end
  true
end
# File parsers/parse_c.rb, line 742
#
# Returns the class or module object registered in @classes under the C
# variable name +raw_name+, creating it on @top_level first when it is not
# there yet. Variables whose name starts with "rb_m" become modules,
# everything else becomes a class without a parent.
def find_class(raw_name, name)
  @classes[raw_name] ||=
    if raw_name =~ /^rb_m/
      @top_level.add_module(NormalModule, name)
    else
      @top_level.add_class(NormalClass, name, nil)
    end
end
Look for class or module documentation above Init_+class_name+(void), in a Document-class class_name (or module) comment or above an rb_define_class (or module). If a comment is supplied above a matching Init_ and a rb_define_class the Init_ comment is used.
/* * This is a comment for Foo */ Init_Foo(void) { VALUE cFoo = rb_define_class("Foo", rb_cObject); } /* * Document-class: Foo * This is a comment for Foo */ Init_foo(void) { VALUE cFoo = rb_define_class("Foo", rb_cObject); } /* * This is a comment for Foo */ VALUE cFoo = rb_define_class("Foo", rb_cObject);
# File parsers/parse_c.rb, line 295
#
# Looks for documentation for +class_name+ in @body and, when found, stores
# the mangled comment on +class_meth+. Three sources are tried in order:
#
# 1. a comment directly above "void Init_<class_name>(void)"
#    (matched case-insensitively);
# 2. the remainder of a "Document-class:" / "Document-module:" comment;
# 3. the comment chunk immediately preceding the chunk of source that
#    contains the rb_define_class / rb_define_module call for the class.
#
# In case 3 the comment is only taken when there actually is a preceding
# chunk; a definition located in the very first chunk has no comment.
def find_class_comment(class_name, class_meth)
  comment = nil
  escaped_name = Regexp.escape(class_name)

  if @body =~ %r{((?>/\*.*?\*/\s+))
                 (static\s+)?void\s+Init_#{escaped_name}\s*(?:_\(\s*)?\(\s*(?:void\s*)\)}xmi
    comment = $1
  elsif @body =~ %r{Document-(class|module):\s#{escaped_name}\s*?\n((?>.*?\*/))}m
    comment = $2
  elsif @body =~ /rb_define_(class|module)/m
    short_name = Regexp.escape(class_name.split("::").last)

    # Splitting on a captured comment yields source and comment chunks in
    # alternation, so the element before a source chunk is its comment.
    chunks = @body.split(/(\/\*.*?\*\/)\s*?\n/m)
    chunks.each_with_index do |chunk, index|
      next unless chunk =~ /rb_define_(class|module).*?"(#{short_name})"/m

      # index - 1 would wrap around to the last element when index is 0.
      comment = chunks[index - 1] if index > 0
      break
    end
  end

  class_meth.comment = mangle_comment(comment) if comment
end
Finds a comment matching type and const_name either above the rb_define_ call or in the matching Document- section.
# File parsers/parse_c.rb, line 525
#
# Returns the comment text for the constant or variable +const_name+
# defined through rb_define_<type>, or an empty string when none is found.
# Two places are searched in @body, in order:
#
# 1. a C comment directly above the rb_define_<type> call
#    (matched case-insensitively);
# 2. the remainder of a comment after a "Document-const:",
#    "Document-global:" or "Document-variable:" line.
#
# Whitespace is allowed between the function name and its opening
# parenthesis, matching what do_constants accepts. +type+ and +const_name+
# are escaped before being interpolated into the patterns.
def find_const_comment(type, const_name)
  kind = Regexp.escape(type)
  name = Regexp.escape(const_name)

  if @body =~ %r{((?>^\s*/\*.*?\*/\s+))
                 rb_define_#{kind}\s*\((?:\s*(\w+),)?\s*"#{name}"\s*,.*?\)\s*;}xmi
    $1
  elsif @body =~ %r{Document-(?:const|global|variable):\s#{name}\s*?\n((?>.*?\*/))}m
    $1
  else
    ''
  end
end
If the comment block contains a section that looks like:
call-seq: Array.new Array.new(10)
use it for the parameters.
# File parsers/parse_c.rb, line 696
#
# Strips the :nodoc: and call-seq: directives out of +comment+ (which is
# modified in place) and applies them to +meth_obj+:
#
# * :nodoc: switches meth_obj.document_self off;
# * the text of a call-seq: section, with the leading comment asterisks
#   removed, becomes meth_obj.call_seq.
#
# Each directive is recognised either inside a multi-line comment (it runs
# up to the next blank comment line) or as the sole content of a comment.
def find_modifiers(comment, meth_obj)
  nodoc_removed = comment.sub!(/:nodoc:\s*^\s*\*?\s*$/m, '') ||
                  comment.sub!(/\A\/\*\s*:nodoc:\s*\*\/\Z/, '')
  meth_obj.document_self = false if nodoc_removed

  call_seq_removed = comment.sub!(/call-seq:(.*?)^\s*\*?\s*$/m, '') ||
                     comment.sub!(/\A\/\*\s*call-seq:(.*?)\*\/\Z/, '')
  return unless call_seq_removed

  # $1 still refers to whichever of the two substitutions matched.
  meth_obj.call_seq = $1.gsub(/^\s*\*\s*/, '')
end
# File parsers/parse_c.rb, line 711
#
# Returns the rest of the comment that follows a
# "Document-method: <meth_name>" line in @body (up to and including the
# closing "*/"), or nil when there is no such block. The method name is
# matched literally.
def find_override_comment(meth_name)
  override = %r{Document-method:\s#{Regexp.escape(meth_name)}\s*?\n((?>.*?\*/))}m
  found = override.match(@body)
  found && found[1]
end
# File parsers/parse_c.rb, line 538
#
# Registers the attribute +attr_name+ on the class held in the C variable
# +var_name+. +reader+ and +writer+ select the access string ("R", "W",
# "RW" or ""). Nothing happens when the variable is not in @known_classes.
# Any comment found for the attribute is mangled before being attached.
def handle_attr(var_name, attr_name, reader, writer)
  rw = "#{'R' if reader}#{'W' if writer}"

  class_name = @known_classes[var_name]
  return unless class_name

  class_obj = find_class(var_name, class_name)
  return unless class_obj

  comment = find_attr_comment(attr_name)
  comment = mangle_comment(comment) unless comment.empty?

  class_obj.add_attribute(Attr.new('', attr_name, rw, comment))
end
# File parsers/parse_c.rb, line 231
#
# Records the class or module +class_name+ (held in the C variable
# +var_name+) in the documentation tree.
#
# +class_mod+ is "class" or "module", +parent+ the C variable of the
# superclass (or nil) and +in_module+ the C variable of the enclosing
# class/module (or nil for a top-level definition). An enclosing container
# that has not been seen yet but is listed in @known_classes is created on
# the fly; if it cannot be resolved at all a warning is printed and nothing
# is recorded.
def handle_class_module(var_name, class_mod, class_name, parent, in_module)
  progress(class_mod[0, 1])

  parent_name = @known_classes[parent] || parent

  enclosure = @top_level
  if in_module
    enclosure = @classes[in_module] || @@enclosure_classes[in_module]

    if !enclosure && (known_name = @known_classes[in_module])
      # Define the well-known container first, then pick up the object
      # that the recursive call registered.
      container_kind = /^rb_m/ =~ in_module ? "module" : "class"
      handle_class_module(in_module, container_kind, known_name, nil, nil)
      enclosure = @classes[in_module]
    end

    unless enclosure
      warn("Enclosing class/module '#{in_module}' for " +
           "#{class_mod} #{class_name} not known")
      return
    end
  end

  cm =
    if class_mod == "class"
      created = enclosure.add_class(NormalClass, class_name, parent_name)
      @stats.num_classes += 1
      created
    else
      created = enclosure.add_module(NormalModule, class_name)
      @stats.num_modules += 1
      created
    end
  cm.record_location(enclosure.toplevel)

  find_class_comment(cm.full_name, cm)

  @classes[var_name] = cm
  @@enclosure_classes[var_name] = cm
  @known_classes[var_name] = cm.full_name
end
Adds constant comments. By providing some_value: at the start of the comment you can override the C value of the constant to give a friendly definition.
/* 300: The perfect score in bowling */ rb_define_const(cFoo, "PERFECT", INT2FIX(300));
Will override +INT2FIX(300)+ with the value 300 in the output RDoc. Values may include quotes and escaped colons (\:).
# File parsers/parse_c.rb, line 479
#
# Adds the constant +const_name+, defined through rb_define_<type> on the C
# variable +var_name+, to its class. Nothing happens when the variable is
# not in @known_classes.
#
# For rb_define_const the comment may take the form
# "/* definition: comment */": everything before the last ':' replaces the
# literal C +definition+ in the output. A literal ':' or '\' inside that
# replacement is written as '\:' or '\\' and is unescaped here. Leading
# whitespace in front of the replacement is moved onto the comment text.
def handle_constants(type, var_name, const_name, definition)
  class_name = @known_classes[var_name]
  return unless class_name

  class_obj = find_class(var_name, class_name)
  unless class_obj
    warn("Enclosing class/module '#{var_name}' for constant #{const_name} not known")
    return
  end

  comment = mangle_comment(find_const_comment(type, const_name))
  value = definition

  if type.downcase == 'const'
    elements = comment.split(':')
    unless elements.empty?
      override = elements[0..-2].join(':')
      if override.empty?
        # No "value:" prefix was given; default to the literal C definition.
        override = definition
      else
        # Unescape "\:" and "\\" in a single pass.
        override = override.gsub(/\\([:\\])/) { $1 }
      end

      leading = override[/\A\s+/]
      value = override.sub(/\A\s+/, '')
      description = leading ? "#{leading}#{elements.last.lstrip}" : elements.last
      comment = mangle_comment(description)
    end
  end

  class_obj.add_constant(Constant.new(const_name, value, comment))
end
Removes ifdefs that would otherwise confuse us
# File parsers/parse_c.rb, line 768
#
# Removes ifdefs that would otherwise confuse us.
#
# Returns a copy of +body+ in which every
# "#ifdef HAVE_PROTOTYPES ... #else ... #endif" section is replaced by the
# contents of its #else branch.
def handle_ifdefs_in(body)
  prototype_guard = /^#ifdef HAVE_PROTOTYPES.*?#else.*?\n(.*?)#endif.*?\n/m
  body.gsub(prototype_guard) { $1 }
end
# File parsers/parse_c.rb, line 581
#
# Records the Ruby method +meth_name+, implemented by the C function
# +meth_body+, on the class held in the C variable +var_name+.
#
# +type+ is the rb_define_ variant that declared it, +param_count+ the
# arity given in the source (anything that is not a non-negative integer is
# shown as "(...)"), and +source_file+ an optional file, relative to the
# directory of the file being parsed, that holds the implementation.
# "initialize" is documented as the singleton method "new". The method is
# only added when its body or an override comment is found and it is not
# marked :nodoc:.
def handle_method(type, var_name, meth_name,
                  meth_body, param_count, source_file = nil)
  progress(".")

  @stats.num_methods += 1
  class_name = @known_classes[var_name]
  return unless class_name

  class_obj = find_class(var_name, class_name)
  return unless class_obj

  if meth_name == "initialize"
    meth_name = "new"
    type = "singleton_method"
  end

  meth_obj = AnyMethod.new("", meth_name)
  meth_obj.singleton = %w{singleton_method module_function}.include?(type)

  arity =
    begin
      Integer(param_count)
    rescue StandardError
      -1
    end

  meth_obj.params =
    if arity < 0
      "(...)"
    elsif arity == 0
      "()"
    else
      "(" + (1..arity).map { |i| "p#{i}" }.join(", ") + ")"
    end

  body =
    if source_file
      file_name = File.join(@file_dir, source_file)
      # Each external source file is read once and then reused.
      @@known_bodies[source_file] ||= File.read(file_name)
    else
      @body
    end

  if find_body(meth_body, meth_obj, body) && meth_obj.document_self
    class_obj.add_method(meth_obj)
  end
end
# File parsers/parse_c.rb, line 753
#
# Returns +body+ with every run of tab characters replaced by the number of
# spaces needed to reach the corresponding tab stop, using
# @options.tab_width. A body without tabs is returned unchanged.
#
# Tab runs are expanded one at a time, left to right, so that the column of
# each run is measured against the already expanded text in front of it.
def handle_tab_width(body)
  return body unless body.include?("\t")

  tab_width = @options.tab_width
  body.split(/\n/).map do |line|
    # sub! (not gsub!) so that $` reflects the expansions done so far.
    1 while line.sub!(/\t+/) { ' ' * (tab_width * $&.length - $`.length % tab_width) }
    line
  end.join("\n")
end
Remove the /*’s and leading asterisks from C comments
# File parsers/parse_c.rb, line 735
#
# Remove the /*'s and leading asterisks from C comments.
#
# The first comment opener, the first comment closer and the leading
# asterisk of every line are overwritten with spaces of the same width, so
# the remaining text keeps its columns. +comment+ is modified in place and
# returned.
def mangle_comment(comment)
  comment.sub!(%r{/\*+}) { |opener| " " * opener.length }
  comment.sub!(%r{\*+/}) { |closer| " " * closer.length }
  comment.gsub!(/^[ \t]*\*/m) { |margin| " " * margin.length }
  comment
end
# File parsers/parse_c.rb, line 205
#
# Writes the progress marker +char+ to @progress and flushes it, unless the
# options ask for quiet operation.
def progress(char)
  return if @options.quiet

  @progress.print(char)
  @progress.flush
end
removes lines that are commented out that might otherwise get picked up when scanning for classes and methods
# File parsers/parse_c.rb, line 227
#
# Neutralises rb_define_ calls that sit behind a "//" comment so that they
# are not picked up when scanning for classes and methods: in @body, the
# text from "//" up to and including the last "rb_define_" on that line is
# replaced by a bare "//". @body is modified in place.
def remove_commented_out_lines
  @body.gsub!(%r{//.*rb_define_}) { '//' }
end
# File parsers/parse_c.rb, line 218
#
# Removes private sections from +comment+ (modified in place) and returns
# it. A private section starts at a comment line consisting of "--" and
# ends at the next "++" line, or at the end of the comment when there is no
# closing "++".
#
# The "--" marker must be the last thing on its line (trailing blanks
# aside), so decorative banners such as "/*--------" are left alone.
def remove_private_comments(comment)
  comment.gsub!(/\/?\*--[ \t]*\n(.*?)\/?\*\+\+/m, '')
  comment.sub!(/\/?\*--[ \t]*\n.*/m, '')
  comment
end