# apidoc.py
"""Extract SNAKES docstrings and generate API documentation from them
in Markdown format (one file for each Python file). This is both a
Python module and a command-line tool:

    :::console
    $ python -m snakes.utils.apidoc
    [error] Usage: python -m snakes.utils.apidoc TARGET [EXCLUDE...]
            TARGET   target directory to write files
            EXCLUDE  pattern to exclude modules (not file names)
    $ python -m snakes.utils.apidoc api snakes.lang.*
    [info] snakes -> 'api/index.md'
    ...

With respect to existing API extractors, this one takes into account
the structure of SNAKES, in particular, documentation for plugins is
correctly searched within functions `extend` and rendered accordingly.
Moreover, `apidoc` considers all the strings within modules or classes
as docstrings, which is particularly useful to document modules.

There is no real user manual for `apidoc`, in particular about its
syntax and usage, the best source of information is probably to look
at SNAKES source code. However, let's note a few points:

  * most Epydoc fields are supported: `@param`, `@type`, `@note`, ...
  * directive `# apidoc skip` excludes the next object from the
    generated documentation
  * directive `# apidoc stop` stops the processing of the current
    object (module or class)
  * directive `# apidoc include 'filename' lang='language'` includes
    the content of a file as a block of source code in the
    specified language
  * when statement `pass` is found in a doctest, the rest of the
    doctest is skipped, this is useful when this doctest is for test
    purpose only but does not provide useful documentation in itself
  * the syntax is that supported by Markdown Python module, with no
    extensions other than Epydoc fields
  * `apidoc` is intended to be simple and so does not support many
    options nor customisation, however, it is quite flexible already
    by the way it handles documentation

@note: in the example above we do not build doc for the sub-modules of
    `snakes.lang` because some of them are programmed for distinct
    versions of Python with incompatible syntaxes. So `apidoc` will
    fail for sure on one or another of these modules. Fortunately,
    most of the documentation for `snakes.lang` is in the package
    itself.
@note: for better rendering, `apidoc` uses [Python
    Markdown](http://pythonhosted.org/Markdown), however, it will
    handle situations when it is not installed
@warning: `apidoc` has been tested _only_ with SNAKES source, I guess
    it should work in other cases but this may require some work
"""

import sys, os, os.path
import inspect, fnmatch, collections, re, shlex
import textwrap, doctest
import snakes
from snakes.lang import unparse
from snakes.lang.python.parser import parse, ast

try :
    import markdown
except Exception :
    # Python Markdown is optional: any failure while importing it
    # (missing or broken installation) disables HTML rendering, but
    # KeyboardInterrupt and SystemExit are no longer swallowed
    markdown = None

##
## console messages
##

# ANSI escape sequences used to colour console messages, each name
# hints at the intended colour (NOTE(review): exact rendering depends
# on the terminal, to be confirmed)
# CLEAR is written by log after a coloured message
CLEAR = "\033[0m"
BLUE = "\033[1;34m"
BOLD = "\033[1;38m"
GRAY = "\033[1;30m"
LIGHTGRAY = "\033[0;30m"
GREEN = "\033[1;32m"
CYAN = "\033[1;36m"
MAGENTA = "\033[1;35m"
RED = "\033[1;31m"
YELLOW = "\033[1;33m"

# apidoc skip
def log (message, color=None, eol=True) :
    """Write `message` to standard error and flush it.

    @param message: the text to print, a `list` or `tuple` is printed
        as its items converted to `str` and separated by spaces
    @type message: `str`
    @param color: escape sequence written before the message, in
        which case `CLEAR` is written after it, or `None` for no
        colouring
    @type color: `str`
    @param eol: whether an end-of-line is written after the message
    @type eol: `bool`
    """
    stream = sys.stderr
    if color :
        stream.write(color)
    if isinstance(message, (list, tuple)) :
        text = " ".join(map(str, message))
    else :
        text = message
    stream.write(text)
    if color :
        stream.write(CLEAR)
    if eol :
        stream.write("\n")
    stream.flush()

# apidoc skip
def info (message, eol=True) :
    """Log `message` on standard error after a `[info] ` tag coloured
    with `BLUE`.

    @param message: the message, passed unchanged to `log`
    @param eol: whether an end-of-line is written after the message
    @type eol: `bool`
    """
    log("[info] ", color=BLUE, eol=False)
    log(message, color=None, eol=eol)

# apidoc skip
def warn (message, eol=True) :
    """Log `message` on standard error after a `[warn] ` tag coloured
    with `YELLOW`.

    @param message: the message, passed unchanged to `log`
    @param eol: whether an end-of-line is written after the message
    @type eol: `bool`
    """
    log("[warn] ", color=YELLOW, eol=False)
    log(message, color=None, eol=eol)

# apidoc skip
def err (message, eol=True) :
    """Log `message` on standard error after a `[error] ` tag coloured
    with `RED`.

    @param message: the message, passed unchanged to `log`
    @param eol: whether an end-of-line is written after the message
    @type eol: `bool`
    """
    log("[error] ", color=RED, eol=False)
    log(message, color=None, eol=eol)

# apidoc skip
def die (message, code=1) :
    """Log `message` as an error, then terminate the program.

    @param message: the message, passed unchanged to `err`
    @param code: the exit status of the program
    @type code: `int`
    """
    err(message)
    # this is exactly what `sys.exit(code)` does
    raise SystemExit(code)

##
## extract doc
##

class DocExtract (object) :
    """The class that extracts documentation and renders it

    Source files found under the input directory are parsed to an AST
    whose nodes are dispatched to methods `visit_...`, which in turn
    call methods `write_...` to produce Markdown files in the output
    directory (one output file for each input file).
    """
    def __init__ (self, inpath, outpath, exclude=[]) :
        """
        @param inpath: directory where the source code is searched for
        @type inpath: `str`
        @param outpath: directory where the documentation is generated
        @type outpath: `str`
        @param exclude: a list of glob patterns to exclude modules
            (not file names, but Python modules names)
        @type exclude: `list`
        """
        self.path = inpath.rstrip(os.sep)
        self.outpath = outpath
        self.out = None
        # copy the patterns so that neither the shared default list
        # nor the list of the caller is aliased by this instance
        self.exclude = list(exclude)
        # names of the arguments of the function being visited and
        # name of the class being visited, initialised here so that
        # `write_epydoc` and `write_function` can always read them
        self.args = []
        self.classname = None
        # the last two characters written, see `newline`
        self._last = "\n\n"
    def md (self, text, inline=True) :
        """Return the Markdow rendering of `text`, include `<p>` if
        `inline` is `False`. If Python Markdown module is not
        installed, `text` is returned unchanged.

        @param text: the text to be rendered
        @type text: `str`
        @param inline: `True` if this text is part of a larger
            paragraph, `False` if this is a paragraph by itself
        @type inline: `bool`
        @return: HTML text
        @rtype: `str`
        """
        if markdown is None :
            return text
        html = markdown.markdown(text)
        if inline :
            # `flags` has to be passed by keyword: the fourth
            # positional argument of `re.sub` is `count`, so passing
            # `re.I` there replaced two tags at most and was not
            # case-insensitive
            return re.sub("</?p>", "\n", html, flags=re.I)
        return html
    def openout (self, path) :
        """Open in `self.out` the output file where the conversion of
        input file `path` will be rendered. If `self.out` is already
        opened, it is first closed.

        Return a Boolean indicating if `path` should be ignored, in
        which case the output file is not opened. This occurs either
        if `path` correspond to a module that has been excluded or if
        the output file exists and is newer than the input file.

        @param path: input file to be converted
        @type path: `str`
        @return: `True` if output file is open and ready, `False` if
            input file should be skipped
        @rtype: `bool`
        @note: package files called `__init__.py` are converted to
            files called `index.md` so that when converted to HTML,
            this will yield a file called `index.html`
        """
        if self.out is not None :
            self.out.close()
            # forget the closed file so that it cannot be reused
            self.out = None
        # path relative to the parent of the input directory, so that
        # its first component is the name of the top-level package
        relpath = path[len(os.path.dirname(self.path)):].strip(os.sep)
        parts = relpath.split(os.sep)
        # output sub-directory: drop the top-level package name and
        # the file name
        relpath = os.path.dirname(relpath.split(os.sep, 1)[-1])
        self.package = (parts[-1] == "__init__.py")
        if self.package :
            self.module = ".".join(parts[:-1])
            target = "index.md"
        else :
            parts[-1] = os.path.splitext(parts[-1])[0]
            self.module = ".".join(parts)
            target =  parts[-1] + ".md"
        if any(fnmatch.fnmatch(self.module, glob) for glob in self.exclude) :
            warn("skip %s" % self.module)
            return False
        outdir = os.path.join(self.outpath, relpath)
        outpath = os.path.join(outdir, target)
        if os.path.exists(outpath) :
            # up-to-date output is not generated again
            if os.stat(path).st_mtime <= os.stat(outpath).st_mtime :
                return False
        self.inpath = path
        info("%s -> %r" % (self.module, outpath))
        if not os.path.exists(outdir) :
            os.makedirs(outdir)
        self.out = open(outpath, "w")
        self.classname = None
        # a new file starts as if a blank line had just been written
        self._last = "\n\n"
        return True
    def write (self, text) :
        """Write `text` to output file

        The last two characters written are recorded in `self._last`
        so that `newline` can detect an existing blank line.

        @param text: the text to write
        @type text: `str`
        """
        if len(text) > 1 :
            self._last = text[-2:]
        elif len(text) > 0 :
            self._last = self._last[-1] + text[-1]
        else :
            return
        self.out.write(text)
    def newline (self) :
        """Write a blank line to output file, never more than one
        blank line is written so there is no need to be carefull about
        how many successive times `newline` is called (which would be
        complicated when multiple calls are made from different
        locations).
        """
        if self._last != "\n\n" :
            self.write("\n")
    def writeline (self, text="") :
        """Write a line of text to output file, ensuring there is a
        end-of-line character at the end (and removing those that may
        exist in `text`).

        @param text: the text to write
        @type text: `str`
        """
        self.write(text.rstrip() + "\n")
    def writetext (self, text, **args) :
        """Write some `text` to output file, with line wrapping.

        @param text: the text to write
        @type text: `str`
        @param args: additional arguments to `textwrap.wrap`
        """
        # do not break on hyphens unless explicitly requested
        br = args.pop("break_on_hyphens", False)
        for line in textwrap.wrap(text, break_on_hyphens=br, **args) :
            self.writeline(line)
    def writelist (self, text, bullet="  * ", **args) :
        """Write one list item to output file, wrapping the text
        properly.

        @param text: the text within the item
        @type text: `str`
        @param bullet: list marker
        @type bullet: `str`
        @param args: additional arguments to `textwrap.wrap`
        """
        # do not break on hyphens unless explicitly requested
        br = args.pop("break_on_hyphens", False)
        for line in textwrap.wrap(text, break_on_hyphens=br,
                                  initial_indent=bullet,
                                  subsequent_indent=" "*len(bullet),
                                  **args) :
            self.writeline(line)
    def process (self) :
        """Main method that process input directory and write
        converted files to output directory.

        Each input file is parsed using `snakes.lang.python` to an AST
        that is then traversed for processing. A file that cannot be
        parsed is reported and skipped. The last output file is closed
        when processing terminates, even because of an error.
        """
        try :
            for dirpath, dirnames, filenames in os.walk(self.path) :
                for name in sorted(filenames) :
                    if not name.endswith(".py") or name.startswith(".") :
                        continue
                    path = os.path.join(dirpath, name)
                    if not self.openout(path) :
                        continue
                    try :
                        # the input file is closed as soon as read
                        with open(path) as infile :
                            node = parse(infile.read())
                    except Exception :
                        err("error parsing %r" % path)
                        continue
                    if ".plugins." in self.module :
                        self.visit_plugin(node)
                    else :
                        self.visit_module(node)
        finally :
            if self.out is not None :
                self.out.close()
                self.out = None
    def _pass (self, node) :
        """Visit a node for which nothing has to be done
        """
        pass
    def directive (self, node) :
        """Return directives (skip, stop, include) for this node, or
        `None`

        The directive is searched in the comment that precedes the
        node, ignoring blank lines and lines that start with `@`.
        Directive `include` is executed immediately by calling
        `write_include` and `None` is returned. A comment that is not
        a valid directive yields `None`.

        @param node: an AST node
        @type node: `AST`
        @return: `"skip"`, `"stop"` (in lower case) or `None`
        @rtype: `str`
        """
        lines = node.st.text.lexer.lines
        # index of the line just before the node (rows start from 1)
        num = node.st.srow - 2
        while num >= 0 and (not lines[num].strip()
                            or lines[num].strip().startswith("@")) :
            num -= 1
        if num < 0 or not lines[num].strip().startswith("#") :
            return None
        dirline = lines[num].lstrip("# \t").rstrip()
        try :
            items = shlex.split(dirline)
        except ValueError :
            # an ordinary comment with unbalanced quotes (for
            # instance an apostrophe) is not a directive
            return None
        if len(items) < 2 or items[0].lower() != "apidoc" :
            return None
        command = items[1].lower()
        if len(items) == 2 and command in ("skip", "stop") :
            # return the lower-case form because this is what
            # `children` compares with
            return command
        elif command == "include" :
            if len(items) < 3 :
                # no file name was given
                err("invalid directive %r (line %s)"
                    % (dirline, num+1))
                return None
            try :
                args = dict(i.split("=", 1) for i in items[3:])
            except ValueError :
                # one option is not of the form `key=value`
                err("invalid directive %r (line %s)"
                    % (dirline, num+1))
                return None
            self.write_include(items[2], **args)
            return None
        else :
            err("unknown directive %r (line %s)"
                % (items[1], num+1))
            return None
    def children (self, node) :
        """Iterates over the children of `node`

        @param node: an AST node
        @type node: `AST`
        @return: all the children but the `skip`ped ones, until the
            end of `stop` directive is found
        @rtype: `generator`
        """
        for child in ast.iter_child_nodes(node) :
            directive = self.directive(child)
            if directive == "skip" :
                continue
            elif directive == "stop" :
                break
            yield child
    def visit (self, node) :
        """Generic visit of a node that actually dispatch to the
        appropriate method `visit_...`

        Nodes whose name starts with `_` are ignored, except if the
        name also starts and ends with `__`. If the visit fails, the
        location of the node is reported and the exception is raised
        again.

        @param node: an AST node
        @type node: `AST`
        """
        # nodes without a name get one that is never filtered out
        name = getattr(node, "name", "__")
        if (name.startswith("_") and not (name.startswith("__")
                                          and name.endswith("__"))) :
            return
        try :
            getattr(self, "visit_" + node.__class__.__name__,
                    self._pass)(node)
        except :
            # report where the error occurred, then let it propagate
            src = node.st.source()
            if len(src) > 40 :
                src = src[:40] + "..."
            err("line %s source %r" % (node.lineno, src))
            raise
    def visit_module (self, node) :
        """Visit a node that is a module
        """
        self.write_module()
        for child in self.children(node) :
            self.visit(child)
    def visit_plugin (self, node) :
        """Visit a node that is a plugin

        The content of function `extend` is rendered after the rest
        of the module, in a dedicated section. Nothing more is written
        if the module has no function `extend`.
        """
        self.write_module()
        extend = None
        for child in self.children(node) :
            if (getattr(child, "name", None) == "extend"
                and isinstance(child, ast.FunctionDef)) :
                extend = child
            else :
                self.visit(child)
        if extend is None :
            # without function `extend` there is no extension to
            # document (and no node whose children could be visited)
            return
        self.write_plugin()
        for child in self.children(extend) :
            self.visit(child)
    def visit_ClassDef (self, node) :
        """Visit a node that is a class definition
        """
        self.write_class(node)
        self.classname = node.name
        for child in self.children(node) :
            self.visit(child)
        self.classname = None
    def visit_FunctionDef (self, node) :
        """Visit a node that is a function definition

        The names of the arguments are collected in `self.args` for
        `write_epydoc`, then only the first statement of the body is
        visited, which renders it if it is a docstring.
        """
        self.write_function(node)
        self.args = [n.arg for n in node.args.args]
        if self.args and self.args[0] == "self" :
            del self.args[0]
        if node.args.vararg :
            self.args.append(node.args.vararg)
        if node.args.kwarg :
            self.args.append(node.args.kwarg)
        self.visit(node.body[0])
        self.args = []
    def visit_Expr (self, node) :
        """Visit a node that is an expression
        """
        self.visit(node.value)
    def visit_Str (self, node) :
        """Visit a node that is a string literal
        """
        self.write_doc(inspect.cleandoc(node.s))
    def write_module (self) :
        """Write the documentation about a module (not its content),
        which is just a title. The module name is in `self.module` and
        there is also a Boolean `self.package` to indicate if this
        module is a actually package.
        """
        if self.package :
            self.writeline("# Package `%s` #" % self.module)
        else :
            self.writeline("# Module `%s` #" % self.module)
        self.newline()
    def write_plugin (self) :
        """Write the documentation about function `extend` in a plugin
        (not its content), which is just a title
        """
        self.writeline("## Extensions ##")
        self.newline()
    def write_function (self, node) :
        """Write the documentation about a function or method (which
        can be differentiated by looking if `self.classname` is not
        None)
        """
        self.newline()
        if self.classname :
            self.writeline("#### Method `%s.%s` ####"
                           % (self.classname, node.name))
        else :
            self.writeline("### Function `%s` ###" % node.name)
        self.newline()
        self.write_def(node)
    def write_class (self, node) :
        """Write the documentation about a class
        """
        self.newline()
        self.writeline("### Class `%s` ###" % node.name)
        self.write_def(node)
    def write_def (self, node) :
        """Write the documentation about a function or method
        definition (parameters, decorators, etc.)

        The text of the definition is copied from the source lines
        and written as a block of Python code, a definition that
        spans several lines is terminated with `...`.
        """
        indent = node.st.scol
        # positions where the copied source starts and ends
        if node.st[0].symbol == "decorated" :
            srow, scol = node.st[0][0][0][0].srow, node.st[0][0][0][0].scol
            erow, ecol = node.st[0][1][-2].erow, node.st[0][1][-2].ecol
        else :
            srow, scol = node.st[0].srow, node.st[0].scol
            erow, ecol = node.st[0][-2].erow, node.st[0][-2].ecol
        lines = node.st.text.lexer.lines
        if srow == erow :
            source = [lines[srow-1][scol:ecol+1]]
        else :
            source = lines[srow-1:erow]
            source[0] = source[0][scol:]
            # remove the indentation of the definition
            source[1:-1] = [s[indent:] for s in source[1:-1]]
            source[-1] = source[-1][indent:ecol+1].rstrip() + " ..."
        self.newline()
        self.writeline("    :::python")
        for line in source :
            self.writeline("    " + line)
        self.newline()
    parse = doctest.DocTestParser().parse
    def write_doc (self, doc) :
        """Write the content of a docstring that is parsed to extract
        doctests, Epydoc fields and plain text

        Plain text is copied until a line starts with `@`, the rest
        of this text is then passed to `write_epydoc`. Consecutive
        doctest examples are grouped in one block of code, and when
        the source of an example is `pass`, the examples are skipped
        until the next non-empty text.

        @param doc: the docstring, or `None` in which case nothing is
            written
        @type doc: `str`
        """
        if doc is None :
            return
        # `test` is set while a block of doctests is being written,
        # `skip` is set after a `pass` example has been found
        test, skip = False, False
        for part in self.parse(doc) :
            if isinstance(part, str) :
                text = part.strip()
                if test :
                    if not text :
                        # blank text does not end a block of doctests
                        continue
                    test, skip = False, False
                self.newline()
                lines = text.splitlines()
                for num, line in enumerate(lines) :
                    if line.startswith("@") :
                        self.write_epydoc("\n".join(lines[num:]))
                        break
                    self.writeline(line)
            elif not skip :
                if part.source.strip() == "pass" :
                    skip = True
                else :
                    if not test :
                        test = True
                        self.newline()
                        self.writetext("<!-- this comment avoids a bug in"
                                       " Markdown parsing -->")
                        self.writeline("    :::pycon")
                    for i, line in enumerate(part.source.splitlines()) :
                        if i > 0 :
                            self.writeline("    ... %s" % line)
                        else :
                            self.writeline("    >>> %s" % line)
                    for line in part.want.splitlines() :
                        self.writeline("    %s" % line)
    def write_epydoc (self, doc) :
        """Write a block of epydoc fields

        Fields are rendered in this order: general information
        (author, organization, contact, copyright, license), remarks
        (note, todo, attention, bug, warning), call API (arguments
        listed in `self.args`, keywords, returned value) and
        exceptions.

        @param doc: the fields, each starting with `@` at the
            beginning of a line
        @type doc: `str`
        @raise AssertionError: when a field is duplicated or has an
            unsupported form
        """
        # fields that may be repeated are collected in a dict when
        # they have a name (`@tag name: text`) or in a list otherwise
        # (`@tag: text`), other fields are added as `tag: text`
        fields = {"param" : {},
                  "type" : {},
                  "keyword" : {},
                  "raise" : {},
                  "todo" : [],
                  "note" : [],
                  "attention": [],
                  "bug" : [],
                  "warning" : [],
                  }
        for item in doc.lstrip("@").split("\n@") :
            left, text = item.split(":", 1)
            left = left.split()
            assert 1 <= len(left) <= 2, "unsupported item %r" % item
            if len(left) == 1 :
                left.append(None)
            tag, name = [x.strip() if x else x for x in left]
            # the text is joined on one line, it is wrapped on output
            text = " ".join(text.strip().split())
            if isinstance(fields.get(tag, None), list) :
                assert name is None, "unsupported item %r" % item
                fields[tag].append(text)
            elif isinstance(fields.get(tag, None), dict) :
                assert name is not None, "unsupported item %r" % item
                assert name not in fields[tag], "duplicated item %r" % item
                fields[tag][name] = text
            else :
                assert name is None, "unsupported item %r" % item
                assert tag not in fields, "duplicated tag %r" % item
                fields[tag] = text
        if any(k in fields for k in ("author", "organization", "copyright",
                                     "license", "contact")) :
            self.newline()
            self.writeline('<ul id="api-info">')
            for tag in ("author", "organization", "contact",
                        "copyright", "license", ) :
                if tag in fields :
                    self.writeline('<li id="api-%s">' % tag)
                    self.writetext('<span class="api-title">%s:</span> %s'
                                   % (tag.capitalize(),
                                      self.md(fields[tag])),
                                   subsequent_indent="  ")
                    self.writeline('</li>')
            self.writeline('</ul>')
        if any(fields[k] for k in
               ("todo", "note", "attention", "bug", "warning")) :
            self.newline()
            self.writeline('<div id="api-remarks">')
            for tag in ("note", "todo", "attention", "bug", "warning") :
                for text in fields[tag] :
                    self.writeline('<div class="api-%s">' % tag)
                    self.writetext('<span class="api-title">%s:</span> %s'
                                   % (tag.capitalize(), self.md(text)),
                                   subsequent_indent="  ")
                    self.writeline('</div>')
            self.writeline('</div>')
        if (any(fields[k] for k in ("param", "type", "keyword"))
            or any(k in fields for k in ("return", "rtype"))) :
            self.newline()
            self.writeline("##### Call API #####")
            self.newline()
            # arguments are listed in the order of the definition
            for arg in self.args :
                if arg in fields["param"] :
                    self.writelist("`%s %s`: %s"
                                   % (fields["type"].get(arg, "object").strip("`"),
                                      arg,
                                      fields["param"][arg]))
                else :
                    self.writelist("`%s %s`"
                                   % (fields["type"].get(arg, "object").strip("`"),
                                      arg))
            for kw, text in sorted(fields["keyword"].items()) :
                self.writelist("keyword `%s`: %s" % (kw, text))
            if any(k in fields for k in ("return", "rtype")) :
                if "return" in fields :
                    self.writelist("`return %s`: %s"
                                   % (fields.get("rtype", "object").strip("`"),
                                      fields["return"]))
                else :
                    self.writelist("`return %s`"
                                   % (fields.get("rtype", "object").strip("`")))
        if fields["raise"] :
            self.newline()
            self.writeline("##### Exceptions #####")
            self.newline()
            for exc, reason in sorted(fields["raise"].items()) :
                self.writelist("`%s`: %s" % (exc, reason))
            self.newline()
    def write_include (self, name, lang="python", first=1, last=-1) :
        """Write a block of source code included through directive
        `apidoc include ...`

        The file is searched as given, then relatively to the
        directory of the input file. If it is not found, an error is
        reported and nothing is written.

        @param name: path of the file to include
        @type name: `str`
        @param lang: language of the included source code
        @type lang: `str`
        @param first: number of the first line to include (starting
            from 1)
        @type first: `int`
        @param last: number of the last line to include, or `-1` to
            include up to the end of the file
        @type last: `int`
        """
        # values that come from a directive are strings
        first, last = int(first), int(last)
        if os.path.exists(name) :
            path = name
        else :
            path = os.path.join(os.path.dirname(self.inpath), name)
        if not os.path.exists(path) :
            err("include file %r not found" % name)
            # the file cannot be opened, so there is nothing to write
            return
        with open(path) as infile :
            self.newline()
            self.writeline("    :::%s" % lang)
            for i, line in enumerate(infile) :
                if first <= i+1 and (last == -1 or i+1 <= last) :
                    self.writeline("    " + line.rstrip())
            self.newline()

def main (finder, args, source=None) :
    """Entry point of `apidoc`. When the source directory is not
    given, it is computed as follows:

        :::python
        import snakes
        source = os.path.dirname(snakes.__file__)

    So `PYTHONPATH` or `sys.path` has to be set appropriately in
    order to document a copy of SNAKES that is not at the standard
    location.

    The program is terminated with an error message if the command
    line is incomplete or if one of the directories does not exist.

    @param finder: `DocExtract` or a subclass of it, that is
        instantiated to perform the processing
    @type finder: `class`
    @param args: command line arguments (target directory followed
        by the patterns of the modules to exclude)
    @type args: `list`
    @param source: the directory of the code to be documented, or
        `None` to extract the documentation of SNAKES
    @type source: `str`
    """
    usage = """Usage: python -m snakes.utils.apidoc TARGET [EXCLUDE...]
        TARGET   target directory to write files
        EXCLUDE  pattern to exclude modules (not file names)"""
    try :
        if source is None :
            srcdir = os.path.dirname(snakes.__file__)
        else :
            srcdir = source
        # a missing target raises `IndexError`, reported as a usage error
        target, exclude = args[0], args[1:]
        if not os.path.isdir(srcdir) :
            raise Exception("could not find SNAKES sources")
        if not os.path.isdir(target) :
            raise Exception("no directory %r" % target)
    except (ValueError, IndexError) :
        die(usage)
    except Exception as error :
        die(str(error))
    finder(srcdir, target, exclude).process()

# when run as a script, process the command line arguments that
# follow the program name using class DocExtract
if __name__ == "__main__" :
    main(DocExtract, sys.argv[1:])