ansible-repo/mitogen-0.3.9/mitogen/minify.py

# Copyright 2017, Alex Willmer
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

# !mitogen: minify_safe

import sys

try:
    from io import StringIO
except ImportError:
    from StringIO import StringIO

import mitogen.core

if sys.version_info < (2, 7, 11):
    from mitogen.compat import tokenize
else:
    import tokenize


def minimize_source(source):
    """
    Remove comments and docstrings from Python `source`, preserving line
    numbers and syntax of empty blocks.

    :param str source:
        The source to minimize.

    :returns str:
        The minimized source.
    """
    source = mitogen.core.to_text(source)
    tokens = tokenize.generate_tokens(StringIO(source).readline)
    tokens = strip_comments(tokens)
    tokens = strip_docstrings(tokens)
    tokens = reindent(tokens)
    return tokenize.untokenize(tokens)


def strip_comments(tokens):
    """
    Drop comment tokens from a `tokenize` stream.

    Comments on lines 1-2 are kept, to preserve hashbang and encoding.
    Trailing whitespace is remove from all lines.
    """
    prev_typ = None
    prev_end_col = 0
    for typ, tok, (start_row, start_col), (end_row, end_col), line in tokens:
        if typ in (tokenize.NL, tokenize.NEWLINE):
            if prev_typ in (tokenize.NL, tokenize.NEWLINE):
                start_col = 0
            else:
                start_col = prev_end_col
            end_col = start_col + 1
        elif typ == tokenize.COMMENT and start_row > 2:
            continue
        prev_typ = typ
        prev_end_col = end_col
        yield typ, tok, (start_row, start_col), (end_row, end_col), line


def strip_docstrings(tokens):
    """
    Replace docstring tokens with NL tokens in a `tokenize` stream.

    Any STRING token not part of an expression is deemed a docstring.
    Indented docstrings are not yet recognised.
    """
    stack = []
    state = 'wait_string'
    for t in tokens:
        typ = t[0]
        if state == 'wait_string':
            if typ in (tokenize.NL, tokenize.COMMENT):
                yield t
            elif typ in (tokenize.DEDENT, tokenize.INDENT, tokenize.STRING):
                stack.append(t)
            elif typ == tokenize.NEWLINE:
                stack.append(t)
                start_line, end_line = stack[0][2][0], stack[-1][3][0]+1
                for i in range(start_line, end_line):
                    yield tokenize.NL, '\n', (i, 0), (i,1), '\n'
                for t in stack:
                    if t[0] in (tokenize.DEDENT, tokenize.INDENT):
                        yield t[0], t[1], (i+1, t[2][1]), (i+1, t[3][1]), t[4]
                del stack[:]
            else:
                stack.append(t)
                for t in stack: yield t
                del stack[:]
                state = 'wait_newline'
        elif state == 'wait_newline':
            if typ == tokenize.NEWLINE:
                state = 'wait_string'
            yield t


def reindent(tokens, indent=' '):
    """
    Replace existing indentation in a token steam, with `indent`.
    """
    old_levels = []
    old_level = 0
    new_level = 0
    for typ, tok, (start_row, start_col), (end_row, end_col), line in tokens:
        if typ == tokenize.INDENT:
            old_levels.append(old_level)
            old_level = len(tok)
            new_level += 1
            tok = indent * new_level
        elif typ == tokenize.DEDENT:
            old_level = old_levels.pop()
            new_level -= 1
        start_col = max(0, start_col - old_level + new_level)
        if start_row == end_row:
            end_col = start_col + len(tok)
        yield typ, tok, (start_row, start_col), (end_row, end_col), line
w8 wut 2024-10-22 16:16:43 +10:00			`# Copyright 2017, Alex Willmer`
			`#`
			`# Redistribution and use in source and binary forms, with or without`
			`# modification, are permitted provided that the following conditions are met:`
			`#`
			`# 1. Redistributions of source code must retain the above copyright notice,`
			`# this list of conditions and the following disclaimer.`
			`#`
			`# 2. Redistributions in binary form must reproduce the above copyright notice,`
			`# this list of conditions and the following disclaimer in the documentation`
			`# and/or other materials provided with the distribution.`
			`#`
			`# 3. Neither the name of the copyright holder nor the names of its contributors`
			`# may be used to endorse or promote products derived from this software without`
			`# specific prior written permission.`
			`#`
			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"`
			`# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE`
			`# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE`
			`# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR`
			`# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF`
			`# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS`
			`# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
			`# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)`
			`# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
			`# POSSIBILITY OF SUCH DAMAGE.`

			`# !mitogen: minify_safe`

			`import sys`

			`try:`
			`from io import StringIO`
			`except ImportError:`
			`from StringIO import StringIO`

			`import mitogen.core`

			`if sys.version_info < (2, 7, 11):`
			`from mitogen.compat import tokenize`
			`else:`
			`import tokenize`


			`def minimize_source(source):`
			`"""`
			Remove comments and docstrings from Python `source`, preserving line
			`numbers and syntax of empty blocks.`

			`:param str source:`
			`The source to minimize.`

			`:returns str:`
			`The minimized source.`
			`"""`
			`source = mitogen.core.to_text(source)`
			`tokens = tokenize.generate_tokens(StringIO(source).readline)`
			`tokens = strip_comments(tokens)`
			`tokens = strip_docstrings(tokens)`
			`tokens = reindent(tokens)`
			`return tokenize.untokenize(tokens)`


			`def strip_comments(tokens):`
			`"""`
			Drop comment tokens from a `tokenize` stream.

			`Comments on lines 1-2 are kept, to preserve hashbang and encoding.`
			`Trailing whitespace is remove from all lines.`
			`"""`
			`prev_typ = None`
			`prev_end_col = 0`
			`for typ, tok, (start_row, start_col), (end_row, end_col), line in tokens:`
			`if typ in (tokenize.NL, tokenize.NEWLINE):`
			`if prev_typ in (tokenize.NL, tokenize.NEWLINE):`
			`start_col = 0`
			`else:`
			`start_col = prev_end_col`
			`end_col = start_col + 1`
			`elif typ == tokenize.COMMENT and start_row > 2:`
			`continue`
			`prev_typ = typ`
			`prev_end_col = end_col`
			`yield typ, tok, (start_row, start_col), (end_row, end_col), line`


			`def strip_docstrings(tokens):`
			`"""`
			Replace docstring tokens with NL tokens in a `tokenize` stream.

			`Any STRING token not part of an expression is deemed a docstring.`
			`Indented docstrings are not yet recognised.`
			`"""`
			`stack = []`
			`state = 'wait_string'`
			`for t in tokens:`
			`typ = t[0]`
			`if state == 'wait_string':`
			`if typ in (tokenize.NL, tokenize.COMMENT):`
			`yield t`
			`elif typ in (tokenize.DEDENT, tokenize.INDENT, tokenize.STRING):`
			`stack.append(t)`
			`elif typ == tokenize.NEWLINE:`
			`stack.append(t)`
			`start_line, end_line = stack[0][2][0], stack[-1][3][0]+1`
			`for i in range(start_line, end_line):`
			`yield tokenize.NL, '\n', (i, 0), (i,1), '\n'`
			`for t in stack:`
			`if t[0] in (tokenize.DEDENT, tokenize.INDENT):`
			`yield t[0], t[1], (i+1, t[2][1]), (i+1, t[3][1]), t[4]`
			`del stack[:]`
			`else:`
			`stack.append(t)`
			`for t in stack: yield t`
			`del stack[:]`
			`state = 'wait_newline'`
			`elif state == 'wait_newline':`
			`if typ == tokenize.NEWLINE:`
			`state = 'wait_string'`
			`yield t`


			`def reindent(tokens, indent=' '):`
			`"""`
			Replace existing indentation in a token steam, with `indent`.
			`"""`
			`old_levels = []`
			`old_level = 0`
			`new_level = 0`
			`for typ, tok, (start_row, start_col), (end_row, end_col), line in tokens:`
			`if typ == tokenize.INDENT:`
			`old_levels.append(old_level)`
			`old_level = len(tok)`
			`new_level += 1`
			`tok = indent * new_level`
			`elif typ == tokenize.DEDENT:`
			`old_level = old_levels.pop()`
			`new_level -= 1`
			`start_col = max(0, start_col - old_level + new_level)`
			`if start_row == end_row:`
			`end_col = start_col + len(tok)`
			`yield typ, tok, (start_row, start_col), (end_row, end_col), line`