Add deps locally

This commit is contained in:
Rim
2023-12-11 20:30:44 -05:00
parent 5a888bbf06
commit 619e5550cc
5618 changed files with 1022726 additions and 6 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,227 @@
#!/usr/bin/env python
#
# Copyright 2008 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate Google Mock classes from base classes.
This program will read in a C++ source file and output the Google Mock
classes for the specified classes. If no class is specified, all
classes in the source file are emitted.
Usage:
gmock_class.py header-file.h [ClassName]...
Output is sent to stdout.
"""
__author__ = 'nnorwitz@google.com (Neal Norwitz)'
import os
import re
import sys
from cpp import ast
from cpp import utils
# Preserve compatibility with Python 2.3.
try:
_dummy = set
except NameError:
import sets
set = sets.Set
_VERSION = (1, 0, 1) # The version of this script.
# How many spaces to indent. Can set me with the INDENT environment variable.
_INDENT = 2
def _GenerateMethods(output_lines, source, class_node):
function_type = (ast.FUNCTION_VIRTUAL | ast.FUNCTION_PURE_VIRTUAL |
ast.FUNCTION_OVERRIDE)
ctor_or_dtor = ast.FUNCTION_CTOR | ast.FUNCTION_DTOR
indent = ' ' * _INDENT
for node in class_node.body:
# We only care about virtual functions.
if (isinstance(node, ast.Function) and
node.modifiers & function_type and
not node.modifiers & ctor_or_dtor):
# Pick out all the elements we need from the original function.
const = ''
if node.modifiers & ast.FUNCTION_CONST:
const = 'CONST_'
return_type = 'void'
if node.return_type:
# Add modifiers like 'const'.
modifiers = ''
if node.return_type.modifiers:
modifiers = ' '.join(node.return_type.modifiers) + ' '
return_type = modifiers + node.return_type.name
template_args = [arg.name for arg in node.return_type.templated_types]
if template_args:
return_type += '<' + ', '.join(template_args) + '>'
if len(template_args) > 1:
for line in [
'// The following line won\'t really compile, as the return',
'// type has multiple template arguments. To fix it, use a',
'// typedef for the return type.']:
output_lines.append(indent + line)
if node.return_type.pointer:
return_type += '*'
if node.return_type.reference:
return_type += '&'
num_parameters = len(node.parameters)
if len(node.parameters) == 1:
first_param = node.parameters[0]
if source[first_param.start:first_param.end].strip() == 'void':
# We must treat T(void) as a function with no parameters.
num_parameters = 0
tmpl = ''
if class_node.templated_types:
tmpl = '_T'
mock_method_macro = 'MOCK_%sMETHOD%d%s' % (const, num_parameters, tmpl)
args = ''
if node.parameters:
# Due to the parser limitations, it is impossible to keep comments
# while stripping the default parameters. When defaults are
# present, we choose to strip them and comments (and produce
# compilable code).
# TODO(nnorwitz@google.com): Investigate whether it is possible to
# preserve parameter name when reconstructing parameter text from
# the AST.
if len([param for param in node.parameters if param.default]) > 0:
args = ', '.join(param.type.name for param in node.parameters)
else:
# Get the full text of the parameters from the start
# of the first parameter to the end of the last parameter.
start = node.parameters[0].start
end = node.parameters[-1].end
# Remove // comments.
args_strings = re.sub(r'//.*', '', source[start:end])
# Condense multiple spaces and eliminate newlines putting the
# parameters together on a single line. Ensure there is a
# space in an argument which is split by a newline without
# intervening whitespace, e.g.: int\nBar
args = re.sub(' +', ' ', args_strings.replace('\n', ' '))
# Create the mock method definition.
output_lines.extend(['%s%s(%s,' % (indent, mock_method_macro, node.name),
'%s%s(%s));' % (indent*3, return_type, args)])
def _GenerateMocks(filename, source, ast_list, desired_class_names):
processed_class_names = set()
lines = []
for node in ast_list:
if (isinstance(node, ast.Class) and node.body and
# desired_class_names being None means that all classes are selected.
(not desired_class_names or node.name in desired_class_names)):
class_name = node.name
parent_name = class_name
processed_class_names.add(class_name)
class_node = node
# Add namespace before the class.
if class_node.namespace:
lines.extend(['namespace %s {' % n for n in class_node.namespace]) # }
lines.append('')
# Add template args for templated classes.
if class_node.templated_types:
# TODO(paulchang): The AST doesn't preserve template argument order,
# so we have to make up names here.
# TODO(paulchang): Handle non-type template arguments (e.g.
# template<typename T, int N>).
template_arg_count = len(class_node.templated_types.keys())
template_args = ['T%d' % n for n in range(template_arg_count)]
template_decls = ['typename ' + arg for arg in template_args]
lines.append('template <' + ', '.join(template_decls) + '>')
parent_name += '<' + ', '.join(template_args) + '>'
# Add the class prolog.
lines.append('class Mock%s : public %s {' # }
% (class_name, parent_name))
lines.append('%spublic:' % (' ' * (_INDENT // 2)))
# Add all the methods.
_GenerateMethods(lines, source, class_node)
# Close the class.
if lines:
# If there are no virtual methods, no need for a public label.
if len(lines) == 2:
del lines[-1]
# Only close the class if there really is a class.
lines.append('};')
lines.append('') # Add an extra newline.
# Close the namespace.
if class_node.namespace:
for i in range(len(class_node.namespace)-1, -1, -1):
lines.append('} // namespace %s' % class_node.namespace[i])
lines.append('') # Add an extra newline.
if desired_class_names:
missing_class_name_list = list(desired_class_names - processed_class_names)
if missing_class_name_list:
missing_class_name_list.sort()
sys.stderr.write('Class(es) not found in %s: %s\n' %
(filename, ', '.join(missing_class_name_list)))
elif not processed_class_names:
sys.stderr.write('No class found in %s\n' % filename)
return lines
def main(argv=sys.argv):
if len(argv) < 2:
sys.stderr.write('Google Mock Class Generator v%s\n\n' %
'.'.join(map(str, _VERSION)))
sys.stderr.write(__doc__)
return 1
global _INDENT
try:
_INDENT = int(os.environ['INDENT'])
except KeyError:
pass
except:
sys.stderr.write('Unable to use indent of %s\n' % os.environ.get('INDENT'))
filename = argv[1]
desired_class_names = None # None means all classes in the source file.
if len(argv) >= 3:
desired_class_names = set(argv[2:])
source = utils.ReadFile(filename)
if source is None:
return 1
builder = ast.BuilderFromSource(source, filename)
try:
entire_ast = filter(None, builder.Generate())
except KeyboardInterrupt:
return
except:
# An error message was already printed since we couldn't parse.
sys.exit(1)
else:
lines = _GenerateMocks(filename, source, entire_ast, desired_class_names)
sys.stdout.write('\n'.join(lines))
if __name__ == '__main__':
main(sys.argv)

View File

@ -0,0 +1,448 @@
#!/usr/bin/env python
#
# Copyright 2009 Neal Norwitz All Rights Reserved.
# Portions Copyright 2009 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for gmock.scripts.generator.cpp.gmock_class."""
__author__ = 'nnorwitz@google.com (Neal Norwitz)'
import os
import sys
import unittest
# Allow the cpp imports below to work when run as a standalone script.
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from cpp import ast
from cpp import gmock_class
class TestCase(unittest.TestCase):
"""Helper class that adds assert methods."""
def StripLeadingWhitespace(self, lines):
"""Strip leading whitespace in each line in 'lines'."""
return '\n'.join([s.lstrip() for s in lines.split('\n')])
def assertEqualIgnoreLeadingWhitespace(self, expected_lines, lines):
"""Specialized assert that ignores the indent level."""
self.assertEqual(expected_lines, self.StripLeadingWhitespace(lines))
class GenerateMethodsTest(TestCase):
def GenerateMethodSource(self, cpp_source):
"""Convert C++ source to Google Mock output source lines."""
method_source_lines = []
# <test> is a pseudo-filename, it is not read or written.
builder = ast.BuilderFromSource(cpp_source, '<test>')
ast_list = list(builder.Generate())
gmock_class._GenerateMethods(method_source_lines, cpp_source, ast_list[0])
return '\n'.join(method_source_lines)
def testSimpleMethod(self):
source = """
class Foo {
public:
virtual int Bar();
};
"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD0(Bar,\nint());',
self.GenerateMethodSource(source))
def testSimpleConstructorsAndDestructor(self):
source = """
class Foo {
public:
Foo();
Foo(int x);
Foo(const Foo& f);
Foo(Foo&& f);
~Foo();
virtual int Bar() = 0;
};
"""
# The constructors and destructor should be ignored.
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD0(Bar,\nint());',
self.GenerateMethodSource(source))
def testVirtualDestructor(self):
source = """
class Foo {
public:
virtual ~Foo();
virtual int Bar() = 0;
};
"""
# The destructor should be ignored.
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD0(Bar,\nint());',
self.GenerateMethodSource(source))
def testExplicitlyDefaultedConstructorsAndDestructor(self):
source = """
class Foo {
public:
Foo() = default;
Foo(const Foo& f) = default;
Foo(Foo&& f) = default;
~Foo() = default;
virtual int Bar() = 0;
};
"""
# The constructors and destructor should be ignored.
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD0(Bar,\nint());',
self.GenerateMethodSource(source))
def testExplicitlyDeletedConstructorsAndDestructor(self):
source = """
class Foo {
public:
Foo() = delete;
Foo(const Foo& f) = delete;
Foo(Foo&& f) = delete;
~Foo() = delete;
virtual int Bar() = 0;
};
"""
# The constructors and destructor should be ignored.
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD0(Bar,\nint());',
self.GenerateMethodSource(source))
def testSimpleOverrideMethod(self):
source = """
class Foo {
public:
int Bar() override;
};
"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD0(Bar,\nint());',
self.GenerateMethodSource(source))
def testSimpleConstMethod(self):
source = """
class Foo {
public:
virtual void Bar(bool flag) const;
};
"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_CONST_METHOD1(Bar,\nvoid(bool flag));',
self.GenerateMethodSource(source))
def testExplicitVoid(self):
source = """
class Foo {
public:
virtual int Bar(void);
};
"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD0(Bar,\nint(void));',
self.GenerateMethodSource(source))
def testStrangeNewlineInParameter(self):
source = """
class Foo {
public:
virtual void Bar(int
a) = 0;
};
"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD1(Bar,\nvoid(int a));',
self.GenerateMethodSource(source))
def testDefaultParameters(self):
source = """
class Foo {
public:
virtual void Bar(int a, char c = 'x') = 0;
};
"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD2(Bar,\nvoid(int, char));',
self.GenerateMethodSource(source))
def testMultipleDefaultParameters(self):
source = """
class Foo {
public:
virtual void Bar(int a = 42, char c = 'x') = 0;
};
"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD2(Bar,\nvoid(int, char));',
self.GenerateMethodSource(source))
def testRemovesCommentsWhenDefaultsArePresent(self):
source = """
class Foo {
public:
virtual void Bar(int a = 42 /* a comment */,
char /* other comment */ c= 'x') = 0;
};
"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD2(Bar,\nvoid(int, char));',
self.GenerateMethodSource(source))
def testDoubleSlashCommentsInParameterListAreRemoved(self):
source = """
class Foo {
public:
virtual void Bar(int a, // inline comments should be elided.
int b // inline comments should be elided.
) const = 0;
};
"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_CONST_METHOD2(Bar,\nvoid(int a, int b));',
self.GenerateMethodSource(source))
def testCStyleCommentsInParameterListAreNotRemoved(self):
# NOTE(nnorwitz): I'm not sure if it's the best behavior to keep these
# comments. Also note that C style comments after the last parameter
# are still elided.
source = """
class Foo {
public:
virtual const string& Bar(int /* keeper */, int b);
};
"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD2(Bar,\nconst string&(int /* keeper */, int b));',
self.GenerateMethodSource(source))
def testArgsOfTemplateTypes(self):
source = """
class Foo {
public:
virtual int Bar(const vector<int>& v, map<int, string>* output);
};"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD2(Bar,\n'
'int(const vector<int>& v, map<int, string>* output));',
self.GenerateMethodSource(source))
def testReturnTypeWithOneTemplateArg(self):
source = """
class Foo {
public:
virtual vector<int>* Bar(int n);
};"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD1(Bar,\nvector<int>*(int n));',
self.GenerateMethodSource(source))
def testReturnTypeWithManyTemplateArgs(self):
source = """
class Foo {
public:
virtual map<int, string> Bar();
};"""
# Comparing the comment text is brittle - we'll think of something
# better in case this gets annoying, but for now let's keep it simple.
self.assertEqualIgnoreLeadingWhitespace(
'// The following line won\'t really compile, as the return\n'
'// type has multiple template arguments. To fix it, use a\n'
'// typedef for the return type.\n'
'MOCK_METHOD0(Bar,\nmap<int, string>());',
self.GenerateMethodSource(source))
def testSimpleMethodInTemplatedClass(self):
source = """
template<class T>
class Foo {
public:
virtual int Bar();
};
"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD0_T(Bar,\nint());',
self.GenerateMethodSource(source))
def testPointerArgWithoutNames(self):
source = """
class Foo {
virtual int Bar(C*);
};
"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD1(Bar,\nint(C*));',
self.GenerateMethodSource(source))
def testReferenceArgWithoutNames(self):
source = """
class Foo {
virtual int Bar(C&);
};
"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD1(Bar,\nint(C&));',
self.GenerateMethodSource(source))
def testArrayArgWithoutNames(self):
source = """
class Foo {
virtual int Bar(C[]);
};
"""
self.assertEqualIgnoreLeadingWhitespace(
'MOCK_METHOD1(Bar,\nint(C[]));',
self.GenerateMethodSource(source))
class GenerateMocksTest(TestCase):
def GenerateMocks(self, cpp_source):
"""Convert C++ source to complete Google Mock output source."""
# <test> is a pseudo-filename, it is not read or written.
filename = '<test>'
builder = ast.BuilderFromSource(cpp_source, filename)
ast_list = list(builder.Generate())
lines = gmock_class._GenerateMocks(filename, cpp_source, ast_list, None)
return '\n'.join(lines)
def testNamespaces(self):
source = """
namespace Foo {
namespace Bar { class Forward; }
namespace Baz {
class Test {
public:
virtual void Foo();
};
} // namespace Baz
} // namespace Foo
"""
expected = """\
namespace Foo {
namespace Baz {
class MockTest : public Test {
public:
MOCK_METHOD0(Foo,
void());
};
} // namespace Baz
} // namespace Foo
"""
self.assertEqualIgnoreLeadingWhitespace(
expected, self.GenerateMocks(source))
def testClassWithStorageSpecifierMacro(self):
source = """
class STORAGE_SPECIFIER Test {
public:
virtual void Foo();
};
"""
expected = """\
class MockTest : public Test {
public:
MOCK_METHOD0(Foo,
void());
};
"""
self.assertEqualIgnoreLeadingWhitespace(
expected, self.GenerateMocks(source))
def testTemplatedForwardDeclaration(self):
source = """
template <class T> class Forward; // Forward declaration should be ignored.
class Test {
public:
virtual void Foo();
};
"""
expected = """\
class MockTest : public Test {
public:
MOCK_METHOD0(Foo,
void());
};
"""
self.assertEqualIgnoreLeadingWhitespace(
expected, self.GenerateMocks(source))
def testTemplatedClass(self):
source = """
template <typename S, typename T>
class Test {
public:
virtual void Foo();
};
"""
expected = """\
template <typename T0, typename T1>
class MockTest : public Test<T0, T1> {
public:
MOCK_METHOD0_T(Foo,
void());
};
"""
self.assertEqualIgnoreLeadingWhitespace(
expected, self.GenerateMocks(source))
def testTemplateInATemplateTypedef(self):
source = """
class Test {
public:
typedef std::vector<std::list<int>> FooType;
virtual void Bar(const FooType& test_arg);
};
"""
expected = """\
class MockTest : public Test {
public:
MOCK_METHOD1(Bar,
void(const FooType& test_arg));
};
"""
self.assertEqualIgnoreLeadingWhitespace(
expected, self.GenerateMocks(source))
def testTemplateInATemplateTypedefWithComma(self):
source = """
class Test {
public:
typedef std::function<void(
const vector<std::list<int>>&, int> FooType;
virtual void Bar(const FooType& test_arg);
};
"""
expected = """\
class MockTest : public Test {
public:
MOCK_METHOD1(Bar,
void(const FooType& test_arg));
};
"""
self.assertEqualIgnoreLeadingWhitespace(
expected, self.GenerateMocks(source))
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,59 @@
#!/usr/bin/env python
#
# Copyright 2007 Neal Norwitz
# Portions Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""C++ keywords and helper utilities for determining keywords."""
__author__ = 'nnorwitz@google.com (Neal Norwitz)'
try:
# Python 3.x
import builtins
except ImportError:
# Python 2.x
import __builtin__ as builtins
if not hasattr(builtins, 'set'):
# Nominal support for Python 2.3.
from sets import Set as set
TYPES = set('bool char int long short double float void wchar_t unsigned signed'.split())
TYPE_MODIFIERS = set('auto register const inline extern static virtual volatile mutable'.split())
ACCESS = set('public protected private friend'.split())
CASTS = set('static_cast const_cast dynamic_cast reinterpret_cast'.split())
OTHERS = set('true false asm class namespace using explicit this operator sizeof'.split())
OTHER_TYPES = set('new delete typedef struct union enum typeid typename template'.split())
CONTROL = set('case switch default if else return goto'.split())
EXCEPTION = set('try catch throw'.split())
LOOP = set('while do for break continue'.split())
ALL = TYPES | TYPE_MODIFIERS | ACCESS | CASTS | OTHERS | OTHER_TYPES | CONTROL | EXCEPTION | LOOP
def IsKeyword(token):
return token in ALL
def IsBuiltinType(token):
if token in ('virtual', 'inline'):
# These only apply to methods, they can't be types by themselves.
return False
return token in TYPES or token in TYPE_MODIFIERS

View File

@ -0,0 +1,287 @@
#!/usr/bin/env python
#
# Copyright 2007 Neal Norwitz
# Portions Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenize C++ source code."""
__author__ = 'nnorwitz@google.com (Neal Norwitz)'
try:
# Python 3.x
import builtins
except ImportError:
# Python 2.x
import __builtin__ as builtins
import sys
from cpp import utils
if not hasattr(builtins, 'set'):
# Nominal support for Python 2.3.
from sets import Set as set
# Add $ as a valid identifier char since so much code uses it.
_letters = 'abcdefghijklmnopqrstuvwxyz'
VALID_IDENTIFIER_CHARS = set(_letters + _letters.upper() + '_0123456789$')
HEX_DIGITS = set('0123456789abcdefABCDEF')
INT_OR_FLOAT_DIGITS = set('01234567890eE-+')
# C++0x string preffixes.
_STR_PREFIXES = set(('R', 'u8', 'u8R', 'u', 'uR', 'U', 'UR', 'L', 'LR'))
# Token types.
UNKNOWN = 'UNKNOWN'
SYNTAX = 'SYNTAX'
CONSTANT = 'CONSTANT'
NAME = 'NAME'
PREPROCESSOR = 'PREPROCESSOR'
# Where the token originated from. This can be used for backtracking.
# It is always set to WHENCE_STREAM in this code.
WHENCE_STREAM, WHENCE_QUEUE = range(2)
class Token(object):
"""Data container to represent a C++ token.
Tokens can be identifiers, syntax char(s), constants, or
pre-processor directives.
start contains the index of the first char of the token in the source
end contains the index of the last char of the token in the source
"""
def __init__(self, token_type, name, start, end):
self.token_type = token_type
self.name = name
self.start = start
self.end = end
self.whence = WHENCE_STREAM
def __str__(self):
if not utils.DEBUG:
return 'Token(%r)' % self.name
return 'Token(%r, %s, %s)' % (self.name, self.start, self.end)
__repr__ = __str__
def _GetString(source, start, i):
i = source.find('"', i+1)
while source[i-1] == '\\':
# Count the trailing backslashes.
backslash_count = 1
j = i - 2
while source[j] == '\\':
backslash_count += 1
j -= 1
# When trailing backslashes are even, they escape each other.
if (backslash_count % 2) == 0:
break
i = source.find('"', i+1)
return i + 1
def _GetChar(source, start, i):
# NOTE(nnorwitz): may not be quite correct, should be good enough.
i = source.find("'", i+1)
while source[i-1] == '\\':
# Need to special case '\\'.
if (i - 2) > start and source[i-2] == '\\':
break
i = source.find("'", i+1)
# Try to handle unterminated single quotes (in a #if 0 block).
if i < 0:
i = start
return i + 1
def GetTokens(source):
"""Returns a sequence of Tokens.
Args:
source: string of C++ source code.
Yields:
Token that represents the next token in the source.
"""
# Cache various valid character sets for speed.
valid_identifier_chars = VALID_IDENTIFIER_CHARS
hex_digits = HEX_DIGITS
int_or_float_digits = INT_OR_FLOAT_DIGITS
int_or_float_digits2 = int_or_float_digits | set('.')
# Only ignore errors while in a #if 0 block.
ignore_errors = False
count_ifs = 0
i = 0
end = len(source)
while i < end:
# Skip whitespace.
while i < end and source[i].isspace():
i += 1
if i >= end:
return
token_type = UNKNOWN
start = i
c = source[i]
if c.isalpha() or c == '_': # Find a string token.
token_type = NAME
while source[i] in valid_identifier_chars:
i += 1
# String and character constants can look like a name if
# they are something like L"".
if (source[i] == "'" and (i - start) == 1 and
source[start:i] in 'uUL'):
# u, U, and L are valid C++0x character preffixes.
token_type = CONSTANT
i = _GetChar(source, start, i)
elif source[i] == "'" and source[start:i] in _STR_PREFIXES:
token_type = CONSTANT
i = _GetString(source, start, i)
elif c == '/' and source[i+1] == '/': # Find // comments.
i = source.find('\n', i)
if i == -1: # Handle EOF.
i = end
continue
elif c == '/' and source[i+1] == '*': # Find /* comments. */
i = source.find('*/', i) + 2
continue
elif c in ':+-<>&|*=': # : or :: (plus other chars).
token_type = SYNTAX
i += 1
new_ch = source[i]
if new_ch == c and c != '>': # Treat ">>" as two tokens.
i += 1
elif c == '-' and new_ch == '>':
i += 1
elif new_ch == '=':
i += 1
elif c in '()[]{}~!?^%;/.,': # Handle single char tokens.
token_type = SYNTAX
i += 1
if c == '.' and source[i].isdigit():
token_type = CONSTANT
i += 1
while source[i] in int_or_float_digits:
i += 1
# Handle float suffixes.
for suffix in ('l', 'f'):
if suffix == source[i:i+1].lower():
i += 1
break
elif c.isdigit(): # Find integer.
token_type = CONSTANT
if c == '0' and source[i+1] in 'xX':
# Handle hex digits.
i += 2
while source[i] in hex_digits:
i += 1
else:
while source[i] in int_or_float_digits2:
i += 1
# Handle integer (and float) suffixes.
for suffix in ('ull', 'll', 'ul', 'l', 'f', 'u'):
size = len(suffix)
if suffix == source[i:i+size].lower():
i += size
break
elif c == '"': # Find string.
token_type = CONSTANT
i = _GetString(source, start, i)
elif c == "'": # Find char.
token_type = CONSTANT
i = _GetChar(source, start, i)
elif c == '#': # Find pre-processor command.
token_type = PREPROCESSOR
got_if = source[i:i+3] == '#if' and source[i+3:i+4].isspace()
if got_if:
count_ifs += 1
elif source[i:i+6] == '#endif':
count_ifs -= 1
if count_ifs == 0:
ignore_errors = False
# TODO(nnorwitz): handle preprocessor statements (\ continuations).
while 1:
i1 = source.find('\n', i)
i2 = source.find('//', i)
i3 = source.find('/*', i)
i4 = source.find('"', i)
# NOTE(nnorwitz): doesn't handle comments in #define macros.
# Get the first important symbol (newline, comment, EOF/end).
i = min([x for x in (i1, i2, i3, i4, end) if x != -1])
# Handle #include "dir//foo.h" properly.
if source[i] == '"':
i = source.find('"', i+1) + 1
assert i > 0
continue
# Keep going if end of the line and the line ends with \.
if not (i == i1 and source[i-1] == '\\'):
if got_if:
condition = source[start+4:i].lstrip()
if (condition.startswith('0') or
condition.startswith('(0)')):
ignore_errors = True
break
i += 1
elif c == '\\': # Handle \ in code.
# This is different from the pre-processor \ handling.
i += 1
continue
elif ignore_errors:
# The tokenizer seems to be in pretty good shape. This
# raise is conditionally disabled so that bogus code
# in an #if 0 block can be handled. Since we will ignore
# it anyways, this is probably fine. So disable the
# exception and return the bogus char.
i += 1
else:
sys.stderr.write('Got invalid token in %s @ %d token:%s: %r\n' %
('?', i, c, source[i-10:i+10]))
raise RuntimeError('unexpected token')
if i <= 0:
print('Invalid index, exiting now.')
return
yield Token(token_type, source[start:i], start, i)
if __name__ == '__main__':
def main(argv):
"""Driver mostly for testing purposes."""
for filename in argv[1:]:
source = utils.ReadFile(filename)
if source is None:
continue
for token in GetTokens(source):
print('%-12s: %s' % (token.token_type, token.name))
# print('\r%6.2f%%' % (100.0 * index / token.end),)
sys.stdout.write('\n')
main(sys.argv)

View File

@ -0,0 +1,41 @@
#!/usr/bin/env python
#
# Copyright 2007 Neal Norwitz
# Portions Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generic utilities for C++ parsing."""
__author__ = 'nnorwitz@google.com (Neal Norwitz)'
import sys
# Set to True to see the start/end token indices.
DEBUG = True
def ReadFile(filename, print_error=True):
"""Returns the contents of a file."""
try:
fp = open(filename)
try:
return fp.read()
finally:
fp.close()
except IOError:
if print_error:
print('Error reading %s: %s' % (filename, sys.exc_info()[1]))
return None