Python: Copy Python extractor to codeql repo

2025-12-17 01:03:14 +01:00 · 2024-02-28 15:15:21 +00:00
parent 297a17975d
commit 6dec323cfc
369 changed files with 165346 additions and 0 deletions
--- a/python/extractor/tests/__init__.py
+++ b/python/extractor/tests/__init__.py
--- a/python/extractor/tests/buildtools/__init__.py
+++ b/python/extractor/tests/buildtools/__init__.py
--- a/python/extractor/tests/buildtools/helper.py
+++ b/python/extractor/tests/buildtools/helper.py
@@ -0,0 +1,102 @@
+import os
+import stat
+import tempfile
+import shutil
+import time
+import sys
+import subprocess
+from contextlib import contextmanager
+from functools import wraps
+
+
+# Would have liked to use a decorator, but on Python 2 functools.wraps does not preserve the
+# signature well enough for pytest to figure out what is going on. It would be possible to use
+# the decorator package, but that seemed like a bit too much of a hassle.
+@contextmanager
+def in_fresh_temp_dir():
+    """Run the enclosed block with a new temporary directory as the cwd.
+
+    Yields the path of the temporary directory. On exit the previous working
+    directory is restored before `managed_temp_dir` cleans the directory up.
+    """
+    previous_dir = os.getcwd()
+    with managed_temp_dir(prefix='extractor-python-buildtools-test-') as scratch:
+        os.chdir(scratch)
+        try:
+            yield scratch
+        finally:
+            # Step out of the directory again before it gets cleaned up.
+            os.chdir(previous_dir)
+
+
+@contextmanager
+def managed_temp_dir(prefix=None):
+    """Create a temporary directory, yield its path, and delete it on exit.
+
+    `prefix` is forwarded to `tempfile.mkdtemp`. The directory is handed to
+    `rmtree_robust` even when the enclosed block raises.
+    """
+    tmp_path = tempfile.mkdtemp(prefix=prefix)
+    try:
+        yield tmp_path
+    finally:
+        rmtree_robust(tmp_path)
+
+
+def rmtree_robust(dir):
+    """Recursively delete `dir`, retrying failed removals a bounded number of times.
+
+    Errors reported by `shutil.rmtree` are handled by making the offending path
+    writable and retrying with an exponentially growing delay (0.1s, 0.2s, ...).
+    Once the delay exceeds one second the path is given up on silently, so this
+    function is best-effort: it does not guarantee that `dir` is gone afterwards.
+
+    On Windows the path is converted to an extended-length (`\\\\?\\`) path first,
+    and the function sleeps briefly after the deletion.
+    """
+    if is_windows():
+        # It's important that the path is a Unicode path on Windows, so
+        # that the right system calls get used.
+        dir = u'' + dir
+        if not dir.startswith("\\\\?\\"):
+            dir = "\\\\?\\" + os.path.abspath(dir)
+
+    # Emulate Python 3 "nonlocal" keyword
+    class state: pass
+    state.last_failed_delete = None
+
+
+    def _rmtree(path):
+        """wrapper of shutil.rmtree to handle Python 3.12 rename (onerror => onexc)"""
+        if sys.version_info >= (3, 12):
+            shutil.rmtree(path, onexc=remove_error)
+        else:
+            shutil.rmtree(path, onerror=remove_error)
+
+    def remove_error(func, path, excinfo):
+        """Error callback for `shutil.rmtree`: try to delete `path` again.
+
+        `func` and `excinfo` are supplied by `shutil.rmtree` and are not used.
+        """
+        # If we get an error twice in a row for the same path then just give up.
+        if state.last_failed_delete == path:
+            return
+        state.last_failed_delete = path
+
+        # The problem could be one of permissions, so setting path writable
+        # might fix it.
+        try:
+            os.chmod(path, stat.S_IWRITE)
+        except OSError:
+            # Best effort only (the path may already be gone); the retry loop
+            # below decides whether the removal ultimately worked.
+            pass
+
+        # On Windows, we sometimes get errors about directories not being
+        # empty, but immediately afterwards they are empty. Waiting a bit
+        # might therefore be sufficient.
+        t = 0.1
+        while True:
+            try:
+                if os.path.isdir(path):
+                    _rmtree(path)
+                else:
+                    os.remove(path)
+            except OSError:
+                pass
+            # `_rmtree` reports failures through this very callback instead of
+            # raising, so look at the file system to find out whether the path
+            # is really gone. Without this check the loop would keep going
+            # after a successful removal, and could spin forever on a
+            # directory that cannot be deleted.
+            if not os.path.lexists(path):
+                return
+            if t > 1:
+                return # Give up
+            time.sleep(t)
+            t *= 2
+    _rmtree(dir)
+    # On Windows, attempting to write immediately after deletion may result in
+    # an 'access denied' exception, so wait a bit.
+    if is_windows():
+        time.sleep(0.5)
+
+
+def is_windows():
+    """Return True when `os.name` reports a Windows ('nt') platform."""
+    platform_name = os.name
+    return platform_name == 'nt'
+
+
+@contextmanager
+def copy_repo_dir(repo_dir_in):
+    """Yield the path of a throw-away copy of the directory `repo_dir_in`.
+
+    The tree is copied (symlinks are kept as symlinks) into a `repo` directory
+    inside a temporary directory obtained from `managed_temp_dir`.
+    """
+    with managed_temp_dir(prefix="extractor-python-buildtools-test-") as scratch:
+        destination = os.path.join(scratch, 'repo')
+        print('copying', repo_dir_in, 'to', destination)
+        shutil.copytree(repo_dir_in, destination, symlinks=True)
+        yield destination
+
+################################################################################
+
+
+DEVNULL = subprocess.DEVNULL
--- a/python/extractor/tests/buildtools/test_index.py
+++ b/python/extractor/tests/buildtools/test_index.py
@@ -0,0 +1,169 @@
+import os
+import pytest
+import shutil
+import glob
+
+import buildtools.index
+from tests.buildtools.helper import in_fresh_temp_dir
+
+# we use `monkeypatch.setenv` instead of setting `os.environ` directly, since setting it
+# directly produces cross-talk between tests. (`mock.patch.dict` is only available for Python 3)
+
+
+class TestIncludeOptions:
+    """Tests for `buildtools.index.get_include_options`."""
+
+    @staticmethod
+    def test_LGTM_SRC(monkeypatch):
+        # A single source root should give a single `-R` option.
+        monkeypatch.setenv("LGTM_SRC", "path/src")
+        options = buildtools.index.get_include_options()
+        assert options == ["-R", "path/src"]
+
+    @staticmethod
+    def test_LGTM_INDEX_INCLUDE(monkeypatch):
+        # Each newline-separated entry should give its own `-R` option.
+        monkeypatch.setenv("LGTM_INDEX_INCLUDE", "/foo\n/bar")
+        options = buildtools.index.get_include_options()
+        assert options == ["-R", "/foo", "-R", "/bar"]
+
+
+class TestPip21_3:
+    """Tests for `buildtools.index.exclude_pip_21_3_build_dir_options`."""
+
+    @staticmethod
+    def test_no_build_dir(monkeypatch):
+        with in_fresh_temp_dir() as path:
+            os.makedirs(os.path.join(path, "src"))
+            monkeypatch.setenv("LGTM_SRC", path)
+            assert buildtools.index.exclude_pip_21_3_build_dir_options() == []
+
+    @staticmethod
+    def test_faked_build_dir(monkeypatch):
+        # since I don't want to introduce specific pip version on our
+        # testing infrastructure, I'm just going to fake that `pip install .` had
+        # been called.
+        with in_fresh_temp_dir() as path:
+            os.makedirs(os.path.join(path, "build", "lib"))
+            monkeypatch.setenv("LGTM_SRC", path)
+            expected = ["-Y", os.path.join(path, "build")]
+            assert buildtools.index.exclude_pip_21_3_build_dir_options() == expected
+
+    @staticmethod
+    def test_disable_environment_variable(monkeypatch):
+        monkeypatch.setenv(
+            "CODEQL_EXTRACTOR_PYTHON_DISABLE_AUTOMATIC_PIP_BUILD_DIR_EXCLUDE", "1"
+        )
+        with in_fresh_temp_dir() as path:
+            os.makedirs(os.path.join(path, "build", "lib"))
+            monkeypatch.setenv("LGTM_SRC", path)
+            assert buildtools.index.exclude_pip_21_3_build_dir_options() == []
+
+    @staticmethod
+    def test_code_build_dir(monkeypatch):
+        # simulating that you have the module `mypkg.build.lib.foo`
+        with in_fresh_temp_dir() as path:
+            os.makedirs(os.path.join(path, "mypkg", "build", "lib"))
+            package_files = [
+                (("mypkg", "build", "lib", "foo.py"), "print(42)"),
+                (("mypkg", "build", "lib", "__init__.py"), ""),
+                (("mypkg", "build", "__init__.py"), ""),
+                (("mypkg", "__init__.py"), ""),
+            ]
+            for parts, content in package_files:
+                # Close every file right away: a lingering open handle can get
+                # in the way of deleting the temporary directory on Windows.
+                with open(os.path.join(path, *parts), "wt") as f:
+                    f.write(content)
+
+            monkeypatch.setenv("LGTM_SRC", path)
+            assert buildtools.index.exclude_pip_21_3_build_dir_options() == []
+
+
+def create_fake_venv(path, is_unix):
+    """Create a directory tree at `path` that looks like a virtual environment.
+
+    The tree contains an empty `pyvenv.cfg` plus either the unix layout
+    (`bin/activate`, `lib/python3.10/site-packages`) when `is_unix` is true, or
+    the Windows layout (`Scripts/activate.bat`, `Lib/site-packages`) otherwise.
+    `path` must not exist yet, since it is created with `os.makedirs`.
+    """
+    def touch(*parts):
+        # Create an empty file and close it immediately, instead of leaving
+        # the handle open until it happens to be garbage collected.
+        with open(os.path.join(path, *parts), "wt") as f:
+            f.write("")
+
+    os.makedirs(path)
+    touch("pyvenv.cfg")
+    if is_unix:
+        os.mkdir(os.path.join(path, "bin"))
+        touch("bin", "activate")
+        os.makedirs(os.path.join(path, "lib", "python3.10", "site-packages"))
+    else:
+        os.mkdir(os.path.join(path, "Scripts"))
+        touch("Scripts", "activate.bat")
+        os.makedirs(os.path.join(path, "Lib", "site-packages"))
+
+class TestVenvIgnore:
+    """Tests for `buildtools.index.exclude_venvs_options`, using trees built by `create_fake_venv`."""
+
+    @staticmethod
+    def test_no_venv(monkeypatch):
+        # An empty source directory should produce no exclude options.
+        with in_fresh_temp_dir() as path:
+            monkeypatch.setenv("LGTM_SRC", path)
+            assert buildtools.index.exclude_venvs_options() == []
+
+    @staticmethod
+    @pytest.mark.parametrize("is_unix", [True,False])
+    def test_faked_venv_dir(monkeypatch, is_unix):
+        # Both the unix and the Windows layout should be excluded with `-Y`.
+        with in_fresh_temp_dir() as path:
+            create_fake_venv(os.path.join(path, "venv"), is_unix=is_unix)
+            monkeypatch.setenv("LGTM_SRC", path)
+            assert buildtools.index.exclude_venvs_options() == ["-Y", os.path.join(path, "venv")]
+
+    @staticmethod
+    @pytest.mark.parametrize("is_unix", [True,False])
+    def test_multiple_faked_venv_dirs(monkeypatch, is_unix):
+        with in_fresh_temp_dir() as path:
+            create_fake_venv(os.path.join(path, "venv"), is_unix=is_unix)
+            create_fake_venv(os.path.join(path, "venv2"), is_unix=is_unix)
+
+            monkeypatch.setenv("LGTM_SRC", path)
+
+            expected = [
+                "-Y", os.path.join(path, "venv"),
+                "-Y", os.path.join(path, "venv2"),
+            ]
+
+            actual = buildtools.index.exclude_venvs_options()
+            # Sorted on both sides, so the order in which the venvs are reported does not matter.
+            assert sorted(actual) == sorted(expected)
+
+    @staticmethod
+    def test_faked_venv_dir_no_pyvenv_cfg(monkeypatch):
+        """
+        Sometimes, the `pyvenv.cfg` file is not included when a virtual environment is
+        added to a git-repo, but we should be able to ignore the venv anyway.
+
+        See
+        - https://github.com/FiacreT/M-moire/tree/4089755191ffc848614247e98bbb641c1933450d/osintplatform/testNeo/venv
+        - https://github.com/Lynchie/KCM/tree/ea9eeed07e0c9eec41f9fc7480ce90390ee09876/VENV
+        """
+        with in_fresh_temp_dir() as path:
+            create_fake_venv(os.path.join(path, "venv"), is_unix=True)
+            monkeypatch.setenv("LGTM_SRC", path)
+            os.remove(os.path.join(path, "venv", "pyvenv.cfg"))
+            assert buildtools.index.exclude_venvs_options() == ["-Y", os.path.join(path, "venv")]
+
+    @staticmethod
+    def test_faked_venv_no_bin_dir(monkeypatch):
+        """
+        Sometimes, the activate script is not included when a virtual environment is
+        added to a git-repo, but we should be able to ignore the venv anyway.
+        """
+
+        with in_fresh_temp_dir() as path:
+            create_fake_venv(os.path.join(path, "venv"), is_unix=True)
+            monkeypatch.setenv("LGTM_SRC", path)
+            bin_dir = os.path.join(path, "venv", "bin")
+            assert os.path.isdir(bin_dir)
+            shutil.rmtree(bin_dir)
+            assert buildtools.index.exclude_venvs_options() == ["-Y", os.path.join(path, "venv")]
+
+    @staticmethod
+    def test_faked_venv_dir_no_lib_python(monkeypatch):
+        """
+        If there are no `lib/python*` dirs within a unix venv, then it doesn't
+        constitute a functional virtual environment, and we don't exclude it. That's not
+        going to hurt, since it won't contain any installed packages.
+        """
+
+        with in_fresh_temp_dir() as path:
+            create_fake_venv(os.path.join(path, "venv"), is_unix=True)
+            monkeypatch.setenv("LGTM_SRC", path)
+            glob_res = glob.glob(os.path.join(path, "venv", "lib", "python*"))
+            # Make sure the fake venv really had a `lib/python*` dir to remove.
+            assert glob_res
+            for d in glob_res:
+                shutil.rmtree(d)
+            assert buildtools.index.exclude_venvs_options() == []
+
+    @staticmethod
+    @pytest.mark.parametrize("is_unix", [True,False])
+    def test_disable_environment_variable(monkeypatch, is_unix):
+        # With the opt-out variable set, even a complete fake venv is not excluded.
+        monkeypatch.setenv(
+            "CODEQL_EXTRACTOR_PYTHON_DISABLE_AUTOMATIC_VENV_EXCLUDE", "1"
+        )
+        with in_fresh_temp_dir() as path:
+            create_fake_venv(os.path.join(path, "venv"), is_unix=is_unix)
+            monkeypatch.setenv("LGTM_SRC", path)
+            assert buildtools.index.exclude_venvs_options() == []
--- a/python/extractor/tests/buildtools/test_install.py
+++ b/python/extractor/tests/buildtools/test_install.py
@@ -0,0 +1,16 @@
+import pytest
+
+import buildtools.install
+from tests.buildtools.helper import in_fresh_temp_dir
+
+def test_basic(monkeypatch, mocker):
+    """`buildtools.install.main` should exit with status 0 when subprocess calls are patched out."""
+    for target in ('subprocess.call', 'subprocess.check_call'):
+        mocker.patch(target)
+
+    with in_fresh_temp_dir() as workspace:
+        monkeypatch.setenv('LGTM_WORKSPACE', workspace)
+        monkeypatch.setenv('SEMMLE_DIST', '<none>')
+
+        with pytest.raises(SystemExit) as raised:
+            buildtools.install.main(3, '.', [])
+        assert raised.value.code == 0
--- a/python/extractor/tests/buildtools/test_python_auto_install.py
+++ b/python/extractor/tests/buildtools/test_python_auto_install.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+import buildtools.semmle.requirements as requirements
+import unittest
+
+class RequirementsTests(unittest.TestCase):
+    """Checks `requirements.clean` and `requirements.restrict` against a fixed requirements list."""
+
+    def assertExpected(self, reqs, expected):
+        """Assert that `reqs` prints the same as the parsed lines of the `expected` text."""
+        actual_text = str(reqs)
+        expected_text = str(requirements.parse(expected.splitlines()))
+        self.assertEqual(actual_text, expected_text)
+
+    _input = """\
+        SQLAlchemy<1.1.0,>=1.0.10 # MIT
+        sqlalchemy-migrate>=0.9.6 # Apache-2.0
+        stevedore>=1.10.0a4 # Apache-2.0
+        WebOb>1.2.3 # MIT
+        oslo.i18n!=2.1.0,==2.0.7 # Apache-2.0
+        foo>=0.9,<0.8 # Contradictory
+        bar>=1.3, <1.3 # Contradictory, but only just
+        baz>=3 # No dot in version number.
+        git+https://github.com/mozilla/elasticutils.git # Requirement in Git. Should be ignored.
+        -e git+https://github.com/Lasagne/Lasagne.git@8f4f9b2#egg=Lasagne==0.2.git # Another Git requirement.
+        """
+
+    def test_clean(self):
+        """`requirements.clean` applied to the parsed input should match the list below."""
+        expected = """\
+        SQLAlchemy<1.1.0,>=1.0.10
+        sqlalchemy-migrate>=0.9.6
+        stevedore>=1.10.0a4
+        WebOb>1.2.3
+        oslo.i18n!=2.1.0,==2.0.7
+        foo>=0.9
+        bar>=1.3
+        baz>=3
+        """
+        parsed = requirements.parse(self._input.splitlines())
+        cleaned = requirements.clean(parsed)
+        self.assertExpected(cleaned, expected)
+
+    def test_restricted(self):
+        """`requirements.restrict` applied to the parsed input should match the list below."""
+        expected = """\
+        SQLAlchemy<1.1.0,>=1.0.10,==1.*
+        sqlalchemy-migrate>=0.9.6,==0.*
+        stevedore>=1.10.0a4,==1.*
+        WebOb>1.2.3,==1.*
+        oslo.i18n!=2.1.0,==2.0.7
+        foo>=0.9,==0.*
+        bar>=1.3,==1.*
+        baz==3.*,>=3
+        """
+        parsed = requirements.parse(self._input.splitlines())
+        restricted = requirements.restrict(parsed)
+        self.assertExpected(restricted, expected)
+
+if __name__ == "__main__":
+    unittest.main()
--- a/python/extractor/tests/buildtools/test_version.py
+++ b/python/extractor/tests/buildtools/test_version.py
@@ -0,0 +1,244 @@
+import os
+import re
+from textwrap import dedent
+import itertools
+
+import pytest
+
+import buildtools.version as version
+from tests.buildtools.helper import in_fresh_temp_dir
+
+
+class TestTravisVersion:
+
+    # based on https://docs.travis-ci.com/user/customizing-the-build/#build-matrix
+    # and https://docs.travis-ci.com/user/languages/python/
+
+    def test_simple(self):
+        """In an empty directory, `travis_version` should return None."""
+        with in_fresh_temp_dir():
+            detected = version.travis_version('.')
+            assert detected is None
+
+
+    @pytest.mark.parametrize(
+        'name,expected,travis_file',[
+        ('empty', None, ''),
+        ('no_python', None, dedent("""\
+        language: ruby
+        rvm:
+        - 2.5
+        - 2.6
+        """)),
+
+        ('both', None, dedent("""\
+        language: python
+        python:
+        - "2.6"
+        - "2.7"
+        - "3.5"
+        - "3.6"
+        """)),
+
+        ('only_py2', 2, dedent("""\
+        language: python
+        python:
+        - "2.6"
+        - "2.7"
+        """)),
+
+        ('only_py3', 3, dedent("""\
+        language: python
+        python:
+        - "3.5"
+        - "3.6"
+        """)),
+
+        ('jobs_both', None, dedent("""\
+        language: python
+        jobs:
+            include:
+                - python: 2.6
+                - python: 2.7
+                - python: 3.5
+                - python: 3.6
+        """)),
+
+        ('jobs_only_py2', 2, dedent("""\
+        language: python
+        jobs:
+            include:
+                - python: 2.6
+                - python: 2.7
+        """)),
+
+        ('jobs_only_py3', 3, dedent("""\
+        language: python
+        jobs:
+            include:
+                - python: 3.5
+                - python: 3.6
+        """)),
+
+        ('top_level_and_jobs', None, dedent("""\
+        language: python
+        python:
+        - "2.6"
+        - "2.7"
+        jobs:
+            include:
+                - python: 3.5
+                - python: 3.6
+        """)),
+
+        ('jobs_unrelated', 2, dedent("""\
+        language: python
+        python:
+        - "2.6"
+        - "2.7"
+        jobs:
+            include:
+                - env: FOO=FOO
+                - env: FOO=BAR
+        """)),
+
+        ('jobs_no_python', None, dedent("""\
+        language: ruby
+        jobs:
+            include:
+                - rvm: 2.5
+                - rvm: 2.6
+        """)),
+
+        # matrix is the old name for jobs (still supported as of 2019-11)
+        ('matrix_only_py3', 3, dedent("""\
+        language: python
+        matrix:
+            include:
+                - python: 3.5
+                - python: 3.6
+        """)),
+
+        ('quoted_py2', 2, dedent("""\
+        language: python
+        python:
+        - "2.7"
+        """)),
+
+        ('unquoted_py2', 2, dedent("""\
+        language: python
+        python:
+        - 2.7
+        """)),
+    ])
+    def test_with_file(self, name, expected, travis_file):
+        """Write `travis_file` to `.travis.yml` and check the version `travis_version` reports.
+
+        `name` is only used as the assertion message, to identify the failing case.
+        """
+        with in_fresh_temp_dir():
+            with open('.travis.yml', 'w') as f:
+                f.write(travis_file)
+            # Compare by value: `is` on integers only works thanks to CPython's
+            # caching of small ints, which is an implementation detail.
+            assert version.travis_version('.') == expected, name
+
+    def test_filesnames(self):
+        """Should prefer .travis.yml over travis.yml (which we still support for some legacy reason)
+        """
+        with in_fresh_temp_dir():
+            with open('travis.yml', 'w') as f:
+                f.write(dedent("""\
+                    language: python
+                    python:
+                    - "2.6"
+                    - "2.7"
+                    """))
+            # `==` rather than `is`: identity comparison with an int literal is a
+            # SyntaxWarning on Python 3.8+ and depends on small-int caching.
+            assert version.travis_version('.') == 2
+
+            # Once `.travis.yml` exists as well, it should take precedence.
+            with open('.travis.yml', 'w') as f:
+                f.write(dedent("""\
+                    language: python
+                    python:
+                    - "3.5"
+                    - "3.6"
+                    """))
+            assert version.travis_version('.') == 3
+
+
+class TestTroveVersion:
+
+    def test_empty(self):
+        """In an empty directory, `trove_version` should return None."""
+        with in_fresh_temp_dir():
+            detected = version.trove_version('.')
+            assert detected is None
+
+    def test_with_file(self):
+        """Check `trove_version` for single trove classifiers and for every pair of them."""
+        def write_setup_py(classifiers):
+            # The classifiers are formatted into the template before it is dedented.
+            contents = dedent("""\
+                setup(
+                    classifiers={!r}
+                )
+                """.format(classifiers)
+                )
+            with open('setup.py', 'wt') as f:
+                f.write(contents)
+
+        cases = [
+            (2, "Programming Language :: Python :: 2.7"),
+            (2, "Programming Language :: Python :: 2"),
+            (2, "Programming Language :: Python :: 2 :: Only"),
+            (3, "Programming Language :: Python :: 3.7"),
+            (3, "Programming Language :: Python :: 3"),
+            (3, "Programming Language :: Python :: 3 :: Only"),
+        ]
+
+        # Every classifier on its own.
+        for expected, classifier in cases:
+            with in_fresh_temp_dir():
+                write_setup_py([classifier])
+                assert version.trove_version('.') == expected
+
+        # Every pair of classifiers: 3 is expected as soon as either one says Python 3.
+        for pair in itertools.combinations(cases, 2):
+            versions, classifiers = zip(*pair)
+            wanted = 3 if 3 in versions else 2
+            with in_fresh_temp_dir():
+                write_setup_py(classifiers)
+                assert version.trove_version('.') == wanted
+
+    @pytest.mark.xfail()
+    def test_tricked_regex_is_too_simple(self):
+        """A classifier-like string that only appears in `name=` should ideally give None."""
+        setup_py = dedent("""\
+                setup(
+                    name='Programming Language :: Python :: 2',
+                    classifiers=[],
+                )
+                """
+                )
+        with in_fresh_temp_dir():
+            with open('setup.py', 'wt') as f:
+                f.write(setup_py)
+            assert version.trove_version('.') is None
+
+    @pytest.mark.xfail()
+    def test_tricked_regex_is_too_simple2(self):
+        """A classifier that only appears in a commented-out line should ideally give None."""
+        setup_py = dedent("""\
+                setup(
+                    # classifiers=['Programming Language :: Python :: 2'],
+                )
+                """
+                )
+        with in_fresh_temp_dir():
+            with open('setup.py', 'wt') as f:
+                f.write(setup_py)
+            assert version.trove_version('.') is None
+
+    @pytest.mark.xfail()
+    def test_tricked_not_running_as_code(self):
+        """A classifier assembled by string concatenation should ideally still give version 2."""
+        with in_fresh_temp_dir():
+            with open('setup.py', 'wt') as f:
+                f.write(dedent("""\
+                c = 'Programming Language :: ' + 'Python :: 2'
+                setup(
+                    classifiers=[c],
+                )
+                """
+                ))
+            # `==` rather than `is`: identity comparison with an int literal is a
+            # SyntaxWarning on Python 3.8+ and depends on small-int caching.
+            assert version.trove_version('.') == 2
+
+    def test_constructing_other_place(self):
+        """A classifier string assigned to a variable outside `setup(...)` should give version 2."""
+        with in_fresh_temp_dir():
+            with open('setup.py', 'wt') as f:
+                f.write(dedent("""\
+                c = 'Programming Language :: Python :: 2'
+                setup(
+                    classifiers=[c],
+                )
+                """
+                ))
+            # `==` rather than `is`: identity comparison with an int literal is a
+            # SyntaxWarning on Python 3.8+ and depends on small-int caching.
+            assert version.trove_version('.') == 2
--- a/python/extractor/tests/data-imports/mod1.py
+++ b/python/extractor/tests/data-imports/mod1.py
@@ -0,0 +1 @@
+import mod2
--- a/python/extractor/tests/data-imports/mod2.py
+++ b/python/extractor/tests/data-imports/mod2.py
@@ -0,0 +1,2 @@
+import mod3
+import mod4
--- a/python/extractor/tests/data-imports/mod3.py
+++ b/python/extractor/tests/data-imports/mod3.py
--- a/python/extractor/tests/data-imports/mod4.py
+++ b/python/extractor/tests/data-imports/mod4.py
@@ -0,0 +1 @@
+import mod5
--- a/python/extractor/tests/data-imports/mod5.py
+++ b/python/extractor/tests/data-imports/mod5.py
--- a/python/extractor/tests/data-imports/mod6.py
+++ b/python/extractor/tests/data-imports/mod6.py
--- a/python/extractor/tests/data/mod1.py
+++ b/python/extractor/tests/data/mod1.py
--- a/python/extractor/tests/data/mod11.py
+++ b/python/extractor/tests/data/mod11.py
--- a/python/extractor/tests/data/mod2.py
+++ b/python/extractor/tests/data/mod2.py
--- a/python/extractor/tests/data/package/__init__.py
+++ b/python/extractor/tests/data/package/__init__.py
--- a/python/extractor/tests/data/package/sub/__init__.py
+++ b/python/extractor/tests/data/package/sub/__init__.py
--- a/python/extractor/tests/data/package/sub/a.py
+++ b/python/extractor/tests/data/package/sub/a.py
--- a/python/extractor/tests/data/package/sub/b.py
+++ b/python/extractor/tests/data/package/sub/b.py
--- a/python/extractor/tests/data/package/sub2.py
+++ b/python/extractor/tests/data/package/sub2.py
--- a/python/extractor/tests/data/package/x.py
+++ b/python/extractor/tests/data/package/x.py
--- a/python/extractor/tests/data/package/y.py
+++ b/python/extractor/tests/data/package/y.py
--- a/python/extractor/tests/dot-py/why.py/__init__.py
+++ b/python/extractor/tests/dot-py/why.py/__init__.py
@@ -0,0 +1,2 @@
+import a
+import why
--- a/python/extractor/tests/dot-py/why.py/a.py
+++ b/python/extractor/tests/dot-py/why.py/a.py
@@ -0,0 +1 @@
+x = 1
--- a/python/extractor/tests/lgtm_src/x.py
+++ b/python/extractor/tests/lgtm_src/x.py
--- a/python/extractor/tests/lgtm_src/y.py
+++ b/python/extractor/tests/lgtm_src/y.py
--- a/python/extractor/tests/off-path/nameless.py
+++ b/python/extractor/tests/off-path/nameless.py
@@ -0,0 +1,2 @@
+import mod1
+import mod2
--- a/python/extractor/tests/parser/alternating.py
+++ b/python/extractor/tests/parser/alternating.py
@@ -0,0 +1 @@
+(0, [1, (2, [3, (4, [5, 6])])])
--- a/python/extractor/tests/parser/assignment.py
+++ b/python/extractor/tests/parser/assignment.py
@@ -0,0 +1,17 @@
+
+#foo[bar].baz[(quux := 5)] = 5
+foo = 5
+
+baz, quux = 1, 2
+
+blah : int = 5
+
+just_the_type : float
+
+x, y = z, w = 3, 4
+
+(a, (b, (c, (d, e)))) = (j, (k, (l, (m, n))))
+
+s, *t = u
+
+o,p, = q,r,
--- a/python/extractor/tests/parser/call.py
+++ b/python/extractor/tests/parser/call.py
@@ -0,0 +1,11 @@
+foo(x,y,z=1,w=2)
+
+bar()()()
+
+baz(2+2, kw = 3*4)
+
+a(*b, **c)
+
+d(e,)
+
+f(g.h[i])
--- a/python/extractor/tests/parser/class.py
+++ b/python/extractor/tests/parser/class.py
@@ -0,0 +1,2 @@
+class Foo(int, object, metaclass=type):
+    x = 5
--- a/python/extractor/tests/parser/collections.py
+++ b/python/extractor/tests/parser/collections.py
@@ -0,0 +1,37 @@
+()
+
+[]
+
+{}
+
+[1,2,3]
+
+(4,5,6)
+
+{7: 8, 9: 10, 11: 12}
+
+{13, 14, 15}
+
+a = {x:y}
+
+b = {z:w, **a}
+
+c = [k,l,*m]
+
+(o,)
+
+(p,q,r,)
+
+s,
+
+t, u,
+
+(#comment
+  v, w
+#comment
+)
+
+(#comment
+  x, y,
+#comment
+)
--- a/python/extractor/tests/parser/comment-in-args.py
+++ b/python/extractor/tests/parser/comment-in-args.py
@@ -0,0 +1,15 @@
+"""
+At the time this test was added, when either comment 2 or comment 3 was present, this
+would cause the TSG parser to have an error.
+"""
+
+# comment 0
+print(
+    # comment 1
+    (
+        # comment 2
+        1
+        # comment 3
+)
+# comment 4
+)
--- a/python/extractor/tests/parser/comprehensions.py
+++ b/python/extractor/tests/parser/comprehensions.py
@@ -0,0 +1,57 @@
+(a
+    for b in c
+        if d
+        if e
+    for f in g
+        if h
+        if i
+)
+
+(a1 for b1 in c1)
+
+(a2 for b2 in c2 if d2)
+
+[k 
+    for l in m
+        if n
+        if o
+    for p in q
+        if r
+        if s
+]
+
+[k1 for l1 in m1]
+
+[k2 for l2 in m2 if n2]
+
+{p
+    for q in r
+        if s
+        if t
+    for u in v
+        if w
+        if x
+}
+
+{p1 for q1 in r1}
+
+{p2 for q2 in r2 if s2}
+
+    
+{k3: v3
+    for l3 in m3
+        if n3
+        if o3
+    for p3 in q3
+        if r3
+        if s3
+}
+
+{k4: v4 for l4 in m4}
+
+{k5: v5 for l5 in m5 if n5}
+
+# Special case for generator expressions inside calls
+t = tuple(x for y in z)
+
+[(  t,  ) for v in w]
--- a/python/extractor/tests/parser/empty.py
+++ b/python/extractor/tests/parser/empty.py
--- a/python/extractor/tests/parser/exception_groups_new.expected
+++ b/python/extractor/tests/parser/exception_groups_new.expected
@@ -0,0 +1,136 @@
+Module: [1, 0] - [22, 0]
+  body: [
+    Try: [1, 0] - [1, 4]
+      body: [
+        Expr: [2, 4] - [2, 5]
+          value:
+            Name: [2, 4] - [2, 5]
+              variable: Variable('a', None)
+              ctx: Load
+        Expr: [3, 4] - [3, 5]
+          value:
+            Name: [3, 4] - [3, 5]
+              variable: Variable('b', None)
+              ctx: Load
+      ]
+      orelse: [
+        Expr: [17, 4] - [17, 5]
+          value:
+            Name: [17, 4] - [17, 5]
+              variable: Variable('s', None)
+              ctx: Load
+        Expr: [18, 4] - [18, 5]
+          value:
+            Name: [18, 4] - [18, 5]
+              variable: Variable('t', None)
+              ctx: Load
+      ]
+      handlers: [
+        ExceptGroupStmt: [4, 0] - [6, 5]
+          type:
+            Name: [4, 8] - [4, 9]
+              variable: Variable('c', None)
+              ctx: Load
+          name: None
+          body: [
+            Expr: [5, 4] - [5, 5]
+              value:
+                Name: [5, 4] - [5, 5]
+                  variable: Variable('d', None)
+                  ctx: Load
+            Expr: [6, 4] - [6, 5]
+              value:
+                Name: [6, 4] - [6, 5]
+                  variable: Variable('e', None)
+                  ctx: Load
+          ]
+        ExceptGroupStmt: [7, 0] - [9, 5]
+          type:
+            Name: [7, 8] - [7, 9]
+              variable: Variable('f', None)
+              ctx: Load
+          name:
+            Name: [7, 13] - [7, 14]
+              variable: Variable('g', None)
+              ctx: Store
+          body: [
+            Expr: [8, 4] - [8, 5]
+              value:
+                Name: [8, 4] - [8, 5]
+                  variable: Variable('h', None)
+                  ctx: Load
+            Expr: [9, 4] - [9, 5]
+              value:
+                Name: [9, 4] - [9, 5]
+                  variable: Variable('i', None)
+                  ctx: Load
+          ]
+        ExceptGroupStmt: [10, 0] - [12, 5]
+          type:
+            Tuple: [10, 9] - [10, 13]
+              elts: [
+                Name: [10, 9] - [10, 10]
+                  variable: Variable('j', None)
+                  ctx: Load
+                Name: [10, 12] - [10, 13]
+                  variable: Variable('k', None)
+                  ctx: Load
+              ]
+              ctx: Load
+              parenthesised: True
+          name: None
+          body: [
+            Expr: [11, 4] - [11, 5]
+              value:
+                Name: [11, 4] - [11, 5]
+                  variable: Variable('l', None)
+                  ctx: Load
+            Expr: [12, 4] - [12, 5]
+              value:
+                Name: [12, 4] - [12, 5]
+                  variable: Variable('m', None)
+                  ctx: Load
+          ]
+        ExceptGroupStmt: [13, 0] - [15, 5]
+          type:
+            Tuple: [13, 9] - [13, 13]
+              elts: [
+                Name: [13, 9] - [13, 10]
+                  variable: Variable('n', None)
+                  ctx: Load
+                Name: [13, 12] - [13, 13]
+                  variable: Variable('o', None)
+                  ctx: Load
+              ]
+              ctx: Load
+              parenthesised: True
+          name:
+            Name: [13, 18] - [13, 19]
+              variable: Variable('p', None)
+              ctx: Store
+          body: [
+            Expr: [14, 4] - [14, 5]
+              value:
+                Name: [14, 4] - [14, 5]
+                  variable: Variable('q', None)
+                  ctx: Load
+            Expr: [15, 4] - [15, 5]
+              value:
+                Name: [15, 4] - [15, 5]
+                  variable: Variable('r', None)
+                  ctx: Load
+          ]
+      ]
+      finalbody: [
+        Expr: [20, 4] - [20, 5]
+          value:
+            Name: [20, 4] - [20, 5]
+              variable: Variable('u', None)
+              ctx: Load
+        Expr: [21, 4] - [21, 5]
+          value:
+            Name: [21, 4] - [21, 5]
+              variable: Variable('v', None)
+              ctx: Load
+      ]
+  ]
--- a/python/extractor/tests/parser/exception_groups_new.py
+++ b/python/extractor/tests/parser/exception_groups_new.py
@@ -0,0 +1,21 @@
+try:
+    a
+    b
+except* c:
+    d
+    e
+except* f as g:
+    h
+    i
+except* (j, k):
+    l
+    m
+except* (n, o) as p:
+    q
+    r
+else:
+    s
+    t
+finally:
+    u
+    v
--- a/python/extractor/tests/parser/exceptions.py
+++ b/python/extractor/tests/parser/exceptions.py
@@ -0,0 +1,18 @@
+try:
+    a
+    b
+except c as d:
+    e
+    f
+except g:
+    h
+    i
+except:
+    j
+    k
+else:
+    l
+    m
+finally:
+    n
+    o
--- a/python/extractor/tests/parser/expressions.py
+++ b/python/extractor/tests/parser/expressions.py
@@ -0,0 +1,14 @@
+1
+
+2 + 3
+
+4 * 5 / 6
+
+(7 + 8) * 9
+
+(10, 11)
+(            12,      13         )
+
+14         ,           15
+
+(match := 16)
--- a/python/extractor/tests/parser/for.py
+++ b/python/extractor/tests/parser/for.py
@@ -0,0 +1,15 @@
+async for x in y:
+    if z: continue
+    if w: break
+else:
+    v
+
+for ham in eggs:
+    spam
+
+for (a,b) in c:
+    pass
+
+for d, *e in f:
+    pass
+
--- a/python/extractor/tests/parser/functions.py
+++ b/python/extractor/tests/parser/functions.py
@@ -0,0 +1,58 @@
+def a(b): pass
+def c(*d): pass
+
+def foo(a, b, c=d, e:f, g:h=i, *j) -> t:
+    x
+    y
+
+def foo(l):
+    pass
+
+def bar(*k):
+    x1
+    y1
+
+def bar(*k, l, m:n, o:p=q, r=s, **u):
+    x1
+    y1
+
+def klef(*): pass
+
+def main(): pass
+
+@dec1(a,b)
+@dec2(c,d)
+def func(e,f,g):
+    h
+    i
+
+
+lambda: a
+
+lambda b: c
+
+lambda d, *e: f
+
+lambda *g, h: i
+
+lambda j=k: l
+
+lambda *m: n
+
+lambda **o: p
+
+lambda *p, q=r: s
+
+def typed_dictionary_splat(**kw : KEYWORD):
+    pass
+def typed_list_splat(*args : ARGS):
+    pass
+
+@False or True
+def decorated(): pass
+
+def all_separators(pos_only, /, pos_or_keyword, *, keyword_only): pass
+
+@decorator #comment
+def decorated_with_comment():
+    pass
--- a/python/extractor/tests/parser/if.py
+++ b/python/extractor/tests/parser/if.py
@@ -0,0 +1,11 @@
+if x: do_x
+elif y: do_y
+elif z: do_z
+else: do_else
+
+if a and b:
+    c
+# comment
+elif d or e:
+    f
+
--- a/python/extractor/tests/parser/just_comments.py
+++ b/python/extractor/tests/parser/just_comments.py
@@ -0,0 +1,8 @@
+# This is a comment
+# it goes on for many lines...
+# (Well, okay. Three lines.)
+
+# Here's one that's separated with some whitespace.
+
+# More whitespace at the end.
+
--- a/python/extractor/tests/parser/just_newlines.py
+++ b/python/extractor/tests/parser/just_newlines.py
@@ -0,0 +1,3 @@
+
+
+
--- a/python/extractor/tests/parser/match_new.expected
+++ b/python/extractor/tests/parser/match_new.expected
@@ -0,0 +1,382 @@
+Module: [1, 0] - [43, 0]
+  body: [
+    Match: [1, 0] - [3, 19]
+      subject:
+        List: [1, 6] - [1, 11]
+          elts: [
+            Num: [1, 7] - [1, 8]
+              n: 1
+              text: '1'
+            Num: [1, 9] - [1, 10]
+              n: 2
+              text: '2'
+          ]
+          ctx: Load
+      cases: [
+        Case: [2, 4] - [3, 19]
+          pattern:
+            MatchSequencePattern: [2, 9] - [2, 15]
+              patterns: [
+                MatchCapturePattern: [2, 10] - [2, 11]
+                  variable:
+                    Name: [2, 10] - [2, 11]
+                      variable: Variable('a', None)
+                      ctx: Store
+                MatchCapturePattern: [2, 13] - [2, 14]
+                  variable:
+                    Name: [2, 13] - [2, 14]
+                      variable: Variable('b', None)
+                      ctx: Store
+              ]
+          guard: None
+          body: [
+            Expr: [3, 8] - [3, 19]
+              value:
+                Call: [3, 8] - [3, 19]
+                  func:
+                    Name: [3, 8] - [3, 13]
+                      variable: Variable('print', None)
+                      ctx: Load
+                  positional_args: [
+                    Name: [3, 14] - [3, 15]
+                      variable: Variable('b', None)
+                      ctx: Load
+                    Name: [3, 17] - [3, 18]
+                      variable: Variable('a', None)
+                      ctx: Load
+                  ]
+                  named_args: []
+          ]
+      ]
+    Match: [5, 0] - [15, 12]
+      subject:
+        BinOp: [5, 6] - [5, 10]
+          left:
+            Num: [5, 6] - [5, 7]
+              n: 1
+              text: '1'
+          op: Add
+          right:
+            Num: [5, 8] - [5, 10]
+              n: 2j
+              text: '2j'
+      cases: [
+        Case: [6, 4] - [7, 12]
+          pattern:
+            MatchLiteralPattern: [6, 9] - [6, 13]
+              literal:
+                BinOp: [6, 9] - [6, 13]
+                  left:
+                    Num: [6, 9] - [6, 10]
+                      n: 1
+                      text: '1'
+                  op: Add
+                  right:
+                    Num: [6, 11] - [6, 13]
+                      n: 2j
+                      text: '2j'
+          guard: None
+          body: [
+            Pass: [7, 8] - [7, 12]
+          ]
+        Case: [8, 4] - [9, 12]
+          pattern:
+            MatchLiteralPattern: [8, 9] - [8, 18]
+              literal:
+                BinOp: [8, 9] - [8, 18]
+                  left:
+                    UnaryOp: [8, 10] - [8, 11]
+                      op: USub
+                      operand:
+                        Num: [8, 10] - [8, 11]
+                          n: 1
+                          text: '1'
+                  op: Sub
+                  right:
+                    Num: [8, 12] - [8, 18]
+                      n: 26000000j
+                      text: '2.6e7j'
+          guard: None
+          body: [
+            Pass: [9, 8] - [9, 12]
+          ]
+        Case: [10, 4] - [11, 12]
+          pattern:
+            MatchLiteralPattern: [10, 9] - [10, 11]
+              literal:
+                UnaryOp: [10, 10] - [10, 11]
+                  op: USub
+                  operand:
+                    Num: [10, 10] - [10, 11]
+                      n: 1
+                      text: '1'
+          guard: None
+          body: [
+            Pass: [11, 8] - [11, 12]
+          ]
+        Case: [12, 4] - [13, 12]
+          pattern:
+            MatchLiteralPattern: [12, 9] - [12, 10]
+              literal:
+                Num: [12, 9] - [12, 10]
+                  n: 2
+                  text: '2'
+          guard: None
+          body: [
+            Pass: [13, 8] - [13, 12]
+          ]
+        Case: [14, 4] - [15, 12]
+          pattern:
+            MatchLiteralPattern: [14, 9] - [14, 16]
+              literal:
+                BinOp: [14, 9] - [14, 16]
+                  left:
+                    UnaryOp: [14, 10] - [14, 13]
+                      op: USub
+                      operand:
+                        Num: [14, 10] - [14, 13]
+                          n: 1.5
+                          text: '1.5'
+                  op: Add
+                  right:
+                    Num: [14, 14] - [14, 16]
+                      n: 5j
+                      text: '5j'
+          guard: None
+          body: [
+            Pass: [15, 8] - [15, 12]
+          ]
+      ]
+    Assign: [17, 0] - [17, 20]
+      targets: [
+        Name: [17, 4] - [17, 17]
+          variable: Variable('soft_keywords', None)
+          ctx: Store
+      ]
+      value:
+        FunctionExpr: [17, 0] - [17, 20]
+          name: 'soft_keywords'
+          args:
+            arguments
+              defaults: []
+              kw_defaults: []
+              annotations: []
+              varargannotation: None
+              kwargannotation: None
+              kw_annotations: []
+          returns: None
+          inner_scope:
+            Function: [17, 0] - [17, 20]
+              name: 'soft_keywords'
+              type_parameters: []
+              args: []
+              vararg: None
+              kwonlyargs: []
+              kwarg: None
+              body: [
+                Assign: [18, 4] - [18, 13]
+                  targets: [
+                    Name: [18, 4] - [18, 9]
+                      variable: Variable('match', None)
+                      ctx: Store
+                  ]
+                  value:
+                    Num: [18, 12] - [18, 13]
+                      n: 0
+                      text: '0'
+                Assign: [19, 4] - [19, 12]
+                  targets: [
+                    Name: [19, 4] - [19, 8]
+                      variable: Variable('case', None)
+                      ctx: Store
+                  ]
+                  value:
+                    Num: [19, 11] - [19, 12]
+                      n: 0
+                      text: '0'
+                Match: [20, 4] - [22, 17]
+                  subject:
+                    Name: [20, 10] - [20, 15]
+                      variable: Variable('match', None)
+                      ctx: Load
+                  cases: [
+                    Case: [21, 8] - [22, 17]
+                      pattern:
+                        MatchCapturePattern: [21, 13] - [21, 17]
+                          variable:
+                            Name: [21, 13] - [21, 17]
+                              variable: Variable('case', None)
+                              ctx: Store
+                      guard: None
+                      body: [
+                        Assign: [22, 12] - [22, 17]
+                          targets: [
+                            Name: [22, 12] - [22, 13]
+                              variable: Variable('x', None)
+                              ctx: Store
+                          ]
+                          value:
+                            Num: [22, 16] - [22, 17]
+                              n: 0
+                              text: '0'
+                      ]
+                  ]
+              ]
+    Match: [24, 0] - [26, 12]
+      subject:
+        Tuple: [24, 7] - [24, 10]
+          elts: [
+            Num: [24, 7] - [24, 8]
+              n: 0
+              text: '0'
+            Num: [24, 9] - [24, 10]
+              n: 1
+              text: '1'
+          ]
+          ctx: Load
+          parenthesised: True
+      cases: [
+        Case: [25, 4] - [26, 12]
+          pattern:
+            MatchSequencePattern: [25, 9] - [25, 12]
+              patterns: [
+                MatchStarPattern: [25, 9] - [25, 11]
+                  target:
+                    MatchCapturePattern: [25, 10] - [25, 11]
+                      variable:
+                        Name: [25, 10] - [25, 11]
+                          variable: Variable('x', None)
+                          ctx: Store
+              ]
+          guard: None
+          body: [
+            Pass: [26, 8] - [26, 12]
+          ]
+      ]
+    Match: [28, 0] - [30, 12]
+      subject:
+        Tuple: [28, 7] - [28, 10]
+          elts: [
+            Num: [28, 7] - [28, 8]
+              n: 2
+              text: '2'
+            Num: [28, 9] - [28, 10]
+              n: 3
+              text: '3'
+          ]
+          ctx: Load
+          parenthesised: True
+      cases: [
+        Case: [29, 4] - [30, 12]
+          pattern:
+            MatchSequencePattern: [29, 9] - [29, 14]
+              patterns: [
+                MatchStarPattern: [29, 10] - [29, 12]
+                  target:
+                    MatchCapturePattern: [29, 11] - [29, 12]
+                      variable:
+                        Name: [29, 11] - [29, 12]
+                          variable: Variable('x', None)
+                          ctx: Store
+              ]
+          guard: None
+          body: [
+            Pass: [30, 8] - [30, 12]
+          ]
+      ]
+    Match: [32, 0] - [34, 13]
+      subject:
+        Tuple: [32, 6] - [32, 10]
+          elts: [
+            Name: [32, 6] - [32, 7]
+              variable: Variable('w', None)
+              ctx: Load
+            Name: [32, 9] - [32, 10]
+              variable: Variable('x', None)
+              ctx: Load
+          ]
+          ctx: Load
+          parenthesised: True
+      cases: [
+        Case: [33, 4] - [34, 13]
+          pattern:
+            MatchSequencePattern: [33, 9] - [33, 13]
+              patterns: [
+                MatchCapturePattern: [33, 9] - [33, 10]
+                  variable:
+                    Name: [33, 9] - [33, 10]
+                      variable: Variable('y', None)
+                      ctx: Store
+                MatchCapturePattern: [33, 12] - [33, 13]
+                  variable:
+                    Name: [33, 12] - [33, 13]
+                      variable: Variable('z', None)
+                      ctx: Store
+              ]
+          guard: None
+          body: [
+            Assign: [34, 8] - [34, 13]
+              targets: [
+                Name: [34, 8] - [34, 9]
+                  variable: Variable('v', None)
+                  ctx: Store
+              ]
+              value:
+                Num: [34, 12] - [34, 13]
+                  n: 0
+                  text: '0'
+          ]
+      ]
+    Match: [36, 0] - [38, 12]
+      subject:
+        Tuple: [36, 6] - [36, 10]
+          elts: [
+            Name: [36, 6] - [36, 7]
+              variable: Variable('x', None)
+              ctx: Load
+            Name: [36, 9] - [36, 10]
+              variable: Variable('y', None)
+              ctx: Load
+          ]
+          ctx: Load
+          parenthesised: True
+      cases: [
+        Case: [37, 4] - [38, 12]
+          pattern:
+            MatchSequencePattern: [37, 9] - [37, 13]
+              patterns: [
+                MatchLiteralPattern: [37, 9] - [37, 10]
+                  literal:
+                    Num: [37, 9] - [37, 10]
+                      n: 1
+                      text: '1'
+                MatchLiteralPattern: [37, 12] - [37, 13]
+                  literal:
+                    Num: [37, 12] - [37, 13]
+                      n: 2
+                      text: '2'
+              ]
+          guard: None
+          body: [
+            Pass: [38, 8] - [38, 12]
+          ]
+      ]
+    Match: [40, 0] - [42, 12]
+      subject:
+        Name: [40, 6] - [40, 7]
+          variable: Variable('z', None)
+          ctx: Load
+      cases: [
+        Case: [41, 4] - [42, 12]
+          pattern:
+            MatchCapturePattern: [41, 9] - [41, 10]
+              variable:
+                Name: [41, 9] - [41, 10]
+                  variable: Variable('w', None)
+                  ctx: Store
+          guard: None
+          body: [
+            Pass: [42, 8] - [42, 12]
+          ]
+      ]
+  ]
--- a/python/extractor/tests/parser/match_new.py
+++ b/python/extractor/tests/parser/match_new.py
@@ -0,0 +1,42 @@
+match [1,2]:
+    case (a, b):
+        print(b, a)
+
+match 1+2j:
+    case 1+2j:
+        pass
+    case -1-2.6e7j:
+        pass
+    case -1:
+        pass
+    case 2:
+        pass
+    case -1.5+5j:
+        pass
+
+def soft_keywords():
+    match = 0
+    case = 0
+    match match:
+        case case:
+            x = 0
+
+match (0,1):
+    case *x,:
+        pass
+
+match (2,3):
+    case (*x,):
+        pass
+
+match w, x:
+    case y, z:
+        v = 0
+
+match x, y:
+    case 1, 2:
+        pass
+
+match z:
+    case w:
+        pass
--- a/python/extractor/tests/parser/misc.py
+++ b/python/extractor/tests/parser/misc.py
@@ -0,0 +1,15 @@
+...
+
+a = (b := c)
+
+d = e if f else g
+
+h1[h2] = h3[h4]
+
+i[i1:i2]
+
+j[j1:j2:j3]
+
+k.l = m.n
+
+o[p,q]
--- a/python/extractor/tests/parser/numbers.py
+++ b/python/extractor/tests/parser/numbers.py
@@ -0,0 +1,12 @@
+1
+
+2.0
+
+3j
+
+0x4
+
+0o5
+
+0b110
+
--- a/python/extractor/tests/parser/operators.py
+++ b/python/extractor/tests/parser/operators.py
@@ -0,0 +1,54 @@
+1 and 2
+
+3 or 4
+
+5 == 6
+
+7 > 8
+
+9 >= 10
+
+11 in 12 
+
+13 is 14
+
+15 is not 16
+
+17 < 18
+
+19 <= 20
+
+21 != 22
+
+23 <> 24
+
+25 not in 26
+
+27 > 28 >= 29 < 30 <= 31 == 32 != 33 <> 34
+
+35
+
+-36
+
+~37
+
+not 38
+
+# or(not(a), b)
+not a or b
+
+# and(c, d, e, f, g)
+c and d and e and f and g
+
+# or(h, i, j, k, l)
+h or i or j or k or l
+
+# or(and(m, n), and(o, p))
+m and n or o and p
+
+# or(q, and(s, t), u)
+q or s and t or u
+
+a1 or b1 and c1
+
+d1 and e1 or f1
--- a/python/extractor/tests/parser/simple_statements.py
+++ b/python/extractor/tests/parser/simple_statements.py
@@ -0,0 +1,70 @@
+# Statements that do not contain any other statements.
+
+pass
+
+a = b
+
+c : int = 1
+
+d += e
+
+del f
+
+del f1, f2
+
+global h
+
+global h1, h2
+
+nonlocal i
+
+nonlocal i1, i2
+
+import j
+
+import j1, j2
+
+import j3.j4.j5, j6.j7.j8 as j9
+
+import j10.j11 as j12
+
+from k import l
+
+from ..k1.k2 import l1 as l2, l3
+
+from __future__ import print_function, goto_statement
+
+from . import l4
+
+from l5 import *
+
+from ..l6 import *
+from ... import *
+
+raise
+
+raise m
+
+raise m1 from m2
+
+raise m3, m4
+
+raise m5, m6, m7
+
+assert n
+
+assert n1, n2
+
+return o
+
+return *o1,
+
+return 1, *o2
+
+return 2, *o3,
+
+yield p
+
+yield from q
+
+await r
--- a/python/extractor/tests/parser/simple_statements_py2.py
+++ b/python/extractor/tests/parser/simple_statements_py2.py
@@ -0,0 +1,8 @@
+
+exec "ls"
+
+print "Hello"
+
+print "two parts", "no newline",
+
+print >> f, "World"
--- a/python/extractor/tests/parser/strings.py
+++ b/python/extractor/tests/parser/strings.py
@@ -0,0 +1,79 @@
+if 1:
+    "double quotes"
+if 2:
+    'single quotes'
+if 3:
+    """triple double quotes (sextuple quotes?)"""
+if 4:
+    '''triple single quotes'''
+if 5:
+    r"raw string"
+if 6:
+    b"byte string"
+if 7:
+    u"unicode string"
+if 8:
+    br"raw byte string"
+if 9:
+    "Let's put some control\tcharacters in here\n"
+if 10:
+    """
+    Multiline
+    string
+    time
+    """
+if 11:
+    "escaped \"quotes\" here"
+if 12:
+    """Unescaped "quotes" inside triple quotes"""
+if 13:
+    "string" """concatenation""" 'here' '''oh yeah'''
+if 14:
+    f"format string with no funny business"
+if 15:
+    f"format string with {1} interpolation"
+if 16:
+    f"{2}{3}{4}"
+if 17:
+    f"and a format string with {'nested'} string"
+if 18:
+    f"foo{x}bar" "regular string"
+if 19:
+    pass
+    # This doesn't quite give the right result, but it's close enough.
+    #f"no interpolation" ' but still implicit concatenation'
+if 20:
+    f"{9}" "blah" f'{10}'
+if 21:
+    f"format{129}string" "not format"
+if 21.1:
+    # regression from https://github.com/github/codeql/issues/9940
+    f"format{123}string" f"technically format string\n"
+if 22:
+    "again not format" f"format again{foo}hello"
+if 23:
+    f"""f-string with {"inner " 'implicit ' '''concatenation'''} how awful"""
+if 24:
+    f'''oh no python { f'why do you {"allow"} such'} absolute horrors?'''
+if 25:
+    b"""5""" b"byte format"
+if 26:
+    r'X(\u0061|a)*Y'
+if 27:
+    f"""triple-quoted {11}""f-st""" fr"""ri'''ng\\\\\""{12} with an inner quoted part"""
+if 28:
+    f'{value:{width + padding!r}.{precision}}'
+if 29:
+    f'{1,}'
+if 30:
+    fr"""quotes before interpolation "{123}" are okay."""
+if 31:
+    fr"""backslash before an interpolation \{456}\ are okay."""
+if 32:
+    f''
+if 33:
+    ''
+if 34:
+    b'\xc5\xe5'
+if 35:
+    f"{x=}"
--- a/python/extractor/tests/parser/strings_3.12_new.expected
+++ b/python/extractor/tests/parser/strings_3.12_new.expected
@@ -0,0 +1,305 @@
+Module: [2, 0] - [22, 0]
+  body: [
+    Assign: [2, 0] - [2, 60]
+      targets: [
+        Name: [2, 0] - [2, 5]
+          variable: Variable('songs', None)
+          ctx: Store
+      ]
+      value:
+        List: [2, 8] - [2, 60]
+          elts: [
+            Str: [2, 9] - [2, 31]
+              s: 'Take me back to Eden'
+              prefix: "'"
+              implicitly_concatenated_parts: None
+            Str: [2, 33] - [2, 43]
+              s: 'Alkaline'
+              prefix: "'"
+              implicitly_concatenated_parts: None
+            Str: [2, 45] - [2, 59]
+              s: 'Ascensionism'
+              prefix: "'"
+              implicitly_concatenated_parts: None
+          ]
+          ctx: Load
+    Expr: [3, 0] - [3, 43]
+      value:
+        JoinedStr: [3, 0] - [3, 43]
+          values: [
+            Str: [3, 0] - [3, 25]
+              s: 'This is the playlist: '
+              prefix: 'f"'
+              implicitly_concatenated_parts: None
+            Call: [3, 25] - [3, 41]
+              func:
+                Attribute: [3, 25] - [3, 34]
+                  value:
+                    Str: [3, 25] - [3, 29]
+                      s: ', '
+                      prefix: '"'
+                      implicitly_concatenated_parts: None
+                  attr: 'join'
+                  ctx: Load
+              positional_args: [
+                Name: [3, 35] - [3, 40]
+                  variable: Variable('songs', None)
+                  ctx: Load
+              ]
+              named_args: []
+            Str: [3, 41] - [3, 43]
+              s: ''
+              prefix: 'f"'
+              implicitly_concatenated_parts: None
+          ]
+    Expr: [6, 0] - [6, 31]
+      value:
+        JoinedStr: [6, 0] - [6, 31]
+          values: [
+            Str: [6, 0] - [6, 5]
+              s: ''
+              prefix: 'f"""'
+              implicitly_concatenated_parts: None
+            JoinedStr: [6, 5] - [6, 27]
+              values: [
+                Str: [6, 5] - [6, 10]
+                  s: ''
+                  prefix: "f'''"
+                  implicitly_concatenated_parts: None
+                JoinedStr: [6, 10] - [6, 23]
+                  values: [
+                    Str: [6, 10] - [6, 13]
+                      s: ''
+                      prefix: "f'"
+                      implicitly_concatenated_parts: None
+                    JoinedStr: [6, 13] - [6, 21]
+                      values: [
+                        Str: [6, 13] - [6, 16]
+                          s: ''
+                          prefix: 'f"'
+                          implicitly_concatenated_parts: None
+                        BinOp: [6, 16] - [6, 19]
+                          left:
+                            Num: [6, 16] - [6, 17]
+                              n: 1
+                              text: '1'
+                          op: Add
+                          right:
+                            Num: [6, 18] - [6, 19]
+                              n: 1
+                              text: '1'
+                        Str: [6, 19] - [6, 21]
+                          s: ''
+                          prefix: 'f"'
+                          implicitly_concatenated_parts: None
+                      ]
+                    Str: [6, 21] - [6, 23]
+                      s: ''
+                      prefix: "f'"
+                      implicitly_concatenated_parts: None
+                  ]
+                Str: [6, 23] - [6, 27]
+                  s: ''
+                  prefix: "f'''"
+                  implicitly_concatenated_parts: None
+              ]
+            Str: [6, 27] - [6, 31]
+              s: ''
+              prefix: 'f"""'
+              implicitly_concatenated_parts: None
+          ]
+    Expr: [9, 0] - [9, 33]
+      value:
+        JoinedStr: [9, 0] - [9, 33]
+          values: [
+            Str: [9, 0] - [9, 3]
+              s: ''
+              prefix: 'f"'
+              implicitly_concatenated_parts: None
+            JoinedStr: [9, 3] - [9, 31]
+              values: [
+                Str: [9, 3] - [9, 6]
+                  s: ''
+                  prefix: 'f"'
+                  implicitly_concatenated_parts: None
+                JoinedStr: [9, 6] - [9, 29]
+                  values: [
+                    Str: [9, 6] - [9, 9]
+                      s: ''
+                      prefix: 'f"'
+                      implicitly_concatenated_parts: None
+                    JoinedStr: [9, 9] - [9, 27]
+                      values: [
+                        Str: [9, 9] - [9, 12]
+                          s: ''
+                          prefix: 'f"'
+                          implicitly_concatenated_parts: None
+                        JoinedStr: [9, 12] - [9, 25]
+                          values: [
+                            Str: [9, 12] - [9, 15]
+                              s: ''
+                              prefix: 'f"'
+                              implicitly_concatenated_parts: None
+                            JoinedStr: [9, 15] - [9, 23]
+                              values: [
+                                Str: [9, 15] - [9, 18]
+                                  s: ''
+                                  prefix: 'f"'
+                                  implicitly_concatenated_parts: None
+                                BinOp: [9, 18] - [9, 21]
+                                  left:
+                                    Num: [9, 18] - [9, 19]
+                                      n: 1
+                                      text: '1'
+                                  op: Add
+                                  right:
+                                    Num: [9, 20] - [9, 21]
+                                      n: 1
+                                      text: '1'
+                                Str: [9, 21] - [9, 23]
+                                  s: ''
+                                  prefix: 'f"'
+                                  implicitly_concatenated_parts: None
+                              ]
+                            Str: [9, 23] - [9, 25]
+                              s: ''
+                              prefix: 'f"'
+                              implicitly_concatenated_parts: None
+                          ]
+                        Str: [9, 25] - [9, 27]
+                          s: ''
+                          prefix: 'f"'
+                          implicitly_concatenated_parts: None
+                      ]
+                    Str: [9, 27] - [9, 29]
+                      s: ''
+                      prefix: 'f"'
+                      implicitly_concatenated_parts: None
+                  ]
+                Str: [9, 29] - [9, 31]
+                  s: ''
+                  prefix: 'f"'
+                  implicitly_concatenated_parts: None
+              ]
+            Str: [9, 31] - [9, 33]
+              s: ''
+              prefix: 'f"'
+              implicitly_concatenated_parts: None
+          ]
+    Expr: [12, 0] - [16, 4]
+      value:
+        JoinedStr: [12, 0] - [16, 4]
+          values: [
+            Str: [12, 0] - [12, 25]
+              s: 'This is the playlist: '
+              prefix: 'f"'
+              implicitly_concatenated_parts: None
+            Call: [12, 25] - [16, 2]
+              func:
+                Attribute: [12, 25] - [12, 34]
+                  value:
+                    Str: [12, 25] - [12, 29]
+                      s: ', '
+                      prefix: '"'
+                      implicitly_concatenated_parts: None
+                  attr: 'join'
+                  ctx: Load
+              positional_args: [
+                List: [12, 35] - [16, 1]
+                  elts: [
+                    Str: [13, 4] - [13, 26]
+                      s: 'Take me back to Eden'
+                      prefix: "'"
+                      implicitly_concatenated_parts: None
+                    Str: [14, 4] - [14, 14]
+                      s: 'Alkaline'
+                      prefix: "'"
+                      implicitly_concatenated_parts: None
+                    Str: [15, 4] - [15, 18]
+                      s: 'Ascensionism'
+                      prefix: "'"
+                      implicitly_concatenated_parts: None
+                  ]
+                  ctx: Load
+              ]
+              named_args: []
+            Str: [16, 2] - [16, 4]
+              s: ''
+              prefix: 'f"'
+              implicitly_concatenated_parts: None
+          ]
+    Expr: [19, 0] - [19, 50]
+      value:
+        Call: [19, 0] - [19, 50]
+          func:
+            Name: [19, 0] - [19, 5]
+              variable: Variable('print', None)
+              ctx: Load
+          positional_args: [
+            JoinedStr: [19, 6] - [19, 49]
+              values: [
+                Str: [19, 6] - [19, 31]
+                  s: 'This is the playlist: '
+                  prefix: 'f"'
+                  implicitly_concatenated_parts: None
+                Call: [19, 31] - [19, 47]
+                  func:
+                    Attribute: [19, 31] - [19, 40]
+                      value:
+                        Str: [19, 31] - [19, 35]
+                          s: '\n'
+                          prefix: '"'
+                          implicitly_concatenated_parts: None
+                      attr: 'join'
+                      ctx: Load
+                  positional_args: [
+                    Name: [19, 41] - [19, 46]
+                      variable: Variable('songs', None)
+                      ctx: Load
+                  ]
+                  named_args: []
+                Str: [19, 47] - [19, 49]
+                  s: ''
+                  prefix: 'f"'
+                  implicitly_concatenated_parts: None
+              ]
+          ]
+          named_args: []
+    Expr: [21, 0] - [21, 68]
+      value:
+        Call: [21, 0] - [21, 68]
+          func:
+            Name: [21, 0] - [21, 5]
+              variable: Variable('print', None)
+              ctx: Load
+          positional_args: [
+            JoinedStr: [21, 6] - [21, 67]
+              values: [
+                Str: [21, 6] - [21, 31]
+                  s: 'This is the playlist: '
+                  prefix: 'f"'
+                  implicitly_concatenated_parts: None
+                Call: [21, 31] - [21, 65]
+                  func:
+                    Attribute: [21, 31] - [21, 58]
+                      value:
+                        Str: [21, 31] - [21, 53]
+                          s: '♥'
+                          prefix: '"'
+                          implicitly_concatenated_parts: None
+                      attr: 'join'
+                      ctx: Load
+                  positional_args: [
+                    Name: [21, 59] - [21, 64]
+                      variable: Variable('songs', None)
+                      ctx: Load
+                  ]
+                  named_args: []
+                Str: [21, 65] - [21, 67]
+                  s: ''
+                  prefix: 'f"'
+                  implicitly_concatenated_parts: None
+              ]
+          ]
+          named_args: []
+  ]
--- a/python/extractor/tests/parser/strings_3.12_new.py
+++ b/python/extractor/tests/parser/strings_3.12_new.py
@@ -0,0 +1,21 @@
+# An expression containing the same kind of quotes as the outer f-string
+songs = ['Take me back to Eden', 'Alkaline', 'Ascensionism']
+f"This is the playlist: {", ".join(songs)}"
+
+# An example of the previously maximal level of nesting
+f"""{f'''{f'{f"{1+1}"}'}'''}"""
+
+# An example of the new, unlimited level of nesting
+f"{f"{f"{f"{f"{f"{1+1}"}"}"}"}"}"
+
+# An f-string with newlines inside the expression part
+f"This is the playlist: {", ".join([
+    'Take me back to Eden',  # My, my, those eyes like fire
+    'Alkaline',              # Not acid nor alkaline
+    'Ascensionism'           # Take to the broken skies at last
+])}"
+
+# Two instances of string escaping used inside the expression part
+print(f"This is the playlist: {"\n".join(songs)}")
+
+print(f"This is the playlist: {"\N{BLACK HEART SUIT}".join(songs)}")
--- a/python/extractor/tests/parser/strings_new.expected
+++ b/python/extractor/tests/parser/strings_new.expected
@@ -0,0 +1,265 @@
+Module: [1, 0] - [31, 0]
+  body: [
+    If: [1, 0] - [1, 5]
+      test:
+        Num: [1, 3] - [1, 4]
+          n: 1
+          text: '1'
+      body: [
+        Expr: [2, 4] - [2, 72]
+          value:
+            JoinedStr: [2, 4] - [2, 72]
+              values: [
+                Str: [2, 4] - [2, 61]
+                  s: 'this is not a unicode escape but an interpolation: \\N'
+                  prefix: 'fr"'
+                  implicitly_concatenated_parts: None
+                Name: [2, 61] - [2, 70]
+                  variable: Variable('AMPERSAND', None)
+                  ctx: Load
+                Str: [2, 70] - [2, 72]
+                  s: ''
+                  prefix: 'fr"'
+                  implicitly_concatenated_parts: None
+              ]
+      ]
+      orelse: None
+    If: [3, 0] - [3, 5]
+      test:
+        Num: [3, 3] - [3, 4]
+          n: 2
+          text: '2'
+      body: [
+        Expr: [4, 4] - [4, 44]
+          value:
+            JoinedStr: [4, 4] - [4, 44]
+              values: [
+                Str: [4, 4] - [4, 33]
+                  s: 'also an interpolation: \\N'
+                  prefix: "f'"
+                  implicitly_concatenated_parts: None
+                Name: [4, 33] - [4, 42]
+                  variable: Variable('AMPERSAND', None)
+                  ctx: Load
+                Str: [4, 42] - [4, 44]
+                  s: ''
+                  prefix: "f'"
+                  implicitly_concatenated_parts: None
+              ]
+      ]
+      orelse: None
+    If: [5, 0] - [5, 5]
+      test:
+        Num: [5, 3] - [5, 4]
+          n: 3
+          text: '3'
+      body: [
+        Expr: [6, 4] - [6, 14]
+          value:
+            Str: [6, 4] - [6, 14]
+              s: '\\Nspam'
+              prefix: "f'"
+              implicitly_concatenated_parts: None
+      ]
+      orelse: None
+    If: [7, 0] - [7, 5]
+      test:
+        Num: [7, 3] - [7, 4]
+          n: 4
+          text: '4'
+      body: [
+        Expr: [8, 4] - [8, 46]
+          value:
+            Str: [8, 4] - [8, 46]
+              s: 'this is a unicode escape: &'
+              prefix: 'f"'
+              implicitly_concatenated_parts: None
+      ]
+      orelse: None
+    If: [9, 0] - [9, 5]
+      test:
+        Num: [9, 3] - [9, 4]
+          n: 5
+          text: '5'
+      body: [
+        Expr: [10, 4] - [10, 55]
+          value:
+            Str: [10, 4] - [10, 55]
+              s: 'this is also not a unicode escape: \\N{AMPERSAND}'
+              prefix: 'r"'
+              implicitly_concatenated_parts: None
+      ]
+      orelse: None
+    If: [11, 0] - [11, 5]
+      test:
+        Num: [11, 3] - [11, 4]
+          n: 6
+          text: '6'
+      body: [
+        Expr: [12, 4] - [12, 20]
+          value:
+            Str: [12, 4] - [12, 20]
+              s: '\\N{AMPERSAND}'
+              prefix: "'"
+              implicitly_concatenated_parts: None
+      ]
+      orelse: None
+    If: [13, 0] - [13, 5]
+      test:
+        Num: [13, 3] - [13, 4]
+          n: 7
+          text: '7'
+      body: [
+        Expr: [14, 4] - [14, 13]
+          value:
+            Str: [14, 4] - [14, 13]
+              s: '\\Nspam'
+              prefix: "'"
+              implicitly_concatenated_parts: None
+      ]
+      orelse: None
+    If: [15, 0] - [15, 5]
+      test:
+        Num: [15, 3] - [15, 4]
+          n: 8
+          text: '8'
+      body: [
+        Expr: [16, 4] - [16, 55]
+          value:
+            Str: [16, 4] - [16, 55]
+              s: 'this is also also a unicode escape: &'
+              prefix: '"'
+              implicitly_concatenated_parts: None
+      ]
+      orelse: None
+    If: [17, 0] - [17, 5]
+      test:
+        Num: [17, 3] - [17, 4]
+          n: 9
+          text: '9'
+      body: [
+        Expr: [18, 4] - [18, 56]
+          value:
+            Str: [18, 4] - [18, 56]
+              s: 'this is also not a unicode escape: \\N{AMPERSAND}'
+              prefix: 'rb"'
+              implicitly_concatenated_parts: None
+      ]
+      orelse: None
+    If: [19, 0] - [19, 6]
+      test:
+        Num: [19, 3] - [19, 5]
+          n: 10
+          text: '10'
+      body: [
+        Expr: [20, 4] - [20, 21]
+          value:
+            Str: [20, 4] - [20, 21]
+              s: '\\N{AMPERSAND}'
+              prefix: "b'"
+              implicitly_concatenated_parts: None
+      ]
+      orelse: None
+    If: [21, 0] - [21, 6]
+      test:
+        Num: [21, 3] - [21, 5]
+          n: 11
+          text: '11'
+      body: [
+        Expr: [22, 4] - [22, 14]
+          value:
+            Str: [22, 4] - [22, 14]
+              s: '\\Nspam'
+              prefix: "b'"
+              implicitly_concatenated_parts: None
+      ]
+      orelse: None
+    If: [23, 0] - [23, 6]
+      test:
+        Num: [23, 3] - [23, 5]
+          n: 12
+          text: '12'
+      body: [
+        Expr: [24, 4] - [24, 81]
+          value:
+            Str: [24, 4] - [24, 81]
+              s: 'this is not a unicode escape because we are in a bytestring: \\N{AMPERSAND}'
+              prefix: 'b"'
+              implicitly_concatenated_parts: None
+      ]
+      orelse: None
+    If: [25, 0] - [25, 6]
+      test:
+        Num: [25, 3] - [25, 5]
+          n: 13
+          text: '13'
+      body: [
+        Expr: [26, 4] - [26, 55]
+          value:
+            JoinedStr: [26, 4] - [26, 55]
+              values: [
+                Str: [26, 4] - [26, 39]
+                  s: 'quotes before interpolation "'
+                  prefix: 'fr"""'
+                  implicitly_concatenated_parts: None
+                Num: [26, 39] - [26, 40]
+                  n: 0
+                  text: '0'
+                Str: [26, 40] - [26, 55]
+                  s: '" are okay.'
+                  prefix: 'fr"""'
+                  implicitly_concatenated_parts: None
+              ]
+      ]
+      orelse: None
+    If: [27, 0] - [27, 6]
+      test:
+        Num: [27, 3] - [27, 5]
+          n: 14
+          text: '14'
+      body: [
+        Expr: [28, 4] - [28, 61]
+          value:
+            JoinedStr: [28, 4] - [28, 61]
+              values: [
+                Str: [28, 4] - [28, 45]
+                  s: 'backslash before an interpolation \\'
+                  prefix: 'fr"""'
+                  implicitly_concatenated_parts: None
+                Num: [28, 45] - [28, 46]
+                  n: 1
+                  text: '1'
+                Str: [28, 46] - [28, 61]
+                  s: '\\ are okay.'
+                  prefix: 'fr"""'
+                  implicitly_concatenated_parts: None
+              ]
+      ]
+      orelse: None
+    If: [29, 0] - [29, 6]
+      test:
+        Num: [29, 3] - [29, 5]
+          n: 15
+          text: '15'
+      body: [
+        Expr: [30, 4] - [30, 54]
+          value:
+            JoinedStr: [30, 4] - [30, 54]
+              values: [
+                Str: [30, 4] - [30, 33]
+                  s: 'Yield inside an f-string: '
+                  prefix: 'f"'
+                  implicitly_concatenated_parts: None
+                Yield: [30, 33] - [30, 40]
+                  value:
+                    Num: [30, 39] - [30, 40]
+                      n: 5
+                      text: '5'
+                Str: [30, 40] - [30, 54]
+                  s: ' is allowed.'
+                  prefix: 'f"'
+                  implicitly_concatenated_parts: None
+              ]
+      ]
+      orelse: None
+  ]
--- a/python/extractor/tests/parser/strings_new.py
+++ b/python/extractor/tests/parser/strings_new.py
@@ -0,0 +1,30 @@
+if 1:
+    fr"this is not a unicode escape but an interpolation: \N{AMPERSAND}"
+if 2:
+    f'also an interpolation: \\N{AMPERSAND}'
+if 3:
+    f'\\Nspam'
+if 4:
+    f"this is a unicode escape: \N{AMPERSAND}"
+if 5:
+    r"this is also not a unicode escape: \N{AMPERSAND}"
+if 6:
+    '\\N{AMPERSAND}'
+if 7:
+    '\\Nspam'
+if 8:
+    "this is also also a unicode escape: \N{AMPERSAND}"
+if 9:
+    rb"this is also not a unicode escape: \N{AMPERSAND}"
+if 10:
+    b'\\N{AMPERSAND}'
+if 11:
+    b'\\Nspam'
+if 12:
+    b"this is not a unicode escape because we are in a bytestring: \N{AMPERSAND}"
+if 13:
+    fr"""quotes before interpolation "{0}" are okay."""
+if 14:
+    fr"""backslash before an interpolation \{1}\ are okay."""
+if 15:
+    f"Yield inside an f-string: {yield 5} is allowed."
--- a/python/extractor/tests/parser/types_new.expected
+++ b/python/extractor/tests/parser/types_new.expected
@@ -0,0 +1,142 @@
+Module: [1, 0] - [6, 0]
+  body: [
+    TypeAlias: [1, 0] - [1, 34]
+      name:
+        Name: [1, 5] - [1, 6]
+          variable: Variable('T', None)
+          ctx: Store
+      type_parameters: [
+        TypeVar: [1, 7] - [1, 9]
+          name:
+            Name: [1, 7] - [1, 9]
+              variable: Variable('T1', None)
+              ctx: Store
+          bound: None
+        TypeVar: [1, 11] - [1, 17]
+          name:
+            Name: [1, 11] - [1, 13]
+              variable: Variable('T2', None)
+              ctx: Store
+          bound:
+            Name: [1, 15] - [1, 17]
+              variable: Variable('E1', None)
+              ctx: Load
+        TypeVarTuple: [1, 19] - [1, 22]
+          name:
+            Name: [1, 20] - [1, 22]
+              variable: Variable('T3', None)
+              ctx: Store
+        ParamSpec: [1, 24] - [1, 28]
+          name:
+            Name: [1, 26] - [1, 28]
+              variable: Variable('T4', None)
+              ctx: Store
+      ]
+      value:
+        Name: [1, 32] - [1, 34]
+          variable: Variable('T5', None)
+          ctx: Load
+    Assign: [3, 0] - [3, 31]
+      targets: [
+        Name: [3, 4] - [3, 5]
+          variable: Variable('f', None)
+          ctx: Store
+      ]
+      value:
+        FunctionExpr: [3, 0] - [3, 31]
+          name: 'f'
+          args:
+            arguments
+              defaults: []
+              kw_defaults: []
+              annotations: []
+              varargannotation: None
+              kwargannotation: None
+              kw_annotations: []
+          returns: None
+          inner_scope:
+            Function: [3, 0] - [3, 31]
+              name: 'f'
+              type_parameters: [
+                TypeVar: [3, 6] - [3, 8]
+                  name:
+                    Name: [3, 6] - [3, 8]
+                      variable: Variable('T6', None)
+                      ctx: Store
+                  bound: None
+                TypeVar: [3, 10] - [3, 16]
+                  name:
+                    Name: [3, 10] - [3, 12]
+                      variable: Variable('T7', None)
+                      ctx: Store
+                  bound:
+                    Name: [3, 14] - [3, 16]
+                      variable: Variable('E2', None)
+                      ctx: Load
+                TypeVarTuple: [3, 18] - [3, 21]
+                  name:
+                    Name: [3, 19] - [3, 21]
+                      variable: Variable('T8', None)
+                      ctx: Store
+                ParamSpec: [3, 23] - [3, 27]
+                  name:
+                    Name: [3, 25] - [3, 27]
+                      variable: Variable('T9', None)
+                      ctx: Store
+              ]
+              args: []
+              vararg: None
+              kwonlyargs: []
+              kwarg: None
+              body: [
+                Expr: [3, 32] - [3, 35]
+                  value:
+                    Ellipsis: [3, 32] - [3, 35]
+              ]
+    Assign: [5, 0] - [5, 35]
+      targets: [
+        Name: [5, 6] - [5, 7]
+          variable: Variable('C', None)
+          ctx: Store
+      ]
+      value:
+        ClassExpr: [5, 0] - [5, 35]
+          name: 'C'
+          type_parameters: [
+            TypeVar: [5, 8] - [5, 11]
+              name:
+                Name: [5, 8] - [5, 11]
+                  variable: Variable('T10', None)
+                  ctx: Store
+              bound: None
+            TypeVar: [5, 13] - [5, 20]
+              name:
+                Name: [5, 13] - [5, 16]
+                  variable: Variable('T11', None)
+                  ctx: Store
+              bound:
+                Name: [5, 18] - [5, 20]
+                  variable: Variable('E3', None)
+                  ctx: Load
+            TypeVarTuple: [5, 22] - [5, 26]
+              name:
+                Name: [5, 23] - [5, 26]
+                  variable: Variable('T12', None)
+                  ctx: Store
+            ParamSpec: [5, 28] - [5, 33]
+              name:
+                Name: [5, 30] - [5, 33]
+                  variable: Variable('T13', None)
+                  ctx: Store
+          ]
+          bases: []
+          keywords: []
+          inner_scope:
+            Class: [5, 0] - [5, 35]
+              name: 'C'
+              body: [
+                Expr: [5, 36] - [5, 39]
+                  value:
+                    Ellipsis: [5, 36] - [5, 39]
+              ]
+  ]
--- a/python/extractor/tests/parser/types_new.py
+++ b/python/extractor/tests/parser/types_new.py
@@ -0,0 +1,5 @@
+type T[T1, T2: E1, *T3, **T4] = T5
+
+def f[T6, T7: E2, *T8, **T9](): ...
+
+class C[T10, T11: E3, *T12, **T13]: ...
--- a/python/extractor/tests/parser/while.py
+++ b/python/extractor/tests/parser/while.py
@@ -0,0 +1,6 @@
+while a:
+    b
+    c
+else:
+    d
+    e
--- a/python/extractor/tests/parser/with.py
+++ b/python/extractor/tests/parser/with.py
@@ -0,0 +1,9 @@
+with a as b:
+    c
+    d
+
+with f as g, h as i:
+    j
+
+with k, l:
+    m
--- a/python/extractor/tests/project_layout/project-layout
+++ b/python/extractor/tests/project_layout/project-layout
@@ -0,0 +1,2 @@
+#/target
+**//src
--- a/python/extractor/tests/project_layout/src/mod1.py
+++ b/python/extractor/tests/project_layout/src/mod1.py
--- a/python/extractor/tests/source_archive_unchanged/src/no_newline.py
+++ b/python/extractor/tests/source_archive_unchanged/src/no_newline.py
@@ -0,0 +1 @@
+print("Hello world! This line of code has no newline at the end.")
--- a/python/extractor/tests/source_archive_unchanged/src/weird_bytes.py
+++ b/python/extractor/tests/source_archive_unchanged/src/weird_bytes.py
@@ -0,0 +1,2 @@
+print("This line of code ends with a non-standard newline:")
+print("This string contains weird bytes: a<>b")
--- a/python/extractor/tests/syntax-error/error.py
+++ b/python/extractor/tests/syntax-error/error.py
@@ -0,0 +1 @@
+This is a syntax error!
--- a/python/extractor/tests/test_concurrent_cache.py
+++ b/python/extractor/tests/test_concurrent_cache.py
@@ -0,0 +1,95 @@
+
+import sys
+import os.path
+import unittest
+import multiprocessing
+
+import semmle
+from tests import test_utils
+from semmle.util import makedirs
+
+
+ITERATIONS = 100
+CONCURRENCY = 20
+
+class ConcurrentCacheTest(test_utils.ExtractorTest):
+    '''Exercises the extractor cache from many processes at once.'''
+
+    def __init__(self, name):
+        super(ConcurrentCacheTest, self).__init__(name)
+        cache_location = os.path.join(self.here, "cache")
+        self.cachedir = os.path.abspath(cache_location)
+
+    def setUp(self):
+        super(ConcurrentCacheTest, self).setUp()
+        makedirs(self.cachedir)
+        self.cache = semmle.cache.Cache(self.cachedir)
+
+    def tearDown(self):
+        super(ConcurrentCacheTest, self).tearDown()
+
+    def _concurrent_read_and_write(self):
+        '''Runs readers and writers side by side; at least one read must hit.'''
+        results = multiprocessing.Queue(CONCURRENCY+1)
+        pairs = []
+        for _ in range(CONCURRENCY):
+            reader = multiprocessing.Process(target=read_func, args=(self.cache, results))
+            writer = multiprocessing.Process(target=write_func, args=(self.cache, ITERATIONS//4))
+            pairs.append((reader, writer))
+        for reader, writer in pairs:
+            reader.start()
+            writer.start()
+        for _, writer in pairs:
+            writer.join()
+        for reader, _ in pairs:
+            reader.join()
+        hit_counts = [ results.get(False) for _ in range(CONCURRENCY) ]
+        self.assertNotIn(None, hit_counts)
+        # A fairly low hit rate is expected here; one successful read is enough.
+        self.assertGreater(sum(hit_counts), 0)
+
+    def _concurrent_read_ok(self):
+        '''Runs readers only; every read of the populated cache must hit.'''
+        results = multiprocessing.Queue(CONCURRENCY+1)
+        readers = [
+            multiprocessing.Process(target=read_func, args=(self.cache, results))
+            for _ in range(CONCURRENCY)
+        ]
+        for proc in readers:
+            proc.start()
+        for proc in readers:
+            proc.join()
+        hit_counts = [ results.get(False) for _ in range(CONCURRENCY) ]
+        self.assertNotIn(None, hit_counts)
+        self.assertEqual(sum(hit_counts), 2*CONCURRENCY*ITERATIONS)
+
+    def test(self):
+        # The first phase populates the cache that the second phase verifies.
+        self._concurrent_read_and_write()
+        self._concurrent_read_ok()
+
+def key(i):  # Cache key for the i-th entry.
+    return "key%d" % i
+
+def value(i):  # Payload for the i-th entry: the UTF-8 line "value<i>\n" repeated 10000 times.
+    return ("value%d\n" % i).encode("utf-8")*10000
+
+def read_func(cache, queue):
+    '''Looks up every key twice, checks each hit, and puts the hit count on queue.'''
+    hits = 0
+    # Two complete passes over keys 0..ITERATIONS-1.
+    for _ in range(2):
+        for index in range(ITERATIONS):
+            cached = cache.get(key(index))
+            if cached is None:
+                # A miss is not an error; it simply is not counted.
+                continue
+            hits += 1
+            assert cached == value(index)
+    queue.put(hits)
+
+def write_func(cache, offset):
+    '''Stores entries offset..ITERATIONS-1 in ascending order, then offset-1..0 descending.'''
+    write_order = list(range(offset, ITERATIONS)) + list(range(offset-1, -1, -1))
+    for index in write_order:
+        cache.set(key(index), value(index))
--- a/python/extractor/tests/test_config1/setup.py
+++ b/python/extractor/tests/test_config1/setup.py
@@ -0,0 +1,7 @@
+
+classifiers = [
+    'License :: OSI Approved :: MIT License',
+    'Programming Language :: Java :: 7',
+    'Programming Language :: Python',
+    'Programming Language :: Python :: 2.7',
+]
--- a/python/extractor/tests/test_config2/setup.py
+++ b/python/extractor/tests/test_config2/setup.py
@@ -0,0 +1,13 @@
+
+classifiers = [
+    'Development Status :: 2 - Pre-Alpha',
+    'Environment :: Console',
+    'Intended Audience :: Developers',
+    'License :: OSI Approved :: MIT License',
+    'Operating System :: POSIX',
+    'Programming Language :: Python :: 3.2',
+    'Programming Language :: Python :: 3.3',
+    'Programming Language :: Python :: 3.4',
+    'Programming Language :: Python :: 3.5',
+    'Topic :: Software Development',
+]
--- a/python/extractor/tests/test_dot_py.py
+++ b/python/extractor/tests/test_dot_py.py
@@ -0,0 +1,18 @@
+
+import sys
+import os.path
+import shutil
+import unittest
+
+import semmle.populator
+from tests import test_utils
+
+class DotPyPathTest(test_utils.ExtractorTest):
+    '''Runs the extractor over the "dot-py" test directory.'''
+    def __init__(self, name):
+        super(DotPyPathTest, self).__init__(name)
+
+    def test_dot_py(self):
+        target = os.path.abspath(os.path.join(self.here, "dot-py"))
+        self.run_extractor("-R", target, "-p", target)
+        self.check_only_traps_exists_and_clear('__init__', 'a')
--- a/python/extractor/tests/test_exclude.py
+++ b/python/extractor/tests/test_exclude.py
@@ -0,0 +1,25 @@
+
+import sys
+import os.path
+import shutil
+import unittest
+
+import semmle.populator
+from tests import test_utils
+
+class ExtractorExcludeTest(test_utils.ExtractorTest):
+    '''Tests for the extractor's "-y" and "--exclude-pattern" options.'''
+    def __init__(self, name):
+        super(ExtractorExcludeTest, self).__init__(name)
+
+    def test_simple_exclude(self):
+        self.run_extractor("-y", "package.sub", "mod1", "package.x", "package.sub.a")
+        self.check_only_traps_exists_and_clear("mod1", "package/", "x")
+
+    def test_simple_exclude_pattern(self):
+        self.run_extractor("--exclude-pattern", ".*(a|x)", "mod1", "package.x", "package.sub.a", "package.sub.b")
+        self.check_only_traps_exists_and_clear("mod1", "b", "package/", "sub/")
+
+    def test_multiple_exclude(self):
+        self.run_extractor("-y", "package.sub.x", "mod1", "-y", "package.sub.y", "package.sub.a")
+        self.check_only_traps_exists_and_clear("mod1", "package/", "sub/", "a")
--- a/python/extractor/tests/test_file.py
+++ b/python/extractor/tests/test_file.py
@@ -0,0 +1,30 @@
+
+import sys
+import os.path
+import shutil
+import unittest
+import subprocess
+
+import semmle.populator
+from tests import test_utils
+
+class FileOptionTest(test_utils.ExtractorTest):
+    '''Tests for the "-F" option and for naming a module that does not exist.'''
+    def __init__(self, name):
+        super(FileOptionTest, self).__init__(name)
+
+    def test_file(self):
+        self.run_extractor("-F", "tests/data/mod1.py")
+        self.check_only_traps_exists_and_clear("mod1")
+
+    def test_no_file(self):
+        try:  # NOTE(review): also passes if no CalledProcessError is raised -- confirm intended.
+            self.run_extractor("-F", "this-file-does-not-exist.py")
+        except subprocess.CalledProcessError as ex:
+            self.assertEqual(ex.returncode, 1)
+
+    def test_no_module(self):
+        try:  # NOTE(review): also passes if no CalledProcessError is raised -- confirm intended.
+            self.run_extractor("this_module_does_not_exist")
+        except subprocess.CalledProcessError as ex:
+            self.assertEqual(ex.returncode, 1)
--- a/python/extractor/tests/test_import_restrict.py
+++ b/python/extractor/tests/test_import_restrict.py
@@ -0,0 +1,30 @@
+
+import sys
+import os.path
+import shutil
+import unittest
+
+import semmle.populator
+from tests import test_utils
+
+class ExtractorImportRestrictTest(test_utils.ExtractorTest):
+    '''Checks the traps produced from "data-imports" with and without "--max-import-depth".'''
+    def __init__(self, name):
+      super(ExtractorImportRestrictTest, self).__init__(name)
+      self.module_path = os.path.abspath(os.path.join(self.here, "data-imports"))
+
+    def test_import_unrestricted(self):
+        self.run_extractor("mod1")
+        self.check_only_traps_exists_and_clear("mod1", "mod2", "mod3", "mod4", "mod5")
+
+    def test_import_unrestricted_2(self):
+        self.run_extractor("mod2")
+        self.check_only_traps_exists_and_clear("mod2", "mod3", "mod4", "mod5")
+
+    def test_import_depth(self):
+        self.run_extractor("--max-import-depth", "1", "mod1")
+        self.check_only_traps_exists_and_clear("mod1", "mod2")
+
+    def test_import_depth_2(self):
+        self.run_extractor("--max-import-depth", "2", "mod1")
+        self.check_only_traps_exists_and_clear("mod1", "mod2", "mod3", "mod4")
--- a/python/extractor/tests/test_io_error.py
+++ b/python/extractor/tests/test_io_error.py
@@ -0,0 +1,45 @@
+
+import sys
+import os.path
+import shutil
+import unittest
+from contextlib import contextmanager
+
+import semmle.populator
+from tests import test_utils
+import subprocess
+if sys.version_info < (3,0):
+    from StringIO import StringIO
+else:
+    from io import StringIO
+
+ALL_ACCESS = 0o777  # Octal permission bits 777.
+
+
+@contextmanager
+def discard_output():
+    '''Temporarily replaces sys.stdout and sys.stderr with throwaway StringIO buffers.'''
+    saved_streams = (sys.stdout, sys.stderr)
+    sys.stdout, sys.stderr = StringIO(), StringIO()
+    try:
+        yield
+    finally:
+        sys.stdout, sys.stderr = saved_streams
+
+class SingleThreadedTest(test_utils.ExtractorTest):
+    '''Runs the extractor with "-z1" after all permissions on self.trap_path are removed.'''
+    def __init__(self, name):
+        super(SingleThreadedTest, self).__init__(name)
+
+    def test_ioerror(self):
+        if os.name == "nt":  # Not run on Windows.
+            return
+        try:
+            os.chmod(self.trap_path, 0)
+            with discard_output():
+                try:  # NOTE(review): also passes if no CalledProcessError is raised -- confirm intended.
+                    self.run_extractor("-z1", "-y", "package.sub", "mod1", "package.x", "package.sub.a")
+                except subprocess.CalledProcessError as ex:
+                    self.assertEqual(ex.returncode, 1)
+        finally:
+            os.chmod(self.trap_path, ALL_ACCESS)  # Make trap_path accessible again (mode 777).
--- a/python/extractor/tests/test_lgtm_relative_path.py
+++ b/python/extractor/tests/test_lgtm_relative_path.py
@@ -0,0 +1,14 @@
+import os
+
+from tests import test_utils
+
+class ExtractorPatternsTest(test_utils.ExtractorTest):
+    '''Runs the extractor on "lgtm_src" with exclude/include "--filter" options.'''
+    def __init__(self, name):
+        super(ExtractorPatternsTest, self).__init__(name)
+
+    def test(self):
+        src = os.path.join(self.here, "lgtm_src")
+        with test_utils.environment("LGTM_SRC", src):  # presumably sets LGTM_SRC for the block -- verify in test_utils
+            self.run_extractor("-R", src, "--filter", "exclude:*.py",  "--filter", "include:x.py")
+            self.check_only_traps_exists_and_clear("x")
--- a/python/extractor/tests/test_off_path.py
+++ b/python/extractor/tests/test_off_path.py
@@ -0,0 +1,18 @@
+
+import sys
+import os.path
+import shutil
+import unittest
+
+import semmle.populator
+from tests import test_utils
+
+class ExtractorOffPathTest(test_utils.ExtractorTest):
+    '''Runs the extractor with "-R" on the "off-path" test directory.'''
+    def __init__(self, name):
+      super(ExtractorOffPathTest, self).__init__(name)
+
+    def test_off_path(self):
+        off_path = os.path.abspath(os.path.join(self.here, "off-path"))
+        self.run_extractor("-R", off_path)
+        self.check_only_traps_exists_and_clear("nameless", "mod1", "mod2")
--- a/python/extractor/tests/test_omit_syntax_error.py
+++ b/python/extractor/tests/test_omit_syntax_error.py
@@ -0,0 +1,22 @@
+
+import sys
+import os.path
+import shutil
+import unittest
+
+import semmle.populator
+from tests import test_utils
+
+class OmitSyntaxErrorTest(test_utils.ExtractorTest):
+    '''Tests the "--omit-syntax-error" option on the "error" module in "syntax-error".'''
+    def __init__(self, name):
+        super(OmitSyntaxErrorTest, self).__init__(name)
+        self.module_path = os.path.abspath(os.path.join(self.here, "syntax-error"))
+
+    def test_omit(self):
+        self.run_extractor("--omit-syntax-error", "error")
+        self.check_only_traps_exists_and_clear()
+
+    def test_dont_omit(self):
+        self.run_extractor("error")
+        self.check_only_traps_exists_and_clear("error", "error")  # NOTE(review): "error" is listed twice -- confirm intended.
--- a/python/extractor/tests/test_parser.py
+++ b/python/extractor/tests/test_parser.py
@@ -0,0 +1,107 @@
+import sys
+import os.path
+import shutil
+import unittest
+import pytest
+import warnings
+
+from tests import test_utils
+from semmle.python.parser.dump_ast import old_parser, AstDumper, StdoutLogger
+from semmle.python.parser.tsg_parser import parse as new_parser
+import subprocess
+
+class ParserTest(unittest.TestCase):
+    '''Checks the ASTs that the parsers produce for the files in the "parser" folder.'''
+
+    def __init__(self, name):
+        super(ParserTest, self).__init__(name)
+        self.maxDiff = None
+        self.test_folder = os.path.join(os.path.dirname(__file__), "parser")
+
+    @pytest.fixture(autouse=True)
+    def capsys(self, capsys):
+        # Keep the capsys fixture on the instance so the comparisons can read stderr.
+        self.capsys = capsys
+
+    def _fail_on_diff(self, left, right):
+        '''Fails the test, showing the `git diff` output, if the two files differ.'''
+        command = ["git", "diff", "--patience", "--no-index", left, right]
+        try:
+            diff = subprocess.check_output(command)
+        except subprocess.CalledProcessError as e:
+            # A non-zero exit status still carries git's output in e.output.
+            diff = e.output
+        if diff:
+            pytest.fail(diff.decode("utf-8"))
+
+    def compare_parses(self, filename, logger):
+        '''Parses filename with both parsers and requires identical AST dumps.'''
+        pyfile = os.path.join(self.test_folder, filename)
+        stem = filename[:-3]
+        oldfile = os.path.join(self.test_folder, stem + ".old")
+        newfile = os.path.join(self.test_folder, stem + ".new")
+        errors = {"old": False, "new": False}
+        runs = (("old", old_parser, oldfile), ("new", new_parser, newfile))
+        for label, parse, dump_path in runs:
+            try:
+                tree = parse(pyfile, logger)
+                with open(dump_path, "w") as dump_file:
+                    AstDumper(dump_file).visit(tree)
+            except SyntaxError:
+                errors[label] = True
+
+        if errors["old"] or errors["new"]:
+            raise Exception("Parser error: old_error={}, new_error={}".format(errors["old"], errors["new"]))
+        self._fail_on_diff(oldfile, newfile)
+        self.assertEqual(self.capsys.readouterr().err, "")
+        # Reached only when everything above passed, so failing runs keep their dumps.
+        os.remove(oldfile)
+        os.remove(newfile)
+
+    def compare_expected(self, filename, logger, new=True):
+        '''Parses filename and requires its AST dump to equal the ".expected" file.
+        `new` picks the new parser (the default) or the old one; Python 2 is skipped.
+        '''
+        if sys.version_info.major < 3:
+            return
+        pyfile = os.path.join(self.test_folder, filename)
+        stem = filename[:-3]
+        expected = os.path.join(self.test_folder, stem + ".expected")
+        actual = os.path.join(self.test_folder, stem + ".actual")
+        parser = new_parser if new else old_parser
+        with warnings.catch_warnings():
+            # strings_new.py contains `b"this is not a unicode escape because we are
+            # in a bytestring: \N{AMPERSAND}"`, which triggers a DeprecationWarning.
+            # The parser's behavior on such bad code is exactly what is under test,
+            # so the code cannot simply be "fixed". Filtering just this warning with
+            # the Python warning filter does not work, and `error::DeprecationWarning`
+            # changes the *output* of the test. Ignoring DeprecationWarning for this
+            # one file lets pytest keep erroring on deprecation warnings elsewhere
+            # while still allowing this case to exist.
+            if filename == "strings_new.py":
+                warnings.simplefilter("ignore", DeprecationWarning)
+            ast = parser(pyfile, logger)
+        with open(actual, "w") as actual_file:
+            AstDumper(actual_file).visit(ast)
+        self._fail_on_diff(expected, actual)
+        self.assertEqual(self.capsys.readouterr().err, "")
+        os.remove(actual)
+
+
+def setup_tests():
+    '''Attaches one test method to ParserTest per ".py" file in the "parser" folder.'''
+    test_folder = os.path.join(os.path.dirname(__file__), "parser")
+    with StdoutLogger() as logger:
+        for entry in (name for name in os.listdir(test_folder) if name.endswith(".py")):
+            stem = entry[:-3]
+            # "_new"/"_old" stems are checked against ".expected"; others compare both parsers.
+            if stem.endswith("_new"):
+                test_func = lambda self, file=entry: self.compare_expected(file, logger, new=True)
+            elif stem.endswith("_old"):
+                test_func = lambda self, file=entry: self.compare_expected(file, logger, new=False)
+            else:
+                test_func = lambda self, file=entry: self.compare_parses(file, logger)
+            setattr(ParserTest, "test_" + stem, test_func)
+
+setup_tests()
+del setup_tests
--- a/python/extractor/tests/test_patterns.py
+++ b/python/extractor/tests/test_patterns.py
@@ -0,0 +1,27 @@
+import os
+import json
+import subprocess
+
+import semmle.path_filters
+from tests import test_utils
+
+class ExtractorPatternsTest(test_utils.ExtractorTest):
+    '''Checks semmle.path_filters.glob_to_regex against the cases in patterns.json.'''
+    def __init__(self, name):
+        super(ExtractorPatternsTest, self).__init__(name)
+
+    def test(self):  # Each case lists paths that should and should not match its pattern.
+        repo_dir = subprocess.Popen(["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE).communicate()[0].rstrip().decode("utf-8")
+        test_file_path = os.path.abspath(os.path.join(repo_dir, "unit-tests", "files", "pattern-matching", "patterns.json"))
+        with open(test_file_path) as test_file:
+            test_patterns = json.load(test_file)
+        for test_pattern in test_patterns:
+            pattern = test_pattern["pattern"]
+            regex = semmle.path_filters.glob_to_regex(pattern)
+            for matching_path in test_pattern["should_match"]:
+                self.assertTrue(regex.match(matching_path), "Pattern \"%s\" did not match path \"%s\"." % (pattern, matching_path))
+            for matching_path in test_pattern["shouldnt_match"]:
+                self.assertFalse(regex.match(matching_path), "Pattern \"%s\" matched path \"%s\"." % (pattern, matching_path))
+
+    def test_escape_prefix(self):  # No assertion: only checks that a prefix ending in a backslash does not raise.
+        semmle.path_filters.glob_to_regex("x", prefix="foo\\")
--- a/python/extractor/tests/test_projectlayout.py
+++ b/python/extractor/tests/test_projectlayout.py
@@ -0,0 +1,133 @@
+#
+# This is a port of com.semmle.extractor.projectstructure.ProjectLayoutTests
+# and must be kept in sync
+#
+
+from semmle.projectlayout import ProjectLayout
+import unittest
+
+# Main fixture used through the module-level `map` helper. The text is split on
+# newlines and handed to ProjectLayout as a list of lines (the first element is
+# the empty string before the leading newline). It contains an `@Example` line
+# (ProjectLayoutTests expects "Example" as the project name), `#Name` headings,
+# lines starting with `-`, `//` separators and `*`/`**` wildcards.
+# Do not edit the text: the expectations in ProjectLayoutTests depend on it.
+PROJECT_LAYOUT = ProjectLayout(u"""
+@Example
+
+/this/path/will/remain
+-this/path/will/not
+/and/look//this/path/is/ok
+
+#Source
+/src//
+-/src/tests
+
+#Tests
+/src/tests//
+
+#Generated
+/gen
+/gen2//gen
+
+#Misc
+misc//
+othermisc
+//thirdmisc
+
+#ExecutionOrder
+ex/order
+-ex/order/tests/a
+ex/order/tests
+/src/tests//testA.c
+#Patterns
+**/*.included
+**/inc
+-**/exc
+my
+-my/excluded/**/files
+my/**//files/**/a
+//**/weird""".split('\n'))
+
+# Smallest fixture: no `@` line and no `#Name` heading, one plain path and one
+# `-` line. ProjectLayoutTests expects project_name() without an argument to
+# raise for this layout.
+MINIMAL_LAYOUT = ProjectLayout(u"""
+/included/path
+- excluded/path""".split('\n'))
+
+# Fixture whose headings contain spaces and whose first rule is the root path `/`;
+# used by ProjectLayoutTests.test_cs.
+CS_LAYOUT = ProjectLayout(u"""
+#Production code
+/
+-**/src.test
+
+#Testing code
+**/src.test""".split('\n'))
+
+def map(path):
+    return PROJECT_LAYOUT.artificial_path(path)
+
+class ProjectLayoutTests(unittest.TestCase):
+    def test_advanced_patterns(self):
+        self.assertEqual(u"/Patterns/firstPattern.included", map(u"/firstPattern.included"))
+        self.assertEqual(u"/Patterns/P1/P2/a.included", map(u"/P1/P2/a.included"))
+        self.assertEqual(u"/Patterns/P3/P4/inc", map(u"/P3/P4/inc"))
+        self.assertEqual(u"/Patterns/P4/P5/inc/a.c", map(u"/P4/P5/inc/a.c"))
+        assert map(u"/P3/P4/inc/exc") is None
+        assert map(u"/P3/P4/inc/exc/a/b.c") is None
+        self.assertEqual(u"/Patterns/my/code", map(u"/my/code"))
+        assert map("u/my/excluded/but/very/interesting/files/a.c") is None
+        self.assertEqual(u"/Patterns/files/a/b.c", map(u"/my/excluded/but/very/interesting/files/a/b.c"))
+        self.assertEqual(u"/Patterns/P5/P6/weird", map(u"/P5/P6/weird"))
+
+    def test_non_virtual_path(self):
+        self.assertEqual(u"/this/path/will/remain/the-same.c", map(u"/this/path/will/remain/the-same.c"))
+        assert map(u"/this/path/will/not/be/included.c") is None
+        self.assertEqual(u"/this/path/is/ok/to-use.c", map(u"/and/look/this/path/is/ok/to-use.c"))
+
+    def test_ignore_unmentioned_paths(self):
+        assert map(u"/lib/foo.c") is None
+
+    def test_do_not_match_partial_names(self):
+        assert map(u"/gen2/foo.c") is None
+        assert map(u"/src2/foo.c") is None
+
+    def test_simple_mapping(self):
+        self.assertEqual(u"/Source/foo.c", map(u"/src/foo.c"))
+
+    def test_match_in_sequence(self):
+        self.assertEqual(u"/ExecutionOrder/ex/order/tests/a", map("/ex/order/tests/a"))
+        self.assertEqual(u"/Tests/testA.c", map(u"/src/tests/testA.c"))
+
+    def test_excluded_and_included(self):
+        self.assertEqual(u"/Tests/test.c", map("/src/tests/test.c"))
+
+    def test_without_double_slashes(self):
+        self.assertEqual(u"/Generated/gen/gen.c", map("/gen/gen.c"))
+
+    def test_middle_double_slash(self):
+        self.assertEqual(u"/Generated/gen/gen.c", map("/gen2/gen/gen.c"))
+
+    def test_initial_double_slash(self):
+        self.assertEqual(u"/Misc/thirdmisc/misc.c", map("/thirdmisc/misc.c"))
+
+    def test_map_directories(self):
+        self.assertEqual(u"/Generated/gen", map("/gen"))
+        self.assertEqual(u"/Generated/gen/", map("/gen/"))
+        self.assertEqual(u"/Source", map("/src"))
+        self.assertEqual(u"/Misc/thirdmisc", map("/thirdmisc"))
+
+    def test_missing_initial_slash(self):
+        self.assertEqual(u"/Misc", map("/misc"))
+        self.assertEqual(u"/Misc/othermisc", map("/othermisc"))
+
+    def test_minimal_layout(self):
+        self.assertEqual(u"/included/path/foo.c", MINIMAL_LAYOUT.artificial_path("/included/path/foo.c"))
+        assert MINIMAL_LAYOUT.artificial_path(u"/excluded/path/name") is None
+
+    def test_project_names(self):
+        self.assertEqual(u"Example", PROJECT_LAYOUT.project_name())
+        self.assertEqual(u"Example", PROJECT_LAYOUT.project_name("Something else"))
+        self.assertRaises(Exception, lambda: MINIMAL_LAYOUT.project_name())
+        self.assertEqual(u"My project", MINIMAL_LAYOUT.project_name("My project"))
+
+    def test_cs(self):
+        self.assertEqual(u"/Production code", CS_LAYOUT.artificial_path(u""))
+        self.assertEqual(u"/Production code/", CS_LAYOUT.artificial_path(u"/"));
+        self.assertEqual(u"/Production code/AppAuth/ear/App/src", CS_LAYOUT.artificial_path(u"/AppAuth/ear/App/src"));
+        self.assertEqual(u"/Testing code/BUILD/bun/BUILD/src.test", CS_LAYOUT.artificial_path(u"/BUILD/bun/BUILD/src.test"));
+
+
+# When executed as a script, run the tests defined in this module.
+if __name__ == "__main__":
+    unittest.main()
--- a/python/extractor/tests/test_python_sanity.py
+++ b/python/extractor/tests/test_python_sanity.py
@@ -0,0 +1,23 @@
+import sys
+import unittest
+
+
+class PythonSanityTest(unittest.TestCase):
+    """Tests various implicit assumptions we have about Python behavior.
+
+    This is intended to catch changes that may break extraction in future
+    versions of Python.
+    """
+
+    def __init__(self, name):
+      super(PythonSanityTest, self).__init__(name)
+
+    def test_latin_1_encoding(self):
+        """Tests whether 'latin-1' acts as a "do nothing" encoding."""
+
+        s = bytes(range(256))
+        u = str(s, 'latin-1')
+        s_as_tuple = tuple(s)
+
+        u_as_tuple = tuple(map(ord, u))
+        assert u_as_tuple == s_as_tuple
--- a/python/extractor/tests/test_single.py
+++ b/python/extractor/tests/test_single.py
@@ -0,0 +1,21 @@
+
+import sys
+import os.path
+import shutil
+import unittest
+
+import semmle.populator
+from tests import test_utils
+
+class SingleThreadedTest(test_utils.ExtractorTest):
+
+    def __init__(self, name):
+        super(SingleThreadedTest, self).__init__(name)
+
+    def test_simple(self):
+        self.run_extractor("-z1", "package.sub.a")
+        self.check_only_traps_exists_and_clear("a", "package/", "sub/")
+
+    def test_simple_exclude(self):
+        self.run_extractor("-z1", "-y", "package.sub", "mod1", "package.x", "package.sub.a")
+        self.check_only_traps_exists_and_clear("mod1", "package/", "x")
--- a/python/extractor/tests/test_source_archive_unchanged.py
+++ b/python/extractor/tests/test_source_archive_unchanged.py
@@ -0,0 +1,27 @@
+import os
+import subprocess
+import filecmp
+
+from tests.test_utils import ExtractorTest, environment
+
+class SourceArchiveUnchangedTest(ExtractorTest):
+    """Checks that the files stored in the source archive are exact copies of the originals."""
+
+    def __init__(self, name):
+        super(SourceArchiveUnchangedTest, self).__init__(name)
+        testfiledir = os.path.abspath(os.path.join(self.here, "source_archive_unchanged"))
+        self.src_path = os.path.join(testfiledir, "src")
+        self.src_archive = os.path.join(testfiledir, "src_archive")
+
+    def test_source_archive_unchanged(self):
+        self.run_extractor(
+            "-F", "tests/source_archive_unchanged/src/weird_bytes.py",
+            "-F", "tests/source_archive_unchanged/src/no_newline.py"
+        )
+        source_archive_location = os.path.join(self.src_archive, os.path.relpath(self.src_path, "/"))
+        for filename in ("weird_bytes.py", "no_newline.py"):
+            orig = os.path.join(self.src_path, filename)
+            copy = os.path.join(source_archive_location, filename)
+            if not filecmp.cmp(orig, copy):
+                self.fail("The source archive version of the following file has changed: " + copy)
+            self.check_source_exists_and_clear(os.path.join(source_archive_location, filename))
--- a/python/extractor/tests/test_tokenizer.py
+++ b/python/extractor/tests/test_tokenizer.py
@@ -0,0 +1,66 @@
+
+import sys
+import os.path
+import shutil
+import unittest
+
+import semmle.populator
+from tests import test_utils
+from semmle.python.parser import tokenizer
+from blib2to3.pgen2.token import tok_name
+
+def unescape(s):
+    return u"'" + s.replace(u"\\", u"\\\\").replace(u"\n", u"\\n").replace(u"\t", u"\\t").replace(u"\'", u"\\'") + u"'"
+
+
+def format_token(token):
+    type, text, start, end = token
+    # Use Python 3 tokenize style output, regardless of version
+    token_range = u"%d,%d-%d,%d:" % (start + end)
+    return u"%-20s%-15s%s" % (token_range, tok_name[type], unescape(text))
+
+class TokenizerTest(unittest.TestCase):
+
+    def __init__(self, name):
+        super(TokenizerTest, self).__init__(name)
+        self.test_folder = os.path.join(os.path.dirname(__file__), "tokenizer")
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def compare_tokens(self, filename):
+        pyfile = os.path.join(self.test_folder, filename)
+        tokenfile = os.path.join(self.test_folder, filename[:-3]+".tokens")
+        with open(tokenfile, "rb") as tkns:
+            expected = [ line.strip().decode("utf8") for line in tkns if line.strip() ]
+        try:
+            with open(pyfile, "rb") as srcfile:
+                srcbytes = srcfile.read()
+            encoding, srcbytes = tokenizer.encoding_from_source(srcbytes)
+            text = srcbytes.decode(encoding)
+            actual = [format_token(tkn) for tkn in tokenizer.Tokenizer(text).tokens()]
+        except Exception as ex:
+            print(ex)
+            self.fail("Failed to tokenize " + filename)
+        if expected == actual:
+            return
+        actualfile = os.path.join(self.test_folder, filename[:-3]+".actual")
+        with open(actualfile, "wb") as out:
+            for line in actual:
+                out.write(line.encode("utf8"))
+                out.write(b"\n")
+        lineno = 1
+        for expected_tkn, actual_tkn in zip(expected, actual):
+            assert type(expected_tkn) is str
+            assert type(actual_tkn) is str
+            self.assertEqual(expected_tkn, actual_tkn, " at %s:%d" % (filename[:-3]+".tokens", lineno))
+            lineno += 1
+        self.assertTrue(len(expected) == len(actual), "Too few or too many tokens for %s" % filename)
+
+    def test_tokens(self):
+        for file in os.listdir(self.test_folder):
+            if file.endswith(".py"):
+                self.compare_tokens(file)
--- a/python/extractor/tests/test_trap_cache.py
+++ b/python/extractor/tests/test_trap_cache.py
@@ -0,0 +1,39 @@
+
+import sys
+import os.path
+import shutil
+import unittest
+
+import semmle.populator
+from tests import test_utils
+
+class TrapCacheTest(test_utils.ExtractorTest):
+
+    def __init__(self, name):
+        super(TrapCacheTest, self).__init__(name)
+        self.trap_cache = os.path.abspath(os.path.join(self.here, "cache"))
+
+
+    def tearDown(self):
+        super(TrapCacheTest, self).tearDown()
+        shutil.rmtree(self.trap_cache, ignore_errors=True)
+
+    def run_extractor(self, *args):
+        super(TrapCacheTest, self).run_extractor(*(["-c", self.trap_cache] + list(args)))
+
+    def create_trap_cache(self):
+        try:
+            os.makedirs(self.trap_cache)
+        except:
+            if os.path.exists(self.trap_cache):
+                return
+            raise
+
+    def test_pre_created(self):
+        self.create_trap_cache()
+        self.run_extractor("mod1", "package.x", "package.sub.a")
+        self.check_only_traps_exists_and_clear("mod1", "package/", "x", "sub/", "a")
+
+    def test_not_pre_created(self):
+        self.run_extractor("mod1", "package.x", "package.sub.a")
+        self.check_only_traps_exists_and_clear("mod1", "package/", "x", "sub/", "a")
--- a/python/extractor/tests/test_use_projectlayout.py
+++ b/python/extractor/tests/test_use_projectlayout.py
@@ -0,0 +1,27 @@
+import os
+import subprocess
+
+from tests.test_utils import ExtractorTest, environment
+
+class ProjectLayoutUseTest(ExtractorTest):
+
+    def __init__(self, name):
+        super(ProjectLayoutUseTest, self).__init__(name)
+        self.module_path = os.path.abspath(os.path.join(self.here, "project_layout"))
+        self.src_path = os.path.join(self.module_path, "src")
+        self.src_archive = os.path.join(self.module_path, "src_archive")
+
+    def test_layout(self):
+        with environment("SEMMLE_PATH_TRANSFORMER", "tests/project_layout/project-layout"):
+            self.run_extractor("-R", self.src_path)
+        self.check_only_traps_exists_and_clear("mod1")
+        self.check_source_exists_and_clear(os.path.join(self.src_archive, "target", "src", "mod1.py"))
+
+    def test_invalid_layout(self):
+        try:
+            with environment("SEMMLE_PATH_TRANSFORMER", "nonsuch/project-layout"):
+                self.run_extractor("-R", self.src_path)
+        except subprocess.CalledProcessError as ex:
+            self.assertEqual(ex.returncode, 2)
+        else:
+            self.fail("Not cleanly halting on invalid path transformer")
--- a/python/extractor/tests/test_utils.py
+++ b/python/extractor/tests/test_utils.py
@@ -0,0 +1,83 @@
+import os
+import sys
+import semmle
+import unittest
+import shutil
+import re
+from contextlib import contextmanager
+
+import semmle.populator
+import subprocess
+
+# Name of the trap file that ExtractorTest.check_only_traps_exists_and_clear
+# ignores when it looks for leftover trap files.
+BUILTIN_TRAP = "builtins.trap.gz"
+
+# Trap file name for a module: `<name>.py`, one character, an id made of
+# letters, digits, `=`, `_` or `-`, then `.trap.gz`. Group 1 is the module name.
+# NOTE(review): the `.` after `py` is unescaped, so it matches any single character.
+PY_PATTERN = re.compile(r"(\w+)\.py.[A-Za-z0-9=_\-]+\.trap\.gz")
+# Same shape without the `.py` part; tried only when PY_PATTERN does not match,
+# and a match is treated as a folder (the name gets a trailing "/").
+FOLDER_PATTERN = re.compile(r"(\w+).[A-Za-z0-9=_\-]+\.trap\.gz")
+
+
+@contextmanager
+def environment(key, value):
+    os.environ[key] = value
+    try:
+        yield
+    finally:
+        del os.environ[key]
+
+
+class ExtractorTest(unittest.TestCase):
+
+    def __init__(self, name):
+        unittest.TestCase.__init__(self, name)
+        self.here = os.path.dirname(__file__)
+        self.module_path = os.path.abspath(os.path.join(self.here, "data"))
+        self.trap_path = os.path.abspath(os.path.join(self.here, "traps"))
+        self.src_archive = None
+
+    def setUp(self):
+        try:
+            os.makedirs(self.trap_path)
+        except:
+            if os.path.exists(self.trap_path):
+                return
+            raise
+
+    def tearDown(self):
+        shutil.rmtree(self.trap_path, ignore_errors=True)
+
+    def check_only_traps_exists_and_clear(self, *module_names):
+        modules = list(module_names)
+        for filename in os.listdir(self.trap_path):
+            match = PY_PATTERN.match(filename)
+            if match:
+                name = match.group(1)
+            else:
+                match = FOLDER_PATTERN.match(filename)
+                if match:
+                    name = match.group(1) + "/"
+                else:
+                    continue
+            if name in modules:
+                modules.remove(name)
+                path = os.path.join(self.trap_path, filename)
+                os.remove(path)
+        if modules:
+            self.fail("No trap file for " + modules.pop())
+        for _, _, filenames in os.walk(self.trap_path):
+            #Ignore the builtin trap file, any `__init__.py` files, and $file, $interpreter trap files.
+            filenames = [ name for name in filenames if not name.startswith("$") and not name.startswith("__init__.py") and name != BUILTIN_TRAP]
+            self.assertFalse(filenames, "Some trap files remain: " + ", ".join(filenames))
+
+    def check_source_exists_and_clear(self, path):
+        try:
+            os.remove(path)
+        except OSError:
+            self.fail("File '%s' does not exist" % path)
+
+    def run_extractor(self, *args):
+        cmd = [sys.executable, os.path.join(os.path.dirname(self.here), "python_tracer.py"), "--quiet" ] + ["-p", self.module_path, "-o", self.trap_path] + list(args)
+        with environment("CODEQL_EXTRACTOR_PYTHON_ENABLE_PYTHON2_EXTRACTION", "True"):
+            if self.src_archive:
+                with environment("CODEQL_EXTRACTOR_PYTHON_SOURCE_ARCHIVE_DIR", self.src_archive):
+                        subprocess.check_call(cmd)
+            else:
+                subprocess.check_call(cmd)
--- a/python/extractor/tests/tokenizer/basic.py
+++ b/python/extractor/tests/tokenizer/basic.py
@@ -0,0 +1,134 @@
+
+#AST nodes: Classes, Functions, Modules, expr, stmts
+
+class C:
+
+    def stmts(p0, p1):
+        global x
+        assert x == 2
+        y = 3
+        y += 4
+        while True:
+            break
+        while x > 0:
+            x -= 1
+            continue
+
+        f()
+        for x in y:
+            pass
+        if x:
+            print(y)
+        import a
+        import a.b as c
+        import a as b
+        from a.b import c
+
+
+        with open("file") as f:
+            pass
+        try:
+            1/0
+        except Exception as ex:
+            del y
+        finally:
+            del x
+        if x:
+            raise Exception()
+        else:
+            return
+
+    def exprs(p2, p3):
+        p2.x = 2
+        a = p3.y
+        x = 1 + 2
+        y = b'h4tpvhsa'
+        call(arg0, arg1, name0="Hi", name1=y, *(), **{})
+        x < y
+        {1:1, 2: 2}
+
+        x[a, 7]
+        (x for x in y)
+        17 if x < y else 16
+        lambda x : x * y
+        [ 1, 2, a, x.b, p1.c ]
+        [ a + "Hi" for a in str(y) ]
+
+
+
+        #a, *b = y
+        u"Hi"
+        x[0]
+        x[y[0]]
+        (p2, p3, 7)
+
+#Some multiline strings
+'''
+Single quotes string'''
+
+"""
+Double-quotes
+string"""
+
+r'''
+Bytes
+'''
+
+U"""
+Raw
+Unicode
+"""
+
+#Decorated function
+@deco
+def f():
+    pass
+
+#Inner function (see ODASA-1774)
+def outer():
+    def inner():
+        pass
+
+#Oddly laid out comprehension
+[[
+  x for x in y
+  ]
+  
+  for a in b
+]
+
+#Nested binary operations
+"Hello" + " " + "world"
+1+2+f()
+1+(2+3)
+
+# operations
+a|b&c+d-e
+x*f%g^h@j**k
+
+#Augmented assigns
+a @= b
+a |= b
+a *= b
+
+~a
+
+#Comparisons
+<
+>
+<=
+>=
+!=
+==
+is
+is not
+
+("""
+""")
+del x
+
+`backticks`
+
+x := y
+
+1 <> 2
--- a/python/extractor/tests/tokenizer/basic.tokens
+++ b/python/extractor/tests/tokenizer/basic.tokens
@@ -0,0 +1,472 @@
+2,0-2,52:           COMMENT        '#AST nodes: Classes, Functions, Modules, expr, stmts'
+4,0-4,5:            NAME           'class'
+4,6-4,7:            NAME           'C'
+4,7-4,8:            COLON          ':'
+4,8-4,9:            NEWLINE        '\n'
+6,0-6,4:            INDENT         '    '
+6,4-6,7:            NAME           'def'
+6,8-6,13:           NAME           'stmts'
+6,13-6,14:          LPAR           '('
+6,14-6,16:          NAME           'p0'
+6,16-6,17:          COMMA          ','
+6,18-6,20:          NAME           'p1'
+6,20-6,21:          RPAR           ')'
+6,21-6,22:          COLON          ':'
+6,22-6,23:          NEWLINE        '\n'
+7,0-7,8:            INDENT         '        '
+7,8-7,14:           NAME           'global'
+7,15-7,16:          NAME           'x'
+7,16-7,17:          NEWLINE        '\n'
+8,8-8,14:           NAME           'assert'
+8,15-8,16:          NAME           'x'
+8,17-8,19:          OP             '=='
+8,20-8,21:          NUMBER         '2'
+8,21-8,22:          NEWLINE        '\n'
+9,8-9,9:            NAME           'y'
+9,10-9,11:          OP             '='
+9,12-9,13:          NUMBER         '3'
+9,13-9,14:          NEWLINE        '\n'
+10,8-10,9:          NAME           'y'
+10,10-10,12:        OP             '+='
+10,13-10,14:        NUMBER         '4'
+10,14-10,15:        NEWLINE        '\n'
+11,8-11,13:         NAME           'while'
+11,14-11,18:        NAME           'True'
+11,18-11,19:        COLON          ':'
+11,19-11,20:        NEWLINE        '\n'
+12,0-12,12:         INDENT         '            '
+12,12-12,17:        NAME           'break'
+12,17-12,18:        NEWLINE        '\n'
+13,8-13,8:          DEDENT         ''
+13,8-13,13:         NAME           'while'
+13,14-13,15:        NAME           'x'
+13,16-13,17:        OP             '>'
+13,18-13,19:        NUMBER         '0'
+13,19-13,20:        COLON          ':'
+13,20-13,21:        NEWLINE        '\n'
+14,0-14,12:         INDENT         '            '
+14,12-14,13:        NAME           'x'
+14,14-14,16:        OP             '-='
+14,17-14,18:        NUMBER         '1'
+14,18-14,19:        NEWLINE        '\n'
+15,12-15,20:        NAME           'continue'
+15,20-15,21:        NEWLINE        '\n'
+17,8-17,8:          DEDENT         ''
+17,8-17,9:          NAME           'f'
+17,9-17,10:         LPAR           '('
+17,10-17,11:        RPAR           ')'
+17,11-17,12:        NEWLINE        '\n'
+18,8-18,11:         NAME           'for'
+18,12-18,13:        NAME           'x'
+18,14-18,16:        NAME           'in'
+18,17-18,18:        NAME           'y'
+18,18-18,19:        COLON          ':'
+18,19-18,20:        NEWLINE        '\n'
+19,0-19,12:         INDENT         '            '
+19,12-19,16:        NAME           'pass'
+19,16-19,17:        NEWLINE        '\n'
+20,8-20,8:          DEDENT         ''
+20,8-20,10:         NAME           'if'
+20,11-20,12:        NAME           'x'
+20,12-20,13:        COLON          ':'
+20,13-20,14:        NEWLINE        '\n'
+21,0-21,12:         INDENT         '            '
+21,12-21,17:        NAME           'print'
+21,17-21,18:        LPAR           '('
+21,18-21,19:        NAME           'y'
+21,19-21,20:        RPAR           ')'
+21,20-21,21:        NEWLINE        '\n'
+22,8-22,8:          DEDENT         ''
+22,8-22,14:         NAME           'import'
+22,15-22,16:        NAME           'a'
+22,16-22,17:        NEWLINE        '\n'
+23,8-23,14:         NAME           'import'
+23,15-23,16:        NAME           'a'
+23,16-23,17:        DOT            '.'
+23,17-23,18:        NAME           'b'
+23,19-23,21:        NAME           'as'
+23,22-23,23:        NAME           'c'
+23,23-23,24:        NEWLINE        '\n'
+24,8-24,14:         NAME           'import'
+24,15-24,16:        NAME           'a'
+24,17-24,19:        NAME           'as'
+24,20-24,21:        NAME           'b'
+24,21-24,22:        NEWLINE        '\n'
+25,8-25,12:         NAME           'from'
+25,13-25,14:        NAME           'a'
+25,14-25,15:        DOT            '.'
+25,15-25,16:        NAME           'b'
+25,17-25,23:        NAME           'import'
+25,24-25,25:        NAME           'c'
+25,25-25,26:        NEWLINE        '\n'
+28,8-28,12:         NAME           'with'
+28,13-28,17:        NAME           'open'
+28,17-28,18:        LPAR           '('
+28,18-28,24:        STRING         '"file"'
+28,24-28,25:        RPAR           ')'
+28,26-28,28:        NAME           'as'
+28,29-28,30:        NAME           'f'
+28,30-28,31:        COLON          ':'
+28,31-28,32:        NEWLINE        '\n'
+29,0-29,12:         INDENT         '            '
+29,12-29,16:        NAME           'pass'
+29,16-29,17:        NEWLINE        '\n'
+30,8-30,8:          DEDENT         ''
+30,8-30,11:         NAME           'try'
+30,11-30,12:        COLON          ':'
+30,12-30,13:        NEWLINE        '\n'
+31,0-31,12:         INDENT         '            '
+31,12-31,13:        NUMBER         '1'
+31,13-31,14:        OP             '/'
+31,14-31,15:        NUMBER         '0'
+31,15-31,16:        NEWLINE        '\n'
+32,8-32,8:          DEDENT         ''
+32,8-32,14:         NAME           'except'
+32,15-32,24:        NAME           'Exception'
+32,25-32,27:        NAME           'as'
+32,28-32,30:        NAME           'ex'
+32,30-32,31:        COLON          ':'
+32,31-32,32:        NEWLINE        '\n'
+33,0-33,12:         INDENT         '            '
+33,12-33,15:        NAME           'del'
+33,16-33,17:        NAME           'y'
+33,17-33,18:        NEWLINE        '\n'
+34,8-34,8:          DEDENT         ''
+34,8-34,15:         NAME           'finally'
+34,15-34,16:        COLON          ':'
+34,16-34,17:        NEWLINE        '\n'
+35,0-35,12:         INDENT         '            '
+35,12-35,15:        NAME           'del'
+35,16-35,17:        NAME           'x'
+35,17-35,18:        NEWLINE        '\n'
+36,8-36,8:          DEDENT         ''
+36,8-36,10:         NAME           'if'
+36,11-36,12:        NAME           'x'
+36,12-36,13:        COLON          ':'
+36,13-36,14:        NEWLINE        '\n'
+37,0-37,12:         INDENT         '            '
+37,12-37,17:        NAME           'raise'
+37,18-37,27:        NAME           'Exception'
+37,27-37,28:        LPAR           '('
+37,28-37,29:        RPAR           ')'
+37,29-37,30:        NEWLINE        '\n'
+38,8-38,8:          DEDENT         ''
+38,8-38,12:         NAME           'else'
+38,12-38,13:        COLON          ':'
+38,13-38,14:        NEWLINE        '\n'
+39,0-39,12:         INDENT         '            '
+39,12-39,18:        NAME           'return'
+39,18-39,19:        NEWLINE        '\n'
+41,4-41,4:          DEDENT         ''
+41,4-41,4:          DEDENT         ''
+41,4-41,7:          NAME           'def'
+41,8-41,13:         NAME           'exprs'
+41,13-41,14:        LPAR           '('
+41,14-41,16:        NAME           'p2'
+41,16-41,17:        COMMA          ','
+41,18-41,20:        NAME           'p3'
+41,20-41,21:        RPAR           ')'
+41,21-41,22:        COLON          ':'
+41,22-41,23:        NEWLINE        '\n'
+42,0-42,8:          INDENT         '        '
+42,8-42,10:         NAME           'p2'
+42,10-42,11:        DOT            '.'
+42,11-42,12:        NAME           'x'
+42,13-42,14:        OP             '='
+42,15-42,16:        NUMBER         '2'
+42,16-42,17:        NEWLINE        '\n'
+43,8-43,9:          NAME           'a'
+43,10-43,11:        OP             '='
+43,12-43,14:        NAME           'p3'
+43,14-43,15:        DOT            '.'
+43,15-43,16:        NAME           'y'
+43,16-43,17:        NEWLINE        '\n'
+44,8-44,9:          NAME           'x'
+44,10-44,11:        OP             '='
+44,12-44,13:        NUMBER         '1'
+44,14-44,15:        OP             '+'
+44,16-44,17:        NUMBER         '2'
+44,17-44,18:        NEWLINE        '\n'
+45,8-45,9:          NAME           'y'
+45,10-45,11:        OP             '='
+45,12-45,23:        STRING         'b\'h4tpvhsa\''
+45,23-45,24:        NEWLINE        '\n'
+46,8-46,12:         NAME           'call'
+46,12-46,13:        LPAR           '('
+46,13-46,17:        NAME           'arg0'
+46,17-46,18:        COMMA          ','
+46,19-46,23:        NAME           'arg1'
+46,23-46,24:        COMMA          ','
+46,25-46,30:        NAME           'name0'
+46,30-46,31:        OP             '='
+46,31-46,35:        STRING         '"Hi"'
+46,35-46,36:        COMMA          ','
+46,37-46,42:        NAME           'name1'
+46,42-46,43:        OP             '='
+46,43-46,44:        NAME           'y'
+46,44-46,45:        COMMA          ','
+46,46-46,47:        OP             '*'
+46,47-46,48:        LPAR           '('
+46,48-46,49:        RPAR           ')'
+46,49-46,50:        COMMA          ','
+46,51-46,53:        OP             '**'
+46,53-46,54:        LBRACE         '{'
+46,54-46,55:        RBRACE         '}'
+46,55-46,56:        RPAR           ')'
+46,56-46,57:        NEWLINE        '\n'
+47,8-47,9:          NAME           'x'
+47,10-47,11:        OP             '<'
+47,12-47,13:        NAME           'y'
+47,13-47,14:        NEWLINE        '\n'
+48,8-48,9:          LBRACE         '{'
+48,9-48,10:         NUMBER         '1'
+48,10-48,11:        COLON          ':'
+48,11-48,12:        NUMBER         '1'
+48,12-48,13:        COMMA          ','
+48,14-48,15:        NUMBER         '2'
+48,15-48,16:        COLON          ':'
+48,17-48,18:        NUMBER         '2'
+48,18-48,19:        RBRACE         '}'
+48,19-48,20:        NEWLINE        '\n'
+50,8-50,9:          NAME           'x'
+50,9-50,10:         LSQB           '['
+50,10-50,11:        NAME           'a'
+50,11-50,12:        COMMA          ','
+50,13-50,14:        NUMBER         '7'
+50,14-50,15:        RSQB           ']'
+50,15-50,16:        NEWLINE        '\n'
+51,8-51,9:          LPAR           '('
+51,9-51,10:         NAME           'x'
+51,11-51,14:        NAME           'for'
+51,15-51,16:        NAME           'x'
+51,17-51,19:        NAME           'in'
+51,20-51,21:        NAME           'y'
+51,21-51,22:        RPAR           ')'
+51,22-51,23:        NEWLINE        '\n'
+52,8-52,10:         NUMBER         '17'
+52,11-52,13:        NAME           'if'
+52,14-52,15:        NAME           'x'
+52,16-52,17:        OP             '<'
+52,18-52,19:        NAME           'y'
+52,20-52,24:        NAME           'else'
+52,25-52,27:        NUMBER         '16'
+52,27-52,28:        NEWLINE        '\n'
+53,8-53,14:         NAME           'lambda'
+53,15-53,16:        NAME           'x'
+53,17-53,18:        COLON          ':'
+53,19-53,20:        NAME           'x'
+53,21-53,22:        OP             '*'
+53,23-53,24:        NAME           'y'
+53,24-53,25:        NEWLINE        '\n'
+54,8-54,9:          LSQB           '['
+54,10-54,11:        NUMBER         '1'
+54,11-54,12:        COMMA          ','
+54,13-54,14:        NUMBER         '2'
+54,14-54,15:        COMMA          ','
+54,16-54,17:        NAME           'a'
+54,17-54,18:        COMMA          ','
+54,19-54,20:        NAME           'x'
+54,20-54,21:        DOT            '.'
+54,21-54,22:        NAME           'b'
+54,22-54,23:        COMMA          ','
+54,24-54,26:        NAME           'p1'
+54,26-54,27:        DOT            '.'
+54,27-54,28:        NAME           'c'
+54,29-54,30:        RSQB           ']'
+54,30-54,31:        NEWLINE        '\n'
+55,8-55,9:          LSQB           '['
+55,10-55,11:        NAME           'a'
+55,12-55,13:        OP             '+'
+55,14-55,18:        STRING         '"Hi"'
+55,19-55,22:        NAME           'for'
+55,23-55,24:        NAME           'a'
+55,25-55,27:        NAME           'in'
+55,28-55,31:        NAME           'str'
+55,31-55,32:        LPAR           '('
+55,32-55,33:        NAME           'y'
+55,33-55,34:        RPAR           ')'
+55,35-55,36:        RSQB           ']'
+55,36-55,37:        NEWLINE        '\n'
+59,8-59,18:         COMMENT        '#a, *b = y'
+60,8-60,13:         STRING         'u"Hi"'
+60,13-60,14:        NEWLINE        '\n'
+61,8-61,9:          NAME           'x'
+61,9-61,10:         LSQB           '['
+61,10-61,11:        NUMBER         '0'
+61,11-61,12:        RSQB           ']'
+61,12-61,13:        NEWLINE        '\n'
+62,8-62,9:          NAME           'x'
+62,9-62,10:         LSQB           '['
+62,10-62,11:        NAME           'y'
+62,11-62,12:        LSQB           '['
+62,12-62,13:        NUMBER         '0'
+62,13-62,14:        RSQB           ']'
+62,14-62,15:        RSQB           ']'
+62,15-62,16:        NEWLINE        '\n'
+63,8-63,9:          LPAR           '('
+63,9-63,11:         NAME           'p2'
+63,11-63,12:        COMMA          ','
+63,13-63,15:        NAME           'p3'
+63,15-63,16:        COMMA          ','
+63,17-63,18:        NUMBER         '7'
+63,18-63,19:        RPAR           ')'
+63,19-63,20:        NEWLINE        '\n'
+65,0-65,23:         COMMENT        '#Some multiline strings'
+66,0-66,0:          DEDENT         ''
+66,0-66,0:          DEDENT         ''
+66,0-67,23:         STRING         '\'\'\'\nSingle quotes string\'\'\''
+67,23-67,24:        NEWLINE        '\n'
+69,0-71,9:          STRING         '"""\nDouble-quotes\nstring"""'
+71,9-71,10:         NEWLINE        '\n'
+73,0-75,3:          STRING         'r\'\'\'\nBytes\n\'\'\''
+75,3-75,4:          NEWLINE        '\n'
+77,0-80,3:          STRING         'U"""\nRaw\nUnicode\n"""'
+80,3-80,4:          NEWLINE        '\n'
+82,0-82,19:         COMMENT        '#Decorated function'
+83,0-83,1:          AT             '@'
+83,1-83,5:          NAME           'deco'
+83,5-83,6:          NEWLINE        '\n'
+84,0-84,3:          NAME           'def'
+84,4-84,5:          NAME           'f'
+84,5-84,6:          LPAR           '('
+84,6-84,7:          RPAR           ')'
+84,7-84,8:          COLON          ':'
+84,8-84,9:          NEWLINE        '\n'
+85,0-85,4:          INDENT         '    '
+85,4-85,8:          NAME           'pass'
+85,8-85,9:          NEWLINE        '\n'
+87,0-87,32:         COMMENT        '#Inner function (see ODASA-1774)'
+88,0-88,0:          DEDENT         ''
+88,0-88,3:          NAME           'def'
+88,4-88,9:          NAME           'outer'
+88,9-88,10:         LPAR           '('
+88,10-88,11:        RPAR           ')'
+88,11-88,12:        COLON          ':'
+88,12-88,13:        NEWLINE        '\n'
+89,0-89,4:          INDENT         '    '
+89,4-89,7:          NAME           'def'
+89,8-89,13:         NAME           'inner'
+89,13-89,14:        LPAR           '('
+89,14-89,15:        RPAR           ')'
+89,15-89,16:        COLON          ':'
+89,16-89,17:        NEWLINE        '\n'
+90,0-90,8:          INDENT         '        '
+90,8-90,12:         NAME           'pass'
+90,12-90,13:        NEWLINE        '\n'
+92,0-92,29:         COMMENT        '#Oddly laid out comprehension'
+93,0-93,0:          DEDENT         ''
+93,0-93,0:          DEDENT         ''
+93,0-93,1:          LSQB           '['
+93,1-93,2:          LSQB           '['
+94,2-94,3:          NAME           'x'
+94,4-94,7:          NAME           'for'
+94,8-94,9:          NAME           'x'
+94,10-94,12:        NAME           'in'
+94,13-94,14:        NAME           'y'
+95,2-95,3:          RSQB           ']'
+97,2-97,5:          NAME           'for'
+97,6-97,7:          NAME           'a'
+97,8-97,10:         NAME           'in'
+97,11-97,12:        NAME           'b'
+98,0-98,1:          RSQB           ']'
+98,1-98,2:          NEWLINE        '\n'
+100,0-100,25:       COMMENT        '#Nested binary operations'
+101,0-101,7:        STRING         '"Hello"'
+101,8-101,9:        OP             '+'
+101,10-101,13:      STRING         '" "'
+101,14-101,15:      OP             '+'
+101,16-101,23:      STRING         '"world"'
+101,23-101,24:      NEWLINE        '\n'
+102,0-102,1:        NUMBER         '1'
+102,1-102,2:        OP             '+'
+102,2-102,3:        NUMBER         '2'
+102,3-102,4:        OP             '+'
+102,4-102,5:        NAME           'f'
+102,5-102,6:        LPAR           '('
+102,6-102,7:        RPAR           ')'
+102,7-102,8:        NEWLINE        '\n'
+103,0-103,1:        NUMBER         '1'
+103,1-103,2:        OP             '+'
+103,2-103,3:        LPAR           '('
+103,3-103,4:        NUMBER         '2'
+103,4-103,5:        OP             '+'
+103,5-103,6:        NUMBER         '3'
+103,6-103,7:        RPAR           ')'
+103,7-103,8:        NEWLINE        '\n'
+105,0-105,12:       COMMENT        '# operations'
+106,0-106,1:        NAME           'a'
+106,1-106,2:        OP             '|'
+106,2-106,3:        NAME           'b'
+106,3-106,4:        OP             '&'
+106,4-106,5:        NAME           'c'
+106,5-106,6:        OP             '+'
+106,6-106,7:        NAME           'd'
+106,7-106,8:        OP             '-'
+106,8-106,9:        NAME           'e'
+106,9-106,10:       NEWLINE        '\n'
+107,0-107,1:        NAME           'x'
+107,1-107,2:        OP             '*'
+107,2-107,3:        NAME           'f'
+107,3-107,4:        OP             '%'
+107,4-107,5:        NAME           'g'
+107,5-107,6:        OP             '^'
+107,6-107,7:        NAME           'h'
+107,7-107,8:        AT             '@'
+107,8-107,9:        NAME           'j'
+107,9-107,11:       OP             '**'
+107,11-107,12:      NAME           'k'
+107,12-107,13:      NEWLINE        '\n'
+109,0-109,18:       COMMENT        '#Augmented assigns'
+110,0-110,1:        NAME           'a'
+110,2-110,4:        OP             '@='
+110,5-110,6:        NAME           'b'
+110,6-110,7:        NEWLINE        '\n'
+111,0-111,1:        NAME           'a'
+111,2-111,4:        OP             '|='
+111,5-111,6:        NAME           'b'
+111,6-111,7:        NEWLINE        '\n'
+112,0-112,1:        NAME           'a'
+112,2-112,4:        OP             '*='
+112,5-112,6:        NAME           'b'
+112,6-112,7:        NEWLINE        '\n'
+114,0-114,1:        OP             '~'
+114,1-114,2:        NAME           'a'
+114,2-114,3:        NEWLINE        '\n'
+116,0-116,12:       COMMENT        '#Comparisons'
+117,0-117,1:        OP             '<'
+117,1-117,2:        NEWLINE        '\n'
+118,0-118,1:        OP             '>'
+118,1-118,2:        NEWLINE        '\n'
+119,0-119,2:        OP             '<='
+119,2-119,3:        NEWLINE        '\n'
+120,0-120,2:        OP             '>='
+120,2-120,3:        NEWLINE        '\n'
+121,0-121,2:        OP             '!='
+121,2-121,3:        NEWLINE        '\n'
+122,0-122,2:        OP             '=='
+122,2-122,3:        NEWLINE        '\n'
+123,0-123,2:        NAME           'is'
+123,2-123,3:        NEWLINE        '\n'
+124,0-124,2:        NAME           'is'
+124,3-124,6:        NAME           'not'
+124,6-124,7:        NEWLINE        '\n'
+126,0-126,1:        LPAR           '('
+126,1-127,3:        STRING         '"""\n"""'
+127,3-127,4:        RPAR           ')'
+127,4-127,5:        NEWLINE        '\n'
+128,0-128,3:        NAME           'del'
+128,4-128,5:        NAME           'x'
+128,5-128,6:        NEWLINE        '\n'
+130,0-130,1:        BACKQUOTE      '`'
+130,1-130,10:       NAME           'backticks'
+130,10-130,11:      BACKQUOTE      '`'
+130,11-130,12:      NEWLINE        '\n'
+132,0-132,1:        NAME           'x'
+132,3-132,4:        COLONEQUAL     ':='
+132,5-132,6:        NAME           'y'
+132,6-132,7:        NEWLINE        '\n'
+134,0-134,1:        NUMBER         '1'
+134,2-134,4:        OP             '<>'
+134,5-134,6:        NUMBER         '2'
+134,6-134,7:        NEWLINE        '\n'
+135,0-135,0:        ENDMARKER      ''
--- a/python/extractor/tests/tokenizer/close_brace.py
+++ b/python/extractor/tests/tokenizer/close_brace.py
@@ -0,0 +1,3 @@
+}
+)
+]
--- a/python/extractor/tests/tokenizer/close_brace.tokens
+++ b/python/extractor/tests/tokenizer/close_brace.tokens
@@ -0,0 +1,7 @@
+1,0-1,1:            RBRACE         '}'
+1,1-1,2:            NEWLINE        '\n'
+2,0-2,1:            RPAR           ')'
+2,1-2,2:            NEWLINE        '\n'
+3,0-3,1:            RSQB           ']'
+3,1-3,2:            NEWLINE        '\n'
+4,0-4,0:            ENDMARKER      ''
--- a/python/extractor/tests/tokenizer/comments.py
+++ b/python/extractor/tests/tokenizer/comments.py
@@ -0,0 +1,13 @@
+
+import sys
+
+def f():
+    code-here # Line end comment
+    #Indented comment
+#Unindented comment
+    return 1
+
+def g(arg):
+    return arg
+
+x = g(f())
--- a/python/extractor/tests/tokenizer/comments.tokens
+++ b/python/extractor/tests/tokenizer/comments.tokens
@@ -0,0 +1,43 @@
+2,0-2,6:            NAME           'import'
+2,7-2,10:           NAME           'sys'
+2,10-2,11:          NEWLINE        '\n'
+4,0-4,3:            NAME           'def'
+4,4-4,5:            NAME           'f'
+4,5-4,6:            LPAR           '('
+4,6-4,7:            RPAR           ')'
+4,7-4,8:            COLON          ':'
+4,8-4,9:            NEWLINE        '\n'
+5,0-5,4:            INDENT         '    '
+5,4-5,8:            NAME           'code'
+5,8-5,9:            OP             '-'
+5,9-5,13:           NAME           'here'
+5,14-5,32:          COMMENT        '# Line end comment'
+5,32-5,33:          NEWLINE        '\n'
+6,4-6,21:           COMMENT        '#Indented comment'
+7,0-7,19:           COMMENT        '#Unindented comment'
+8,4-8,10:           NAME           'return'
+8,11-8,12:          NUMBER         '1'
+8,12-8,13:          NEWLINE        '\n'
+10,0-10,0:          DEDENT         ''
+10,0-10,3:          NAME           'def'
+10,4-10,5:          NAME           'g'
+10,5-10,6:          LPAR           '('
+10,6-10,9:          NAME           'arg'
+10,9-10,10:         RPAR           ')'
+10,10-10,11:        COLON          ':'
+10,11-10,12:        NEWLINE        '\n'
+11,0-11,4:          INDENT         '    '
+11,4-11,10:         NAME           'return'
+11,11-11,14:        NAME           'arg'
+11,14-11,15:        NEWLINE        '\n'
+13,0-13,0:          DEDENT         ''
+13,0-13,1:          NAME           'x'
+13,2-13,3:          OP             '='
+13,4-13,5:          NAME           'g'
+13,5-13,6:          LPAR           '('
+13,6-13,7:          NAME           'f'
+13,7-13,8:          LPAR           '('
+13,8-13,9:          RPAR           ')'
+13,9-13,10:         RPAR           ')'
+13,10-13,11:        NEWLINE        '\n'
+14,0-14,0:          ENDMARKER      ''
--- a/python/extractor/tests/tokenizer/continuation.py
+++ b/python/extractor/tests/tokenizer/continuation.py
@@ -0,0 +1,5 @@
+def foo():
+    pass \
+\
+\
+
--- a/python/extractor/tests/tokenizer/continuation.tokens
+++ b/python/extractor/tests/tokenizer/continuation.tokens
@@ -0,0 +1,11 @@
+1,0-1,3:            NAME           'def'
+1,4-1,7:            NAME           'foo'
+1,7-1,8:            LPAR           '('
+1,8-1,9:            RPAR           ')'
+1,9-1,10:           COLON          ':'
+1,10-1,11:          NEWLINE        '\n'
+2,0-2,4:            INDENT         '    '
+2,4-2,8:            NAME           'pass'
+5,0-5,1:            NEWLINE        '\n'
+6,0-6,0:            DEDENT         ''
+6,0-6,0:            ENDMARKER      ''
--- a/python/extractor/tests/tokenizer/dollar.py
+++ b/python/extractor/tests/tokenizer/dollar.py
@@ -0,0 +1,2 @@
+$name
+$ßðđ0
--- a/python/extractor/tests/tokenizer/dollar.tokens
+++ b/python/extractor/tests/tokenizer/dollar.tokens
@@ -0,0 +1,5 @@
+1,0-1,5:            DOLLARNAME     '$name'
+1,5-1,6:            NEWLINE        '\n'
+2,0-2,5:            DOLLARNAME     '$ßðđ0'
+2,5-2,6:            NEWLINE        '\n'
+3,0-3,0:            ENDMARKER      ''
--- a/python/extractor/tests/tokenizer/dots.py
+++ b/python/extractor/tests/tokenizer/dots.py
@@ -0,0 +1,4 @@
+.
+..
+...
+....
--- a/python/extractor/tests/tokenizer/dots.tokens
+++ b/python/extractor/tests/tokenizer/dots.tokens
@@ -0,0 +1,15 @@
+1,0-1,1:            DOT            '.'
+1,1-1,2:            NEWLINE        '\n'
+2,0-2,1:            DOT            '.'
+2,1-2,2:            DOT            '.'
+2,2-2,3:            NEWLINE        '\n'
+3,0-3,1:            DOT            '.'
+3,1-3,2:            DOT            '.'
+3,2-3,3:            DOT            '.'
+3,3-3,4:            NEWLINE        '\n'
+4,0-4,1:            DOT            '.'
+4,1-4,2:            DOT            '.'
+4,2-4,3:            DOT            '.'
+4,3-4,4:            DOT            '.'
+4,4-4,5:            NEWLINE        '\n'
+5,0-5,0:            ENDMARKER      ''
--- a/python/extractor/tests/tokenizer/emoji.py
+++ b/python/extractor/tests/tokenizer/emoji.py
@@ -0,0 +1,2 @@
+"👦👦🏻👦🏼👦🏽👦🏾👦🏿👧👧🏻👧🏼👧🏽👧🏾👧🏿"
+"😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏"
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`print("Hello world! This line of code has no newline at the end.")`