From 5355d8f830ca9df68d32a6ed6ed798bef5daf87a Mon Sep 17 00:00:00 2001 From: David Douard <david.douard@sdfa3.org> Date: Wed, 22 Nov 2023 18:53:36 +0100 Subject: [PATCH] Migrate to copier-based swh-py-template and normalize the README file --- .copier-answers.yml | 11 +++++++ .gitignore | 19 ++++------- .pre-commit-config.yaml | 23 ++++++------- MANIFEST.in | 9 ----- README.md => README.rst | 11 +++++-- docs/Makefile | 3 +- docs/README.md | 1 - docs/index.rst | 7 +--- mypy.ini | 11 ++++--- pyproject.toml | 47 ++++++++++++++++++++++++++ pytest.ini | 1 - setup.py | 73 ----------------------------------------- swh/__init__.py | 3 -- tox.ini | 44 ++++++++++--------------- 14 files changed, 112 insertions(+), 151 deletions(-) create mode 100644 .copier-answers.yml delete mode 100644 MANIFEST.in rename README.md => README.rst (94%) delete mode 120000 docs/README.md delete mode 100644 setup.py delete mode 100644 swh/__init__.py diff --git a/.copier-answers.yml b/.copier-answers.yml new file mode 100644 index 00000000..afb27db2 --- /dev/null +++ b/.copier-answers.yml @@ -0,0 +1,11 @@ +# Changes here will be overwritten by Copier +_commit: v0.1.5 +_src_path: https://gitlab.softwareheritage.org/swh/devel/swh-py-template.git +description: Software Heritage indexer +distribution_name: swh-indexer +have_cli: true +have_workers: true +package_root: swh/indexer +project_name: swh.indexer +python_minimal_version: '3.7' +readme_format: rst diff --git a/.gitignore b/.gitignore index f7a062ef..035b1395 100644 --- a/.gitignore +++ b/.gitignore @@ -1,17 +1,12 @@ +*.egg-info/ *.pyc -*.sw? 
-*~ -/.coverage -/.coverage.* +.coverage .eggs/ +.hypothesis +.mypy_cache +.tox __pycache__ -*.egg-info/ build/ dist/ -version.txt -/sql/createdb-stamp -/sql/filldb-stamp -.tox/ -.hypothesis/ -.mypy_cache/ -.vscode/ \ No newline at end of file +docs/README.rst +docs/README.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 02181e7e..32f68ed8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,13 +1,23 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.4.0 hooks: - id: trailing-whitespace - id: check-json - id: check-yaml + - repo: https://github.com/python/black + rev: 23.1.0 + hooks: + - id: black + + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + - repo: https://github.com/pycqa/flake8 - rev: 5.0.4 + rev: 6.0.0 hooks: - id: flake8 additional_dependencies: [flake8-bugbear==22.9.23] @@ -30,12 +40,3 @@ repos: language: system types: [python] - - repo: https://github.com/PyCQA/isort - rev: 5.11.5 - hooks: - - id: isort - - - repo: https://github.com/python/black - rev: 22.10.0 - hooks: - - id: black diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index d5bc305c..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,9 +0,0 @@ -include README.md -include Makefile -include requirements*.txt -include version.txt -include conftest.py -recursive-include sql * -recursive-include swh/indexer/sql *.sql -recursive-include swh/indexer/data * -recursive-include swh py.typed diff --git a/README.md b/README.rst similarity index 94% rename from README.md rename to README.rst index 56e255b0..80a5b98d 100644 --- a/README.md +++ b/README.rst @@ -1,25 +1,32 @@ -swh-indexer -============ +Software Heritage - Indexer +=========================== Tools to compute multiple indexes on SWH's raw contents: + - content: + - mimetype - fossology-license - metadata + - origin: + - metadata (intrinsic, using the content indexer; and extrinsic) An indexer is in charge of: + - 
looking up objects - extracting information from those objects - store those information in the swh-indexer db There are multiple indexers working on different object types: + - content indexer: works with content sha1 hashes - revision indexer: works with revision sha1 hashes - origin indexer: works with origin identifiers Indexation procedure: + - receive batch of ids - retrieve the associated data depending on object type - compute for that object some index diff --git a/docs/Makefile b/docs/Makefile index 59d8b805..6145e351 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,3 +1,2 @@ -include ../../swh-docs/Makefile.sphinx -include Makefile.local - +include swh-docs/Makefile.sphinx diff --git a/docs/README.md b/docs/README.md deleted file mode 120000 index 32d46ee8..00000000 --- a/docs/README.md +++ /dev/null @@ -1 +0,0 @@ -../README.md \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index e479d9d7..50fb0008 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,11 +1,6 @@ .. _swh-indexer: -Software Heritage - Indexer -=========================== - -Tools and workers used to mine the content of the archive and extract derived -information from archive source code artifacts. - +.. include:: README.rst .. 
toctree:: :maxdepth: 1 diff --git a/mypy.ini b/mypy.ini index 9f39ef90..7bb41e00 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,10 +1,11 @@ [mypy] namespace_packages = True warn_unused_ignores = True - +explicit_package_bases = True +# ^ Needed for mypy to detect py.typed from swh packages installed +# in editable mode # 3rd party libraries without stubs (yet) - [mypy-celery.*] ignore_missing_imports = True @@ -29,11 +30,11 @@ ignore_missing_imports = True [mypy-pyld.*] ignore_missing_imports = True -[mypy-pytest.*] -ignore_missing_imports = True - [mypy-rdflib.*] ignore_missing_imports = True [mypy-xmltodict.*] ignore_missing_imports = True + +# [mypy-add_your_lib_here.*] +# ignore_missing_imports = True diff --git a/pyproject.toml b/pyproject.toml index 69b8f4dd..4cd52669 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,50 @@ +[project] +name = "swh.indexer" +authors = [ + {name="Software Heritage developers", email="swh-devel@inria.fr"}, +] + +description = "Software Heritage indexer" +readme = {file = "README.rst", content-type = "text/x-rst"} +requires-python = ">=3.7" +classifiers = [ + "Programming Language :: Python :: 3", + "Intended Audience :: Developers", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Operating System :: OS Independent", + "Development Status :: 5 - Production/Stable", +] +dynamic = ["version", "dependencies", "optional-dependencies"] + +[tool.setuptools.packages.find] +include = ["swh.*"] + +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt", "requirements-swh.txt"]} + +[tool.setuptools.dynamic.optional-dependencies] +testing = {file = ["requirements-test.txt"]} + +[project.entry-points."swh.cli.subcommands"] +"swh.indexer" = "swh.indexer.cli" + +[project.entry-points."swh.workers"] +"swh.indexer" = "swh.indexer:register_tasks" + +[project.urls] +"Homepage" = "https://gitlab.softwareheritage.org/swh/devel/swh-indexer" +"Bug Reports" = 
"https://gitlab.softwareheritage.org/swh/devel/swh-indexer/-/issues" +"Funding" = "https://www.softwareheritage.org/donate" +"Documentation" = "https://docs.softwareheritage.org/devel/swh-indexer/" +"Source" = "https://gitlab.softwareheritage.org/swh/devel/swh-indexer.git" + +[build-system] +requires = ["setuptools", "setuptools-scm"] +build-backend = "setuptools.build_meta" + +[tool.setuptools_scm] +fallback_version = "0.0.1" + [tool.black] target-version = ['py37'] diff --git a/pytest.ini b/pytest.ini index c752097e..0a35f2c7 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,4 @@ [pytest] addopts = -p no:pytest_swh_scheduler norecursedirs = build docs .* - asyncio_mode = strict diff --git a/setup.py b/setup.py deleted file mode 100644 index a13c82ab..00000000 --- a/setup.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (C) 2015-2020 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from io import open -from os import path - -from setuptools import find_packages, setup - -here = path.abspath(path.dirname(__file__)) - -# Get the long description from the README file -with open(path.join(here, "README.md"), encoding="utf-8") as f: - long_description = f.read() - - -def parse_requirements(name=None): - if name: - reqf = "requirements-%s.txt" % name - else: - reqf = "requirements.txt" - - requirements = [] - if not path.exists(reqf): - return requirements - - with open(reqf) as f: - for line in f.readlines(): - line = line.strip() - if not line or line.startswith("#"): - continue - requirements.append(line) - return requirements - - -setup( - name="swh.indexer", - description="Software Heritage Content Indexer", - long_description=long_description, - long_description_content_type="text/markdown", - python_requires=">=3.7", - author="Software Heritage developers", - 
author_email="swh-devel@inria.fr", - url="https://forge.softwareheritage.org/diffusion/78/", - packages=find_packages(), - scripts=[], - install_requires=parse_requirements() + parse_requirements("swh"), - setup_requires=["setuptools-scm"], - use_scm_version=True, - extras_require={"testing": parse_requirements("test")}, - include_package_data=True, - entry_points=""" - [swh.cli.subcommands] - indexer=swh.indexer.cli - [swh.workers] - indexer.workers=swh.indexer:register_tasks - """, - classifiers=[ - "Programming Language :: Python :: 3", - "Intended Audience :: Developers", - "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", - "Operating System :: OS Independent", - "Development Status :: 5 - Production/Stable", - ], - project_urls={ - "Bug Reports": "https://forge.softwareheritage.org/maniphest", - "Funding": "https://www.softwareheritage.org/donate", - "Source": "https://forge.softwareheritage.org/source/swh-indexer", - "Documentation": "https://docs.softwareheritage.org/devel/swh-indexer/", - }, -) diff --git a/swh/__init__.py b/swh/__init__.py deleted file mode 100644 index b36383a6..00000000 --- a/swh/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from pkgutil import extend_path - -__path__ = extend_path(__path__, __name__) diff --git a/tox.ini b/tox.ini index b135fcc6..ed785ff3 100644 --- a/tox.ini +++ b/tox.ini @@ -1,7 +1,10 @@ [tox] -requires = - tox>4 -envlist=black,flake8,mypy,py3 +minversion = 4 +envlist = + black + flake8 + mypy + py3 [testenv] extras = @@ -10,14 +13,20 @@ deps = pytest-cov swh-scheduler[testing] >= 0.5.0 swh-storage[testing] >= 0.10.0 - dev: pdbpp commands = pytest --doctest-modules \ !slow: --hypothesis-profile=fast \ slow: --hypothesis-profile=slow \ {envsitepackagesdir}/swh/indexer \ + --import-mode importlib \ + --rootdir {envsitepackagesdir} \ --cov={envsitepackagesdir}/swh/indexer \ - --cov-branch {posargs} + --cov-branch \ + {envsitepackagesdir}/swh/indexer \ + {posargs} +# --rootdir and --import-mode are 
required to make tests that depend +# on the test file being a proper submodule of the swh namespace work after +# migration to PEP420 (implicit namespace). [testenv:black] skip_install = true @@ -39,7 +48,7 @@ commands = extras = testing deps = - mypy==1.0 + mypy==1.0.1 commands = mypy swh @@ -48,31 +57,14 @@ commands = # breaking doc build [testenv:sphinx] allowlist_externals = make -usedevelop = true -extras = - testing -deps = - -e git+https://gitlab.softwareheritage.org/swh/devel/swh-docs.git\#egg=swh.docs -setenv = - SWH_PACKAGE_DOC_TOX_BUILD = 1 - # turn warnings into errors - SPHINXOPTS = -W -commands = - make -I ../.tox/sphinx/src/swh-docs/swh/ -C docs - -# build documentation only inside swh-environment using local state -# of swh-docs package -[testenv:sphinx-dev] -allowlist_externals = make -usedevelop = true extras = testing deps = - # install swh-docs in develop mode - -e ../swh-docs + # fetch and install swh-docs + git+https://gitlab.softwareheritage.org/swh/devel/swh-docs.git\#egg=swh.docs setenv = SWH_PACKAGE_DOC_TOX_BUILD = 1 # turn warnings into errors SPHINXOPTS = -W commands = make -I {env_dir}/share/ -C docs -- GitLab