Skip to content

Commit

Permalink
Tooling, test coverage and linting. (#1)
Browse files Browse the repository at this point in the history
  • Loading branch information
EJOOSTEROP authored Aug 26, 2023
1 parent f6b4d69 commit a66214f
Show file tree
Hide file tree
Showing 12 changed files with 224 additions and 102 deletions.
31 changes: 14 additions & 17 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,44 +7,41 @@
"dockerfile": "./dockerfile",
"context": "."
},

// Features to add to the dev container. More info: https://containers.dev/features.
"features": {
"ghcr.io/devcontainers/features/common-utils:2": {
"installZsh": true,
"installOhMyZsh": true,
"installOhMyZshConfig": true,
"upgradePackages": true,
"username": "devcontainer"
},
"ghcr.io/devcontainers-contrib/features/poetry:2": {
"version": "latest"
},
"ghcr.io/devcontainers-contrib/features/nox:2": {
"version": "latest"
}
"ghcr.io/devcontainers/features/common-utils:2": {},
"ghcr.io/devcontainers-contrib/features/poetry:2": {},
"ghcr.io/devcontainers-contrib/features/nox:2": {},
"ghcr.io/devcontainers-contrib/features/pre-commit:2": {}
},

// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],

// Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "pip3 install --user -r requirements.txt",
"postCreateCommand": "poetry install" ,
"postCreateCommand": "poetry install && pre-commit install" ,
// "postCreateCommand": "poetry install" ,

// Configure tool-specific properties.
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance",
"Gruntfuggly.todo-tree"
"Gruntfuggly.todo-tree",
"GitHub.vscode-pull-request-github",
"ms-python.black-formatter",
"ms-python.flake8",
"ms-python.isort",
"njpwerner.autodocstring"
]
}
},

// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
"remoteUser": "vscode"

}
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ docs/youtube/
docs/src_docs/
docs/pdf - Copy/
.chroma/
.ruff_cache/

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
14 changes: 14 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
exclude: '^$'
fail_fast: false
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v1.2.3
hooks:
- id: trailing-whitespace
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.0.285
hooks:
- id: ruff
# args: [--fix, --exit-non-zero-on-fix]
args: [--fix]
28 changes: 14 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
<br />
<div align="center">
<a href="https://github.com/ejoosterop/quke">
<img src="media/llms.png" alt="Logo" width="240" height="160">
<img src="https://github.com/EJOOSTEROP/quke/blob/main/media/llms.png?raw=true" alt="Logo" width="240" height="160">
</a>

<h3 align="center">quke</h3>
Expand Down Expand Up @@ -78,7 +78,7 @@
<a href="#getting-started">Getting Started</a>
<ul>
<li><a href="#prerequisites">Prerequisites</a></li>
<li><a href="#installation">Installation</a></li>
<li><a href="#installation">Installation</a></li>
</ul>
</li>
<li>
Expand Down Expand Up @@ -110,7 +110,7 @@
## About The Project
Compare the answering capabilities of different LLMs - for example LlaMa, ChatGPT, Cohere, Falcon - against user provided document(s) and questions.

Specify the different models, embedding tools and vector databases in configuration files.
Specify the different models, embedding tools and vector databases in configuration files.

Maintain reproducible experiments reflecting combinations of these configurations.

Expand All @@ -122,37 +122,37 @@ Maintain reproducable experiments reflecting combinations of these configuration
The instructions assume a Python environment with [Poetry][poetry-url] installed. Development of the tool is done in Python 3.11. While Poetry is not actually needed for the tool to function, the examples assume Poetry is installed.

#### API keys
The tool uses 3rd party hosted inference APIs. API keys need to be specified as environment variables.
The tool uses 3rd party hosted inference APIs. API keys need to be specified as environment variables.

The services used:
- [HuggingFace][huggingface-url]
- [OpenAI][openai-url]
- [Cohere][cohere-url]
- [Replicate][replicate-url]

The API keys can be specified in a [.env file][.env-url]. Use the provided .env.example file as an example (enter your own API keys and rename it to '.env').
The API keys can be specified in a [.env file][.env-url]. Use the provided .env.example file as an example (enter your own API keys and rename it to '.env').

At present, all services used in the example configuration have free tiers available.

<p align="right">(<a href="#readme-top">back to top</a>)</p>

### Installation
Navigate to the directory that contains the pyproject.toml file, then execute the
Navigate to the directory that contains the pyproject.toml file, then execute the
```sh
poetry install
```
```
command.

<p align="right">(<a href="#readme-top">back to top</a>)</p>

<!-- USAGE EXAMPLES -->
## Usage
For the examples the project comes with a public financial document for a Canadian Bank (CIBC) as source pdf file.
For the examples the project comes with a public financial document for a Canadian Bank (CIBC) as source pdf file.

### Base
To run the first example, make sure to specify your HuggingFace API key.

Use the command
Use the command
```sh
poetry run quke
```
Expand All @@ -167,9 +167,9 @@ The defaults are specified in the config.yaml file (in the ./quke/conf/ director
### Specify models and embeddings
*Make sure to specify your Cohere API key before running.*

As per the configuration files, the default LLM is Falcon and the default embedding uses HuggingFace embedding.
As per the configuration files, the default LLM is Falcon and the default embedding uses HuggingFace embedding.

To specify a different LLM - Cohere in this example - run the following:
To specify a different LLM - Cohere in this example - run the following:
```sh
poetry run quke embedding=huggingface llm=cohere question=eps
```
Expand All @@ -192,7 +192,7 @@ poetry run quke embedding=huggingface llm=cohere question=eps
The LLMs, embeddings, questions and other configurations can be captured in experiment config files. The command
```sh
poetry run quke +experiment=openai
```
```
uses an experiment file openai.yaml (see folder ./config/experiments) which specifies the LLM, embedding and questions to be used. It is equivalent to running:
```sh
poetry run quke embedding=openai llm=gpt3-5 question=eps
Expand Down Expand Up @@ -227,7 +227,7 @@ Note to set `vectorstore_write_mode` to `append` or `overwrite` in the embedding
### Limitations
The free tiers for the third party services generally come with fairly strict limitations. They differ between services and may change over time.

To try out the tool with your own documents it is best to start with a single small source document, no more than two questions and only one combination of LLM/embedding.
To try out the tool with your own documents it is best to start with a single small source document, no more than two questions and only one combination of LLM/embedding.

Error messages due to limitations of the APIs are not always clearly indicated as such.

Expand All @@ -240,7 +240,7 @@ The tool uses the [LangChain][langchain-url] Python package to interact with the

In general I do not know to what extent any of the data is encrypted during transmission.

The tool shares no information with me.
The tool shares no information with me.

<p align="right">(<a href="#readme-top">back to top</a>)</p>

Expand Down
13 changes: 7 additions & 6 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@
# especially look at roughly line 51, flake/lint
import nox

nox.options.sessions = ["black", "ruff"]


# TODO: Is this an option: https://nox-poetry.readthedocs.io/en/stable/
# TODO: or a better option: https://github.com/pdm-project/pdm (instead of Poetry)
@nox.session
def flake(session):
session.install(
Expand Down Expand Up @@ -38,11 +41,9 @@ def ruff(session):
def test(session):
# Not certain this is a good approach. But it currently works.
# session.install("pytest")
# session.install("pytest-cov")

if session.posargs:
test_files = session.posargs
else:
test_files = []
session.run("pytest", "--cov=quke", "tests/")

session.run("pytest", *test_files)
# session.run("pytest")
# test_files = session.posargs if session.posargs else []
# session.run("pytest", "--cov=quke", *test_files)
84 changes: 83 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 18 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "quke"
version = "0.1.3"
version = "0.2.0"
description = "Compare the answering capabilities of different LLMs - for example LlaMa, ChatGPT, Cohere, Falcon - against user provided document(s) and questions."
authors = ["Erik Oosterop"]
maintainers = ["Erik Oosterop"]
Expand Down Expand Up @@ -41,6 +41,10 @@ rich = "^13.5.2"
pytest = "^7.4.0"
requests-mock = "^1.11.0"


[tool.poetry.group.dev.dependencies]
pytest-cov = "^4.1.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
Expand All @@ -53,16 +57,15 @@ markers = [

[tool.ruff]
include = ["*.py", "*.pyi", "**/pyproject.toml"]
fix = false
line-length = 119
select = [ # https://beta.ruff.rs/docs/rules/
"A", # prevent using keywords that clobber python builtins
"ANN", # type annotation
"B", # bugbear: security warnings
"C",
"C90",
"C",
"C90",
"D", # pydocstyle
"DAR", # darglint, but does not seem to be implemented at the moment
# "DAR", # darglint, but does not seem to be implemented at the moment
"DTZ", # date timezone
"E", # pycodestyle
"F", # pyflakes
Expand All @@ -80,19 +83,24 @@ select = [ # https://beta.ruff.rs/docs/rules/
ignore = [
"E203", # whitespace before punctuation allowed
"E501",
"ANN101", # type annotation for self
]

# fixing is off by default
fix = true
fixable = [
"F401", # Remove unused imports.
"NPY001", # Fix numpy types, which are removed in 1.24.
"RUF100", # Remove unused noqa comments.
"I", # Fix import order
"PTH", # Path.cwd()
]

[tool.ruff.per-file-ignores]
"tests/**/*.py" = [
# at least these three should be fine in tests:
"S101", # asserts allowed in tests...
"ANN", # TODO: do not care about type annotations in tests for now
"ARG", # Unused function args -> fixtures nevertheless are functionally relevant...
"FBT", # Don't care about booleans as positional arguments in tests, e.g. via @pytest.mark.parametrize()
# The below are debatable
Expand All @@ -101,5 +109,10 @@ fixable = [
"D", # no pydocstyle
]

"noxfile.py" = [
"ANN",
"D",
]

[tool.ruff.pydocstyle]
convention = "google"
Loading

0 comments on commit a66214f

Please sign in to comment.