Skip to content

Dockerize Fastpath #935

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 26 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
0d460cf
First steps
hynnot Jan 31, 2025
ef14b0a
Next steps
hynnot Mar 4, 2025
5856ed4
Merge branch 'master' into support-deploying-fastpath-as-docker-conta…
LDiazN Apr 29, 2025
9342b85
Use right source for clickhouse image
LDiazN Apr 29, 2025
8863b73
Add requests library used in tests
LDiazN May 1, 2025
62a06e4
Add docker related operations
LDiazN May 1, 2025
59572bd
Added profiles to select which services to run
LDiazN May 1, 2025
78a9dad
Add context comment
LDiazN May 1, 2025
a592191
Remove dependency on clickhouse to allow fastpath to run standalone
LDiazN May 1, 2025
dd59865
binding to 0.0.0.0 to accept traffic from outside docker
LDiazN May 1, 2025
2d0b94b
Using simpler dockerfile configuration
LDiazN May 1, 2025
d1827d1
Using simpler dockerfile configuration
LDiazN May 1, 2025
cc4be7e
Fixing dependency betwen fastpath and clickhouse
LDiazN May 1, 2025
f696e9e
Improving usability of makefile
LDiazN May 1, 2025
d9c647e
Creating multi stage docker file
LDiazN May 2, 2025
5e78cc4
Remove deprecated pkg_resources dependency
LDiazN May 2, 2025
41f7176
Skip broken tests
LDiazN May 2, 2025
522a851
Upgrade deprecated upload-artifact action in test_fastpath.yml
LDiazN May 2, 2025
2d19f51
Move fastpath.conf to the top dir
LDiazN May 2, 2025
74b62e5
Merge branch 'support-deploying-fastpath-as-docker-container' of http…
LDiazN May 2, 2025
e65dae0
remove useless print
LDiazN May 2, 2025
9f9c286
Working on adding integration tests for the docker deployment
LDiazN May 2, 2025
894d3e0
Added integration tests with docker
LDiazN May 5, 2025
6457384
Added requests to simplify testing
LDiazN May 5, 2025
397908e
Add more integration tests
LDiazN May 5, 2025
c1a1c2d
Add better healthcheck to docker compose
LDiazN May 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test_fastpath.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
fastpath/tests/test_unit.py

- name: Archive code coverage HTML pages
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: coverage
path: fastpath/htmlcov
30 changes: 30 additions & 0 deletions fastpath/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Stage 1: Building
# Installs the Python dependencies (compiling native extensions where needed)
# into /install, so the compiler toolchain never reaches the runtime image.
# NOTE(review): python:slim is a floating tag; consider pinning a version so
# builds are reproducible.
FROM python:slim AS builder

# apt-get instead of apt: apt's command-line interface is not meant for scripts.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        gcc \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy only requirements.txt first so the dependency layer stays cached until
# the requirements change.
COPY requirements.txt .

RUN pip install --prefix=/install --no-cache-dir -r requirements.txt

# Stage 2: Running
# The previous "apt update && rm -rf /var/lib/apt/lists/*" step was removed:
# it installed nothing and only added a network-dependent layer.
FROM python:slim

WORKDIR /app

# Bring the packages installed by the builder into the interpreter's prefix.
COPY --from=builder /install /usr/local
COPY . .

ENV PYTHONPATH=/app
# Default configuration; replace or bind-mount this file to customise it.
COPY fastpath.conf /etc/ooni/fastpath.conf

EXPOSE 5000

CMD ["python", "/app/run_fastpath"]
205 changes: 205 additions & 0 deletions fastpath/clickhouse_init.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
-- Create tables for Clickhouse integ tests

-- Main tables

-- Scored measurement metadata table.
-- ReplacingMergeTree sorted by (measurement_start_time, report_id, input):
-- rows sharing the same sorting key are deduplicated during background merges.
-- `update_time` is MATERIALIZED, so it is computed with now64() on insert and
-- cannot be supplied by the writer.
-- NOTE(review): `server_asn` is Int8 while `probe_asn` is UInt32; this looks too
-- narrow for AS numbers -- confirm it intentionally mirrors the production schema.
CREATE TABLE IF NOT EXISTS default.fastpath
(
`measurement_uid` String,
`report_id` String,
`input` String,
`probe_cc` String,
`probe_asn` UInt32,
`test_name` String,
`test_start_time` DateTime,
`measurement_start_time` DateTime,
`filename` String,
`scores` String,
`platform` String,
`anomaly` String,
`confirmed` String,
`msm_failure` String,
`domain` String,
`software_name` String,
`software_version` String,
`control_failure` String,
`blocking_general` Float32,
`is_ssl_expected` Int8,
`page_len` Int32,
`page_len_ratio` Float32,
`server_cc` String,
`server_asn` Int8,
`server_as_name` String,
`update_time` DateTime64(3) MATERIALIZED now64(),
`test_version` String,
`test_runtime` Float32,
`architecture` String,
`engine_name` String,
`engine_version` String,
`blocking_type` String,
`test_helper_address` LowCardinality(String),
`test_helper_type` LowCardinality(String),
`ooni_run_link_id` Nullable(UInt64)
)
ENGINE = ReplacingMergeTree
ORDER BY (measurement_start_time, report_id, input)
SETTINGS index_granularity = 8192;

-- Lookup table keyed by (report_id, input) that stores an `s3path` and a
-- `linenum` together with the measurement_uid.
-- Presumably locates the raw JSONL line of a measurement -- verify against the writer.
CREATE TABLE IF NOT EXISTS default.jsonl
(
`report_id` String,
`input` String,
`s3path` String,
`linenum` Int32,
`measurement_uid` String
)
ENGINE = MergeTree
ORDER BY (report_id, input)
SETTINGS index_granularity = 8192;

-- URL prioritisation rules per category code, country code, domain and url.
-- CollapsingMergeTree(sign): `sign` is the collapsing column, so a row is
-- cancelled by inserting a matching row with the opposite sign.
CREATE TABLE IF NOT EXISTS default.url_priorities (
`sign` Int8,
`category_code` String,
`cc` String,
`domain` String,
`url` String,
`priority` Int32
)
ENGINE = CollapsingMergeTree(sign)
ORDER BY (category_code, cc, domain, url, priority)
SETTINGS index_granularity = 1024;

-- Test-list entries: one row per (domain, url, cc, category_code).
-- The materialized views in this file join default.fastpath against this
-- table on `url`.
CREATE TABLE IF NOT EXISTS default.citizenlab
(
`domain` String,
`url` String,
`cc` FixedString(32),
`category_code` String
)
ENGINE = ReplacingMergeTree
ORDER BY (domain, url, cc, category_code)
SETTINGS index_granularity = 4;

-- Second table with the same structure as default.citizenlab.
-- Presumably used to stage a new list before swapping it in -- verify against the updater.
CREATE TABLE IF NOT EXISTS default.citizenlab_flip AS default.citizenlab;

-- Join-engine table mapping a test_name to its test_group
-- (strictness ANY, join type LEFT, key test_name).
-- Qualified with `default.` like the other tables in this file, so the result
-- does not depend on the client's current database.
CREATE TABLE IF NOT EXISTS default.test_groups (
    `test_name` String,
    `test_group` String
)
ENGINE = Join(ANY, LEFT, test_name);


-- Auth

-- Key-value table (EmbeddedRocksDB) mapping an account_id to its role.
-- Qualified with `default.` like the other tables in this file, so the result
-- does not depend on the client's current database.
CREATE TABLE IF NOT EXISTS default.accounts
(
    `account_id` FixedString(32),
    `role` String
)
ENGINE = EmbeddedRocksDB
PRIMARY KEY account_id;

-- Key-value table (EmbeddedRocksDB) storing one `threshold` timestamp per
-- account_id; the threshold defaults to the insertion time.
-- Qualified with `default.` like the other tables in this file, so the result
-- does not depend on the client's current database.
CREATE TABLE IF NOT EXISTS default.session_expunge
(
    `account_id` FixedString(32),
    `threshold` DateTime DEFAULT now()
)
ENGINE = EmbeddedRocksDB
PRIMARY KEY account_id;

-- Materialized views

-- Per-day measurement counters by (probe_cc, input), stored in a
-- SummingMergeTree so `msmt_cnt` values of rows with the same key are summed
-- during merges.
-- Fed by inserts into default.fastpath: only web_connectivity rows whose
-- input matches a citizenlab url and whose measurement_start_time falls in
-- the 8 days before insertion are counted.
-- NOTE(review): `day` is declared DateTime but the SELECT yields toDate(...);
-- confirm the implicit conversion is intended.
CREATE MATERIALIZED VIEW IF NOT EXISTS default.counters_test_list
(
`day` DateTime,
`probe_cc` String,
`input` String,
`msmt_cnt` UInt64
)
ENGINE = SummingMergeTree
PARTITION BY day
ORDER BY (probe_cc, input)
SETTINGS index_granularity = 8192 AS
SELECT
toDate(measurement_start_time) AS day,
probe_cc,
input,
count() AS msmt_cnt
FROM default.fastpath
INNER JOIN default.citizenlab ON fastpath.input = citizenlab.url
WHERE (measurement_start_time < now()) AND (measurement_start_time > (now() - toIntervalDay(8))) AND (test_name = 'web_connectivity')
GROUP BY
day,
probe_cc,
input;

-- Per-week measurement counters by (probe_cc, probe_asn, input), stored in a
-- SummingMergeTree so `msmt_cnt` values of rows with the same key are summed
-- during merges.
-- Uses the same source and filter as default.counters_test_list (inserts into
-- default.fastpath, web_connectivity only, last 8 days, input present in
-- citizenlab), with the extra probe_asn dimension and weekly buckets.
-- NOTE(review): `week` is not part of ORDER BY and there is no PARTITION BY,
-- so rows from different weeks share a summing key -- confirm this is intended.
CREATE MATERIALIZED VIEW IF NOT EXISTS default.counters_asn_test_list
(
`week` DateTime,
`probe_cc` String,
`probe_asn` UInt32,
`input` String,
`msmt_cnt` UInt64
)
ENGINE = SummingMergeTree
ORDER BY (probe_cc, probe_asn, input)
SETTINGS index_granularity = 8192 AS
SELECT
toStartOfWeek(measurement_start_time) AS week,
probe_cc,
probe_asn,
input,
count() AS msmt_cnt
FROM default.fastpath
INNER JOIN default.citizenlab ON fastpath.input = citizenlab.url
WHERE (measurement_start_time < now()) AND (measurement_start_time > (now() - toIntervalDay(8))) AND (test_name = 'web_connectivity')
GROUP BY
week,
probe_cc,
probe_asn,
input;

-- Feedback status stored per (measurement_uid, account_id); this is a regular
-- table, not a materialized view.
-- ReplacingMergeTree deduplicates rows sharing the sorting key during merges.
-- `update_time` is MATERIALIZED, so it is filled with now64() on insert.
-- Qualified with `default.` like the other tables in this file, so the result
-- does not depend on the client's current database.
CREATE TABLE IF NOT EXISTS default.msmt_feedback
(
    `measurement_uid` String,
    `account_id` String,
    `status` String,
    `update_time` DateTime64(3) MATERIALIZED now64()
)
ENGINE = ReplacingMergeTree
ORDER BY (measurement_uid, account_id)
SETTINGS index_granularity = 4;

-- DNS fingerprints, stored as a key-value table (EmbeddedRocksDB) keyed by `name`.
-- `scope` and `pattern_type` are closed enumerations; inserting a value outside
-- the listed ones is rejected by ClickHouse.
CREATE TABLE IF NOT EXISTS default.fingerprints_dns
(
`name` String,
`scope` Enum8('nat' = 1, 'isp' = 2, 'prod' = 3, 'inst' = 4, 'vbw' = 5, 'fp' = 6),
`other_names` String,
`location_found` String,
`pattern_type` Enum8('full' = 1, 'prefix' = 2, 'contains' = 3, 'regexp' = 4),
`pattern` String,
`confidence_no_fp` UInt8,
`expected_countries` String,
`source` String,
`exp_url` String,
`notes` String
)
ENGINE = EmbeddedRocksDB
PRIMARY KEY name;

-- HTTP fingerprints, stored as a key-value table (EmbeddedRocksDB) keyed by `name`.
-- Same columns as default.fingerprints_dns, but `scope` accepts two extra
-- values: 'injb' and 'prov'.
CREATE TABLE IF NOT EXISTS default.fingerprints_http
(
`name` String,
`scope` Enum8('nat' = 1, 'isp' = 2, 'prod' = 3, 'inst' = 4, 'vbw' = 5, 'fp' = 6, 'injb' = 7, 'prov' = 8),
`other_names` String,
`location_found` String,
`pattern_type` Enum8('full' = 1, 'prefix' = 2, 'contains' = 3, 'regexp' = 4),
`pattern` String,
`confidence_no_fp` UInt8,
`expected_countries` String,
`source` String,
`exp_url` String,
`notes` String
)
ENGINE = EmbeddedRocksDB
PRIMARY KEY name;

3 changes: 3 additions & 0 deletions fastpath/debian/etc/ooni/fastpath.conf
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@ db_uri = postgresql://readonly@localhost/metadb
# S3 access credentials
s3_access_key =
s3_secret_key =


clickhouse_url = clickhouse://default:default@clickhouse-server:9000
42 changes: 42 additions & 0 deletions fastpath/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Local development stack for fastpath.
# Every service is assigned to profiles, so one must be selected explicitly,
# e.g. `docker compose --profile all up`.
services:
  fastpath:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: ooni-fastpath
    ports:
      - "5000:5000"
      - "8472:8472"
    volumes:
      # Bind-mount the source tree over the image's /app so local edits are
      # visible inside the container without rebuilding.
      - .:/app
    working_dir: /app
    profiles:
      - default
      - all

  # This service is used only for testing, in prod we use the actual clickhouse db
  clickhouse-server:
    # NOTE(review): `latest` is a floating tag; consider pinning the version
    # used in production.
    image: clickhouse/clickhouse-server:latest
    container_name: clickhouse-server
    environment:
      - CLICKHOUSE_DB=default
      - CLICKHOUSE_USER=default
      - CLICKHOUSE_PASSWORD=default
    ports:
      - "9000:9000"
      - "8123:8123"
      - "9009:9009"
    volumes:
      # Schema bootstrap script, mounted read-only since the container never
      # needs to modify it.
      - ./clickhouse_init.sql:/docker-entrypoint-initdb.d/init.sql:ro
    healthcheck:
      test: ["CMD", "clickhouse-client", "--query", "select 1;"]
      # The first probe only runs after one `interval`, so keep it short to
      # report "healthy" soon after the server is up.
      interval: 5s
      retries: 12
      start_period: 60s
      timeout: 10s
    profiles:
      - all
      - clickhouse

# The top-level `clickhouse-data` named volume was removed: no service mounted
# it, so it had no effect. ClickHouse data stays ephemeral, which suits a
# test-only database.
15 changes: 15 additions & 0 deletions fastpath/fastpath.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# This is the default configuration file used by Docker. Replace it or modify it
# to configure the Docker deployment
[DEFAULT]
# Collector hostnames, comma separated
collectors = localhost

# Database connection URI
db_uri = postgresql://readonly@localhost/metadb

# S3 access credentials
s3_access_key =
s3_secret_key =


clickhouse_url = clickhouse://default:default@clickhouse-server:9000
8 changes: 4 additions & 4 deletions fastpath/fastpath/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import time
import yaml

from pkg_resources import parse_version
from packaging.version import Version
import ujson # debdeps: python3-ujson

try:
Expand Down Expand Up @@ -1367,14 +1367,14 @@ def score_signal(msm: dict) -> dict:
scores["accuracy"] = 0.0
return scores

if parse_version(tv) <= parse_version("0.2.3") and start_time >= datetime(
if Version(tv) <= Version("0.2.3") and start_time >= datetime(
2023, 11, 7
):
# https://github.com/ooni/probe/issues/2627
scores["accuracy"] = 0.0
return scores

if parse_version(tv) < parse_version("0.2.2") and start_time >= datetime(
if Version(tv) < Version("0.2.2") and start_time >= datetime(
2022, 10, 19
):
scores["accuracy"] = 0.0
Expand All @@ -1384,7 +1384,7 @@ def score_signal(msm: dict) -> dict:
# engine_version < 3.17.2 and measurement_start_time > 2023-05-02
annot = g_or(msm, "annotations", {})
ev = g_or(annot, "engine_version", "0.0.0")
if parse_version(ev) < parse_version("3.17.2") and start_time >= datetime(
if Version(ev) < Version("3.17.2") and start_time >= datetime(
2023, 5, 2
):
scores["accuracy"] = 0.0
Expand Down
4 changes: 3 additions & 1 deletion fastpath/fastpath/localhttpfeeder.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ def load_config(self):
assert key in self.cfg.settings
self.cfg.set(key, value)

self.cfg.set('reload', True)

def load(self):
return self.application

Expand All @@ -40,5 +42,5 @@ def handler_app(environ, start_response):
start_response("200 OK", [])
return [b""]

options = {"bind": f"127.0.0.1:{API_PORT}"}
options = {"bind": f"0.0.0.0:{API_PORT}"}
MsmtFeeder(handler_app, options).run()
30 changes: 30 additions & 0 deletions fastpath/fastpath/tests/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Compose stack for the Docker integration tests.
# Container ports are published on random host ports (no fixed host side), so
# several test runs can coexist on one machine.
services:
  fastpath:
    build:
      # Relative to this file: resolves to the directory holding the Dockerfile.
      context: ../../
      dockerfile: Dockerfile
    ports:
      - "5000"
      - "8472"
    working_dir: /app
    depends_on:
      # Start fastpath only once ClickHouse passes its healthcheck.
      clickhouse-server:
        condition: service_healthy

  clickhouse-server:
    image: clickhouse/clickhouse-server:latest
    environment:
      - CLICKHOUSE_USER=default
      - CLICKHOUSE_PASSWORD=default
    ports:
      - "9000"
      - "8123"
      - "9009"
    volumes:
      # Schema bootstrap script, mounted read-only since the container never
      # needs to modify it.
      - ../../clickhouse_init.sql:/docker-entrypoint-initdb.d/init.sql:ro
    healthcheck:
      test: ["CMD", "clickhouse-client", "--query", "select 1;"]
      # The first probe runs one `interval` after container start, and fastpath
      # waits for "healthy". A 30s interval delayed every test run by at least
      # 30s; probe every 5s instead and allow more retries to compensate.
      interval: 5s
      retries: 12
      start_period: 60s
      timeout: 10s
Loading
Loading