diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..eb50fa2 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,76 @@ +name: Deploy ingestor + +on: + push: + branches: + - version/aeolus + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Configure aws credentials + uses: aws-actions/configure-aws-credentials@master + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_PROD }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_KEY_PROD }} + aws-region: ${{ secrets.AWS_REGION }} + + - name: Get envionmental values + uses: aws-actions/aws-secretsmanager-get-secrets@v2 + with: + secret-ids: | + AEOLUS, openaq-env/aeolus + name-transformation: uppercase + parse-json-secrets: true + + - uses: actions/setup-node@v4 + with: + node-version: "20" + + + - name: Install CDK + run: | + npm install -g aws-cdk + + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install Poetry + uses: snok/install-poetry@v1 + + - name: Deploy stack + env: + ENV: "aeolus" + PROJECT: "openaq" + + ## deployment variables + # CDK_ACCOUNT: ${{ secrets.CDK_ACCOUNT }} + # CDK_REGION: ${{ secrets.CDK_REGION }} + + VPC_ID: ${{ env.AEOLUS_VPC_ID }} + + TOPIC_ARN: ${{ env.AEOLUS_FETCH_OBJECT_TOPIC_ARN }} + + ## application variables + DATABASE_READ_USER: ${{ env.AEOLUS_DATABASE_READ_USER }} + DATABASE_READ_PASSWORD: ${{ env.AEOLUS_DATABASE_READ_PASSWORD }} + DATABASE_WRITE_USER: ${{ env.AEOLUS_DATABASE_WRITE_USER }} + DATABASE_WRITE_PASSWORD: ${{ env.AEOLUS_DATABASE_WRITE_PASSWORD }} + DATABASE_DB: ${{ env.AEOLUS_DATABASE_DB }} + DATABASE_HOST: ${{ env.AEOLUS_DATABASE_HOST }} + DATABASE_PORT: ${{ env.AEOLUS_DATABASE_PORT }} + FETCH_BUCKET: ${{ env.AEOLUS_FETCH_BUCKET }} + ETL_BUCKET: ${{ env.AEOLUS_FETCH_BUCKET }} + PAUSE_INGESTING: False + + + working-directory: ./cdk + run: | + poetry self add poetry-plugin-export + poetry install + cdk deploy openaq-ingest-aeolus --require-approval never diff --git a/README.md b/README.md index d90d3e4..a5a4068 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,5 @@ # Testing a realtime file + +# Testing files diff --git a/benchmark.py b/benchmark.py new file mode 100644 index 0000000..e58dbda --- /dev/null +++ b/benchmark.py @@ -0,0 +1,95 @@ +import logging +import os +import sys +import argparse +from time import time +import re + +logger = logging.getLogger(__name__) + +parser = argparse.ArgumentParser( + description=""" +Test benchmarks for ingestion + """) + +parser.add_argument( + '--name', + type=str, + required=False, + default="4xlarge", + help='Name to use for the test' + ) +parser.add_argument( + '--env', + type=str, + default='.env', + required=False, + help='The dot env file to use' + ) +parser.add_argument( + '--debug', + action="store_true", + help='Output at DEBUG level' + ) +args = parser.parse_args() + +if 'DOTENV' not in os.environ.keys() and args.env is not None: + os.environ['DOTENV'] = args.env + +if args.debug: + os.environ['LOG_LEVEL'] = 'DEBUG' + +from ingest.settings import settings +from fake import config, get_locations, as_realtime +from ingest.fetch import load_realtime + +logging.basicConfig( + format='[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s', + level=settings.LOG_LEVEL.upper(), + force=True, +) + +f = open(f"benchmark_ingest_output_{args.name}.csv", "w") 
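A note on ordering in benchmark.py above: DOTENV and LOG_LEVEL are pushed into os.environ before the "from ingest.settings import settings" import because the settings object is constructed at import time and reads its env file then. The real ingest/settings.py is not shown in this diff, so the following is only a sketch of that pattern, modeled on the cdk/config.py changes further down:

    # Sketch only; assumes ingest/settings.py follows the same pydantic-settings
    # pattern as cdk/config.py in this diff. Settings() is built when the module
    # is first imported, so DOTENV, LOG_LEVEL, etc. must already be in os.environ.
    import os
    from pydantic_settings import BaseSettings, SettingsConfigDict

    class Settings(BaseSettings):
        LOG_LEVEL: str = "INFO"        # benchmark.py flips this to DEBUG via --debug
        USE_TEMP_TABLES: bool = True   # check.py --keep turns this off further down

        model_config = SettingsConfigDict(
            extra="ignore",
            env_file=os.environ.get("DOTENV", ".env"),
            env_file_encoding="utf-8",
        )

    settings = Settings()  # evaluated once, on first import

With that ordering, a run such as "python benchmark.py --name 4xlarge --env .env.benchmark --debug" (file names illustrative) picks up the intended environment before any ingest code loads, then writes one CSV row per generated key in the loop that follows.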
+f.writelines("name,key,locations,inserted_nodes,updated_nodes,total_meas,inserted_meas,ingest_time,process_time,log_time,copy_time,load_process_time\n") +n = 10 +locations = [50, 250, 1000] +keys = [] +ii = 1 + +## make a set of files +for r in locations: + for i in range(n): + config(source=f"benchmark-test-{r}-{i+1}", gz=True) + l = get_locations(n=r) + key = as_realtime(l["locations"], l["latitude"], l["longitude"]) + keys.append({ "key": key, "locations": len(l["locations"]) }) + ii=+1 + + +## ingest each of the +for i, k in enumerate(keys): + key = k["key"] + locations = k["locations"] + logger.info(f"Ingesting {i+1} of {len(keys)}: {key} with {locations} locations") + + start_time = time() + copy_time, load_process_time, log_time, notice = load_realtime([ + (-1, key, None) + ]) + m = re.findall('([a-z-]+): (.+?),', notice) + + process_time = round(float(m[17][1])) + total_meas = int(m[0][1]) + inserted_meas = int(m[9][1]) + updated_nodes = int(m[8][1]) + inserted_nodes = int(m[11][1]) + ingest_time = round((time() - start_time)*1000) + f.writelines(f"'{args.name}','{key}',{locations},{inserted_nodes},{updated_nodes},{total_meas},{inserted_meas},{ingest_time},{process_time},{log_time},{copy_time},{load_process_time}\n") + + logger.info( + "loaded realtime records, timer: %0.4f, process: %0.4f", + ingest_time, process_time + ) + + +f.close() diff --git a/cdk/app.py b/cdk/app.py index 8318018..e0b9a04 100644 --- a/cdk/app.py +++ b/cdk/app.py @@ -3,6 +3,7 @@ Environment, Tags, ) +import os from lambda_ingest_stack import LambdaIngestStack @@ -19,16 +20,23 @@ app = aws_cdk.App() +env = Environment( + account=os.environ['CDK_DEFAULT_ACCOUNT'], + region=os.environ['CDK_DEFAULT_REGION'] + ) + ingest = LambdaIngestStack( app, f"openaq-ingest-{settings.ENV}", env_name=settings.ENV, lambda_env=lambda_env, fetch_bucket=settings.FETCH_BUCKET, - ingest_lambda_timeout=settings.INGEST_LAMBDA_TIMEOUT, - ingest_lambda_memory_size=settings.INGEST_LAMBDA_MEMORY_SIZE, - ingest_rate_minutes=settings.INGEST_RATE_MINUTES, + vpc_id=settings.VPC_ID, + lambda_timeout=settings.LAMBDA_TIMEOUT, + lambda_memory_size=settings.LAMBDA_MEMORY_SIZE, + rate_minutes=settings.RATE_MINUTES, topic_arn=settings.TOPIC_ARN, + env=env, ) Tags.of(ingest).add("project", settings.PROJECT) diff --git a/cdk/cdk.json b/cdk/cdk.json index f1770f9..76af4a2 100644 --- a/cdk/cdk.json +++ b/cdk/cdk.json @@ -1,5 +1,5 @@ { - "app": "python3.8 app.py", + "app": "poetry run python app.py", "context": { "aws-cdk:enableDiffNoFail": "true", "@aws-cdk/core:stackRelativeExports": "true", diff --git a/cdk/config.py b/cdk/config.py index ccae88d..03cb150 100644 --- a/cdk/config.py +++ b/cdk/config.py @@ -1,5 +1,8 @@ from typing import List -from pydantic import BaseSettings +from pydantic_settings import ( + BaseSettings, + SettingsConfigDict, + ) from pathlib import Path from os import environ @@ -8,18 +11,17 @@ class Settings(BaseSettings): FETCH_BUCKET: str ENV: str = "staging" PROJECT: str = "openaq" - INGEST_LAMBDA_TIMEOUT: int = 900 - INGEST_LAMBDA_MEMORY_SIZE: int = 1536 - INGEST_RATE_MINUTES: int = 15 + LAMBDA_TIMEOUT: int = 900 + LAMBDA_MEMORY_SIZE: int = 1536 + RATE_MINUTES: int = 15 LOG_LEVEL: str = 'INFO' TOPIC_ARN: str = None + VPC_ID: str = None - class Config: - parent = Path(__file__).resolve().parent.parent - if 'DOTENV' in environ: - env_file = Path.joinpath(parent, environ['DOTENV']) - else: - env_file = Path.joinpath(parent, ".env") + + model_config = SettingsConfigDict( + extra="ignore", env_file=f"../{environ.get('DOTENV', 
'.env')}", env_file_encoding="utf-8" + ) settings = Settings() diff --git a/cdk/lambda_ingest_stack.py b/cdk/lambda_ingest_stack.py index 3b2e380..b12d179 100644 --- a/cdk/lambda_ingest_stack.py +++ b/cdk/lambda_ingest_stack.py @@ -2,8 +2,10 @@ from typing import Dict from aws_cdk import ( + Environment, aws_lambda, aws_s3, + aws_ec2, Stack, Duration, aws_events, @@ -24,18 +26,23 @@ def __init__( self, scope: Construct, id: str, + env: Environment, env_name: str, lambda_env: Dict, fetch_bucket: str, - ingest_lambda_timeout: int, - ingest_lambda_memory_size: int, - ingest_rate_minutes: int = 15, + lambda_timeout: int, + lambda_memory_size: int, + rate_minutes: int = 15, topic_arn: str = None, + vpc_id: str = None, **kwargs, ) -> None: """Lambda plus cronjob to ingest metadata, realtime and pipeline data""" - super().__init__(scope, id, *kwargs) + super().__init__(scope, id, env=env,*kwargs) + + if vpc_id is not None: + vpc_id = aws_ec2.Vpc.from_lookup(self, f"{id}-vpc", vpc_id=vpc_id) ingest_function = aws_lambda.Function( self, @@ -58,11 +65,12 @@ def __init__( ], ), handler="ingest.handler.handler", - runtime=aws_lambda.Runtime.PYTHON_3_8, + vpc=vpc_id, + runtime=aws_lambda.Runtime.PYTHON_3_12, allow_public_subnet=True, - memory_size=ingest_lambda_memory_size, + memory_size=lambda_memory_size, environment=stringify_settings(lambda_env), - timeout=Duration.seconds(ingest_lambda_timeout), + timeout=Duration.seconds(lambda_timeout), layers=[ create_dependencies_layer( self, @@ -81,12 +89,12 @@ def __init__( # Set how often the ingester will run # If 0 the ingester will not run automatically - if ingest_rate_minutes > 0: + if rate_minutes > 0: aws_events.Rule( self, f"{id}-ingest-event-rule", schedule=aws_events.Schedule.cron( - minute=f"0/{ingest_rate_minutes}" + minute=f"0/{rate_minutes}" ), targets=[ aws_events_targets.LambdaFunction(ingest_function), diff --git a/cdk/requirements.txt b/cdk/requirements.txt deleted file mode 100644 index f44b370..0000000 --- a/cdk/requirements.txt +++ /dev/null @@ -1,14 +0,0 @@ -attrs==21.4.0 -aws-cdk-lib==2.3.0 -aws-cdk.aws-apigatewayv2-alpha==2.3.0a0 -aws-cdk.aws-apigatewayv2-integrations-alpha==2.3.0a0 -cattrs==22.1.0 -constructs==10.1.16 -exceptiongroup==1.0.0rc7 -jsii==1.59.0 -publication==0.0.3 -pydantic==1.9.1 -python-dateutil==2.8.2 -python-dotenv==0.20.0 -six==1.16.0 -typing_extensions==4.2.0 diff --git a/cdk/utils.py b/cdk/utils.py index 42bea63..1e7cec4 100644 --- a/cdk/utils.py +++ b/cdk/utils.py @@ -19,20 +19,22 @@ def create_dependencies_layer( function_name: str, requirements_path: Path ) -> aws_lambda.LayerVersion: - requirements_file = str(requirements_path.resolve()) + #requirements_file = str(requirements_path.resolve()) output_dir = f'../.build/{function_name}' layer_id = f'openaq-{function_name}-{env_name}-dependencies' - if not environ.get('SKIP_PIP'): - print(f'Building {layer_id} from {requirements_file} into {output_dir}') + if not environ.get('SKIP_BUILD'): + print(f'Building {layer_id} into {output_dir}') subprocess.run( - f"""python3.8 -m pip install -qq -r {requirements_file} \ + f""" + poetry export --without=cdk -o requirements.txt --without-hashes && \ + poetry run python -m pip install -qq -r requirements.txt \ -t {output_dir}/python && \ cd {output_dir}/python && \ find . -type f -name '*.pyc' | \ while read f; do n=$(echo $f | \ sed 's/__pycache__\///' | \ - sed 's/.cpython-[2-3] [0-9]//'); \ + sed 's/.cpython-[2-3][0-9]//'); \ cp $f $n; \ done \ && find . 
-type d -a -name '__pycache__' -print0 | xargs -0 rm -rf \ @@ -47,5 +49,5 @@ def create_dependencies_layer( self, layer_id, code=layer_code, - compatible_runtimes=[aws_lambda.Runtime.PYTHON_3_8] + compatible_runtimes=[aws_lambda.Runtime.PYTHON_3_12] ) diff --git a/check.py b/check.py index 22976cf..71020fc 100644 --- a/check.py +++ b/check.py @@ -1,9 +1,11 @@ import argparse import logging import os -import json +import sys +import orjson +import psycopg2 -logger = logging.getLogger(__name__) +logger = logging.getLogger('check.py') #os.chdir('/home/christian/git/caparker/openaq-ingestor/ingest') #print(os.getcwd()) @@ -16,6 +18,12 @@ """) parser.add_argument('--id', type=int, required=False, help='The fetchlogs_id value') +parser.add_argument('--file', type=str, required=False, + help='A local file to load') +parser.add_argument('--batch', type=str, required=False, + help='The batch id value. Loads files based on batch uuid.') +parser.add_argument('--pattern', type=str, required=False, + help='A reqex to match keys for loading') parser.add_argument('--env', type=str, required=False, help='The dot env file to use') parser.add_argument('--profile', type=str, required=False, @@ -24,11 +32,11 @@ help="""Either the number of entries to list (sorted by date) or the number of days to go back if using the summary or rejects arguments""") -parser.add_argument('--pipeline', type=int, required=False, default=1, +parser.add_argument('--pipeline', type=int, required=False, default=0, help="""The number of pipeline files to load at a time""") -parser.add_argument('--metadata', type=int, required=False, default=1, +parser.add_argument('--metadata', type=int, required=False, default=0, help="""The number of metadata files to load at a time""") -parser.add_argument('--realtime', type=int, required=False, default=1, +parser.add_argument('--realtime', type=int, required=False, default=0, help="""The number of realtime files to load at a time""") parser.add_argument('--fix', action="store_true", help='Automatically attempt to fix the problem') @@ -48,6 +56,8 @@ help='Show list of errors') parser.add_argument('--resubmit', action="store_true", help='Mark the fetchlogs file for resubmittal') +parser.add_argument('--keep', action="store_true", + help='Do not use TEMP tables for the ingest staging tables') args = parser.parse_args() if 'DOTENV' not in os.environ.keys() and args.env is not None: @@ -62,32 +72,42 @@ if args.debug: os.environ['LOG_LEVEL'] = 'DEBUG' +if args.keep: + os.environ['USE_TEMP_TABLES'] = 'False' + from botocore.exceptions import ClientError -from ingest.handler import cronhandler, logger +from ingest.handler import cronhandler from ingest.settings import settings from ingest.lcs import ( - load_metadata_db, - load_measurements_db, - load_measurements_file, + load_metadata, + load_metadata_batch, +) + +from ingest.lcsV2 import ( load_measurements, - get_measurements, + load_measurements_batch, ) from ingest.fetch import ( load_realtime, + create_staging_table, parse_json, ) from ingest.utils import ( + load_fetchlogs, load_errors_list, load_errors_summary, load_rejects_summary, + get_data, get_object, put_object, get_logs_from_ids, get_logs_from_pattern, mark_success, + StringIteratorIO, + deconstruct_path, ) @@ -107,18 +127,19 @@ def check_realtime_key(key: str, fix: bool = False): n = len(lines) errors = [] for jdx, line in enumerate(lines): - try: - # first just try and load it - obj = json.loads(line) - except Exception as e: - errors.append(jdx) - print(f"*** Loading error on line 
#{jdx} (of {n}): {e}\n{line}") - try: - # then we can try to parse it - parse_json(obj) - except Exception as e: - errors.append(jdx) - print(f"*** Parsing error on line #{jdx} (of {n}): {e}\n{line}") + if len(line) > 0: + try: + # first just try and load it + obj = orjson.loads(line) + except Exception as e: + errors.append(jdx) + print(f"*** Loading error on line #{jdx} (of {n}): {e}\n{line}") + try: + # then we can try to parse it + parse_json(obj) + except Exception as e: + errors.append(jdx) + print(f"*** Parsing error on line #{jdx} (of {n}): {e}\n{line}") if len(errors) > 0 and fix: # remove the bad rows and then replace the file @@ -135,6 +156,15 @@ def check_realtime_key(key: str, fix: bool = False): mark_success(key=key, reset=True) +if args.file is not None: + # check if the files exists + # is it a realtime file or a lcs file? + # upload the file + load_realtime([ + (-1, args.file, None) + ]) + sys.exit() + # If we have passed an id than we check that if args.id is not None: # get the details for that id @@ -142,27 +172,65 @@ def check_realtime_key(key: str, fix: bool = False): # get just the keys keys = [log[1] for log in logs] # loop through and check each + logger.info(f"Downloading {len(keys)} files") for idx, key in enumerate(keys): + if args.download: + # we may be using the new source pat + p = deconstruct_path(key) + download_path = f'~/Downloads/{p["bucket"]}/{p["key"]}'; + logger.info(f'downloading to {download_path}') + txt = get_object(**p) + fpath = os.path.expanduser(download_path) + os.makedirs(os.path.dirname(fpath), exist_ok=True) + with open(fpath.replace('.gz', ''), 'w') as f: + f.write(txt) # if we are resubmiting we dont care # what type of file it is - if args.resubmit: + elif args.resubmit: mark_success(key, reset=True, message='resubmitting') # figure out what type of file it is elif 'realtime' in key: if args.load: - load_realtime([key]) + load_realtime([ + (args.id, key, None) + ]) else: check_realtime_key(key, args.fix) + elif 'stations' in key: + load_metadata([ + {"id": args.id, "Key": key, "LastModified": None} + ]) else: - print(key) + load_measurements([ + (args.id, key, None) + ]) + +elif args.batch is not None: + # load_measurements_batch(args.batch) + load_metadata_batch(args.batch) + +elif args.pattern is not None: + keys = load_fetchlogs(pattern=args.pattern, limit=25, ascending=True) + # loop through and check each + for row in keys: + id = row[0] + key = row[1] + last = row[2] + logger.debug(f"{key}: {id}") + if args.load: + if 'realtime' in key: + load_realtime([ + (id, key, last) + ]) + elif 'stations' in key: + load_metadata([ + {"id": id, "Key": key, "LastModified": last} + ]) + else: + load_measurements([ + (id, key, last) + ]) - if args.download: - print(f'downloading: {key}') - txt = get_object(key) - fpath = os.path.expanduser(f'~/{key}') - os.makedirs(os.path.dirname(fpath), exist_ok=True) - with open(fpath.replace('.gz',''), 'w') as f: - f.write(txt) # Otherwise if we set the summary flag return a daily summary of errors diff --git a/ingest/etl_process_measurements.sql b/ingest/etl_process_measurements.sql new file mode 100644 index 0000000..7c7a44a --- /dev/null +++ b/ingest/etl_process_measurements.sql @@ -0,0 +1,503 @@ +-- lcs_meas_ingest +DO $$ +DECLARE +__process_start timestamptz := clock_timestamp(); +__total_measurements int; +__inserted_measurements int; +__rejected_measurements int := 0; +__rejected_nodes int := 0; +__total_nodes int := 0; +__updated_nodes int := 0; +__inserted_nodes int := 0; +__exported_days int; 
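An aside on the counters declared in this block: they feed the ingest_stats upsert and the closing RAISE NOTICE at the end of the script, and that notice text is what load_realtime() hands back to benchmark.py above, which currently pulls values out by regex position (m[17][1], m[0][1], and so on). A name-based parse of the same "key: value, key: value" summary format would be less brittle if fields are ever added; this is a suggestion sketched here, not code from this change set:

    # Sketch: turn an ingest summary NOTICE such as
    # "inserted-measurements: 123, process-time-ms: 456, ..." into a dict keyed
    # by field name instead of indexing re.findall() output positionally.
    import re

    def parse_ingest_notice(notice: str) -> dict:
        # same pattern benchmark.py uses, extended so the final pair
        # (which has no trailing comma) is captured as well
        pairs = re.findall(r'([a-z-]+): (.+?)(?:,|$)', notice)
        return {name: value.strip() for name, value in pairs}

    # usage: parse_ingest_notice(notice)["inserted-measurements"]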
+__start_datetime timestamptz; +__end_datetime timestamptz; +__inserted_start_datetime timestamptz; +__inserted_end_datetime timestamptz; +__process_time_ms int; +__insert_time_ms int; +__cache_time_ms int; +__error_context text; +__ingest_method text := 'lcs'; +BEGIN + + +DELETE +FROM staging_measurements +WHERE ingest_id IS NULL +OR datetime is NULL +OR value IS NULL; + +--DELETE +--FROM staging_measurements +--WHERE datetime < '2018-01-01'::timestamptz +--OR datetime>now(); + +DELETE +FROM rejects +WHERE fetchlogs_id IN (SELECT fetchlogs_id FROM staging_measurements) +AND tbl ~* '^meas'; + + +SELECT COUNT(1) +, MIN(datetime) +, MAX(datetime) +INTO __total_measurements +, __start_datetime +, __end_datetime +FROM staging_measurements; + + +-- The ranking is to deal with the current possibility +-- that duplicate sensors with the same ingest/source id are created + -- this is a short term fix + -- a long term fix would not allow duplicate source_id's +WITH staged_sensors AS ( + -- this first part significantly speeds it up on slow machines + SELECT DISTINCT ingest_id + FROM staging_measurements +), ranked_sensors AS ( + SELECT s.sensors_id + , s.source_id + , RANK() OVER (PARTITION BY s.source_id ORDER BY added_on ASC) as rnk + FROM sensors s + JOIN staged_sensors m ON (s.source_id = m.ingest_id) +), active_sensors AS ( + SELECT source_id + , sensors_id + FROM ranked_sensors + WHERE rnk = 1) + UPDATE staging_measurements + SET sensors_id=s.sensors_id + FROM active_sensors s + WHERE s.source_id=ingest_id; + + +-- Now we have to fill in any missing information +-- first add the nodes and systems that dont exist +-- add just the bare minimum amount of data to the system +-- we assume that the node information will be added later +WITH nodes AS ( +INSERT INTO sensor_nodes ( + source_name +, site_name +, source_id +, metadata) +SELECT source_name +, source_name +, source_id +, jsonb_build_object('fetchlogs_id', MIN(fetchlogs_id)) +FROM staging_measurements +WHERE sensors_id IS NULL +GROUP BY 1,2,3 +ON CONFLICT (source_name, source_id) DO UPDATE +SET source_id = EXCLUDED.source_id +, metadata = EXCLUDED.metadata||COALESCE(sensor_nodes.metadata, '{}'::jsonb) +RETURNING sensor_nodes_id, source_id) +INSERT INTO sensor_systems ( + sensor_nodes_id +, source_id) +SELECT sensor_nodes_id +, source_id +FROM nodes +ON CONFLICT DO NOTHING; + +-- now create a sensor for each +-- this method depends on us having a match for the parameter +WITH sen AS ( + SELECT ingest_id + , source_name + , source_id + , measurand as parameter + FROM staging_measurements + WHERE sensors_id IS NULL + GROUP BY 1,2,3,4 +), inserts AS ( +INSERT INTO sensors (sensor_systems_id, measurands_id, source_id) +SELECT sy.sensor_systems_id +, m.measurands_id +, ingest_id +FROM sen s +JOIN measurands_map_view m ON (s.parameter = m.key) +JOIN sensor_nodes n ON (s.source_name = n.source_name AND s.source_id = n.source_id) +JOIN sensor_systems sy ON (sy.sensor_nodes_id = n.sensor_nodes_id AND s.source_id = sy.source_id) +ON CONFLICT DO NOTHING +RETURNING sensor_systems_id) +SELECT COUNT(DISTINCT sensor_systems_id) INTO __inserted_nodes +FROM inserts; + +-- try again to find the sensors +UPDATE staging_measurements +SET sensors_id=s.sensors_id +FROM sensors s +WHERE s.source_id=ingest_id +AND staging_measurements.sensors_id IS NULL; + + +SELECT COUNT(DISTINCT sensors_id) +INTO __total_nodes +FROM staging_measurements; + + +__process_time_ms := 1000 * (extract(epoch FROM clock_timestamp() - __process_start)); + +-- reject any missing. 
Most likely due to issues +-- with the measurand +WITH r AS ( +INSERT INTO rejects (t,tbl,r,fetchlogs_id) +SELECT + current_timestamp + , 'meas-missing-sensors-id' + , to_jsonb(staging_measurements) + , fetchlogs_id +FROM staging_measurements +WHERE sensors_id IS NULL +RETURNING 1) +SELECT COUNT(1) INTO __rejected_measurements +FROM r; + +-- restart the clock to measure just inserts +__process_start := clock_timestamp(); + +WITH inserts AS ( +INSERT INTO measurements ( + sensors_id, + datetime, + value, + lon, + lat +) SELECT + --DISTINCT + sensors_id, + datetime, + value, + lon, + lat +FROM staging_measurements +WHERE sensors_id IS NOT NULL +ON CONFLICT DO NOTHING +RETURNING sensors_id, datetime, value, lat, lon +), inserted as ( + INSERT INTO staging_inserted_measurements (sensors_id, datetime, value, lat, lon) + SELECT sensors_id + , datetime + , value + , lat + , lon + FROM inserts + RETURNING sensors_id, datetime +) +SELECT MIN(datetime) +, MAX(datetime) +, COUNT(1) +INTO __inserted_start_datetime +, __inserted_end_datetime +, __inserted_measurements +FROM inserted; + +__insert_time_ms := 1000 * (extract(epoch FROM clock_timestamp() - __process_start)); + +-- mark the fetchlogs as done +WITH inserted AS ( + SELECT m.fetchlogs_id + , COUNT(m.*) as n_records + , COUNT(t.*) as n_inserted + , MIN(m.datetime) as fr_datetime + , MAX(m.datetime) as lr_datetime + , MIN(t.datetime) as fi_datetime + , MAX(t.datetime) as li_datetime + FROM staging_measurements m + LEFT JOIN staging_inserted_measurements t ON (t.sensors_id = m.sensors_id AND t.datetime = m.datetime) + GROUP BY m.fetchlogs_id) +UPDATE fetchlogs +SET completed_datetime = CURRENT_TIMESTAMP +, inserted = COALESCE(n_inserted, 0) +, records = COALESCE(n_records, 0) +, first_recorded_datetime = fr_datetime +, last_recorded_datetime = lr_datetime +, first_inserted_datetime = fi_datetime +, last_inserted_datetime = li_datetime +FROM inserted +WHERE inserted.fetchlogs_id = fetchlogs.fetchlogs_id; + +-- track the time required to update cache tables +__process_start := clock_timestamp(); + +-- -- Now we can use those staging_inserted_measurements to update the cache tables +-- INSERT INTO sensors_latest ( +-- sensors_id +-- , datetime +-- , value +-- , lat +-- , lon +-- ) +-- ---- identify the row that has the latest value +-- WITH numbered AS ( +-- SELECT sensors_id +-- , datetime +-- , value +-- , lat +-- , lon +-- , row_number() OVER (PARTITION BY sensors_id ORDER BY datetime DESC) as rn +-- FROM staging_inserted_measurements +-- ), latest AS ( +-- ---- only insert those rows +-- SELECT sensors_id +-- , datetime +-- , value +-- , lat +-- , lon +-- FROM numbered +-- WHERE rn = 1 +-- ) +-- SELECT l.sensors_id +-- , l.datetime +-- , l.value +-- , l.lat +-- , l.lon +-- FROM latest l +-- LEFT JOIN sensors_latest sl ON (l.sensors_id = sl.sensors_id) +-- WHERE sl.sensors_id IS NULL +-- OR l.datetime > sl.datetime +-- ON CONFLICT (sensors_id) DO UPDATE +-- SET datetime = EXCLUDED.datetime +-- , value = EXCLUDED.value +-- , lat = EXCLUDED.lat +-- , lon = EXCLUDED.lon +-- , modified_on = now() +-- --, fetchlogs_id = EXCLUDED.fetchlogs_id +-- ; +-- update the exceedances +INSERT INTO sensor_exceedances (sensors_id, threshold_value, datetime_latest) + SELECT + m.sensors_id + , t.value + , MAX(datetime) + FROM staging_inserted_measurements m + JOIN sensors s ON (m.sensors_id = s.sensors_id) + JOIN thresholds t ON (s.measurands_id = t.measurands_id) + AND m.value > t.value + GROUP BY 1, 2 + ON CONFLICT (sensors_id, threshold_value) DO UPDATE SET + 
datetime_latest = GREATEST(sensor_exceedances.datetime_latest, EXCLUDED.datetime_latest) + , updated_on = now(); + + +INSERT INTO sensors_rollup ( + sensors_id + , datetime_first + , datetime_last + , value_latest + , value_count + , value_avg + , value_sd + , value_min + , value_max + , geom_latest + ) +---- identify the row that has the latest value +WITH numbered AS ( + SELECT sensors_id + , datetime + , value + , lat + , lon + , sum(1) OVER (PARTITION BY sensors_id) as value_count + , min(datetime) OVER (PARTITION BY sensors_id) as datetime_min + , avg(value) OVER (PARTITION BY sensors_id) as value_avg + , stddev(value) OVER (PARTITION BY sensors_id) as value_sd + , row_number() OVER (PARTITION BY sensors_id ORDER BY datetime DESC) as rn + FROM staging_inserted_measurements +), latest AS ( +---- only insert those rows + SELECT sensors_id + , datetime + , value + , value_count + , value_avg + , value_sd + , datetime_min + , lat + , lon + FROM numbered + WHERE rn = 1 +) +SELECT l.sensors_id +, l.datetime_min -- first +, l.datetime -- last +, l.value -- last value +, l.value_count +, l.value_avg +, COALESCE(l.value_sd, 0) +, l.value -- min +, l.value -- max +, public.pt3857(lon, lat) +FROM latest l +LEFT JOIN sensors_rollup sr ON (l.sensors_id = sr.sensors_id) +WHERE sr.sensors_id IS NULL +OR l.datetime > sr.datetime_last +OR l.datetime_min < sr.datetime_first +ON CONFLICT (sensors_id) DO UPDATE +SET datetime_last = GREATEST(sensors_rollup.datetime_last, EXCLUDED.datetime_last) +, value_latest = CASE WHEN EXCLUDED.datetime_last > sensors_rollup.datetime_last + THEN EXCLUDED.value_latest + ELSE sensors_rollup.value_latest + END +, geom_latest = CASE WHEN EXCLUDED.datetime_last > sensors_rollup.datetime_last + THEN EXCLUDED.geom_latest + ELSE sensors_rollup.geom_latest + END +, value_count = sensors_rollup.value_count + EXCLUDED.value_count +, value_min = LEAST(sensors_rollup.value_min, EXCLUDED.value_latest) +, value_max = GREATEST(sensors_rollup.value_max, EXCLUDED.value_latest) +, datetime_first = LEAST(sensors_rollup.datetime_first, EXCLUDED.datetime_first) +, modified_on = now() +--, fetchlogs_id = EXCLUDED.fetchlogs_id +; + + +-- Update the table that will help to track hourly rollups +-- this is a replacement to the hourly stats table + WITH inserted_hours AS ( + -- first we group things, adding an hour to make it time-ending after truncating + SELECT datetime + '1h'::interval as datetime + , utc_offset(datetime + '1h'::interval, tz.tzid) as tz_offset + FROM staging_inserted_measurements m + JOIN sensors s ON (s.sensors_id = m.sensors_id) + JOIN sensor_systems sy ON (s.sensor_systems_id = sy.sensor_systems_id) + JOIN sensor_nodes sn ON (sy.sensor_nodes_id = sn.sensor_nodes_id) + JOIN timezones tz ON (sn.timezones_id = tz.timezones_id) + GROUP BY 1, 2 + ) + INSERT INTO hourly_data_queue (datetime, tz_offset) + SELECT as_utc_hour(datetime, tz_offset), tz_offset + FROM inserted_hours + GROUP BY 1, 2 + ON CONFLICT (datetime, tz_offset) DO UPDATE + SET modified_on = now(); + + +--Update the export queue/logs to export these records +--wrap it in a block just in case the database does not have this module installed +--we subtract the second because the data is assumed to be time ending +WITH e AS ( +INSERT INTO open_data_export_logs (sensor_nodes_id, day, records, measurands, modified_on) +SELECT sn.sensor_nodes_id +, ((m.datetime - '1sec'::interval) AT TIME ZONE (COALESCE(sn.metadata->>'timezone', 'UTC'))::text)::date as day +, COUNT(1) +, COUNT(DISTINCT p.measurands_id) +, MAX(now()) 
+FROM staging_inserted_measurements m -- meas m +JOIN sensors s ON (m.sensors_id = s.sensors_id) +JOIN measurands p ON (s.measurands_id = p.measurands_id) +JOIN sensor_systems ss ON (s.sensor_systems_id = ss.sensor_systems_id) +JOIN sensor_nodes sn ON (ss.sensor_nodes_id = sn.sensor_nodes_id) +GROUP BY sn.sensor_nodes_id +, ((m.datetime - '1sec'::interval) AT TIME ZONE (COALESCE(sn.metadata->>'timezone', 'UTC'))::text)::date +ON CONFLICT (sensor_nodes_id, day) DO UPDATE +SET records = EXCLUDED.records +, measurands = EXCLUDED.measurands +, modified_on = EXCLUDED.modified_on +RETURNING 1) +SELECT COUNT(1) INTO __exported_days +FROM e; + + +__cache_time_ms := 1000 * (extract(epoch FROM clock_timestamp() - __process_start)); + +INSERT INTO ingest_stats ( + ingest_method + -- total + , total_measurements_processed + , total_measurements_inserted + , total_measurements_rejected + , total_nodes_processed + , total_nodes_inserted + , total_nodes_updated + , total_nodes_rejected + -- total times + , total_process_time_ms + , total_insert_time_ms + , total_cache_time_ms + -- latest + , latest_measurements_processed + , latest_measurements_inserted + , latest_measurements_rejected + , latest_nodes_processed + , latest_nodes_inserted + , latest_nodes_updated + , latest_nodes_rejected + -- times + , latest_process_time_ms + , latest_insert_time_ms + , latest_cache_time_ms + ) VALUES ( + -- totals + __ingest_method + , __total_measurements + , __inserted_measurements + , __rejected_measurements + , __total_nodes + , __inserted_nodes + , __updated_nodes + , __rejected_nodes + -- times + , __process_time_ms + , __insert_time_ms + , __cache_time_ms + -- latest + , __total_measurements + , __inserted_measurements + , __rejected_measurements + , __total_nodes + , __inserted_nodes + , __updated_nodes + , __rejected_nodes + -- times + , __process_time_ms + , __insert_time_ms + , __cache_time_ms +) ON CONFLICT (ingest_method) DO UPDATE SET + -- totals + total_measurements_processed = ingest_stats.total_measurements_processed + EXCLUDED.total_measurements_processed + , total_measurements_inserted = ingest_stats.total_measurements_inserted + EXCLUDED.total_measurements_inserted + , total_measurements_rejected = ingest_stats.total_measurements_rejected + EXCLUDED.total_measurements_rejected + , total_nodes_processed = ingest_stats.total_nodes_processed + EXCLUDED.total_nodes_processed + , total_nodes_inserted = ingest_stats.total_nodes_inserted + EXCLUDED.total_nodes_inserted + , total_nodes_updated = ingest_stats.total_nodes_updated + EXCLUDED.total_nodes_updated + , total_nodes_rejected = ingest_stats.total_nodes_rejected + EXCLUDED.total_nodes_rejected + , total_process_time_ms = ingest_stats.total_process_time_ms + EXCLUDED.total_process_time_ms + , total_insert_time_ms = ingest_stats.total_insert_time_ms + EXCLUDED.total_insert_time_ms + , total_cache_time_ms = ingest_stats.total_cache_time_ms + EXCLUDED.total_cache_time_ms + -- latest + , latest_measurements_processed = EXCLUDED.latest_measurements_processed + , latest_measurements_inserted = EXCLUDED.latest_measurements_inserted + , latest_measurements_rejected = EXCLUDED.latest_measurements_rejected + , latest_nodes_processed = EXCLUDED.latest_nodes_processed + , latest_nodes_inserted = EXCLUDED.latest_nodes_inserted + , latest_nodes_updated = EXCLUDED.latest_nodes_updated + , latest_nodes_rejected = EXCLUDED.latest_nodes_rejected + -- times + , latest_process_time_ms = EXCLUDED.latest_process_time_ms + , latest_insert_time_ms = 
EXCLUDED.latest_insert_time_ms + , latest_cache_time_ms = EXCLUDED.latest_cache_time_ms + , ingest_count = ingest_stats.ingest_count + 1 + , ingested_on = EXCLUDED.ingested_on; + + +RAISE NOTICE 'inserted-measurements: %, inserted-from: %, inserted-to: %, rejected-measurements: %, exported-sensor-days: %, process-time-ms: %, insert-time-ms: %, cache-time-ms: %, source: lcs' + , __inserted_measurements + , __inserted_start_datetime + , __inserted_end_datetime + , __rejected_measurements + , __exported_days + , __process_time_ms + , __insert_time_ms + , __cache_time_ms; + + +EXCEPTION WHEN OTHERS THEN + GET STACKED DIAGNOSTICS __error_context = PG_EXCEPTION_CONTEXT; + RAISE NOTICE 'Failed to ingest measurements: %, %', SQLERRM, __error_context; + +END $$; diff --git a/ingest/etl_process_nodes.sql b/ingest/etl_process_nodes.sql new file mode 100644 index 0000000..5b78e6c --- /dev/null +++ b/ingest/etl_process_nodes.sql @@ -0,0 +1,420 @@ +-- lcs_ingest_full +DO $$ +DECLARE +__process_start timestamptz := clock_timestamp(); +__inserted_nodes int; +__inserted_sensors int; +__rejected_nodes int; +__rejected_systems int; +__rejected_sensors int; +__rejected_measurands int; + +BEGIN + +-------------------------- +-- lcs_ingest_nodes.sql -- +-------------------------- + +DELETE +FROM staging_sensornodes +WHERE staging_sensornodes.ingest_id IS NULL; + +DELETE +FROM staging_sensorsystems +WHERE staging_sensorsystems.ingest_id IS NULL +OR ingest_sensor_nodes_id IS NULL; + +DELETE +FROM staging_sensors +WHERE staging_sensors.ingest_id IS NULL +OR ingest_sensor_systems_id IS NULL; + +UPDATE staging_sensors +SET units = 'µg/m³' +WHERE units IN ('µg/m��','��g/m³', 'ug/m3'); + + + +-- match the locations to existing nodes using the source_name/id combo +UPDATE staging_sensornodes +SET sensor_nodes_id = s.sensor_nodes_id +, timezones_id = s.timezones_id +, countries_id = s.countries_id +, is_new = false +, is_moved = st_astext(s.geom) != st_astext(staging_sensornodes.geom) +FROM sensor_nodes s +WHERE s.source_name = staging_sensornodes.source_name +AND s.source_id = staging_sensornodes.source_id +AND ( staging_sensornodes.matching_method IS NULL + OR staging_sensornodes.matching_method = 'ingest-id'); + + +-- now update them using the source + spatial method +UPDATE staging_sensornodes +SET sensor_nodes_id = s.sensor_nodes_id +, timezones_id = s.timezones_id +, countries_id = s.countries_id +, is_new = false +, is_moved = st_astext(s.geom) != st_astext(staging_sensornodes.geom) +FROM sensor_nodes s +WHERE s.source_name = staging_sensornodes.source_name +AND st_distance(staging_sensornodes.geom, s.geom) < 0.00001 -- about 1.11 meters difference +AND staging_sensornodes.matching_method = 'source-spatial'; + + +-- only update the nodes where the geom has changed +-- the geom queries are really slow so we dont want to be doing that all the time +-- ~18 locations per second +UPDATE staging_sensornodes SET + timezones_id = get_timezones_id(geom) +, countries_id = get_countries_id(geom) +WHERE is_new + OR is_moved + OR timezones_id IS NULL + OR countries_id IS NULL; + + +-- we are going to update the source_id where we are matching via geometry +-- for ingest-id matches this should not matter. 
+UPDATE sensor_nodes +SET source_id = COALESCE(s.source_id, sensor_nodes.source_id) + , geom = COALESCE(s.geom, sensor_nodes.geom) + , site_name = COALESCE(s.site_name, sensor_nodes.site_name) + , timezones_id = COALESCE(s.timezones_id, sensor_nodes.timezones_id) + , countries_id = COALESCE(s.countries_id, sensor_nodes.countries_id) + , ismobile = COALESCE(s.ismobile, sensor_nodes.ismobile) + , metadata = COALESCE(s.metadata, '{}') || COALESCE(sensor_nodes.metadata, '{}') + , modified_on = now() +FROM staging_sensornodes s +WHERE sensor_nodes.sensor_nodes_id = s.sensor_nodes_id; + + +-- And now we insert those into the sensor nodes table +WITH inserts AS ( +INSERT INTO sensor_nodes ( + site_name +, source_name +, ismobile +, geom +, metadata +, source_id +, timezones_id +, providers_id +, countries_id +) +SELECT site_name +, source_name +, ismobile +, geom +, metadata +, source_id +, timezones_id +-- default to the unknown provider +-- just to make sure we have one set +, COALESCE(get_providers_id(source_name), 1) +, countries_id +FROM staging_sensornodes +WHERE sensor_nodes_id IS NULL +ON CONFLICT (source_name, source_id) DO UPDATE +SET + site_name=coalesce(EXCLUDED.site_name,sensor_nodes.site_name) + , source_id=COALESCE(EXCLUDED.source_id, sensor_nodes.source_id) + , ismobile=coalesce(EXCLUDED.ismobile,sensor_nodes.ismobile) + , geom=coalesce(EXCLUDED.geom,sensor_nodes.geom) + , metadata=COALESCE(sensor_nodes.metadata, '{}') || COALESCE(EXCLUDED.metadata, '{}') + , timezones_id = COALESCE(EXCLUDED.timezones_id, sensor_nodes.timezones_id) + , providers_id = COALESCE(EXCLUDED.providers_id, sensor_nodes.providers_id) + , modified_on = now() +RETURNING 1) +SELECT COUNT(1) INTO __inserted_nodes +FROM inserts; + +---------------------------- +-- lcs_ingest_systems.sql -- +---------------------------- + +-- fill in any new sensor_nodes_id +UPDATE staging_sensornodes +SET sensor_nodes_id = sensor_nodes.sensor_nodes_id +FROM sensor_nodes +WHERE staging_sensornodes.sensor_nodes_id is null +AND sensor_nodes.source_name = staging_sensornodes.source_name +AND sensor_nodes.source_id = staging_sensornodes.source_id; + +-- log anything we were not able to get an id for +WITH r AS ( +INSERT INTO rejects (t, tbl,r,fetchlogs_id) +SELECT now() +, 'staging_sensornodes-missing-nodes-id' +, to_jsonb(staging_sensornodes) +, fetchlogs_id +FROM staging_sensornodes +WHERE sensor_nodes_id IS NULL +RETURNING 1) +SELECT COUNT(1) INTO __rejected_nodes +FROM r; + +-------------------- +-- Sensor Systems -- +-------------------- + + +-- make sure that we have a system entry for every ingest_id +-- this is to deal with fetchers that do not add these data +INSERT INTO staging_sensorsystems (sensor_nodes_id, ingest_id, fetchlogs_id, metadata) +SELECT sensor_nodes_id +--, source_id -- the ingest_id has the source_name in it and we dont need/want that +, ingest_id +, fetchlogs_id +, '{"note":"automatically added for sensor node"}' +FROM staging_sensornodes +WHERE is_new AND ingest_id NOT IN (SELECT ingest_sensor_nodes_id FROM staging_sensorsystems) +ON CONFLICT (ingest_id) DO UPDATE + SET sensor_nodes_id = EXCLUDED.sensor_nodes_id + ; + +-- Now match the sensor nodes to the system +UPDATE staging_sensorsystems +SET sensor_nodes_id = staging_sensornodes.sensor_nodes_id +FROM staging_sensornodes +WHERE staging_sensorsystems.ingest_sensor_nodes_id = staging_sensornodes.ingest_id; + +-- And match to any existing sensor systems +UPDATE staging_sensorsystems +SET sensor_systems_id = sensor_systems.sensor_systems_id +, is_new = 
false +FROM sensor_systems +WHERE sensor_systems.sensor_nodes_id = staging_sensorsystems.sensor_nodes_id +AND sensor_systems.source_id = staging_sensorsystems.ingest_id; + + +-- log anything we were not able to get an id for +WITH r AS ( +INSERT INTO rejects (t,tbl,r,fetchlogs_id) +SELECT now() +, 'staging_sensorsystems-missing-nodes-id' +, to_jsonb(staging_sensorsystems) +, fetchlogs_id +FROM staging_sensorsystems +WHERE sensor_nodes_id IS NULL +RETURNING 1) +SELECT COUNT(1) INTO __rejected_systems +FROM r; + +-- And finally we add/update the sensor systems +INSERT INTO sensor_systems (sensor_nodes_id, source_id, instruments_id, metadata) +SELECT sensor_nodes_id +, s.ingest_id +, i.instruments_id +, metadata +FROM staging_sensorsystems s +LEFT JOIN instruments i ON (s.instrument_ingest_id = i.ingest_id) +WHERE sensor_nodes_id IS NOT NULL +GROUP BY sensor_nodes_id, s.ingest_id, instruments_id, metadata +ON CONFLICT (sensor_nodes_id, source_id) DO UPDATE SET + metadata=COALESCE(sensor_systems.metadata, '{}') || COALESCE(EXCLUDED.metadata, '{}') + , instruments_id = EXCLUDED.instruments_id + , modified_on = now(); + +---------------------------- +-- lcs_ingest_sensors.sql -- +---------------------------- + +-- Match the sensor system data +UPDATE staging_sensorsystems +SET sensor_systems_id = sensor_systems.sensor_systems_id +FROM sensor_systems +WHERE staging_sensorsystems.sensor_systems_id IS NULL +AND staging_sensorsystems.sensor_nodes_id=sensor_systems.sensor_nodes_id +AND staging_sensorsystems.ingest_id=sensor_systems.source_id +; + +WITH r AS ( +INSERT INTO rejects (t, tbl,r,fetchlogs_id) +SELECT + now() +, 'staging_sensorsystems-missing-systems-id' +, to_jsonb(staging_sensorsystems) +, fetchlogs_id +FROM staging_sensorsystems +WHERE sensor_systems_id IS NULL +RETURNING 1) +SELECT COUNT(1) INTO __rejected_systems +FROM r; + +------------- +-- SENSORS -- +------------- + + -- We do not want to create default sensors because we are not dealling with measurements here +UPDATE staging_sensors +SET sensor_systems_id = staging_sensorsystems.sensor_systems_id +FROM staging_sensorsystems +WHERE staging_sensors.ingest_sensor_systems_id = staging_sensorsystems.ingest_id; + +WITH r AS ( +INSERT INTO rejects (t,tbl,r,fetchlogs_id) +SELECT + now() +, 'staging_sensors-missing-systems-id' +, to_jsonb(staging_sensors) +, fetchlogs_id +FROM staging_sensors +WHERE sensor_systems_id IS NULL +RETURNING 1) +SELECT COUNT(1) INTO __rejected_sensors +FROM r; + + +UPDATE staging_sensors +SET sensors_id = sensors.sensors_id +FROM sensors +WHERE sensors.sensor_systems_id=staging_sensors.sensor_systems_id +AND sensors.source_id = staging_sensors.ingest_id; + + +UPDATE staging_sensors +SET measurands_id = m.measurands_id +FROM (SELECT measurand, MIN(measurands_id) AS measurands_id FROM measurands GROUP BY measurand) as m +WHERE staging_sensors.measurand=m.measurand +--AND staging_sensors.units=measurands.units +; + + +WITH r AS ( +INSERT INTO rejects (t, tbl,r,fetchlogs_id) +SELECT + now() +, 'staging_sensors-missing-measurands-id' +, to_jsonb(staging_sensors) +, fetchlogs_id +FROM staging_sensors +WHERE measurands_id IS NULL +RETURNING 1) +SELECT COUNT(1) INTO __rejected_measurands +FROM r; + +WITH inserts AS ( +INSERT INTO sensors ( + source_id +, sensor_systems_id +, measurands_id +, data_logging_period_seconds +, data_averaging_period_seconds +, sensor_statuses_id +, metadata) +SELECT ingest_id +, sensor_systems_id +, measurands_id +, logging_interval_seconds +, averaging_interval_seconds +, 
COALESCE(ss.sensor_statuses_id, 1) +, metadata +FROM staging_sensors s +LEFT JOIN sensor_statuses ss ON (ss.short_code = s.status) +WHERE measurands_id is not null +AND sensor_systems_id is not null +GROUP BY ingest_id +, sensor_systems_id +, measurands_id +, logging_interval_seconds +, averaging_interval_seconds +, ss.sensor_statuses_id +, metadata +ON CONFLICT (sensor_systems_id, measurands_id, source_id) DO UPDATE +SET metadata = COALESCE(sensors.metadata, '{}') || COALESCE(EXCLUDED.metadata, '{}') + , data_logging_period_seconds = EXCLUDED.data_logging_period_seconds + , data_averaging_period_seconds = EXCLUDED.data_averaging_period_seconds + , sensor_statuses_id = EXCLUDED.sensor_statuses_id + , modified_on = now() +RETURNING 1) +SELECT COUNT(1) INTO __inserted_sensors +FROM inserts; + +UPDATE staging_sensors +SET sensors_id = sensors.sensors_id +FROM sensors +WHERE sensors.sensor_systems_id=staging_sensors.sensor_systems_id +AND sensors.source_id = staging_sensors.ingest_id; + +WITH r AS ( +INSERT INTO rejects (t,tbl,r,fetchlogs_id) +SELECT + now() + , 'staging_sensors-missing-sensors-id' + , to_jsonb(staging_sensors) + , fetchlogs_id +FROM staging_sensors +WHERE sensors_id IS NULL +RETURNING 1) +SELECT COUNT(1) INTO __rejected_sensors +FROM r; + + +-- update the period so that we dont have to keep doing it later +-- we could do this on import as well if we feel this is slowing us down +UPDATE staging_flags + SET period = tstzrange(COALESCE(datetime_from, '-infinity'::timestamptz),COALESCE(datetime_to, 'infinity'::timestamptz), '[]'); + +-- Now we have to match things +-- get the right node id and sensors id for the flags +UPDATE staging_flags +SET sensors_id = s.sensors_id + , sensor_nodes_id = sy.sensor_nodes_id +FROM sensors s +JOIN sensor_systems sy ON (s.sensor_systems_id = sy.sensor_systems_id) +WHERE staging_flags.sensor_ingest_id = s.source_id; + +-- and then get the right flags_id +UPDATE staging_flags +SET flag_types_id = ft.flag_types_id +FROM flag_types ft +WHERE split_part(staging_flags.ingest_id, '::', 1) = ft.ingest_id; + +-- now we should look to see if we should be just extending a flag +UPDATE staging_flags sf + SET flags_id = fm.flags_id + FROM flags fm + -- where the core information is the same (exactly) + WHERE sf.sensor_nodes_id = fm.sensor_nodes_id + AND sf.flag_types_id = fm.flag_types_id + AND ((sf.note = fm.note) OR (sf.note IS NULL AND fm.note IS NULL)) + -- the periods touch or overlap + AND fm.period && sf.period + -- and the flagged record sensors contains the current sensors + AND fm.sensors_ids @> ARRAY[sf.sensors_id]; + +-- and finally we will insert the new flags +INSERT INTO flags (flag_types_id, sensor_nodes_id, sensors_ids, period, note) + SELECT flag_types_id + , sensor_nodes_id + , CASE WHEN sensors_id IS NOT NULL THEN ARRAY[sensors_id] ELSE NULL END + , period + , note + FROM staging_flags + WHERE flag_types_id IS NOT NULL + AND sensor_nodes_id IS NOT NULL + AND flags_id IS NULL; + +-- And then update any that need to be updated + UPDATE flags fm + SET period = sf.period + fm.period + , note = sf.note + , modified_on = now() + FROM staging_flags sf + WHERE sf.flags_id = fm.flags_id; + + +------------------ +-- Return stats -- +------------------ + +RAISE NOTICE 'inserted-nodes: %, inserted-sensors: %, rejected-nodes: %, rejected-sensors: %, rejected-measurands: %, process-time-ms: %, source: lcs' + , __inserted_nodes + , __inserted_sensors + , __rejected_nodes + , __rejected_sensors + , __rejected_measurands + , 1000 * (extract(epoch FROM 
clock_timestamp() - __process_start)); + +END $$; diff --git a/ingest/fetch.py b/ingest/fetch.py index 450fca3..3f70892 100644 --- a/ingest/fetch.py +++ b/ingest/fetch.py @@ -1,10 +1,10 @@ import gzip import io -import json import os import logging -import time +from time import time from datetime import datetime, timedelta +import orjson import boto3 import psycopg2 @@ -15,15 +15,17 @@ StringIteratorIO, clean_csv_value, get_query, + get_data, load_fail, load_success, + load_fetchlogs, ) app = typer.Typer() dir_path = os.path.dirname(os.path.realpath(__file__)) -logger = logging.getLogger(__name__) +logger = logging.getLogger('fetch') FETCH_BUCKET = settings.FETCH_BUCKET s3 = boto3.resource("s3") @@ -64,7 +66,7 @@ def parse_json(j, key: str = None): else: coords = None - data = json.dumps(j) + data = orjson.dumps(j).decode() row = [ location, @@ -87,11 +89,14 @@ def parse_json(j, key: str = None): def create_staging_table(cursor): - cursor.execute(get_query("fetch_staging.sql")) + cursor.execute(get_query( + "fetch_staging.sql", + table="TEMP TABLE" if settings.USE_TEMP_TABLES else 'TABLE' + )) -def copy_data(cursor, key): - obj = s3.Object(FETCH_BUCKET, key) +def copy_data(cursor, key, fetchlogsId=None): + #obj = s3.Object(FETCH_BUCKET, key) # This should not be checked here, # if we ask it to copy data it should do that # if we want to prevent duplicate attemps we should @@ -102,13 +107,15 @@ def copy_data(cursor, key): # we are also removing the try/catch # if it fails we want to deal with it elsewhere logger.debug(f"Copying data for {key}") - with gzip.GzipFile(fileobj=obj.get()["Body"]) as gz: - f = io.BufferedReader(gz) + with get_data(key) as f: + # make sure that the file is complete iterator = StringIteratorIO( - (parse_json(json.loads(line)) for line in f) + (f"{fetchlogsId}\t"+parse_json(orjson.loads(line)) for line in f) ) + query = """ COPY tempfetchdata ( + fetchlogs_id, location, value, unit, @@ -125,6 +132,7 @@ def copy_data(cursor, key): avpd_value ) FROM STDIN; """ + logger.debug("Loading data from STDIN") cursor.copy_expert(query, iterator) @@ -132,7 +140,7 @@ def copy_file(cursor, file): with gzip.GzipFile(file) as gz: f = io.BufferedReader(gz) iterator = StringIteratorIO( - (parse_json(json.loads(line)) for line in f) + (parse_json(orjson.loads(line)) for line in f) ) try: query = get_query("fetch_copy.sql") @@ -141,11 +149,17 @@ def copy_file(cursor, file): # load_success(cursor, file) except Exception as e: + logger.warning(f'File copy failed: {e}') load_fail(cursor, file, e) def process_data(cursor): - query = get_query("fetch_ingest_full.sql") + # see file for details on how + # to use the variables + query = get_query( + "fetch_ingest_full.sql", + table="TEMP TABLE" if settings.USE_TEMP_TABLES else 'TABLE' + ) cursor.execute(query) # if results: # mindate, maxdate = results @@ -201,7 +215,7 @@ def load_fetch_file(file: str): @app.command() def load_fetch_day(day: str): - start = time.time() + start = time() conn = boto3.client("s3") prefix = f"realtime-gzipped/{day}" keys = [] @@ -221,7 +235,7 @@ def load_fetch_day(day: str): create_staging_table(cursor) for key in keys: copy_data(cursor, key) - print(f"All data copied {time.time()-start}") + print(f"All data copied {time()-start}") filter_data(cursor) mindate, maxdate = process_data(cursor) update_rollups(cursor, mindate=mindate, maxdate=maxdate) @@ -272,62 +286,69 @@ def submit_file_error(ids, e): @app.command() def load_db(limit: int = 50, ascending: bool = False): - order = 'ASC' if ascending else 'DESC' - 
with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: - connection.set_session(autocommit=True) - with connection.cursor() as cursor: - cursor.execute( - f""" - SELECT key - ,last_modified - ,fetchlogs_id - FROM fetchlogs - WHERE key~E'^realtime-gzipped/.*\\.ndjson.gz$' - AND completed_datetime is null - ORDER BY last_modified {order} nulls last - LIMIT %s - ; - """, - (limit,), - ) - rows = cursor.fetchall() - keys = [r[0] for r in rows] - if len(keys) > 0: - try: - load_realtime(keys) - except Exception as e: - # catch and continue to next page - ids = [r[2] for r in rows] - logger.error(f""" - Error processing realtime files: {e}, {ids} - """) - submit_file_error(ids, e) - finally: - connection.commit() + pattern = '^realtime-gzipped/.*\\.ndjson.gz$' + rows = load_fetchlogs(pattern, limit, ascending) + if len(rows) > 0: + try: + load_realtime(rows) + except Exception as e: + # catch and continue to next page + ids = [r[2] for r in rows] + logger.error(f""" + Error processing realtime files: {e}, {ids} + """) + submit_file_error(ids, e) - return len(keys) + return len(rows) -def load_realtime(keys): +def load_realtime(rows): # create a connection and share for all keys + logger.debug(f"Loading {len(rows)} keys") + log_time = -1 + process_time = -1 + copy_time = 0 with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: connection.set_session(autocommit=True) with connection.cursor() as cursor: # create all the data staging table create_staging_table(cursor) + logger.debug('Created realtime staging tables') # now copy all the data - for key in keys: - copy_data(cursor, key) + keys = [] + start = time() + for row in rows: + key = row[1] + fetchlogsId = row[0] + logger.debug(f"Loading {key}, id: {fetchlogsId}") + try: + copy_data(cursor, key, fetchlogsId) + keys.append(key) + copy_time += (time() - start) + except Exception as e: + # all until now is lost + # reset things and try to recover + connection.rollback() + keys = [] + load_fail(cursor, fetchlogsId, e) + break + # finally process the data as one - process_data(cursor) - # we are outputing some stats - for notice in connection.notices: - print(notice) - # mark files as done - load_success(cursor, keys) + if len(keys) > 0: + logger.debug(f"Processing realtime files") + start = time() + process_data(cursor) + process_time = time() - start + # we are outputing some stats + for notice in connection.notices: + logger.info(notice) + # mark files as done + start = time() + load_success(cursor, keys) + log_time = time() - start # close and commit connection.commit() - + return round(copy_time*1000), round(process_time*1000), round(log_time*1000), notice if __name__ == "__main__": app() diff --git a/ingest/fetch_filter.sql b/ingest/fetch_filter.sql deleted file mode 100644 index 156f6ad..0000000 --- a/ingest/fetch_filter.sql +++ /dev/null @@ -1,13 +0,0 @@ -DELETE FROM tempfetchdata -WHERE -datetime <= ( - SELECT max(range_end) - FROM timescaledb_information.chunks - WHERE - hypertable_name IN ('rollups', 'measurements') - AND is_compressed -); -DELETE FROM tempfetchdata WHERE datetime > now(); -DELETE FROM tempfetchdata WHERE datetime < (SELECT max(datetime) - '2 days'::interval from tempfetchdata) -; -SELECT min(datetime), max(datetime) FROM tempfetchdata; \ No newline at end of file diff --git a/ingest/fetch_ingest1.sql b/ingest/fetch_ingest1.sql deleted file mode 100644 index 12ef519..0000000 --- a/ingest/fetch_ingest1.sql +++ /dev/null @@ -1,45 +0,0 @@ -CREATE TEMP TABLE IF NOT EXISTS tempfetchdata_sensors AS -WITH t 
AS ( -SELECT DISTINCT - location as site_name, - unit as units, - parameter as measurand, - country, - city, - jsonb_merge_agg(data) as data, - source_name, - coords::geometry as geom, - source_type, - mobile as ismobile, - avpd_unit, - avpd_value, - coords::geometry as cgeom, - NULL::int as sensor_nodes_id, - null::int as sensor_systems_id, - null::int as measurands_id, - null::int as sensors_id, - null::jsonb as node_metadata, - null::jsonb as sensor_metadata, - array_agg(tfdid) as tfdids -FROM tempfetchdata -GROUP BY - location, - unit, - parameter, - country, - city, - coords, - source_type, - source_name, - mobile, - avpd_unit, - avpd_value, - sensor_nodes_id, - sensor_systems_id, - measurands_id, - sensors_id, - node_metadata, - sensor_metadata -) -SELECT row_number() over () as tfsid, * FROM t; -CREATE INDEX ON tempfetchdata_sensors (tfsid); \ No newline at end of file diff --git a/ingest/fetch_ingest2.sql b/ingest/fetch_ingest2.sql deleted file mode 100644 index 23beb0f..0000000 --- a/ingest/fetch_ingest2.sql +++ /dev/null @@ -1,24 +0,0 @@ --- Cleanup fields - -UPDATE tempfetchdata_sensors t SET -geom = NULL WHERE st_x(geom) = 0 and st_y(geom) =0; - -UPDATE tempfetchdata_sensors SET units = 'µg/m³' -WHERE units IN ('µg/m��','��g/m³'); - -UPDATE tempfetchdata_sensors SET -node_metadata = - jsonb_strip_nulls( - COALESCE(data, '{}'::jsonb) - || - jsonb_build_object( - 'source_type', - 'government', - 'origin', - 'openaq' - ) - ), -sensor_metadata = jsonb_strip_nulls(jsonb_build_object( - 'data_averaging_period_seconds', avpd_value * 3600 - )) -; \ No newline at end of file diff --git a/ingest/fetch_ingest3.sql b/ingest/fetch_ingest3.sql deleted file mode 100644 index 1a65a4d..0000000 --- a/ingest/fetch_ingest3.sql +++ /dev/null @@ -1,79 +0,0 @@ -/* -CREATE TEMP TABLE tempfetchdata_nodes AS -SELECT * FROM (SELECT - first_notnull(site_name) as site_name, - first_notnull(source_name) as source_name, - first_notnull(country) as country, - first_notnull(city) as city, - --jsonb_merge_agg(node_metadata) as metadata, - first_notnull(ismobile) as ismobile, - null::int as sensor_nodes_id, - null::int as sensor_systems_id, - st_centroid(st_collect(geom)) as geom, - array_agg(tfsid) as tfsids -FROM tempfetchdata_sensors -WHERE geom IS NOT NULL -GROUP BY - sensor_nodes_id,st_snaptogrid(geom, .0001) -) AS wgeom -UNION ALL -SELECT * FROM -(SELECT - site_name, - source_name, - first_notnull(country) as country, - first_notnull(city) as city, - --jsonb_merge_agg(node_metadata) as metadata, - first_notnull(ismobile) as ismobile, - null::int as sensor_nodes_id, - null::int as sensor_systems_id, - null::geometry as geom, - array_agg(tfsid) as tfsids -FROM tempfetchdata_sensors -WHERE geom IS NULL -AND site_name is not null -and source_name is not null -GROUP BY - site_name, source_name, sensor_nodes_id -) as nogeom -; -*/ - -CREATE TEMP TABLE tempfetchdata_nodes AS -SELECT * FROM (SELECT - site_name, - source_name, - country, - city, - node_metadata as metadata, - ismobile, - null::int as sensor_nodes_id, - null::int as sensor_systems_id, - st_centroid(st_collect(geom)) as geom, - array_agg(tfsid) as tfsids -FROM tempfetchdata_sensors -WHERE geom IS NOT NULL -GROUP BY - 1,2,3,4,5,6,7,8,st_snaptogrid(geom, .0001) -) AS wgeom -UNION ALL -SELECT * FROM -(SELECT - site_name, - source_name, - country, - city, - node_metadata as metadata, - ismobile, - null::int as sensor_nodes_id, - null::int as sensor_systems_id, - null::geometry as geom, - array_agg(tfsid) as tfsids -FROM tempfetchdata_sensors -WHERE geom 
IS NULL -AND site_name is not null -and source_name is not null -GROUP BY - 1,2,3,4,5,6,7,8,9 -) as nogeom -; diff --git a/ingest/fetch_ingest4.sql b/ingest/fetch_ingest4.sql deleted file mode 100644 index 6c6ae00..0000000 --- a/ingest/fetch_ingest4.sql +++ /dev/null @@ -1,19 +0,0 @@ --- Lookup Node Ids - -UPDATE tempfetchdata_nodes t -SET sensor_nodes_id = sn.sensor_nodes_id FROM -sensor_nodes sn -WHERE t.geom is not null -AND st_dwithin(sn.geom, t.geom, .0001) -AND origin='OPENAQ'; - -UPDATE tempfetchdata_nodes t -SET sensor_nodes_id = sn.sensor_nodes_id FROM -sensor_nodes sn -WHERE -t.sensor_nodes_id is null AND -t.site_name is not null -and t.source_name is not null -and t.site_name = sn.site_name -and t.source_name=sn.source_name -and origin='OPENAQ'; \ No newline at end of file diff --git a/ingest/fetch_ingest5.sql b/ingest/fetch_ingest5.sql deleted file mode 100644 index 644dfb5..0000000 --- a/ingest/fetch_ingest5.sql +++ /dev/null @@ -1,35 +0,0 @@ --- Update any records that have changed - -UPDATE sensor_nodes s SET - site_name = COALESCE(t.site_name, s.site_name), - source_name = COALESCE(t.source_name, s.source_name), - city = COALESCE(t.city, s.city), - country = COALESCE(t.country, s.country), - ismobile = COALESCE(t.ismobile, s.ismobile), - metadata = COALESCE(s.metadata, '{}'::jsonb) || t.metadata, - geom = COALESCE(t.geom, s.geom) -FROM tempfetchdata_nodes t -WHERE t.sensor_nodes_id = s.sensor_nodes_id AND -( - (s.geom IS NULL and t.geom IS NOT NULL) -OR - - ROW( - t.sensor_nodes_id, - t.ismobile, - t.site_name, - t.source_name, - t.city, - t.country, - t.metadata - ) IS DISTINCT FROM ( - s.sensor_nodes_id, - s.ismobile, - s.site_name, - s.source_name, - s.city, - s.country, - s.metadata - ) -) -; diff --git a/ingest/fetch_ingest6.sql b/ingest/fetch_ingest6.sql deleted file mode 100644 index 2025749..0000000 --- a/ingest/fetch_ingest6.sql +++ /dev/null @@ -1,27 +0,0 @@ --- Create new nodes where they don't exist -WITH sn AS ( -INSERT INTO sensor_nodes ( - site_name, - metadata, - geom, - source_name, - city, - country, - ismobile -) -SELECT - site_name, - metadata, - geom, - source_name, - city, - country, - ismobile -FROM tempfetchdata_nodes t -WHERE t.sensor_nodes_id is NULL -RETURNING * -) -UPDATE tempfetchdata_nodes tf SET sensor_nodes_id = sn.sensor_nodes_id -FROM sn WHERE tf.sensor_nodes_id is null -and row(tf.site_name, tf.geom, tf.source_name) is not distinct -from row(sn.site_name, sn.geom, sn.source_name); \ No newline at end of file diff --git a/ingest/fetch_ingest7.sql b/ingest/fetch_ingest7.sql deleted file mode 100644 index 6df2009..0000000 --- a/ingest/fetch_ingest7.sql +++ /dev/null @@ -1,151 +0,0 @@ --- Get sensor systems - - -UPDATE tempfetchdata_nodes t -SET sensor_systems_id = ss.sensor_systems_id FROM -sensor_systems ss -WHERE t.sensor_nodes_id = ss.sensor_nodes_id; - --- Add any rows that did not get an id --- into the rejects table and then delete -INSERT INTO rejects -SELECT clock_timestamp(), 'sensor_nodes', to_jsonb(tf) FROM -tempfetchdata_nodes tf WHERE sensor_nodes_id IS NULL; -DELETE FROM tempfetchdata_nodes WHERE sensor_nodes_id IS NULL; - --- create sensor systems that don't exist -WITH ss AS ( -INSERT INTO sensor_systems (sensor_nodes_id) -SELECT DISTINCT sensor_nodes_id FROM tempfetchdata_nodes t -WHERE t.sensor_systems_id is NULL AND t.sensor_nodes_id IS NOT NULL -RETURNING * -) UPDATE tempfetchdata_nodes tf -SET sensor_systems_id = ss.sensor_systems_id -FROM ss WHERE tf.sensor_nodes_id=ss.sensor_nodes_id -and tf.sensor_systems_id is null; - 
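The legacy per-step files removed in this part of the diff (fetch_filter.sql and fetch_ingest1.sql through fetch_ingest7.sql) were already folded into fetch_ingest_full.sql, whose hunk further down keeps "-- File #1 --" style markers and now takes a {table} placeholder so the staging tables can be created as TEMP TABLE in production or as plain TABLE for debugging (settings.USE_TEMP_TABLES, toggled by check.py --keep). The get_query() helper that renders those templates lives in ingest/utils.py and is not part of this diff; a minimal sketch of how such a helper could work, under that assumption:

    # Assumed sketch of get_query(name, **params) from ingest.utils; the real
    # implementation is not shown in this diff and may differ.
    import os

    def get_query(file: str, **params) -> str:
        query_path = os.path.join(os.path.dirname(__file__), file)
        with open(query_path) as f:
            query = f.read()
        # substitute placeholders such as {table} with plain string replacement,
        # so literal braces elsewhere in the SQL are left untouched
        for key, value in params.items():
            query = query.replace("{" + key + "}", str(value))
        return query

Used as in ingest/fetch.py below, get_query("fetch_ingest_full.sql", table="TEMP TABLE" if settings.USE_TEMP_TABLES else "TABLE") renders one template either way, and running check.py with --keep leaves the staging tables in place for inspection.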
--- Add any rows that did not get an id --- into the rejects table and then delete -INSERT INTO rejects -SELECT clock_timestamp(), 'sensor_systems', to_jsonb(tf) FROM -tempfetchdata_nodes tf WHERE sensor_systems_id IS NULL; -DELETE FROM tempfetchdata_nodes WHERE sensor_systems_id IS NULL; - --- merge sensor node / system ids back to sensors table -UPDATE tempfetchdata_sensors ts SET - sensor_nodes_id = tn.sensor_nodes_id, - sensor_systems_id = tn.sensor_systems_id -FROM - tempfetchdata_nodes tn -WHERE - ts.tfsid = ANY(tn.tfsids); - - --- add any measurands that don't exist -UPDATE tempfetchdata_sensors t SET measurands_id= m.measurands_id FROM -measurands m -WHERE t.measurand = m.measurand AND t.units = m.units; - -WITH m AS ( -INSERT INTO measurands (measurand, units) -SELECT DISTINCT measurand, units FROM tempfetchdata_sensors t -WHERE t.measurands_id is NULL -RETURNING * -) UPDATE tempfetchdata_sensors tf SET measurands_id = m.measurands_id -FROM m WHERE tf.measurand=m.measurand -and tf.units=m.units and tf.measurands_id is null; - --- get cleaned sensors table -CREATE TEMP TABLE IF NOT EXISTS tempfetchdata_sensors_clean AS -SELECT - null::int as sensors_id, - sensor_nodes_id, - sensor_systems_id, - measurands_id, - jsonb_merge_agg(sensor_metadata) as metadata, - array_merge_agg(tfdids) as tfdids -FROM tempfetchdata_sensors -GROUP BY 1,2,3,4; - - --- get sensor id -UPDATE tempfetchdata_sensors_clean t -SET sensors_id = s.sensors_id -FROM sensors s -WHERE t.sensor_systems_id = s.sensor_systems_id -AND t.measurands_id = s.measurands_id -; - --- Add any rows that did not get an id --- into the rejects table and then delete -INSERT INTO rejects -SELECT clock_timestamp() -, 'sensors' -, to_jsonb(tf) -FROM tempfetchdata_sensors_clean tf -WHERE sensor_systems_id IS NULL -OR measurands_id IS NULL; - -DELETE -FROM tempfetchdata_sensors_clean -WHERE sensor_systems_id IS NULL -OR measurands_id IS NULL; - --- add any sensors that don't exist -WITH s AS ( - INSERT INTO sensors ( - sensor_systems_id, - measurands_id, - metadata - ) - SELECT - sensor_systems_id, - measurands_id, - metadata - FROM - tempfetchdata_sensors_clean tf - WHERE - tf.sensors_id IS NULL - RETURNING * -) UPDATE tempfetchdata_sensors_clean tfc - SET - sensors_id = s.sensors_id - FROM s - WHERE - tfc.sensors_id IS NULL - AND - s.sensor_systems_id = tfc.sensor_systems_id - AND - s.measurands_id = tfc.measurands_id -; - -UPDATE tempfetchdata t -SET sensors_id = ts.sensors_id -FROM tempfetchdata_sensors_clean ts -WHERE t.tfdid = ANY(ts.tfdids); - --- Add any rows that did not get an id into --- the rejects table and then delete -INSERT INTO rejects -SELECT clock_timestamp() -, 'sensors' -, to_jsonb(tf) -FROM tempfetchdata tf -WHERE sensors_id IS NULL; - -DELETE -FROM tempfetchdata -WHERE sensors_id IS NULL; - -INSERT INTO measurements (sensors_id, datetime, value) -SELECT sensors_id, datetime, value -FROM tempfetchdata -ON CONFLICT DO NOTHING; - - -UPDATE fetchlogs -SET completed_datetime=clock_timestamp() -, last_message = NULL -- reset any previous error -WHERE key IN (SELECT key FROM ingestfiles); - -SELECT min(datetime), max(datetime) FROM tempfetchdata; diff --git a/ingest/fetch_ingest_full.sql b/ingest/fetch_ingest_full.sql index 089c647..3234d53 100644 --- a/ingest/fetch_ingest_full.sql +++ b/ingest/fetch_ingest_full.sql @@ -1,17 +1,20 @@ --- Get sensor systems +-- fetch_ingest_full DO $$ DECLARE __process_start timestamptz := clock_timestamp(); +__min_measurement_date date := '1970-01-01'::date; +__max_measurement_date date 
:= current_date + 1; __total_measurements int; +__total_nodes int; __updated_nodes int; __inserted_nodes int; __inserted_sensors int; __inserted_measurements int; __inserted_measurands int; -__rejected_nodes int; +__rejected_nodes int := 0; __rejected_systems int; __rejected_sensors int; -__rejected_measurements int; +__rejected_measurements int := 0; __start_datetime timestamptz; __end_datetime timestamptz; __inserted_start_datetime timestamptz; @@ -20,41 +23,22 @@ __deleted_timescaledb int; __deleted_future_measurements int; __deleted_past_measurements int; __exported_days int; +__process_time_ms int; +__insert_time_ms int; +__node_time_ms int; +__cache_time_ms int; +__ingest_method text := 'realtime'; +__inserted_spatial_rollups int := 0; BEGIN -SELECT now() INTO __process_start; +-- REQUIRED +-- {table} should be `TEMP TABLE` in production but could be changed to +-- just `TABLE` if you are debugging and want the temp tables to persist --------------------------- -- File fetch_filter.sql -- --------------------------- --- Note: I am including this because it already existed --- I am not sure why its here --- update: it is likely here because we cannot insert data into --- compressed partitions - -WITH deletes AS ( - DELETE - FROM tempfetchdata - WHERE datetime <= ( - SELECT COALESCE(max(range_end), '1970-01-01'::timestamp) - FROM timescaledb_information.chunks - WHERE hypertable_name IN ('rollups', 'measurements') - AND is_compressed - ) - RETURNING 1) -SELECT COUNT(1) INTO __deleted_timescaledb -FROM deletes; - --- This makes sense though we should track in case its systemic -WITH deletes AS ( - DELETE - FROM tempfetchdata - WHERE datetime > now() - RETURNING 1) -SELECT COUNT(1) INTO __deleted_future_measurements -FROM deletes; - -- this seems questionable, I dont want to pass data to this -- process only to have some of it filtered out because its too old -- Commenting this out because it will prevent us from submitting patch @@ -67,8 +51,15 @@ FROM deletes; -- SELECT COUNT(1) INTO __deleted_past_measurements -- FROM deletes; ----------------------------------- +-- use the partitions to determine start and end date +SELECT partition_start_date + , partition_end_date +INTO __min_measurement_date + , __max_measurement_date +FROM data_table_stats +WHERE table_name = 'public.measurements'; +--------------------------------- -- start with simple count SELECT COUNT(1) , MIN(datetime) @@ -76,13 +67,14 @@ SELECT COUNT(1) INTO __total_measurements , __start_datetime , __end_datetime -FROM tempfetchdata; +FROM tempfetchdata +WHERE datetime <= now(); -- Now we start the old fetch_ingest#.sql files ------------- -- File #1 -- ------------- -CREATE TEMP TABLE IF NOT EXISTS tempfetchdata_sensors AS +CREATE {table} IF NOT EXISTS tempfetchdata_sensors AS WITH t AS ( SELECT DISTINCT location as site_name, @@ -97,14 +89,15 @@ SELECT DISTINCT mobile as ismobile, avpd_unit, avpd_value, - coords::geometry as cgeom, - NULL::int as sensor_nodes_id, +-- coords::geometry as cgeom, + null::int as sensor_nodes_id, null::int as sensor_systems_id, null::int as measurands_id, null::int as sensors_id, null::jsonb as node_metadata, null::jsonb as sensor_metadata, - array_agg(tfdid) as tfdids + array_agg(tfdid) as tfdids, + fetchlogs_id FROM tempfetchdata GROUP BY location, @@ -123,10 +116,12 @@ GROUP BY measurands_id, sensors_id, node_metadata, - sensor_metadata + sensor_metadata, + fetchlogs_id ) SELECT row_number() over () as tfsid, * FROM t; + CREATE INDEX ON tempfetchdata_sensors (tfsid); ------------- -- File 
#2 -- @@ -141,20 +136,21 @@ AND st_y(geom) = 0; UPDATE tempfetchdata_sensors SET units = 'µg/m³' -WHERE units IN ('µg/m��','��g/m³'); +WHERE units IN ('µg/m��','��g/m³', 'ug/m3'); UPDATE tempfetchdata_sensors SET node_metadata = jsonb_strip_nulls( - COALESCE(data, '{}'::jsonb) + COALESCE(data, '{{}}'::jsonb) || jsonb_build_object( - 'source_type', - 'government', - 'origin', - 'openaq' + 'source_type', 'government', + 'origin','openaq', + 'fetchlogs_id', fetchlogs_id ) ), + -- the following assumes that avpd_unit is always hours + -- which at the last check (2022-12-07) it was sensor_metadata = jsonb_strip_nulls(jsonb_build_object( 'data_averaging_period_seconds', avpd_value * 3600 )) @@ -164,23 +160,26 @@ sensor_metadata = jsonb_strip_nulls(jsonb_build_object( -- File #3 -- ------------- -CREATE TEMP TABLE IF NOT EXISTS tempfetchdata_nodes AS +CREATE {table} IF NOT EXISTS tempfetchdata_nodes AS SELECT * FROM (SELECT site_name, source_name, country, city, - node_metadata as metadata, + node_metadata::jsonb as metadata, ismobile, null::int as sensor_nodes_id, null::int as sensor_systems_id, null::boolean as added, + null::text as method, st_centroid(st_collect(geom)) as geom, array_agg(tfsid) as tfsids + , array_agg(st_astext(geom)) as points + , COUNT(DISTINCT st_astext(geom)) as n_points FROM tempfetchdata_sensors WHERE geom IS NOT NULL GROUP BY - 1,2,3,4,5,6,7,8,9,st_snaptogrid(geom, .0001) + 1,2,3,4,5,6,7,8,9,st_snaptogrid(geom, .00001) ) AS wgeom UNION ALL SELECT * FROM @@ -189,22 +188,29 @@ SELECT * FROM source_name, country, city, - node_metadata as metadata, + node_metadata::jsonb as metadata, ismobile, null::int as sensor_nodes_id, null::int as sensor_systems_id, null::boolean as added, + null::text as method, null::geometry as geom, array_agg(tfsid) as tfsids + , null::text[] as points + , 0 as n_points FROM tempfetchdata_sensors WHERE geom IS NULL -AND site_name is not null -and source_name is not null +AND site_name IS NOT NULL +AND source_name IS NOT NULL GROUP BY 1,2,3,4,5,6,7,8,9,10 ) as nogeom ; +SELECT COUNT(1) +INTO __total_nodes +FROM tempfetchdata_nodes; + ------------- -- File #4 -- ------------- @@ -214,6 +220,7 @@ GROUP BY UPDATE tempfetchdata_nodes t SET sensor_nodes_id = sn.sensor_nodes_id , added = FALSE +, method = 'spatial' FROM sensor_nodes sn WHERE t.geom IS NOT NULL AND st_dwithin(sn.geom, t.geom, .0001) @@ -222,18 +229,76 @@ AND origin='OPENAQ'; UPDATE tempfetchdata_nodes t SET sensor_nodes_id = sn.sensor_nodes_id , added = FALSE +, method = 'source_id' FROM sensor_nodes sn WHERE t.sensor_nodes_id is null AND t.site_name is not null AND t.source_name is not null AND t.site_name = sn.site_name AND t.source_name=sn.source_name +AND t.geom IS NULL AND origin='OPENAQ'; + +__process_time_ms := 1000 * (extract(epoch FROM clock_timestamp() - __process_start)); + ------------- -- File #5 -- ------------- +--DROP TABLE IF EXISTS checkrealtime_matched; +-- CREATE TABLE IF NOT EXISTS checkrealtime_matched ( +-- sensor_nodes_id int +-- , site_name text +-- , source_name text +-- , city text +-- , country text +-- , origin text +-- , method text +-- , geom_old geometry +-- , geom_new geometry +-- , added_on timestamptz DEFAULT now() +-- ); + + +-- INSERT INTO checkrealtime_matched +-- SELECT t.sensor_nodes_id +-- , format('%s -> %s', s.site_name, t.site_name) +-- , format('%s -> %s', s.source_name, t.source_name) +-- , format('%s -> %s', s.city, t.city) +-- , format('%s -> %s', s.country, t.country) +-- , origin +-- , method +-- , s.geom +-- , t.geom +-- FROM 
tempfetchdata_nodes t +-- JOIN sensor_nodes s ON (t.sensor_nodes_id = s.sensor_nodes_id) +-- WHERE ROW( +-- t.site_name, +-- t.source_name, +-- t.city, +-- t.country, +-- t.metadata +-- ) IS DISTINCT FROM ( +-- s.site_name, +-- s.source_name, +-- s.city, +-- s.country, +-- s.metadata - 'timezone' +-- ); + +-- SELECT sensor_nodes_id +-- , method +-- , site_name +-- , source_name +-- , city +-- , country +-- , ROUND(st_distancesphere(geom_new, geom_old)::numeric, 1) as distance +-- FROM checkrealtime_matched +-- WHERE st_distancesphere(geom_new, geom_old) > 0 +-- GROUP BY 1,2,3,4,5,6, 7 +-- LIMIT 100; + -- Update any records that have changed WITH updates AS ( UPDATE sensor_nodes s SET @@ -242,11 +307,14 @@ UPDATE sensor_nodes s SET city = COALESCE(t.city, s.city), country = COALESCE(t.country, s.country), ismobile = COALESCE(t.ismobile, s.ismobile), - metadata = COALESCE(s.metadata, '{}'::jsonb) || t.metadata, + metadata = COALESCE(s.metadata, '{{}}'::jsonb) || t.metadata, geom = COALESCE(t.geom, s.geom) - --, modified_on = now() + --, timezones_id = get_timezones_id(COALESCE(t.geom, s.geom)) + , providers_id = get_providers_id(COALESCE(t.source_name, s.source_name)) + , modified_on = now() FROM tempfetchdata_nodes t -WHERE t.sensor_nodes_id = s.sensor_nodes_id AND +WHERE t.sensor_nodes_id = s.sensor_nodes_id +AND ( (s.geom IS NULL and t.geom IS NOT NULL) OR @@ -258,7 +326,7 @@ OR t.source_name, t.city, t.country, - t.metadata + t.metadata - ARRAY['imported','fetchlogs_id']::text[] ) IS DISTINCT FROM ( s.sensor_nodes_id, s.ismobile, @@ -266,13 +334,62 @@ OR s.source_name, s.city, s.country, - s.metadata + s.metadata - ARRAY['imported','fetchlogs_id']::text[] ) ) RETURNING 1) SELECT COUNT(1) INTO __updated_nodes FROM updates; + +-- SELECT s.sensor_nodes_id +-- , t.site_name +-- , s.site_name +-- , t.metadata - ARRAY['imported','fetchlogs_id']::text[] as temp +-- , s.metadata - ARRAY['imported','fetchlogs_id']::text[] as node +-- FROM tempfetchdata_nodes t +-- JOIN sensor_nodes s ON (t.sensor_nodes_id = s.sensor_nodes_id) +-- WHERE (s.geom IS NULL and t.geom IS NOT NULL) +-- OR +-- ROW ( +-- t.sensor_nodes_id, +-- -- t.ismobile, +-- -- t.site_name, +-- -- t.source_name, +-- -- t.city, +-- -- t.country, +-- t.metadata - ARRAY['imported','fetchlogs_id']::text[] +-- ) IS DISTINCT FROM ( +-- s.sensor_nodes_id, +-- -- s.ismobile, +-- -- s.site_name, +-- -- s.source_name, +-- -- s.city, +-- -- s.country, +-- s.metadata - ARRAY['imported','fetchlogs_id']::text[] +-- ) +-- LIMIT 20; + +-- SELECT h.site_name +-- , n.site_name +-- , st_astext(h.geom) +-- , st_astext(n.geom) +-- , h.origin +-- , n.origin +-- , h.metadata - ARRAY['imported','fetchlogs_id']::text[] as history +-- , n.metadata - ARRAY['imported','fetchlogs_id']::text[] as current +-- FROM sensor_nodes_history h +-- JOIN sensor_nodes n USING (sensor_nodes_id) +-- WHERE created > now() - '2min'::interval; + +-- SELECT source_name +-- , COALESCE(jsonb_array_length(metadata->'attribution'), 0) as attributes +-- , COUNT(1) as n +-- FROM sensor_nodes +-- GROUP BY 1,2 +-- ORDER BY 2 DESC +-- LIMIT 500; + ------------- -- File #6 -- ------------- @@ -287,7 +404,10 @@ INSERT INTO sensor_nodes ( city, country, ismobile, - origin + origin, + timezones_id, + providers_id, + countries_id ) SELECT site_name, @@ -298,6 +418,9 @@ SELECT country, ismobile, 'OPENAQ' + , get_timezones_id(geom) + , get_providers_id(source_name) + , get_countries_id(geom) FROM tempfetchdata_nodes t WHERE t.sensor_nodes_id is NULL RETURNING * @@ -395,7 +518,7 @@ SELECT COUNT(1) 
INTO __inserted_measurands FROM inserts; -- get cleaned sensors table -CREATE TEMP TABLE IF NOT EXISTS tempfetchdata_sensors_clean AS +CREATE {table} IF NOT EXISTS tempfetchdata_sensors_clean AS SELECT null::int as sensors_id, sensor_nodes_id, @@ -489,6 +612,29 @@ DELETE FROM tempfetchdata WHERE sensors_id IS NULL; +--DELETE +--FROM measurements m +--USING tempfetchdata t +--WHERE m.datetime = t.datetime +--AND m.sensors_id = t.sensors_id; + +__node_time_ms := 1000 * (extract(epoch FROM clock_timestamp() - __process_start)); +-- restart the clock to measure just inserts +__process_start := clock_timestamp(); + + +-- moved down +-- count the future measurements +SELECT COUNT(1) INTO __deleted_future_measurements +FROM tempfetchdata +WHERE datetime > __max_measurement_date +; + + SELECT COUNT(1) INTO __deleted_past_measurements +FROM tempfetchdata +WHERE datetime < __min_measurement_date +; + WITH inserts AS ( INSERT INTO measurements (sensors_id, datetime, value) @@ -496,12 +642,15 @@ WITH inserts AS ( , datetime , value FROM tempfetchdata + WHERE datetime > __min_measurement_date + AND datetime < __max_measurement_date ON CONFLICT DO NOTHING - RETURNING sensors_id, datetime + RETURNING sensors_id, datetime, value ), inserted as ( - INSERT INTO temp_inserted_measurements (sensors_id, datetime) + INSERT INTO temp_inserted_measurements (sensors_id, datetime, value) SELECT sensors_id , datetime + , value FROM inserts RETURNING sensors_id, datetime ) @@ -513,18 +662,203 @@ INTO __inserted_start_datetime , __inserted_measurements FROM inserted; - --- No longer going to manage the fetch log in this way --- WITH updates AS ( --- UPDATE fetchlogs --- SET completed_datetime = clock_timestamp() --- , last_message = NULL -- reset any previous error --- WHERE key IN (SELECT key FROM ingestfiles) --- RETURNING 1) --- SELECT COUNT(1) INTO __keys --- FROM updates; +__insert_time_ms := 1000 * (extract(epoch FROM clock_timestamp() - __process_start)); + +-- mark the fetchlogs as done +WITH inserted AS ( + SELECT m.fetchlogs_id + , COUNT(m.*) as n_records + , COUNT(t.*) as n_inserted + , MIN(m.datetime) as fr_datetime + , MAX(m.datetime) as lr_datetime + , MIN(t.datetime) as fi_datetime + , MAX(t.datetime) as li_datetime + FROM tempfetchdata m + LEFT JOIN temp_inserted_measurements t ON (t.sensors_id = m.sensors_id AND t.datetime = m.datetime) + GROUP BY m.fetchlogs_id) +UPDATE fetchlogs +SET completed_datetime = CURRENT_TIMESTAMP +, inserted = COALESCE(n_inserted, 0) +, records = COALESCE(n_records, 0) +, first_recorded_datetime = fr_datetime +, last_recorded_datetime = lr_datetime +, first_inserted_datetime = fi_datetime +, last_inserted_datetime = li_datetime +FROM inserted +WHERE inserted.fetchlogs_id = fetchlogs.fetchlogs_id; + +-- track the time required to update cache tables +__process_start := clock_timestamp(); + +-- -- Now we can use those temp_inserted_measurements to update the cache tables +-- INSERT INTO sensors_latest ( +-- sensors_id +-- , datetime +-- , value +-- ) +-- ---- identify the row that has the latest value +-- WITH numbered AS ( +-- SELECT sensors_id +-- , datetime +-- , value +-- , row_number() OVER (PARTITION BY sensors_id ORDER BY datetime DESC) as rn +-- FROM temp_inserted_measurements +-- ), latest AS ( +-- ---- only insert those rows +-- SELECT sensors_id +-- , datetime +-- , value +-- FROM numbered +-- WHERE rn = 1 +-- ) +-- SELECT l.sensors_id +-- , l.datetime +-- , l.value +-- FROM latest l +-- LEFT JOIN sensors_latest sl ON (l.sensors_id = sl.sensors_id) +-- WHERE 
sl.sensors_id IS NULL +-- OR l.datetime > sl.datetime +-- ON CONFLICT (sensors_id) DO UPDATE +-- SET datetime = EXCLUDED.datetime +-- , value = EXCLUDED.value +-- , modified_on = now() +-- --, fetchlogs_id = EXCLUDED.fetchlogs_id +-- ; + +-- update the exceedances +INSERT INTO sensor_exceedances (sensors_id, threshold_value, datetime_latest) + SELECT + m.sensors_id + , t.value + , MAX(datetime) + FROM temp_inserted_measurements m + JOIN sensors s ON (m.sensors_id = s.sensors_id) + JOIN thresholds t ON (s.measurands_id = t.measurands_id) + AND m.value > t.value + GROUP BY 1, 2 + ON CONFLICT (sensors_id, threshold_value) DO UPDATE SET + datetime_latest = GREATEST(sensor_exceedances.datetime_latest, EXCLUDED.datetime_latest) + , updated_on = now(); + +INSERT INTO sensors_rollup ( + sensors_id + , datetime_first + , datetime_last + , value_latest + , value_count + , value_avg + , value_min + , value_max + ) +---- identify the row that has the latest value +WITH numbered AS ( + SELECT sensors_id + , datetime + , value + , sum(1) OVER (PARTITION BY sensors_id) as value_count + , min(datetime) OVER (PARTITION BY sensors_id) as datetime_min + , avg(value) OVER (PARTITION BY sensors_id) as value_avg + , row_number() OVER (PARTITION BY sensors_id ORDER BY datetime DESC) as rn + FROM temp_inserted_measurements +), latest AS ( +---- only insert those rows + SELECT sensors_id + , datetime + , value + , value_count + , value_avg + , datetime_min + FROM numbered + WHERE rn = 1 +) +SELECT l.sensors_id +, l.datetime_min -- first +, l.datetime -- last +, l.value -- last value +, l.value_count +, l.value_avg +, l.value -- min +, l.value -- max +FROM latest l +LEFT JOIN sensors_rollup sr ON (l.sensors_id = sr.sensors_id) +WHERE sr.sensors_id IS NULL +OR l.datetime > sr.datetime_last +OR l.datetime_min < sr.datetime_first +ON CONFLICT (sensors_id) DO UPDATE +SET datetime_last = GREATEST(sensors_rollup.datetime_last, EXCLUDED.datetime_last) +, value_latest = CASE WHEN EXCLUDED.datetime_last > sensors_rollup.datetime_last + THEN EXCLUDED.value_latest + ELSE sensors_rollup.value_latest + END +, value_count = sensors_rollup.value_count + EXCLUDED.value_count +, value_min = LEAST(sensors_rollup.value_min, EXCLUDED.value_latest) +, value_max = GREATEST(sensors_rollup.value_max, EXCLUDED.value_latest) +, datetime_first = LEAST(sensors_rollup.datetime_first, EXCLUDED.datetime_first) +, modified_on = now() +--, fetchlogs_id = EXCLUDED.fetchlogs_id +; +-- WITH spatial_inserts AS ( +-- INSERT INTO sensor_nodes_spatial_rollup ( +-- sensor_nodes_id +-- , geom +-- , cell_size +-- , start_datetime +-- , end_datetime +-- , measurements_count +-- , added_on) +-- SELECT sensor_nodes_id +-- , st_snaptogrid(s.geom, 250) +-- , 250 +-- , MIN(datetime) as start_datetime +-- , MAX(datetime) as end_datetime +-- , COUNT(DISTINCT datetime) as measurements +-- , now() +-- FROM temp_inserted_measurements +-- JOIN tempfetchdata_sensors s USING (sensors_id) +-- JOIN sensor_systems ss USING (sensor_systems_id) +-- WHERE lat IS NOT NULL +-- AND lon IS NOT NULL +-- GROUP BY 1,2 +-- ON CONFLICT (sensor_nodes_id, geom) DO UPDATE SET +-- start_datetime = LEAST(sensor_nodes_spatial_rollup.start_datetime, EXCLUDED.start_datetime) +-- , end_datetime = GREATEST(sensor_nodes_spatial_rollup.end_datetime, EXCLUDED.end_datetime) +-- , measurements_count = sensor_nodes_spatial_rollup.measurements_count + EXCLUDED.measurements_count +-- , modified_on = now() +-- RETURNING 1) +-- SELECT COUNT(1) INTO __inserted_spatial_rollups +-- FROM spatial_inserts; + + 
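The sensors_rollup upsert above reduces the freshly inserted measurements to one row per sensor before merging into the rollup: row_number() partitioned by sensors_id and ordered by datetime DESC marks the newest reading as rn = 1, while the window aggregates carry the per-sensor count, average and first timestamp along with it. A self-contained sketch of that pattern with throwaway values (not part of the ingest schema):

WITH readings(sensors_id, datetime, value) AS (
  VALUES (1, '2024-01-01 00:00+00'::timestamptz, 10.0)
       , (1, '2024-01-01 01:00+00'::timestamptz, 12.5)
       , (2, '2024-01-01 00:30+00'::timestamptz, 7.2)
), numbered AS (
  SELECT sensors_id
       , datetime
       , value
       , sum(1) OVER (PARTITION BY sensors_id) AS value_count
       , avg(value) OVER (PARTITION BY sensors_id) AS value_avg
       , row_number() OVER (PARTITION BY sensors_id ORDER BY datetime DESC) AS rn
  FROM readings
)
SELECT sensors_id, datetime AS datetime_last, value AS value_latest, value_count, value_avg
FROM numbered
WHERE rn = 1;

Only the rn = 1 rows reach the ON CONFLICT branch, which then folds the counts, min/max and latest value into any existing sensors_rollup row.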
+-- Update the table that will help to track hourly rollups +--INSERT INTO hourly_stats (datetime) +-- SELECT date_trunc('hour', datetime) +-- FROM temp_inserted_measurements +-- GROUP BY 1 +--ON CONFLICT (datetime) DO UPDATE +--SET modified_on = now(); + + WITH inserted_hours AS ( + -- first we group things, adding an hour to make it time-ending after truncating + SELECT datetime + '1h'::interval as datetime + , utc_offset(datetime + '1h'::interval, tz.tzid) as tz_offset + FROM temp_inserted_measurements m + JOIN sensors s ON (s.sensors_id = m.sensors_id) + JOIN sensor_systems sy ON (s.sensor_systems_id = sy.sensor_systems_id) + JOIN sensor_nodes sn ON (sy.sensor_nodes_id = sn.sensor_nodes_id) + JOIN timezones tz ON (sn.timezones_id = tz.timezones_id) + GROUP BY 1, 2 + ) + INSERT INTO hourly_data_queue (datetime, tz_offset) + SELECT as_utc_hour(datetime, tz_offset), tz_offset + FROM inserted_hours + GROUP BY 1, 2 + ON CONFLICT (datetime, tz_offset) DO UPDATE + SET modified_on = now(); + + +-- update the table that will track the daily exports WITH e AS ( INSERT INTO open_data_export_logs (sensor_nodes_id, day, records, measurands, modified_on) SELECT sn.sensor_nodes_id @@ -548,7 +882,91 @@ SELECT COUNT(1) INTO __exported_days FROM e; -RAISE NOTICE 'total-measurements: %, deleted-timescaledb: %, deleted-future-measurements: %, deleted-past-measurements: %, from: %, to: %, inserted-from: %, inserted-to: %, updated-nodes: %, inserted-measurements: %, inserted-measurands: %, inserted-nodes: %, rejected-nodes: %, rejected-systems: %, rejected-sensors: %, exported-sensor-days: %, process-time-ms: %, source: fetch' +__cache_time_ms := 1000 * (extract(epoch FROM clock_timestamp() - __process_start)); + + +INSERT INTO ingest_stats ( + ingest_method + -- total + , total_measurements_processed + , total_measurements_inserted + , total_measurements_rejected + , total_nodes_processed + , total_nodes_inserted + , total_nodes_updated + , total_nodes_rejected + -- total times + , total_process_time_ms + , total_insert_time_ms + , total_cache_time_ms + -- latest + , latest_measurements_processed + , latest_measurements_inserted + , latest_measurements_rejected + , latest_nodes_processed + , latest_nodes_inserted + , latest_nodes_updated + , latest_nodes_rejected + -- times + , latest_process_time_ms + , latest_insert_time_ms + , latest_cache_time_ms + ) VALUES ( + -- totals + __ingest_method + , __total_measurements + , __inserted_measurements + , __rejected_measurements + , __total_nodes + , __inserted_nodes + , __updated_nodes + , __rejected_nodes + -- times + , __process_time_ms + , __insert_time_ms + , __cache_time_ms + -- latest + , __total_measurements + , __inserted_measurements + , __rejected_measurements + , __total_nodes + , __inserted_nodes + , __updated_nodes + , __rejected_nodes + -- times + , __process_time_ms + , __insert_time_ms + , __cache_time_ms +) ON CONFLICT (ingest_method) DO UPDATE SET + -- totals + total_measurements_processed = ingest_stats.total_measurements_processed + EXCLUDED.total_measurements_processed + , total_measurements_inserted = ingest_stats.total_measurements_inserted + EXCLUDED.total_measurements_inserted + , total_measurements_rejected = ingest_stats.total_measurements_rejected + EXCLUDED.total_measurements_rejected + , total_nodes_processed = ingest_stats.total_nodes_processed + EXCLUDED.total_nodes_processed + , total_nodes_inserted = ingest_stats.total_nodes_inserted + EXCLUDED.total_nodes_inserted + , total_nodes_updated = ingest_stats.total_nodes_updated + 
EXCLUDED.total_nodes_updated + , total_nodes_rejected = ingest_stats.total_nodes_rejected + EXCLUDED.total_nodes_rejected + , total_process_time_ms = ingest_stats.total_process_time_ms + EXCLUDED.total_process_time_ms + , total_insert_time_ms = ingest_stats.total_insert_time_ms + EXCLUDED.total_insert_time_ms + , total_cache_time_ms = ingest_stats.total_cache_time_ms + EXCLUDED.total_cache_time_ms + -- latest + , latest_measurements_processed = EXCLUDED.latest_measurements_processed + , latest_measurements_inserted = EXCLUDED.latest_measurements_inserted + , latest_measurements_rejected = EXCLUDED.latest_measurements_rejected + , latest_nodes_processed = EXCLUDED.latest_nodes_processed + , latest_nodes_inserted = EXCLUDED.latest_nodes_inserted + , latest_nodes_updated = EXCLUDED.latest_nodes_updated + , latest_nodes_rejected = EXCLUDED.latest_nodes_rejected + -- times + , latest_process_time_ms = EXCLUDED.latest_process_time_ms + , latest_insert_time_ms = EXCLUDED.latest_insert_time_ms + , latest_cache_time_ms = EXCLUDED.latest_cache_time_ms + , ingest_count = ingest_stats.ingest_count + 1 + , ingested_on = EXCLUDED.ingested_on; + + + +RAISE NOTICE 'total-measurements: %, deleted-timescaledb: %, deleted-future-measurements: %, deleted-past-measurements: %, from: %, to: %, inserted-from: %, inserted-to: %, updated-nodes: %, inserted-measurements: %, inserted-measurands: %, inserted-nodes: %, rejected-nodes: %, rejected-systems: %, rejected-sensors: %, exported-sensor-days: %, inserted-spatial-rollups: %, process-time-ms: %, insert-time-ms: %, cache-time-ms: %, source: fetch' , __total_measurements , __deleted_timescaledb , __deleted_future_measurements @@ -565,7 +983,10 @@ RAISE NOTICE 'total-measurements: %, deleted-timescaledb: %, deleted-future-meas , __rejected_systems , __rejected_sensors , __exported_days - , 1000 * (extract(epoch FROM clock_timestamp() - __process_start)); + , __inserted_spatial_rollups + , __process_time_ms + , __insert_time_ms + , __cache_time_ms; END $$; diff --git a/ingest/fetch_staging.sql b/ingest/fetch_staging.sql index 5ea6e43..48ddc93 100644 --- a/ingest/fetch_staging.sql +++ b/ingest/fetch_staging.sql @@ -1,4 +1,11 @@ -CREATE TEMP TABLE IF NOT EXISTS tempfetchdata ( +-- DROP TABLE IF EXISTS tempfetchdata +-- , temp_inserted_measurements +-- , tempfetchdata_nodes +-- , tempfetchdata_sensors +-- , tempfetchdata_sensors_clean; + +CREATE {table} IF NOT EXISTS tempfetchdata ( + fetchlogs_id int, location text, value float, unit text, @@ -17,15 +24,19 @@ CREATE TEMP TABLE IF NOT EXISTS tempfetchdata ( sensors_id int ); -CREATE TEMP TABLE IF NOT EXISTS ingestfiles( - key text -); +--CREATE {table} IF NOT EXISTS ingestfiles( +-- key text +--); -- This table will hold measurements that have -- actually been inserted into the measurements table -- this is to deal with the overlap that we see in the -- incoming files -CREATE TEMP TABLE IF NOT EXISTS temp_inserted_measurements ( - sensors_id int, - datetime timestamptz +CREATE {table} IF NOT EXISTS temp_inserted_measurements ( + sensors_id int + , datetime timestamptz + , value double precision + , lat double precision + , lon double precision + , fetchlogs_id int ); diff --git a/ingest/handler.py b/ingest/handler.py index 2bb72cc..f827c3b 100644 --- a/ingest/handler.py +++ b/ingest/handler.py @@ -2,7 +2,8 @@ import logging import psycopg2 from .settings import settings -from .lcs import load_measurements_db, load_metadata_db +from .lcs import load_metadata_db +from .lcsV2 import load_measurements_db from .fetch import 
load_db from time import time import json @@ -38,7 +39,6 @@ def handler(event, context): else: keys = getKeysFromS3Record(record) - logger.debug(keys) for obj in keys: bucket = obj['bucket'] key = obj['key'] @@ -47,24 +47,29 @@ def handler(event, context): ) try: + file_size = lov2["Contents"][0]["Size"] last_modified = lov2["Contents"][0]["LastModified"] except KeyError: logger.error(""" - could not get last modified time from obj + could not get info from obj """) + file_size = None last_modified = datetime.now().replace( tzinfo=timezone.utc ) cursor.execute( """ - INSERT INTO fetchlogs (key, last_modified) - VALUES(%s, %s) + INSERT INTO fetchlogs (key + , file_size + , last_modified + ) + VALUES(%s, %s, %s) ON CONFLICT (key) DO UPDATE SET last_modified=EXCLUDED.last_modified, completed_datetime=NULL RETURNING *; """, - (key, last_modified,), + (key, file_size, last_modified,), ) row = cursor.fetchone() connection.commit() @@ -99,6 +104,10 @@ def getKeysFromS3Record(record): def cronhandler(event, context): + if settings.PAUSE_INGESTING: + logger.info('Ingesting is paused') + return None + start_time = time() timeout = settings.INGEST_TIMEOUT # manual timeout for testing ascending = settings.FETCH_ASCENDING if 'ascending' not in event else event['ascending'] @@ -107,86 +116,58 @@ def cronhandler(event, context): metadata_limit = settings.METADATA_LIMIT if 'metadata_limit' not in event else event['metadata_limit'] logger.info(f"Running cron job: {event['source']}, ascending: {ascending}") - with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: - connection.set_session(autocommit=True) - with connection.cursor() as cursor: - cursor.execute( - """ - SELECT count(*) - FROM fetchlogs - WHERE completed_datetime is null - AND key ~*'stations'; - """, - ) - metadata = cursor.fetchone() - cursor.execute( - """ - SELECT count(*) - FROM fetchlogs - WHERE completed_datetime is null - AND key ~*'measures'; - """, - ) - pipeline = cursor.fetchone() - cursor.execute( - """ - SELECT count(*) - FROM fetchlogs - WHERE completed_datetime is null - AND key ~*'realtime'; - """, - ) - realtime = cursor.fetchone() - for notice in connection.notices: - logger.debug(notice) - - metadata = 0 if metadata is None else metadata[0] - realtime = 0 if realtime is None else realtime[0] - pipeline = 0 if pipeline is None else pipeline[0] - logger.info(f"{metadata_limit}/{metadata} metadata, {realtime_limit}/{realtime} openaq, {pipeline_limit}/{pipeline} pipeline records pending") # these exceptions are just a failsafe so that if something # unaccounted for happens we can still move on to the next # process. 
In case of this type of exception we will need to # fix it asap try: - if metadata > 0 and metadata_limit > 0: + if metadata_limit > 0: cnt = 0 - while cnt < metadata and (time() - start_time) < timeout: - cnt += load_metadata_db(metadata_limit, ascending) + loaded = 1 + while ( + loaded > 0 + and (time() - start_time) < timeout + ): + loaded = load_metadata_db(metadata_limit, ascending) + cnt += loaded logger.info( - "loaded %s of %s metadata records, timer: %0.4f", - cnt, metadata, time() - start_time + "loaded %s metadata records, timer: %0.4f", + cnt, time() - start_time ) except Exception as e: logger.error(f"load metadata failed: {e}") try: - if realtime > 0 and realtime_limit > 0: + if realtime_limit > 0: cnt = 0 loaded = 1 while ( loaded > 0 - and cnt < realtime and (time() - start_time) < timeout ): loaded = load_db(realtime_limit, ascending) cnt += loaded logger.info( - "loaded %s of %s fetch records, timer: %0.4f", - cnt, realtime, time() - start_time + "loaded %s fetch records, timer: %0.4f", + cnt, time() - start_time ) except Exception as e: logger.error(f"load realtime failed: {e}") try: - if pipeline > 0 and pipeline_limit > 0: + if pipeline_limit > 0: cnt = 0 - while cnt < pipeline and (time() - start_time) < timeout: - cnt += load_measurements_db(pipeline_limit, ascending) + loaded = 1 + while ( + loaded > 0 + and (time() - start_time) < timeout + ): + loaded = load_measurements_db(pipeline_limit, ascending) + cnt += loaded logger.info( - "loaded %s of %s pipeline records, timer: %0.4f", - cnt, pipeline, time() - start_time + "loaded %s pipeline records, timer: %0.4f", + cnt, time() - start_time ) except Exception as e: logger.error(f"load pipeline failed: {e}") diff --git a/ingest/lcs.py b/ingest/lcs.py index 43b2da0..c59df22 100644 --- a/ingest/lcs.py +++ b/ingest/lcs.py @@ -4,6 +4,7 @@ import dateparser import pytz import orjson +import uuid import csv from time import time from urllib.parse import unquote_plus @@ -14,7 +15,13 @@ import typer from io import StringIO from .settings import settings -from .utils import get_query, clean_csv_value, StringIteratorIO, fix_units +from .utils import ( + get_query, + clean_csv_value, + StringIteratorIO, + fix_units, + load_fetchlogs, +) s3 = boto3.resource("s3") s3c = boto3.client("s3") @@ -22,7 +29,7 @@ app = typer.Typer() dir_path = os.path.dirname(os.path.realpath(__file__)) -FETCH_BUCKET = settings.ETL_BUCKET +FETCH_BUCKET = settings.FETCH_BUCKET logger = logging.getLogger(__name__) @@ -91,7 +98,7 @@ def system(self, j, node_id, fetchlogsId): self.systems.append(system) def node(self, j): - node = {} + node = {"fetchlogs_id": None} metadata = {} if "sensor_node_id" in j: id = j["sensor_node_id"] @@ -99,9 +106,7 @@ def node(self, j): return None # if we have passed the fetchlogs_id we should track it if "fetchlogs_id" in j: - fetchlogsId = j["fetchlogs_id"] - else: - fetchlogsId = None + node["fetchlogs_id"] = j["fetchlogs_id"] for key, value in j.items(): key = str.replace(key, "sensor_node_", "") @@ -122,7 +127,7 @@ def node(self, j): except Exception: node["geom"] = None elif key == "sensor_systems": - self.system(value, id, fetchlogsId) + self.system(value, id, node["fetchlogs_id"]) else: metadata[key] = value node["metadata"] = orjson.dumps(metadata).decode() @@ -159,12 +164,12 @@ def get_station(self, key, fetchlogsId): self.node(obj) def load_data(self): - logger.debug(f"load_data: {self.keys}") + logger.debug(f"load_data: {self.keys}, {self.nodes}") with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: 
connection.set_session(autocommit=True) with connection.cursor() as cursor: start_time = time() - self.create_staging_table(cursor) + create_staging_table(cursor) write_csv( cursor, @@ -247,17 +252,7 @@ def load_data(self): def process_data(self, cursor): query = get_query("lcs_ingest_full.sql") cursor.execute(query) - # query = get_query("lcs_ingest_nodes.sql") - # cursor.execute(query) - - # query = get_query("lcs_ingest_systems.sql") - # cursor.execute(query) - - # query = get_query("lcs_ingest_sensors.sql") - # cursor.execute(query) - def create_staging_table(self, cursor): - cursor.execute(get_query("lcs_staging.sql")) def get_metadata(self): hasnew = False @@ -266,6 +261,7 @@ def get_metadata(self): id = obj["id"] last_modified = obj["LastModified"] try: + logger.debug(f"Loading station file: {id}:{key}") self.get_station(key, id) self.keys.append( { @@ -286,6 +282,14 @@ def get_metadata(self): self.load_data() + +def create_staging_table(cursor): + # table and batch are used primarily for testing + cursor.execute(get_query( + "lcs_staging.sql", + table="TEMP TABLE" if settings.USE_TEMP_TABLES else 'TABLE' + )) + def write_csv(cursor, data, table, columns): fields = ",".join(columns) sio = StringIO() @@ -316,23 +320,40 @@ def load_metadata_bucketscan(count=100): break -def load_metadata_db(count=250, ascending: bool = False): +def load_metadata_db(limit=250, ascending: bool = False): order = 'ASC' if ascending else 'DESC' + pattern = 'lcs-etl-pipeline/stations/' + rows = load_fetchlogs(pattern, limit, ascending) + contents = [] + for row in rows: + logger.debug(row) + contents.append( + { + "Key": unquote_plus(row[1]), + "LastModified": row[2], + "id": row[0], + } + ) + if len(contents) > 0: + load_metadata(contents) + # data = LCSData(contents) + # data.get_metadata() + return len(rows) + + +def load_metadata_batch(batch: str): with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: connection.set_session(autocommit=True) with connection.cursor() as cursor: cursor.execute( - f""" + """ SELECT key , last_modified , fetchlogs_id FROM fetchlogs - WHERE key~'lcs-etl-pipeline/stations/' - AND completed_datetime is null - ORDER BY last_modified {order} nulls last - LIMIT %s; + WHERE batch_uuid = %s """, - (count,), + (batch,), ) rows = cursor.fetchall() rowcount = cursor.rowcount @@ -348,11 +369,23 @@ def load_metadata_db(count=250, ascending: bool = False): for notice in connection.notices: logger.debug(notice) if len(contents) > 0: - data = LCSData(contents) - data.get_metadata() + load_metadata(contents) + # data = LCSData(contents) + # data.get_metadata() return rowcount +def load_metadata(keys): + logger.debug(f'Load metadata: {len(keys)}') + data = LCSData(keys) + try: + data.get_metadata() + except Exception as e: + ids = ','.join([str(k['id']) for k in keys]) + logger.error(f'load error: {e} ids: {ids}') + raise + + def select_object(key): key = unquote_plus(key) if str.endswith(key, ".gz"): @@ -362,7 +395,7 @@ def select_object(key): try: content = "" resp = s3c.select_object_content( - Bucket=settings.ETL_BUCKET, + Bucket=settings.FETCH_BUCKET, Key=key, ExpressionType="SQL", Expression=""" @@ -426,13 +459,19 @@ def get_measurements(key, fetchlogsId): dt = row[2] try: - dt = datetime.fromtimestamp(int(dt), timezone.utc) + if dt.isnumeric(): + if len(dt) == 13: + dt = datetime.fromtimestamp(int(dt)/1000.0, timezone.utc) + else: + dt = datetime.fromtimestamp(int(dt), timezone.utc) + row[2] = dt.isoformat() except Exception: try: dt = 
dateparser.parse(dt).replace(tzinfo=timezone.utc) except Exception: logger.warning(f"Exception in parsing date for {dt} {Exception}") - row[2] = dt.isoformat() + + #row[2] = dt.isoformat() # addd the log id for tracing purposes row.insert(5, fetchlogsId) ret.append(row) @@ -442,6 +481,7 @@ def get_measurements(key, fetchlogsId): def submit_file_error(key, e): """Update the log to reflect the error and prevent a retry""" + logger.error(f"{key}: {e}") with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: connection.set_session(autocommit=True) with connection.cursor() as cursor: @@ -451,26 +491,25 @@ def submit_file_error(key, e): SET completed_datetime = clock_timestamp() , last_message = %s WHERE key = %s - """ - ), - (f"ERROR: {e}", key), + """, + (f"ERROR: {e}", key), + ) + def to_tsv(row): tsv = "\t".join(map(clean_csv_value, row)) + "\n" return tsv return "" + def load_measurements_file(fetchlogs_id: int): with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: connection.set_session(autocommit=True) with connection.cursor() as cursor: cursor.execute( """ - SELECT key - , init_datetime - , loaded_datetime - , completed_datetime - , last_message + SELECT fetchlogs_id + , key FROM fetchlogs WHERE fetchlogs_id = %s LIMIT 1 @@ -479,34 +518,29 @@ def load_measurements_file(fetchlogs_id: int): (fetchlogs_id,), ) rows = cursor.fetchall() - print(rows) - keys = [r[0] for r in rows] - load_measurements(keys) + load_measurements(rows) + + +def load_measurements_batch(batch: str): + with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: + connection.set_session(autocommit=True) + with connection.cursor() as cursor: + cursor.execute( + """ + SELECT fetchlogs_id + , key + FROM fetchlogs + WHERE batch_uuid = %s + """, + (batch,), + ) + rows = cursor.fetchall() + load_measurements(rows) def load_measurements_db(limit=250, ascending: bool = False): - order = 'ASC' if ascending else 'DESC' - conn = psycopg2.connect(settings.DATABASE_WRITE_URL) - cur = conn.cursor() - cur.execute( - f""" - SELECT fetchlogs_id - , key - , last_modified - FROM fetchlogs - WHERE key~E'^lcs-etl-pipeline/measures/.*\\.csv' - AND completed_datetime is null - ORDER BY last_modified {order} nulls last - LIMIT %s - ; - """, - (limit,), - ) - rows = cur.fetchall() - # keys = [r[0] for r in rows] - conn.commit() - cur.close() - conn.close() + pattern = '^lcs-etl-pipeline/measures/.*\\.csv' + rows = load_fetchlogs(pattern, limit, ascending) load_measurements(rows) return len(rows) @@ -527,13 +561,12 @@ def load_measurements(rows): logger.info("load_measurements:get: %s keys; %s rows; %0.4f seconds", len(rows), len(data), time() - start_time) if len(data) > 0: - with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: connection.set_session(autocommit=True) with connection.cursor() as cursor: - cursor.execute(get_query("lcs_meas_staging.sql")) - start = time() + create_staging_table(cursor) + write_csv( cursor, new, "keys", ["key",], ) @@ -551,43 +584,11 @@ def load_measurements(rows): mrows = cursor.rowcount status = cursor.statusmessage logger.debug(f"COPY Rows: {mrows} Status: {status}") - cursor.execute( - """ - INSERT INTO fetchlogs( - key, - loaded_datetime - ) SELECT key, clock_timestamp() - FROM keys - ON CONFLICT (key) DO - UPDATE - SET - loaded_datetime=EXCLUDED.loaded_datetime - ; - """ - ) - connection.commit() + cursor.execute(get_query("lcs_meas_ingest.sql")) for notice in connection.notices: print(notice) - #irows = cursor.rowcount - #logger.info("load_measurements:insert: %s 
rows; %0.4f seconds", irows, time() - start) - #status = cursor.statusmessage - #logger.debug(f"INGEST Rows: {irows} Status: {status}") - cursor.execute( - """ - INSERT INTO fetchlogs( - key, - completed_datetime - ) SELECT key, clock_timestamp() - FROM keys - ON CONFLICT (key) DO - UPDATE - SET - completed_datetime=EXCLUDED.completed_datetime - ; - """ - ) logger.info( "load_measurements: keys: %s; rows: %s; time: %0.4f", len(rows), mrows, time() - start_time) diff --git a/ingest/lcsV2.py b/ingest/lcsV2.py new file mode 100644 index 0000000..f3fecae --- /dev/null +++ b/ingest/lcsV2.py @@ -0,0 +1,959 @@ +import os +import logging +from datetime import datetime, timezone +import dateparser +import pytz +import orjson +import uuid +import csv +from time import time +from urllib.parse import unquote_plus +import warnings +import re + +import boto3 +import psycopg2 +import typer +from io import StringIO +from .settings import settings +from .utils import ( + get_query, + clean_csv_value, + StringIteratorIO, + fix_units, + load_fetchlogs, + select_object, + get_file, +) + +s3 = boto3.resource("s3") +s3c = boto3.client("s3") + +app = typer.Typer() +dir_path = os.path.dirname(os.path.realpath(__file__)) + +FETCH_BUCKET = settings.FETCH_BUCKET + +logger = logging.getLogger(__name__) + +warnings.filterwarnings( + "ignore", + message="The localize method is no longer necessary, as this time zone supports the fold attribute", +) + + +def to_geometry(key, data): + # could be passed as lat/lng or coordinates + if key in ['lat','lon']: + lat = data.get('lat') + lon = data.get('lon') + elif key == 'coordinates': + lat = data.get('coordinates', {}).get('lat') + lon = data.get('coordinates', {}).get('lon') + if None in [lat, lon]: + raise Exception('Missing value for coordinates') + # could add more checks + return f"SRID=4326;POINT({lon} {lat})" + +def to_timestamp(key, data): + dt = data.get(key) + value = None + if dt in [None, '']: + logger.warning('Passed none type value for timestamp') + # no need for exception, we check for nones later + return None; + if dt.isnumeric(): + if len(dt) == 13: + dt = datetime.fromtimestamp(int(dt)/1000.0, timezone.utc) + else: + dt = datetime.fromtimestamp(int(dt), timezone.utc) + else: + return dt + dt = dateparser.parse(dt).replace(tzinfo=timezone.utc) + + return dt.isoformat() + + +class IngestClient: + def __init__( + self, key=None, fetchlogs_id=None, data=None + ): + self.key = key + self.fetchlogs_id = fetchlogs_id + self.keys = [] + self.st = datetime.now().replace(tzinfo=pytz.UTC) + self.sensors = [] + self.systems = [] + self.flags = [] + self.nodes = [] + self.node_ids = [] + self.system_ids = [] + self.sensor_ids = [] + self.measurements = [] + self.matching_method = 'ingest-id' + self.source = None + self.node_map = { + "fetchlogs_id": {}, + "site_name": { "col":"site_name" }, + "source_name": {}, + "ismobile": {}, + "ingest_id": {}, + "matching_method": {}, + "location": {"col":"ingest_id"}, + "sensor_node_id": {"col":"ingest_id"}, + "label": {"col":"site_name"}, + "coordinates": {"col":"geom","func": to_geometry }, + "geometry": {"col":"geom", "func": to_geometry }, + "lat": {"col":"geom","func": to_geometry }, + "lon": {"col":"geom","func": to_geometry }, + } + self.measurement_map = { + "sensor_id": {"col": "ingest_id"}, + "ingest_id": {"col": "ingest_id"}, + "timestamp": {"col": "datetime", "func": to_timestamp }, + "datetime": {"col": "datetime", "func": to_timestamp }, + "measure": {"col": "value"}, + "value": {}, + "lat": {}, + "lon": {}, + } + # 
if fetchlogs_id but no key or data + # get key + # if key, load data + # if data + if data is not None and isinstance(data, dict): + self.load(data) + + def process(self, key, data, mp): + col = None + value = None + m = mp.get(key) + if m is not None: + col = m.get('col', key) + func = m.get('func') + if func is None: + # just return value + value = data.get(key) + else: + # functions require key and data + value = func(key, data) + return col, value + + def dump(self, load: bool = True): + """ + Dump any data that is currenly loaded into the database + We will dump if there is data OR if we have loaded any keys + We do this because its possible that a file is empty but we + need to run the dump method to get the file to be marked as finished + """ + logger.debug(f"Dumping data from {len(self.keys)} files") + if len(self.nodes)>0 or len(self.keys)>0: + self.dump_locations(load) + if len(self.measurements)>0 or len(self.keys)>0: + self.dump_measurements(load) + + def dump_locations(self, load: bool = True): + """ + Dump the nodes into the temporary tables + """ + db_table = "TEMP TABLE" if (settings.USE_TEMP_TABLES and load) else "TABLE" + logger.debug(f"Dumping {len(self.nodes)} nodes using {db_table} ({settings.USE_TEMP_TABLES}|{load})") + with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: + connection.set_session(autocommit=True) + with connection.cursor() as cursor: + start_time = time() + + cursor.execute(get_query( + "temp_locations_dump.sql", + table=db_table + )) + + write_csv( + cursor, + self.keys, + f"staging_keys", + [ + "key", + "last_modified", + "fetchlogs_id", + ], + ) + # update by id instead of key due to matching issue + cursor.execute( + """ + UPDATE fetchlogs + SET loaded_datetime = clock_timestamp() + , last_message = 'load_data' + WHERE fetchlogs_id IN (SELECT fetchlogs_id FROM staging_keys) + """ + ) + connection.commit() + + write_csv( + cursor, + self.nodes, + "staging_sensornodes", + [ + "ingest_id", + "site_name", + "matching_method", + "source_name", + "source_id", + "ismobile", + "geom", + "metadata", + "fetchlogs_id", + ], + ) + + write_csv( + cursor, + self.systems, + "staging_sensorsystems", + [ + "ingest_id", + "instrument_ingest_id", + "ingest_sensor_nodes_id", + "metadata", + "fetchlogs_id", + ], + ) + + write_csv( + cursor, + self.sensors, + "staging_sensors", + [ + "ingest_id", + "ingest_sensor_systems_id", + "measurand", + "units", + "status", + "logging_interval_seconds", + "averaging_interval_seconds", + "metadata", + "fetchlogs_id", + ], + ) + + write_csv( + cursor, + self.flags, + "staging_flags", + [ + "ingest_id", + "sensor_ingest_id", + "datetime_from", + "datetime_to", + "note", + "metadata", + "fetchlogs_id", + ], + ) + + connection.commit() + + # and now we load all the nodes,systems and sensors + if load: + query = get_query("etl_process_nodes.sql") + cursor.execute(query) + + for notice in connection.notices: + logger.debug(notice) + + cursor.execute( + """ + UPDATE fetchlogs + SET completed_datetime = clock_timestamp() + , last_message = NULL + WHERE fetchlogs_id IN (SELECT fetchlogs_id FROM staging_keys) + """ + ) + + connection.commit() + logger.info("dump_locations: locations: %s; time: %0.4f", len(self.nodes), time() - start_time) + for notice in connection.notices: + logger.debug(notice) + + + + def dump_measurements(self, load: bool = True): + db_table = "TEMP TABLE" if (settings.USE_TEMP_TABLES and load) else "TABLE" + logger.debug(f"Dumping {len(self.measurements)} measurements using {db_table} 
({settings.USE_TEMP_TABLES}|{load})") + with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: + connection.set_session(autocommit=True) + with connection.cursor() as cursor: + start_time = time() + + cursor.execute(get_query( + "temp_measurements_dump.sql", + table=db_table + )) + + iterator = StringIteratorIO( + (to_tsv(line) for line in self.measurements) + ) + cursor.copy_expert( + """ + COPY staging_measurements (ingest_id, source_name, source_id, measurand, value, datetime, lon, lat, fetchlogs_id) + FROM stdin; + """, + iterator, + ) + + if load: + logger.info(f'processing {len(self.measurements)} measurements'); + query = get_query("etl_process_measurements.sql") + try: + cursor.execute(query) + connection.commit() + logger.info("dump_measurements: measurements: %s; time: %0.4f", len(self.measurements), time() - start_time) + for notice in connection.notices: + logger.debug(notice) + + except Exception as err: + logger.error(err) + + + def load(self, data = {}): + if "meta" in data.keys(): + self.load_metadata(data.get('meta')) + if "locations" in data.keys(): + self.load_locations(data.get('locations')) + if "measures" in data.keys(): + self.load_measurements(data.get('measures')) + + + def reset(self): + """ + Reset the client to the new state. Mostly for testing purposes + """ + logger.debug("Reseting the client data") + self.measurements = [] + self.nodes = [] + self.systems = [] + self.sensors = [] + self.flags = [] + self.keys = [] + self.key = None + self.fetchlogs_id = None + self.node_ids = [] + self.system_ids = [] + self.sensor_ids = [] + + + def load_keys(self, rows): + # for each fetchlog we need to read and load + for row in rows: + key = row[1] + fetchlogs_id = row[0] + last_modified = row[2] + self.load_key(key, fetchlogs_id, last_modified) + + + def load_key(self, key, fetchlogs_id, last_modified): + logger.debug(f"Loading key: {fetchlogs_id}//:{key}") + is_csv = bool(re.search(r"\.csv(.gz)?$", key)) + is_json = bool(re.search(r"\.(nd)?json(.gz)?$", key)) + self.fetchlogs_id = fetchlogs_id + + # is it a local file? 
This is used for dev + # but likely fine to leave in + if os.path.exists(os.path.expanduser(key)): + content = get_file(os.path.expanduser(key)).read() + else: + content = select_object(key) + + if is_json: + logger.debug(f"Read JSON containing {len(content)} characters") + else: + logger.debug(f"Read CSV containing {len(content)} lines") + + if is_csv: + # all csv data will be measurements + for rw in csv.reader(content.split("\n")): + self.add_measurement(rw) + elif is_json: + # all json data should just be parsed and loaded + data = orjson.loads(content) + self.load(data) + else: + raise Exception('No idea what to do') + + # add the key to the table to update + self.keys.append({"key": key, "last_modified": last_modified, "fetchlogs_id": fetchlogs_id}) + + + def load_metadata(self, meta): + if "source" in meta.keys(): + self.source = meta.get('source') + if "matching_method" in meta.keys(): + self.matching_method = meta.get('matching_method') + if "schema" in meta.keys(): + self.schema = meta.get('schema') + + def load_locations(self, locations): + for loc in locations: + self.add_node(loc) + + def load_measurements(self, measurements): + logger.debug(f'Loading {len(measurements)} measurements') + for meas in measurements: + self.add_measurement(meas) + + + def add_sensor(self, j, system_id, fetchlogsId): + for s in j: + sensor = {} + metadata = {} + sensor["ingest_sensor_systems_id"] = system_id + sensor["fetchlogs_id"] = fetchlogsId + + if "sensor_id" in s: + id = s.get("sensor_id") + elif "id" in s: + id = s.get("id") + else: + id = system_id + + if id in self.sensor_ids: + # would it make more sense to merge or skip or throw error? + # merge and submit a warning maybe? + continue + + sensor["ingest_id"] = id + + for key, value in s.items(): + key = str.replace(key, "sensor_", "") + if key == "flags": + self.add_flags(value, id, fetchlogsId) + elif key == "measurand_parameter": + sensor["measurand"] = value + elif key == "measurand_unit": + sensor["units"] = fix_units(value) + elif key == "status": + sensor["status"] = value + elif key == "interval_seconds": + sensor["logging_interval_seconds"] = value + sensor["averaging_interval_seconds"] = value + else: + metadata[key] = value + if not sensor.get('measurand'): + # get it from the ingest id + ingest_arr = sensor.get('ingest_id').split('-') + sensor['measurand'] = ingest_arr[-1] # take the last one + sensor["metadata"] = orjson.dumps(metadata).decode() + self.sensors.append(sensor) + self.sensor_ids.append(id) + + def add_flags(self, flags, sensor_id, fetchlogsId): + for f in flags: + flag = {} + metadata = {} + flag["sensor_ingest_id"] = sensor_id + flag["fetchlogs_id"] = fetchlogsId + for key, value in f.items(): + key = str.replace(key, "flag_", "") + if key == "id": + v = str.replace(value, f"{sensor_id}-", "") + flag["ingest_id"] = v + + elif key == 'datetime_from': + flag["datetime_from"] = value + elif key == 'datetime_to': + flag["datetime_to"] = value + elif key == 'note': + flag["note"] = value + else: + metadata[key] = value + + flag["metadata"] = orjson.dumps(metadata).decode() + self.flags.append(flag) + + def add_systems(self, j, node_id, fetchlogsId): + for s in j: + system = {} + metadata = {} + if "sensor_system_id" in s: + id = s.get("sensor_system_id") + elif "system_id" in s: + id = s.get("system_id") + else: + id = node_id + + if id in self.system_ids: + # would it make more sense to merge or skip or throw error? 
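+            # current behavior: the first system definition with a given ingest id wins;
+            # later duplicates in the same batch are skipped rather than merged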
+ continue + + ingest_arr = id.split('-') + # this will not work with a uuid passed as a site id + if len(ingest_arr) == 3: + system["instrument_ingest_id"] = ingest_arr[-1]; + + system["ingest_sensor_nodes_id"] = node_id + system["ingest_id"] = id + system["fetchlogs_id"] = fetchlogsId + for key, value in s.items(): + key = str.replace(key, "sensor_system_", "") + if key == "sensors": + self.add_sensor(value, id, fetchlogsId) + else: + metadata[key] = value + system["metadata"] = orjson.dumps(metadata).decode() + self.systems.append(system) + self.system_ids.append(id) + + def add_node(self, j): + fetchlogs_id = j.get('fetchlogs_id', self.fetchlogs_id) + node = { "fetchlogs_id": fetchlogs_id } + metadata = {} + mp = self.node_map + + for k, v in j.items(): + # pass the whole measure + col, value = self.process(k, j, self.node_map) + if col is not None: + node[col] = value + else: + if not k in ['systems','sensor_system']: + metadata[k] = v + + # make sure we actually have data to add + if len(node.keys())>0: + # check for id + ingest_id = node.get('ingest_id') + if ingest_id is None: + raise Exception('Missing ingest id') + + ingest_arr = ingest_id.split('-') + # source name could be set explicitly + # or in the ingest id + # or in the metadata + if node.get('source_name') is None: + if len(ingest_arr)>1: + node['source_name'] = ingest_arr[0] + elif self.source is not None: + node['source_name'] = self.source + else: + raise Exception('Could not find source name') + + # support ingest id that is just the source id + if node.get('source_id') is None: + if len(ingest_arr)>1: + # updated to handle uuid + node['source_id'] = '-'.join(ingest_arr[1:len(ingest_arr)]) + else: + node['source_id'] = ingest_arr[0] + + if node.get('matching_method') is None: + node['matching_method'] = self.matching_method + + # prevent adding the node more than once + # this does not save processing time of course + if ingest_id not in self.node_ids: + node["metadata"] = orjson.dumps(metadata).decode() + self.node_ids.append(ingest_id) + self.nodes.append(node) + # now look for systems + if "sensor_system" in j.keys(): + self.add_systems(j.get('sensor_system'), node.get('ingest_id'), node.get('fetchlogs_id')) + elif "systems" in j.keys(): + self.add_systems(j.get("systems"), node.get('ingest_id'), node.get('fetchlogs_id')) + else: + # no systems + logger.debug(j.keys()) + else: + logger.warning('nothing mapped to node') + + + def add_measurement(self, m): + # create a row with + # ingest_id,datetime,value,lon,lat + # where ingest id will be what links to the sensor + meas = {} + lat = None + lon = None + + # csv method + if isinstance(m, list): + if len(m) < 3: + logger.warning(f'Not enough data in list value: {m}') + return + + fetchlogs_id = self.fetchlogs_id + ingest_id = m[0] + value = m[1] + # using the same key/data format as below + datetime = to_timestamp('dt', {"dt": m[2]}) + if len(m) == 5: + lat = m[3] + lon = m[4] + + elif isinstance(m, dict): + for k, v in m.items(): + # pass the whole measure + col, value = self.process(k, m, self.measurement_map) + if col is not None: + meas[col] = value + + ingest_id = meas.get('ingest_id') + datetime = meas.get('datetime') + value = meas.get('value') + lon = meas.get('lon', None) + lat = meas.get('lat', None) + fetchlogs_id = m.get('fetchlogs_id', self.fetchlogs_id) + + # parse the ingest id here + ingest_arr = ingest_id.split('-') + if len(ingest_arr) < 3: + logger.warning(f'Not enough information in ingest-id: `{ingest_id}`') + return + + source_name = 
ingest_arr[0] + source_id = '-'.join(ingest_arr[1:len(ingest_arr)-1]) + measurand = ingest_arr[-1] + + if not None in [ingest_id, datetime, source_name, source_id, measurand]: + self.measurements.append([ingest_id, source_name, source_id, measurand, value, datetime, lon, lat, fetchlogs_id]) + + + + def refresh_cached_tables(self): + """ + Refresh the cached tables that we use for most production endpoints. + Right now this is just for testing purposes + """ + with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: + connection.set_session(autocommit=True) + with connection.cursor() as cursor: + logger.debug("Refreshing the cached tables") + cursor.execute("REFRESH MATERIALIZED VIEW locations_view_cached;") + cursor.execute("REFRESH MATERIALIZED VIEW locations_manufacturers_cached;") + cursor.execute("REFRESH MATERIALIZED VIEW locations_latest_measurements_cached;") + cursor.execute("REFRESH MATERIALIZED VIEW providers_view_cached;") + cursor.execute("REFRESH MATERIALIZED VIEW countries_view_cached;") + cursor.execute("REFRESH MATERIALIZED VIEW parameters_view_cached;") + + + + def process_hourly_data(self,n: int = 1000): + """ + Process any pending hourly data rollups. + Right now this is just for testing purposes + """ + with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: + connection.set_session(autocommit=True) + with connection.cursor() as cursor: + cursor.execute("SELECT datetime, tz_offset FROM fetch_hourly_data_jobs(%s)", (n,)) + rows = cursor.fetchall() + for row in rows: + cursor.execute("SELECT update_hourly_data(%s, %s)", row) + connection.commit() + + + def process_daily_data(self,n: int = 500): + """ + Process any pending daily data rollups. + Right now this is just for testing purposes + """ + with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: + connection.set_session(autocommit=True) + with connection.cursor() as cursor: + cursor.execute("SELECT datetime, tz_offset FROM fetch_daily_data_jobs(%s)", (n,)) + rows = cursor.fetchall() + for row in rows: + cursor.execute("SELECT update_daily_data(%s, %s)", row) + connection.commit() + + + def process_annual_data(self,n: int = 25): + """ + Process any pending annual data rollups. 
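+        Pulls pending jobs from fetch_annual_data_jobs(n) and runs
+        update_annual_data() for each returned (datetime, tz_offset) pair.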
+ Right now this is just for testing purposes + """ + with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: + connection.set_session(autocommit=True) + with connection.cursor() as cursor: + cursor.execute("SELECT datetime, tz_offset FROM fetch_annual_data_jobs(%s)", (n,)) + rows = cursor.fetchall() + for row in rows: + cursor.execute("SELECT update_annual_data(%s, %s)", row) + connection.commit() + + + def get_metadata(self): + hasnew = False + for obj in self.page: + key = obj["Key"] + id = obj["id"] + last_modified = obj["LastModified"] + try: + logger.debug(f"Loading station file: {id}:{key}") + self.get_station(key, id) + self.keys.append( + { + "key": key, + "last_modified": last_modified, + "fetchlogs_id": id + } + ) + hasnew = True + except Exception as e: + # catch and continue to next page + logger.error( + f"Could not process file: {id}: {key}: {e}" + ) + + if hasnew: + logger.debug(f"get_metadata:hasnew - {self.keys}") + self.load_data() + +def create_staging_table(cursor): + # table and batch are used primarily for testing + cursor.execute(get_query( + "etl_staging_v2.sql", + table="TEMP TABLE" if settings.USE_TEMP_TABLES else 'TABLE' + )) + +def write_csv(cursor, data, table, columns): + fields = ",".join(columns) + sio = StringIO() + writer = csv.DictWriter(sio, columns) + writer.writerows(data) + sio.seek(0) + cursor.copy_expert( + f""" + copy {table} ({fields}) from stdin with csv; + """, + sio, + ) + logger.debug(f"table: {table}; rowcount: {cursor.rowcount}") + + + + +def load_metadata_bucketscan(count=100): + paginator = s3c.get_paginator("list_objects_v2") + for page in paginator.paginate( + Bucket=FETCH_BUCKET, + Prefix="lcs-etl-pipeline/stations", + PaginationConfig={"PageSize": count}, + ): + try: + contents = page["Contents"] + data = LCSData(contents) + data.get_metadata() + except KeyError: + break + + +def load_metadata_db(limit=250, ascending: bool = False): + order = 'ASC' if ascending else 'DESC' + pattern = 'lcs-etl-pipeline/stations/' + rows = load_fetchlogs(pattern, limit, ascending) + contents = [] + for row in rows: + logger.debug(row) + contents.append( + { + "Key": unquote_plus(row[1]), + "LastModified": row[2], + "id": row[0], + } + ) + if len(contents) > 0: + load_metadata(contents) + # data = LCSData(contents) + # data.get_metadata() + return len(rows) + + +def load_metadata_batch(batch: str): + with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: + connection.set_session(autocommit=True) + with connection.cursor() as cursor: + cursor.execute( + """ + SELECT key + , last_modified + , fetchlogs_id + FROM fetchlogs + WHERE batch_uuid = %s + """, + (batch,), + ) + rows = cursor.fetchall() + rowcount = cursor.rowcount + contents = [] + for row in rows: + contents.append( + { + "Key": unquote_plus(row[0]), + "LastModified": row[1], + "id": row[2], + } + ) + for notice in connection.notices: + logger.debug(notice) + if len(contents) > 0: + load_metadata(contents) + # data = LCSData(contents) + # data.get_metadata() + return rowcount + + +def load_metadata(keys): + logger.debug(f'Load metadata: {len(keys)}') + data = LCSData(keys) + try: + data.get_metadata() + except Exception as e: + ids = ','.join([str(k['id']) for k in keys]) + logger.error(f'load error: {e} ids: {ids}') + raise + + +def get_measurements(key, fetchlogsId): + start = time() + content = select_object(key) + fetch_time = time() - start + + ret = [] + start = time() + for row in csv.reader(content.split("\n")): + if len(row) not in [3, 5]: + continue + if len(row) 
== 5: + try: + lon = float(row[3]) + lat = float(row[4]) + if not ( + lon is None + or lat is None + or lat == "" + or lon == "" + or lon == 0 + or lat == 0 + or lon < -180 + or lon > 180 + or lat < -90 + or lat > 90 + ): + row[3] = lon + row[4] = lat + else: + row[3] = None + row[4] = None + except Exception: + row[3] = None + row[4] = None + else: + row.insert(3, None) + row.insert(4, None) + if row[0] == "" or row[0] is None: + continue + dt = row[2] + + try: + if dt.isnumeric(): + if len(dt) == 13: + dt = datetime.fromtimestamp(int(dt)/1000.0, timezone.utc) + else: + dt = datetime.fromtimestamp(int(dt), timezone.utc) + row[2] = dt.isoformat() + except Exception: + try: + dt = dateparser.parse(dt).replace(tzinfo=timezone.utc) + except Exception: + logger.warning(f"Exception in parsing date for {dt} {Exception}") + + #row[2] = dt.isoformat() + # addd the log id for tracing purposes + row.insert(5, fetchlogsId) + ret.append(row) + logger.info("get_measurements:csv: %s; size: %s; rows: %s; fetching: %0.4f; reading: %0.4f", key, len(content)/1000, len(ret), fetch_time, time() - start) + return ret + + +def submit_file_error(key, e): + """Update the log to reflect the error and prevent a retry""" + logger.error(f"{key}: {e}") + with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: + connection.set_session(autocommit=True) + with connection.cursor() as cursor: + cursor.execute( + """ + UPDATE fetchlogs + SET completed_datetime = clock_timestamp() + , last_message = %s + WHERE key = %s + """, + (f"ERROR: {e}", key), + ) + + +def to_tsv(row): + tsv = "\t".join(map(clean_csv_value, row)) + "\n" + return tsv + return "" + + +def load_measurements_file(fetchlogs_id: int): + with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: + connection.set_session(autocommit=True) + with connection.cursor() as cursor: + cursor.execute( + """ + SELECT fetchlogs_id + , key + FROM fetchlogs + WHERE fetchlogs_id = %s + LIMIT 1 + ; + """, + (fetchlogs_id,), + ) + rows = cursor.fetchall() + load_measurements(rows) + + +def load_measurements_batch(batch: str): + with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: + connection.set_session(autocommit=True) + with connection.cursor() as cursor: + cursor.execute( + """ + SELECT fetchlogs_id + , key + FROM fetchlogs + WHERE batch_uuid = %s + """, + (batch,), + ) + rows = cursor.fetchall() + load_measurements(rows) + + +def load_measurements_db( + limit=250, + ascending: bool = False, + pattern = '^lcs-etl-pipeline/measures/.*\\.(csv|json)' + ): + rows = load_fetchlogs(pattern, limit, ascending) + load_measurements(rows) + return len(rows) + + +# Keep seperate from above so we can test rows not from the database +def load_measurements(rows): + logger.debug(f"loading {len(rows)} measurements") + start_time = time() + # get a client object to hold all the data + client = IngestClient() + # load all the keys + client.load_keys(rows) + # and finally we can dump it all into the db + client.dump() + # write to the log + logger.info("load_measurements:get: %s keys; %s measurements; %s locations; %0.4f seconds", + len(client.keys), len(client.measurements), len(client.nodes), time() - start_time) diff --git a/ingest/lcs_ingest_full.sql b/ingest/lcs_ingest_full.sql index 612cc36..a9523fd 100644 --- a/ingest/lcs_ingest_full.sql +++ b/ingest/lcs_ingest_full.sql @@ -1,4 +1,4 @@ --- Get sensor systems +-- lcs_ingest_full DO $$ DECLARE __process_start timestamptz := clock_timestamp(); @@ -29,12 +29,32 @@ FROM ms_sensors WHERE ms_sensors.ingest_id IS 
NULL OR ingest_sensor_systems_id IS NULL; + +UPDATE ms_sensors +SET units = 'µg/m³' +WHERE units IN ('µg/m��','��g/m³', 'ug/m3'); + +-- first thing we want to do is to get the source +-- and the source_id from the ingest id +-- adding the station forces our method to treat the station as the parameter +-- the first section as the source name and then the rest as the source id +-- this is required for ingest_ids that use `-` in the source_id +-- e.g. something-blah-blah-blah-pm10 +-- where the sensor node ingest id would be +-- something-blah-blah-blah +-- and blah could be read as a parameter value +UPDATE ms_sensornodes +SET source_id = CASE + WHEN source_name ~* 'purpleair|habitatmap' THEN ingest_id + ELSE split_ingest_id(ingest_id||'-station', 2) -- station is a placeholder + END; + + -- match the sensor nodes to get the sensor_nodes_id UPDATE ms_sensornodes SET sensor_nodes_id = sensor_nodes.sensor_nodes_id FROM sensor_nodes WHERE sensor_nodes.source_name = ms_sensornodes.source_name -AND sensor_nodes.source_id = ms_sensornodes.ingest_id; +AND sensor_nodes.source_id = ms_sensornodes.source_id; -- And now we insert those into the sensor nodes table -- we are grouping to deal with any duplicates that currently exist @@ -46,21 +66,30 @@ INSERT INTO sensor_nodes ( , geom , metadata , source_id +, timezones_id +, providers_id +, countries_id ) SELECT site_name , source_name , ismobile , geom , metadata -, ingest_id +, source_id +, get_timezones_id(geom) +, get_providers_id(source_name) +, get_countries_id(geom) FROM ms_sensornodes -GROUP BY site_name, source_name, ismobile, geom, metadata, ingest_id +GROUP BY 1,2,3,4,5,6,7,8 ON CONFLICT (source_name, source_id) DO UPDATE SET - site_name=coalesce(EXCLUDED.site_name,sensor_nodes.site_name), - ismobile=coalesce(EXCLUDED.ismobile,sensor_nodes.ismobile), - geom=coalesce(EXCLUDED.geom,sensor_nodes.geom), - metadata=COALESCE(sensor_nodes.metadata, '{}') || COALESCE(EXCLUDED.metadata, '{}') + site_name=coalesce(EXCLUDED.site_name,sensor_nodes.site_name) + , ismobile=coalesce(EXCLUDED.ismobile,sensor_nodes.ismobile) + , geom=coalesce(EXCLUDED.geom,sensor_nodes.geom) + , metadata=COALESCE(sensor_nodes.metadata, '{}') || COALESCE(EXCLUDED.metadata, '{}') + , timezones_id = COALESCE(EXCLUDED.timezones_id, sensor_nodes.timezones_id) + , providers_id = COALESCE(EXCLUDED.providers_id, sensor_nodes.providers_id) + , modified_on = now() RETURNING 1) SELECT COUNT(1) INTO __inserted_nodes FROM inserts; @@ -75,7 +104,7 @@ SET sensor_nodes_id = sensor_nodes.sensor_nodes_id FROM sensor_nodes WHERE ms_sensornodes.sensor_nodes_id is null AND sensor_nodes.source_name = ms_sensornodes.source_name -AND sensor_nodes.source_id = ms_sensornodes.ingest_id; +AND sensor_nodes.source_id = ms_sensornodes.source_id; -- log anything we were not able to get an id for WITH r AS ( @@ -124,7 +153,8 @@ FROM ms_sensorsystems WHERE sensor_nodes_id IS NOT NULL GROUP BY sensor_nodes_id, ingest_id, metadata ON CONFLICT (sensor_nodes_id, source_id) DO UPDATE SET - metadata=COALESCE(sensor_systems.metadata, '{}') || COALESCE(EXCLUDED.metadata, '{}'); + metadata=COALESCE(sensor_systems.metadata, '{}') || COALESCE(EXCLUDED.metadata, '{}') + , modified_on = now(); ---------------------------- -- lcs_ingest_sensors.sql -- @@ -183,27 +213,7 @@ from measurands WHERE ms_sensors.measurand=measurands.measurand and ms_sensors.units=measurands.units; --- Removed the following because it has the ids hard coded in --- if we want to continue to filter these out we should do it at the fetcher
-------------------------------------------------------------------------------------------------------------- --- UPDATE ms_sensors -- --- SET measurands_id = 10 -- --- WHERE ms_sensors.measurand='ozone' -- --- AND ms_sensors.units='ppm'; -- --- -- --- UPDATE ms_sensors SET measurands_id = 126 WHERE measurands_id is null and ms_sensors.measurand='um010'; -- --- UPDATE ms_sensors SET measurands_id = 130 WHERE measurands_id is null and ms_sensors.measurand='um025'; -- --- UPDATE ms_sensors SET measurands_id = 135 WHERE measurands_id is null and ms_sensors.measurand='um100'; -- --- UPDATE ms_sensors SET measurands_id = 19 WHERE measurands_id is null and ms_sensors.measurand='pm1'; -- --- UPDATE ms_sensors SET measurands_id = 2 WHERE measurands_id is null and ms_sensors.measurand='pm25'; -- --- UPDATE ms_sensors SET measurands_id = 1 WHERE measurands_id is null and ms_sensors.measurand='pm10'; -- --- -- --- DELETE -- --- FROM ms_sensors -- --- WHERE ingest_id ~* 'purple' -- --- AND measurands_id is null -- --- AND measurand in ('um003','um050','um005'); -- -------------------------------------------------------------------------------------------------------------- + WITH r AS ( INSERT INTO rejects (t, tbl,r,fetchlogs_id) SELECT diff --git a/ingest/lcs_ingest_nodes.sql b/ingest/lcs_ingest_nodes.sql deleted file mode 100644 index f077150..0000000 --- a/ingest/lcs_ingest_nodes.sql +++ /dev/null @@ -1,28 +0,0 @@ -DELETE FROM ms_sensornodes WHERE ms_sensornodes.ingest_id IS NULL; -DELETE FROM ms_sensorsystems WHERE ms_sensorsystems.ingest_id is null or ingest_sensor_nodes_id IS NULL; -DELETE FROM ms_sensors WHERE ms_sensors.ingest_id is null OR ingest_sensor_systems_id IS NULL; - -SELECT notify('After Deletes'); - -UPDATE ms_sensornodes -SET sensor_nodes_id = sensor_nodes.sensor_nodes_id -FROM sensor_nodes -WHERE -sensor_nodes.source_name = ms_sensornodes.source_name -AND -sensor_nodes.source_id = ms_sensornodes.ingest_id; - - -INSERT INTO sensor_nodes (site_name, source_name, ismobile, geom, metadata, source_id) -SELECT site_name, source_name, ismobile, geom, metadata, ingest_id FROM -ms_sensornodes -ON CONFLICT (source_name, source_id) DO -UPDATE - SET - site_name=coalesce(EXCLUDED.site_name,sensor_nodes.site_name), - ismobile=coalesce(EXCLUDED.ismobile,sensor_nodes.ismobile), - geom=coalesce(EXCLUDED.geom,sensor_nodes.geom), - metadata=sensor_nodes.metadata || EXCLUDED.metadata -; - -SELECT notify('After nodes'); diff --git a/ingest/lcs_ingest_sensors.sql b/ingest/lcs_ingest_sensors.sql deleted file mode 100644 index f0750e2..0000000 --- a/ingest/lcs_ingest_sensors.sql +++ /dev/null @@ -1,114 +0,0 @@ - -UPDATE ms_sensorsystems -SET sensor_systems_id = sensor_systems.sensor_systems_id -FROM sensor_systems -WHERE ms_sensorsystems.sensor_systems_id IS NULL -AND -ms_sensorsystems.sensor_nodes_id=sensor_systems.sensor_nodes_id -AND -ms_sensorsystems.ingest_id=sensor_systems.source_id -; - -INSERT INTO rejects (t, tbl,r) SELECT - now(), - 'ms_sensorsystems', - to_jsonb(ms_sensorsystems) -FROM ms_sensorsystems WHERE sensor_systems_id IS NULL; - -UPDATE ms_sensors -SET sensor_systems_id = ms_sensorsystems.sensor_systems_id -FROM ms_sensorsystems WHERE -ms_sensors.ingest_sensor_systems_id = ms_sensorsystems.ingest_id; - -INSERT INTO rejects (t, tbl,r) SELECT - now(), - 'ms_sensors', - to_jsonb(ms_sensors) -FROM ms_sensors WHERE sensor_systems_id IS NULL; - - -UPDATE ms_sensors -SET sensors_id = sensors.sensors_id -FROM sensors -WHERE -sensors.sensor_systems_id=ms_sensors.sensor_systems_id -AND 
-sensors.source_id = ms_sensors.ingest_id; - -SELECT count(*) from measurands; - -/* -INSERT INTO measurands (measurand, units) -SELECT DISTINCT measurand, units FROM ms_sensors -ON CONFLICT DO NOTHING; - -SELECT count(*) from measurands; -*/ - -UPDATE ms_sensors -SET measurands_id = measurands.measurands_id -from measurands -WHERE ms_sensors.measurand=measurands.measurand -and ms_sensors.units=measurands.units; - -UPDATE ms_sensors -SET measurands_id = 10 -WHERE -ms_sensors.measurand='ozone' -AND -ms_sensors.units='ppm'; - -UPDATE ms_sensors SET measurands_id = 126 WHERE measurands_id is null and ms_sensors.measurand='um010'; -UPDATE ms_sensors SET measurands_id = 130 WHERE measurands_id is null and ms_sensors.measurand='um025'; -UPDATE ms_sensors SET measurands_id = 135 WHERE measurands_id is null and ms_sensors.measurand='um100'; -UPDATE ms_sensors SET measurands_id = 19 WHERE measurands_id is null and ms_sensors.measurand='pm1'; -UPDATE ms_sensors SET measurands_id = 2 WHERE measurands_id is null and ms_sensors.measurand='pm25'; -UPDATE ms_sensors SET measurands_id = 1 WHERE measurands_id is null and ms_sensors.measurand='pm10'; - -DELETE FROM ms_sensors WHERE ingest_id ~* 'purple' AND measurands_id is null AND measurand in ('um003','um050','um005'); - -INSERT INTO rejects (t, tbl,r) SELECT - now(), - 'ms_sensors no measurand', - to_jsonb(ms_sensors) -FROM ms_sensors WHERE measurands_id IS NULL; - -INSERT INTO sensors ( - source_id -, sensor_systems_id -, measurands_id -, metadata) -SELECT ingest_id -, sensor_systems_id -, measurands_id -, metadata -FROM ms_sensors -WHERE measurands_id is not null -AND sensor_systems_id is not null -GROUP BY ingest_id -, sensor_systems_id -, measurands_id -, metadata -ON CONFLICT (sensor_systems_id, measurands_id, source_id) DO -UPDATE SET - metadata=sensors.metadata || EXCLUDED.metadata -; - - -SELECT notify('After sensors'); - - -UPDATE ms_sensors -SET sensors_id = sensors.sensors_id -FROM sensors -WHERE -sensors.sensor_systems_id=ms_sensors.sensor_systems_id -AND -sensors.source_id = ms_sensors.ingest_id; - - -INSERT INTO rejects (tbl,r) -SELECT - 'ms_sensors', - to_jsonb(ms_sensors) -FROM ms_sensors WHERE sensors_id IS NULL; diff --git a/ingest/lcs_ingest_systems.sql b/ingest/lcs_ingest_systems.sql deleted file mode 100644 index 4d0b592..0000000 --- a/ingest/lcs_ingest_systems.sql +++ /dev/null @@ -1,53 +0,0 @@ - --- fill in any new sensor_nodes_id -UPDATE ms_sensornodes -SET sensor_nodes_id = sensor_nodes.sensor_nodes_id -FROM sensor_nodes -WHERE -ms_sensornodes.sensor_nodes_id is null -AND -sensor_nodes.source_name = ms_sensornodes.source_name -AND -sensor_nodes.source_id = ms_sensornodes.ingest_id; - --- log anything we were not able to get an id for -INSERT INTO rejects (t, tbl,r) SELECT - now(), - 'ms_sensornodes', - to_jsonb(ms_sensornodes) -FROM ms_sensornodes WHERE sensor_nodes_id IS NULL; - - -UPDATE ms_sensorsystems -SET sensor_nodes_id = ms_sensornodes.sensor_nodes_id -FROM ms_sensornodes WHERE -ms_sensorsystems.ingest_sensor_nodes_id = ms_sensornodes.ingest_id; - -UPDATE ms_sensorsystems -SET sensor_systems_id = sensor_systems.sensor_systems_id -FROM sensor_systems -WHERE -sensor_systems.sensor_nodes_id = ms_sensorsystems.sensor_nodes_id -AND -sensor_systems.source_id = ms_sensorsystems.ingest_id; - --- log anything we were not able to get an id for -INSERT INTO rejects (t, tbl,r) SELECT - now(), - 'ms_sensorsystems', - to_jsonb(ms_sensorsystems) -FROM ms_sensorsystems WHERE sensor_nodes_id IS NULL; - -SELECT notify('immediately before 
insert on systems'); - -INSERT INTO sensor_systems (sensor_nodes_id, source_id, metadata) -SELECT sensor_nodes_id, ingest_id, metadata -FROM ms_sensorsystems -WHERE sensor_nodes_id IS NOT NULL -ON CONFLICT (sensor_nodes_id, source_id) -DO -UPDATE SET - metadata=sensor_systems.metadata || EXCLUDED.metadata -; - -SELECT notify('After systems'); diff --git a/ingest/lcs_meas_ingest.sql b/ingest/lcs_meas_ingest.sql index 953791e..8ab06a7 100644 --- a/ingest/lcs_meas_ingest.sql +++ b/ingest/lcs_meas_ingest.sql @@ -1,10 +1,24 @@ --- Get sensor systems +-- lcs_meas_ingest DO $$ DECLARE __process_start timestamptz := clock_timestamp(); +__total_measurements int; __inserted_measurements int; -__rejected_measurements int; +__rejected_measurements int := 0; +__rejected_nodes int := 0; +__total_nodes int := 0; +__updated_nodes int := 0; +__inserted_nodes int := 0; __exported_days int; +__start_datetime timestamptz; +__end_datetime timestamptz; +__inserted_start_datetime timestamptz; +__inserted_end_datetime timestamptz; +__process_time_ms int; +__insert_time_ms int; +__cache_time_ms int; +__error_context text; +__ingest_method text := 'lcs'; BEGIN DELETE @@ -13,21 +27,139 @@ WHERE ingest_id IS NULL OR datetime is NULL OR value IS NULL; -DELETE -FROM meas -WHERE datetime < '2018-01-01'::timestamptz -OR datetime>now(); +--DELETE +--FROM meas +--WHERE datetime < '2018-01-01'::timestamptz +--OR datetime>now(); DELETE FROM rejects WHERE fetchlogs_id IN (SELECT fetchlogs_id FROM meas) AND tbl ~* '^meas'; + +SELECT COUNT(1) +, MIN(datetime) +, MAX(datetime) +INTO __total_measurements +, __start_datetime +, __end_datetime +FROM meas; + + +-- -- The ranking is to deal with the current possibility +-- -- that duplicate sensors with the same ingest/source id are created +-- -- this is a short term fix +-- -- a long term fix would not allow duplicate source_id's +-- WITH ranked_sensors AS ( +-- SELECT s.sensors_id +-- , s.source_id +-- , RANK() OVER (PARTITION BY s.source_id ORDER BY added_on ASC) as rnk +-- FROM sensors s +-- JOIN meas m ON (s.source_id = m.ingest_id) +-- WHERE s.is_active +-- ), active_sensors AS ( +-- SELECT source_id +-- , sensors_id +-- FROM ranked_sensors +-- WHERE rnk = 1) +-- UPDATE meas +-- SET sensors_id=s.sensors_id +-- FROM active_sensors s +-- WHERE s.source_id=ingest_id; + +-- The ranking is to deal with the current possibility +-- that duplicate sensors with the same ingest/source id are created + -- this is a short term fix + -- a long term fix would not allow duplicate source_id's +WITH staged_sensors AS ( + -- this first part signficantly speeds it up on slow machines + SELECT DISTINCT ingest_id + FROM meas +), ranked_sensors AS ( + SELECT s.sensors_id + , s.source_id + , RANK() OVER (PARTITION BY s.source_id ORDER BY added_on ASC) as rnk + FROM sensors s + JOIN staged_sensors m ON (s.source_id = m.ingest_id) +), active_sensors AS ( + SELECT source_id + , sensors_id + FROM ranked_sensors + WHERE rnk = 1) + UPDATE meas + SET sensors_id=s.sensors_id + FROM active_sensors s + WHERE s.source_id=ingest_id; + + +-- first the sensor nodes +WITH nodes AS ( +INSERT INTO sensor_nodes ( + source_name +, site_name +, source_id +, metadata) +SELECT split_ingest_id(ingest_id, 1) as source_name +, split_ingest_id(ingest_id, 2) as site_name +, split_ingest_id(ingest_id, 2) as source_id +, jsonb_build_object('fetchlogs_id', MIN(fetchlogs_id)) +FROM meas +WHERE sensors_id IS NULL +GROUP BY 1,2,3 +ON CONFLICT (source_name, source_id) DO UPDATE +SET source_id = EXCLUDED.source_id +, metadata = 
EXCLUDED.metadata||COALESCE(sensor_nodes.metadata, '{}'::jsonb) +RETURNING sensor_nodes_id, source_id) +INSERT INTO sensor_systems ( + sensor_nodes_id +, source_id) +SELECT sensor_nodes_id +, source_id +FROM nodes +ON CONFLICT DO NOTHING; + +-- now create a sensor for each +-- this method depends on us having a match for the parameter +WITH sen AS ( + SELECT ingest_id + , split_ingest_id(ingest_id, 1) as source_name + , split_ingest_id(ingest_id, 2) as source_id + , split_ingest_id(ingest_id, 3) as parameter + FROM meas + WHERE sensors_id IS NULL + GROUP BY 1,2,3,4 +), inserts AS ( +INSERT INTO sensors (sensor_systems_id, measurands_id, source_id) +SELECT sy.sensor_systems_id +, m.measurands_id +, ingest_id +FROM sen s +JOIN measurands_map_view m ON (s.parameter = m.key) +JOIN sensor_nodes n ON (s.source_name = n.source_name AND s.source_id = n.source_id) +JOIN sensor_systems sy ON (sy.sensor_nodes_id = n.sensor_nodes_id AND s.source_id = sy.source_id) +ON CONFLICT DO NOTHING +RETURNING sensor_systems_id) +SELECT COUNT(DISTINCT sensor_systems_id) INTO __inserted_nodes +FROM inserts; + +-- try again to find the sensors UPDATE meas SET sensors_id=s.sensors_id FROM sensors s -WHERE s.source_id=ingest_id; +WHERE s.source_id=ingest_id +AND meas.sensors_id IS NULL; + + +SELECT COUNT(DISTINCT sensors_id) +INTO __total_nodes +FROM meas; + +__process_time_ms := 1000 * (extract(epoch FROM clock_timestamp() - __process_start)); + +-- reject any missing. Most likely due to issues +-- with the measurand WITH r AS ( INSERT INTO rejects (t,tbl,r,fetchlogs_id) SELECT @@ -41,21 +173,10 @@ RETURNING 1) SELECT COUNT(1) INTO __rejected_measurements FROM r; +-- restart the clock to measure just inserts +__process_start := clock_timestamp(); -DELETE -FROM meas -WHERE sensors_id IS NULL; - --- --Some fake data to make it easier to test this section --- TRUNCATE meas; --- INSERT INTO meas (ingest_id, sensors_id, value, datetime) --- SELECT 'fake-ingest' --- , (SELECT sensors_id FROM sensors ORDER BY random() LIMIT 1) --- , -99 --- , generate_series(now() - '3day'::interval, current_date, '1hour'::interval); - - -WITH m AS ( +WITH inserts AS ( INSERT INTO measurements ( sensors_id, datetime, @@ -63,7 +184,7 @@ INSERT INTO measurements ( lon, lat ) SELECT - DISTINCT + --DISTINCT sensors_id, datetime, value, @@ -72,13 +193,194 @@ INSERT INTO measurements ( FROM meas WHERE sensors_id IS NOT NULL ON CONFLICT DO NOTHING -RETURNING 1) -SELECT COUNT(1) INTO __inserted_measurements -FROM m; +RETURNING sensors_id, datetime, value, lat, lon +), inserted as ( + INSERT INTO temp_inserted_measurements (sensors_id, datetime, value, lat, lon) + SELECT sensors_id + , datetime + , value + , lat + , lon + FROM inserts + RETURNING sensors_id, datetime +) +SELECT MIN(datetime) +, MAX(datetime) +, COUNT(1) +INTO __inserted_start_datetime +, __inserted_end_datetime +, __inserted_measurements +FROM inserted; + +__insert_time_ms := 1000 * (extract(epoch FROM clock_timestamp() - __process_start)); + +-- mark the fetchlogs as done +WITH inserted AS ( + SELECT m.fetchlogs_id + , COUNT(m.*) as n_records + , COUNT(t.*) as n_inserted + , MIN(m.datetime) as fr_datetime + , MAX(m.datetime) as lr_datetime + , MIN(t.datetime) as fi_datetime + , MAX(t.datetime) as li_datetime + FROM meas m + LEFT JOIN temp_inserted_measurements t ON (t.sensors_id = m.sensors_id AND t.datetime = m.datetime) + GROUP BY m.fetchlogs_id) +UPDATE fetchlogs +SET completed_datetime = CURRENT_TIMESTAMP +, inserted = COALESCE(n_inserted, 0) +, records = COALESCE(n_records, 
0) +, first_recorded_datetime = fr_datetime +, last_recorded_datetime = lr_datetime +, first_inserted_datetime = fi_datetime +, last_inserted_datetime = li_datetime +FROM inserted +WHERE inserted.fetchlogs_id = fetchlogs.fetchlogs_id; + +-- track the time required to update cache tables +__process_start := clock_timestamp(); + +-- -- Now we can use those temp_inserted_measurements to update the cache tables +-- INSERT INTO sensors_latest ( +-- sensors_id +-- , datetime +-- , value +-- , lat +-- , lon +-- ) +-- ---- identify the row that has the latest value +-- WITH numbered AS ( +-- SELECT sensors_id +-- , datetime +-- , value +-- , lat +-- , lon +-- , row_number() OVER (PARTITION BY sensors_id ORDER BY datetime DESC) as rn +-- FROM temp_inserted_measurements +-- ), latest AS ( +-- ---- only insert those rows +-- SELECT sensors_id +-- , datetime +-- , value +-- , lat +-- , lon +-- FROM numbered +-- WHERE rn = 1 +-- ) +-- SELECT l.sensors_id +-- , l.datetime +-- , l.value +-- , l.lat +-- , l.lon +-- FROM latest l +-- LEFT JOIN sensors_latest sl ON (l.sensors_id = sl.sensors_id) +-- WHERE sl.sensors_id IS NULL +-- OR l.datetime > sl.datetime +-- ON CONFLICT (sensors_id) DO UPDATE +-- SET datetime = EXCLUDED.datetime +-- , value = EXCLUDED.value +-- , lat = EXCLUDED.lat +-- , lon = EXCLUDED.lon +-- , modified_on = now() +-- --, fetchlogs_id = EXCLUDED.fetchlogs_id +-- ; +-- update the exceedances +INSERT INTO sensor_exceedances (sensors_id, threshold_value, datetime_latest) + SELECT + m.sensors_id + , t.value + , MAX(datetime) + FROM temp_inserted_measurements m + JOIN sensors s ON (m.sensors_id = s.sensors_id) + JOIN thresholds t ON (s.measurands_id = t.measurands_id) + AND m.value > t.value + GROUP BY 1, 2 + ON CONFLICT (sensors_id, threshold_value) DO UPDATE SET + datetime_latest = GREATEST(sensor_exceedances.datetime_latest, EXCLUDED.datetime_latest) + , updated_on = now(); + --- Update the export queue/logs to export these records --- wrap it in a block just in case the database does not have this module installed --- we subtract the second because the data is assumed to be time ending +INSERT INTO sensors_rollup ( + sensors_id + , datetime_first + , datetime_last + , value_latest + , value_count + , value_avg + , value_min + , value_max + , geom_latest + ) +---- identify the row that has the latest value +WITH numbered AS ( + SELECT sensors_id + , datetime + , value + , lat + , lon + , sum(1) OVER (PARTITION BY sensors_id) as value_count + , min(datetime) OVER (PARTITION BY sensors_id) as datetime_min + , avg(value) OVER (PARTITION BY sensors_id) as value_avg + , row_number() OVER (PARTITION BY sensors_id ORDER BY datetime DESC) as rn + FROM temp_inserted_measurements +), latest AS ( +---- only insert those rows + SELECT sensors_id + , datetime + , value + , value_count + , value_avg + , datetime_min + , lat + , lon + FROM numbered + WHERE rn = 1 +) +SELECT l.sensors_id +, l.datetime_min -- first +, l.datetime -- last +, l.value -- last value +, l.value_count +, l.value_avg +, l.value -- min +, l.value -- max +, public.pt3857(lon, lat) +FROM latest l +LEFT JOIN sensors_rollup sr ON (l.sensors_id = sr.sensors_id) +WHERE sr.sensors_id IS NULL +OR l.datetime > sr.datetime_last +OR l.datetime_min < sr.datetime_first +ON CONFLICT (sensors_id) DO UPDATE +SET datetime_last = GREATEST(sensors_rollup.datetime_last, EXCLUDED.datetime_last) +, value_latest = CASE WHEN EXCLUDED.datetime_last > sensors_rollup.datetime_last + THEN EXCLUDED.value_latest + ELSE sensors_rollup.value_latest + END +, 
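+-- as with value_latest above, geom_latest is only replaced when the incoming
+-- batch carries a more recent datetime_last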
geom_latest = CASE WHEN EXCLUDED.datetime_last > sensors_rollup.datetime_last + THEN EXCLUDED.geom_latest + ELSE sensors_rollup.geom_latest + END +, value_count = sensors_rollup.value_count + EXCLUDED.value_count +, value_min = LEAST(sensors_rollup.value_min, EXCLUDED.value_latest) +, value_max = GREATEST(sensors_rollup.value_max, EXCLUDED.value_latest) +, datetime_first = LEAST(sensors_rollup.datetime_first, EXCLUDED.datetime_first) +, modified_on = now() +--, fetchlogs_id = EXCLUDED.fetchlogs_id +; + + +-- Update the table that will help to track hourly rollups +INSERT INTO hourly_stats (datetime) + SELECT date_trunc('hour', datetime) + FROM temp_inserted_measurements + GROUP BY 1 +ON CONFLICT (datetime) DO UPDATE +SET modified_on = now(); + + +--Update the export queue/logs to export these records +--wrap it in a block just in case the database does not have this module installed +--we subtract the second because the data is assumed to be time ending WITH e AS ( INSERT INTO open_data_export_logs (sensor_nodes_id, day, records, measurands, modified_on) SELECT sn.sensor_nodes_id @@ -86,7 +388,7 @@ SELECT sn.sensor_nodes_id , COUNT(1) , COUNT(DISTINCT p.measurands_id) , MAX(now()) -FROM meas m +FROM temp_inserted_measurements m -- meas m JOIN sensors s ON (m.sensors_id = s.sensors_id) JOIN measurands p ON (s.measurands_id = p.measurands_id) JOIN sensor_systems ss ON (s.sensor_systems_id = ss.sensor_systems_id) @@ -101,14 +403,102 @@ RETURNING 1) SELECT COUNT(1) INTO __exported_days FROM e; -RAISE NOTICE 'inserted-measurements: %, rejected-measurements: %, exported-sensor-days: %, process-time-ms: %, source: lcs' + +__cache_time_ms := 1000 * (extract(epoch FROM clock_timestamp() - __process_start)); + +INSERT INTO ingest_stats ( + ingest_method + -- total + , total_measurements_processed + , total_measurements_inserted + , total_measurements_rejected + , total_nodes_processed + , total_nodes_inserted + , total_nodes_updated + , total_nodes_rejected + -- total times + , total_process_time_ms + , total_insert_time_ms + , total_cache_time_ms + -- latest + , latest_measurements_processed + , latest_measurements_inserted + , latest_measurements_rejected + , latest_nodes_processed + , latest_nodes_inserted + , latest_nodes_updated + , latest_nodes_rejected + -- times + , latest_process_time_ms + , latest_insert_time_ms + , latest_cache_time_ms + ) VALUES ( + -- totals + __ingest_method + , __total_measurements + , __inserted_measurements + , __rejected_measurements + , __total_nodes + , __inserted_nodes + , __updated_nodes + , __rejected_nodes + -- times + , __process_time_ms + , __insert_time_ms + , __cache_time_ms + -- latest + , __total_measurements + , __inserted_measurements + , __rejected_measurements + , __total_nodes + , __inserted_nodes + , __updated_nodes + , __rejected_nodes + -- times + , __process_time_ms + , __insert_time_ms + , __cache_time_ms +) ON CONFLICT (ingest_method) DO UPDATE SET + -- totals + total_measurements_processed = ingest_stats.total_measurements_processed + EXCLUDED.total_measurements_processed + , total_measurements_inserted = ingest_stats.total_measurements_inserted + EXCLUDED.total_measurements_inserted + , total_measurements_rejected = ingest_stats.total_measurements_rejected + EXCLUDED.total_measurements_rejected + , total_nodes_processed = ingest_stats.total_nodes_processed + EXCLUDED.total_nodes_processed + , total_nodes_inserted = ingest_stats.total_nodes_inserted + EXCLUDED.total_nodes_inserted + , total_nodes_updated = ingest_stats.total_nodes_updated + 
EXCLUDED.total_nodes_updated + , total_nodes_rejected = ingest_stats.total_nodes_rejected + EXCLUDED.total_nodes_rejected + , total_process_time_ms = ingest_stats.total_process_time_ms + EXCLUDED.total_process_time_ms + , total_insert_time_ms = ingest_stats.total_insert_time_ms + EXCLUDED.total_insert_time_ms + , total_cache_time_ms = ingest_stats.total_cache_time_ms + EXCLUDED.total_cache_time_ms + -- latest + , latest_measurements_processed = EXCLUDED.latest_measurements_processed + , latest_measurements_inserted = EXCLUDED.latest_measurements_inserted + , latest_measurements_rejected = EXCLUDED.latest_measurements_rejected + , latest_nodes_processed = EXCLUDED.latest_nodes_processed + , latest_nodes_inserted = EXCLUDED.latest_nodes_inserted + , latest_nodes_updated = EXCLUDED.latest_nodes_updated + , latest_nodes_rejected = EXCLUDED.latest_nodes_rejected + -- times + , latest_process_time_ms = EXCLUDED.latest_process_time_ms + , latest_insert_time_ms = EXCLUDED.latest_insert_time_ms + , latest_cache_time_ms = EXCLUDED.latest_cache_time_ms + , ingest_count = ingest_stats.ingest_count + 1 + , ingested_on = EXCLUDED.ingested_on; + + +RAISE NOTICE 'inserted-measurements: %, inserted-from: %, inserted-to: %, rejected-measurements: %, exported-sensor-days: %, process-time-ms: %, insert-time-ms: %, cache-time-ms: %, source: lcs' , __inserted_measurements + , __inserted_start_datetime + , __inserted_end_datetime , __rejected_measurements , __exported_days - , 1000 * (extract(epoch FROM clock_timestamp() - __process_start)); + , __process_time_ms + , __insert_time_ms + , __cache_time_ms; + EXCEPTION WHEN OTHERS THEN - RAISE NOTICE 'Failed to export to logs: %', SQLERRM - USING HINT = 'Make sure that the open data module is installed'; + GET STACKED DIAGNOSTICS __error_context = PG_EXCEPTION_CONTEXT; + RAISE NOTICE 'Failed to ingest measurements: %, %', SQLERRM, __error_context; END $$; diff --git a/ingest/lcs_meas_staging.sql b/ingest/lcs_meas_staging.sql deleted file mode 100644 index 3f8caf8..0000000 --- a/ingest/lcs_meas_staging.sql +++ /dev/null @@ -1,10 +0,0 @@ -CREATE TEMP TABLE meas ( - ingest_id text, - sensors_id int, - value float, - datetime timestamptz, - lon float, - lat float, - fetchlogs_id int -); -CREATE TEMP TABLE keys (key text, last_modified timestamptz); diff --git a/ingest/lcs_staging.sql b/ingest/lcs_staging.sql index dcf3067..7b2e7b8 100644 --- a/ingest/lcs_staging.sql +++ b/ingest/lcs_staging.sql @@ -1,15 +1,24 @@ -CREATE TEMP TABLE IF NOT EXISTS ms_sensornodes ( +-- DROP TABLE IF EXISTS +-- ms_sensornodes +-- , ms_sensorsystems +-- , ms_sensors +-- , meas +-- , keys +-- , temp_inserted_measurements; + +CREATE {table} IF NOT EXISTS ms_sensornodes ( sensor_nodes_id int, ingest_id text, site_name text, source_name text, + source_id text, ismobile boolean, geom geometry, metadata jsonb, fetchlogs_id int ); -CREATE TEMP TABLE IF NOT EXISTS ms_sensorsystems ( +CREATE {table} IF NOT EXISTS ms_sensorsystems ( sensor_systems_id int, ingest_id text, ingest_sensor_nodes_id text, @@ -18,8 +27,7 @@ CREATE TEMP TABLE IF NOT EXISTS ms_sensorsystems ( fetchlogs_id int ); - -CREATE TEMP TABLE IF NOT EXISTS ms_sensors ( +CREATE {table} IF NOT EXISTS ms_sensors ( ingest_id text, sensors_id int, sensor_systems_id int, @@ -31,4 +39,31 @@ CREATE TEMP TABLE IF NOT EXISTS ms_sensors ( fetchlogs_id int ); -CREATE TEMP TABLE keys (fetchlogs_id int, key text, last_modified timestamptz); +CREATE {table} IF NOT EXISTS meas ( + ingest_id text, + sensors_id int, + value float, + datetime timestamptz, 
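+    -- lon/lat are optional per-measurement coordinates and may be null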
+ lon float, + lat float, + fetchlogs_id int +); + +CREATE {table} IF NOT EXISTS keys ( + fetchlogs_id int + , key text + , last_modified timestamptz + ); + +-- This table will hold measurements that have +-- actually been inserted into the measurements table +-- this is to deal with the overlap that we see in the +-- incoming files +CREATE {table} IF NOT EXISTS temp_inserted_measurements ( + sensors_id int + , datetime timestamptz + , value double precision + , lat double precision + , lon double precision + , fetchlogs_id int +); diff --git a/ingest/settings.py b/ingest/settings.py index ced8420..f7fa380 100644 --- a/ingest/settings.py +++ b/ingest/settings.py @@ -1,5 +1,12 @@ from typing import Union -from pydantic import BaseSettings, validator + +from pydantic_settings import ( + BaseSettings, + SettingsConfigDict, + ) + +from pydantic import computed_field + from pathlib import Path from os import environ @@ -12,9 +19,6 @@ class Settings(BaseSettings): DATABASE_DB: str DATABASE_HOST: str DATABASE_PORT: int - DATABASE_READ_URL: Union[str, None] - DATABASE_WRITE_URL: Union[str, None] - FASTAPI_URL: str DRYRUN: bool = False FETCH_BUCKET: str ETL_BUCKET: str @@ -24,21 +28,20 @@ class Settings(BaseSettings): METADATA_LIMIT: int = 10 REALTIME_LIMIT: int = 10 LOG_LEVEL: str = 'INFO' + USE_TEMP_TABLES: bool = True + PAUSE_INGESTING: bool = False - @validator('DATABASE_READ_URL', allow_reuse=True) - def get_read_url(cls, v, values): - return v or f"postgresql://{values['DATABASE_READ_USER']}:{values['DATABASE_READ_PASSWORD']}@{values['DATABASE_HOST']}:{values['DATABASE_PORT']}/{values['DATABASE_DB']}" + @computed_field + def DATABASE_READ_URL(self) -> str: + return f"postgresql://{self.DATABASE_READ_USER}:{self.DATABASE_READ_PASSWORD}@{self.DATABASE_HOST}:{self.DATABASE_PORT}/{self.DATABASE_DB}" - @validator('DATABASE_WRITE_URL', allow_reuse=True) - def get_write_url(cls, v, values): - return v or f"postgresql://{values['DATABASE_WRITE_USER']}:{values['DATABASE_WRITE_PASSWORD']}@{values['DATABASE_HOST']}:{values['DATABASE_PORT']}/{values['DATABASE_DB']}" + @computed_field + def DATABASE_WRITE_URL(self) -> str: + return f"postgresql://{self.DATABASE_WRITE_USER}:{self.DATABASE_WRITE_PASSWORD}@{self.DATABASE_HOST}:{self.DATABASE_PORT}/{self.DATABASE_DB}" - class Config: - parent = Path(__file__).resolve().parent.parent - if 'DOTENV' in environ: - env_file = Path.joinpath(parent, environ['DOTENV']) - else: - env_file = Path.joinpath(parent, ".env") + model_config = SettingsConfigDict( + extra="ignore", env_file=f"{environ.get('DOTENV', '.env')}", env_file_encoding="utf-8" + ) settings = Settings() diff --git a/ingest/temp_locations_dump.sql b/ingest/temp_locations_dump.sql new file mode 100644 index 0000000..5cc645a --- /dev/null +++ b/ingest/temp_locations_dump.sql @@ -0,0 +1,74 @@ +DROP TABLE IF EXISTS + staging_sensornodes +, staging_sensorsystems +, staging_sensors +, staging_flags +, staging_keys; + +CREATE {table} IF NOT EXISTS staging_keys ( + fetchlogs_id int, + key text, + last_modified timestamptz +); + +CREATE {table} IF NOT EXISTS staging_sensornodes ( + sensor_nodes_id int, + is_new boolean DEFAULT true, + is_moved boolean DEFAULT false, + ingest_id text NOT NULL UNIQUE, + source_name text NOT NULL, + source_id text NOT NULL, + matching_method text NOT NULL DEFAULT 'ingest-id', + site_name text, + ismobile boolean, + geom geometry, + timezones_id int, + countries_id int, + metadata jsonb, + fetchlogs_id int, + UNIQUE (source_name, source_id) +); + +CREATE {table} IF NOT EXISTS 
staging_sensorsystems ( + sensor_systems_id int, + is_new boolean DEFAULT true, + ingest_id text NOT NULL UNIQUE, + instrument_ingest_id text, + ingest_sensor_nodes_id text, + sensor_nodes_id int, + metadata jsonb, + fetchlogs_id int +); + +CREATE {table} IF NOT EXISTS staging_sensors ( + ingest_id text, + is_new boolean DEFAULT true, + -- source_name text NOT NULL, + -- source_id text NOT NULL, + sensors_id int, + sensor_systems_id int, + ingest_sensor_systems_id text, + status text, + measurand text, + units text, + measurands_id int, + averaging_interval_seconds int, + logging_interval_seconds int, + metadata jsonb, + fetchlogs_id int +); + +CREATE {table} IF NOT EXISTS staging_flags ( + ingest_id text NOT NULL, + sensor_ingest_id text NOT NULL, + flags_id int, + sensor_nodes_id int, + sensors_id int, + flag_types_id int, + datetime_from timestamptz, + datetime_to timestamptz, + period tstzrange, + note text, + metadata jsonb, + fetchlogs_id int +); diff --git a/ingest/temp_measurements_dump.sql b/ingest/temp_measurements_dump.sql new file mode 100644 index 0000000..4840750 --- /dev/null +++ b/ingest/temp_measurements_dump.sql @@ -0,0 +1,64 @@ +DROP TABLE IF EXISTS + staging_sensors +, staging_measurements +, staging_inserted_measurements; + + +CREATE {table} IF NOT EXISTS staging_sensors ( + ingest_id text, + is_new boolean DEFAULT true, + source_name text NOT NULL, + source_id text NOT NULL, + sensors_id int, + sensor_systems_id int, + ingest_sensor_systems_id text, + status text, + measurand text, + units text, + measurands_id int, + averaging_interval_seconds int, + logging_interval_seconds int, + metadata jsonb, + fetchlogs_id int +); + +-- CREATE {table} IF NOT EXISTS staging_sensors ( +-- ingest_id text NOT NULL, +-- is_new boolean DEFAULT true, +-- source_name text NOT NULL, +-- source_id text NOT NULL, +-- measurand text NOT NULL, +-- sensors_id int, +-- sensor_systems_id int, +-- ingest_sensor_systems_id text, +-- units text, +-- measurands_id int, +-- metadata jsonb, +-- fetchlogs_id int +-- ); + +CREATE {table} IF NOT EXISTS staging_measurements ( + ingest_id text NOT NULL, + source_name text NOT NULL, + source_id text NOT NULL, + measurand text NOT NULL, + sensors_id int, + value float, + datetime timestamptz, + lon float, + lat float, + fetchlogs_id int +); + +--This table will hold measurements that have +--actually been inserted into the measurements table +--this is to deal with the overlap that we see in the +--incoming files +CREATE {table} IF NOT EXISTS staging_inserted_measurements ( + sensors_id int + , datetime timestamptz + , value double precision + , lat double precision + , lon double precision + , fetchlogs_id int +); diff --git a/ingest/utils.py b/ingest/utils.py index 32a03df..e4ef524 100644 --- a/ingest/utils.py +++ b/ingest/utils.py @@ -1,11 +1,14 @@ import io import os +import sys from pathlib import Path import logging from urllib.parse import unquote_plus import gzip +import uuid import boto3 +import re from io import StringIO import psycopg2 # import typer @@ -58,6 +61,7 @@ def read(self, n=None): return "".join(line) + def put_metric( namespace, metricname, @@ -107,7 +111,7 @@ def clean_csv_value(value): def get_query(file, **params): - logger.debug("get_query: {file}, params: {params}") + logger.debug(f"get_query: {file}, params: {params}") query = Path(os.path.join(dir_path, file)).read_text() if params is not None and len(params) >= 1: query = query.format(**params) @@ -209,12 +213,87 @@ def check_if_done(cursor, key): return False +def 
deconstruct_path(key: str): + is_local = os.path.isfile(key) + is_s3 = bool(re.match(r"s3://[a-zA-Z]+[a-zA-Z0-9_-]+/[a-zA-Z]+", key)) + is_csv = bool(re.search(r"\.csv(.gz)?$", key)) + is_json = bool(re.search(r"\.(nd)?json(.gz)?$", key)) + is_compressed = bool(re.search(r"\.gz$", key)) + path = {} + if is_local: + path["local"] = True + path["key"] = key + elif is_s3: + # pull out the bucket name + p = key.split("//")[1].split("/") + path["bucket"] = p.pop(0) + path["key"] = "/".join(p) + else: + # use the current bucket from settings + path["bucket"] = settings.FETCH_BUCKET + path["key"] = key + + logger.debug(path) + return path + +def get_data(key: str): + # check to see if we were provided with a path that includes the source + # e.g. + # s3://bucket/key + # local://drive/key + # /key (assume local) + # or no source + # key (no forward slash, assume etl bucket) + if re.match(r"local://[a-zA-Z]+", key): + key = key.replace("local://", "") + + is_local = os.path.isfile(key) + is_s3 = bool(re.match(r"s3://[a-zA-Z]+[a-zA-Z0-9_-]+/[a-zA-Z]+", key)) + #is_csv = bool(re.search(r"\.csv(.gz)?$", key)) + #is_json = bool(re.search(r"\.(nd)?json(.gz)?$", key)) + is_compressed = bool(re.search(r"\.gz$", key)) + logger.debug(f"checking - {key}\ns3: {is_s3}; is_local: {is_local}") + + if is_local: + return get_file(key) + elif is_s3: + # pull out the bucket name + path = key.split("//")[1].split("/") + bucket = path.pop(0) + key = "/".join(path) + else: + # use the current bucket from settings + bucket = settings.FETCH_BUCKET + + # stream the file + logger.debug(f"streaming s3 file data from s3://{bucket}/{key}") + obj = s3.get_object( + Bucket=bucket, + Key=key, + ) + f = obj["Body"] + if is_compressed: + return gzip.GzipFile(fileobj=obj["Body"]) + else: + return obj["Body"] + + +def get_file(filepath: str): + is_compressed = bool(re.search(r"\.gz$", filepath)) + logger.debug(f"streaming local file data from {filepath}") + if is_compressed: + return gzip.open(filepath, 'rb') + else: + return io.open(filepath, "r", encoding="utf-8") + + def get_object( key: str, - bucket: str = settings.ETL_BUCKET + bucket: str = settings.FETCH_BUCKET ): key = unquote_plus(key) text = '' + logger.debug(f"Getting {key} from {bucket}") obj = s3.get_object( Bucket=bucket, Key=key, @@ -227,10 +306,11 @@ def get_object( return text + def put_object( data: str, key: str, - bucket: str = settings.ETL_BUCKET + bucket: str = settings.FETCH_BUCKET ): out = io.BytesIO() with gzip.GzipFile(fileobj=out, mode='wb') as gz: @@ -282,7 +362,7 @@ def select_object(key: str): content = "" logger.debug(f"Getting object: {key}, {output_serialization}") resp = s3.select_object_content( - Bucket=settings.ETL_BUCKET, + Bucket=settings.FETCH_BUCKET, Key=key, ExpressionType="SQL", Expression=""" @@ -392,40 +472,23 @@ def load_errors_list(limit: int = 10): return rows -def load_fail(cursor, key, e): - print("full copy failed", key, e) +def load_fail(cursor, fetchlogsId, e): + logger.warning(f"full copy of {fetchlogsId} failed: {e}") cursor.execute( """ UPDATE fetchlogs - SET - last_message=%s - WHERE - key=%s + SET last_message=%s + , has_error = true + , completed_datetime = clock_timestamp() + WHERE fetchlogs_id=%s """, ( str(e), - key, + fetchlogsId, ), ) -# def load_success(cursor, key): -# cursor.execute( -# """ -# UPDATE fetchlogs -# SET -# last_message=%s, -# loaded_datetime=clock_timestamp() -# WHERE -# key=%s -# """, -# ( -# str(cursor.statusmessage), -# key, -# ), -# ) - - def load_success(cursor, keys, message: str = 'success'): 
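+    # mark the given fetchlog keys as completed: store the message, set
+    # completed_datetime and clear has_error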
cursor.execute( """ @@ -433,6 +496,7 @@ def load_success(cursor, keys, message: str = 'success'): SET last_message=%s , completed_datetime=clock_timestamp() + , has_error = false WHERE key=ANY(%s) """, ( @@ -442,6 +506,50 @@ def load_success(cursor, keys, message: str = 'success'): ) +def load_fetchlogs( + pattern: str, + limit: int = 250, + ascending: bool = False, +): + order = 'ASC' if ascending else 'DESC' + conn = psycopg2.connect(settings.DATABASE_WRITE_URL) + cur = conn.cursor() + batch_uuid = uuid.uuid4().hex + cur.execute( + f""" + UPDATE fetchlogs + SET loaded_datetime = CURRENT_TIMESTAMP + , jobs = jobs + 1 + , batch_uuid = %s + FROM ( + SELECT fetchlogs_id + FROM fetchlogs + WHERE key~E'{pattern}' + AND NOT has_error + AND completed_datetime is null + AND ( + loaded_datetime IS NULL + OR loaded_datetime < now() - '30min'::interval + ) + ORDER BY last_modified {order} nulls last + LIMIT %s + FOR UPDATE SKIP LOCKED + ) as q + WHERE q.fetchlogs_id = fetchlogs.fetchlogs_id + RETURNING fetchlogs.fetchlogs_id + , fetchlogs.key + , fetchlogs.last_modified; + """, + (batch_uuid, limit,), + ) + rows = cur.fetchall() + logger.debug(f'Loaded {len(rows)} from fetchlogs using {pattern}/{order}') + conn.commit() + cur.close() + conn.close() + return rows + + def add_fetchlog(key: str): with psycopg2.connect(settings.DATABASE_WRITE_URL) as connection: with connection.cursor() as cursor: @@ -500,6 +608,7 @@ def mark_success( SET last_message=%s , completed_datetime={completed} + , has_error = false WHERE {where} """, ( @@ -553,7 +662,7 @@ def crawl(bucket, prefix): def crawl_lcs(): - crawl(settings.ETL_BUCKET, "lcs-etl-pipeline/") + crawl(settings.FETCH_BUCKET, "lcs-etl-pipeline/") def crawl_fetch(): diff --git a/local.py b/local.py new file mode 100644 index 0000000..7a52adf --- /dev/null +++ b/local.py @@ -0,0 +1,73 @@ +import os +import sys +import orjson +import psycopg2 +import logging +from time import time +import csv + + +from ingest.lcsV2 import ( + IngestClient, + load_measurements, + load_measurements_db, +) + +from ingest.utils import ( + select_object, + get_file, +) + +logger = logging.getLogger('handler') + +logging.basicConfig( + format='[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s', + level='DEBUG', + force=True, +) + +logging.getLogger('boto3').setLevel(logging.WARNING) +logging.getLogger('botocore').setLevel(logging.WARNING) +logging.getLogger('urllib3').setLevel(logging.WARNING) + + +# local files +#load_measurements_db(pattern = '^/home/christian/.*\\.(csv|json)') +# remote files, make sure it can at least read it +#load_measurements_db() + +## client based methods +## get a client +client = IngestClient() +## load all the data into the client +client.load_keys([ + [1, '~/Downloads/openaq-fetches/lcs-etl-pipeline/measures/lovemyair/2024-11-12/1731445632-1snpf.json', '2024-10-23'] +]) + +## dump just the locations +client.dump() + +# rollups and cached tables +#client.process_hourly_data() +#client.process_daily_data() +#client.process_annual_data() +#client.refresh_cached_tables() + +#client.dump_locations(False) +#client.dump_measurements(load=False) +## dump just the measurements +# client.dump_measurements +## Dump both +#client.dump() + +# #client.load(data) +# client.load_metadata(data['meta']) +# client.load_locations(data['locations']) +# client.load_measurements(data['measures']) + +# #client.dump() + + +# print(time() - start_time) +# print(f"measurements: {len(client.measurements)}") +# print(f"locations: {len(client.nodes)}") diff --git 
a/matching_clarity_sensors.sql b/matching_clarity_sensors.sql new file mode 100644 index 0000000..83997b9 --- /dev/null +++ b/matching_clarity_sensors.sql @@ -0,0 +1,80 @@ + + +CREATE TABLE current_clarity_nodes AS +SELECT unnest(ARRAY[1285716,1314366,1373846,1378636,1533718,1533720,1533721,1533915,1533917,1533920,1894630,1894631,1894632,1894634,1894636,1894637,1894638,1894639,1894640,1894641,1894642,1894643,1924313,1949202,1949203,1949206,2152632,2152633,2156118,2402491,290475,290476,290477,290478,290479,290480,290481,290482,290483,290484,290485,290487,290488,290489,290490,290491,290492,290495,290496,290498,290499,290500,290501,290502,290504,290505,290506,290508,290510,290512,290513,290515,290517,290518,290519,290520,290521,290522,290523,290524,290526,290528,290529,290530,290531,290532,290533,290534,290535,290536,290537,290538,290540,290541,290542,290543,290544,290545,290546,290549,290551,290552,290553,290554,290555,290557,290558,290559,290560,290561,290563,290564,290565,290566,290567,290569,290570,290571,290572,290573,290574,290575,290576,290578,290582,290583,290584,290585,290587,290588,290589,290590,290591,290593,290594,290595,290596,290597,290599,290600,290601,290602,290603,290604,290605,290606,290607,290608,290609,290610,290611,290614,290615,290616,290618,290620,290621,290622,290623,290624,290625,290626,290628,290629,290630,290632,290633,290634,290635,290636,290637,290638,290639,290641,290642,290643,290644,290645,290646,290648,290649,290650,290651,290652,290653,290654,290655,290656,290657,290658,290659,290660,290661,290662,290664,290665,290667,290668,290670,290671,290672,290674,290675,290677,290678,290679,290680,290681,290683,290685,290686,290687,300026,300027,300028,300030,301884,301885,308728,310353,310354,310355,310356,310357,310358,310360,351822,351823,351824,351825,351826,367083,367107,367110,367112,367113,367114,367116,367117,367118,370742,370743,370744,370750,370751,370752,815609,923364,923365,929705,938377,947124,947125,947126,947127,947128,947130,947132,947133,947134,947137,947138,947139,947140,947141,947142,947143,947144,947150,947151,947152,947153,947154,947155,947156,947157,947158,947159,947160,947161,947162,947163,947164,947165,947166,947168,947169,947170,947171,947172,947173,947174,947175,947176,947177,947178,947180,947182,947183,947184,947185,947186,947187,947188,947189,947190,947191,947192,947194,947195,947196,947197,947198,947199,947200,947201,947202,947203,947204,947205,947206,947207,947208,947210,947211,947212,947213,947214,947216,947217,947218,947219,947220,947221,947222,947223,947224,947225,947226,947227,947228,947229,947230,947231,947232,947234,947235,947236,947237,947238,947239,947240,947241,947242,947243,947244,947245,947246,947247,947248,947249,947250,947251,947252,947253,947254,947255,947256,947257,947258,947259,947260,947261,947262,947264,947265,947266,947267,947268,947270,947271,947273,947274,947275,947276,947277,947278,947279,947280,947281,947283,947284,947285,947286,947287,947288,947289,947290,947291,947292,947295,947296,947297,947298,947299,947300,947301,947302,947303,947304,947305,947306,947307,947308,947309,947310,947312,947313,947314,947315,947316,947317,947318,947319,947320,947321,947322,947323,947324,947325,947326,947327,947328,947329,947330,947332,947334,947335,947336,947338,947339,947340,947341,947342,947343,947344,947345,947346,947347,947348,947349]) as node; + + + + WITH clarity AS ( + SELECT sensor_nodes_id + , source_id + , site_name + , geom + , added_on + , node IS NOT NULL as is_active + FROM sensor_nodes + JOIN current_clarity_nodes ON 
(sensor_nodes_id = node) + WHERE source_name = 'clarity') + SELECT c.sensor_nodes_id + , c.source_id + , c.site_name + , n.source_id + , n.site_name + , c.geom = n.geom + , is_active + , ROUND(st_distance(c.geom, n.geom)::numeric, 4) as distance + FROM clarity c + LEFT JOIN staging_sensornodes n ON (st_distance(c.geom, n.geom)<0.0001) + -- WHERE n.source_id IS NOT NULL OR is_active + WHERE n.source_id IS NULL + ORDER BY c.sensor_nodes_id DESC NULLS FIRST; + + + + + WITH clarity AS ( + SELECT sensor_nodes_id + , source_id + , site_name + , geom + , added_on + , node IS NOT NULL as is_active + FROM sensor_nodes + LEFT JOIN current_clarity_nodes ON (sensor_nodes_id = node) + WHERE source_name = 'clarity') + SELECT n.source_id + , n.site_name + , c.source_id + , c.site_name + --, c.geom + --, n.geom + --, c.added_on + , c.geom = n.geom + , is_active + , ROUND(st_distance(c.geom, n.geom)::numeric, 4) as distance + , c.sensor_nodes_id + , c.sensor_nodes_id = LAG(c.sensor_nodes_id) OVER (ORDER BY c.sensor_nodes_id) + FROM staging_sensornodes n + --JOIN clarity c ON (n.site_name = c.site_name) + --JOIN clarity c ON (n.geom = c.geom) + JOIN clarity c ON (n.source_id = c.source_id) + --LEFT JOIN clarity c ON (st_distance(c.geom, n.geom)<0.00001) + --WHERE n.source_id IS NOT NULL OR is_active + WHERE is_active + ORDER BY sensor_nodes_id DESC; + + + + + + SELECT * + FROM staging_sensornodes + WHERE source_id = 'DBXRI9190'; + + + -- How many active clarity sensor nodes do we have? + SELECT string_agg(DISTINCT sensor_nodes_id::text, ',') + FROM sensor_nodes_check + WHERE source_name = 'clarity' + AND datetime_last > current_date + ; diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1c86072 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,25 @@ +[tool.poetry] +name = "ingest" +version = "0.1.0" +description = "Data ingestor for OpenAQ Framework" +authors = ["OpenAQ "] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.12" +dateparser = "^1.2.0" +orjson = "^3.10.4" +psycopg2-binary = "^2.9.9" +pytz = "^2024.1" +typer = "^0.12.3" +typing-extensions = "^4.12.2" +pydantic = {extras = ["dotenv"], version = "^2.7.3"} +pydantic-settings = "^2.3.2" + +[tool.poetry.group.cdk.dependencies] +aws-cdk-lib = "^2.145.0" +boto3 = "^1.34.124" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 4296f15..0000000 --- a/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -dateparser==1.1.1 -orjson==3.6.8 -psycopg2-binary==2.9.3 -pydantic[dotenv] -pytz==2022.1 -pytz-deprecation-shim==0.1.0.post0 -typer==0.4.1 -typing_extensions==4.2.0 diff --git a/requirements_dev.txt b/requirements_dev.txt deleted file mode 100644 index 30ddf82..0000000 --- a/requirements_dev.txt +++ /dev/null @@ -1 +0,0 @@ -boto3 diff --git a/tests/benchmark.py b/tests/benchmark.py new file mode 100644 index 0000000..d8dfc34 --- /dev/null +++ b/tests/benchmark.py @@ -0,0 +1,43 @@ +import logging +import os +import sys +import argparse + +logger = logging.getLogger(__name__) + +parser = argparse.ArgumentParser( + description=""" +Test benchmarks for ingestion + """) + +parser.add_argument( + '--name', + type=str, + required=False, + default="test", + help='Name to use for the test' + ) +parser.add_argument( + '--env', + type=str, + default='.env', + required=False, + help='The dot env file to use' + ) +parser.add_argument( + '--debug', + action="store_true", + help='Output at DEBUG level' + ) +args = 
parser.parse_args() + +from ingest.settings import settings + +logging.basicConfig( + format='[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s', + level=settings.LOG_LEVEL.upper(), + force=True, +) + + +print(args) diff --git a/tests/benchmarking.r b/tests/benchmarking.r new file mode 100644 index 0000000..3b9a076 --- /dev/null +++ b/tests/benchmarking.r @@ -0,0 +1,157 @@ + +source("~/git/R/ESRfunctions.r") + +stats <- dir('tests/benchmark_output', pattern = "*stats.csv$", full.names=TRUE) + +params <- data.frame( + ram = c( + 2, 1, 0.25, 0.5, .02, .25, 0.25, 5, + 4,8,16,32,64,128,256, + 4,8,16,32,64,128,256, + 4,8,16,32,64,128,256, + 4,8,16,32,64,128,256, + 4,8,16,32,64,128,256, + 4,8,16,32,64,128,256 + ), + cores = c( + 16, 16, 16, 16, 4, 4, + 8, 8, 8, 8, 8, 8, 8, 8, 8, + 16, 16, 16, 16, 16, 16, 16, + 4, 4, 4, 4, 4, 4, 4, + 8, 8, 8, 8, 8, 8, 8, + 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16 + ), + x86 = c( + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE + ), + v1 = c( + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + ), + ingesting = c( + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + ), + row.names = c( + "4xlarge_stats.csv", "4xlarge1g_stats.csv", "4xlarge256mb_stats.csv", "4xlarge500mb_stats.csv", + "prod_stats.csv", "r5xlarge_stats.csv", "r6i2xlarge_stats.csv", "xxlarge-5gb_stats.csv", + "2xlarge-4MB_stats.csv", "2xlarge-8MB_stats.csv","2xlarge-16MB_stats.csv","2xlarge-32MB_stats.csv", + "2xlarge-64MB_stats.csv","2xlarge-128MB_stats.csv","2xlarge-256MB_stats.csv", + "4xlarge-4MB_stats.csv", "4xlarge-8MB_stats.csv","4xlarge-16MB_stats.csv","4xlarge-32MB_stats.csv", + "4xlarge-64MB_stats.csv","4xlarge-128MB_stats.csv","4xlarge-256MB_stats.csv", + "xlarge-4MB_stats.csv", "xlarge-8MB_stats.csv","xlarge-16MB_stats.csv","xlarge-32MB_stats.csv", + "xlarge-64MB_stats.csv","xlarge-128MB_stats.csv","xlarge-256MB_stats.csv", + "2xlargeV2-4MB_stats.csv", "2xlargeV2-8MB_stats.csv","2xlargeV2-16MB_stats.csv","2xlargeV2-32MB_stats.csv", + "2xlargeV2-64MB_stats.csv","2xlargeV2-128MB_stats.csv","2xlargeV2-256MB_stats.csv", + "2xlargeARM-4MB_stats.csv", "2xlargeARM-8MB_stats.csv","2xlargeARM-16MB_stats.csv","2xlargeARM-32MB_stats.csv", + "2xlargeARM-64MB_stats.csv","2xlargeARM-128MB_stats.csv","2xlargeARM-256MB_stats.csv", + "2xlargeX86-4MB_stats.csv", "2xlargeX86-8MB_stats.csv","2xlargeX86-16MB_stats.csv","2xlargeX86-32MB_stats.csv", + "2xlargeX86-64MB_stats.csv","2xlargeX86-128MB_stats.csv","2xlargeX86-256MB_stats.csv" + ) +) + +x <- do.call(rbind, lapply(stats, function(path) { + x <- read.csv(path) + x$path <- basename(path) + x[x$Name == 'Aggregated', ] + #x[x$Name == 'v2/locations/:id', ] + #x[x$Name == 'v2/latest/empty', ] +})) + 
+x$cores <- params[x$path, "cores"] +x$ram <- params[x$path, "ram"] +x$x86 <- params[x$path, "x86"] +x$v1 <- params[x$path, "v1"] +x$ingesting <- params[x$path, "ingesting"] + + +x <- x[x$path != "prod_stats.csv", ] + +x <- x[order(x$ram), ] + +plot(Average.Response.Time ~ cores, x) +plot(Requests.s ~ cores, x) + +plot(Average.Response.Time ~ ram, x) + +ncores <- 16 +plot(Requests.s ~ ram, subset(x, cores == ncores)) +plot(Average.Response.Time ~ ram, subset(x, cores == ncores)) +plot(Average.Response.Time ~ cores, subset(x, cores <= ncores), pch=cores, col=1) + +plot(Average.Response.Time ~ ram, subset(x, cores <= ncores), pch=cores, col=1) +legend('topright', legend=unique(x$cores), pch=unique(x$cores), bty='n', ncol=3) + +plot(Average.Response.Time ~ ram, subset(x, cores <= ncores), pch=19, col=as.numeric(1:nrow(x) %in% grep('V2', x$path))+1) +legend('topright', legend=c('V1', 'V2'), pch=19, col=1:2, bty='n', ncol=3) + +plot(Average.Response.Time ~ ram, subset(x, cores == ncores), pch=19, col=as.numeric(1:nrow(x) %in% grep('ARM', x$path))+1) +legend('topright', legend=c('x86', 'ARM'), pch=19, col=1:2, bty='n', ncol=3) + +plot(Average.Response.Time ~ ram, subset(x, cores == ncores), pch=19) +points(Average.Response.Time ~ ram, x[grep('ARM', x$path), ], pch=19, col='red') +legend('topright', legend=c('x86', 'ARM'), pch=19, col=1:2, bty='n', ncol=3) + +plot(Average.Response.Time ~ ram, subset(x, cores == ncores & ingesting), pch=19, col=x$x86+1) +legend('topright', legend=c('x86', 'ARM'), pch=19, col=1:2, bty='n', ncol=3) + + +points(Average.Response.Time ~ ram, x[grep('ARM', x$path), ], pch=19, col='red') +legend('topright', legend=c('x86', 'ARM'), pch=19, col=1:2, bty='n', ncol=3) + + +plot(X50. ~ ram, x) +plot(X75. ~ ram, subset(x, cores == ncores)) +plot(Request.Count ~ ram, subset(x, cores == ncores)) +plot(Failure.Count ~ ram, subset(x, cores == ncores)) +plot(ram ~ cores, x) + + +exporters <- dir('tests/benchmark_output', pattern = "*export_output*", full.names=TRUE) + +params <- data.frame( + ram = c( + 64, 0.128, .004, .004, + .004, 1, 20, 40, + 5, 8, .004, .004 + ), + cores = c( + 16, 16, 4, 4, + 2, 8, 8, 8, + 8, 8, 4, 4 + ), + row.names = c( + "4xlarge-wm64gb","4xlarge", "prod", "r5", + "small", "xxlarge-wm1g", "xxlarge-wm20g", "xxlarge-wm40g", + "xxlarge-wm5g", "xxlarge-wm8g", "xxlarge", "benchmark_export_output" + ) +) + +x <- do.call(rbind, lapply(exporters, function(path) { + x <- read.csv(path, quote="'") + x$path <- basename(path) + x$test <- gsub("benchmark_export_output_|.csv$", "", basename(path)) + return(x) +})) +x$cores = params[x$test,"cores"] +x$ram = params[x$test,"ram"] + +boxplot(time_ms~cores, x) +plot(I(time_ms/1000)~jitter(ram, 10),x, log="y") diff --git a/tests/check_lcs_file.py b/tests/check_lcs_file.py deleted file mode 100644 index 06fa454..0000000 --- a/tests/check_lcs_file.py +++ /dev/null @@ -1,59 +0,0 @@ -import logging -import sys -import os -import json - -if 'DOTENV' not in os.environ.keys(): - os.environ['DOTENV'] = '.env.testing' - -if 'AWS_PROFILE' not in os.environ.keys(): - os.environ['AWS_PROFILE'] = 'python-user' - -from pandas import DataFrame -from botocore.exceptions import ClientError -from openaq_fastapi.ingest.handler import cronhandler, logger -from openaq_fastapi.settings import settings - -from openaq_fastapi.ingest.lcs import ( - LCSData, - load_metadata_db, - load_measurements_db, - load_measurements_file, - load_measurements, - get_measurements, -) - - -from openaq_fastapi.ingest.utils import ( - load_errors, - select_object, - 
get_object, - get_logs_from_ids, - get_logs_from_pattern, - unquote_plus, -) - - -# load_realtime('realtime-gzipped/2022-02-04/1643994434.ndjson.gz') - -# logs = get_logs_from_pattern('stations/clarity', 2) -# - -# station data -# logs = get_logs_from_ids(ids=[5544399, 4874871]) - -# for each of them lets try and import the data -# contents = [] -# for row in logs: -# contents.append( -# {"Key": unquote_plus(row[1]), "LastModified": row[6], "id": row[0], } -# ) - -# data = LCSData(contents) -# data.get_metadata() - - -# measurement data -logs = get_logs_from_ids(ids=[5609404]) - -load_measurements(logs) diff --git a/tests/check_realtime_file.py b/tests/check_realtime_file.py deleted file mode 100644 index 880417e..0000000 --- a/tests/check_realtime_file.py +++ /dev/null @@ -1,111 +0,0 @@ -import logging -import sys -import os -import json - -if 'DOTENV' not in os.environ.keys(): - os.environ['DOTENV'] = '.env.testing' - -if 'AWS_PROFILE' not in os.environ.keys(): - os.environ['AWS_PROFILE'] = 'python-user' - -from botocore.exceptions import ClientError -from openaq_fastapi.ingest.handler import cronhandler, logger -from openaq_fastapi.settings import settings - -from openaq_fastapi.ingest.lcs import ( - load_metadata_db, - load_measurements_db, - load_measurements_file, - load_measurements, - get_measurements, -) - -from openaq_fastapi.ingest.fetch import ( - load_realtime, - parse_json, -) - -from openaq_fastapi.ingest.utils import ( - load_errors, - select_object, - get_object, - get_logs_from_ids, -) - - -# load_realtime('realtime-gzipped/2022-02-04/1643994434.ndjson.gz') - -logs = get_logs_from_ids(ids=[5634328]) - -# logs = load_errors() - -keys = [log[1] for log in logs] - -#load_realtime(keys) - -print(f"Found {len(keys)} potential errors") - -for idx, key in enumerate(keys): - print(f"\n## Checking #{idx}: {key}") - # get text of object - try: - txt = get_object(key) - except Exception as e: - print(f"\t*** Error getting file: {e}") - continue - # break into lines - lines = txt.split("\n") - # check parse for each line - n = len(lines) - errors = [] - for jdx, line in enumerate(lines): - try: - # first just try and load it - obj = json.loads(line) - except Exception as e: - errors.append(jdx) - print(f"\t*** Loading error on line #{jdx} (of {n}): {e}\n{line}") - try: - # then we can try to parse it - row = parse_json(obj) - except Exception as e: - errors.append(jdx) - print(f"\t*** Parsing rror on line #{jdx} (of {n}): {e}\n{line}") - - - -# load_realtime(keys) - # load_realtime([ - # 'realtime-gzipped/2022-02-05/1644020232.ndjson.gz', - # 'realtime-gzipped/2022-02-05/1644068231.ndjson.gz' - # ]) - -# errors = load_errors(10) - -# print(f"Found {len(errors)} possible error files") - -# for file in errors: -# key = file[3] -# print(f"Checking file {key}") -# try: -# obj = select_object(key) -# except ClientError as e: -# if e.response['Error']['Code'] == 'JSONParsingError': -# print("There was an error parsing the file, fetching as raw file") -# print(e.response['Error']) -# obj = get_object(key) -# else: -# print("Some other error") -# except Exception as e: -# print(f"post-boto error: {e}") -# obj = get_object(key) - -# print(obj[-50:]) -# # save the file locally -# filepath = os.path.join(settings.LOCAL_SAVE_DIRECTORY, key) -# print(f"Writing file to {filepath}") -# os.makedirs(os.path.dirname(filepath), exist_ok=True) -# fle = open(filepath.replace(".gz", ""), 'w') -# fle.write(obj) -# fle.close() diff --git a/tests/test_file1.json b/tests/test_file1.json new file mode 100644 
index 0000000..228fb0b --- /dev/null +++ b/tests/test_file1.json @@ -0,0 +1,120 @@ +{ + "meta": { + "schema": "v0.1", + "source": "local", + "matching_method": "ingest-id" + }, + "measures": [ + { + "sensor_id": "local-test_site_1-co", + "timestamp": "2024-01-01T00:00:00Z", + "measure": 0.01 + }, + { + "sensor_id": "local-test_site_1-co", + "timestamp": "2024-01-02T00:00:00Z", + "measure": 0.02 + }, + { + "sensor_id": "local-test_site_2-wind_speed", + "timestamp": "2024-01-01T00:00:00Z", + "measure": 0.01 + }, + { + "sensor_id": "local-test_site_2-wind_speed", + "timestamp": "2024-01-02T00:00:00Z", + "measure": 0.02 + } + ], + "locations": [ + { + "location": "local-test_site_1", + "label": "Test Site #1", + "lat": "45.56", + "lon": -123.45, + "ismobile": "false", + "systems": [ + { + "system_id": "local-test_site_1-metone:aio2", + "manufacturer_name": "MetOne", + "model_name": "AIO2", + "sensors": [ + { + "sensor_id": "local-test_site_1-wind_speed", + "status": "u", + "parameter": "ws", + "interval_seconds": "3600", + "flags": [ + { + "flag_id": "local-test_site_1-wind_speed-info::2024-01-01", + "datetime_from": "2024-01-01", + "datetime_to": "2024-01-02", + "flag_name": "info", + "note": "initial flag for sensor" + } + ] + } + ] + }, + { + "system_id": "local-test_site_1-ecotech:serinus_30", + "manufacturer_name": "Ecotech", + "model_name": "Serinus 30", + "sensors": [ + { + "sensor_id": "local-test_site_1-co", + "status": "u", + "parameter": "co", + "interval_seconds": "3600", + "flags": [ + { + "flag_id": "local-test_site_1-co-info::2024-01-01", + "datetime_from": "2024-01-01", + "datetime_to": "2024-01-05", + "flag_name": "info" + } + ] + } + ] + } + ] + }, + { + "location": "local-test_site_2", + "label": "Test Site #2", + "lat": "47.56", + "lon": -124.45, + "ismobile": "false", + "systems": [ + { + "system_id": "local-test_site_2-metone:aio2", + "manufacturer_name": "MetOne", + "model_name": "AIO2", + "sensors": [ + { + "sensor_id": "local-test_site_2-wind_speed", + "status": "u", + "parameter": "ws", + "interval_seconds": "3600", + "flags": [] + }, + { + "sensor_id": "local-test_site_2-wind_direction", + "status": "u", + "parameter": "wd", + "interval_seconds": "3600", + "flags": [ + { + "flag_id": "local-test_site_2-wind_direction-error::2024-01-01", + "datetime_from": "2024-01-01", + "datetime_to": "2024-01-02", + "flag_name": "info" + } + ] + } + ] + } + ] + } + ] +} diff --git a/tests/test_file2.json b/tests/test_file2.json new file mode 100644 index 0000000..9a619d5 --- /dev/null +++ b/tests/test_file2.json @@ -0,0 +1,127 @@ +{ + "meta": { + "schema": "v0.1", + "source": "local", + "matching_method": "ingest-id" + }, + "measures": [ + { + "sensor_id": "local-test_site_1-co", + "timestamp": "2024-01-03T00:00:00Z", + "measure": 0.03 + }, + { + "sensor_id": "local-test_site_1-co", + "timestamp": "2024-01-04T00:00:00Z", + "measure": 0.04 + }, + { + "sensor_id": "local-test_site_1-wind_speed", + "timestamp": "2024-01-03T00:00:00Z", + "measure": 0.03 + }, + { + "sensor_id": "local-test_site_1-wind_speed", + "timestamp": "2024-01-04T00:00:00Z", + "measure": 0.04 + } + ], + "locations": [ + { + "location": "local-test_site_1", + "label": "Test Site #1", + "lat": "45.56", + "lon": -123.45, + "ismobile": "false", + "systems": [ + { + "system_id": "local-test_site_1-metone:aio2", + "manufacturer_name": "MetOne", + "model_name": "AIO2", + "sensors": [ + { + "sensor_id": "local-test_site_1-wind_speed", + "status": "u", + "parameter": "ws", + "interval_seconds": "3600", + "flags": [ + { + 
"flag_id": "local-test_site_1-wind_speed-info::2024-01-01", + "datetime_from": "2024-01-02", + "datetime_to": "2024-01-04", + "flag_name": "info", + "note": "initial flag for sensor" + }, + { + "flag_id": "local-test_site_1-wind_speed-info::2024-01-01", + "datetime_from": "2024-01-02", + "datetime_to": "2024-01-04", + "flag_name": "info", + "note": "A new note for this sensor" + } + ] + } + ] + }, + { + "system_id": "local-test_site_1-ecotech:serinus_30", + "manufacturer_name": "Ecotech", + "model_name": "Serinus 30", + "sensors": [ + { + "sensor_id": "local-test_site_1-co", + "status": "u", + "parameter": "co", + "interval_seconds": "3600", + "flags": [ + { + "flag_id": "local-test_site_1-co-info::2024-01-01", + "datetime_from": "2024-01-01", + "datetime_to": "2024-01-05", + "flag_name": "info" + } + ] + } + ] + } + ] + }, + { + "location": "local-test_site_2", + "label": "Test Site #2", + "lat": "47.56", + "lon": -124.45, + "ismobile": "false", + "systems": [ + { + "system_id": "local-test_site_2-metone:aio2", + "manufacturer_name": "MetOne", + "model_name": "AIO2", + "sensors": [ + { + "sensor_id": "local-test_site_2-wind_speed", + "status": "u", + "parameter": "ws", + "interval_seconds": "3600", + "flags": [] + }, + { + "sensor_id": "local-test_site_2-wind_direction", + "status": "u", + "parameter": "wd", + "interval_seconds": "3600", + "flags": [ + { + "flag_id": "local-test_site_2-wind_direction-error::2024-01-03", + "datetime_from": "2024-01-03", + "datetime_to": "2024-01-04", + "flag_name": "info" + } + ] + } + ] + } + ] + } + ] +} diff --git a/tests/test_flags.py b/tests/test_flags.py new file mode 100644 index 0000000..8ac6fd4 --- /dev/null +++ b/tests/test_flags.py @@ -0,0 +1,48 @@ +import os +import sys +import orjson +import psycopg2 +import logging +from time import time +import csv + +os.chdir(os.path.dirname(os.path.dirname(__file__))) + +from ingest.lcsV2 import ( + IngestClient, +) + + +logger = logging.getLogger('handler') + +logging.basicConfig( + format='[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s', + level='DEBUG', + force=True, +) + +logging.getLogger('boto3').setLevel(logging.WARNING) +logging.getLogger('botocore').setLevel(logging.WARNING) +logging.getLogger('urllib3').setLevel(logging.WARNING) + + +## client based methods +## get a client +client = IngestClient() +## load all the data into the client +client.load_keys([[1, './tests/test_file1.json', '2024-01-01']]) +## load the data +client.dump(load=True) +#client.dump_locations(load=False) +#client.dump_measurements(load=True) + +client.reset() + +client.load_keys([[2, './tests/test_file2.json', '2024-01-02']]) +## load the data +client.dump(load=True) + +client.process_hourly_data() +client.process_daily_data() +client.process_annual_data() +client.refresh_cached_tables()