Skip to content

Commit

Permalink
Merge pull request #4223 from hove-io/fix_excluded_zones_files
Browse files Browse the repository at this point in the history
Fix excluded_zones generator
  • Loading branch information
azime authored Feb 22, 2024
2 parents d96241d + f10f705 commit d5cce15
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 45 deletions.
4 changes: 4 additions & 0 deletions source/navitiacommon/navitiacommon/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,7 @@ def files_exists_in_zipfile(poi_zipfile, set_files):
z = zipfile.ZipFile(poi_zipfile)
files_from_zip = {member.filename for member in z.infolist()}
return not bool(len(set_files.difference(files_from_zip)))


def is_empty_directory(directory, ext="json"):
    """Tell whether ``directory`` contains no file with the given extension.

    :param directory: path of the directory to inspect
    :param ext: file extension to look for, without the leading dot
    :return: True when ``directory`` is not an existing directory, or when no
        entry directly inside it matches ``*.<ext>``; False otherwise
    """
    # A missing path (or a path that is not a directory) is reported as empty.
    if not os.path.isdir(directory):
        return True
    # glob returns a list of matches; an empty list is falsy.
    return not glob.glob("{}/*.{}".format(directory, ext))
40 changes: 23 additions & 17 deletions source/tyr/tyr/binarisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
import navitiacommon.task_pb2
from tyr import celery, redis
from tyr.rabbit_mq_handler import RabbitMqHandler
from navitiacommon import models
from navitiacommon import models, utils
from tyr.helper import get_instance_logger, get_named_arg, get_autocomplete_instance_logger, get_task_logger
from contextlib import contextmanager
import glob
Expand Down Expand Up @@ -1249,25 +1249,31 @@ def poi2asgard(self, instance_config, filename, job_id, dataset_uid):
shutil.rmtree(excluded_zone_dir)

os.mkdir(excluded_zone_dir)
poi_to_excluded_zones(filename, excluded_zone_dir, instance.name)

try:
with collect_metric("poi2Asgard", job, dataset_uid):
asgard_bucket = current_app.config.get('MINIO_ASGARD_BUCKET_NAME', None)
if not asgard_bucket:
dataset.state = "failed"
return

bash_command = (
"env REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt "
"aws s3 sync ./{excluded_zone_dir} s3://{asgard_bucket}/excluded_zones".format(
excluded_zone_dir=excluded_zone_dir, asgard_bucket=asgard_bucket
poi_to_excluded_zones(filename, excluded_zone_dir, instance.name)
if utils.is_empty_directory(excluded_zone_dir):
logger.warning(
"opg_excluded_zones: Impossible to push excluded zones to S3 for instance {}, empty directory".format(
instance.name
)
)
process = subprocess.Popen(bash_command.split(), stdout=subprocess.PIPE)
output, error = process.communicate()
if error:
raise Exception("Error occurred when putting excluded zones to asgard: {}".format(error))
else:
with collect_metric("poi2Asgard", job, dataset_uid):
asgard_bucket = current_app.config.get('MINIO_ASGARD_BUCKET_NAME', None)
if not asgard_bucket:
dataset.state = "failed"
return

bash_command = (
"env REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt "
"aws s3 sync ./{excluded_zone_dir} s3://{asgard_bucket}/excluded_zones".format(
excluded_zone_dir=excluded_zone_dir, asgard_bucket=asgard_bucket
)
)
process = subprocess.Popen(bash_command.split(), stdout=subprocess.PIPE)
output, error = process.communicate()
if error:
raise Exception("Error occurred when putting excluded zones to asgard: {}".format(error))
except:
logger.exception("")
job.state = "failed"
Expand Down
80 changes: 52 additions & 28 deletions source/tyr/tyr/poi_to_excluded_zones.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,57 +2,81 @@
import csv
import json
import logging
import zipfile


def poi_to_excluded_zones(poi_file, output_dir, instance_name):
logger = logging.getLogger(__name__)
def parse_file(filename):
    """Lazily yield the rows of a ``;``-separated, ``"``-quoted CSV file.

    :param filename: path of the file to read
    :return: generator producing one list of column values per CSV row
    :raises Exception: any error hit while opening or reading the file is
        logged and then re-raised unchanged
    """
    log = logging.getLogger(__name__)
    try:
        with open(filename) as handle:
            for record in csv.reader(handle, delimiter=';', quotechar='"'):
                yield record
    except Exception as err:
        message = "opg_excluded_zones: Unable to read file {}, error ({})".format(filename, str(err))
        log.error(message)
        # Let the caller decide what to do: the failure is only reported here.
        raise


def get_excluded_zones(path):
    """Collect the ``excluded_zones`` properties found in ``poi_properties.txt``.

    Each row of ``<path>/poi_properties.txt`` is read as
    ``key;property name;value``. Only rows whose property name is
    ``excluded_zones`` (case-insensitive) are kept, and their value is decoded
    as JSON.

    :param path: directory containing ``poi_properties.txt``
    :return: dict mapping the first column of each kept row to its decoded
        JSON value
    :raises Exception: errors raised while reading the file are propagated
    """
    logger = logging.getLogger(__name__)
    result = {}
    for row in parse_file(path + "/poi_properties.txt"):
        # csv.reader yields [] for blank lines, and a truncated line may have
        # fewer than three columns: skip those instead of raising IndexError
        # and aborting the whole generation.
        if len(row) < 3:
            continue
        if row[1].lower() != "excluded_zones":
            continue
        try:
            result[row[0]] = json.loads(row[2])
        except ValueError:
            # json.loads reports malformed documents with ValueError
            # (JSONDecodeError is a subclass); the line is ignored on purpose.
            logger.error("opg_excluded_zones: Ignored line, Invalid json ({})".format(row[2]))
    return result


def get_geometries_ids(path, excluded_zones):
    """Map the POIs that have excluded zones to the value of column 7 of ``poi.txt``.

    :param path: directory containing ``poi.txt``
    :param excluded_zones: container of POI ids to keep (membership is tested
        with ``in``); rows whose first column is not in it are ignored
    :return: dict mapping the first column of each kept row to its eighth
        column (index 7), presumably the geometry id, as the function name
        suggests
    :raises Exception: errors raised while reading the file are propagated
    """
    logger = logging.getLogger(__name__)
    result = {}
    for row in parse_file(path + "/poi.txt"):
        # csv.reader yields [] for blank lines: nothing to look up there.
        if not row or row[0] not in excluded_zones:
            continue
        if len(row) < 8:
            # A truncated line would raise IndexError on row[7]; report it and
            # carry on with the other POIs rather than failing everything.
            logger.error("opg_excluded_zones: Ignored line, missing columns in poi.txt ({})".format(row[0]))
            continue
        result[row[0]] = row[7]
    return result


def get_geometries_shapes(path):
    """Load ``geometries.txt`` as a mapping from its first to its second column.

    :param path: directory containing ``geometries.txt``
    :return: dict mapping column 0 of each row to column 1 (presumably the
        geometry id and its shape, as the function name suggests)
    :raises Exception: errors raised while reading the file are propagated
    """
    result = {}
    for row in parse_file(path + "/geometries.txt"):
        # csv.reader yields [] for blank lines and a truncated line may hold a
        # single column: skip them instead of raising IndexError.
        if len(row) < 2:
            continue
        result[row[0]] = row[1]
    return result


def poi_to_excluded_zones(poi_file, output_dir, instance_name):
tmp_path = "tmp/poi_{}".format(instance_name)
import zipfile

with zipfile.ZipFile(poi_file, 'r') as zip_ref:
zip_ref.extractall(tmp_path)

excluded_zones = {}
excluded_geometries_ids = {}

# get excluded zones
with open(tmp_path + "/poi_properties.txt") as csvfile:
reader = csv.reader(csvfile, delimiter=';', quotechar='"')
for row in reader:
if row[1].lower() != "excluded_zones":
continue
excluded_zones[row[0]] = json.loads(row[2])
excluded_zones = get_excluded_zones(tmp_path)

# find geometry id
with open(tmp_path + "/poi.txt") as csvfile:
reader = csv.reader(csvfile, delimiter=';', quotechar='"')
for row in reader:
if row[0] not in excluded_zones:
continue
excluded_geometries_ids[row[0]] = row[7]
excluded_geometries_ids = get_geometries_ids(tmp_path, excluded_zones)

if excluded_geometries_ids.keys() != excluded_zones.keys():
logger.warning("not all excluded zone's pois are found in poi.txt")
logger.warning("excluded_geometries_ids: {}".format(excluded_geometries_ids.keys()))
logger.warning("excluded_zones: {}".format(excluded_zones.keys()))
logging.getLogger(__name__).warning("not all excluded zone's pois are found in poi.txt")
logging.getLogger(__name__).warning("excluded_geometries_ids: {}".format(excluded_geometries_ids.keys()))
logging.getLogger(__name__).warning("excluded_zones: {}".format(excluded_zones.keys()))

# read geometries
geometries_shapes = {}
with open(tmp_path + "/geometries.txt") as csvfile:
reader = csv.reader(csvfile, delimiter=';', quotechar='"')
for row in reader:
geometries_shapes[row[0]] = row[1]
geometries_shapes = get_geometries_shapes(tmp_path)

for poi_id, zones in excluded_zones.items():
geometry_id = excluded_geometries_ids.get(poi_id)
if not geometry_id:
logger.error("{} could not be found in poi.txt".format(row[0]))
logging.getLogger(__name__).error("{} could not be found in poi.txt".format(poi_id))
continue

shape = geometries_shapes.get(geometry_id)
if not shape:
logger.error("{} could not be found in geometries.txt".format(geometry_id))
logging.getLogger(__name__).error("{} could not be found in geometries.txt".format(geometry_id))
continue

for i, zone in enumerate(zones):
Expand Down

0 comments on commit d5cce15

Please sign in to comment.