Skip to content

Commit

Permalink
EES-5740 Generate FilterHierarchies on import
Browse files Browse the repository at this point in the history
  • Loading branch information
mmyoungman committed Dec 19, 2024
1 parent b4d0d83 commit a9da4b4
Show file tree
Hide file tree
Showing 9 changed files with 141 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
namespace GovUk.Education.ExploreEducationStatistics.Content.ViewModels;
using System;
using System.Collections.Generic;

namespace GovUk.Education.ExploreEducationStatistics.Common.ViewModels;

public record DataSetFileFilterHierarchyViewModel(
Guid RootFilterId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace GovUk.Education.ExploreEducationStatistics.Content.Model;

// Describes one filter hierarchy of a data set file: a root filter, the chain of child filters
// beneath it, and the parent -> child filter item relationships for each tier of that chain.
public record DataSetFileFilterHierarchy(
Guid RootFilterId,
List<Guid> ChildFilterIds,
List<Guid> ChildFilterIds, // child filter IDs, in the same order as the tiers
List<Guid> RootOptionIds,
List<Dictionary<Guid, List<Guid>>> Tiers
List<Dictionary<Guid, List<Guid>>> Tiers // one dictionary per tier, in order: Tiers[0] maps root filter items -> ChildFilterIds[0] items, Tiers[1] maps ChildFilterIds[0] items -> ChildFilterIds[1] items, etc.
);
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ public class FilterMeta
public string? Hint { get; set; }

public required string ColumnName { get; set; }

[JsonIgnore]
public string? GroupCsvColumn { get; set; }
}

public class IndicatorMeta
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public string GetFilterItemLabel(
return rowValues[columnIndex].Trim().NullIfWhiteSpace() ?? DefaultFilterItemLabel;
}

public string GetFilterGroupLabel(
public string GetFilterGroupLabel( // @MarkFix
IReadOnlyList<string> rowValues,
Guid filterId)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public MetaRow GetMetaRow(IReadOnlyList<string> rowValues)
ColumnName = ReadMetaColumnValue(MetaColumns.col_name, rowValues),
ColumnType = Enum.Parse<ColumnType>(columnType!),
Label = ReadMetaColumnValue(MetaColumns.label, rowValues),
FilterGroupingColumn = ReadMetaColumnValue(MetaColumns.filter_grouping_column, rowValues),
FilterGroupingColumn = ReadMetaColumnValue(MetaColumns.filter_grouping_column, rowValues), // @MarkFix
FilterHint = ReadMetaColumnValue(MetaColumns.filter_hint, rowValues),
IndicatorGrouping = ReadMetaColumnValue(MetaColumns.indicator_grouping, rowValues),
IndicatorUnit = EnumUtil.GetFromEnumValue<IndicatorUnit>(!indicatorUnit.IsNullOrEmpty() ? indicatorUnit : ""),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
#nullable enable
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading.Tasks;
using AngleSharp.Dom;
using GovUk.Education.ExploreEducationStatistics.Common.Extensions;
using GovUk.Education.ExploreEducationStatistics.Common.Model;
using GovUk.Education.ExploreEducationStatistics.Common.Model.Data;
using GovUk.Education.ExploreEducationStatistics.Common.Services.Interfaces;
using GovUk.Education.ExploreEducationStatistics.Content.Model;
using GovUk.Education.ExploreEducationStatistics.Content.Model.Database;
using GovUk.Education.ExploreEducationStatistics.Data.Model;
using GovUk.Education.ExploreEducationStatistics.Data.Model.Database;
using GovUk.Education.ExploreEducationStatistics.Data.Processor.Services.Interfaces;
using Microsoft.EntityFrameworkCore;
Expand Down Expand Up @@ -189,6 +193,7 @@ public async Task WriteDataSetFileMeta(Guid subjectId)
Label = f.Label,
Hint = f.Hint,
ColumnName = f.Name,
GroupCsvColumn = f.GroupCsvColumn,
})
.ToListAsync();

Expand Down Expand Up @@ -220,7 +225,112 @@ public async Task WriteDataSetFileMeta(Guid subjectId)
.Single(f => f.Type == FileType.Data
&& f.SubjectId == subjectId);
file.DataSetFileMeta = dataSetFileMeta;

file.FilterHierarchies = await GenerateFilterHierarchies(statisticsDbContext, filters);

await contentDbContext.SaveChangesAsync();
}

/// <summary>
/// Produces one <see cref="DataSetFileFilterHierarchy"/> for every root filter found in
/// <paramref name="filters"/>.
/// </summary>
/// <param name="statisticsDbContext">Context handed on to the per-hierarchy generation.</param>
/// <param name="filters">All filters of the data set.</param>
/// <returns>The generated hierarchies, in the order their root filters appear in the list.</returns>
private static async Task<List<DataSetFileFilterHierarchy>> GenerateFilterHierarchies(
    StatisticsDbContext statisticsDbContext,
    List<FilterMeta> filters)
{
    // A root filter has no grouping column of its own, yet at least one other filter
    // names the root's column as its grouping column.
    bool IsRoot(FilterMeta candidate)
    {
        if (candidate.GroupCsvColumn != null)
        {
            return false;
        }

        return filters.Any(other => candidate.ColumnName == other.GroupCsvColumn);
    }

    var result = new List<DataSetFileFilterHierarchy>();

    // Each hierarchy is awaited before the next one is started.
    foreach (var root in filters.Where(IsRoot))
    {
        result.Add(await GenerateFilterHierarchy(statisticsDbContext, root, filters));
    }

    return result;
}

/// <summary>
/// Builds the filter hierarchy that starts at <paramref name="rootFilter"/> by following the chain
/// of filters whose <c>GroupCsvColumn</c> equals the previous filter's <c>ColumnName</c>, recording
/// for each link (tier) which child filter items occur alongside which parent filter items.
/// </summary>
/// <param name="statisticsDbContext">Context queried for filter items and observation filter items.</param>
/// <param name="rootFilter">The filter at the top of the hierarchy.</param>
/// <param name="filters">All filters of the data set; searched for each successive child filter.</param>
/// <returns>
/// A hierarchy holding the root filter ID, the child filter IDs in tier order, the root filter's
/// item IDs, and one dictionary per tier mapping each parent item ID to its child item IDs.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>Single</c>/<c>SingleOrDefault</c> when no filter, or more than one filter, is
/// grouped by the root's column, or when more than one filter is grouped by a later column.
/// </exception>
private static async Task<DataSetFileFilterHierarchy> GenerateFilterHierarchy(
    StatisticsDbContext statisticsDbContext,
    FilterMeta rootFilter,
    List<FilterMeta> filters)
{
    // Awaited so the database round trip does not block the calling thread
    // (a bare ToHashSet() on the query would enumerate it synchronously).
    var rootFilterItemIds = (await statisticsDbContext.FilterItem
            .AsNoTracking()
            .Where(fi => fi.FilterGroup.FilterId == rootFilter.Id)
            .Select(fi => fi.Id)
            .ToListAsync())
        .ToHashSet();

    var childFilterIds = new List<Guid>();
    var tiers = new List<Dictionary<Guid, List<Guid>>>();

    var parentFilter = rootFilter;
    var parentFilterItemIds = rootFilterItemIds;
    var childFilter = filters
        .Single(f => f.GroupCsvColumn == parentFilter.ColumnName);

    while (true) // one iteration of loop per tier
    {
        // Copy the IDs into per-iteration locals so the query below captures fixed values
        // rather than the loop variables that are reassigned at the end of each iteration.
        var currentParentFilterId = parentFilter.Id;
        var currentChildFilterId = childFilter.Id;

        childFilterIds.Add(currentChildFilterId);

        // For every item of the parent filter, find the child filter's observation filter items
        // that share an observation with that parent item, and pair up the two item IDs.
        // NOTE(review): the inner ToList() is part of the query expression and is left exactly
        // as it was - confirm before changing how this query is shaped.
        var filterItemRelationships = await statisticsDbContext.FilterItem
            .AsNoTracking()
            .Where(fi => fi.FilterGroup.FilterId == currentParentFilterId)
            .SelectMany(parentFilterItem =>
                statisticsDbContext.ObservationFilterItem
                    .AsNoTracking()
                    .Where(childOfi =>
                        childOfi.FilterId == currentChildFilterId
                        && statisticsDbContext.ObservationFilterItem.Any(parentOfi =>
                            childOfi.ObservationId == parentOfi.ObservationId
                            && parentOfi.FilterItemId == parentFilterItem.Id))
                    .Select(childOfi => new
                    {
                        FilterItemId = childOfi.FilterItem.Id,
                        ParentItemId = parentFilterItem.Id,
                    })
                    .ToList())
            .Distinct()
            .ToListAsync();

        // Group once instead of re-scanning the whole relationship list for every parent item.
        // A lookup yields an empty sequence for a parent that has no children, so such parents
        // still receive an (empty) entry in the tier.
        var childItemIdsByParentItemId = filterItemRelationships
            .ToLookup(relationship => relationship.ParentItemId, relationship => relationship.FilterItemId);

        var tier = new Dictionary<Guid, List<Guid>>();
        foreach (var parentFilterItemId in parentFilterItemIds)
        {
            tier.Add(parentFilterItemId, childItemIdsByParentItemId[parentFilterItemId].ToList());
        }

        tiers.Add(tier);

        // check whether we're finished: is there a filter grouped by the current child's column?
        var newChildFilter = filters
            .SingleOrDefault(candidate => candidate.GroupCsvColumn == childFilter.ColumnName);
        if (newChildFilter == null)
        {
            break;
        }

        // if not finished, prepare for next iteration of loop: the child becomes the parent, and
        // only the child items actually found in this tier become the next tier's parent items
        parentFilter = childFilter;
        childFilter = newChildFilter;
        parentFilterItemIds = filterItemRelationships
            .Select(relationship => relationship.FilterItemId)
            .ToHashSet();
    }

    return new DataSetFileFilterHierarchy(
        RootFilterId: rootFilter.Id,
        ChildFilterIds: childFilterIds,
        RootOptionIds: [.. rootFilterItemIds],
        Tiers: tiers);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,7 @@ public async Task FilterSubjectMeta_FiltersAndIndicators()
{
SubjectId = releaseSubject.SubjectId,
Type = FileType.Data,
FilterHierarchies = [], // @MarkFix
},
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
using GovUk.Education.ExploreEducationStatistics.Common.Services.Interfaces;
using GovUk.Education.ExploreEducationStatistics.Common.Services.Interfaces.Security;
using GovUk.Education.ExploreEducationStatistics.Common.Utils;
using GovUk.Education.ExploreEducationStatistics.Common.ViewModels;
using GovUk.Education.ExploreEducationStatistics.Content.Model;
using GovUk.Education.ExploreEducationStatistics.Content.Model.Database;
using GovUk.Education.ExploreEducationStatistics.Data.Model;
Expand All @@ -24,6 +25,7 @@
using GovUk.Education.ExploreEducationStatistics.Data.ViewModels.Meta;
using Microsoft.AspNetCore.Mvc;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Query.SqlExpressions;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using static GovUk.Education.ExploreEducationStatistics.Common.Validators.ValidationUtils;
Expand Down Expand Up @@ -198,6 +200,7 @@ private async Task<SubjectMetaViewModel> GetSubjectMetaViewModelFromRequest(
var stopwatch = Stopwatch.StartNew();

var releaseFile = await contentDbContext.ReleaseFiles
.Include(rf => rf.File)
.Where(rf => rf.ReleaseVersionId == releaseSubject.ReleaseVersionId
&& rf.File.SubjectId == releaseSubject.SubjectId
&& rf.File.Type == FileType.Data)
Expand All @@ -222,10 +225,22 @@ await observationService.GetMatchedObservations(
releaseSubject.SubjectId, releaseFile.IndicatorSequence);
logger.LogTrace("Got Indicators in {Time} ms", stopwatch.Elapsed.TotalMilliseconds);

var filterHierarchies = releaseFile.File.FilterHierarchies!
.Select(fh => new DataSetFileFilterHierarchyViewModel(
RootFilterId: fh.RootFilterId,
ChildFilterIds: fh.ChildFilterIds,
RootOptionIds: fh.RootOptionIds,
Tiers: fh.Tiers
.Select((tier, index) =>
new DataSetFileFilterHierarchyTierViewModel(index, tier))
.ToList()
)).ToList();

return new SubjectMetaViewModel
{
Filters = filters,
Indicators = indicators,
FilterHierarchies = filterHierarchies,
};
}
default:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using GovUk.Education.ExploreEducationStatistics.Common.ViewModels;

namespace GovUk.Education.ExploreEducationStatistics.Data.ViewModels.Meta;

public record SubjectMetaViewModel
Expand All @@ -9,4 +11,6 @@ public record SubjectMetaViewModel
public Dictionary<string, LocationsMetaViewModel> Locations { get; set; } = new();

public TimePeriodsMetaViewModel TimePeriod { get; set; } = new();

public List<DataSetFileFilterHierarchyViewModel>? FilterHierarchies { get; set; } = null;
}

0 comments on commit a9da4b4

Please sign in to comment.