diff --git a/infrastructure/templates/public-api/application/public-api/publicApiApp.bicep b/infrastructure/templates/public-api/application/public-api/publicApiApp.bicep
index 5bfc5d1ca52..414920edf6e 100644
--- a/infrastructure/templates/public-api/application/public-api/publicApiApp.bicep
+++ b/infrastructure/templates/public-api/application/public-api/publicApiApp.bicep
@@ -30,6 +30,9 @@ param apiAppRegistrationClientId string
 @description('Specifies the Application Insights connection string for this Container App to use for its monitoring.')
 param appInsightsConnectionString string
 
+@description('Whether to create or update Azure Monitor alerts during this deploy')
+param deployAlerts bool
+
 @description('Specifies a set of tags with which to tag the resource in Azure.')
 param tagValues object
 
@@ -146,6 +149,15 @@ module apiContainerAppModule '../../components/containerApp.bicep' = {
   }
 }
 
+module containerAppRestartsAlert '../../components/alerts/containerApps/restarts.bicep' = if (deployAlerts) {
+  name: '${resourceNames.publicApi.apiApp}RestartsDeploy'
+  params: {
+    resourceNames: [resourceNames.publicApi.apiApp]
+    alertsGroupName: resourceNames.existingResources.alertsGroup
+    tagValues: tagValues
+  }
+}
+
 output containerAppFqdn string = apiContainerAppModule.outputs.containerAppFqdn
 output containerAppName string = apiContainerAppModule.outputs.containerAppName
 output healthProbePath string = '/health'
diff --git a/infrastructure/templates/public-api/application/public-api/publicApiDataProcessor.bicep b/infrastructure/templates/public-api/application/public-api/publicApiDataProcessor.bicep
index 88be79535df..29a9a450634 100644
--- a/infrastructure/templates/public-api/application/public-api/publicApiDataProcessor.bicep
+++ b/infrastructure/templates/public-api/application/public-api/publicApiDataProcessor.bicep
@@ -6,14 +6,11 @@ param resourceNames ResourceNames
 @description('Specifies the location for all resources.')
 param location string
 
-@description('Alert metric name prefix')
-param metricsNamePrefix string
-
 @description('The Application Insights key that is associated with this resource')
 param applicationInsightsKey string
 
 @description('Specifies whether or not the Data Processor Function App already exists.')
-param dataProcessorFunctionAppExists bool = false
+param dataProcessorFunctionAppExists bool
 
 @description('Specifies the Application (Client) Id of a pre-existing App Registration used to represent the Data Processor Function App.')
 param dataProcessorAppRegistrationClientId string
@@ -21,6 +18,9 @@ param dataProcessorAppRegistrationClientId string
 @description('Public API Storage : Firewall rules.')
 param storageFirewallRules FirewallRule[] = []
 
+@description('Whether to create or update Azure Monitor alerts during this deploy')
+param deployAlerts bool
+
 @description('Specifies a set of tags with which to tag the resource in Azure.')
 param tagValues object
 
@@ -68,7 +68,6 @@ module dataProcessorFunctionAppModule '../../components/functionApp.bicep' = {
     functionAppName: resourceNames.publicApi.dataProcessor
     appServicePlanName: resourceNames.publicApi.dataProcessor
     storageAccountsNamePrefix: resourceNames.publicApi.dataProcessorStorageAccountsPrefix
-    alertsGroupName: resourceNames.existingResources.alertsGroup
     location: location
     applicationInsightsKey: applicationInsightsKey
     subnetId: outboundVnetSubnet.id
@@ -98,10 +97,7 @@ module dataProcessorFunctionAppModule '../../components/functionApp.bicep' = {
       family: 'EP'
     }
     preWarmedInstanceCount: 1
-    healthCheck: {
-      path: '/api/HealthCheck'
-      unhealthyMetricName: '${metricsNamePrefix}Unhealthy'
-    }
+    healthCheckPath: '/api/HealthCheck'
     appSettings: {
       App__MetaInsertBatchSize: 1000
     }
@@ -117,6 +113,41 @@ module dataProcessorFunctionAppModule '../../components/functionApp.bicep' = {
   }
 }
 
+module functionAppHealthAlert '../../components/alerts/sites/healthAlert.bicep' = if (deployAlerts) {
+  name: '${resourceNames.publicApi.dataProcessor}HealthDeploy'
+  params: {
+    resourceNames: [resourceNames.publicApi.dataProcessor]
+    alertsGroupName: resourceNames.existingResources.alertsGroup
+    tagValues: tagValues
+  }
+}
+
+module storageAccountAvailabilityAlerts '../../components/alerts/storageAccounts/availabilityAlert.bicep' = if (deployAlerts) {
+  name: '${resourceNames.publicApi.dataProcessor}StorageAvailabilityDeploy'
+  params: {
+    resourceNames: [
+      dataProcessorFunctionAppModule.outputs.managementStorageAccountName
+      dataProcessorFunctionAppModule.outputs.slot1StorageAccountName
+      dataProcessorFunctionAppModule.outputs.slot2StorageAccountName
+    ]
+    alertsGroupName: resourceNames.existingResources.alertsGroup
+    tagValues: tagValues
+  }
+}
+
+module fileServiceAvailabilityAlerts '../../components/alerts/fileServices/availabilityAlert.bicep' = if (deployAlerts) {
+  name: '${resourceNames.publicApi.dataProcessor}FsAvailabilityDeploy'
+  params: {
+    resourceNames: [
+      dataProcessorFunctionAppModule.outputs.managementStorageAccountName
+      dataProcessorFunctionAppModule.outputs.slot1StorageAccountName
+      dataProcessorFunctionAppModule.outputs.slot2StorageAccountName
+    ]
+    alertsGroupName: resourceNames.existingResources.alertsGroup
+    tagValues: tagValues
+  }
+}
+
 output managedIdentityName string = dataProcessorFunctionAppManagedIdentity.name
 output managedIdentityClientId string = dataProcessorFunctionAppManagedIdentity.properties.clientId
 output publicApiDataFileShareMountPath string = publicApiDataFileShareMountPath
diff --git a/infrastructure/templates/public-api/application/public-api/publicApiStorage.bicep b/infrastructure/templates/public-api/application/public-api/publicApiStorage.bicep
index ce25740625f..744d1217aa7 100644
--- a/infrastructure/templates/public-api/application/public-api/publicApiStorage.bicep
+++ b/infrastructure/templates/public-api/application/public-api/publicApiStorage.bicep
@@ -6,14 +6,17 @@ param resourceNames ResourceNames
 param location string
 
 @description('Public API Storage : Size of the file share in GB.')
-param publicApiDataFileShareQuota int = 1
+param publicApiDataFileShareQuota int
 
 @description('Public API Storage : Firewall rules.')
-param storageFirewallRules FirewallRule[] = []
+param storageFirewallRules FirewallRule[]
 
 @description('Specifies a set of tags with which to tag the resource in Azure.')
 param tagValues object
 
+@description('Whether to create or update Azure Monitor alerts during this deploy')
+param deployAlerts bool
+
 resource vNet 'Microsoft.Network/virtualNetworks@2023-11-01' existing = {
   name: resourceNames.existingResources.vNet
 }
@@ -56,6 +59,24 @@ module dataFilesFileShareModule '../../components/fileShare.bicep' = {
   }
 }
 
+module storageAccountAvailabilityAlert '../../components/alerts/storageAccounts/availabilityAlert.bicep' = if (deployAlerts) {
+  name: '${resourceNames.publicApi.publicApiStorageAccount}AvailabilityDeploy'
+  params: {
+    resourceNames: [resourceNames.publicApi.publicApiStorageAccount]
+    alertsGroupName: resourceNames.existingResources.alertsGroup
+    tagValues: tagValues
+  }
+}
+
+module fileServiceAvailabilityAlert '../../components/alerts/fileServices/availabilityAlert.bicep' = if (deployAlerts) {
+  name: '${resourceNames.publicApi.publicApiStorageAccount}FsAvailabilityDeploy'
+  params: {
+    resourceNames: [resourceNames.publicApi.publicApiStorageAccount]
+    alertsGroupName: resourceNames.existingResources.alertsGroup
+    tagValues: tagValues
+  }
+}
+
 output storageAccountName string = publicApiStorageAccountModule.outputs.storageAccountName
 output connectionStringSecretName string = publicApiStorageAccountModule.outputs.connectionStringSecretName
 output accessKeySecretName string = publicApiStorageAccountModule.outputs.accessKeySecretName
diff --git a/infrastructure/templates/public-api/application/shared/appGateway.bicep b/infrastructure/templates/public-api/application/shared/appGateway.bicep
index 9926990eb6a..d93258d1402 100644
--- a/infrastructure/templates/public-api/application/shared/appGateway.bicep
+++ b/infrastructure/templates/public-api/application/shared/appGateway.bicep
@@ -24,6 +24,9 @@ param routes AppGatewayRoute[]
 @description('Rules for how the App Gateway should rewrite URLs')
 param rewrites AppGatewayRewriteSet[]
 
+@description('Whether to create or update Azure Monitor alerts during this deploy')
+param deployAlerts bool
+
 @description('Tags for the resources')
 param tagValues object
 
@@ -51,3 +54,12 @@ module appGatewayModule '../../components/appGateway.bicep' = {
     tagValues: tagValues
   }
 }
+
+module backendPoolsHealthAlert '../../components/alerts/appGateways/backendPoolHealth.bicep' = if (deployAlerts) {
+  name: '${resourceNames.sharedResources.appGateway}BackendPoolsHealthDeploy'
+  params: {
+    resourceNames: [resourceNames.sharedResources.appGateway]
+    alertsGroupName: resourceNames.existingResources.alertsGroup
+    tagValues: tagValues
+  }
+}
diff --git a/infrastructure/templates/public-api/application/shared/postgreSqlFlexibleServer.bicep b/infrastructure/templates/public-api/application/shared/postgreSqlFlexibleServer.bicep
index 26a15e4ac14..2f9469be5d8 100644
--- a/infrastructure/templates/public-api/application/shared/postgreSqlFlexibleServer.bicep
+++ b/infrastructure/templates/public-api/application/shared/postgreSqlFlexibleServer.bicep
@@ -31,6 +31,9 @@ param privateEndpointSubnetId string
 @description('An array of Entra ID admin principal names for this resource')
 param entraIdAdminPrincipals PrincipalNameAndId[] = []
 
+@description('Whether to create or update Azure Monitor alerts during this deploy')
+param deployAlerts bool
+
 @description('Specifies a set of tags with which to tag the resource in Azure.')
 param tagValues object
 
@@ -70,6 +73,15 @@ resource maxPreparedTransactionsConfig 'Microsoft.DBforPostgreSQL/flexibleServer
   ]
 }
 
+module databaseAliveAlert '../../components/alerts/flexibleServers/databaseAlive.bicep' = if (deployAlerts) {
+  name: '${resourceNames.sharedResources.postgreSqlFlexibleServer}DbAliveDeploy'
+  params: {
+    resourceNames: [resourceNames.sharedResources.postgreSqlFlexibleServer]
+    alertsGroupName: resourceNames.existingResources.alertsGroup
+    tagValues: tagValues
+  }
+}
+
 var managedIdentityConnectionStringTemplate = postgreSqlServerModule.outputs.managedIdentityConnectionStringTemplate
 
 var dataProcessorPsqlConnectionStringSecretKey = 'ees-publicapi-data-processor-connectionstring-publicdatadb'
diff --git a/infrastructure/templates/public-api/ci/azure-pipelines.yml b/infrastructure/templates/public-api/ci/azure-pipelines.yml
index 175f98d62e6..f3d3a984229 100644
--- a/infrastructure/templates/public-api/ci/azure-pipelines.yml
+++ b/infrastructure/templates/public-api/ci/azure-pipelines.yml
@@ -7,6 +7,9 @@ parameters:
   - name: updatePsqlFlexibleServer
     displayName: Does the PostgreSQL Flexible Server require any updates? False by default to avoid unnecessarily lengthy deploys.
     default: false
+  - name: deployAlerts
+    displayName: Whether to create or update Azure Monitor alerts during this deploy.
+    default: false
   - name: forceDeployToEnvironment
     displayName: Set to either dev or test to force a deploy to that environment from the chosen branch.
     type: string
@@ -42,6 +45,8 @@ variables:
     value: ${{ parameters.deployContainerApp }}
   - name: updatePsqlFlexibleServer
     value: ${{ parameters.updatePsqlFlexibleServer }}
+  - name: deployAlerts
+    value: ${{ parameters.deployAlerts }}
 
 pool:
   vmImage: $(vmImageName)
diff --git a/infrastructure/templates/public-api/ci/jobs/deploy-infrastructure.yml b/infrastructure/templates/public-api/ci/jobs/deploy-infrastructure.yml
index 6851c86dec0..4deeb28a247 100644
--- a/infrastructure/templates/public-api/ci/jobs/deploy-infrastructure.yml
+++ b/infrastructure/templates/public-api/ci/jobs/deploy-infrastructure.yml
@@ -37,6 +37,7 @@ jobs:
                 parameterFile: $(paramFile)
                 deployContainerApp: true
                 updatePsqlFlexibleServer: false
+                deployAlerts: false
                 dataProcessorExists: true
 
             - task: AzureCLI@2
@@ -63,6 +64,7 @@ jobs:
                 parameterFile: $(paramFile)
                 deployContainerApp: $(deployContainerApp)
                 updatePsqlFlexibleServer: $(updatePsqlFlexibleServer)
+                deployAlerts: $(deployAlerts)
                 dataProcessorExists: $(dataProcessorExists)
 
             # - template: ../tasks/assign-app-role-to-service-principal.yml
diff --git a/infrastructure/templates/public-api/ci/tasks/deploy-bicep.yml b/infrastructure/templates/public-api/ci/tasks/deploy-bicep.yml
index add810c13cc..fa5ceeaa331 100644
--- a/infrastructure/templates/public-api/ci/tasks/deploy-bicep.yml
+++ b/infrastructure/templates/public-api/ci/tasks/deploy-bicep.yml
@@ -13,13 +13,12 @@ parameters:
     type: string
   - name: deployContainerApp
     type: string
-    default: true
   - name: updatePsqlFlexibleServer
     type: string
-    default: false
+  - name: deployAlerts
+    type: string
   - name: dataProcessorExists
     type: string
-    default: true
 
 steps:
   - task: AzureCLI@2
@@ -48,6 +47,7 @@ steps:
             dockerImagesTag='$(resources.pipeline.MainBuild.runName)' \
             deployContainerApp=${{ parameters.deployContainerApp }} \
             updatePsqlFlexibleServer=${{ parameters.updatePsqlFlexibleServer }} \
+            deployAlerts=${{ parameters.deployAlerts }} \
             dataProcessorFunctionAppExists=${{ parameters.dataProcessorExists }} \
             dataProcessorAppRegistrationClientId='$(dataProcessorAppRegistrationClientId)' \
             apiAppRegistrationClientId='$(apiAppRegistrationClientId)'
diff --git a/infrastructure/templates/public-api/components/alerts/appGateways/backendPoolHealth.bicep b/infrastructure/templates/public-api/components/alerts/appGateways/backendPoolHealth.bicep
new file mode 100644
index 00000000000..3fabc48a219
--- /dev/null
+++ b/infrastructure/templates/public-api/components/alerts/appGateways/backendPoolHealth.bicep
@@ -0,0 +1,33 @@
+import { Severity } from '../types.bicep'
+
+@description('Names of the resources that these alerts are being applied to.')
+param resourceNames string[]
+
+@description('The alert severity.')
+param severity Severity = 'Critical'
+
+@description('Name of the Alerts Group used to send alert messages.')
+param alertsGroupName string
+
+@description('Tags with which to tag the resource in Azure.')
+param tagValues object
+
+module alerts '../staticMetricAlert.bicep' = [for name in resourceNames: {
+  name: '${name}BackendHealthAlertModule'
+  params: {
+    alertName: '${name}-backend-pool-health'
+    resourceIds: [resourceId('Microsoft.Network/applicationGateways', name)]
+    resourceType: 'Microsoft.Network/applicationGateways'
+    query: {
+      metric: 'UnhealthyHostCount'
+      aggregation: 'Total'
+      operator: 'GreaterThan'
+      threshold: 0
+    }
+    evaluationFrequency: 'PT1M'
+    windowSize: 'PT5M'
+    severity: severity
+    alertsGroupName: alertsGroupName
+    tagValues: tagValues
+  }
+}]
diff --git a/infrastructure/templates/public-api/components/alerts/containerApps/restarts.bicep b/infrastructure/templates/public-api/components/alerts/containerApps/restarts.bicep
new file mode 100644
index 00000000000..fb79c183cfd
--- /dev/null
+++ b/infrastructure/templates/public-api/components/alerts/containerApps/restarts.bicep
@@ -0,0 +1,33 @@
+import { Severity } from '../types.bicep'
+
+@description('Names of the resources that these alerts are being applied to.')
+param resourceNames string[]
+
+@description('The alert severity.')
+param severity Severity = 'Warning'
+
+@description('Name of the Alerts Group used to send alert messages.')
+param alertsGroupName string
+
+@description('Tags with which to tag the resource in Azure.')
+param tagValues object
+
+module alerts '../staticMetricAlert.bicep' = [for name in resourceNames: {
+  name: '${name}RestartsAlertModule'
+  params: {
+    alertName: '${name}-restarts'
+    resourceIds: [resourceId('Microsoft.App/containerApps', name)]
+    resourceType: 'Microsoft.App/containerApps'
+    query: {
+      metric: 'RestartCount'
+      aggregation: 'Total'
+      operator: 'GreaterThan'
+      threshold: 0
+    }
+    evaluationFrequency: 'PT1M'
+    windowSize: 'PT5M'
+    severity: severity
+    alertsGroupName: alertsGroupName
+    tagValues: tagValues
+  }
+}]
diff --git a/infrastructure/templates/public-api/components/alerts/fileServices/availabilityAlert.bicep b/infrastructure/templates/public-api/components/alerts/fileServices/availabilityAlert.bicep
new file mode 100644
index 00000000000..a4f4e3c7714
--- /dev/null
+++ b/infrastructure/templates/public-api/components/alerts/fileServices/availabilityAlert.bicep
@@ -0,0 +1,33 @@
+import { Severity } from '../types.bicep'
+
+@description('Names of the resources that these alerts are being applied to.')
+param resourceNames string[]
+
+@description('The alert severity.')
+param severity Severity = 'Critical'
+
+@description('Name of the Alerts Group used to send alert messages.')
+param alertsGroupName string
+
+@description('Tags with which to tag the resource in Azure.')
+param tagValues object
+
+module alerts '../staticMetricAlert.bicep' = [for name in resourceNames: {
+  name: '${name}FsAvailabilityAlertModule'
+  params: {
+    alertName: '${name}-fileservice-availability'
+    resourceIds: [resourceId('Microsoft.Storage/storageAccounts/fileServices', name, 'default')]
+    resourceType: 'Microsoft.Storage/storageAccounts/fileServices'
+    query: {
+      metric: 'availability'
+      aggregation: 'Average'
+      operator: 'LessThan'
+      threshold: 100
+    }
+    evaluationFrequency: 'PT1M'
+    windowSize: 'PT5M'
+    severity: severity
+    alertsGroupName: alertsGroupName
+    tagValues: tagValues
+  }
+}]
diff --git a/infrastructure/templates/public-api/components/alerts/flexibleServers/databaseAlive.bicep b/infrastructure/templates/public-api/components/alerts/flexibleServers/databaseAlive.bicep
new file mode 100644
index 00000000000..21358742e02
--- /dev/null
+++ b/infrastructure/templates/public-api/components/alerts/flexibleServers/databaseAlive.bicep
@@ -0,0 +1,33 @@
+import { Severity } from '../types.bicep'
+
+@description('Names of the resources that these alerts are being applied to.')
+param resourceNames string[]
+
+@description('The alert severity.')
+param severity Severity = 'Critical'
+
+@description('Name of the Alerts Group used to send alert messages.')
+param alertsGroupName string
+
+@description('Tags with which to tag the resource in Azure.')
+param tagValues object
+
+module alerts '../staticMetricAlert.bicep' = [for name in resourceNames: {
+  name: '${name}DbAliveAlertModule'
+  params: {
+    alertName: '${name}-database-alive'
+    resourceIds: [resourceId('Microsoft.DBforPostgreSQL/flexibleServers', name)]
+    resourceType: 'Microsoft.DBforPostgreSQL/flexibleServers'
+    query: {
+      metric: 'is_db_alive'
+      aggregation: 'Minimum'
+      operator: 'LessThan'
+      threshold: 1
+    }
+    evaluationFrequency: 'PT1M'
+    windowSize: 'PT5M'
+    severity: severity
+    alertsGroupName: alertsGroupName
+    tagValues: tagValues
+  }
+}]
diff --git a/infrastructure/templates/public-api/components/alerts/sites/healthAlert.bicep b/infrastructure/templates/public-api/components/alerts/sites/healthAlert.bicep
new file mode 100644
index 00000000000..f7af74839dd
--- /dev/null
+++ b/infrastructure/templates/public-api/components/alerts/sites/healthAlert.bicep
@@ -0,0 +1,50 @@
+import { EvaluationFrequency, WindowSize, Severity } from '../types.bicep'
+
+@description('Names of the resources that these alerts are being applied to.')
+param resourceNames string[]
+
+@description('Type of the resource that this alert is being applied to.')
+param resourceType
+  | 'Microsoft.Web/sites'
+  | 'Microsoft.Web/sites/slots' = 'Microsoft.Web/sites'
+
+@description('The evaluation frequency.')
+param evaluationFrequency EvaluationFrequency = 'PT1M'
+
+@description('The window size.')
+param windowSize WindowSize = 'PT5M'
+
+@description('Name of the Alerts Group used to send alert messages.')
+param alertsGroupName string
+
+@description('Severity level of the alert.')
+param severity Severity = 'Critical'
+
+@description('Tags with which to tag the resource in Azure.')
+param tagValues object
+
+module metricAlertModule '../staticMetricAlert.bicep' = [for name in resourceNames: {
+  name: '${replace(name, '/', '-')}HealthAlertModule'
+  params: {
+    alertName: '${replace(name, '/', '-')}-health'
+    alertsGroupName: alertsGroupName
+    // resourceId() needs one name argument per resource type segment, so slot
+    // names supplied as 'site/slot' are split into their two segments.
+    resourceIds: [
+      resourceType == 'Microsoft.Web/sites/slots'
+        ? resourceId(resourceType, split(name, '/')[0], split(name, '/')[1])
+        : resourceId(resourceType, name)
+    ]
+    resourceType: resourceType
+    query: {
+      metric: 'HealthCheckStatus'
+      aggregation: 'Minimum'
+      operator: 'LessThan'
+      threshold: 100
+    }
+    evaluationFrequency: evaluationFrequency
+    windowSize: windowSize
+    severity: severity
+    tagValues: tagValues
+  }
+}]
diff --git a/infrastructure/templates/public-api/components/alerts/staticMetricAlert.bicep b/infrastructure/templates/public-api/components/alerts/staticMetricAlert.bicep
new file mode 100644
index 00000000000..324df78ee6b
--- /dev/null
+++ b/infrastructure/templates/public-api/components/alerts/staticMetricAlert.bicep
@@ -0,0 +1,70 @@
+import { EvaluationFrequency, MetricName, StaticMetricOperator, ResourceType, TimeAggregation, WindowSize, Severity, severityMapping } from 'types.bicep'
+
+@description('Name of the alert.')
+param alertName string
+
+@description('Ids of the resources that this alert is being applied to.')
+param resourceIds string[]
+
+@description('Type of the resource that this alert is being applied to.')
+param resourceType ResourceType
+
+@description('The query being used to test if the alert should be fired.')
+param query {
+  metric: MetricName
+  aggregation: TimeAggregation
+  operator: StaticMetricOperator
+  threshold: int
+}
+
+@description('The evaluation frequency.')
+param evaluationFrequency EvaluationFrequency = 'PT1M'
+
+@description('The window size.')
+param windowSize WindowSize = 'PT5M'
+
+@description('The alert severity.')
+param severity Severity = 'Error'
+
+@description('Name of the Alerts Group used to send alert messages.')
+param alertsGroupName string
+
+@description('Tags with which to tag the resource in Azure.')
+param tagValues object
+
+var severityLevel = severityMapping[severity]
+
+resource alertsActionGroup 'Microsoft.Insights/actionGroups@2023-01-01' existing = {
+  name: alertsGroupName
+}
+
+resource metricAlertRule 'Microsoft.Insights/metricAlerts@2018-03-01' = {
+  name: alertName
+  location: 'Global'
+  properties: {
+    enabled: true
+    scopes: resourceIds
+    severity: severityLevel
+    evaluationFrequency: evaluationFrequency
+    windowSize: windowSize
+    criteria: {
+      'odata.type': length(resourceIds) > 1 ? 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria' : 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria'
+      allOf: [{
+        criterionType: 'StaticThresholdCriterion'
+        name: 'Metric1'
+        metricName: query.metric
+        metricNamespace: resourceType
+        timeAggregation: query.aggregation
+        operator: query.operator
+        threshold: query.threshold
+        skipMetricValidation: false
+      }]
+    }
+    actions: [
+      {
+        actionGroupId: alertsActionGroup.id
+      }
+    ]
+  }
+  tags: tagValues
+}
diff --git a/infrastructure/templates/public-api/components/alerts/storageAccounts/availabilityAlert.bicep b/infrastructure/templates/public-api/components/alerts/storageAccounts/availabilityAlert.bicep
new file mode 100644
index 00000000000..d71ce969f80
--- /dev/null
+++ b/infrastructure/templates/public-api/components/alerts/storageAccounts/availabilityAlert.bicep
@@ -0,0 +1,33 @@
+import { Severity } from '../types.bicep'
+
+@description('Names of the resources that these alerts are being applied to.')
+param resourceNames string[]
+
+@description('The alert severity.')
+param severity Severity = 'Critical'
+
+@description('Name of the Alerts Group used to send alert messages.')
+param alertsGroupName string
+
+@description('Tags with which to tag the resource in Azure.')
+param tagValues object
+
+module alerts '../staticMetricAlert.bicep' = [for name in resourceNames: {
+  name: '${name}AvailabilityAlertModule'
+  params: {
+    alertName: '${name}-availability'
+    resourceIds: [resourceId('Microsoft.Storage/storageAccounts', name)]
+    resourceType: 'Microsoft.Storage/storageAccounts'
+    query: {
+      metric: 'availability'
+      aggregation: 'Average'
+      operator: 'LessThan'
+      threshold: 100
+    }
+    evaluationFrequency: 'PT1M'
+    windowSize: 'PT5M'
+    severity: severity
+    alertsGroupName: alertsGroupName
+    tagValues: tagValues
+  }
+}]
diff --git a/infrastructure/templates/public-api/components/alerts/types.bicep b/infrastructure/templates/public-api/components/alerts/types.bicep
new file mode 100644
index 00000000000..c767d9d5f06
--- /dev/null
+++ b/infrastructure/templates/public-api/components/alerts/types.bicep
@@ -0,0 +1,68 @@
+@export()
+type EvaluationFrequency = 'PT1M'
+
+@export()
+type WindowSize = 'PT5M'
+
+@export()
+type DynamicMetricOperator =
+  | 'GreaterOrLessThan'
+  | 'GreaterThan'
+  | 'LessThan'
+
+
+@export()
+type StaticMetricOperator =
+  | 'Equals'
+  | 'GreaterThan'
+  | 'GreaterThanOrEqual'
+  | 'LessThan'
+  | 'LessThanOrEqual'
+
+@export()
+type TimeAggregation =
+  | 'Average'
+  | 'Count'
+  | 'Maximum'
+  | 'Minimum'
+  | 'Total'
+
+@export()
+type Severity =
+  | 'Critical'
+  | 'Error'
+  | 'Warning'
+  | 'Informational'
+  | 'Verbose'
+
+@export()
+var severityMapping = {
+  Critical: 0
+  Error: 1
+  Warning: 2
+  Informational: 3
+  Verbose: 4
+}
+
+@export()
+type ResourceType =
+  | 'Microsoft.App/containerApps'
+  | 'Microsoft.DBforPostgreSQL/flexibleServers'
+  | 'Microsoft.Network/applicationGateways'
+  | 'Microsoft.Sql/servers/databases'
+  | 'Microsoft.Storage/storageAccounts'
+  | 'Microsoft.Storage/storageAccounts/fileServices'
+  | 'Microsoft.Web/sites'
+  | 'Microsoft.Web/sites/slots'
+
+@export()
+type MetricName =
+  | 'availability'
+  | 'blocked_by_firewall'
+  | 'connection_failed'
+  | 'connections_failed'
+  | 'cpu_percent'
+  | 'HealthCheckStatus'
+  | 'is_db_alive'
+  | 'RestartCount'
+  | 'UnhealthyHostCount'
diff --git a/infrastructure/templates/public-api/components/functionApp.bicep b/infrastructure/templates/public-api/components/functionApp.bicep
index 9807d5e75bd..4e274ad4cdf 100644
--- a/infrastructure/templates/public-api/components/functionApp.bicep
+++ b/infrastructure/templates/public-api/components/functionApp.bicep
@@ -12,9 +12,6 @@ param appServicePlanName string
 @description('Specifies the name prefix for all storage accounts')
 param storageAccountsNamePrefix string
 
-@description('Specifies the name of an alerts group for reporting metric alerts')
-param alertsGroupName string
-
 @description('Function App Plan : operating system')
 param appServicePlanOS 'Windows' | 'Linux' = 'Linux'
 
@@ -64,11 +61,8 @@ param preWarmedInstanceCount int?
 @description('Specifies whether or not the Function App will always be on and not idle after periods of no traffic - must be compatible with the chosen hosting plan')
 param alwaysOn bool?
 
-@description('Specifies configuration for setting up automatic health checks and metric alerts')
-param healthCheck {
-  path: string
-  unhealthyMetricName: string
-}?
+@description('Specifies an optional URL for Azure to use to monitor the health of this resource')
+param healthCheckPath string?
 
 @description('Specifies additional Azure Storage Accounts to make available to this Function App')
 param azureFileShares AzureFileShareMount[] = []
@@ -190,7 +184,7 @@ var commonSiteProperties = {
   reserved: reserved
   siteConfig: {
     alwaysOn: alwaysOn ?? null
-    healthCheckPath: healthCheck != null ? healthCheck!.path : null
+    healthCheckPath: healthCheckPath
     preWarmedInstanceCount: preWarmedInstanceCount ?? null
     netFrameworkVersion: '8.0'
     linuxFxVersion: appServicePlanOS == 'Linux' ? 'DOTNET-ISOLATED|8.0' : null
@@ -232,68 +226,6 @@ module azureAuthentication 'siteAzureAuthentication.bicep' = if (entraIdAuthenti
   }
 }
 
-resource alertsActionGroup 'Microsoft.Insights/actionGroups@2023-01-01' existing = {
-  name: alertsGroupName
-}
-
-var commonUnhealthyMetricAlertRuleProperties = {
-  enabled: true
-  severity: 1
-  evaluationFrequency: 'PT1M'
-  windowSize: 'PT5M'
-  criteria: {
-    'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria'
-    allOf: [
-      {
-        name: 'Metric1'
-        criterionType: 'StaticThresholdCriterion'
-        metricName: 'HealthCheckStatus'
-        timeAggregation: 'Minimum'
-        operator: 'LessThan'
-        threshold: 100
-        skipMetricValidation: false
-      }
-    ]
-  }
-  actions: [
-    {
-      actionGroupId: alertsActionGroup.id
-    }
-  ]
-}
-
-resource functionAppUnhealthyMetricAlertRule 'Microsoft.Insights/metricAlerts@2018-03-01' = if (healthCheck != null) {
-  name: healthCheck!.unhealthyMetricName
-  location: 'Global'
-  properties: union(commonUnhealthyMetricAlertRuleProperties, {
-    scopes: [functionApp.id]
-    criteria: {
-      allOf: [union(
-        commonUnhealthyMetricAlertRuleProperties.criteria.allOf[0],
-        {
-          metricNamespace: 'Microsoft.Web/sites'
-        }
-      )]
-    }
-  })
-}
-
-resource stagingSlotUnhealthyMetricAlertRule 'Microsoft.Insights/metricAlerts@2018-03-01' = if (healthCheck != null) {
-  name: '${healthCheck!.unhealthyMetricName}Staging'
-  location: 'Global'
-  properties: union(commonUnhealthyMetricAlertRuleProperties, {
-    scopes: [stagingSlot.id]
-    criteria: {
-      allOf: [union(
-        commonUnhealthyMetricAlertRuleProperties.criteria.allOf[0],
-        {
-          metricNamespace: 'Microsoft.Web/sites/slots'
-        }
-      )]
-    }
-  })
-}
-
 // Allow Key Vault references passed as secure appsettings to be resolved by the Function App and its deployment slots.
 // Where the staging slot's managed identity differs from the main slot's managed identity, add its id to the list.
 var keyVaultPrincipalIds = userAssignedManagedIdentityParams != null
@@ -431,3 +363,6 @@ module privateEndpointModule 'privateEndpoint.bicep' = if (privateEndpointSubnet
 }
 
 output functionAppName string = functionApp.name
+output managementStorageAccountName string = sharedStorageAccountName
+output slot1StorageAccountName string = slot1StorageAccountName
+output slot2StorageAccountName string = slot2StorageAccountName
diff --git a/infrastructure/templates/public-api/main.bicep b/infrastructure/templates/public-api/main.bicep
index d3df11ca2b1..92c0cafd7e0 100644
--- a/infrastructure/templates/public-api/main.bicep
+++ b/infrastructure/templates/public-api/main.bicep
@@ -75,6 +75,9 @@ param deployContainerApp bool = true
 @description('Does the PostgreSQL Flexible Server require any updates? False by default to avoid unnecessarily lengthy deploys.')
 param updatePsqlFlexibleServer bool = false
 
+@description('Whether to create or update Azure Monitor alerts during this deploy. False by default.')
+param deployAlerts bool = false
+
 @description('Public URLs of other components in the service.')
 param publicUrls {
   contentApi: string
@@ -183,6 +186,7 @@ module publicApiStorageModule 'application/public-api/publicApiStorage.bicep' = 
     resourceNames: resourceNames
     publicApiDataFileShareQuota: publicApiDataFileShareQuota
     storageFirewallRules: storageFirewallRules
+    deployAlerts: deployAlerts
     tagValues: tagValues
   }
 }
@@ -218,6 +222,7 @@ module postgreSqlServerModule 'application/shared/postgreSqlFlexibleServer.bicep
     firewallRules: postgreSqlFirewallRules
     sku: postgreSqlSkuName
     storageSizeGB: postgreSqlStorageSizeGB
+    deployAlerts: deployAlerts
     tagValues: tagValues
   }
   dependsOn: [
@@ -262,6 +267,7 @@ module apiAppModule 'application/public-api/publicApiApp.bicep' = if (deployCont
     publicSiteUrl: publicUrls.publicSite
     dockerImagesTag: dockerImagesTag
     appInsightsConnectionString: appInsightsModule.outputs.appInsightsConnectionString
+    deployAlerts: deployAlerts
     tagValues: tagValues
   }
   dependsOn: [
@@ -356,6 +362,7 @@ module appGatewayModule 'application/shared/appGateway.bicep' = if (deployContai
         ]
       }
     ]
+    deployAlerts: deployAlerts
     tagValues: tagValues
   }
 }
@@ -365,11 +372,11 @@ module dataProcessorModule 'application/public-api/publicApiDataProcessor.bicep'
   params: {
     location: location
     resourceNames: resourceNames
-    metricsNamePrefix: '${subscription}PublicDataProcessor'
     applicationInsightsKey: appInsightsModule.outputs.appInsightsKey
     dataProcessorAppRegistrationClientId: dataProcessorAppRegistrationClientId
     storageFirewallRules: storageFirewallRules
     dataProcessorFunctionAppExists: dataProcessorFunctionAppExists
+    deployAlerts: deployAlerts
     tagValues: tagValues
   }
   dependsOn: [