From 1a6d5cde71c181c3e72fe48b8e410967dc638557 Mon Sep 17 00:00:00 2001 From: Jon Maxwell Diebold Date: Wed, 23 Oct 2024 21:13:49 -0400 Subject: [PATCH 01/25] EQ-437 Prototyping full text search functionality --- app/client/package.json | 2 +- app/client/src/routes/home.tsx | 62 ++++- app/client/src/types/index.ts | 10 +- app/server/app/content/config/fields.json | 33 ++- .../app/content/config/listOptions.json | 1 + app/server/app/content/config/profiles.json | 21 ++ app/server/app/middleware.js | 7 +- app/server/app/routes/attains.js | 257 ++++++++++++++---- etl/app/content-private/tableConfig.json | 105 +++++++ 9 files changed, 426 insertions(+), 72 deletions(-) diff --git a/app/client/package.json b/app/client/package.json index 55a2b7a3..f722b1c6 100644 --- a/app/client/package.json +++ b/app/client/package.json @@ -20,7 +20,7 @@ }, "type": "module", "scripts": { - "dev": "vite", + "dev": "vite --host", "build": "vite build", "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0", "preview": "vite preview" diff --git a/app/client/src/routes/home.tsx b/app/client/src/routes/home.tsx index e78e0260..2fb7be8a 100644 --- a/app/client/src/routes/home.tsx +++ b/app/client/src/routes/home.tsx @@ -41,6 +41,7 @@ import type { Option, SingleOptionField, SingleValueField, + SingleValueRangeField, StaticOptions, Status, Value, @@ -587,7 +588,8 @@ function FilterFieldInputs({ otherField.domain === fieldConfig.domain, ); // All range inputs should have a high and a low boundary field - if (!pairedField || !isSingleValueField(pairedField)) return null; + if (!pairedField || !isSingleValueRangeField(pairedField)) + return null; return [ , fieldConfig.domain, ]; + case 'text': + return [ +
+ + + {tooltip && ( + + )} + +
+ +
+
, + fieldConfig.key, + ]; default: return null; } @@ -618,11 +654,17 @@ function FilterFieldInputs({ return (
- {fieldsJsx.map(([field, key]) => ( -
- {field} -
- ))} + {fieldsJsx.map(([field, key]) => + key === 'docTxt' ? ( +
+ {field} +
+ ) : ( +
+ {field} +
+ ), + )}
); } @@ -1683,10 +1725,16 @@ function isSingleOptionField(field: FilterField): field is SingleOptionField { } // Type narrowing -function isSingleValueField(field: FilterField): field is SingleValueField { +function isSingleValueRangeField( + field: FilterField, +): field is SingleValueRangeField { return field.type === 'date' || field.type === 'year'; } +function isSingleValueField(field: FilterField): field is SingleValueField { + return field.type === 'text' || isSingleValueRangeField(field); +} + // Type narrowing function isYearField(field: FilterField) { return field.type === 'year'; diff --git a/app/client/src/types/index.ts b/app/client/src/types/index.ts index 50de6a10..8d2a9b9a 100644 --- a/app/client/src/types/index.ts +++ b/app/client/src/types/index.ts @@ -26,7 +26,7 @@ type BaseFilterFieldConfig = { label: string; secondaryKey?: string; source?: string; - type: 'date' | 'multiselect' | 'select' | 'year'; + type: 'date' | 'multiselect' | 'select' | 'text' | 'year'; }; // Fields provided in the `domainValues` of the Content context @@ -84,7 +84,13 @@ export type SingleOptionField = BaseFilterFieldConfig & { type: 'select'; }; -export type SingleValueField = BaseFilterFieldConfig & { +export type SingleValueField = SingleValueTextField | SingleValueRangeField; + +export type SingleValueTextField = BaseFilterFieldConfig & { + type: 'text'; +}; + +export type SingleValueRangeField = BaseFilterFieldConfig & { boundary: 'low' | 'high'; domain: string; type: 'date' | 'year'; diff --git a/app/server/app/content/config/fields.json b/app/server/app/content/config/fields.json index 86101384..19e8409d 100644 --- a/app/server/app/content/config/fields.json +++ b/app/server/app/content/config/fields.json @@ -210,6 +210,11 @@ "label": "Delisted Reason", "type": "multiselect" }, + "docTxt": { + "key": "docTxt", + "label": "Search Text / Keyword", + "type": "text" + }, "epaIrCategory": { "key": "epaIrCategory", "label": "EPA IR Category", @@ -493,6 +498,7 @@ "assessmentUnit": "Search for a specific Assessment Unit", "associatedAction": "Search by Associated Action", "catchmentAssessmentUnit": "Search by Assessment Unit or NHDPlus Catchment", + "documentText": "Search by Document Text", "impairmentCause": "Search by Cause of Impairment", "impairmentSource": "Search by Probable Source of Impairment", "overallStatus": "Search by Overall Status", @@ -532,10 +538,21 @@ { "key": "actionsAssessmentUnit", "fields": ["assessmentUnitId"] }, { "key": "timeFrame", - "fields": [ - "fiscalYearEstablishedLo", - "fiscalYearEstablishedHi" - ] + "fields": ["fiscalYearEstablishedLo", "fiscalYearEstablishedHi"] + } + ], + "actionsDocuments": [ + { + "key": "documentText", + "fields": ["docTxt"] + }, + { + "key": "areaOfInterest", + "fields": ["region", "state", "organizationType", "organizationId"] + }, + { + "key": "action", + "fields": ["actionId"] } ], "assessments": [ @@ -706,7 +723,13 @@ }, { "key": "pollutantParameter", - "fields": ["pollutantGroup", "pollutant", "addressedParameterGroup", "addressedParameter", "sourceType"] + "fields": [ + "pollutantGroup", + "pollutant", + "addressedParameterGroup", + "addressedParameter", + "sourceType" + ] }, { "key": "permitId", "fields": ["npdesIdentifier", "otherIdentifier"] }, { diff --git a/app/server/app/content/config/listOptions.json b/app/server/app/content/config/listOptions.json index ae0fdcfc..b422bb44 100644 --- a/app/server/app/content/config/listOptions.json +++ b/app/server/app/content/config/listOptions.json @@ -10,6 +10,7 @@ ], "dataProfile": [ { "value": "actions", "label": "Actions" }, + { "value": "actionsDocuments", "label": "Actions Document Search" }, { "value": "assessmentUnits", "label": "Assessment Units" }, { "value": "assessmentUnitsMonitoringLocations", diff --git a/app/server/app/content/config/profiles.json b/app/server/app/content/config/profiles.json index 2b021e6b..c3e3d53b 100644 --- a/app/server/app/content/config/profiles.json +++ b/app/server/app/content/config/profiles.json @@ -30,6 +30,27 @@ "label": "Actions", "resource": "actions" }, + "actionsDocuments": { + "key": "actionsDocuments", + "description": "Contains information on documents associated with actions to restore and protect water quality.", + "columns": [ + "objectId", + "actionId", + "actionName", + "docId", + "docUrl", + "docFilename", + "docErrors", + "docTxt", + "organizationId", + "organizationName", + "organizationType", + "region", + "state" + ], + "label": "Actions Document Search", + "resource": "actionsDocuments" + }, "assessments": { "key": "assessments", "description": "Contains detailed information on waters assessed under Section 305(b) of the Clean Water Act and waters listed as impaired under Section 303(d) of the Clean Water Act. This includes assessed uses and parameter attainments.", diff --git a/app/server/app/middleware.js b/app/server/app/middleware.js index 609fa718..088259ef 100644 --- a/app/server/app/middleware.js +++ b/app/server/app/middleware.js @@ -63,7 +63,12 @@ async function getActiveSchema(req, res, next) { ); // Add activeSchema to the request object - req.activeSchema = schema.schema_name; + // TODO: Revert this when documents brought in with other data. + if (req.path.includes('/actionsDocuments')) { + req.activeSchema = 'search'; + } else { + req.activeSchema = schema.schema_name; + } next(); } catch (error) { diff --git a/app/server/app/routes/attains.js b/app/server/app/routes/attains.js index d3da6b09..472206b7 100644 --- a/app/server/app/routes/attains.js +++ b/app/server/app/routes/attains.js @@ -45,10 +45,10 @@ class InvalidParameterException extends Error { } class LimitExceededException extends Error { - constructor(limit) { + constructor(value, maximum = maxQuerySize) { super(); this.httpStatusCode = 400; - this.message = `The provided limit (${limit}) exceeds the maximum ${process.env.MAX_VALUES_QUERY_SIZE} allowable limit.`; + this.message = `The provided limit (${value.toLocaleString()}) exceeds the maximum ${maximum.toLocaleString()} allowable limit.`; } } @@ -100,10 +100,12 @@ function getColumnsFromAliases(columnAliases, profile) { } /** Get a subquery if "Latest" is used - * @param {Object} query KnexJS query object + * @param {Express.Request} req + * @param {Object} profile definition of the profile being queried + * @param {Object} params URL query value * @param {Object} columnName name of the "Latest" column * @param {Object} columnType data type of the "Latest" column - * @returns {Object} a different KnexJS query object + * @returns {Object} an updated KnexJS query object */ function createLatestSubquery(req, profile, params, columnName, columnType) { if (!['date', 'numeric', 'timestamptz'].includes(columnType)) return; @@ -155,13 +157,20 @@ function createLatestSubquery(req, profile, params, columnName, columnType) { /** * Creates a stream object from a query. * @param {Object} query KnexJS query object - * @param {Express.Response} req + * @param {Express.Request} req * @param {Express.Response} res * @param {string} format the format of the file attachment * @param {Object} excelDoc Excel workbook and worksheet objects * @param {number} nextId starting objectid for the next page */ -async function createStream(query, req, res, format, wbObject, nextId) { +async function createStream( + query, + req, + res, + format, + wbObject = null, + nextId = null, +) { pool.connect((err, client, done) => { if (err) throw err; @@ -292,7 +301,7 @@ function getQueryParams(req) { } // organize GET parameters to follow what we expect from POST - const optionsParams = ['f', 'format', 'startId']; + const optionsParams = ['f', 'format', 'limit', 'startId']; const parameters = { filters: {}, options: {}, @@ -311,6 +320,7 @@ function getQueryParams(req) { /** * Builds the select clause and where clause of the query based on the provided * profile name. + * @param {express.Request} req * @param {Object} query KnexJS query object * @param {Object} profile definition of the profile being queried * @param {Object} queryParams URL query value @@ -339,16 +349,30 @@ function parseCriteria(req, query, profile, queryParams, countOnly = false) { if (!columns.includes('objectId')) columns.push('objectId'); columns.forEach((col) => { const profileCol = profile.columns.find((pc) => pc.alias === col); - if (profileCol) columnsToReturn.push(profileCol); + if (profileCol && profileCol.output !== false) + columnsToReturn.push(profileCol); }); // build the select query const selectColumns = - columnsToReturn.length > 0 ? columnsToReturn : profile.columns; + columnsToReturn.length > 0 + ? columnsToReturn + : profile.columns.filter((col) => col.output !== false); const selectText = selectColumns.map((col) => col.name === col.alias ? col.name : `${col.name} AS ${col.alias}`, ); - query.select(selectText).orderBy('objectid', 'asc'); + if (profile.tableName === 'actions_documents') { + const rankText = 'ts_rank_cd(doc_tsv, query, 1 | 32)'; + query + .select( + knex.raw( + `to_char(${rankText} * 100, 'FM999') || '%' AS rank, ${selectText}`, + ), + ) + .orderBy(knex.raw(rankText), 'desc'); + } else { + query.select(selectText).orderBy('objectid', 'asc'); + } } // build where clause of the query @@ -359,7 +383,15 @@ function parseCriteria(req, query, profile, queryParams, countOnly = false) { if (lowArg || highArg) { appendRangeToWhere(query, col, lowArg, highArg); } else if (exactArg !== undefined) { - appendToWhere(query, col.name, queryParams.filters[col.alias]); + if (profile.tableName === 'actions_documents' && col.alias === 'docTxt') { + query.fromRaw( + `${req.activeSchema}.${profile.tableName}, websearch_to_tsquery(?) query`, + [exactArg], + ); + query.whereRaw('query @@ doc_tsv'); + } else { + appendToWhere(query, col.name, exactArg); + } } }); @@ -367,6 +399,104 @@ function parseCriteria(req, query, profile, queryParams, countOnly = false) { // add the "latest" subquery to the where clause query.whereIn(['organizationid', latestColumn.name], subQuery); } + + log.debug('query:', query.toString()); +} + +async function executeDocSearch(profile, req, res, preview = false) { + const metadataObj = populateMetdataObjFromRequest(req); + + // output types csv, tab-separated, Excel, or JSON + try { + const queryParams = getQueryParams(req); + validateQueryParams(queryParams, profile); + + const { options: { limit = maxQuerySize } = {} } = queryParams; + + if (limit > maxQuerySize) { + throw new LimitExceededException(queryParams.options.limit); + } + + const query = knex + .withSchema(req.activeSchema) + .from(profile.tableName) + .limit(limit); + + // verify atleast 1 parameter was provided, excluding the columns parameter + if ( + (!queryParams.columns || queryParams.columns.length === 0) && + Object.keys(queryParams.filters).length === 0 && + Object.keys(queryParams.options).length === 0 + ) { + throw new NoParametersException('Please provide at least one parameter'); + } + + parseCriteria(req, query, profile, queryParams); + + // Check that the query doesn't exceed the MAX_QUERY_SIZE. + if (!queryParams.options.limit && (await exceedsMaxSize(query))) { + return res.status(200).json({ + message: `The current query exceeds the maximum query size of ${maxQuerySize.toLocaleString()} rows. Please refine the search, or visit ${ + process.env.SERVER_URL + }/national-downloads to download a compressed dataset`, + }); + } + + const format = queryParams.options.format ?? queryParams.options.f; + if (['csv', 'tsv', 'xlsx'].includes(format)) { + await streamFile(query, req, res, format, profile.tableName); + } else { + await createStream(query, req, res, 'json'); + } + } catch (error) { + log.error( + formatLogMsg( + metadataObj, + `Failed to get data from the "${profile.tableName}" table:`, + error, + ), + ); + return res + .status(error.httpStatusCode ?? 500) + .json({ error: error.toString() }); + } +} + +async function executeDocSearchCountOnly(profile, req, res) { + const metadataObj = populateMetdataObjFromRequest(req); + + // always return json with the count + try { + const queryParams = getQueryParams(req); + validateQueryParams(queryParams, profile); + + // query against the ..._count mv when no filters are applied, for better performance + /*if (Object.keys(queryParams.filters).length === 0) { + const query = knex + .withSchema(req.activeSchema) + .from(`${profile.tableName}_count`); + const count = (await queryPool(query, true)).count; + return res.status(200).json({ count, maxCount: maxQuerySize }); + }*/ + + const query = knex.withSchema(req.activeSchema).from(profile.tableName); + + parseCriteria(req, query, profile, queryParams, true); + + const count = (await queryPool(query.count(), true)).count; + return res.status(200).json({ count, maxCount: maxQuerySize }); + } catch (error) { + log.error( + formatLogMsg( + metadataObj, + `Failed to get count from the "${profile.tableName}" table:`, + error, + ), + ); + return res + .status(error.httpStatusCode ?? 500) + .json({ error: error.toString() }); + } } /** @@ -436,7 +566,7 @@ async function executeQuery(profile, req, res) { /** * Throws an error if multiple instances of a parameter were provided * for an option or filter that accepts a single argument only - * @param {Object} queryFilters URL query value for filters + * @param {Object} queryParams URL query value for filters * @param {Object} profile definition of the profile being queried */ function validateQueryParams(queryParams, profile) { @@ -547,7 +677,7 @@ async function executeQueryCountPerOrgCycle(profile, req, res) { log.error( formatLogMsg( metadataObj, - `Failed to get counts per organizaiton and reporting cycle from the "${profile.tableName}" table:`, + `Failed to get counts per organization and reporting cycle from the "${profile.tableName}" table:`, error, ), ); @@ -559,20 +689,14 @@ async function executeQueryCountPerOrgCycle(profile, req, res) { /** * Retrieves the domain values for a single table column. + * @param {Object} profile definition of the profile being queried * @param {express.Request} req * @param {express.Response} res */ -async function executeValuesQuery(req, res) { +async function executeValuesQuery(profile, req, res) { const metadataObj = populateMetdataObjFromRequest(req); try { - const profile = privateConfig.tableConfig[req.params.profile]; - if (!profile) { - return res - .status(404) - .json({ message: 'The requested profile does not exist' }); - } - const { additionalColumns, ...params } = getQueryParamsValues(req); if (!params.text && !params.limit) { @@ -730,7 +854,7 @@ async function queryColumnValues(profile, columns, params, schema) { const maxValuesQuerySize = parseInt(process.env.MAX_VALUES_QUERY_SIZE); if (params.limit > maxValuesQuerySize) { - throw new LimitExceededException(params.limit); + throw new LimitExceededException(params.limit, maxValuesQuerySize); } query.limit(params.limit ?? maxValuesQuerySize); @@ -911,47 +1035,68 @@ export default function (app, basePath) { Object.entries(privateConfig.tableConfig).forEach( ([profileName, profile]) => { - // create get requests - router.get(`/${profileName}`, async function (req, res) { - await executeQuery(profile, req, res); - }); - router.get(`/${profileName}/count`, async function (req, res) { - await executeQueryCountOnly(profile, req, res); - }); - - // create post requests - router.post(`/${profileName}`, async function (req, res) { - await executeQuery(profile, req, res); - }); - router.post(`/${profileName}/count`, async function (req, res) { - await executeQueryCountOnly(profile, req, res); - }); - // get column domain values - router.post('/:profile/values/:column', async function (req, res) { - await executeValuesQuery(req, res); + router.post(`/${profileName}/values/:column`, async function (req, res) { + await executeValuesQuery(profile, req, res); }); - // get bean counts - router.get(`/${profileName}/countPerOrgCycle`, async function (req, res) { - await executeQueryCountPerOrgCycle(profile, req, res); - }); - router.post( - `/${profileName}/countPerOrgCycle`, - async function (req, res) { - await executeQueryCountPerOrgCycle(profile, req, res); - }, - ); + if (profileName === 'actionsDocuments') { + // create get requests + router.get(`/${profileName}`, async function (req, res) { + await executeDocSearch(profile, req, res); + }); + router.get(`/${profileName}/count`, async function (req, res) { + await executeDocSearchCountOnly(profile, req, res); + }); - router.get('/health/etlDatabase', async function (req, res) { - await checkDatabaseHealth(req, res); - }); + // create post requests + router.post(`/${profileName}`, async function (req, res) { + await executeDocSearch(profile, req, res); + }); + router.post(`/${profileName}/count`, async function (req, res) { + await executeDocSearchCountOnly(profile, req, res); + }); + } else { + // create get requests + router.get(`/${profileName}`, async function (req, res) { + await executeQuery(profile, req, res); + }); + router.get(`/${profileName}/count`, async function (req, res) { + await executeQueryCountOnly(profile, req, res); + }); - router.get('/health/etlDomainValues', async function (req, res) { - await checkDomainValuesHealth(req, res); - }); + // create post requests + router.post(`/${profileName}`, async function (req, res) { + await executeQuery(profile, req, res); + }); + router.post(`/${profileName}/count`, async function (req, res) { + await executeQueryCountOnly(profile, req, res); + }); + + // get bean counts + router.get( + `/${profileName}/countPerOrgCycle`, + async function (req, res) { + await executeQueryCountPerOrgCycle(profile, req, res); + }, + ); + router.post( + `/${profileName}/countPerOrgCycle`, + async function (req, res) { + await executeQueryCountPerOrgCycle(profile, req, res); + }, + ); + } }, ); + router.get('/health/etlDatabase', async function (req, res) { + await checkDatabaseHealth(req, res); + }); + + router.get('/health/etlDomainValues', async function (req, res) { + await checkDomainValuesHealth(req, res); + }); + app.use(`${basePath}api/attains`, router); } diff --git a/etl/app/content-private/tableConfig.json b/etl/app/content-private/tableConfig.json index 395ae26b..b9ab33d1 100644 --- a/etl/app/content-private/tableConfig.json +++ b/etl/app/content-private/tableConfig.json @@ -195,6 +195,111 @@ } ] }, + "actionsDocuments": { + "tableName": "actions_documents", + "idColumn": "objectid", + "createQuery": "CREATE TABLE IF NOT EXISTS search.actions_documents ( objectid serial PRIMARY KEY, actionid VARCHAR(45), actionname VARCHAR(255), doc_id text, doc_txt text, doc_url text, doc_filename text, doc_errors text, organizationid VARCHAR(30), organizationname VARCHAR(150), organizationtype VARCHAR(30), region VARCHAR(2), state VARCHAR(4000) )", + "columns": [ + { + "name": "objectid", + "alias": "objectId", + "skipIndex": true + }, + { + "name": "actionid", + "alias": "actionId" + }, + { + "name": "actionname", + "alias": "actionName" + }, + { + "name": "doc_id", + "alias": "docId" + }, + { + "name": "doc_txt", + "alias": "docTxt", + "output": false + }, + { + "name": "doc_url", + "alias": "docUrl" + }, + { + "name": "doc_filename", + "alias": "docFilename" + }, + { + "name": "doc_errors", + "alias": "docErrors" + }, + { + "name": "organizationid", + "alias": "organizationId" + }, + { + "name": "organizationname", + "alias": "organizationName" + }, + { + "name": "organizationtype", + "alias": "organizationType", + "skipIndex": true + }, + { + "name": "region", + "alias": "region" + }, + { + "name": "state", + "alias": "state" + } + ], + "materializedViewColumns": [ + { + "name": "statename", + "alias": "stateName" + } + ], + "materializedViews": [ + { + "name": "actionsdocuments_actions", + "columns": [ + { + "name": "actionid" + }, + { + "name": "actionname" + }, + { + "name": "organizationid" + }, + { + "name": "organizationname" + }, + { + "name": "organizationtype" + }, + { + "name": "region" + }, + { + "name": "state" + }, + { + "name": "statename" + } + ], + "joins": [ + { + "table": "states", + "joinKey": ["state", "statecode"] + } + ] + } + ] + }, "assessments": { "tableName": "assessments", "idColumn": "objectid", From 34b91145b69341c0e014bb4077705b44b9dfb268 Mon Sep 17 00:00:00 2001 From: Jon Maxwell Diebold Date: Thu, 24 Oct 2024 11:55:59 -0400 Subject: [PATCH 02/25] EQ-437 Added search results preview to UI --- app/client/public/scss/_uswds-theme.scss | 4 + app/client/src/components/inPageNav.tsx | 6 +- app/client/src/images/search.svg | 1 + app/client/src/routes/home.tsx | 104 ++++++++++++- app/server/app/routes/attains.js | 189 +++++++---------------- 5 files changed, 170 insertions(+), 134 deletions(-) create mode 100644 app/client/src/images/search.svg diff --git a/app/client/public/scss/_uswds-theme.scss b/app/client/public/scss/_uswds-theme.scss index d80cbc3a..2da28055 100644 --- a/app/client/public/scss/_uswds-theme.scss +++ b/app/client/public/scss/_uswds-theme.scss @@ -16,6 +16,10 @@ in the form $setting: value, 'output': true, 'responsive': true, ), + $max-height-settings: ( + 'output': true, + 'responsive': true, + ), $theme-font-weight-semibold: 600, $theme-show-notifications: false, $theme-utility-breakpoints: ( diff --git a/app/client/src/components/inPageNav.tsx b/app/client/src/components/inPageNav.tsx index a62a9a12..8d8bab5f 100644 --- a/app/client/src/components/inPageNav.tsx +++ b/app/client/src/components/inPageNav.tsx @@ -113,7 +113,9 @@ function useInPageNavDispatch() { ## Components */ -export function InPageNavLayout({ children }: Readonly<{ children: ReactNode }>) { +export function InPageNavLayout({ + children, +}: Readonly<{ children: ReactNode }>) { return ( {children} @@ -168,7 +170,7 @@ function InPageNavLayoutInner({ children }: Readonly<{ children: ReactNode }>) { )} -
{children}
+
{children}
); } diff --git a/app/client/src/images/search.svg b/app/client/src/images/search.svg new file mode 100644 index 00000000..cd9fd53c --- /dev/null +++ b/app/client/src/images/search.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/app/client/src/routes/home.tsx b/app/client/src/routes/home.tsx index 2fb7be8a..c75a4e38 100644 --- a/app/client/src/routes/home.tsx +++ b/app/client/src/routes/home.tsx @@ -9,6 +9,7 @@ import { import Select from 'react-select'; import { AsyncPaginate, wrapMenuList } from 'react-select-async-paginate'; import Download from 'images/file_download.svg?react'; +import Search from 'images/search.svg?react'; // components import { AccordionItem } from 'components/accordion'; import { Alert } from 'components/alert'; @@ -16,7 +17,7 @@ import { Checkboxes } from 'components/checkboxes'; import { CopyBox } from 'components/copyBox'; import { InfoTooltip } from 'components/tooltip'; import { InPageNavAnchor, NumberedInPageNavLabel } from 'components/inPageNav'; -import { Loading } from 'components/loading'; +import { Loading, LoadingButtonIcon } from 'components/loading'; import { DownloadModal } from 'components/downloadModal'; import { ClearSearchModal } from 'components/clearSearchModal'; import { MenuList as CustomMenuList } from 'components/menuList'; @@ -37,6 +38,7 @@ import type { GroupBase } from 'react-select'; import type { LoadOptions } from 'react-select-async-paginate'; import type { DomainOptions, + FetchState, MultiOptionField, Option, SingleOptionField, @@ -293,6 +295,31 @@ export function QueryBuilder() { downloadConfirmationVisible || clearConfirmationVisible, ); + const [preview, setPreview] = useState< + FetchState>> + >({ + data: null, + status: 'idle', + }); + const executeSearchPreview = () => { + if (preview.status === 'pending') return; + + setPreview({ data: null, status: 'pending' }); + postData({ + url: `${apiUrl}/${profile.resource}/preview`, + apiKey, + data: queryParams, + }) + .then((res) => { + setPreview({ data: res, status: 'success' }); + }) + .catch((err) => { + if (isAbort(err)) return; + console.error(err); + setPreview({ data: null, status: 'failure' }); + }); + }; + const navigate = useNavigate(); return ( @@ -357,6 +384,81 @@ export function QueryBuilder() { staticOptions={staticOptions} /> + {profile.key === 'actionsDocuments' && ( + <> +
+ {preview.status === 'pending' ? ( + + ) : ( + + )} +
+ + {preview.status === 'success' && ( + <> + {preview.data.length === 0 ? ( + No results found + ) : ( +
+ + + + + + + + + + + + + + {preview.data.map((row) => ( + + + + + + + + + + ))} + +
RankDocument URLAction IDRegionStateOrganization IDHMW Plan Summary URL
{row.rank} + + {row.docFilename} + + {row.actionId}{row.region}{row.state}{row.organizationId}
+
+ )} + + )} + + )} + 0 ? columnsToReturn : profile.columns.filter((col) => col.output !== false); - const selectText = selectColumns.map((col) => - col.name === col.alias ? col.name : `${col.name} AS ${col.alias}`, + return selectColumns.map((col) => + col.name === col.alias ? col.name : `${col.name} AS "${col.alias}"`, ); - if (profile.tableName === 'actions_documents') { - const rankText = 'ts_rank_cd(doc_tsv, query, 1 | 32)'; - query - .select( - knex.raw( - `to_char(${rankText} * 100, 'FM999') || '%' AS rank, ${selectText}`, - ), - ) - .orderBy(knex.raw(rankText), 'desc'); - } else { - query.select(selectText).orderBy('objectid', 'asc'); - } + } + + // build select statement of the query + if (!countOnly) { + query.select(getSelectText()).orderBy('objectid', 'asc'); } // build where clause of the query @@ -389,6 +382,17 @@ function parseCriteria(req, query, profile, queryParams, countOnly = false) { [exactArg], ); query.whereRaw('query @@ doc_tsv'); + if (!countOnly) { + const rankText = 'ts_rank_cd(doc_tsv, query, 1 | 32)'; + const selectText = getSelectText(); + query + .select( + knex.raw( + `to_char(${rankText} * 100, 'FM999') || '%' AS rank, ${selectText}`, + ), + ) + .orderBy(knex.raw(rankText), 'desc'); + } } else { appendToWhere(query, col.name, exactArg); } @@ -399,55 +403,32 @@ function parseCriteria(req, query, profile, queryParams, countOnly = false) { // add the "latest" subquery to the where clause query.whereIn(['organizationid', latestColumn.name], subQuery); } - - log.debug('query:', query.toString()); } -async function executeDocSearch(profile, req, res, preview = false) { +/** + * Runs a query against the provided profile name and streams the a portion + * of the result to the client as inline json. + * @param {Object} profile definition of the profile being queried + * @param {express.Request} req + * @param {express.Response} res + */ +async function executeQueryPreview(profile, req, res) { const metadataObj = populateMetdataObjFromRequest(req); - // output types csv, tab-separated, Excel, or JSON try { const queryParams = getQueryParams(req); validateQueryParams(queryParams, profile); - const { options: { limit = maxQuerySize } = {} } = queryParams; - - if (limit > maxQuerySize) { - throw new LimitExceededException(queryParams.options.limit); - } - const query = knex .withSchema(req.activeSchema) .from(profile.tableName) - .limit(limit); - - // verify atleast 1 parameter was provided, excluding the columns parameter - if ( - (!queryParams.columns || queryParams.columns.length === 0) && - Object.keys(queryParams.filters).length === 0 && - Object.keys(queryParams.options).length === 0 - ) { - throw new NoParametersException('Please provide at least one parameter'); - } + .limit(previewSize); parseCriteria(req, query, profile, queryParams); - // Check that the query doesn't exceed the MAX_QUERY_SIZE. - if (!queryParams.options.limit && (await exceedsMaxSize(query))) { - return res.status(200).json({ - message: `The current query exceeds the maximum query size of ${maxQuerySize.toLocaleString()} rows. Please refine the search, or visit ${ - process.env.SERVER_URL - }/national-downloads to download a compressed dataset`, - }); - } + const queryRes = await queryPool(query); - const format = queryParams.options.format ?? queryParams.options.f; - if (['csv', 'tsv', 'xlsx'].includes(format)) { - await streamFile(query, req, res, format, profile.tableName); - } else { - await createStream(query, req, res, 'json'); - } + return res.status(200).json(queryRes); } catch (error) { log.error( formatLogMsg( @@ -462,43 +443,6 @@ async function executeDocSearch(profile, req, res, preview = false) { } } -async function executeDocSearchCountOnly(profile, req, res) { - const metadataObj = populateMetdataObjFromRequest(req); - - // always return json with the count - try { - const queryParams = getQueryParams(req); - validateQueryParams(queryParams, profile); - - // query against the ..._count mv when no filters are applied, for better performance - /*if (Object.keys(queryParams.filters).length === 0) { - const query = knex - .withSchema(req.activeSchema) - .from(`${profile.tableName}_count`); - const count = (await queryPool(query, true)).count; - return res.status(200).json({ count, maxCount: maxQuerySize }); - }*/ - - const query = knex.withSchema(req.activeSchema).from(profile.tableName); - - parseCriteria(req, query, profile, queryParams, true); - - const count = (await queryPool(query.count(), true)).count; - return res.status(200).json({ count, maxCount: maxQuerySize }); - } catch (error) { - log.error( - formatLogMsg( - metadataObj, - `Failed to get count from the "${profile.tableName}" table:`, - error, - ), - ); - return res - .status(error.httpStatusCode ?? 500) - .json({ error: error.toString() }); - } -} - /** * Runs a query against the provided profile name and streams the result to the * client as csv, tsv, xlsx, json file, or inline json. @@ -1040,53 +984,36 @@ export default function (app, basePath) { await executeValuesQuery(profile, req, res); }); - if (profileName === 'actionsDocuments') { - // create get requests - router.get(`/${profileName}`, async function (req, res) { - await executeDocSearch(profile, req, res); - }); - router.get(`/${profileName}/count`, async function (req, res) { - await executeDocSearchCountOnly(profile, req, res); - }); + router.post(`/${profileName}/preview`, async function (req, res) { + await executeQueryPreview(profile, req, res); + }); - // create post requests - router.post(`/${profileName}`, async function (req, res) { - await executeDocSearch(profile, req, res); - }); - router.post(`/${profileName}/count`, async function (req, res) { - await executeDocSearchCountOnly(profile, req, res); - }); - } else { - // create get requests - router.get(`/${profileName}`, async function (req, res) { - await executeQuery(profile, req, res); - }); - router.get(`/${profileName}/count`, async function (req, res) { - await executeQueryCountOnly(profile, req, res); - }); + // create get requests + router.get(`/${profileName}`, async function (req, res) { + await executeQuery(profile, req, res); + }); + router.get(`/${profileName}/count`, async function (req, res) { + await executeQueryCountOnly(profile, req, res); + }); - // create post requests - router.post(`/${profileName}`, async function (req, res) { - await executeQuery(profile, req, res); - }); - router.post(`/${profileName}/count`, async function (req, res) { - await executeQueryCountOnly(profile, req, res); - }); + // create post requests + router.post(`/${profileName}`, async function (req, res) { + await executeQuery(profile, req, res); + }); + router.post(`/${profileName}/count`, async function (req, res) { + await executeQueryCountOnly(profile, req, res); + }); - // get bean counts - router.get( - `/${profileName}/countPerOrgCycle`, - async function (req, res) { - await executeQueryCountPerOrgCycle(profile, req, res); - }, - ); - router.post( - `/${profileName}/countPerOrgCycle`, - async function (req, res) { - await executeQueryCountPerOrgCycle(profile, req, res); - }, - ); - } + // get bean counts + router.get(`/${profileName}/countPerOrgCycle`, async function (req, res) { + await executeQueryCountPerOrgCycle(profile, req, res); + }); + router.post( + `/${profileName}/countPerOrgCycle`, + async function (req, res) { + await executeQueryCountPerOrgCycle(profile, req, res); + }, + ); }, ); From 231414b1ab0f3448d8bbf1efac7caa4dcf972680 Mon Sep 17 00:00:00 2001 From: Jon Maxwell Diebold Date: Thu, 7 Nov 2024 10:11:55 -0500 Subject: [PATCH 03/25] EQ-437 Move percent sign into column header --- app/client/src/routes/home.tsx | 4 ++-- app/server/app/routes/attains.js | 33 +++++++++++++++----------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/app/client/src/routes/home.tsx b/app/client/src/routes/home.tsx index c75a4e38..a37f35d3 100644 --- a/app/client/src/routes/home.tsx +++ b/app/client/src/routes/home.tsx @@ -386,7 +386,7 @@ export function QueryBuilder() { {profile.key === 'actionsDocuments' && ( <> -
+
{preview.status === 'pending' ? (
); } diff --git a/app/client/src/components/page.tsx b/app/client/src/components/page.tsx index 917130b6..fe4d6739 100644 --- a/app/client/src/components/page.tsx +++ b/app/client/src/components/page.tsx @@ -94,7 +94,12 @@ type HeaderLinkProps = { href: string; }; -function HeaderLink({ className, children, icon, href }: Readonly) { +function HeaderLink({ + className, + children, + icon, + href, +}: Readonly) { const Icon = icon; return ( diff --git a/app/client/src/components/previewModal.tsx b/app/client/src/components/previewModal.tsx new file mode 100644 index 00000000..94c5ce79 --- /dev/null +++ b/app/client/src/components/previewModal.tsx @@ -0,0 +1,178 @@ +import { uniqueId } from 'lodash'; +import { Dialog } from '@reach/dialog'; +import { useEffect, useState } from 'react'; +import Close from 'images/close.svg?react'; +// components +import { Alert } from 'components/alert'; +import { Loading } from 'components/loading'; +// utils +import { isAbort, postData, useAbort } from 'utils'; +// styles +import '@reach/dialog/styles.css'; +// types +import type { FetchState, QueryData, Value } from 'types'; + +export function PreviewModal({ + apiKey, + limit, + onClose, + queryData, + queryUrl, +}: Readonly>) { + const { abort, getSignal } = useAbort(); + + const closeModal = () => { + abort(); + onClose(); + }; + + const [id] = useState(uniqueId('modal-')); + + const [preview, setPreview] = useState< + FetchState>> + >({ + data: null, + status: 'idle', + }); + + useEffect(() => { + setPreview({ data: null, status: 'pending' }); + postData({ + url: queryUrl, + apiKey, + data: { + ...queryData, + options: { + ...queryData.options, + format: 'json', + pageSize: limit, + }, + }, + signal: getSignal(), + }) + .then((res) => { + setPreview({ data: res.data, status: 'success' }); + }) + .catch((err) => { + if (isAbort(err)) return; + console.error(err); + setPreview({ data: null, status: 'failure' }); + }); + }, [apiKey, queryData, queryUrl]); + + return ( + +
+
+

+ Results Preview{' '} + + (limited to {limit} rows) + +

+ {preview.status === 'pending' && ( +
+ +

Searching, please wait...

+
+ )} + {preview.status === 'failure' && ( + + The specified query could not be executed at this time. + + )} + {preview.status === 'success' && ( + <> + {preview.data.length === 0 ? ( + No results found + ) : ( + <> +
+ + {/*
+
*/} + + + + + + + + + + + + + {preview.data.map((row) => ( + + + + + + + + + + ))} + +
+ Rank (%) + + Document URL + Action IDRegionStateOrganization IDHMW Plan Summary URL
{row.rankPercent} + + {row.docFilename} + + {row.actionId}{row.region}{row.state} + {row.organizationId} +
+
+ + )} + + )} +
+ +
+
+ ); +} + +/* +## Types +*/ + +type PreviewModalProps = { + apiKey: string; + limit: number; + onClose: () => void; + queryData: D; + queryUrl: string; +}; + +export default PreviewModal; diff --git a/app/client/src/contexts/content.tsx b/app/client/src/contexts/content.tsx index 51b7a9de..4d384f36 100644 --- a/app/client/src/contexts/content.tsx +++ b/app/client/src/contexts/content.tsx @@ -66,6 +66,7 @@ export type Content = { }; parameters: { debounceMilliseconds: number; + searchPreviewPageSize: number; selectOptionsPageSize: number; }; profileConfig: { diff --git a/app/client/src/routes/home.tsx b/app/client/src/routes/home.tsx index a37f35d3..b7779283 100644 --- a/app/client/src/routes/home.tsx +++ b/app/client/src/routes/home.tsx @@ -21,6 +21,7 @@ import { Loading, LoadingButtonIcon } from 'components/loading'; import { DownloadModal } from 'components/downloadModal'; import { ClearSearchModal } from 'components/clearSearchModal'; import { MenuList as CustomMenuList } from 'components/menuList'; +import { PreviewModal } from 'components/previewModal'; import { RadioButtons } from 'components/radioButtons'; import { SourceSelect } from 'components/sourceSelect'; import { StepIndicator } from 'components/stepIndicator'; @@ -219,6 +220,7 @@ function HomeContent({ content }: Readonly<{ content: Content }>) { format, formatHandler: setFormat, glossary, + previewLimit: content.parameters.searchPreviewPageSize, profile, queryParams, resetFilters, @@ -268,6 +270,7 @@ export function QueryBuilder() { format, formatHandler, glossary, + previewLimit, profile, resetFilters, sourceFields, @@ -277,16 +280,22 @@ export function QueryBuilder() { } = useHomeContext(); const { - clearConfirmationVisible, - closeClearConfirmation, - openClearConfirmation, - } = useClearConfirmationVisibility(); + visible: clearConfirmationVisible, + close: closeClearConfirmation, + open: openClearConfirmation, + } = useModalVisibility(); const { - closeDownloadConfirmation, - downloadConfirmationVisible, - openDownloadConfirmation, - } = useDownloadConfirmationVisibility(); + close: closeDownloadConfirmation, + visible: downloadConfirmationVisible, + open: openDownloadConfirmation, + } = useModalVisibility(); + + const { + close: closeSearchPreview, + visible: searchPreviewVisible, + open: openSearchPreview, + } = useModalVisibility(); const [downloadStatus, setDownloadStatus] = useDownloadStatus( profile, @@ -306,12 +315,19 @@ export function QueryBuilder() { setPreview({ data: null, status: 'pending' }); postData({ - url: `${apiUrl}/${profile.resource}/preview`, + url: `${apiUrl}/${profile.resource}`, apiKey, - data: queryParams, + data: { + ...queryParams, + options: { + ...queryParams.options, + format: 'json', + pageSize: previewLimit, + }, + }, }) .then((res) => { - setPreview({ data: res, status: 'success' }); + setPreview({ data: res.data, status: 'success' }); }) .catch((err) => { if (isAbort(err)) return; @@ -324,33 +340,21 @@ export function QueryBuilder() { return ( <> - {downloadConfirmationVisible && ( - - )} - {clearConfirmationVisible && ( - { - resetFilters(); - navigate('/attains', { replace: true }); - }} - onClose={closeClearConfirmation} - /> - )}
+ {clearConfirmationVisible && ( + { + resetFilters(); + navigate('/attains', { replace: true }); + }} + onClose={closeClearConfirmation} + /> + )} - {profile.key === 'actionsDocuments' && ( + {/*profile.key === 'actionsDocuments' && ( <>
{preview.status === 'pending' ? ( @@ -407,7 +411,7 @@ export function QueryBuilder() { role="img" focusable="false" /> - Search + Preview )}
@@ -417,8 +421,8 @@ export function QueryBuilder() { {preview.data.length === 0 ? ( No results found ) : ( -
- +
+
@@ -433,7 +437,7 @@ export function QueryBuilder() { {preview.data.map((row) => ( - +
Rank (%)
{row.rank}{row.rankPercent} )} + )*/} + + {profile.key === 'actionsDocuments' && ( + <> +
+ +
+ {searchPreviewVisible && ( + + )} + )} Download + {downloadConfirmationVisible && ( + + )} {downloadStatus === 'success' && ( { - setClearConfirmationVisible(false); - }, []); - - const openClearConfirmation = useCallback(() => { - setClearConfirmationVisible(true); - }, []); - - return { - clearConfirmationVisible, - closeClearConfirmation, - openClearConfirmation, - }; -} - -function useDownloadConfirmationVisibility() { - const [downloadConfirmationVisible, setDownloadConfirmationVisible] = - useState(false); +function useModalVisibility() { + const [visible, setVisible] = useState(false); - const closeDownloadConfirmation = useCallback(() => { - setDownloadConfirmationVisible(false); + const close = useCallback(() => { + setVisible(false); }, []); - const openDownloadConfirmation = useCallback(() => { - setDownloadConfirmationVisible(true); + const open = useCallback(() => { + setVisible(true); }, []); return { - closeDownloadConfirmation, - downloadConfirmationVisible, - openDownloadConfirmation, + visible, + close, + open, }; } @@ -2068,6 +2095,8 @@ type FilterQueryData = { }; type HomeContext = { + apiKey: string; + apiUrl: string; filterFields: FilterFields; filterGroups: FilterGroup[]; filterGroupLabels: FilterGroupLabels; @@ -2076,10 +2105,9 @@ type HomeContext = { format: Option; formatHandler: (format: Option) => void; glossary: Content['glossary']; + previewLimit: number; profile: Profile; queryParams: QueryData; - apiKey: string; - apiUrl: string; resetFilters: () => void; sourceFields: SourceFields; sourceHandlers: SourceFieldInputHandlers; diff --git a/app/client/src/routes/nationalDownloads.tsx b/app/client/src/routes/nationalDownloads.tsx index 2658b8e1..38121f4b 100644 --- a/app/client/src/routes/nationalDownloads.tsx +++ b/app/client/src/routes/nationalDownloads.tsx @@ -69,7 +69,9 @@ type NationalDownloadsDataProps = { content: FetchState; }; -function NationalDownloadsData({ content }: Readonly) { +function NationalDownloadsData({ + content, +}: Readonly) { if (content.status !== 'success') return null; return ( @@ -144,7 +146,7 @@ function NationalDownloadsData({ content }: Readonly function ParagraphNoMargin( props: React.ClassAttributes & - React.HTMLAttributes + React.HTMLAttributes, ) { return

{props.children}

; } diff --git a/app/client/src/types/index.ts b/app/client/src/types/index.ts index 8d2a9b9a..497d693f 100644 --- a/app/client/src/types/index.ts +++ b/app/client/src/types/index.ts @@ -80,6 +80,16 @@ export type Option = { value: Value; }; +export type QueryData = { + columns: string[]; + filters: { + [field: string]: Value | Value[]; + }; + options: { + [field: string]: Value; + }; +}; + export type SingleOptionField = BaseFilterFieldConfig & { type: 'select'; }; diff --git a/app/server/.env.example b/app/server/.env.example index f31f26e2..468d47d6 100644 --- a/app/server/.env.example +++ b/app/server/.env.example @@ -18,5 +18,4 @@ DB_POOL_MAX=20 STREAM_BATCH_SIZE=2000 STREAM_HIGH_WATER_MARK=10000 MAX_QUERY_SIZE=1000000 -MAX_VALUES_QUERY_SIZE=100 -JSON_PAGE_SIZE=1000 +MAX_PAGE_SIZE=500 diff --git a/app/server/app/content/config/parameters.json b/app/server/app/content/config/parameters.json index fade84ac..2d773bd4 100644 --- a/app/server/app/content/config/parameters.json +++ b/app/server/app/content/config/parameters.json @@ -1,4 +1,5 @@ { "debounceMilliseconds": 250, + "searchPreviewPageSize": 500, "selectOptionsPageSize": 20 } diff --git a/app/server/app/routes/attains.js b/app/server/app/routes/attains.js index c1a431fb..98800209 100644 --- a/app/server/app/routes/attains.js +++ b/app/server/app/routes/attains.js @@ -17,9 +17,8 @@ import { getPrivateConfig, getS3Client } from '../utilities/s3.js'; import StreamingService from '../utilities/streamingService.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); -const jsonPageSize = parseInt(process.env.JSON_PAGE_SIZE); -const maxQuerySize = parseInt(process.env.MAX_QUERY_SIZE); -const previewSize = parseInt(process.env.PREVIEW_SIZE || 500); +const maxPageSize = parseInt(process.env.MAX_PAGE_SIZE || 500); +const maxQuerySize = parseInt(process.env.MAX_QUERY_SIZE || 1_000_000); const minDateTime = new Date(-8640000000000000); const maxDateTime = new Date(8640000000000000); @@ -38,10 +37,10 @@ class DuplicateParameterException extends Error { } class InvalidParameterException extends Error { - constructor(parameter) { + constructor(parameter, context) { super(); this.httpStatusCode = 400; - this.message = `The parameter '${parameter}' is not valid for the specified profile`; + this.message = `The parameter '${parameter}' is not valid for the specified ${context}`; } } @@ -126,7 +125,7 @@ function createLatestSubquery(req, profile, params, columnName, columnType) { const columnsForFilter = []; const columnNamesForFilter = []; columns.forEach((col) => { - if (params.filters.hasOwnProperty(col.alias)) { + if (col.output !== false && params.filters.hasOwnProperty(col.alias)) { columnsForFilter.push(col); columnNamesForFilter.push(col.name); } @@ -158,19 +157,17 @@ function createLatestSubquery(req, profile, params, columnName, columnType) { /** * Creates a stream object from a query. * @param {Object} query KnexJS query object - * @param {Express.Request} req * @param {Express.Response} res * @param {string} format the format of the file attachment * @param {Object} excelDoc Excel workbook and worksheet objects - * @param {number} nextId starting objectid for the next page + * @param {Object} pageOptions page number and page size for paginated JSON */ async function createStream( query, - req, res, format, wbObject = null, - nextId = null, + pageOptions = null, ) { pool.connect((err, client, done) => { if (err) throw err; @@ -182,7 +179,7 @@ async function createStream( const stream = client.query(qStream); stream.on('end', done); - StreamingService.streamResponse(res, stream, format, wbObject, nextId); + StreamingService.streamResponse(res, stream, format, wbObject, pageOptions); }); } @@ -209,37 +206,24 @@ async function streamFile(query, req, res, format, baseName) { }); const worksheet = workbook.addWorksheet('data'); - createStream(query, req, res, format, { workbook, worksheet }); + createStream(query, res, format, { workbook, worksheet }); } else { - createStream(query, req, res, format); + createStream(query, res, format); } } /** * Streams the results of a query as paginated JSON. * @param {Object} query KnexJS query object - * @param {Express.Response} req * @param {Express.Response} res - * @param {number} startId current objectid to start returning results from + * @param {number} pageNumber current page of results + * @param {number} pageSize number of results per page */ -async function streamJson(query, req, res, startId) { - if (startId) query.where('objectid', '>=', startId); - - const nextId = - ( - await queryPool( - knex - .select('objectId') - .from(query.clone().limit(maxQuerySize).as('q')) - .offset(jsonPageSize) - .limit(1), - true, - ) - )?.objectId ?? null; - - query.limit(jsonPageSize); +async function streamJson(query, res, pageNumber, pageSize) { + if (pageNumber > 1) query.offset((pageNumber - 1) * pageSize); + query.limit(pageSize); - createStream(query, req, res, 'json', null, nextId); + createStream(query, res, 'json', null, { pageNumber, pageSize }); } /** @@ -302,7 +286,7 @@ function getQueryParams(req) { } // organize GET parameters to follow what we expect from POST - const optionsParams = ['f', 'format', 'limit', 'startId']; + const optionsParams = ['f', 'format', 'limit', 'pageNumber', 'pageSize']; const parameters = { filters: {}, options: {}, @@ -367,7 +351,7 @@ function parseCriteria(req, query, profile, queryParams, countOnly = false) { query .select( knex.raw( - `to_char(${rankText} * 100, 'FM999') AS rank, ${selectText}`, + `to_char(${rankText} * 100, 'FM999') AS "rankPercent", ${selectText}`, ), ) .orderBy(knex.raw(rankText), 'desc'); @@ -485,10 +469,12 @@ async function executeQuery(profile, req, res) { if (['csv', 'tsv', 'xlsx'].includes(format)) { await streamFile(query, req, res, format, profile.tableName); } else { - const startId = queryParams.options.startId - ? parseInt(queryParams.options.startId) - : null; - await streamJson(query, req, res, startId); + await streamJson( + query, + res, + parseInt(queryParams.options.pageNumber || 1), + parseInt(queryParams.options.pageSize || 20), + ); } } catch (error) { log.error( @@ -512,14 +498,29 @@ async function executeQuery(profile, req, res) { */ function validateQueryParams(queryParams, profile) { Object.entries(queryParams.options).forEach(([name, value]) => { + // Each option should only be used once. if (Array.isArray(value)) throw new DuplicateParameterException(name); + + // 'pageNumber' and 'pageSize' are only allowed to be used with 'json' format. + const format = queryParams.options.format ?? queryParams.options.f; + if ( + ['pageNumber', 'pageSize'].includes(name) && + ['csv', 'tsv', 'xlsx'].includes(format) + ) { + throw new InvalidParameterException(name, 'response format'); + } + + // 'pageSize' must be less than or equal to the maximum page size. + if (name === 'pageSize' && parseInt(value) > maxPageSize) { + throw new LimitExceededException(value, maxPageSize); + } }); Object.entries(queryParams.filters).forEach(([name, value]) => { const column = profile.columns.find((c) => { if (c.lowParam === name || c.highParam === name || c.alias === name) return c; }); - if (!column) throw new InvalidParameterException(name); + if (!column) throw new InvalidParameterException(name, 'profile'); if (Array.isArray(value)) { if ( column.lowParam === name || @@ -642,7 +643,7 @@ async function executeValuesQuery(profile, req, res) { if (!params.text && !params.limit) { throw new NoParametersException( - `Please provide either a text filter or a limit that does not exceed ${process.env.MAX_VALUES_QUERY_SIZE}.`, + `Please provide either a text filter or a limit that does not exceed ${process.env.MAX_PAGE_SIZE}.`, ); } @@ -793,11 +794,11 @@ async function queryColumnValues(profile, columns, params, schema) { }); } - const maxValuesQuerySize = parseInt(process.env.MAX_VALUES_QUERY_SIZE); - if (params.limit > maxValuesQuerySize) { - throw new LimitExceededException(params.limit, maxValuesQuerySize); + const limit = params.limit ?? maxPageSize; + if (limit > maxPageSize) { + throw new LimitExceededException(params.limit, maxPageSize); } - query.limit(params.limit ?? maxValuesQuerySize); + query.limit(limit); return await queryPool(query); } @@ -981,10 +982,6 @@ export default function (app, basePath) { await executeValuesQuery(profile, req, res); }); - router.post(`/${profileName}/preview`, async function (req, res) { - await executeQueryPreview(profile, req, res); - }); - // create get requests router.get(`/${profileName}`, async function (req, res) { await executeQuery(profile, req, res); @@ -1001,16 +998,21 @@ export default function (app, basePath) { await executeQueryCountOnly(profile, req, res); }); - // get bean counts - router.get(`/${profileName}/countPerOrgCycle`, async function (req, res) { - await executeQueryCountPerOrgCycle(profile, req, res); - }); - router.post( - `/${profileName}/countPerOrgCycle`, - async function (req, res) { - await executeQueryCountPerOrgCycle(profile, req, res); - }, - ); + if (profileName !== 'actionsDocuments') { + // get bean counts + router.get( + `/${profileName}/countPerOrgCycle`, + async function (req, res) { + await executeQueryCountPerOrgCycle(profile, req, res); + }, + ); + router.post( + `/${profileName}/countPerOrgCycle`, + async function (req, res) { + await executeQueryCountPerOrgCycle(profile, req, res); + }, + ); + } }, ); diff --git a/app/server/app/utilities/streamingService.js b/app/server/app/utilities/streamingService.js index 3ab2f584..d3eee715 100644 --- a/app/server/app/utilities/streamingService.js +++ b/app/server/app/utilities/streamingService.js @@ -79,12 +79,17 @@ export default class StreamingService { /** * Transforms the streaming data to json. * @param {function} preHook function for writing initial headers - * @param {number} nextId starting objectid for the next page + * @param {Object} pageOptions page number and page size for paginated JSON * @returns Transform object */ - static getJsonTransform = (preHook, nextId) => { + static getJsonTransform = (preHook, pageOptions = {}) => { + const { pageNumber, pageSize } = pageOptions; const start = '{ "data": ['; - const end = ']' + (nextId ? `, "nextId": ${nextId}` : '') + '}'; + const end = + ']' + + (pageNumber ? `, "pageNumber": ${pageNumber}` : '') + + (pageSize ? `, "pageSize": ${pageSize}` : '') + + '}'; return new Transform({ writableObjectMode: true, transform(data, _encoding, callback) { @@ -153,14 +158,14 @@ export default class StreamingService { * @param {Transform} inStream readable stream from database query * @param {'csv'|'tsv'|'xlsx'|'json'|''} format export format file type * @param {Object} excelDoc Excel workbook and worksheet objects - * @param {number} nextId starting objectid for the next page + * @param {Object} pageOptions page number and page size for paginated JSON */ static streamResponse = ( outStream, inStream, format, excelDoc = null, - nextId = null, + pageOptions = null, ) => { const { preHook, errorHook, errorHandler } = StreamingService.getOptions( outStream, @@ -173,7 +178,7 @@ export default class StreamingService { outStream.end(); }); - let transform = StreamingService.getJsonTransform(preHook, nextId); + let transform = StreamingService.getJsonTransform(preHook, pageOptions); if (format === 'csv' || format === 'tsv') { transform = StreamingService.getBasicTransform(preHook, format); } From 2b5c2466edd1abe00ec77baa2041d93b93cb4f74 Mon Sep 17 00:00:00 2001 From: Jon Maxwell Diebold Date: Thu, 21 Nov 2024 15:08:17 -0500 Subject: [PATCH 05/25] EQ-437 Enabled sorting on table --- app/client/src/components/previewModal.tsx | 119 +++++------ app/client/src/components/table.tsx | 226 +++++++++++++++++++++ app/client/src/types/uswds.d.ts | 1 + 3 files changed, 289 insertions(+), 57 deletions(-) create mode 100644 app/client/src/components/table.tsx create mode 100644 app/client/src/types/uswds.d.ts diff --git a/app/client/src/components/previewModal.tsx b/app/client/src/components/previewModal.tsx index 94c5ce79..763052c1 100644 --- a/app/client/src/components/previewModal.tsx +++ b/app/client/src/components/previewModal.tsx @@ -1,16 +1,17 @@ -import { uniqueId } from 'lodash'; import { Dialog } from '@reach/dialog'; -import { useEffect, useState } from 'react'; import Close from 'images/close.svg?react'; +import { uniqueId } from 'lodash'; +import { useEffect, useMemo, useState } from 'react'; // components import { Alert } from 'components/alert'; import { Loading } from 'components/loading'; +import { Table } from 'components/table'; // utils import { isAbort, postData, useAbort } from 'utils'; // styles import '@reach/dialog/styles.css'; // types -import type { FetchState, QueryData, Value } from 'types'; +import type { FetchState, QueryData } from 'types'; export function PreviewModal({ apiKey, @@ -28,8 +29,9 @@ export function PreviewModal({ const [id] = useState(uniqueId('modal-')); + // Data to be displayed in the preview table. const [preview, setPreview] = useState< - FetchState>> + FetchState> >({ data: null, status: 'idle', @@ -51,7 +53,26 @@ export function PreviewModal({ signal: getSignal(), }) .then((res) => { - setPreview({ data: res.data, status: 'success' }); + const data = res.data.map((row: ActionsDocumentsRow) => ({ + rankPercent: row.rankPercent, + docUrl: { + sortValue: row.docFilename, + value: ( +
+ {row.docFilename} + + ), + }, + actionId: row.actionId, + region: row.region, + state: row.state, + organizationId: row.organizationId, + })); + setPreview({ data, status: 'success' }); }) .catch((err) => { if (isAbort(err)) return; @@ -60,22 +81,32 @@ export function PreviewModal({ }); }, [apiKey, queryData, queryUrl]); + const columns = useMemo( + () => [ + { id: 'rankPercent', name: 'Rank (%)', sortable: true }, + { id: 'docUrl', name: 'Document URL', sortable: true }, + { id: 'actionId', name: 'Action ID', sortable: false }, + { id: 'region', name: 'Region', sortable: false }, + { id: 'state', name: 'State', sortable: false }, + { id: 'organizationId', name: 'Organization ID', sortable: false }, + ], + [], + ); + return (
-

+

Results Preview{' '} - - (limited to {limit} rows) -

+ Limited to {limit} rows {preview.status === 'pending' && (
@@ -93,53 +124,16 @@ export function PreviewModal({ No results found ) : ( <> -
- - {/*
-
*/} - - - - - - - - - - - - - {preview.data.map((row) => ( - - - - - - - - - - ))} - -
- Rank (%) - - Document URL - Action IDRegionStateOrganization IDHMW Plan Summary URL
{row.rankPercent} - - {row.docFilename} - - {row.actionId}{row.region}{row.state} - {row.organizationId} -
-
+ )} @@ -167,6 +161,17 @@ export function PreviewModal({ ## Types */ +type ActionsDocumentsRow = { + actionId: string; + docFilename: string; + docUrl: string; + objectId: string; + organizationId: string; + rankPercent: number; + region: string; + state: string; +}; + type PreviewModalProps = { apiKey: string; limit: number; diff --git a/app/client/src/components/table.tsx b/app/client/src/components/table.tsx new file mode 100644 index 00000000..cb8d2f42 --- /dev/null +++ b/app/client/src/components/table.tsx @@ -0,0 +1,226 @@ +/** Adapted from https://github.com/MetroStar/comet/blob/main/packages/comet-uswds/src/components/table/table.tsx */ +import table from '@uswds/uswds/js/usa-table'; +import classNames from 'classnames'; +import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; + +function isCellSpec(value: any): value is TableCell { + return ( + typeof value === 'object' && value !== null && value.hasOwnProperty('value') + ); +} + +export const Table = ({ + id, + caption, + columns, + data, + sortable = false, + initialSortIndex = 0, + initialSortDir = 'ascending', + scrollable = false, + borderless = false, + stacked = false, + stickyHeader = false, + striped = false, + className, + tabIndex = -1, +}: TableProps): React.ReactElement => { + const [sortDir, setSortDir] = useState(initialSortDir); + const [sortIndex, setSortIndex] = useState(initialSortIndex); + + // Swap sort direction. + const getSortDirection = (prevSortDir: 'ascending' | 'descending') => { + if (prevSortDir === 'descending') { + return 'ascending'; + } else { + return 'descending'; + } + }; + + // If a header of a sortable column is clicked, sort the column or change the sort direction. + const handleHeaderClick = (index: number) => { + const column = columns[index]; + if (column?.sortable) { + if (sortIndex === index) { + setSortDir((prevSortDir) => getSortDirection(prevSortDir)); + } else { + setSortIndex(index); + } + } + }; + + return ( +
{ + if (node && sortable) { + table.on(node); + } + }} + > +
+ + + + {columns + .map((obj) => ({ + ...obj, + sortable: obj.sortable !== undefined ? obj.sortable : true, + })) + .map((column: TableColumn, index: number) => ( + + ))} + + + + {data.map((row, i: number) => { + const rowData: TableCell[] = []; + for (const key in row) { + if (sortable) { + rowData.push({ + value: isCellSpec(row[key]) ? row[key].value : row[key], + sortValue: isCellSpec(row[key]) + ? row[key].sortValue ?? row[key].value + : row[key], + }); + } else { + rowData.push({ + value: isCellSpec(row[key]) ? row[key].value : row[key], + }); + } + } + + return ( + + {rowData.map((col, j) => ( + + ))} + + ); + })} + +
handleHeaderClick(index)} + > + {column.name} +
+ {col.value} +
+ {sortable && ( +
+ )} +
+ ); +}; + +/* +## Types +*/ + +type TableProps = { + /** + * The unique identifier for this component + */ + id: string; + /** + * The table header details for the table + */ + columns: TableColumn[]; + /** + * The data to display in the table rows + */ + data: T[]; + /** + * An optional caption to display above the table + */ + caption?: string; + /** + * A boolean indicating if the table is sortable or not + */ + sortable?: boolean; + /** + * The column index to set as the default sort + */ + initialSortIndex?: number; + /** + * The default sort direction if sortIndex is provided + */ + initialSortDir?: 'ascending' | 'descending'; + /** + * A function to call when the table is sorted + */ + onSort?: () => void; + /** + * A boolean indicating if the table is scrollable or not + */ + scrollable?: boolean; + /** + * A boolean indicating if the table is borderless or not + */ + borderless?: boolean; + /** + * A boolean indicating if the table should use a stacked layout or not + */ + stacked?: boolean; + /** + * A boolean indicating if the table has a sticky header or not + */ + stickyHeader?: boolean; + /** + * A boolean indicating if the table is striped or not + */ + striped?: boolean; + /** + * Additional class names for the table + */ + className?: string; + /** + * Used primarily to make table focusable + */ + tabIndex?: number; +}; + +type TableColumn = { + id: string; + name: string; + sortable?: boolean; +}; + +type TableCell = { + value: string; + sortValue?: string; +}; + +export default Table; diff --git a/app/client/src/types/uswds.d.ts b/app/client/src/types/uswds.d.ts new file mode 100644 index 00000000..ad983596 --- /dev/null +++ b/app/client/src/types/uswds.d.ts @@ -0,0 +1 @@ +declare module '@uswds/uswds/js/usa-table'; From c31aabd95600560a2740ef07d7a89bd4e4f89bb2 Mon Sep 17 00:00:00 2001 From: Jon Maxwell Diebold Date: Wed, 4 Dec 2024 15:38:34 -0500 Subject: [PATCH 06/25] EQ-437 Added new document profiles to ETL --- app/client/package.json | 2 +- app/client/src/components/table.tsx | 2 +- app/client/src/routes/home.tsx | 29 +++--- etl/app/content-private/tableConfig.json | 111 +++++++++++++++-------- etl/app/server/database.js | 47 +++++++--- 5 files changed, 125 insertions(+), 66 deletions(-) diff --git a/app/client/package.json b/app/client/package.json index f722b1c6..55a2b7a3 100644 --- a/app/client/package.json +++ b/app/client/package.json @@ -20,7 +20,7 @@ }, "type": "module", "scripts": { - "dev": "vite --host", + "dev": "vite", "build": "vite build", "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0", "preview": "vite preview" diff --git a/app/client/src/components/table.tsx b/app/client/src/components/table.tsx index cb8d2f42..92655e63 100644 --- a/app/client/src/components/table.tsx +++ b/app/client/src/components/table.tsx @@ -1,7 +1,7 @@ /** Adapted from https://github.com/MetroStar/comet/blob/main/packages/comet-uswds/src/components/table/table.tsx */ import table from '@uswds/uswds/js/usa-table'; import classNames from 'classnames'; -import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; +import { useState } from 'react'; function isCellSpec(value: any): value is TableCell { return ( diff --git a/app/client/src/routes/home.tsx b/app/client/src/routes/home.tsx index b7779283..8a5ae595 100644 --- a/app/client/src/routes/home.tsx +++ b/app/client/src/routes/home.tsx @@ -618,7 +618,7 @@ function FilterFieldInputs({ staticOptions, }: FilterFieldInputsProps) { // Store each field's element in a tuple with its key - const fieldsJsx: Array<[JSX.Element, string]> = removeNulls( + const fieldsJsx: Array<[JSX.Element, string, string]> = removeNulls( fields.map((fieldConfig) => { const sourceFieldConfig = 'source' in fieldConfig && @@ -653,6 +653,7 @@ function FilterFieldInputs({ tooltip={tooltip} />, fieldConfig.key, + fieldConfig.type, ]; } @@ -724,6 +725,7 @@ function FilterFieldInputs({
, fieldConfig.key, + fieldConfig.type, ]; case 'date': case 'year': @@ -760,6 +762,7 @@ function FilterFieldInputs({ type={fieldConfig.type} />, fieldConfig.domain, + fieldConfig.type, ]; case 'text': return [ @@ -794,6 +797,7 @@ function FilterFieldInputs({ , fieldConfig.key, + fieldConfig.type, ]; default: return null; @@ -803,17 +807,18 @@ function FilterFieldInputs({ return (
- {fieldsJsx.map(([field, key]) => - key === 'docTxt' ? ( -
- {field} -
- ) : ( -
- {field} -
- ), - )} + {fieldsJsx.map(([field, key, type]) => ( +
+ {field} +
+ ))}
); } diff --git a/etl/app/content-private/tableConfig.json b/etl/app/content-private/tableConfig.json index b9ab33d1..26afe897 100644 --- a/etl/app/content-private/tableConfig.json +++ b/etl/app/content-private/tableConfig.json @@ -195,16 +195,20 @@ } ] }, - "actionsDocuments": { - "tableName": "actions_documents", + "actionDocuments": { + "tableName": "action_documents", "idColumn": "objectid", - "createQuery": "CREATE TABLE IF NOT EXISTS search.actions_documents ( objectid serial PRIMARY KEY, actionid VARCHAR(45), actionname VARCHAR(255), doc_id text, doc_txt text, doc_url text, doc_filename text, doc_errors text, organizationid VARCHAR(30), organizationname VARCHAR(150), organizationtype VARCHAR(30), region VARCHAR(2), state VARCHAR(4000) )", + "createQuery": "CREATE TABLE IF NOT EXISTS action_documents ( objectid INTEGER PRIMARY KEY, documentkey INTEGER, actionid VARCHAR(45), actiontypename VARCHAR(50), organizationid VARCHAR(30), regionid VARCHAR(2), state VARCHAR(4000), actionname VARCHAR(255), completiondate DATE, tmdldate DATE, actiondocumenturl TEXT )", "columns": [ { "name": "objectid", "alias": "objectId", "skipIndex": true }, + { + "name": "actiondocumenturl", + "alias": "actionDocumentUrl" + }, { "name": "actionid", "alias": "actionId" @@ -214,46 +218,41 @@ "alias": "actionName" }, { - "name": "doc_id", - "alias": "docId" - }, - { - "name": "doc_txt", - "alias": "docTxt", - "output": false + "name": "actiontypename", + "alias": "actionTypeName" }, { - "name": "doc_url", - "alias": "docUrl" - }, - { - "name": "doc_filename", - "alias": "docFilename" + "name": "completiondate", + "alias": "completionDate", + "lowParam": "completionDateLo", + "highParam": "completionDateHi", + "type": "date", + "indexOrder": "desc" }, { - "name": "doc_errors", - "alias": "docErrors" + "name": "documentkey", + "alias": "documentKey", + "type": "numeric" }, { "name": "organizationid", "alias": "organizationId" }, { - "name": "organizationname", - "alias": "organizationName" - }, - { - "name": "organizationtype", - "alias": "organizationType", - "skipIndex": true - }, - { - "name": "region", - "alias": "region" + "name": "regionid", + "alias": "regionId" }, { "name": "state", "alias": "state" + }, + { + "name": "tmdldate", + "alias": "tmdlDate", + "lowParam": "tmdlDateLo", + "highParam": "tmdlDateHi", + "type": "date", + "indexOrder": "desc" } ], "materializedViewColumns": [ @@ -264,7 +263,7 @@ ], "materializedViews": [ { - "name": "actionsdocuments_actions", + "name": "actiondocuments_actions", "columns": [ { "name": "actionid" @@ -276,13 +275,7 @@ "name": "organizationid" }, { - "name": "organizationname" - }, - { - "name": "organizationtype" - }, - { - "name": "region" + "name": "regionid" }, { "name": "state" @@ -1243,6 +1236,50 @@ } ] }, + "documentsText": { + "tableName": "documents_text", + "idColumn": "objectid", + "createQuery": "CREATE TABLE IF NOT EXISTS documents_text ( objectid INTEGER PRIMARY KEY, documentkey INTEGER, documentname TEXT, documentdesc TEXT, documentfilename TEXT, documentfiletypename TEXT, documenttypename TEXT, documenttext TEXT )", + "columns": [ + { + "name": "objectid", + "alias": "objectId", + "skipIndex": true + }, + { + "name": "documentdesc", + "alias": "documentDesc" + }, + { + "name": "documentfilename", + "alias": "documentFileName" + }, + { + "name": "documentfiletypename", + "alias": "documentFileTypeName" + }, + { + "name": "documentkey", + "alias": "documentKey", + "type": "numeric" + }, + { + "name": "documentname", + "alias": "documentName" + }, + { + "name": "documenttext", + "alias": "documentText", + "skipIndex": true + }, + { + "name": "documenttypename", + "alias": "documentTypeName" + } + ], + "materializedViewColumns": [], + "materializedViews": [] + }, "sources": { "tableName": "sources", "idColumn": "objectid", diff --git a/etl/app/server/database.js b/etl/app/server/database.js index c35ec78b..ca4ee4ac 100644 --- a/etl/app/server/database.js +++ b/etl/app/server/database.js @@ -882,6 +882,9 @@ async function createIndexes(s3Config, client, overrideWorkMemory, tableName) { (c) => c.name === 'reportingcycle', ); const hasCycleId = table.columns.find((c) => c.name === 'cycleid'); + const hasAssessmentUnitId = table.columns.find( + (c) => c.name === 'assessmentunitid', + ); const orderByArray = []; if (hasOrgId) { @@ -898,7 +901,8 @@ async function createIndexes(s3Config, client, overrideWorkMemory, tableName) { } let mvName = `${tableName}_countperorgcycle`; - await client.query(` + if (hasAssessmentUnitId) { + await client.query(` CREATE MATERIALIZED VIEW IF NOT EXISTS ${mvName} AS SELECT ${groupByColumns.join( @@ -918,7 +922,8 @@ async function createIndexes(s3Config, client, overrideWorkMemory, tableName) { WITH DATA; `); - log.info(`${tableName}: Created countPerOrgCycle materialized view`); + log.info(`${tableName}: Created countPerOrgCycle materialized view`); + } mvName = `${tableName}_count`; await client.query(` @@ -1014,19 +1019,31 @@ function getProfileEtl( // Extract, transform, and load the new data try { if (isLocal) { - const profileName = `profile_${tableName}`; - let res = await extract(profileName, s3Config); - let chunksProcessed = 0; - const maxChunks = maxChunksOverride ?? process.env.MAX_CHUNKS; - while ( - res.data !== null && - (!maxChunks || chunksProcessed < maxChunks) - ) { - const query = await transform(tableName, columns, res.data); - await client.query(query); - log.info(`Next record offset for table ${tableName}: ${res.next}`); - res = await extract(profileName, s3Config, res.next); - chunksProcessed += 1; + // TODO: Remove this once we have a better way to load local data. + if (['action_documents', 'documents_text'].includes(tableName)) { + const fileLocation = process.env[`LOCAL_${tableName.toUpperCase()}`]; + if (!fileLocation) { + log.warn(`No local data found for ${tableName}`); + return; + } + await client.query(` + COPY ${tableName} FROM '${fileLocation}' DELIMITER ',' CSV HEADER; + `); + } else { + const profileName = `profile_${tableName}`; + let res = await extract(profileName, s3Config); + let chunksProcessed = 0; + const maxChunks = maxChunksOverride ?? process.env.MAX_CHUNKS; + while ( + res.data !== null && + (!maxChunks || chunksProcessed < maxChunks) + ) { + const query = await transform(tableName, columns, res.data); + await client.query(query); + log.info(`Next record offset for table ${tableName}: ${res.next}`); + res = await extract(profileName, s3Config, res.next); + chunksProcessed += 1; + } } } else { await client.query( From 03de730034deeac79d45091eae4716429bbdf48a Mon Sep 17 00:00:00 2001 From: Jon Maxwell Diebold Date: Thu, 5 Dec 2024 11:09:54 -0500 Subject: [PATCH 07/25] EQ-437 Configured auxiliary search vector table and functions --- app/server/app/middleware.js | 7 +-- etl/app/server/database.js | 113 ++++++++++++++++++++++++++++++++++- 2 files changed, 113 insertions(+), 7 deletions(-) diff --git a/app/server/app/middleware.js b/app/server/app/middleware.js index 088259ef..609fa718 100644 --- a/app/server/app/middleware.js +++ b/app/server/app/middleware.js @@ -63,12 +63,7 @@ async function getActiveSchema(req, res, next) { ); // Add activeSchema to the request object - // TODO: Revert this when documents brought in with other data. - if (req.path.includes('/actionsDocuments')) { - req.activeSchema = 'search'; - } else { - req.activeSchema = schema.schema_name; - } + req.activeSchema = schema.schema_name; next(); } catch (error) { diff --git a/etl/app/server/database.js b/etl/app/server/database.js index ca4ee4ac..671814e9 100644 --- a/etl/app/server/database.js +++ b/etl/app/server/database.js @@ -996,6 +996,112 @@ async function transform(tableName, columns, data) { return pgp.helpers.insert(rows, insertColumns, tableName); } +// Create the documents_text_search table and triggers. +async function setupTextSearch(client) { + try { + await client.query('DROP TABLE IF EXISTS documents_text_search'); + + // Create the table. + await client.query(` + CREATE TABLE documents_text_search ( + objectid SERIAL PRIMARY KEY, + documentid INTEGER, + documenttsv TSVECTOR, + CONSTRAINT fk_documentstextsearch_documentstext + FOREIGN KEY (documentid) + REFERENCES documents_text (objectid) + ON DELETE CASCADE + ) + `); + + // Create the index on the vector column. + await client.query(` + CREATE INDEX IF NOT EXISTS documentstextsearch_documenttsv + ON documents_text_search USING gin (documenttsv) + TABLESPACE pg_default + `); + + // Create the trigger function. + const weights = { + documenttext: 'A', + documentdesc: 'A', + documentname: 'B', + }; + await client.query(` + CREATE OR REPLACE FUNCTION process_document (new_row RECORD, chunk_size integer) + RETURNS VOID + AS $$ + DECLARE + tsv tsvector; + chunk text; + start_idx integer := 1; + end_idx integer; + text_length integer := octet_length(new_row.documenttext); + BEGIN + LOOP + -- Process the document in chunks + WHILE start_idx <= text_length LOOP + end_idx := LEAST (start_idx + chunk_size - 1, text_length); + chunk := substr(new_row.documenttext, start_idx, end_idx - start_idx + 1); + tsv := setweight(to_tsvector('pg_catalog.english', coalesce(chunk, '')), '${weights.documenttext}'); + INSERT INTO documents_text_search (documentid, documenttsv) + VALUES (new_row.objectid, tsv); + start_idx := end_idx + 1; + END LOOP; + -- Add document description and name as a single row + tsv := setweight(to_tsvector('pg_catalog.english', coalesce(new_row.documentdesc, '')), '${weights.documentdesc}') || + setweight(to_tsvector('pg_catalog.english', coalesce(new_row.documentname, '')), '${weights.documentname}'); + INSERT INTO documents_text_search (documentid, documenttsv) + VALUES (new_row.objectid, tsv); + RETURN; + END LOOP; + END + $$ + LANGUAGE plpgsql; + `); + await client.query(` + CREATE OR REPLACE FUNCTION documentstext_trigger_fn () + RETURNS TRIGGER + AS $$ + DECLARE + -- Initial chunk size of 1 MB. + chunk_size integer := 1024 * 1024; + BEGIN + LOOP + BEGIN + -- Call the external function to process the document + PERFORM + process_document (NEW, chunk_size); + RETURN NEW; + -- Exit on success + EXCEPTION + WHEN OTHERS THEN + -- Reduce the chunk size and retry + chunk_size := chunk_size / 2; + -- If chunk size is too small, raise an error + IF chunk_size < 1024 THEN + RAISE EXCEPTION 'Chunk size too small, unable to process document: %', NEW.objectid; + END IF; + END; + END LOOP; + END + $$ + LANGUAGE plpgsql; + `); + + // Create the trigger. + await client.query(` + CREATE TRIGGER documentstext_trigger + AFTER INSERT ON documents_text + FOR EACH ROW + EXECUTE FUNCTION documentstext_trigger_fn (); + `); + } catch (err) { + log.warn('Failed to create documents_text_search table'); + throw err; + } +} + // Get the ETL task for a particular profile function getProfileEtl( { createQuery, columns, maxChunksOverride, overrideWorkMemory, tableName }, @@ -1007,7 +1113,7 @@ function getProfileEtl( try { await client.query(`SET search_path TO ${schemaName}`); - await client.query(`DROP TABLE IF EXISTS ${tableName}`); + await client.query(`DROP TABLE IF EXISTS ${tableName} CASCADE`); await client.query(createQuery); log.info(`Table ${tableName} created`); @@ -1016,6 +1122,11 @@ function getProfileEtl( throw err; } + // Create the search vector table and triggers for `documents_text`. + if (tableName === 'documents_text') { + await setupTextSearch(client); + } + // Extract, transform, and load the new data try { if (isLocal) { From 6c273e0567f458d7a93dd2f1146e5bf6ee1bf1ea Mon Sep 17 00:00:00 2001 From: Jon Maxwell Diebold Date: Wed, 11 Dec 2024 21:32:25 -0500 Subject: [PATCH 08/25] EQ-437 Updated server code to handle new profiles --- app/server/app/routes/attains.js | 197 +++++++++++++++-------- etl/app/content-private/tableConfig.json | 118 ++++++++++++++ etl/app/server/database.js | 57 +++++-- 3 files changed, 294 insertions(+), 78 deletions(-) diff --git a/app/server/app/routes/attains.js b/app/server/app/routes/attains.js index 98800209..b39a0164 100644 --- a/app/server/app/routes/attains.js +++ b/app/server/app/routes/attains.js @@ -63,7 +63,7 @@ class NoParametersException extends Error { /** * Searches for a materialized view, associated with the profile, that is applicable to the provided columns/filters. * @param {Object} profile definition of the profile being queried - * @param {Array} columns definitions of columns to return, where the first is the primary column + * @param {Array} columns definitions of columns to return * @param {Array} columnsForFilter names of columns that can be used to filter * @returns definition of a materialized view that is applicable to the desired columns/filters or null if none are suitable */ @@ -78,6 +78,43 @@ function findMaterializedView(profile, columns, columnsForFilter) { }); } +/** + * Searches for a view, associated with the profile, that is applicable to the provided columns. + * @param {Object} profile definition of the profile being queried + * @param {Array} columns aliases of columns to return + * @returns definition of a view that is applicable to the desired columns, or null if none are suitable + */ +function findView(profile, columns) { + if (!profile.views) return; + + const expandedViews = profile.views?.map((view) => ({ + ...view, + columns: view.columns.map((vCol) => { + const pCol = ( + vCol.table + ? Object.values(privateConfig.tableConfig).find( + (p) => p.tableName === vCol.table, + ) + : profile.columns + )?.columns.find((c) => c.name === vCol.name); + if (!pCol) { + throw new Error( + `The view column ${vCol.name} does not exist on the specified profile`, + ); + } + + return pCol; + }), + })); + + return expandedViews.find((view) => { + for (const col of columns) { + if (!view.columns.find((vCol) => vCol.alias === col)) return; + } + return view; + }); +} + /** * Finds full column definitions for the provided array of column aliases * @param {Array} columnAliases array of column aliases to get full column definitions for @@ -125,7 +162,7 @@ function createLatestSubquery(req, profile, params, columnName, columnType) { const columnsForFilter = []; const columnNamesForFilter = []; columns.forEach((col) => { - if (col.output !== false && params.filters.hasOwnProperty(col.alias)) { + if (params.filters.hasOwnProperty(col.alias)) { columnsForFilter.push(col); columnNamesForFilter.push(col.name); } @@ -334,30 +371,16 @@ function parseCriteria(req, query, profile, queryParams, countOnly = false) { if (!columns.includes('objectId')) columns.push('objectId'); columns.forEach((col) => { const profileCol = profile.columns.find((pc) => pc.alias === col); - if (profileCol && profileCol.output !== false) - columnsToReturn.push(profileCol); + if (profileCol) columnsToReturn.push(profileCol); }); // build the select query const selectColumns = - columnsToReturn.length > 0 - ? columnsToReturn - : profile.columns.filter((col) => col.output !== false); + columnsToReturn.length > 0 ? columnsToReturn : profile.columns; const selectText = selectColumns.map((col) => col.name === col.alias ? col.name : `${col.name} AS "${col.alias}"`, ); - if (profile.tableName === 'actions_documents') { - const rankText = 'ts_rank_cd(doc_tsv, query, 1 | 32)'; - query - .select( - knex.raw( - `to_char(${rankText} * 100, 'FM999') AS "rankPercent", ${selectText}`, - ), - ) - .orderBy(knex.raw(rankText), 'desc'); - } else { - query.select(selectText).orderBy('objectid', 'asc'); - } + query.select(selectText).orderBy('objectid', 'asc'); } // build where clause of the query @@ -368,15 +391,7 @@ function parseCriteria(req, query, profile, queryParams, countOnly = false) { if (lowArg || highArg) { appendRangeToWhere(query, col, lowArg, highArg); } else if (exactArg !== undefined) { - if (profile.tableName === 'actions_documents' && col.alias === 'docTxt') { - query.fromRaw( - `${req.activeSchema}.${profile.tableName}, websearch_to_tsquery(?) query`, - [exactArg], - ); - query.whereRaw('query @@ doc_tsv'); - } else { - appendToWhere(query, col.name, exactArg); - } + appendToWhere(query, col.name, exactArg); } }); @@ -386,42 +401,78 @@ function parseCriteria(req, query, profile, queryParams, countOnly = false) { } } -/** - * Runs a query against the provided profile name and streams the a portion - * of the result to the client as inline json. - * @param {Object} profile definition of the profile being queried - * @param {express.Request} req - * @param {express.Response} res - */ -async function executeQueryPreview(profile, req, res) { - const metadataObj = populateMetdataObjFromRequest(req); - - try { - const queryParams = getQueryParams(req); - validateQueryParams(queryParams, profile); - - const query = knex - .withSchema(req.activeSchema) - .from(profile.tableName) - .limit(previewSize); - - parseCriteria(req, query, profile, queryParams); - - const queryRes = await queryPool(query); +function parseDocumentSearchCriteria( + query, + profile, + queryParams, + countOnly = false, +) { + const columnsForFilter = Object.keys(queryParams.filters); + const columnsToReturn = queryParams.columns ?? []; + const view = findView(profile, columnsForFilter.concat(columnsToReturn)); + if (view) query.from(view.name); + const target = view ?? profile; + // NOTE:XXX: This will need to change if we ever have multiple tsvector columns in a single table. + const documentQueryColumn = target.columns.find( + (col) => col.type === 'tsvector', + ); + const isDocumentSearch = + documentQueryColumn && columnsForFilter.includes(documentQueryColumn.alias); + if (!isDocumentSearch && !columnsToReturn.includes('objectId')) { + columnsToReturn.push('objectId'); + } + const columnsToReturnDefs = target.columns.filter( + (col) => col.output !== false && columnsToReturn.includes(col.alias), + ); - return res.status(200).json(queryRes); - } catch (error) { - log.error( - formatLogMsg( - metadataObj, - `Failed to get data from the "${profile.tableName}" table:`, - error, - ), + // Build the select query, filtering down to requested columns, if the user provided that option. + const selectText = ( + columnsToReturn.length > 0 ? columnsToReturnDefs : target.columns + ).map((col) => + col.name === col.alias ? col.name : `${col.name} AS "${col.alias}"`, + ); + if (isDocumentSearch) { + const rankQuery = knex.raw( + `ts_rank_cd(${documentQueryColumn.name}, websearch_to_tsquery(?), 1 | 32) AS rank`, + [queryParams.filters[documentQueryColumn.alias]], ); - return res - .status(error.httpStatusCode ?? 500) - .json({ error: error.toString() }); + query + .with('ranked', (qb) => { + return qb + .select(selectText.concat(rankQuery)) + .from(target.tableName) + .whereRaw(`${documentQueryColumn.name} @@ websearch_to_tsquery(?)`, [ + queryParams.filters[documentQueryColumn.alias], + ]); + }) + .from('ranked') + .groupBy('actionid', 'documentkey'); // TODO: Remove hard-coded columns + if (!countOnly) { + query + .select( + selectText.concat( + knex.raw('ROUND(SUM(rank) * 100, 1) AS "rankPercent"'), + ), + ) + .orderBy('rankPercent', 'desc'); + } + } else if (!countOnly) { + query.select(selectText).orderBy('objectid', 'asc'); } + + // build where clause of the query + profile.columns.forEach((col) => { + if (col.type === 'tsvector') return; + + const lowArg = 'lowParam' in col && queryParams.filters[col.lowParam]; + const highArg = 'highParam' in col && queryParams.filters[col.highParam]; + const exactArg = queryParams.filters[col.alias]; + if (lowArg || highArg) { + appendRangeToWhere(query, col, lowArg, highArg); + } else if (exactArg !== undefined) { + appendToWhere(query, col.name, exactArg); + } + }); } /** @@ -454,7 +505,11 @@ async function executeQuery(profile, req, res) { throw new NoParametersException('Please provide at least one parameter'); } - parseCriteria(req, query, profile, queryParams); + if (profile.tableName === 'action_documents') { + parseDocumentSearchCriteria(query, profile, queryParams); + } else { + parseCriteria(req, query, profile, queryParams); + } // Check that the query doesn't exceed the MAX_QUERY_SIZE. if (await exceedsMaxSize(query)) { @@ -520,7 +575,6 @@ function validateQueryParams(queryParams, profile) { if (c.lowParam === name || c.highParam === name || c.alias === name) return c; }); - if (!column) throw new InvalidParameterException(name, 'profile'); if (Array.isArray(value)) { if ( column.lowParam === name || @@ -529,6 +583,7 @@ function validateQueryParams(queryParams, profile) { ) throw new DuplicateParameterException(name); } + if (!column) throw new InvalidParameterException(name, 'profile'); }); } @@ -569,7 +624,11 @@ async function executeQueryCountOnly(profile, req, res) { const queryParams = getQueryParams(req); // query against the ..._count mv when no filters are applied, for better performance - if (Object.keys(queryParams.filters).length === 0) { + // TODO: Remove hard-coded table name. + if ( + Object.keys(queryParams.filters).length === 0 && + profile.tableName !== 'action_documents' + ) { const query = knex .withSchema(req.activeSchema) .from(`${profile.tableName}_count`); @@ -579,7 +638,11 @@ async function executeQueryCountOnly(profile, req, res) { validateQueryParams(queryParams, profile); - parseCriteria(req, query, profile, queryParams, true); + if (profile.tableName === 'action_documents') { + parseDocumentSearchCriteria(query, profile, queryParams, true); + } else { + parseCriteria(req, query, profile, queryParams, true); + } const count = (await queryPool(query.count(), true)).count; return res.status(200).json({ count, maxCount: maxQuerySize }); @@ -977,6 +1040,8 @@ export default function (app, basePath) { Object.entries(privateConfig.tableConfig).forEach( ([profileName, profile]) => { + if (profile.hidden) return; + // get column domain values router.post(`/${profileName}/values/:column`, async function (req, res) { await executeValuesQuery(profile, req, res); @@ -998,7 +1063,7 @@ export default function (app, basePath) { await executeQueryCountOnly(profile, req, res); }); - if (profileName !== 'actionsDocuments') { + if (profile.includeCycleCount) { // get bean counts router.get( `/${profileName}/countPerOrgCycle`, diff --git a/etl/app/content-private/tableConfig.json b/etl/app/content-private/tableConfig.json index 26afe897..9562dadc 100644 --- a/etl/app/content-private/tableConfig.json +++ b/etl/app/content-private/tableConfig.json @@ -1,8 +1,10 @@ { "actions": { + "source": "attains", "tableName": "actions", "idColumn": "objectid", "createQuery": "CREATE TABLE IF NOT EXISTS actions ( objectid INTEGER PRIMARY KEY, state VARCHAR(4000), region VARCHAR(2), organizationid VARCHAR(30) NOT NULL, organizationname VARCHAR(150) NOT NULL, organizationtype VARCHAR(30) NOT NULL, assessmentunitid VARCHAR(50), assessmentunitname VARCHAR(255), actionid VARCHAR(45) NOT NULL, actionname VARCHAR(255) NOT NULL, completiondate DATE, fiscalyearestablished VARCHAR (4), parameter VARCHAR(240), parametergroup VARCHAR(60), locationdescription VARCHAR(2000), actiontype VARCHAR(50) NOT NULL, watertype VARCHAR(40), watersize NUMERIC(18,4), watersizeunits VARCHAR(15), actionagency VARCHAR(10) NOT NULL, inindiancountry VARCHAR(1), includeinmeasure VARCHAR(1), plansummarylink VARCHAR(116) )", + "includeCycleCount": true, "columns": [ { "name": "objectid", @@ -196,6 +198,7 @@ ] }, "actionDocuments": { + "source": "attains", "tableName": "action_documents", "idColumn": "objectid", "createQuery": "CREATE TABLE IF NOT EXISTS action_documents ( objectid INTEGER PRIMARY KEY, documentkey INTEGER, actionid VARCHAR(45), actiontypename VARCHAR(50), organizationid VARCHAR(30), regionid VARCHAR(2), state VARCHAR(4000), actionname VARCHAR(255), completiondate DATE, tmdldate DATE, actiondocumenturl TEXT )", @@ -291,12 +294,90 @@ } ] } + ], + "views": [ + { + "name": "action_documents_view", + "columns": [ + { + "name": "objectid", + "table": "action_documents" + }, + { + "name": "actiondocumenturl" + }, + { + "name": "actionid" + }, + { + "name": "actionname" + }, + { + "name": "actiontypename" + }, + { + "name": "completiondate" + }, + { + "name": "organizationid" + }, + { + "name": "regionid" + }, + { + "name": "state" + }, + { + "name": "tmdldate" + }, + { + "name": "documentdesc" + }, + { + "name": "documentfilename" + }, + { + "name": "documentfiletypename" + }, + { + "name": "documentkey", + "table": "documents_text" + }, + { + "name": "documentname" + }, + { + "name": "documenttsv" + }, + { + "name": "documenttypename" + } + ], + "joins": [ + { + "table": "documents_text", + "joinKey": [ + "documents_text.documentkey", + "action_documents.documentkey" + ] + }, + { + "table": "documents_text_search", + "joinKey": [ + "documents_text_search.documentid", + "documents_text.objectid" + ] + } + ] + } ] }, "assessments": { + "source": "attains", "tableName": "assessments", "idColumn": "objectid", "createQuery": "CREATE TABLE IF NOT EXISTS assessments ( objectid INTEGER PRIMARY KEY, state VARCHAR(4000), region VARCHAR(2), organizationid VARCHAR(30) NOT NULL, organizationname VARCHAR(150) NOT NULL, organizationtype VARCHAR(30) NOT NULL, reportingcycle NUMERIC(4,0) NOT NULL, cycleid NUMERIC(38,0) NOT NULL, assessmentunitid VARCHAR(50), assessmentunitname VARCHAR(255), cyclelastassessed NUMERIC(4,0) NOT NULL, overallstatus VARCHAR(4000), epaircategory VARCHAR(5), stateircategory VARCHAR(5), parametergroup VARCHAR(60), parametername VARCHAR(240), parameterstatus VARCHAR(240), usegroup VARCHAR(500), usename VARCHAR(255), useircategory VARCHAR(5), usestateircategory VARCHAR(5), usesupport VARCHAR(1), parameterattainment VARCHAR(50), parameterircategory VARCHAR(5), parameterstateircategory VARCHAR(5), cyclefirstlisted NUMERIC(4,0), associatedactionid VARCHAR(45), associatedactionname VARCHAR(255), associatedactiontype VARCHAR(50), locationdescription VARCHAR(2000), watertype VARCHAR(40), watersize NUMERIC(18,4), watersizeunits VARCHAR(15), sizesource VARCHAR(100), sourcescale VARCHAR(30), assessmentunitstatus VARCHAR(1), useclassname VARCHAR(50), assessmentdate DATE, assessmentbasis VARCHAR(30), monitoringstartdate DATE, monitoringenddate DATE, assessmentmethods VARCHAR(150), assessmenttypes VARCHAR(30), delisted VARCHAR(1), delistedreason VARCHAR(100), seasonstartdate DATE, seasonenddate DATE, pollutantindicator VARCHAR(1), cyclescheduledfortmdl NUMERIC(4,0), cycleexpectedtoattain NUMERIC(4,0), cwa303dpriorityranking VARCHAR(25), vision303dpriority VARCHAR(1), alternatelistingidentifier VARCHAR(50), consentdecreecycle NUMERIC(4,0), associatedactionstatus VARCHAR(30), associatedactionagency VARCHAR(10) )", + "includeCycleCount": true, "overrideWorkMemory": "1GB", "columns": [ { @@ -771,9 +852,11 @@ ] }, "assessmentUnits": { + "source": "attains", "tableName": "assessment_units", "idColumn": "objectid", "createQuery": "CREATE TABLE IF NOT EXISTS assessment_units ( objectid INTEGER PRIMARY KEY, state VARCHAR(4000), region VARCHAR(2), organizationid VARCHAR(30) NOT NULL, organizationname VARCHAR(150) NOT NULL, organizationtype VARCHAR(30) NOT NULL, reportingcycle NUMERIC(4,0) NOT NULL, cycleid NUMERIC(38,0) NOT NULL, assessmentunitid VARCHAR(50) NOT NULL, assessmentunitname VARCHAR(255) NOT NULL, locationdescription VARCHAR(2000) NOT NULL, watertype VARCHAR(40) NOT NULL, watersize NUMERIC(18,4) NOT NULL, watersizeunits VARCHAR(15) NOT NULL, assessmentunitstatus VARCHAR(1) NOT NULL, useclassname VARCHAR(50), sizesource VARCHAR(100), sourcescale VARCHAR(30), locationtypecode VARCHAR(22), locationtext VARCHAR(100) )", + "includeCycleCount": true, "columns": [ { "name": "objectid", @@ -938,9 +1021,11 @@ ] }, "assessmentUnitsMonitoringLocations": { + "source": "attains", "tableName": "assessment_units_monitoring_locations", "idColumn": "objectid", "createQuery": "CREATE TABLE IF NOT EXISTS assessment_units_monitoring_locations ( objectid INTEGER PRIMARY KEY, state VARCHAR(4000), region VARCHAR(2), organizationid VARCHAR(30) NOT NULL, organizationname VARCHAR(150) NOT NULL, organizationtype VARCHAR(30) NOT NULL, reportingcycle NUMERIC(4,0) NOT NULL, cycleid NUMERIC(38,0) NOT NULL, assessmentunitid VARCHAR(50) NOT NULL, assessmentunitname VARCHAR(255) NOT NULL, locationdescription VARCHAR(2000) NOT NULL, watertype VARCHAR(40) NOT NULL, watersize NUMERIC(18,4) NOT NULL, watersizeunits VARCHAR(15) NOT NULL, monitoringlocationorgid VARCHAR(30), monitoringlocationid VARCHAR(35), monitoringlocationdatalink VARCHAR(255), assessmentunitstatus VARCHAR(1) NOT NULL, useclassname VARCHAR(50), sizesource VARCHAR(100), sourcescale VARCHAR(30) )", + "includeCycleCount": true, "columns": [ { "name": "objectid", @@ -1111,9 +1196,11 @@ ] }, "catchmentCorrespondence": { + "source": "attains", "tableName": "catchment_correspondence", "idColumn": "objectid", "createQuery": "CREATE TABLE IF NOT EXISTS catchment_correspondence ( objectid INTEGER PRIMARY KEY, state VARCHAR(4000), region VARCHAR(2), organizationid VARCHAR(30) NOT NULL, organizationname VARCHAR(150) NOT NULL, organizationtype VARCHAR(30) NOT NULL, reportingcycle NUMERIC(4,0) NOT NULL, cycleid NUMERIC(38,0) NOT NULL, assessmentunitid VARCHAR(50) NOT NULL, assessmentunitname VARCHAR(255) NOT NULL, catchmentnhdplusid NUMERIC(38,0) )", + "includeCycleCount": true, "overrideWorkMemory": "790MB", "columns": [ { @@ -1237,9 +1324,11 @@ ] }, "documentsText": { + "source": "attains", "tableName": "documents_text", "idColumn": "objectid", "createQuery": "CREATE TABLE IF NOT EXISTS documents_text ( objectid INTEGER PRIMARY KEY, documentkey INTEGER, documentname TEXT, documentdesc TEXT, documentfilename TEXT, documentfiletypename TEXT, documenttypename TEXT, documenttext TEXT )", + "hidden": true, "columns": [ { "name": "objectid", @@ -1280,10 +1369,37 @@ "materializedViewColumns": [], "materializedViews": [] }, + "documentsTextSearch": { + "tableName": "documents_text_search", + "idColumn": "objectid", + "createQuery": "CREATE TABLE IF NOT EXISTS documents_text ( objectid SERIAL PRIMARY KEY, documentid INTEGER, documenttsv TSVECTOR )", + "hidden": true, + "columns": [ + { + "name": "objectid", + "alias": "objectId", + "skipIndex": true + }, + { + "name": "documentid", + "alias": "documentId" + }, + { + "name": "documenttsv", + "alias": "documentQuery", + "type": "tsvector", + "output": false + } + ], + "materializedViewColumns": [], + "materializedViews": [] + }, "sources": { + "source": "attains", "tableName": "sources", "idColumn": "objectid", "createQuery": "CREATE TABLE IF NOT EXISTS sources ( objectid INTEGER PRIMARY KEY, state VARCHAR(4000), region VARCHAR(2), organizationid VARCHAR(30) NOT NULL, organizationname VARCHAR(150) NOT NULL, organizationtype VARCHAR(30) NOT NULL, reportingcycle NUMERIC(4,0) NOT NULL, cycleid NUMERIC(38,0) NOT NULL, assessmentunitid VARCHAR(50) NOT NULL, assessmentunitname VARCHAR(255) NOT NULL, overallstatus VARCHAR(4000), epaircategory VARCHAR(5), stateircategory VARCHAR(5), sourcename VARCHAR(240) NOT NULL, confirmed VARCHAR(1) NOT NULL, parametergroup VARCHAR(60) NOT NULL, causename VARCHAR(240) NOT NULL, locationdescription VARCHAR(2000) NOT NULL, watertype VARCHAR(40) NOT NULL, watersize NUMERIC(18,4) NOT NULL, watersizeunits VARCHAR(15) NOT NULL )", + "includeCycleCount": true, "columns": [ { "name": "objectid", @@ -1464,9 +1580,11 @@ ] }, "tmdl": { + "source": "attains", "tableName": "tmdl", "idColumn": "objectid", "createQuery": "CREATE TABLE IF NOT EXISTS tmdl ( objectid INTEGER PRIMARY KEY, state VARCHAR(4000), region VARCHAR(2), organizationid VARCHAR(30) NOT NULL, organizationname VARCHAR(150) NOT NULL, organizationtype VARCHAR(30) NOT NULL, assessmentunitid VARCHAR(50), assessmentunitname VARCHAR(255), actionid VARCHAR(45) NOT NULL, actionname VARCHAR(255) NOT NULL, completiondate DATE, tmdldate DATE, fiscalyearestablished VARCHAR(4), pollutant VARCHAR(240), pollutantgroup VARCHAR(60), sourcetype VARCHAR(40), addressedparameter VARCHAR(240), addressedparametergroup VARCHAR(60), locationdescription VARCHAR(2000), watertype VARCHAR(40), watersize NUMERIC(18,4), watersizeunits VARCHAR(15), actionagency VARCHAR(10) NOT NULL, loadallocation NUMERIC(21,3), loadallocationunits VARCHAR(40), explicitmarginofsafety VARCHAR(255), implicitmarginofsafety VARCHAR(255), tmdlendpoint1 TEXT, tmdlendpoint2 TEXT, tmdlendpoint3 TEXT, npdesidentifier VARCHAR(60), otheridentifier VARCHAR(4000), wasteloadallocation NUMERIC(24,3), inindiancountry VARCHAR(1), includeinmeasure VARCHAR(1), plansummarylink VARCHAR(116), tmdlendpoint TEXT GENERATED ALWAYS AS (coalesce(tmdlendpoint1, '') || coalesce(tmdlendpoint2, '') || coalesce(tmdlendpoint3, '')) STORED )", + "includeCycleCount": true, "overrideWorkMemory": "790MB", "columns": [ { diff --git a/etl/app/server/database.js b/etl/app/server/database.js index 671814e9..f5644516 100644 --- a/etl/app/server/database.js +++ b/etl/app/server/database.js @@ -587,6 +587,36 @@ async function loadUtilityTables(pool, s3Config, schemaName) { log.info('Utility tables finished updating'); } +async function createProfileViews(pool, schemaName, profile) { + if (!profile.views) return; + + const client = await getClient(pool); + try { + await client.query(`SET search_path TO ${schemaName}`); + let count = 0; + for (const view of profile.views) { + const joinClause = (join) => + `JOIN ${join.table} ON ${join.joinKey[0]} = ${join.joinKey[1]}`; + await client.query(` + CREATE OR REPLACE VIEW ${view.name} + AS + SELECT ${view.columns + .map((col) => (col.table ? `${col.table}.${col.name}` : col.name)) + .join(', ')} + FROM ${profile.tableName} ${ + view.joins ? view.joins.map(joinClause).join(' ') : '' + } + `); + count++; + log.info( + `${profile.tableName}: Created materialized view (${count} of ${profile.views.length}): ${view.name}`, + ); + } + } finally { + client.release(); + } +} + export async function runLoad(pool, s3Config, s3Julian, logId) { log.info('Running ETL process!'); @@ -602,11 +632,20 @@ export async function runLoad(pool, s3Config, s3Julian, logId) { await loadUtilityTables(pool, s3Config, schemaName); // Add tables to schema and import new data - const loadTasks = Object.values(s3Config.tableConfig).map((profile) => { - return loadProfile(profile, pool, schemaName, s3Config, s3Julian); - }); + const loadTasks = Object.values(s3Config.tableConfig) + .filter((profile) => profile.source?.toLowerCase === 'attains') + .map((profile) => { + return loadProfile(profile, pool, schemaName, s3Config, s3Julian); + }); await Promise.all(loadTasks); + // Create views + await Promise.all( + Object.values(s3Config.tableConfig).map((profile) => { + return createProfileViews(pool, schemaName, profile); + }), + ); + const profileStats = await getProfileStats(pool, schemaName, s3Julian); // Verify the etl was successfull and the data matches what we expect. @@ -882,9 +921,6 @@ async function createIndexes(s3Config, client, overrideWorkMemory, tableName) { (c) => c.name === 'reportingcycle', ); const hasCycleId = table.columns.find((c) => c.name === 'cycleid'); - const hasAssessmentUnitId = table.columns.find( - (c) => c.name === 'assessmentunitid', - ); const orderByArray = []; if (hasOrgId) { @@ -901,7 +937,7 @@ async function createIndexes(s3Config, client, overrideWorkMemory, tableName) { } let mvName = `${tableName}_countperorgcycle`; - if (hasAssessmentUnitId) { + if (table.includeCycleCount) { await client.query(` CREATE MATERIALIZED VIEW IF NOT EXISTS ${mvName} AS @@ -996,6 +1032,7 @@ async function transform(tableName, columns, data) { return pgp.helpers.insert(rows, insertColumns, tableName); } +// TODO: Make this a bit more configurable. // Create the documents_text_search table and triggers. async function setupTextSearch(client) { try { @@ -1006,11 +1043,7 @@ async function setupTextSearch(client) { CREATE TABLE documents_text_search ( objectid SERIAL PRIMARY KEY, documentid INTEGER, - documenttsv TSVECTOR, - CONSTRAINT fk_documentstextsearch_documentstext - FOREIGN KEY (documentid) - REFERENCES documents_text (objectid) - ON DELETE CASCADE + documenttsv TSVECTOR ) `); From 1c463478ab21adf33074c74d76c13f152cdd5bde Mon Sep 17 00:00:00 2001 From: Jon Maxwell Diebold Date: Thu, 12 Dec 2024 16:11:26 -0500 Subject: [PATCH 09/25] EQ-437 Updated UI fields to accommodate new profiles --- app/client/src/components/previewModal.tsx | 46 ++++---- app/client/src/components/table.tsx | 8 +- app/client/src/routes/home.tsx | 4 +- app/server/app/content/config/fields.json | 11 +- .../app/content/config/listOptions.json | 2 +- app/server/app/content/config/profiles.json | 28 ++--- app/server/app/routes/attains.js | 100 ++++++++++-------- app/server/app/utilities/streamingService.js | 4 +- etl/app/content-private/tableConfig.json | 21 ++-- 9 files changed, 128 insertions(+), 96 deletions(-) diff --git a/app/client/src/components/previewModal.tsx b/app/client/src/components/previewModal.tsx index 763052c1..c38867ed 100644 --- a/app/client/src/components/previewModal.tsx +++ b/app/client/src/components/previewModal.tsx @@ -1,3 +1,4 @@ +// TODO: Move all table fields to configuration. import { Dialog } from '@reach/dialog'; import Close from 'images/close.svg?react'; import { uniqueId } from 'lodash'; @@ -30,12 +31,12 @@ export function PreviewModal({ const [id] = useState(uniqueId('modal-')); // Data to be displayed in the preview table. - const [preview, setPreview] = useState< - FetchState> - >({ - data: null, - status: 'idle', - }); + const [preview, setPreview] = useState>>( + { + data: null, + status: 'idle', + }, + ); useEffect(() => { setPreview({ data: null, status: 'pending' }); @@ -53,22 +54,22 @@ export function PreviewModal({ signal: getSignal(), }) .then((res) => { - const data = res.data.map((row: ActionsDocumentsRow) => ({ + const data = res.data.map((row: ActionDocumentsRow) => ({ rankPercent: row.rankPercent, - docUrl: { - sortValue: row.docFilename, + actionDocumentUrl: { + sortValue: row.documentFileName, value: ( - {row.docFilename} + {row.documentFileName} ), }, actionId: row.actionId, - region: row.region, + regionId: row.regionId, state: row.state, organizationId: row.organizationId, })); @@ -84,9 +85,9 @@ export function PreviewModal({ const columns = useMemo( () => [ { id: 'rankPercent', name: 'Rank (%)', sortable: true }, - { id: 'docUrl', name: 'Document URL', sortable: true }, + { id: 'actionDocumentUrl', name: 'Document URL', sortable: true }, { id: 'actionId', name: 'Action ID', sortable: false }, - { id: 'region', name: 'Region', sortable: false }, + { id: 'regionId', name: 'Region', sortable: false }, { id: 'state', name: 'State', sortable: false }, { id: 'organizationId', name: 'Organization ID', sortable: false }, ], @@ -161,15 +162,22 @@ export function PreviewModal({ ## Types */ -type ActionsDocumentsRow = { +type ActionDocumentsRow = { + actionDocumentUrl: string; actionId: string; - docFilename: string; - docUrl: string; - objectId: string; + actionName: string; + actionTypeName: string; + completionDate: string; + documentFileName: string; + documentFileTypeName: string; + documentKey: number; + documentName: string; + documentTypeName: string; organizationId: string; rankPercent: number; - region: string; + regionId: string; state: string; + tmdlDate: string; }; type PreviewModalProps = { diff --git a/app/client/src/components/table.tsx b/app/client/src/components/table.tsx index 92655e63..8149100a 100644 --- a/app/client/src/components/table.tsx +++ b/app/client/src/components/table.tsx @@ -25,9 +25,6 @@ export const Table = ({ className, tabIndex = -1, }: TableProps): React.ReactElement => { - const [sortDir, setSortDir] = useState(initialSortDir); - const [sortIndex, setSortIndex] = useState(initialSortIndex); - // Swap sort direction. const getSortDirection = (prevSortDir: 'ascending' | 'descending') => { if (prevSortDir === 'descending') { @@ -37,6 +34,11 @@ export const Table = ({ } }; + const [sortDir, setSortDir] = useState<'ascending' | 'descending'>( + getSortDirection(initialSortDir), // FIXME: This is a bug (possible race condition with `epa.js`), it should be `initialSortDir` + ); + const [sortIndex, setSortIndex] = useState(initialSortIndex); + // If a header of a sortable column is clicked, sort the column or change the sort direction. const handleHeaderClick = (index: number) => { const column = columns[index]; diff --git a/app/client/src/routes/home.tsx b/app/client/src/routes/home.tsx index 8a5ae595..96d4cc06 100644 --- a/app/client/src/routes/home.tsx +++ b/app/client/src/routes/home.tsx @@ -388,7 +388,7 @@ export function QueryBuilder() { staticOptions={staticOptions} /> - {/*profile.key === 'actionsDocuments' && ( + {/*profile.key === 'actionDocuments' && ( <>
{preview.status === 'pending' ? ( @@ -463,7 +463,7 @@ export function QueryBuilder() { )*/} - {profile.key === 'actionsDocuments' && ( + {profile.key === 'actionDocuments' && ( <>
- ) : ( - - )} -
- - {preview.status === 'success' && ( - <> - {preview.data.length === 0 ? ( - No results found - ) : ( -
- - - - - - - - - - - - - - {preview.data.map((row) => ( - - - - - - - - - - ))} - -
Rank (%)Document URLAction IDRegionStateOrganization IDHMW Plan Summary URL
{row.rankPercent} - - {row.docFilename} - - {row.actionId}{row.region}{row.state}{row.organizationId}
-
- )} - - )} - - )*/} - {profile.key === 'actionDocuments' && ( <>
@@ -616,200 +508,205 @@ function FilterFieldInputs({ staticOptions, }: FilterFieldInputsProps) { // Store each field's element in a tuple with its key - const fieldsJsx: Array<[JSX.Element, string, string]> = removeNulls( - fields.map((fieldConfig) => { - const sourceFieldConfig = - 'source' in fieldConfig && - (fieldConfig.source as string) in sourceFields - ? sourceFields[fieldConfig.source as string] - : null; - - const tooltip = - fieldConfig.label in glossary - ? glossary[fieldConfig.label].definition - : null; - - switch (fieldConfig.type) { - case 'multiselect': - case 'select': - if ( - !sourceFieldConfig && - fieldConfig.type === 'multiselect' && - fieldConfig.key in staticOptions && - staticOptions[fieldConfig.key].length <= 5 - ) { + const fieldsJsx: Array<[JSX.Element, string, number | undefined]> = + removeNulls( + fields.map((fieldConfig) => { + const sourceFieldConfig = + 'source' in fieldConfig && + (fieldConfig.source as string) in sourceFields + ? sourceFields[fieldConfig.source as string] + : null; + + const tooltip = + fieldConfig.label in glossary + ? glossary[fieldConfig.label].definition + : null; + + switch (fieldConfig.type) { + case 'multiselect': + case 'select': + if ( + !sourceFieldConfig && + fieldConfig.type === 'multiselect' && + fieldConfig.key in staticOptions && + staticOptions[fieldConfig.key].length <= 5 + ) { + return [ + , + fieldConfig.key, + fieldConfig.width, + ]; + } + + const sourceKey = sourceFieldConfig?.key ?? null; + const sourceValue = sourceFieldConfig + ? sourceState[sourceFieldConfig.id] + : null; + const selectProps = { + apiKey, + apiUrl, + contextFilters: getContextFilters( + fieldConfig, + Object.values(filterFields).concat(Object.values(sourceFields)), + profile, + { + ...queryParams.filters, + ...(sourceKey && sourceValue + ? { [sourceKey]: sourceValue.value } + : {}), + }, + ), + defaultOption: + 'default' in fieldConfig ? fieldConfig.default : null, + filterHandler: filterHandlers[fieldConfig.key], + filterKey: fieldConfig.key, + filterLabel: fieldConfig.label, + filterValue: filterState[fieldConfig.key], + isMulti: isMultiOptionField(fieldConfig), + profile, + secondaryFilterKey: + 'secondaryKey' in fieldConfig ? fieldConfig.secondaryKey : null, + sortDirection: + 'direction' in fieldConfig + ? (fieldConfig.direction as SortDirection) + : 'asc', + sourceKey, + sourceValue, + staticOptions, + } as SelectFilterProps; + + return [ +
+ + + {tooltip && ( + + )} + +
+ {sourceFieldConfig ? ( + + ) : ( + + )} +
+
, + fieldConfig.key, + fieldConfig.width, + ]; + case 'date': + case 'year': + // Prevents range fields from rendering twice + if (fieldConfig.boundary === 'high') return null; + + const pairedField = fields.find( + (otherField) => + otherField.key !== fieldConfig.key && + otherField.type === fieldConfig.type && + otherField.domain === fieldConfig.domain, + ); + // All range inputs should have a high and a low boundary field + if (!pairedField || !isSingleValueRangeField(pairedField)) + return null; + return [ - , - fieldConfig.key, - fieldConfig.type, + fieldConfig.domain, + fieldConfig.width, ]; - } - - const sourceKey = sourceFieldConfig?.key ?? null; - const sourceValue = sourceFieldConfig - ? sourceState[sourceFieldConfig.id] - : null; - const selectProps = { - apiKey, - apiUrl, - contextFilters: getContextFilters( - fieldConfig, - Object.values(filterFields).concat(Object.values(sourceFields)), - profile, - { - ...queryParams.filters, - ...(sourceKey && sourceValue - ? { [sourceKey]: sourceValue.value } - : {}), - }, - ), - defaultOption: - 'default' in fieldConfig ? fieldConfig.default : null, - filterHandler: filterHandlers[fieldConfig.key], - filterKey: fieldConfig.key, - filterLabel: fieldConfig.label, - filterValue: filterState[fieldConfig.key], - isMulti: isMultiOptionField(fieldConfig), - profile, - secondaryFilterKey: - 'secondaryKey' in fieldConfig ? fieldConfig.secondaryKey : null, - sortDirection: - 'direction' in fieldConfig - ? (fieldConfig.direction as SortDirection) - : 'asc', - sourceKey, - sourceValue, - staticOptions, - } as SelectFilterProps; - - return [ -
- - - {tooltip && ( - - )} - -
- {sourceFieldConfig ? ( - + + + {tooltip && ( + + )} + +
+ - ) : ( - - )} -
-
, - fieldConfig.key, - fieldConfig.type, - ]; - case 'date': - case 'year': - // Prevents range fields from rendering twice - if (fieldConfig.boundary === 'high') return null; - - const pairedField = fields.find( - (otherField) => - otherField.key !== fieldConfig.key && - otherField.type === fieldConfig.type && - otherField.domain === fieldConfig.domain, - ); - // All range inputs should have a high and a low boundary field - if (!pairedField || !isSingleValueRangeField(pairedField)) +
+
, + fieldConfig.key, + fieldConfig.width, + ]; + default: return null; + } + }), + ); - return [ - , - fieldConfig.domain, - fieldConfig.type, - ]; - case 'text': - return [ -
- - - {tooltip && ( - - )} - -
- -
-
, - fieldConfig.key, - fieldConfig.type, - ]; - default: - return null; - } - }), - ); + const gridCols = [...Array(12).keys()].map((i) => i + 1); return (
- {fieldsJsx.map(([field, key, type]) => ( + {fieldsJsx.map(([field, key, width]) => (
Date: Mon, 30 Dec 2024 20:08:10 -0500 Subject: [PATCH 12/25] EQ-437 Adjusted table layout --- app/client/public/css/styles.css | 8 ++++++++ app/client/src/components/previewModal.tsx | 10 +++++----- app/client/src/components/table.tsx | 7 ++++++- app/client/src/routes/home.tsx | 22 +++++++++++----------- app/client/src/types/index.ts | 2 +- app/server/app/content/config/fields.json | 4 ++-- app/server/app/routes/attains.js | 7 ++++--- 7 files changed, 37 insertions(+), 23 deletions(-) diff --git a/app/client/public/css/styles.css b/app/client/public/css/styles.css index 55647c34..009f6126 100644 --- a/app/client/public/css/styles.css +++ b/app/client/public/css/styles.css @@ -92,6 +92,10 @@ cursor: pointer; } +.layout-fixed { + table-layout: fixed; +} + .sr-only { border: 0; clip: rect(0, 0, 0, 0); @@ -115,6 +119,10 @@ text-shadow: 0 0 3px rgba(0, 0, 0, 0.5); } +.whitespace-wrap tbody td { + white-space: normal; +} + .width-fit { width: fit-content; } diff --git a/app/client/src/components/previewModal.tsx b/app/client/src/components/previewModal.tsx index c62e6ebf..d0c7b557 100644 --- a/app/client/src/components/previewModal.tsx +++ b/app/client/src/components/previewModal.tsx @@ -93,11 +93,11 @@ export function PreviewModal({ const columns = useMemo( () => [ { id: 'rankPercent', name: 'Rank (%)', sortable: true }, - { id: 'actionDocumentUrl', name: 'Document', sortable: true }, - { id: 'actionId', name: 'Action ID', sortable: false }, - { id: 'regionId', name: 'Region', sortable: false }, - { id: 'state', name: 'State', sortable: false }, - { id: 'organizationId', name: 'Organization ID', sortable: false }, + { id: 'actionDocumentUrl', name: 'Document', sortable: true, width: 300 }, + { id: 'actionId', name: 'Action ID', sortable: true }, + { id: 'regionId', name: 'Region', sortable: true }, + { id: 'state', name: 'State', sortable: true }, + { id: 'organizationId', name: 'Organization ID', sortable: true }, ], [], ); diff --git a/app/client/src/components/table.tsx b/app/client/src/components/table.tsx index 8149100a..3a0576dc 100644 --- a/app/client/src/components/table.tsx +++ b/app/client/src/components/table.tsx @@ -71,9 +71,12 @@ export const Table = ({ { 'usa-table--striped': striped }, { 'usa-table--stacked': stacked }, { 'usa-table--sticky-header': stickyHeader }, + 'layout-fixed', + 'width-full', + 'whitespace-wrap', className, )} - tabIndex={tabIndex} + tabIndex={scrollable ? Math.max(0, tabIndex) : tabIndex} >