From 247708dac0d1db261618b288ba3b5fd8fd2a6cbe Mon Sep 17 00:00:00 2001 From: Avirup Sircar Date: Fri, 1 Nov 2024 14:35:54 -0400 Subject: [PATCH] Added CFEBDSOnTheFlyComputeDealii.t.cpp --- src/basis/CFEBDSOnTheFlyComputeDealii.cpp | 1 + src/basis/CFEBDSOnTheFlyComputeDealii.h | 242 +++ src/basis/CFEBDSOnTheFlyComputeDealii.t.cpp | 1581 +++++++++++++++++ src/basis/CFEBasisDataStorageDealii.t.cpp | 2 +- src/basis/CFEOverlapInverseOpContextGLL.t.cpp | 119 +- src/basis/CFEOverlapOperatorContext.h | 4 +- src/basis/CFEOverlapOperatorContext.t.cpp | 150 +- src/basis/CMakeLists.txt | 1 + ...nrichmentClassicalInterfaceSpherical.t.cpp | 3 +- src/basis/FEBasisOperations.t.cpp | 34 +- src/electrostatics/LaplaceOperatorContextFE.h | 12 + .../LaplaceOperatorContextFE.t.cpp | 32 + .../PoissonLinearSolverFunctionFE.h | 7 +- .../PoissonLinearSolverFunctionFE.t.cpp | 23 +- src/ksdft/DensityCalculator.t.cpp | 294 +-- src/ksdft/KineticFE.t.cpp | 14 +- src/linearAlgebra/BlasLapack.h | 4 +- src/linearAlgebra/BlasLapack.t.cpp | 4 +- src/linearAlgebra/BlasLapackKernels.cpp | 12 +- src/linearAlgebra/BlasLapackKernels.h | 4 +- test/basis/TestBasisDataStorageMemOpt.py | 249 +++ test/basis/src/CMakeLists.txt | 4 + test/basis/src/TestBasisDataStorageMemOpt.cpp | 336 ++++ 23 files changed, 2795 insertions(+), 337 deletions(-) create mode 100644 src/basis/CFEBDSOnTheFlyComputeDealii.cpp create mode 100644 src/basis/CFEBDSOnTheFlyComputeDealii.h create mode 100644 src/basis/CFEBDSOnTheFlyComputeDealii.t.cpp create mode 100644 test/basis/TestBasisDataStorageMemOpt.py create mode 100644 test/basis/src/TestBasisDataStorageMemOpt.cpp diff --git a/src/basis/CFEBDSOnTheFlyComputeDealii.cpp b/src/basis/CFEBDSOnTheFlyComputeDealii.cpp new file mode 100644 index 00000000..3173a9d0 --- /dev/null +++ b/src/basis/CFEBDSOnTheFlyComputeDealii.cpp @@ -0,0 +1 @@ +#include "CFEBDSOnTheFlyComputeDealii.h" diff --git a/src/basis/CFEBDSOnTheFlyComputeDealii.h b/src/basis/CFEBDSOnTheFlyComputeDealii.h new file mode 100644 index 00000000..147a0822 --- /dev/null +++ b/src/basis/CFEBDSOnTheFlyComputeDealii.h @@ -0,0 +1,242 @@ +/****************************************************************************** + * Copyright (c) 2021. * + * The Regents of the University of Michigan and DFT-EFE developers. * + * * + * This file is part of the DFT-EFE code. * + * * + * DFT-EFE is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as * + * published by the Free Software Foundation, either version 3 of * + * the License, or (at your option) any later version. * + * * + * DFT-EFE is distributed in the hope that it will be useful, but * + * WITHOUT ANY WARRANTY; without even the implied warranty * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the GNU Lesser General Public * + * License at the top level of DFT-EFE distribution. If not, see * + * . * + ******************************************************************************/ + +/* + * @author Avirup Sircar + */ + +#ifndef dftefeCFEBDSOnTheFlyComputeDealii_h +#define dftefeCFEBDSOnTheFlyComputeDealii_h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace dftefe +{ + namespace basis + { + /** + * @brief An abstract class to store and access data for a given basis, + * such as the basis function values on a quadrature grid, the overlap + * matrix of the basis, etc. + */ + template + class CFEBDSOnTheFlyComputeDealii + : public FEBasisDataStorage + { + public: + using QuadraturePointAttributes = quadrature::QuadraturePointAttributes; + using QuadratureRuleAttributes = quadrature::QuadratureRuleAttributes; + using Storage = + typename BasisDataStorage::Storage; + + CFEBDSOnTheFlyComputeDealii( + std::shared_ptr feBDH, + const quadrature::QuadratureRuleAttributes &quadratureRuleAttributes, + const BasisStorageAttributesBoolMap basisStorageAttributesBoolMap, + const size_type maxCellBlock, + linearAlgebra::LinAlgOpContext &linAlgOpContext); + + ~CFEBDSOnTheFlyComputeDealii() = default; + + std::shared_ptr + getBasisDofHandler() const override; + + void + evaluateBasisData( + const quadrature::QuadratureRuleAttributes &quadratureRuleAttributes, + const BasisStorageAttributesBoolMap basisStorageAttributesBoolMap) + override; + + void + evaluateBasisData( + const quadrature::QuadratureRuleAttributes &quadratureRuleAttributes, + std::shared_ptr + quadratureRuleContainer, + const BasisStorageAttributesBoolMap basisStorageAttributesBoolMap) + override; + + void + evaluateBasisData( + const quadrature::QuadratureRuleAttributes &quadratureRuleAttributes, + std::vector> + quadratureRuleVec, + const BasisStorageAttributesBoolMap basisStorageAttributesBoolMap) + override; + + void + evaluateBasisData( + const quadrature::QuadratureRuleAttributes &quadratureRuleAttributes, + std::shared_ptr + baseQuadratureRuleAdaptive, + std::vector> + & functions, + const std::vector & absoluteTolerances, + const std::vector & relativeTolerances, + const std::vector & integralThresholds, + const double smallestCellVolume, + const unsigned int maxRecursion, + const BasisStorageAttributesBoolMap basisStorageAttributesBoolMap) + override; + + + // void + // evaluateBasisData( + // std::shared_ptr + // quadratureRuleContainer, + // const QuadratureRuleAttributes & quadratureRuleAttributes, + // const BasisStorageAttributesBoolMap boolBasisStorageFlagsObj) + // override; + + void + deleteBasisData() override; + + + + // std::shared_ptr + // getCellQuadratureRuleContainer(std::shared_ptr> + // const QuadratureRuleAttributes &quadratureRuleAttributes) const + // override; + // functions to get data for a basis function on a given quad point in a + // cell + Storage + getBasisData(const QuadraturePointAttributes &attributes, + const size_type basisId) const override; + Storage + getBasisGradientData(const QuadraturePointAttributes &attributes, + const size_type basisId) const override; + Storage + getBasisHessianData(const QuadraturePointAttributes &attributes, + const size_type basisId) const override; + + // functions to get data for a basis function on all quad points in a cell + Storage + getBasisDataInCell(const size_type cellId, + const size_type basisId) const override; + Storage + getBasisGradientDataInCell(const size_type cellId, + const size_type basisId) const override; + Storage + getBasisHessianDataInCell(const size_type cellId, + const size_type basisId) const override; + + // functions to get data for all basis functions on all quad points in a + // cell + Storage + getBasisDataInCell(const size_type cellId) const override; + Storage + getBasisGradientDataInCell(const size_type cellId) const override; + Storage + getBasisHessianDataInCell(const size_type cellId) const override; + + Storage + getJxWInCell(const size_type cellId) const override; + + // functions to get data for all basis functions on all quad points in all + // cells + const Storage & + getBasisDataInAllCells() const override; + const Storage & + getBasisGradientDataInAllCells() const override; + const Storage & + getBasisHessianDataInAllCells() const override; + + const Storage & + getJxWInAllCells() const override; + + // get overlap of two basis functions in a cell + Storage + getBasisOverlap(const size_type cellId, + const size_type basisId1, + const size_type basisId2) const override; + + // get overlap of all the basis functions in a cell + Storage + getBasisOverlapInCell(const size_type cellId) const override; + + // get overlap of all the basis functions in all cells + const Storage & + getBasisOverlapInAllCells() const override; + + + // get the laplace operator in a cell + Storage + getBasisGradNiGradNjInCell(const size_type cellId) const override; + + // get laplace operator in all cells + const Storage & + getBasisGradNiGradNjInAllCells() const override; + + + std::shared_ptr + getQuadratureRuleContainer() const override; + + // functions to get data for basis functions on all quad points in range + // of cells + void + getBasisDataInCellRange(std::pair cellRange, + Storage &basisData) const override; + void + getBasisGradientDataInCellRange( + std::pair cellRange, + Storage & basisGradientData) const override; + + private: + bool d_evaluateBasisData; + std::shared_ptr< + const CFEBasisDofHandlerDealii> + d_feBDH; + std::shared_ptr + d_quadratureRuleContainer; + QuadratureRuleAttributes d_quadratureRuleAttributes; + BasisStorageAttributesBoolMap d_basisStorageAttributesBoolMap; + std::shared_ptr d_basisQuadStorage; + std::shared_ptr d_JxWStorage; + std::shared_ptr d_basisGradientParaCellQuadStorage; + std::shared_ptr d_basisJacobianInvQuadStorage; + std::shared_ptr d_basisHessianQuadStorage; + std::vector d_dofsInCell; + std::vector d_nQuadPointsIncell; + std::vector d_cellStartIdsBasisJacobianInvQuadStorage; + std::vector d_cellStartIdsBasisHessianQuadStorage; + std::vector d_cellStartIdsBasisQuadStorage; + size_type d_maxCellBlock; + Storage d_tmpGradientBlock; + linearAlgebra::LinAlgOpContext &d_linAlgOpContext; + + }; // end of CFEBDSOnTheFlyComputeDealii + } // end of namespace basis +} // end of namespace dftefe +#include +#endif // dftefeCFEBDSOnTheFlyComputeDealii_h diff --git a/src/basis/CFEBDSOnTheFlyComputeDealii.t.cpp b/src/basis/CFEBDSOnTheFlyComputeDealii.t.cpp new file mode 100644 index 00000000..8a6667cc --- /dev/null +++ b/src/basis/CFEBDSOnTheFlyComputeDealii.t.cpp @@ -0,0 +1,1581 @@ + +/****************************************************************************** + * Copyright (c) 2021. * + * The Regents of the University of Michigan and DFT-EFE developers. * + * * + * This file is part of the DFT-EFE code. * + * * + * DFT-EFE is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as * + * published by the Free Software Foundation, either version 3 of * + * the License, or (at your option) any later version. * + * * + * DFT-EFE is distributed in the hope that it will be useful, but * + * WITHOUT ANY WARRANTY; without even the implied warranty * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the GNU Lesser General Public * + * License at the top level of DFT-EFE distribution. If not, see * + * . * + ******************************************************************************/ + +/* + * @author Avirup Sircar + */ + +#include +#include +#include "DealiiConversions.h" +#include +#include +#include +#include +#include +#include +namespace dftefe +{ + namespace basis + { + namespace CFEBDSOnTheFlyComputeDealiiInternal + { + template + void + computeJacobianInvTimesGradPara( + std::pair cellRange, + const std::vector & dofsInCell, + const std::vector & nQuadPointsInCell, + const std::shared_ptr< + typename BasisDataStorage::Storage> + & basisJacobianInvQuadStorage, + const std::vector &cellStartIdsBasisJacobianInvQuadStorage, + typename BasisDataStorage::Storage + & tmpGradientBlock, + linearAlgebra::LinAlgOpContext &linAlgOpContext, + typename BasisDataStorage::Storage + &basisGradientData) + { + size_type numCellsInBlock = cellRange.second - cellRange.first; + + size_type numMats = 0; + for (size_type iCell = 0; iCell < numCellsInBlock; ++iCell) + { + for (size_type iQuad = 0; iQuad < nQuadPointsInCell[iCell]; ++iQuad) + { + numMats += 1; + } + } + + utils::MemoryTransfer + memoryTransfer; + + std::vector transA( + numMats, linearAlgebra::blasLapack::Op::NoTrans); + std::vector transB( + numMats, linearAlgebra::blasLapack::Op::NoTrans); + std::vector mSizesTmp(numMats, 0); + std::vector nSizesTmp(numMats, 0); + std::vector kSizesTmp(numMats, 0); + std::vector ldaSizesTmp(numMats, 0); + std::vector ldbSizesTmp(numMats, 0); + std::vector ldcSizesTmp(numMats, 0); + std::vector strideATmp(numMats, 0); + std::vector strideBTmp(numMats, 0); + std::vector strideCTmp(numMats, 0); + + for (size_type iCell = 0; iCell < numCellsInBlock; ++iCell) + { + for (size_type iQuad = 0; iQuad < nQuadPointsInCell[iCell]; ++iQuad) + { + size_type index = iCell * nQuadPointsInCell[iCell] + iQuad; + mSizesTmp[index] = dofsInCell[iCell]; + nSizesTmp[index] = dim; + kSizesTmp[index] = dim; + ldaSizesTmp[index] = mSizesTmp[index]; + ldbSizesTmp[index] = kSizesTmp[index]; + ldcSizesTmp[index] = mSizesTmp[index]; + strideATmp[index] = mSizesTmp[index] * kSizesTmp[index]; + strideBTmp[index] = kSizesTmp[index] * nSizesTmp[index]; + strideCTmp[index] = mSizesTmp[index] * nSizesTmp[index]; + } + } + + utils::MemoryStorage mSizes(numMats); + utils::MemoryStorage nSizes(numMats); + utils::MemoryStorage kSizes(numMats); + utils::MemoryStorage ldaSizes(numMats); + utils::MemoryStorage ldbSizes(numMats); + utils::MemoryStorage ldcSizes(numMats); + utils::MemoryStorage strideA(numMats); + utils::MemoryStorage strideB(numMats); + utils::MemoryStorage strideC(numMats); + memoryTransfer.copy(numMats, mSizes.data(), mSizesTmp.data()); + memoryTransfer.copy(numMats, nSizes.data(), nSizesTmp.data()); + memoryTransfer.copy(numMats, kSizes.data(), kSizesTmp.data()); + memoryTransfer.copy(numMats, ldaSizes.data(), ldaSizesTmp.data()); + memoryTransfer.copy(numMats, ldbSizes.data(), ldbSizesTmp.data()); + memoryTransfer.copy(numMats, ldcSizes.data(), ldcSizesTmp.data()); + memoryTransfer.copy(numMats, strideA.data(), strideATmp.data()); + memoryTransfer.copy(numMats, strideB.data(), strideBTmp.data()); + memoryTransfer.copy(numMats, strideC.data(), strideCTmp.data()); + + ValueTypeBasisData alpha = 1.0; + ValueTypeBasisData beta = 0.0; + + ValueTypeBasisData *B = + basisJacobianInvQuadStorage->data() + + cellStartIdsBasisJacobianInvQuadStorage[cellRange.first]; + linearAlgebra::blasLapack::gemmStridedVarBatched( + linearAlgebra::blasLapack::Layout::ColMajor, + numMats, + transA.data(), + transB.data(), + strideA.data(), + strideB.data(), + strideC.data(), + mSizes.data(), + nSizes.data(), + kSizes.data(), + alpha, + tmpGradientBlock.data(), + ldaSizes.data(), + B, + ldbSizes.data(), + beta, + basisGradientData.data(), + ldcSizes.data(), + linAlgOpContext); + } + + // + // stores the classical FE basis data for a h-refined FE mesh + // (i.e., uniform p in all elements) and for a uniform quadrature + // Gauss or Gauss-Legendre-Lobatto (GLL) quadrature + // rule across all the cells in the mesh. + // + template + void + storeValuesHRefinedSameQuadEveryCell( + std::shared_ptr< + const CFEBasisDofHandlerDealii> + feBDH, + std::shared_ptr< + typename BasisDataStorage::Storage> + &basisQuadStorage, + std::shared_ptr< + typename BasisDataStorage::Storage> + &basisGradientParaCellQuadStorage, + std::shared_ptr< + typename BasisDataStorage::Storage> + &basisJacobianInvQuadStorage, + std::shared_ptr< + typename BasisDataStorage::Storage> + & basisHessianQuadStorage, + const quadrature::QuadratureRuleAttributes &quadratureRuleAttributes, + std::shared_ptr + quadratureRuleContainer, + std::vector &nQuadPointsInCell, + std::vector &cellStartIdsBasisQuadStorage, + std::vector &cellStartIdsBasisJacobianInvQuadStorage, + std::vector &cellStartIdsBasisHessianQuadStorage, + const BasisStorageAttributesBoolMap basisStorageAttributesBoolMap) + { + // for processors where there are no cells + bool numCellsZero = feBDH->nLocallyOwnedCells() == 0 ? true : false; + const quadrature::QuadratureFamily quadratureFamily = + quadratureRuleAttributes.getQuadratureFamily(); + const size_type num1DQuadPoints = + quadratureRuleAttributes.getNum1DPoints(); + dealii::Quadrature dealiiQuadratureRule; + if (quadratureFamily == quadrature::QuadratureFamily::GAUSS) + { + dealiiQuadratureRule = dealii::QGauss(num1DQuadPoints); + } + else if (quadratureFamily == quadrature::QuadratureFamily::GLL) + { + dealiiQuadratureRule = dealii::QGaussLobatto(num1DQuadPoints); + } + else if (quadratureFamily == + quadrature::QuadratureFamily::GAUSS_SUBDIVIDED) + { + if (!numCellsZero) + { + // get the parametric points and jxw in each cell according to + // the attribute. + unsigned int cellIndex = 0; + const std::vector &cellParametricQuadPoints = + quadratureRuleContainer->getCellParametricPoints(cellIndex); + std::vector> + dealiiParametricQuadPoints(0); + + // get the quad weights in each cell + const std::vector &quadWeights = + quadratureRuleContainer->getCellQuadratureWeights(cellIndex); + convertToDealiiPoint(cellParametricQuadPoints, + dealiiParametricQuadPoints); + + // Ask dealii to create quad rule in each cell + dealiiQuadratureRule = + dealii::Quadrature(dealiiParametricQuadPoints, + quadWeights); + } + } + + else + { + utils::throwException( + false, + "In the case of a h-refined finite " + "element mesh with a uniform quadrature rule, support is provided " + "only for Gauss and Gauss-Legendre-Lobatto quadrature rule."); + } + + bool isQuadCartesianTensorStructured = + quadratureRuleAttributes.isCartesianTensorStructured(); + utils::throwException( + isQuadCartesianTensorStructured, + "In the case of a h-refined finite element mesh with a uniform quadrature " + "rule, storing the classical finite element basis data is only supported " + " for a Cartesian tensor structured quadrature grid."); + + dealii::UpdateFlags dealiiUpdateFlags = + dealii::update_values | dealii::update_JxW_values; + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreGradient) + ->second) + dealiiUpdateFlags |= dealii::update_inverse_jacobians; + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreHessian) + ->second) + dealiiUpdateFlags |= dealii::update_hessians; + + // NOTE: cellId 0 passed as we assume h-refine finite element mesh in + // this function + const size_type cellId = 0; + // get real cell feValues + dealii::FEValues dealiiFEValues(feBDH->getReferenceFE(cellId), + dealiiQuadratureRule, + dealiiUpdateFlags); + + dealii::UpdateFlags dealiiUpdateFlagsPara; + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreGradient) + ->second) + dealiiUpdateFlagsPara |= dealii::update_gradients; + // This is for getting the gradient in parametric cell + dealii::FEValues dealiiFEValuesPara(feBDH->getReferenceFE(cellId), + dealiiQuadratureRule, + dealiiUpdateFlags); + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreGradient) + ->second) + { + dealii::Triangulation<3> referenceCell; + dealii::GridGenerator::hyper_cube(referenceCell, 0., 1.); + dealiiFEValuesPara.reinit(referenceCell.begin()); + } + + const size_type numLocallyOwnedCells = feBDH->nLocallyOwnedCells(); + // NOTE: cellId 0 passed as we assume only H refined in this function + const size_type dofsPerCell = feBDH->nCellDofs(cellId); + const size_type nQuadPointsPerCell = + numCellsZero ? 0 : + quadratureRuleContainer->nCellQuadraturePoints(cellId); + + const size_type nDimxDofsPerCellxNumQuad = + dim * dofsPerCell * nQuadPointsPerCell; + const size_type nDimSqxDofsPerCellxNumQuad = + dim * dim * dofsPerCell * nQuadPointsPerCell; + const size_type nDimSqxNumQuad = dim * dim * nQuadPointsPerCell; + const size_type DofsPerCellxNumQuad = dofsPerCell * nQuadPointsPerCell; + + nQuadPointsInCell.resize(numLocallyOwnedCells, nQuadPointsPerCell); + std::vector basisQuadStorageTmp(0); + std::vector basisJacobianInvQuadStorageTmp(0); + std::vector basisGradientParaCellQuadStorageTmp(0); + std::vector basisHessianQuadStorageTmp(0); + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreValues) + ->second) + { + basisQuadStorage = + std::make_shared::Storage>( + dofsPerCell * nQuadPointsPerCell); + basisQuadStorageTmp.resize(dofsPerCell * nQuadPointsPerCell, + ValueTypeBasisData(0)); + cellStartIdsBasisQuadStorage.resize(numLocallyOwnedCells, 0); + } + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreGradient) + ->second) + { + basisJacobianInvQuadStorage = + std::make_shared::Storage>( + numLocallyOwnedCells * nDimSqxNumQuad); + basisJacobianInvQuadStorageTmp.resize(numLocallyOwnedCells * + nDimSqxNumQuad); + basisGradientParaCellQuadStorage = + std::make_shared::Storage>( + nDimxDofsPerCellxNumQuad); + basisGradientParaCellQuadStorageTmp.resize( + nDimxDofsPerCellxNumQuad); + cellStartIdsBasisJacobianInvQuadStorage.resize(numLocallyOwnedCells, + 0); + } + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreHessian) + ->second) + { + basisHessianQuadStorage = + std::make_shared::Storage>( + numLocallyOwnedCells * nDimSqxDofsPerCellxNumQuad); + basisHessianQuadStorageTmp.resize(numLocallyOwnedCells * + nDimSqxDofsPerCellxNumQuad, + ValueTypeBasisData(0)); + cellStartIdsBasisHessianQuadStorage.resize(numLocallyOwnedCells, 0); + } + + auto locallyOwnedCellIter = feBDH->beginLocallyOwnedCells(); + // Do dynamic cast if there is a dof in the processor + std::shared_ptr> feCellDealii = nullptr; + if (numLocallyOwnedCells != 0) + { + feCellDealii = std::dynamic_pointer_cast>( + *locallyOwnedCellIter); + utils::throwException( + feCellDealii != nullptr, + "Dynamic casting of FECellBase to FECellDealii not successful"); + } + + size_type cellIndex = 0; + for (; locallyOwnedCellIter != feBDH->endLocallyOwnedCells(); + ++locallyOwnedCellIter) + { + feCellDealii = std::dynamic_pointer_cast>( + *locallyOwnedCellIter); + dealiiFEValues.reinit(feCellDealii->getDealiiFECellIter()); + // + // NOTE: For a h-refined (i.e., uniform FE order) mesh with the same + // quadraure rule in all elements, the classical FE basis values + // remain the same across as in the reference cell (unit + // n-dimensional cell). Thus, to optimize on memory we only store + // the classical FE basis values on the first cell + // + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreValues) + ->second && + locallyOwnedCellIter == feBDH->beginLocallyOwnedCells()) + { + for (unsigned int iNode = 0; iNode < dofsPerCell; iNode++) + { + for (unsigned int qPoint = 0; qPoint < nQuadPointsPerCell; + qPoint++) + { + auto it = basisQuadStorageTmp.begin() + + cellIndex * DofsPerCellxNumQuad + + qPoint * dofsPerCell + iNode; + // iNode * nQuadPointsPerCell + qPoint; + *it = dealiiFEValues.shape_value(iNode, qPoint); + } + } + } + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreGradient) + ->second) + { + cellStartIdsBasisJacobianInvQuadStorage[cellIndex] = + cellIndex * nDimSqxNumQuad; + if (locallyOwnedCellIter == feBDH->beginLocallyOwnedCells()) + { + for (unsigned int iNode = 0; iNode < dofsPerCell; iNode++) + { + for (unsigned int qPoint = 0; + qPoint < nQuadPointsPerCell; + qPoint++) + { + auto shapeGrad = + dealiiFEValuesPara.shape_grad(iNode, qPoint); + for (unsigned int iDim = 0; iDim < dim; iDim++) + { + auto it = + basisGradientParaCellQuadStorageTmp.begin() + + cellIndex * nDimxDofsPerCellxNumQuad + + qPoint * dim * dofsPerCell + + iDim * dofsPerCell + iNode; + *it = shapeGrad[iDim]; + } + } + } + } + auto &mappingJacInv = dealiiFEValues.get_inverse_jacobians(); + size_type numJacobiansPerCell = nQuadPointsPerCell; + for (unsigned int iQuad = 0; iQuad < numJacobiansPerCell; + ++iQuad) + { + for (unsigned int iDim = 0; iDim < dim; iDim++) + { + for (unsigned int jDim = 0; jDim < dim; jDim++) + { + auto it = basisJacobianInvQuadStorageTmp.begin() + + cellIndex * nDimSqxNumQuad + + iQuad * dim * dim + jDim * dim + iDim; + *it = mappingJacInv[iQuad][iDim][jDim]; + } + } + } + } + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreHessian) + ->second) + { + cellStartIdsBasisHessianQuadStorage[cellIndex] = + cellIndex * nDimSqxDofsPerCellxNumQuad; + for (unsigned int iNode = 0; iNode < dofsPerCell; iNode++) + { + for (unsigned int qPoint = 0; qPoint < nQuadPointsPerCell; + qPoint++) + { + auto shapeHessian = + dealiiFEValues.shape_hessian(iNode, qPoint); + for (unsigned int iDim = 0; iDim < dim; iDim++) + { + for (unsigned int jDim = 0; jDim < dim; jDim++) + { + auto it = + basisHessianQuadStorageTmp.begin() + + cellIndex * nDimSqxDofsPerCellxNumQuad + + qPoint * dim * dim * dofsPerCell + + iDim * dim * dofsPerCell + + jDim * dofsPerCell + iNode; + *it = shapeHessian[iDim][jDim]; + } + } + } + } + } + + cellIndex++; + } + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreValues) + ->second) + { + utils::MemoryTransfer::copy( + basisQuadStorageTmp.size(), + basisQuadStorage->data(), + basisQuadStorageTmp.data()); + } + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreGradient) + ->second) + { + utils::MemoryTransfer::copy( + basisGradientParaCellQuadStorageTmp.size(), + basisGradientParaCellQuadStorage->data(), + basisGradientParaCellQuadStorageTmp.data()); + + utils::MemoryTransfer::copy( + basisJacobianInvQuadStorageTmp.size(), + basisJacobianInvQuadStorage->data(), + basisJacobianInvQuadStorageTmp.data()); + } + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreHessian) + ->second) + { + utils::MemoryTransfer::copy( + basisHessianQuadStorageTmp.size(), + basisHessianQuadStorage->data(), + basisHessianQuadStorageTmp.data()); + } + } + } // namespace CFEBDSOnTheFlyComputeDealiiInternal + + template + CFEBDSOnTheFlyComputeDealii:: + CFEBDSOnTheFlyComputeDealii( + std::shared_ptr feBDH, + const quadrature::QuadratureRuleAttributes &quadratureRuleAttributes, + const BasisStorageAttributesBoolMap basisStorageAttributesBoolMap, + const size_type maxCellBlock, + linearAlgebra::LinAlgOpContext &linAlgOpContext) + : d_dofsInCell(0) + , d_quadratureRuleAttributes(quadratureRuleAttributes) + , d_basisStorageAttributesBoolMap(basisStorageAttributesBoolMap) + , d_maxCellBlock(maxCellBlock) + , d_linAlgOpContext(linAlgOpContext) + { + d_evaluateBasisData = false; + d_feBDH = std::dynamic_pointer_cast< + const CFEBasisDofHandlerDealii>( + feBDH); + utils::throwException( + d_feBDH != nullptr, + " Could not cast the FEBasisDofHandler to CFEBasisDofHandlerDealii in CFEBDSOnTheFlyComputeDealii"); + // const size_type numConstraints = constraintsVec.size(); + // const size_type numQuadRuleType = quadratureRuleAttributesVec.size(); + std::shared_ptr> dofHandler = + d_feBDH->getDoFHandler(); + const size_type numLocallyOwnedCells = d_feBDH->nLocallyOwnedCells(); + d_dofsInCell.resize(numLocallyOwnedCells, 0); + for (size_type iCell = 0; iCell < numLocallyOwnedCells; ++iCell) + { + d_dofsInCell[iCell] = d_feBDH->nCellDofs(iCell); + } + d_tmpGradientBlock = + typename BasisDataStorage::Storage(0); + } + + template + void + CFEBDSOnTheFlyComputeDealii:: + evaluateBasisData( + const quadrature::QuadratureRuleAttributes &quadratureRuleAttributes, + const BasisStorageAttributesBoolMap basisStorageAttributesBoolMap) + { + d_evaluateBasisData = true; + utils::throwException( + d_quadratureRuleAttributes == quadratureRuleAttributes, + "Incorrect quadratureRuleAttributes given."); + /** + * @note We assume a linear mapping from the reference cell + * to the real cell. + */ + LinearCellMappingDealii linearCellMappingDealii; + + size_type num1DQuadPoints = quadratureRuleAttributes.getNum1DPoints(); + quadrature::QuadratureFamily quadFamily = + quadratureRuleAttributes.getQuadratureFamily(); + + if (quadFamily == quadrature::QuadratureFamily::GAUSS) + { + std::shared_ptr quadratureRule = + std::make_shared(dim, + num1DQuadPoints); + d_quadratureRuleContainer = + std::make_shared( + quadratureRuleAttributes, + quadratureRule, + d_feBDH->getTriangulation(), + linearCellMappingDealii); + } + else if (quadFamily == quadrature::QuadratureFamily::GLL) + { + std::shared_ptr quadratureRule = + std::make_shared(dim, + num1DQuadPoints); + d_quadratureRuleContainer = + std::make_shared( + quadratureRuleAttributes, + quadratureRule, + d_feBDH->getTriangulation(), + linearCellMappingDealii); + } + else + utils::throwException( + false, "Incorrect arguments given for this Quadrature family."); + + std::shared_ptr< + typename BasisDataStorage::Storage> + basisQuadStorage; + std::shared_ptr< + typename BasisDataStorage::Storage> + basisGradientParaCellQuadStorage; + std::shared_ptr< + typename BasisDataStorage::Storage> + basisJacobianInvQuadStorage; + std::shared_ptr< + typename BasisDataStorage::Storage> + basisHessianQuadStorage; + + std::vector nQuadPointsInCell(0); + std::vector cellStartIdsBasisQuadStorage(0); + std::vector cellStartIdsBasisJacobianInvQuadStorage(0); + std::vector cellStartIdsBasisHessianQuadStorage(0); + CFEBDSOnTheFlyComputeDealiiInternal::storeValuesHRefinedSameQuadEveryCell< + ValueTypeBasisCoeff, + ValueTypeBasisData, + memorySpace, + dim>(d_feBDH, + basisQuadStorage, + basisGradientParaCellQuadStorage, + basisJacobianInvQuadStorage, + basisHessianQuadStorage, + quadratureRuleAttributes, + d_quadratureRuleContainer, + nQuadPointsInCell, + cellStartIdsBasisQuadStorage, + cellStartIdsBasisJacobianInvQuadStorage, + cellStartIdsBasisHessianQuadStorage, + basisStorageAttributesBoolMap); + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreValues) + ->second) + { + d_basisQuadStorage = basisQuadStorage; + d_cellStartIdsBasisQuadStorage = cellStartIdsBasisQuadStorage; + } + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreGradient) + ->second) + { + d_basisGradientParaCellQuadStorage = basisGradientParaCellQuadStorage; + d_basisJacobianInvQuadStorage = basisJacobianInvQuadStorage; + d_cellStartIdsBasisJacobianInvQuadStorage = + cellStartIdsBasisJacobianInvQuadStorage; + d_tmpGradientBlock.resize(d_dofsInCell[0] * nQuadPointsInCell[0] * + dim * d_maxCellBlock); + size_type gradientParaCellSize = + basisGradientParaCellQuadStorage->size(); + for (size_type iCell = 0; iCell < d_maxCellBlock; ++iCell) + { + d_tmpGradientBlock.template copyFrom( + basisGradientParaCellQuadStorage->data(), + gradientParaCellSize, + 0, + gradientParaCellSize * iCell); + } + } + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreHessian) + ->second) + { + d_basisHessianQuadStorage = basisHessianQuadStorage; + d_cellStartIdsBasisHessianQuadStorage = + cellStartIdsBasisHessianQuadStorage; + } + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreOverlap) + ->second) + { + utils::throwException( + false, + "Basis Overlap not implemented in CFEBDSOnTheFlyComputeDealii"); + } + d_nQuadPointsIncell = nQuadPointsInCell; + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreGradNiGradNj) + ->second) + { + utils::throwException( + false, + "Basis GradNiGradNj not implemented in CFEBDSOnTheFlyComputeDealii"); + } + + if (basisStorageAttributesBoolMap.find(BasisStorageAttributes::StoreJxW) + ->second) + { + std::shared_ptr< + typename BasisDataStorage::Storage> + jxwQuadStorage; + + const std::vector &jxwVec = + d_quadratureRuleContainer->getJxW(); + jxwQuadStorage = + std::make_shared::Storage>( + jxwVec.size()); + + utils::MemoryTransfer::copy( + jxwVec.size(), jxwQuadStorage->data(), jxwVec.data()); + + d_JxWStorage = jxwQuadStorage; + } + } + + template + void + CFEBDSOnTheFlyComputeDealii:: + evaluateBasisData( + const quadrature::QuadratureRuleAttributes &quadratureRuleAttributes, + std::shared_ptr + quadratureRuleContainer, + const BasisStorageAttributesBoolMap basisStorageAttributesBoolMap) + { + d_evaluateBasisData = true; + utils::throwException( + d_quadratureRuleAttributes == quadratureRuleAttributes, + "Incorrect quadratureRuleAttributes given."); + /** + * @note We assume a linear mapping from the reference cell + * to the real cell. + */ + LinearCellMappingDealii linearCellMappingDealii; + + quadrature::QuadratureFamily quadFamily = + quadratureRuleAttributes.getQuadratureFamily(); + + if (quadFamily == quadrature::QuadratureFamily::GAUSS_SUBDIVIDED) + d_quadratureRuleContainer = quadratureRuleContainer; + else + utils::throwException( + false, + "Incorrect arguments given for this Quadrature family. On the fly computation is not available for non-uniform quadrature rule in cells."); + + std::shared_ptr< + typename BasisDataStorage::Storage> + basisQuadStorage; + std::shared_ptr< + typename BasisDataStorage::Storage> + basisGradientParaCellQuadStorage; + std::shared_ptr< + typename BasisDataStorage::Storage> + basisJacobianInvQuadStorage; + std::shared_ptr< + typename BasisDataStorage::Storage> + basisHessianQuadStorage; + std::vector nQuadPointsInCell(0); + std::vector cellStartIdsBasisQuadStorage(0); + std::vector cellStartIdsBasisJacobianInvQuadStorage(0); + std::vector cellStartIdsBasisHessianQuadStorage(0); + + CFEBDSOnTheFlyComputeDealiiInternal::storeValuesHRefinedSameQuadEveryCell< + ValueTypeBasisCoeff, + ValueTypeBasisData, + memorySpace, + dim>(d_feBDH, + basisQuadStorage, + basisGradientParaCellQuadStorage, + basisJacobianInvQuadStorage, + basisHessianQuadStorage, + quadratureRuleAttributes, + d_quadratureRuleContainer, + nQuadPointsInCell, + cellStartIdsBasisQuadStorage, + cellStartIdsBasisJacobianInvQuadStorage, + cellStartIdsBasisHessianQuadStorage, + basisStorageAttributesBoolMap); + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreValues) + ->second) + { + d_basisQuadStorage = basisQuadStorage; + d_cellStartIdsBasisQuadStorage = cellStartIdsBasisQuadStorage; + } + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreGradient) + ->second) + { + d_basisGradientParaCellQuadStorage = basisGradientParaCellQuadStorage; + d_basisJacobianInvQuadStorage = basisJacobianInvQuadStorage; + d_cellStartIdsBasisJacobianInvQuadStorage = + cellStartIdsBasisJacobianInvQuadStorage; + d_tmpGradientBlock.resize(d_dofsInCell[0] * nQuadPointsInCell[0] * + dim * d_maxCellBlock); + size_type gradientParaCellSize = + basisGradientParaCellQuadStorage->size(); + for (size_type iCell = 0; iCell < d_maxCellBlock; ++iCell) + { + d_tmpGradientBlock.template copyFrom( + basisGradientParaCellQuadStorage->data(), + gradientParaCellSize, + 0, + gradientParaCellSize * iCell); + } + } + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreHessian) + ->second) + { + d_basisHessianQuadStorage = basisHessianQuadStorage; + d_cellStartIdsBasisHessianQuadStorage = + cellStartIdsBasisHessianQuadStorage; + } + + d_nQuadPointsIncell = nQuadPointsInCell; + + if (basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreGradNiGradNj) + ->second) + { + utils::throwException( + false, + "Basis GradNiGradNj not implemented in CFEBDSOnTheFlyComputeDealii"); + } + + if (basisStorageAttributesBoolMap.find(BasisStorageAttributes::StoreJxW) + ->second) + { + std::shared_ptr< + typename BasisDataStorage::Storage> + jxwQuadStorage; + + const std::vector &jxwVec = + d_quadratureRuleContainer->getJxW(); + jxwQuadStorage = + std::make_shared::Storage>( + jxwVec.size()); + + utils::MemoryTransfer::copy( + jxwVec.size(), jxwQuadStorage->data(), jxwVec.data()); + + d_JxWStorage = jxwQuadStorage; + } + } + + + template + void + CFEBDSOnTheFlyComputeDealii:: + evaluateBasisData( + const quadrature::QuadratureRuleAttributes &quadratureRuleAttributes, + std::vector> + quadratureRuleVec, + const BasisStorageAttributesBoolMap basisStorageAttributesBoolMap) + { + utils::throwException( + false, + "On the fly computation is not available for non-uniform quadrature rule in cells."); + } + + template + void + CFEBDSOnTheFlyComputeDealii:: + evaluateBasisData( + const quadrature::QuadratureRuleAttributes &quadratureRuleAttributes, + std::shared_ptr + baseQuadratureRuleAdaptive, + std::vector> + & functions, + const std::vector & absoluteTolerances, + const std::vector & relativeTolerances, + const std::vector & integralThresholds, + const double smallestCellVolume, + const unsigned int maxRecursion, + const BasisStorageAttributesBoolMap basisStorageAttributesBoolMap) + { + utils::throwException( + false, + "On the fly computation is not available for non-uniform/adaptive quadrature rule in cells."); + } + + //------------------OTHER FNS ----------------------------- + + template + const typename BasisDataStorage::Storage & + CFEBDSOnTheFlyComputeDealii::getBasisDataInAllCells() const + { + utils::throwException( + d_evaluateBasisData, + "Cannot call function before calling evaluateBasisData()"); + + utils::throwException( + d_basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreValues) + ->second, + "Basis values are not evaluated for the given QuadratureRuleAttributes"); + return *(d_basisQuadStorage); + } + + template + const typename BasisDataStorage::Storage & + CFEBDSOnTheFlyComputeDealii::getBasisGradientDataInAllCells() const + { + utils::throwException( + false, + "getBasisGradientDataInAllCells() is not implemented in CFEBDSOnTheFlyComputeDealii"); + // typename BasisDataStorage::Storage + // dummy( + // 0); + return d_tmpGradientBlock; + } + + template + const typename BasisDataStorage::Storage & + CFEBDSOnTheFlyComputeDealii::getBasisHessianDataInAllCells() const + { + utils::throwException( + d_evaluateBasisData, + "Cannot call function before calling evaluateBasisData()"); + + utils::throwException( + d_basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreHessian) + ->second, + "Basis Hessians are not evaluated for the given QuadratureRuleAttributes"); + return *(d_basisHessianQuadStorage); + } + + template + const typename BasisDataStorage::Storage & + CFEBDSOnTheFlyComputeDealii::getJxWInAllCells() const + { + utils::throwException( + d_evaluateBasisData, + "Cannot call function before calling evaluateBasisData()"); + + utils::throwException( + d_basisStorageAttributesBoolMap.find(BasisStorageAttributes::StoreJxW) + ->second, + "JxW values are not stored for the given QuadratureRuleAttributes"); + return *(d_JxWStorage); + } + + + template + typename BasisDataStorage::Storage + CFEBDSOnTheFlyComputeDealii::getBasisDataInCell(const size_type cellId) + const + { + utils::throwException( + d_evaluateBasisData, + "Cannot call function before calling evaluateBasisData()"); + + utils::throwException( + d_basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreValues) + ->second, + "Basis values are not evaluated for the given QuadratureRuleAttributes"); + std::shared_ptr< + typename BasisDataStorage::Storage> + basisQuadStorage = d_basisQuadStorage; + const std::vector &cellStartIds = + d_cellStartIdsBasisQuadStorage; + const std::vector &nQuadPointsInCell = d_nQuadPointsIncell; + const size_type sizeToCopy = + nQuadPointsInCell[cellId] * d_dofsInCell[cellId]; + typename BasisDataStorage::Storage + returnValue(sizeToCopy); + utils::MemoryTransfer::copy( + sizeToCopy, + returnValue.data(), + basisQuadStorage->data() + cellStartIds[cellId]); + return returnValue; + } + + template + void + CFEBDSOnTheFlyComputeDealii< + ValueTypeBasisCoeff, + ValueTypeBasisData, + memorySpace, + dim>::getBasisDataInCellRange(std::pair cellRange, + Storage &basisData) const + { + utils::throwException( + d_evaluateBasisData == true, + "Cannot call function before calling evaluateBasisData()"); + + utils::throwException( + d_basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreValues) + ->second, + "Basis values are not evaluated for the given QuadratureRuleAttributes"); + + std::shared_ptr< + typename BasisDataStorage::Storage> + basisQuadStorage = d_basisQuadStorage; + const std::vector &cellStartIds = + d_cellStartIdsBasisQuadStorage; + const std::vector &nQuadPointsInCell = d_nQuadPointsIncell; + for (size_type cellId = cellRange.first; cellId < cellRange.second; + cellId++) + utils::MemoryTransfer::copy( + nQuadPointsInCell[cellId] * d_dofsInCell[cellId], + basisData.data() + cellStartIds[cellId] - + cellStartIds[cellRange.first], + basisQuadStorage->data() + cellStartIds[cellId]); + } + + template + typename BasisDataStorage::Storage + CFEBDSOnTheFlyComputeDealii::getBasisGradientDataInCell(const size_type + cellId) const + { + utils::throwException( + d_evaluateBasisData, + "Cannot call function before calling evaluateBasisData()"); + + utils::throwException( + d_basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreGradient) + ->second, + "Basis gradient values are not evaluated for the given QuadratureRuleAttributes"); + + const size_type sizeToCopy = + d_nQuadPointsIncell[cellId] * d_dofsInCell[cellId] * dim; + typename BasisDataStorage::Storage + returnValue(sizeToCopy); + + std::pair cellPair(cellId, cellId + 1); + + CFEBDSOnTheFlyComputeDealiiInternal:: + computeJacobianInvTimesGradPara( + cellPair, + d_dofsInCell, + d_nQuadPointsIncell, + d_basisJacobianInvQuadStorage, + d_cellStartIdsBasisJacobianInvQuadStorage, + *d_basisGradientParaCellQuadStorage, + d_linAlgOpContext, + returnValue); + + return returnValue; + } + + template + void + CFEBDSOnTheFlyComputeDealii:: + getBasisGradientDataInCellRange(std::pair cellRange, + Storage &basisGradientData) const + { + utils::throwException( + d_evaluateBasisData, + "Cannot call function before calling evaluateBasisData()"); + + utils::throwException( + d_basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreGradient) + ->second, + "Basis gradient values are not evaluated for the given QuadratureRuleAttributes"); + + if ((cellRange.second - cellRange.first) > d_maxCellBlock) + { + d_tmpGradientBlock.resize(d_dofsInCell[0] * d_nQuadPointsIncell[0] * + dim * (cellRange.second - cellRange.first)); + d_tmpGradientBlock.setValue(0); + size_type gradientParaCellSize = + d_basisGradientParaCellQuadStorage->size(); + for (size_type iCell = 0; + iCell < (cellRange.second - cellRange.first); + ++iCell) + { + d_tmpGradientBlock.template copyFrom( + d_basisGradientParaCellQuadStorage->data(), + gradientParaCellSize, + 0, + gradientParaCellSize * iCell); + } + } + CFEBDSOnTheFlyComputeDealiiInternal:: + computeJacobianInvTimesGradPara( + cellRange, + d_dofsInCell, + d_nQuadPointsIncell, + d_basisJacobianInvQuadStorage, + d_cellStartIdsBasisJacobianInvQuadStorage, + d_tmpGradientBlock, + d_linAlgOpContext, + basisGradientData); + } + + template + typename BasisDataStorage::Storage + CFEBDSOnTheFlyComputeDealii::getBasisHessianDataInCell(const size_type + cellId) const + { + utils::throwException( + d_evaluateBasisData, + "Cannot call function before calling evaluateBasisData()"); + + utils::throwException( + d_basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreHessian) + ->second, + "Basis hessian values are not evaluated for the given QuadratureRuleAttributes"); + std::shared_ptr< + typename BasisDataStorage::Storage> + basisHessianQuadStorage = d_basisHessianQuadStorage; + const std::vector &cellStartIds = + d_cellStartIdsBasisHessianQuadStorage; + const std::vector &nQuadPointsInCell = d_nQuadPointsIncell; + const size_type sizeToCopy = + nQuadPointsInCell[cellId] * d_dofsInCell[cellId] * dim * dim; + typename BasisDataStorage::Storage + returnValue(sizeToCopy); + utils::MemoryTransfer::copy( + sizeToCopy, + returnValue.data(), + basisHessianQuadStorage->data() + cellStartIds[cellId]); + return returnValue; + } + + template + typename BasisDataStorage::Storage + CFEBDSOnTheFlyComputeDealii::getJxWInCell(const size_type cellId) const + { + utils::throwException( + d_evaluateBasisData, + "Cannot call function before calling evaluateBasisData()"); + + utils::throwException( + d_basisStorageAttributesBoolMap.find(BasisStorageAttributes::StoreJxW) + ->second, + "JxW values are not evaluated for the given QuadratureRuleAttributes"); + std::shared_ptr< + typename BasisDataStorage::Storage> + jxwQuadStorage = d_JxWStorage; + + const std::vector &nQuadPointsInCell = d_nQuadPointsIncell; + const size_type sizeToCopy = nQuadPointsInCell[cellId]; + typename BasisDataStorage::Storage + returnValue(sizeToCopy); + utils::MemoryTransfer::copy( + sizeToCopy, + returnValue.data(), + jxwQuadStorage->data() + + d_quadratureRuleContainer->getCellQuadStartId(cellId)); + return returnValue; + } + + template + typename BasisDataStorage::Storage + CFEBDSOnTheFlyComputeDealii< + ValueTypeBasisCoeff, + ValueTypeBasisData, + memorySpace, + dim>::getBasisData(const QuadraturePointAttributes &attributes, + const size_type basisId) const + { + utils::throwException( + d_evaluateBasisData, + "Cannot call function before calling evaluateBasisData()"); + utils::throwException( + d_basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreValues) + ->second, + "Basis values are not evaluated for the given QuadraturePointAttributes"); + const quadrature::QuadratureRuleAttributes quadratureRuleAttributes = + *(attributes.quadratureRuleAttributesPtr); + const size_type cellId = attributes.cellId; + const size_type quadPointId = attributes.quadPointId; + std::shared_ptr< + typename BasisDataStorage::Storage> + basisQuadStorage = d_basisQuadStorage; + const std::vector &cellStartIds = + d_cellStartIdsBasisQuadStorage; + const std::vector &nQuadPointsInCell = d_nQuadPointsIncell; + typename BasisDataStorage::Storage + returnValue(1); + utils::MemoryTransfer::copy( + 1, + returnValue.data(), + basisQuadStorage->data() + cellStartIds[cellId] + + quadPointId * d_dofsInCell[cellId] + basisId); + return returnValue; + } + + template + typename BasisDataStorage::Storage + CFEBDSOnTheFlyComputeDealii< + ValueTypeBasisCoeff, + ValueTypeBasisData, + memorySpace, + dim>::getBasisGradientData(const QuadraturePointAttributes &attributes, + const size_type basisId) const + { + utils::throwException( + d_evaluateBasisData, + "Cannot call function before calling evaluateBasisData()"); + utils::throwException( + d_basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreGradient) + ->second, + "Basis gradient values are not evaluated for the given QuadraturePointAttributes"); + const quadrature::QuadratureRuleAttributes quadratureRuleAttributes = + *(attributes.quadratureRuleAttributesPtr); + const size_type cellId = attributes.cellId; + const size_type quadPointId = attributes.quadPointId; + typename BasisDataStorage::Storage + basisGradientQuadStorage(getBasisGradientDataInCell(cellId)); + const std::vector &nQuadPointsInCell = d_nQuadPointsIncell; + typename BasisDataStorage::Storage + returnValue(dim); + for (size_type iDim = 0; iDim < dim; ++iDim) + { + utils::MemoryTransfer::copy( + 1, + returnValue.data() + iDim, + basisGradientQuadStorage.data() + + quadPointId * d_dofsInCell[cellId] * dim + + iDim * d_dofsInCell[cellId] + basisId); + } + return returnValue; + } + + template + typename BasisDataStorage::Storage + CFEBDSOnTheFlyComputeDealii< + ValueTypeBasisCoeff, + ValueTypeBasisData, + memorySpace, + dim>::getBasisHessianData(const QuadraturePointAttributes &attributes, + const size_type basisId) const + { + utils::throwException( + d_evaluateBasisData, + "Cannot call function before calling evaluateBasisData()"); + utils::throwException( + d_basisStorageAttributesBoolMap + .find(BasisStorageAttributes::StoreHessian) + ->second, + "Basis hessian values are not evaluated for the given QuadraturePointAttributes"); + const quadrature::QuadratureRuleAttributes quadratureRuleAttributes = + *(attributes.quadratureRuleAttributesPtr); + const size_type cellId = attributes.cellId; + const size_type quadPointId = attributes.quadPointId; + std::shared_ptr< + typename BasisDataStorage::Storage> + basisHessianQuadStorage = d_basisHessianQuadStorage; + const std::vector &cellStartIds = + d_cellStartIdsBasisHessianQuadStorage; + const std::vector &nQuadPointsInCell = d_nQuadPointsIncell; + typename BasisDataStorage::Storage + returnValue(dim * dim); + for (size_type iDim = 0; iDim < dim; ++iDim) + { + for (size_type jDim = 0; jDim < dim; ++jDim) + { + utils::MemoryTransfer::copy( + 1, + returnValue.data() + iDim * dim + jDim, + basisHessianQuadStorage->data() + cellStartIds[cellId] + + quadPointId * d_dofsInCell[cellId] * dim * dim + + (iDim * dim + jDim) * d_dofsInCell[cellId] + basisId); + } + } + return returnValue; + } + + + template + const typename BasisDataStorage::Storage & + CFEBDSOnTheFlyComputeDealii::getBasisOverlapInAllCells() const + { + utils::throwException( + false, "Basis Overlap not implemented in CFEBDSOnTheFlyComputeDealii"); + // typename BasisDataStorage::Storage + // dummy( + // 0); + return d_tmpGradientBlock; + } + + template + typename BasisDataStorage::Storage + CFEBDSOnTheFlyComputeDealii::getBasisOverlapInCell(const size_type + cellId) const + { + utils::throwException( + false, "Basis Overlap not implemented in CFEBDSOnTheFlyComputeDealii"); + typename BasisDataStorage::Storage dummy( + 0); + return dummy; + } + + template + typename BasisDataStorage::Storage + CFEBDSOnTheFlyComputeDealii::getBasisOverlap(const size_type cellId, + const size_type basisId1, + const size_type basisId2) + const + { + utils::throwException( + false, "Basis Overlap not implemented in CFEBDSOnTheFlyComputeDealii"); + typename BasisDataStorage::Storage dummy( + 0); + return dummy; + } + + template + void + CFEBDSOnTheFlyComputeDealii::deleteBasisData() + { + utils::throwException( + (d_basisQuadStorage).use_count() == 1, + "More than one owner for the basis quadrature storage found in CFEBDSOnTheFlyComputeDealii. Not safe to delete it."); + delete (d_basisQuadStorage).get(); + + utils::throwException( + (d_basisJacobianInvQuadStorage).use_count() == 1, + "More than one owner for the basis quadrature storage found in CFEBDSOnTheFlyComputeDealii. Not safe to delete it."); + delete (d_basisJacobianInvQuadStorage).get(); + + utils::throwException( + (d_basisGradientParaCellQuadStorage).use_count() == 1, + "More than one owner for the basis quadrature storage found in CFEBDSOnTheFlyComputeDealii. Not safe to delete it."); + delete (d_basisGradientParaCellQuadStorage).get(); + + utils::throwException( + (d_basisHessianQuadStorage).use_count() == 1, + "More than one owner for the basis quadrature storage found in CFEBDSOnTheFlyComputeDealii. Not safe to delete it."); + delete (d_basisHessianQuadStorage).get(); + + d_tmpGradientBlock.resize(0); + } + + template + typename BasisDataStorage::Storage + CFEBDSOnTheFlyComputeDealii::getBasisDataInCell(const size_type cellId, + const size_type + basisId) const + { + utils::throwException( + false, + "getBasisDataInCell() for a given basisId is not implemented in CFEBDSOnTheFlyComputeDealii"); + typename BasisDataStorage::Storage dummy( + 0); + return dummy; + } + + template + typename BasisDataStorage::Storage + CFEBDSOnTheFlyComputeDealii< + ValueTypeBasisCoeff, + ValueTypeBasisData, + memorySpace, + dim>::getBasisGradientDataInCell(const size_type cellId, + const size_type basisId) const + { + utils::throwException( + false, + "getBasisGradientDataInCell() for a given basisId is not implemented in CFEBDSOnTheFlyComputeDealii"); + typename BasisDataStorage::Storage dummy( + 0); + return dummy; + } + + template + typename BasisDataStorage::Storage + CFEBDSOnTheFlyComputeDealii< + ValueTypeBasisCoeff, + ValueTypeBasisData, + memorySpace, + dim>::getBasisHessianDataInCell(const size_type cellId, + const size_type basisId) const + { + utils::throwException( + false, + "getBasisHessianDataInCell() for a given basisId is not implemented in CFEBDSOnTheFlyComputeDealii"); + typename BasisDataStorage::Storage dummy( + 0); + return dummy; + } + + template + std::shared_ptr + CFEBDSOnTheFlyComputeDealii::getQuadratureRuleContainer() const + { + utils::throwException( + d_evaluateBasisData, + "Cannot call function before calling evaluateBasisData()"); + + return d_quadratureRuleContainer; + } + + template + typename BasisDataStorage::Storage + CFEBDSOnTheFlyComputeDealii::getBasisGradNiGradNjInCell(const size_type + cellId) const + { + utils::throwException( + false, + "Basis GradNiGradNj not implemented in CFEBDSOnTheFlyComputeDealii"); + typename BasisDataStorage::Storage dummy( + 0); + return dummy; + } + + // get overlap of all the basis functions in all cells + template + const typename BasisDataStorage::Storage & + CFEBDSOnTheFlyComputeDealii::getBasisGradNiGradNjInAllCells() const + { + utils::throwException( + false, + "Basis GradNiGradNj not implemented in CFEBDSOnTheFlyComputeDealii"); + // typename BasisDataStorage::Storage + // dummy( + // 0); + return d_tmpGradientBlock; + } + + template + std::shared_ptr + CFEBDSOnTheFlyComputeDealii::getBasisDofHandler() const + { + return d_feBDH; + } + } // namespace basis +} // namespace dftefe diff --git a/src/basis/CFEBasisDataStorageDealii.t.cpp b/src/basis/CFEBasisDataStorageDealii.t.cpp index befc3a80..6cf978bd 100644 --- a/src/basis/CFEBasisDataStorageDealii.t.cpp +++ b/src/basis/CFEBasisDataStorageDealii.t.cpp @@ -21,7 +21,7 @@ ******************************************************************************/ /* - * @author Bikash Kanungo, Vishal Subramanian + * @author Bikash Kanungo, Vishal Subramanian, Avirup Sircar */ #include diff --git a/src/basis/CFEOverlapInverseOpContextGLL.t.cpp b/src/basis/CFEOverlapInverseOpContextGLL.t.cpp index 8755ca66..b0ea67be 100644 --- a/src/basis/CFEOverlapInverseOpContextGLL.t.cpp +++ b/src/basis/CFEOverlapInverseOpContextGLL.t.cpp @@ -188,7 +188,8 @@ namespace dftefe &classicalGLLBasisDataStorage, std::shared_ptr< const FEBasisDofHandler> cfeBDH, - utils::MemoryStorage &NiNjInAllCells) + utils::MemoryStorage &NiNjInAllCells, + linearAlgebra::LinAlgOpContext & linAlgOpContext) { // Set up the overlap matrix quadrature storages. @@ -218,14 +219,12 @@ namespace dftefe NiNjInAllCells.resize(basisOverlapSize, ValueTypeOperator(0)); basisOverlapTmp.resize(basisOverlapSize, ValueTypeOperator(0)); - auto basisOverlapTmpIter = basisOverlapTmp.begin(); - size_type cellIndex = 0; + // auto basisOverlapTmpIter = basisOverlapTmp.begin(); + size_type cellIndex = 0; - const utils::MemoryStorage - &basisDataInAllCellsClassicalBlock = - classicalGLLBasisDataStorage.getBasisDataInAllCells(); - - size_type cumulativeEnrichmentBlockDofQuadPointsOffset = 0; + // const utils::MemoryStorage + // &basisDataInAllCellsClassicalBlock = + // classicalGLLBasisDataStorage.getBasisDataInAllCells(); locallyOwnedCellIter = cfeBDH->beginLocallyOwnedCells(); for (; locallyOwnedCellIter != cfeBDH->endLocallyOwnedCells(); @@ -239,31 +238,80 @@ namespace dftefe classicalGLLBasisDataStorage.getQuadratureRuleContainer() ->getCellJxW(cellIndex); - const ValueTypeOperator *cumulativeClassicalBlockDofQuadPoints = - basisDataInAllCellsClassicalBlock.data(); /*GLL Quad rule*/ - - for (unsigned int iNode = 0; iNode < dofsPerCell; iNode++) - { - for (unsigned int jNode = 0; jNode < dofsPerCell; jNode++) - { - *basisOverlapTmpIter = 0.0; - // Ni_classical* Ni_classical of the classicalBlockBasisData - for (unsigned int qPoint = 0; - qPoint < nQuadPointInCellClassicalBlock; - qPoint++) - { - *basisOverlapTmpIter += - *(cumulativeClassicalBlockDofQuadPoints + - dofsPerCell * qPoint + iNode - /*nQuadPointInCellClassicalBlock * iNode + qPoint*/) * - *(cumulativeClassicalBlockDofQuadPoints + - dofsPerCell * qPoint + jNode - /*nQuadPointInCellClassicalBlock * jNode + qPoint*/) * - cellJxWValuesClassicalBlock[qPoint]; - } - basisOverlapTmpIter++; - } - } + const utils::MemoryStorage & + basisDataInCell = classicalGLLBasisDataStorage.getBasisDataInCell( + cellIndex); /*GLL Quad rule*/ + + std::vector JxWxNCellConj( + dofsPerCell * nQuadPointInCellClassicalBlock); + + size_type stride = 0; + size_type m = 1, n = dofsPerCell, + k = nQuadPointInCellClassicalBlock; + + linearAlgebra::blasLapack::scaleStridedVarBatched( + 1, + linearAlgebra::blasLapack::Layout::ColMajor, + linearAlgebra::blasLapack::ScalarOp::Identity, + linearAlgebra::blasLapack::ScalarOp::Conj, + &stride, + &stride, + &stride, + &m, + &n, + &k, + cellJxWValuesClassicalBlock.data(), + basisDataInCell.data(), + JxWxNCellConj.data(), + linAlgOpContext); + + linearAlgebra::blasLapack:: + gemm( + linearAlgebra::blasLapack::Layout::ColMajor, + linearAlgebra::blasLapack::Op::NoTrans, + linearAlgebra::blasLapack::Op::Trans, + dofsPerCell, + dofsPerCell, + nQuadPointInCellClassicalBlock, + (ValueTypeOperand)1.0, + JxWxNCellConj.data(), + dofsPerCell, + basisDataInCell.data(), + dofsPerCell, + (ValueTypeOperand)0.0, + basisOverlapTmp.data() + cumulativeBasisOverlapId, + dofsPerCell, + linAlgOpContext); + + // const ValueTypeOperator *cumulativeClassicalBlockDofQuadPoints = + // basisDataInAllCellsClassicalBlock.data(); /*GLL Quad rule*/ + + // for (unsigned int iNode = 0; iNode < dofsPerCell; iNode++) + // { + // for (unsigned int jNode = 0; jNode < dofsPerCell; jNode++) + // { + // *basisOverlapTmpIter = 0.0; + // // Ni_classical* Ni_classical of the + // classicalBlockBasisData for (unsigned int qPoint = 0; + // qPoint < nQuadPointInCellClassicalBlock; + // qPoint++) + // { + // *basisOverlapTmpIter += + // *(cumulativeClassicalBlockDofQuadPoints + + // dofsPerCell * qPoint + iNode + // /*nQuadPointInCellClassicalBlock * iNode + + // qPoint*/) * + // *(cumulativeClassicalBlockDofQuadPoints + + // dofsPerCell * qPoint + jNode + // /*nQuadPointInCellClassicalBlock * jNode + + // qPoint*/) * + // cellJxWValuesClassicalBlock[qPoint]; + // } + // basisOverlapTmpIter++; + // } + // } cumulativeBasisOverlapId += dofsPerCell * dofsPerCell; cellIndex++; @@ -338,7 +386,10 @@ namespace dftefe ValueTypeOperator, ValueTypeOperand, memorySpace, - dim>(classicalGLLBasisDataStorage, cfeBDH, NiNjInAllCells); + dim>(classicalGLLBasisDataStorage, + cfeBDH, + NiNjInAllCells, + *linAlgOpContext); // auto NiNjInAllCells = // classicalGLLBasisDataStorage.getBasisOverlapInAllCells(); diff --git a/src/basis/CFEOverlapOperatorContext.h b/src/basis/CFEOverlapOperatorContext.h index f4a48aab..7daf46bf 100644 --- a/src/basis/CFEOverlapOperatorContext.h +++ b/src/basis/CFEOverlapOperatorContext.h @@ -87,7 +87,9 @@ namespace dftefe const FEBasisDataStorage & feBasisDataStorage, const size_type maxCellBlock, - const size_type maxFieldBlock); + const size_type maxFieldBlock, + std::shared_ptr> + linAlgOpContext); CFEOverlapOperatorContext( const FEBasisManager &feBasisDataStorage, std::shared_ptr> - & basisOverlap, - std::vector &cellStartIdsBasisOverlap, - std::vector &dofsInCellVec) + & basisOverlap, + std::vector & cellStartIdsBasisOverlap, + std::vector & dofsInCellVec, + linearAlgebra::LinAlgOpContext &linAlgOpContext) { std::shared_ptr< const FEBasisDofHandler> @@ -76,24 +77,24 @@ namespace dftefe basisOverlapTmp.resize(numLocallyOwnedCells * dofsPerCell * dofsPerCell, ValueTypeOperator(0)); - auto locallyOwnedCellIter = feBDH->beginLocallyOwnedCells(); - auto basisOverlapTmpIter = basisOverlapTmp.begin(); - size_type cellIndex = 0; - - const utils::MemoryStorage - &basisDataInAllCells = feBasisDataStorage.getBasisDataInAllCells(); - - size_type cumulativeQuadPoints = 0, cumulativeDofQuadPointsOffset = 0; - bool isConstantDofsAndQuadPointsInCell = false; - quadrature::QuadratureFamily quadFamily = - feBasisDataStorage.getQuadratureRuleContainer() - ->getQuadratureRuleAttributes() - .getQuadratureFamily(); - if ((quadFamily == quadrature::QuadratureFamily::GAUSS || - quadFamily == quadrature::QuadratureFamily::GLL || - quadFamily == quadrature::QuadratureFamily::GAUSS_SUBDIVIDED) && - !feBDH->isVariableDofsPerCell()) - isConstantDofsAndQuadPointsInCell = true; + auto locallyOwnedCellIter = feBDH->beginLocallyOwnedCells(); + // auto basisOverlapTmpIter = basisOverlapTmp.begin(); + size_type cellIndex = 0; + + // const utils::MemoryStorage + // &basisDataInAllCells = feBasisDataStorage.getBasisDataInAllCells(); + + // size_type cumulativeQuadPoints = 0, cumulativeDofQuadPointsOffset = + // 0; bool isConstantDofsAndQuadPointsInCell = false; + // quadrature::QuadratureFamily quadFamily = + // feBasisDataStorage.getQuadratureRuleContainer() + // ->getQuadratureRuleAttributes() + // .getQuadratureFamily(); + // if ((quadFamily == quadrature::QuadratureFamily::GAUSS || + // quadFamily == quadrature::QuadratureFamily::GLL || + // quadFamily == quadrature::QuadratureFamily::GAUSS_SUBDIVIDED) && + // !feBDH->isVariableDofsPerCell()) + // isConstantDofsAndQuadPointsInCell = true; for (; locallyOwnedCellIter != feBDH->endLocallyOwnedCells(); ++locallyOwnedCellIter) { @@ -105,36 +106,83 @@ namespace dftefe feBasisDataStorage.getQuadratureRuleContainer()->getCellJxW( cellIndex); - const ValueTypeOperator *cumulativeDofQuadPoints = - basisDataInAllCells.data() + cumulativeDofQuadPointsOffset; + const utils::MemoryStorage + &basisDataInCell = + feBasisDataStorage.getBasisDataInCell(cellIndex); + + std::vector JxWxNCellConj(dofsPerCell * + nQuadPointInCell); + + size_type stride = 0; + size_type m = 1, n = dofsPerCell, k = nQuadPointInCell; + + linearAlgebra::blasLapack::scaleStridedVarBatched( + 1, + linearAlgebra::blasLapack::Layout::ColMajor, + linearAlgebra::blasLapack::ScalarOp::Identity, + linearAlgebra::blasLapack::ScalarOp::Conj, + &stride, + &stride, + &stride, + &m, + &n, + &k, + cellJxWValues.data(), + basisDataInCell.data(), + JxWxNCellConj.data(), + linAlgOpContext); - for (unsigned int iNode = 0; iNode < dofsPerCell; iNode++) - { - for (unsigned int jNode = 0; jNode < dofsPerCell; jNode++) - { - *basisOverlapTmpIter = 0.0; - for (unsigned int qPoint = 0; qPoint < nQuadPointInCell; - qPoint++) - { - *basisOverlapTmpIter += - *(cumulativeDofQuadPoints + dofsPerCell * qPoint + - iNode - /*nQuadPointInCell * iNode + qPoint*/) * - *(cumulativeDofQuadPoints + dofsPerCell * qPoint + - jNode - /*nQuadPointInCell * jNode + qPoint*/) * - cellJxWValues[qPoint]; - } - basisOverlapTmpIter++; - } - } + linearAlgebra::blasLapack:: + gemm( + linearAlgebra::blasLapack::Layout::ColMajor, + linearAlgebra::blasLapack::Op::NoTrans, + linearAlgebra::blasLapack::Op::Trans, + dofsPerCell, + dofsPerCell, + nQuadPointInCell, + (ValueTypeOperand)1.0, + JxWxNCellConj.data(), + dofsPerCell, + basisDataInCell.data(), + dofsPerCell, + (ValueTypeOperand)0.0, + basisOverlapTmp.data() + cumulativeBasisOverlapId, + dofsPerCell, + linAlgOpContext); + + // const ValueTypeOperator *cumulativeDofQuadPoints = + // basisDataInAllCells.data() + cumulativeDofQuadPointsOffset; + + // for (unsigned int iNode = 0; iNode < dofsPerCell; iNode++) + // { + // for (unsigned int jNode = 0; jNode < dofsPerCell; jNode++) + // { + // *basisOverlapTmpIter = 0.0; + // for (unsigned int qPoint = 0; qPoint < nQuadPointInCell; + // qPoint++) + // { + // *basisOverlapTmpIter += + // *(cumulativeDofQuadPoints + dofsPerCell * qPoint + + // iNode + // /*nQuadPointInCell * iNode + qPoint*/) * + // *(cumulativeDofQuadPoints + dofsPerCell * qPoint + + // jNode + // /*nQuadPointInCell * jNode + qPoint*/) * + // cellJxWValues[qPoint]; + // } + // basisOverlapTmpIter++; + // } + // } cellStartIdsBasisOverlap[cellIndex] = cumulativeBasisOverlapId; cumulativeBasisOverlapId += dofsPerCell * dofsPerCell; cellIndex++; - cumulativeQuadPoints += nQuadPointInCell; - if (!isConstantDofsAndQuadPointsInCell) - cumulativeDofQuadPointsOffset += nQuadPointInCell * dofsPerCell; + // cumulativeQuadPoints += nQuadPointInCell; + // if (!isConstantDofsAndQuadPointsInCell) + // cumulativeDofQuadPointsOffset += nQuadPointInCell * + // dofsPerCell; } utils::MemoryTransfer::copy( @@ -385,7 +433,9 @@ namespace dftefe const FEBasisDataStorage & feBasisDataStorage, const size_type maxCellBlock, - const size_type maxFieldBlock) + const size_type maxFieldBlock, + std::shared_ptr> + linAlgOpContext) : d_feBasisManager(&feBasisManager) , d_maxCellBlock(maxCellBlock) , d_maxFieldBlock(maxFieldBlock) @@ -399,7 +449,8 @@ namespace dftefe dim>(feBasisDataStorage, d_basisOverlap, d_cellStartIdsBasisOverlap, - d_dofsInCell); + d_dofsInCell, + *linAlgOpContext); } template (feBasisDataStorage, d_basisOverlap, d_cellStartIdsBasisOverlap, - d_dofsInCell); + d_dofsInCell, + *linAlgOpContext); d_diagonal = std::make_shared>( diff --git a/src/basis/CMakeLists.txt b/src/basis/CMakeLists.txt index 14d17c0f..189f1228 100644 --- a/src/basis/CMakeLists.txt +++ b/src/basis/CMakeLists.txt @@ -17,6 +17,7 @@ set(DFT-EFE-BASIS-SOURCES CFEBasisDofHandlerDealii.cpp FEBasisManager.cpp CFEBasisDataStorageDealii.cpp + CFEBDSOnTheFlyComputeDealii.cpp FEBasisOperations.cpp AtomIdsPartition.cpp EnrichmentIdsPartition.cpp diff --git a/src/basis/EnrichmentClassicalInterfaceSpherical.t.cpp b/src/basis/EnrichmentClassicalInterfaceSpherical.t.cpp index 7f40585c..067a53df 100644 --- a/src/basis/EnrichmentClassicalInterfaceSpherical.t.cpp +++ b/src/basis/EnrichmentClassicalInterfaceSpherical.t.cpp @@ -309,7 +309,8 @@ namespace dftefe *d_cfeBasisManager, *cfeBasisDataStorageOverlapMatrix, L2ProjectionDefaults::CELL_BATCH_SIZE, - nTotalEnrichmentIds); + nTotalEnrichmentIds, + linAlgOpContext); std::shared_ptr scalarOpA( - numCellsInBlock, - linearAlgebra::blasLapack::ScalarOp::Identity); - std::vector scalarOpB( - numCellsInBlock, linearAlgebra::blasLapack::ScalarOp::Conj); + linearAlgebra::blasLapack::ScalarOp scalarOpA = + linearAlgebra::blasLapack::ScalarOp::Identity; + linearAlgebra::blasLapack::ScalarOp scalarOpB = + linearAlgebra::blasLapack::ScalarOp::Conj; std::vector mTmp(numCellsInBlock, 0); std::vector nTmp(numCellsInBlock, 0); std::vector kTmp(numCellsInBlock, 0); @@ -232,8 +231,8 @@ namespace dftefe memorySpace>( numCellsInBlock, layout, - scalarOpA.data(), - scalarOpB.data(), + scalarOpA, + scalarOpB, stA.data(), stB.data(), stC.data(), @@ -268,9 +267,7 @@ namespace dftefe } */ - std::fill(scalarOpB.begin(), - scalarOpB.end(), - linearAlgebra::blasLapack::ScalarOp::Identity); + scalarOpB = linearAlgebra::blasLapack::ScalarOp::Identity; /* Other params from previous declarations*/ linearAlgebra::blasLapack::scaleStridedVarBatched< @@ -279,8 +276,8 @@ namespace dftefe memorySpace>( numCellsInBlock, layout, - scalarOpA.data(), - scalarOpB.data(), + scalarOpA, + scalarOpB, stA.data(), stB.data(), stC.data(), @@ -524,11 +521,10 @@ namespace dftefe basisGradientDataInCellRange); /** --- Storages --------- */ - std::vector scalarOpA( - numCellsInBlock, - linearAlgebra::blasLapack::ScalarOp::Identity); - std::vector scalarOpB( - numCellsInBlock, linearAlgebra::blasLapack::ScalarOp::Conj); + linearAlgebra::blasLapack::ScalarOp scalarOpA = + linearAlgebra::blasLapack::ScalarOp::Identity; + linearAlgebra::blasLapack::ScalarOp scalarOpB = + linearAlgebra::blasLapack::ScalarOp::Conj; std::vector mTmp(numCellsInBlock, 0); std::vector nTmp(numCellsInBlock, 0); std::vector kTmp(numCellsInBlock, 0); @@ -571,8 +567,8 @@ namespace dftefe memorySpace>( numCellsInBlock, layout, - scalarOpA.data(), - scalarOpB.data(), + scalarOpA, + scalarOpB, stA.data(), stB.data(), stC.data(), diff --git a/src/electrostatics/LaplaceOperatorContextFE.h b/src/electrostatics/LaplaceOperatorContextFE.h index 7c96b73c..68b05d25 100644 --- a/src/electrostatics/LaplaceOperatorContextFE.h +++ b/src/electrostatics/LaplaceOperatorContextFE.h @@ -91,6 +91,15 @@ namespace dftefe const size_type maxCellBlock, const size_type maxFieldBlock); + void + reinit( + const basis:: + FEBasisManager + &feBasisManagerX, + const basis:: + FEBasisManager + &feBasisManagerY); + ~LaplaceOperatorContextFE() = default; // void @@ -110,6 +119,9 @@ namespace dftefe const basis:: FEBasisManager *d_feBasisManagerY; + std::shared_ptr< + const basis::FEBasisDataStorage> + d_feBasisDataStorage; utils::MemoryStorage d_gradNiGradNjInAllCells; size_type d_maxFieldBlock, d_maxCellBlock; diff --git a/src/electrostatics/LaplaceOperatorContextFE.t.cpp b/src/electrostatics/LaplaceOperatorContextFE.t.cpp index 53d50e4c..8f87ce92 100644 --- a/src/electrostatics/LaplaceOperatorContextFE.t.cpp +++ b/src/electrostatics/LaplaceOperatorContextFE.t.cpp @@ -284,6 +284,7 @@ namespace dftefe const size_type maxFieldBlock) : d_feBasisManagerX(&feBasisManagerX) , d_feBasisManagerY(&feBasisManagerY) + , d_feBasisDataStorage(feBasisDataStorage) , d_maxFieldBlock(maxFieldBlock) , d_maxCellBlock(maxCellBlock) { @@ -326,6 +327,37 @@ namespace dftefe // feBasisDataStorage->getBasisGradNiGradNjInAllCells(); } + template + void + LaplaceOperatorContextFE< + ValueTypeOperator, + ValueTypeOperand, + memorySpace, + dim>::reinit(const basis::FEBasisManager &feBasisManagerX, + const basis::FEBasisManager &feBasisManagerY) + { + d_feBasisManagerX = &feBasisManagerX; + d_feBasisManagerY = &feBasisManagerY; + utils::throwException( + &(feBasisManagerX.getBasisDofHandler()) == + &(feBasisManagerY.getBasisDofHandler()), + "feBasisManager of X and Y vectors are not from same basisDofhandler"); + + utils::throwException( + &(feBasisManagerX.getBasisDofHandler()) == + (d_feBasisDataStorage->getBasisDofHandler()).get(), + "feBasisManager of X and Y vectors and feBasisDataStorage are not from the same basisDofHandler"); + } + template > d_AxContext; - std::shared_ptr> + std::shared_ptr> d_AxContextNHDB; std::shared_ptrgetConstraints().distributeParentToChild( d_fieldInHomoDBCVec, d_numComponents); - d_AxContextNHDB = - std::make_shared>( - *d_feBasisManagerField, - *d_feBasisManagerHomo, - d_feBasisDataStorageStiffnessMatrix, - d_linAlgOpContext, - d_maxCellBlock, - d_maxFieldBlock); // handling the inhomogeneous DBC in RHS + d_AxContextNHDB->reinit(*d_feBasisManagerField, + *d_feBasisManagerHomo); + // = std::make_shared>( + // *d_feBasisManagerField, + // *d_feBasisManagerHomo, + // d_feBasisDataStorageStiffnessMatrix, + // d_linAlgOpContext, + // d_maxCellBlock, + // d_maxFieldBlock); // handling the inhomogeneous DBC in RHS } // Compute RHS diff --git a/src/ksdft/DensityCalculator.t.cpp b/src/ksdft/DensityCalculator.t.cpp index d7c4f4b8..0aff0d2f 100644 --- a/src/ksdft/DensityCalculator.t.cpp +++ b/src/ksdft/DensityCalculator.t.cpp @@ -50,216 +50,114 @@ namespace dftefe { size_type numPsiInBatch = occupationInBatch.size(); // hadamard for psi^C psi = mod psi^2 - linearAlgebra::blasLapack:: - hadamardProduct( - psiBatchQuad.nEntries(), - psiBatchQuad.begin(), - psiBatchQuad.begin(), - linearAlgebra::blasLapack::ScalarOp::Conj, - linearAlgebra::blasLapack::ScalarOp::Identity, - psiBatchQuad.begin(), - linAlgOpContext); + // linearAlgebra::blasLapack:: + // hadamardProduct( + // psiBatchQuad.nEntries(), + // psiBatchQuad.begin(), + // psiBatchQuad.begin(), + // linearAlgebra::blasLapack::ScalarOp::Conj, + // linearAlgebra::blasLapack::ScalarOp::Identity, + // psiBatchQuad.begin(), + // linAlgOpContext); /*----------- TODO : Optimize this -------------------------------*/ // convert to psiBatchQuad to realType and multiply by 2 - for (size_type iCell = 0; iCell < psiBatchQuad.nCells(); iCell++) - { - std::vector a( - quadRuleContainer->nCellQuadraturePoints(iCell) * numPsiInBatch); - std::vector b( - quadRuleContainer->nCellQuadraturePoints(iCell) * numPsiInBatch); - psiBatchQuad.template getCellValues( - iCell, a.data()); - for (size_type i = 0; i < b.size(); i++) - b[i] = 2.0 * utils::realPart(a[i]); - psiModSqBatchQuad.template setCellValues( - iCell, b.data()); - } - // for (size_type iCell = 0; iCell < psiBatchQuad.nCells(); iCell++) // { - // for(size_type iQuad = 0 ; iQuad < - // quadRuleContainer->nCellQuadraturePoints(iCell) ; iQuad++) - // { - // std::vector a(numPsiInBatch); - // RealType b = 0; - // psiBatchQuad - // .template getCellQuadValues( - // iCell, iQuad, a.data()); - // for (size_type i = 0; i < a.size(); i++) - // b += 2.0 * a[i] * a[i] * occupationInBatch[i]; - // rhoBatch - // .template setCellQuadValues( - // iCell, iQuad, &b); - // } + // std::vector a( + // quadRuleContainer->nCellQuadraturePoints(iCell) * + // numPsiInBatch); + // std::vector b( + // quadRuleContainer->nCellQuadraturePoints(iCell) * + // numPsiInBatch); + // psiBatchQuad.template getCellValues( + // iCell, a.data()); + // for (size_type i = 0; i < b.size(); i++) + // b[i] = 2.0 * utils::realPart(a[i]); + // psiModSqBatchQuad.template + // setCellValues( + // iCell, b.data()); // } - // gemm for fi * mod psi^2 - - utils::MemoryTransfer - memoryTransfer; - - // Create occupancy in memspace - utils::MemoryStorage occupationInBatchMemspace( - occupationInBatch.size()); - memoryTransfer.copy(occupationInBatch.size(), - occupationInBatchMemspace.data(), - occupationInBatch.data()); - - size_type AStartOffset = 0; - size_type CStartOffset = 0; - for (size_type cellStartId = 0; cellStartId < numLocallyOwnedCells; - cellStartId += cellBlockSize) + ValueType *psiBatchQuadIter = psiBatchQuad.begin(); + RealType * rhoBatchIter = rhoBatch.begin(); + size_type cumulativeQuadInCell, cumulativeQuadPsiInCell = 0; + for (size_type iCell = 0; iCell < psiBatchQuad.nCells(); iCell++) { - const size_type cellEndId = - std::min(cellStartId + cellBlockSize, numLocallyOwnedCells); - const size_type numCellsInBlock = cellEndId - cellStartId; + size_type numQuadInCell = + quadRuleContainer->nCellQuadraturePoints(iCell); + for (size_type iQuad = 0; iQuad < numQuadInCell; iQuad++) + { + RealType b = 0; + for (size_type i = 0; i < numPsiInBatch; i++) + { + const ValueType psi = + psiBatchQuadIter[cumulativeQuadPsiInCell + + numPsiInBatch * iQuad + i]; + b += 2.0 * utils::absSq(psi) * occupationInBatch[i]; + } + rhoBatchIter[cumulativeQuadInCell + iQuad] = b; + } + cumulativeQuadPsiInCell += numQuadInCell * numPsiInBatch; + cumulativeQuadInCell += numQuadInCell; + } - RealType alpha = 1.0; - RealType beta = 0.0; + // // gemm for fi * mod psi^2 - RealType *C = rhoBatch.begin() + CStartOffset; + // utils::MemoryTransfer + // memoryTransfer; - size_type n = 0; - for (size_type iCell = 0; iCell < numCellsInBlock; ++iCell) - { - n += - quadRuleContainer->nCellQuadraturePoints(cellStartId + iCell); - } + // // Create occupancy in memspace + // utils::MemoryStorage + // occupationInBatchMemspace( + // occupationInBatch.size()); + // memoryTransfer.copy(occupationInBatch.size(), + // occupationInBatchMemspace.data(), + // occupationInBatch.data()); - linearAlgebra::blasLapack::gemm( - linearAlgebra::blasLapack::Layout::ColMajor, - linearAlgebra::blasLapack::Op::Trans, - linearAlgebra::blasLapack::Op::NoTrans, - 1, - n, - numPsiInBatch, - alpha, - occupationInBatchMemspace.data(), - numPsiInBatch, - psiModSqBatchQuad.begin() + AStartOffset, - numPsiInBatch, - beta, - C, - 1, - linAlgOpContext); - - - // std::vector transA( - // numCellsInBlock, linearAlgebra::blasLapack::Op::Trans); - // std::vector transB( - // numCellsInBlock, linearAlgebra::blasLapack::Op::NoTrans); - // std::vector mSizesTmp(numCellsInBlock, 0); - // std::vector nSizesTmp(numCellsInBlock, 0); - // std::vector kSizesTmp(numCellsInBlock, 0); - // std::vector ldaSizesTmp(numCellsInBlock, 0); - // std::vector ldbSizesTmp(numCellsInBlock, 0); - // std::vector ldcSizesTmp(numCellsInBlock, 0); - // std::vector strideATmp(numCellsInBlock, 0); - // std::vector strideBTmp(numCellsInBlock, 0); - // std::vector strideCTmp(numCellsInBlock, 0); - - // for (size_type iCell = 0; iCell < numCellsInBlock; ++iCell) - // { - // const size_type cellId = cellStartId + iCell; - // mSizesTmp[iCell] = 1; - // nSizesTmp[iCell] = - // quadRuleContainer->nCellQuadraturePoints(cellId); - // kSizesTmp[iCell] = numPsiInBatch; - // ldaSizesTmp[iCell] = kSizesTmp[iCell]; - // ldbSizesTmp[iCell] = kSizesTmp[iCell]; - // ldcSizesTmp[iCell] = mSizesTmp[iCell]; - // strideBTmp[iCell] = kSizesTmp[iCell] * nSizesTmp[iCell]; - // strideCTmp[iCell] = mSizesTmp[iCell] * nSizesTmp[iCell]; - // strideATmp[iCell] = 0; - // } - - // utils::MemoryStorage mSizes( - // numCellsInBlock); - // utils::MemoryStorage nSizes( - // numCellsInBlock); - // utils::MemoryStorage kSizes( - // numCellsInBlock); - // utils::MemoryStorage ldaSizes( - // numCellsInBlock); - // utils::MemoryStorage ldbSizes( - // numCellsInBlock); - // utils::MemoryStorage ldcSizes( - // numCellsInBlock); - // utils::MemoryStorage strideA( - // numCellsInBlock); - // utils::MemoryStorage strideB( - // numCellsInBlock); - // utils::MemoryStorage strideC( - // numCellsInBlock); - - - // memoryTransfer.copy(numCellsInBlock, - // mSizes.data(), - // mSizesTmp.data()); - // memoryTransfer.copy(numCellsInBlock, - // nSizes.data(), - // nSizesTmp.data()); - // memoryTransfer.copy(numCellsInBlock, - // kSizes.data(), - // kSizesTmp.data()); - // memoryTransfer.copy(numCellsInBlock, - // ldaSizes.data(), - // ldaSizesTmp.data()); - // memoryTransfer.copy(numCellsInBlock, - // ldbSizes.data(), - // ldbSizesTmp.data()); - // memoryTransfer.copy(numCellsInBlock, - // ldcSizes.data(), - // ldcSizesTmp.data()); - // memoryTransfer.copy(numCellsInBlock, - // strideA.data(), - // strideATmp.data()); - // memoryTransfer.copy(numCellsInBlock, - // strideB.data(), - // strideBTmp.data()); - // memoryTransfer.copy(numCellsInBlock, - // strideC.data(), - // strideCTmp.data()); - - // // Create occupancy in memspace - // utils::MemoryStorage - // occupationInBatchMemspace(occupationInBatch.size()); - // memoryTransfer.copy(occupationInBatch.size(), - // occupationInBatchMemspace.data(), - // occupationInBatch.data()); - - // RealType alpha = 1.0; - // RealType beta = 0.0; - - // RealType *C = rhoBatch.begin() + CStartOffset; - - // linearAlgebra::blasLapack:: - // gemmStridedVarBatched( - // linearAlgebra::blasLapack::Layout::ColMajor, - // numCellsInBlock, - // transA.data(), - // transB.data(), - // strideA.data(), - // strideB.data(), - // strideC.data(), - // mSizes.data(), - // nSizes.data(), - // kSizes.data(), - // alpha, - // occupationInBatchMemspace.data(), - // ldaSizes.data(), - // psiModSqBatchQuad.begin() + AStartOffset, - // ldbSizes.data(), - // beta, - // C, - // ldcSizes.data(), - // linAlgOpContext); - - AStartOffset += - numPsiInBatch * n; // mSizesTmp[iCell] * kSizesTmp[iCell]; - CStartOffset += n; // mSizesTmp[iCell] * nSizesTmp[iCell]; - } + // size_type AStartOffset = 0; + // size_type CStartOffset = 0; + // for (size_type cellStartId = 0; cellStartId < numLocallyOwnedCells; + // cellStartId += cellBlockSize) + // { + // const size_type cellEndId = + // std::min(cellStartId + cellBlockSize, numLocallyOwnedCells); + // const size_type numCellsInBlock = cellEndId - cellStartId; + + // RealType alpha = 1.0; + // RealType beta = 0.0; + + // RealType *C = rhoBatch.begin() + CStartOffset; + + // size_type n = 0; + // for (size_type iCell = 0; iCell < numCellsInBlock; ++iCell) + // { + // n += + // quadRuleContainer->nCellQuadraturePoints(cellStartId + + // iCell); + // } + + // linearAlgebra::blasLapack::gemm( + // linearAlgebra::blasLapack::Layout::ColMajor, + // linearAlgebra::blasLapack::Op::Trans, + // linearAlgebra::blasLapack::Op::NoTrans, + // 1, + // n, + // numPsiInBatch, + // alpha, + // occupationInBatchMemspace.data(), + // numPsiInBatch, + // psiModSqBatchQuad.begin() + AStartOffset, + // numPsiInBatch, + // beta, + // C, + // 1, + // linAlgOpContext); + + // AStartOffset += + // numPsiInBatch * n; + // CStartOffset += n; + // } } } // namespace DensityCalculatorInternal diff --git a/src/ksdft/KineticFE.t.cpp b/src/ksdft/KineticFE.t.cpp index 54202f7c..c0b89960 100644 --- a/src/ksdft/KineticFE.t.cpp +++ b/src/ksdft/KineticFE.t.cpp @@ -196,12 +196,10 @@ namespace dftefe const size_type numLocallyOwnedCells = feBMPsi.nLocallyOwnedCells(); - std::vector scalarOpA( - numLocallyOwnedCells, - linearAlgebra::blasLapack::ScalarOp::Identity); - std::vector scalarOpB( - numLocallyOwnedCells, - linearAlgebra::blasLapack::ScalarOp::Identity); + linearAlgebra::blasLapack::ScalarOp scalarOpA = + linearAlgebra::blasLapack::ScalarOp::Identity; + linearAlgebra::blasLapack::ScalarOp scalarOpB = + linearAlgebra::blasLapack::ScalarOp::Identity; std::vector mTmp(numLocallyOwnedCells, 0); std::vector nTmp(numLocallyOwnedCells, 0); std::vector kTmp(numLocallyOwnedCells, 0); @@ -259,8 +257,8 @@ namespace dftefe scaleStridedVarBatched( numLocallyOwnedCells, layout, - scalarOpA.data(), - scalarOpB.data(), + scalarOpA, + scalarOpB, stA.data(), stB.data(), stC.data(), diff --git a/src/linearAlgebra/BlasLapack.h b/src/linearAlgebra/BlasLapack.h index c45eec4e..6026f4b4 100644 --- a/src/linearAlgebra/BlasLapack.h +++ b/src/linearAlgebra/BlasLapack.h @@ -219,8 +219,8 @@ namespace dftefe void scaleStridedVarBatched(const size_type numMats, const Layout layout, - const ScalarOp * scalarOpA, - const ScalarOp * scalarOpB, + const ScalarOp & scalarOpA, + const ScalarOp & scalarOpB, const size_type * stridea, const size_type * strideb, const size_type * stridec, diff --git a/src/linearAlgebra/BlasLapack.t.cpp b/src/linearAlgebra/BlasLapack.t.cpp index 1ce00e8a..bce7bb12 100644 --- a/src/linearAlgebra/BlasLapack.t.cpp +++ b/src/linearAlgebra/BlasLapack.t.cpp @@ -161,8 +161,8 @@ namespace dftefe void scaleStridedVarBatched(const size_type numMats, const Layout layout, - const ScalarOp * scalarOpA, - const ScalarOp * scalarOpB, + const ScalarOp & scalarOpA, + const ScalarOp & scalarOpB, const size_type * stridea, const size_type * strideb, const size_type * stridec, diff --git a/src/linearAlgebra/BlasLapackKernels.cpp b/src/linearAlgebra/BlasLapackKernels.cpp index 05b31e23..bd923c6a 100644 --- a/src/linearAlgebra/BlasLapackKernels.cpp +++ b/src/linearAlgebra/BlasLapackKernels.cpp @@ -281,8 +281,8 @@ namespace dftefe KernelsTwoValueTypes:: scaleStridedVarBatched(const size_type numMats, const Layout layout, - const ScalarOp * scalarOpA, - const ScalarOp * scalarOpB, + const ScalarOp & scalarOpA, + const ScalarOp & scalarOpB, const size_type * stridea, const size_type * strideb, const size_type * stridec, @@ -307,8 +307,8 @@ namespace dftefe hadamardProduct(numrows, (dA + cumulativeA + icolA * numrows), (dB + cumulativeB + icolB * numrows), - *(scalarOpA + ibatch), - *(scalarOpB + ibatch), + scalarOpA, + scalarOpB, (dC + cumulativeC + icolA * *(n + ibatch) * numrows + icolB * numrows)); @@ -332,8 +332,8 @@ namespace dftefe *(dA + cumulativeA + irowB * *(m + ibatch) + icolA), (dB + cumulativeB + irowB * *(n + ibatch)), - *(scalarOpA + ibatch), - *(scalarOpB + ibatch), + scalarOpA, + scalarOpB, (dC + cumulativeC + irowB * *(m + ibatch) * *(n + ibatch) + icolA * *(n + ibatch))); diff --git a/src/linearAlgebra/BlasLapackKernels.h b/src/linearAlgebra/BlasLapackKernels.h index 09606cda..50aa27a9 100644 --- a/src/linearAlgebra/BlasLapackKernels.h +++ b/src/linearAlgebra/BlasLapackKernels.h @@ -125,8 +125,8 @@ namespace dftefe static void scaleStridedVarBatched(const size_type numMats, const Layout layout, - const ScalarOp * scalarOpA, - const ScalarOp * scalarOpB, + const ScalarOp & scalarOpA, + const ScalarOp & scalarOpB, const size_type * stridea, const size_type * strideb, const size_type * stridec, diff --git a/test/basis/TestBasisDataStorageMemOpt.py b/test/basis/TestBasisDataStorageMemOpt.py new file mode 100644 index 00000000..a8489495 --- /dev/null +++ b/test/basis/TestBasisDataStorageMemOpt.py @@ -0,0 +1,249 @@ +# Copyright (c) 2021-2022. +# The Regents of the University of Michigan and DFT-EFE developers. +# +# This file is part of the DFT-EFE code. +# +# DFT-EFE is free software: you can redistribute it and/or modify +# it under the terms of the Lesser GNU General Public License as +# published by the Free Software Foundation, either version 3 of +# the License, or (at your option) any later version. +# +# DFT-EFE is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the Lesser GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License at the top level of DFT-EFE distribution. If not, see +# . + +# @author Vishal Subramanian + +import reframe as rfm +import reframe.utility.sanity as sn +from reframe.utility.sanity import evaluate +from reframe.core.backends import getlauncher +import os +DFTEFE_PATH='' +if not 'DFTEFE_PATH' in os.environ: + raise Exception('''DFTEFE_PATH is not set. Please use 'export + DFTEFE_PATH=/path/to/dft-efe/parent/folder''') +else: + DFTEFE_PATH = os.environ['DFTEFE_PATH'] +parser = rfm.utility.import_module_from_file(DFTEFE_PATH+"/test/Parser.py") +cu = rfm.utility.import_module_from_file(DFTEFE_PATH+"/test/CompareUtil.py") +ss = rfm.utility.import_module_from_file(DFTEFE_PATH+"/test/SetupSystems.py") +bincpy = rfm.utility.import_module_from_file(DFTEFE_PATH+"/test/BinaryCopier.py") +cmflags = rfm.utility.import_module_from_file(DFTEFE_PATH+"/CMakeFlagsParser.py") + +@rfm.simple_test +class BuildOnlyTestBasisDataStorageMemOpt(rfm.CompileOnlyRegressionTest): + descr = 'Compile only test for TestBasisDataStorageMemOpt' + build_system = 'CMake' + make_opts = ['TestBasisDataStorageMemOpt'] + sourcesdir = './src' + tagsDict = {'compileOrRun': 'compile', 'unitOrAggregate': + 'unit', 'slowOrFast': 'fast', 'arch': 'cpu', + 'serialOrParallel': 'serial'} + tags = {x.lower() for x in tagsDict.values()} + valid_systems = ss.getValidSystems(tagsDict['arch']) + valid_prog_environs = ['*'] + keep_files = [] + config_opts = cmflags.getConfig() + + @run_before('compile') + def set_compiler_flags(self): + self.build_system.make_opts = self.make_opts + self.build_system.config_opts = self.config_opts + + @sanity_function + def validate_test(self): + hasWarning = True + hasError = True + msgWarning = "Found warning(s) while compiling." + msgError = "Found error(s) while compiling." + matches = evaluate(sn.findall(r'(?i)warning', evaluate(self.stdout))) + if len(matches) == 0: + hasWarning = False + + matchesOut = evaluate(sn.findall(r'(?i)error', evaluate(self.stdout))) + matchesErr = evaluate(sn.findall(r'(?i)error', evaluate(self.stderr))) + if len(matchesOut) == 0 and len(matchesErr) == 0: + hasError = False + + hasTestPassed = not hasWarning and not hasError + msg = "" + if hasError: + msg = msgError + + elif hasWarning: + msg = msgWarning + + else: + msg = "" + bincpy.BinCpy(os.path.dirname(os.path.abspath(__file__))) + return sn.assert_true(hasTestPassed, msg=msg) + + + + +@rfm.simple_test +class BuildAndRunTestBasisDataStorageMemOpt(rfm.RegressionTest): + target_name = 'TestBasisDataStorageMemOpt' + descr = '''A build and run test to verify the accuracy of interpolation to quad points''' + build_system = 'CMake' + make_opts = [target_name] + # NOTE: Need to specify the name of the executable, as + # ReFrame has no way of knowing that while building from CMake + executable = "./"+target_name + tagsDict = {'compileOrRun': 'compile', 'unitOrAggregate': + 'unit','slowOrFast': 'fast', 'arch': 'cpu', + 'serialOrParallel': 'serial'} + tags = {x.lower() for x in tagsDict.values()} + valid_systems = ss.getValidSystems(tagsDict['arch']) + valid_prog_environs = ['*'] + config_opts = cmflags.getConfig() + + + @run_before('compile') + def set_compiler_flags(self): + self.build_system.make_opts = self.make_opts + self.build_system.config_opts = self.config_opts + + + @run_before('run') + def set_launcher_and_resources(self): + if "serial" in self.tags: + self.job.launcher = getlauncher('local')() + + if "parallel" in self.tags: + self.job.launcher.options = ['-n 2'] + self.extra_resources = ss.setResources(self.tagsDict['arch'], + time_limit = "00:05:00", + num_nodes = 1, + num_tasks_per_node = 2, + ntasks = 2, + mem_per_cpu = '2gb') + + + @sanity_function + def validate_test(self): + # This test does not generate any output. It throws an exception + # if the logic of MPICommunicatorP2P fails to find the ghost data + hasAssertFail = True + hasThrownException = True + hasError = True + msgError = '''Found error(s) in + BuildAndRunTestBasisDataStorageMemOpt.''' + msgThrownException = '''Found exceptions in + BuildAndRunTestBasisDataStorageMemOpt.''' + msgAssertFail = '''Found assert fail(s) in + BuildAndRunTestBasisDataStorageMemOpt.''' + matchesOut = evaluate(sn.findall(r'(?i)error', evaluate(self.stdout))) + matchesErr = evaluate(sn.findall(r'(?i)error', evaluate(self.stderr))) + if len(matchesOut) == 0 and len(matchesErr) == 0: + hasError = False + + matchesOut = evaluate(sn.findall(r'(?i)assert', evaluate(self.stdout))) + matchesErr = evaluate(sn.findall(r'(?i)assert', evaluate(self.stderr))) + if len(matchesOut) == 0 and len(matchesErr) == 0: + hasAssertFail = False + + matchesOut = evaluate(sn.findall(r'(?i)throw', evaluate(self.stdout))) + matchesErr = evaluate(sn.findall(r'(?i)throw', evaluate(self.stderr))) + if len(matchesOut) == 0 and len(matchesErr) == 0: + hasThrownException = False + + hasTestPassed = not any([hasError, hasAssertFail, hasThrownException]) + + msg = "" + if hasError: + msg = msgError + + elif hasAssertFail: + msg = msgAssertFail + + elif hasThrownException: + msg = msgThrownException + + else: + msg="" + + return sn.assert_true(hasTestPassed, msg=msg) + + +@rfm.simple_test +class RunOnlyTestBasisDataStorageMemOpt(rfm.RunOnlyRegressionTest): + target_name = 'TestBasisDataStorageMemOpt' + descr = '''A run only test to verify the accuracy of interpolation''' + build_system = 'CMake' + make_opts = [target_name] + executable = os.path.dirname(os.path.abspath(__file__))+"/executable/"+target_name + tagsDict = {'compileOrRun': 'run', 'unitOrAggregate': + 'unit','slowOrFast': 'fast', 'arch': 'cpu', + 'serialOrParallel': 'serial'} + tags = {x.lower() for x in tagsDict.values()} + valid_systems = ss.getValidSystems(tagsDict['arch']) + valid_prog_environs = ['*'] + config_opts = cmflags.getConfig() + + + @run_before('run') + def set_launcher_and_resources(self): + if "serial" in self.tags: + self.job.launcher = getlauncher('local')() + + if "parallel" in self.tags: + self.job.launcher.options = ['-n 2'] + self.extra_resources = ss.setResources(self.tagsDict['arch'], + time_limit = "00:05:00", + num_nodes = 1, + num_tasks_per_node = 2, + ntasks = 2, + mem_per_cpu = '2gb') + + + @sanity_function + def validate_test(self): + # This test does not generate any output. It throws an exception + # if the logic of MPICommunicatorP2P fails to find the ghost data + hasAssertFail = True + hasThrownException = True + hasError = True + msgError = '''Found error(s) in + RunOnlyTestBasisDataStorageMemOpt.''' + msgThrownException = '''Found exceptions in + RunOnlyTestBasisDataStorageMemOpt''' + msgAssertFail = '''Found assert fail(s) in + RunOnlyTestBasisDataStorageMemOpt''' + matchesOut = evaluate(sn.findall(r'(?i)error', evaluate(self.stdout))) + matchesErr = evaluate(sn.findall(r'(?i)error', evaluate(self.stderr))) + if len(matchesOut) == 0 and len(matchesErr) == 0: + hasError = False + + matchesOut = evaluate(sn.findall(r'(?i)assert', evaluate(self.stdout))) + matchesErr = evaluate(sn.findall(r'(?i)assert', evaluate(self.stderr))) + if len(matchesOut) == 0 and len(matchesErr) == 0: + hasAssertFail = False + + matchesOut = evaluate(sn.findall(r'(?i)throw', evaluate(self.stdout))) + matchesErr = evaluate(sn.findall(r'(?i)throw', evaluate(self.stderr))) + if len(matchesOut) == 0 and len(matchesErr) == 0: + hasThrownException = False + + hasTestPassed = not any([hasError, hasAssertFail, hasThrownException]) + + msg = "" + if hasError: + msg = msgError + + elif hasAssertFail: + msg = msgAssertFail + + elif hasThrownException: + msg = msgThrownException + + else: + msg="" + + return sn.assert_true(hasTestPassed, msg=msg) diff --git a/test/basis/src/CMakeLists.txt b/test/basis/src/CMakeLists.txt index 9aa14a78..4904c7ed 100644 --- a/test/basis/src/CMakeLists.txt +++ b/test/basis/src/CMakeLists.txt @@ -89,4 +89,8 @@ if(ENABLE_MPI) target_link_libraries(TestGenerateMesh PUBLIC dft-efe-basis dft-efe-utils dft-efe-quadrature dft-efe-linalg dft-efe-atoms) set_target_properties(TestGenerateMesh PROPERTIES OUTPUT_NAME "TestGenerateMesh" SUFFIX ".x") + add_executable(TestBasisDataStorageMemOpt TestBasisDataStorageMemOpt.cpp ) + target_link_libraries(TestBasisDataStorageMemOpt PUBLIC dft-efe-basis dft-efe-utils dft-efe-quadrature dft-efe-linalg dft-efe-atoms) + set_target_properties(TestBasisDataStorageMemOpt PROPERTIES OUTPUT_NAME "TestBasisDataStorageMemOpt" SUFFIX ".x") + endif() \ No newline at end of file diff --git a/test/basis/src/TestBasisDataStorageMemOpt.cpp b/test/basis/src/TestBasisDataStorageMemOpt.cpp new file mode 100644 index 00000000..77ab8f27 --- /dev/null +++ b/test/basis/src/TestBasisDataStorageMemOpt.cpp @@ -0,0 +1,336 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int main() +{ + + std::cout<<" Entering test data storage \n"; + + //initialize MPI + // NOTE : The test case only works for orthogonalized EFE basis + + int mpiInitFlag = 0; + dftefe::utils::mpi::MPIInitialized(&mpiInitFlag); + if(!mpiInitFlag) + { + dftefe::utils::mpi::MPIInit(NULL, NULL); + } + + dftefe::utils::mpi::MPIComm comm = dftefe::utils::mpi::MPICommWorld; + + // Get the rank of the process + int rank; + dftefe::utils::mpi::MPICommRank(comm, &rank); + + // Get nProcs + int numProcs; + dftefe::utils::mpi::MPICommSize(comm, &numProcs); + + int blasQueue = 0; + int lapackQueue = 0; + std::shared_ptr> blasQueuePtr = std::make_shared + >(blasQueue); + std::shared_ptr> lapackQueuePtr = std::make_shared + >(lapackQueue); + std::shared_ptr> linAlgOpContext = + std::make_shared>(blasQueuePtr, lapackQueuePtr); + + // Set up Triangulation + const unsigned int dim = 3; + std::shared_ptr triangulationBase = + std::make_shared>(comm); + std::vector subdivisions = {10, 10, 10}; + std::vector isPeriodicFlags(dim, false); + std::vector domainVectors(dim, + dftefe::utils::Point(dim, 0.0)); + + double xmax = 10.; + double ymax = 10.; + double zmax = 10.; + unsigned int numComponents = 1; + bool isAdaptiveGrid = false; + + domainVectors[0][0] = xmax; + domainVectors[1][1] = ymax; + domainVectors[2][2] = zmax; + + // initialize the triangulation + triangulationBase->initializeTriangulationConstruction(); + triangulationBase->createUniformParallelepiped(subdivisions, + domainVectors, + isPeriodicFlags); + triangulationBase->finalizeTriangulationConstruction(); + + // Enrichment data file consisting of g(r,\theta,\phi) = f(r)*Y_lm(\theta, \phi) + std::string sourceDir = "/home/avirup/dft-efe/test/basis/src/"; + std::string atomDataFile = "AtomData.in"; + std::string inputFileName = sourceDir + atomDataFile; + std::fstream fstream; + + fstream.open(inputFileName, std::fstream::in); + + // read the input file and create atomSymbolVec vector and atom coordinates vector. + std::vector atomCoordinatesVec; + std::vector coordinates; + coordinates.resize(dim,0.); + std::vector atomSymbolVec; + std::string symbol; + atomSymbolVec.resize(0); + std::string line; + while (std::getline(fstream, line)){ + std::stringstream ss(line); + ss >> symbol; + for(unsigned int i=0 ; i> coordinates[i]; + } + atomCoordinatesVec.push_back(coordinates); + atomSymbolVec.push_back(symbol); + } + dftefe::utils::mpi::MPIBarrier(comm); + + std::map atomSymbolToFilename; + for (auto i:atomSymbolVec ) + { + atomSymbolToFilename[i] = sourceDir + i + ".xml"; + } + + std::vector fieldNames{"vnuclear"}; + std::vector metadataNames{ "symbol", "Z", "charge", "NR", "r" }; + std::shared_ptr atomSphericalDataContainer = + std::make_shared(atomSymbolToFilename, + fieldNames, + metadataNames); + + std::string fieldName = "vnuclear"; + double atomPartitionTolerance = 1e-6; + + if(isAdaptiveGrid) + { + int flag = 1; + int mpiReducedFlag = 1; + bool refineFlag = true; + while(mpiReducedFlag) + { + flag = 1; + auto triaCellIter = triangulationBase->beginLocal(); + for( ; triaCellIter != triangulationBase->endLocal(); triaCellIter++) + { + refineFlag = false; + dftefe::utils::Point centerPoint(dim, 0.0); + (*triaCellIter)->center(centerPoint); + double dist = (centerPoint[0] - 5)* (centerPoint[0] - 5); + dist += (centerPoint[1] - 5)* (centerPoint[1] - 5); + dist += (centerPoint[2] - 5)* (centerPoint[2] - 5); + dist = std::sqrt(dist); + if((dist < 1.0) || centerPoint[0] < 1.0) + refineFlag = true; + if ( refineFlag ) + { + (*triaCellIter)->setRefineFlag(); + flag = 0; + } + } + triangulationBase->executeCoarseningAndRefinement(); + triangulationBase->finalizeTriangulationConstruction(); + // Mpi_allreduce that all the flags are 1 (mpi_max) + int err = dftefe::utils::mpi::MPIAllreduce( + &flag, + &mpiReducedFlag, + 1, + dftefe::utils::mpi::MPIInt, + dftefe::utils::mpi::MPIMin, + comm); + std::pair mpiIsSuccessAndMsg = + dftefe::utils::mpi::MPIErrIsSuccessAndMsg(err); + dftefe::utils::throwException(mpiIsSuccessAndMsg.first, + "MPI Error:" + mpiIsSuccessAndMsg.second); + } + } + // Make orthogonalized EFE basis + + unsigned int feOrder = 3; + + // Set up the vector of scalarSpatialRealFunctions for adaptive quadrature + std::vector> functionsVec(0); + unsigned int numfun = 2; + functionsVec.resize(numfun); // Enrichment Functions + std::vector tolerances(numfun); + std::vector integralThresholds(numfun); + for ( unsigned int i=0 ;i < functionsVec.size() ; i++ ) + { + functionsVec[i] = std::make_shared>( + atomSphericalDataContainer, + atomSymbolVec, + atomCoordinatesVec, + fieldName, + i); + tolerances[i] = 1e3; + integralThresholds[i] = 1e3; + } + + double smallestCellVolume = 1e-12; + unsigned int maxRecursion = 1000; + + //Set up quadAttr for Rhs and OverlapMatrix + + dftefe::quadrature::QuadratureRuleAttributes quadAttrAdaptive(dftefe::quadrature::QuadratureFamily::ADAPTIVE,false); + + // Set up base quadrature rule for adaptive quadrature + + unsigned int num1DGaussSize = feOrder + 1; + std::shared_ptr baseQuadRule = + std::make_shared(dim, num1DGaussSize); + + dftefe::quadrature::QuadratureRuleAttributes quadAttrGauss(dftefe::quadrature::QuadratureFamily::GAUSS,true,num1DGaussSize); + + std::shared_ptr cellMapping = std::make_shared>(); + std::shared_ptr parentToChildCellsManager = std::make_shared>(); + + std::shared_ptr quadRuleContainerAdaptive = + std::make_shared + (quadAttrAdaptive, + baseQuadRule, + triangulationBase, + *cellMapping, + *parentToChildCellsManager, + functionsVec, + tolerances, + tolerances, + integralThresholds, + smallestCellVolume, + maxRecursion); + + // Set the CFE basis manager and handler for bassiInterfaceCoeffcient distributed vector + std::shared_ptr> cfeBasisDofHandler = + std::make_shared>(triangulationBase, feOrder, comm); + + dftefe::basis::BasisStorageAttributesBoolMap basisAttrMap; + basisAttrMap[dftefe::basis::BasisStorageAttributes::StoreValues] = true; + basisAttrMap[dftefe::basis::BasisStorageAttributes::StoreGradient] = true; + basisAttrMap[dftefe::basis::BasisStorageAttributes::StoreHessian] = false; + basisAttrMap[dftefe::basis::BasisStorageAttributes::StoreOverlap] = false; + basisAttrMap[dftefe::basis::BasisStorageAttributes::StoreGradNiGradNj] = false; + basisAttrMap[dftefe::basis::BasisStorageAttributes::StoreJxW] = true; + + // Set up the CFE Basis Data Storage for Rhs + std::shared_ptr> cfeBasisDataStorageAdaptUniformQuad = + std::make_shared> + (cfeBasisDofHandler, quadAttrAdaptive, basisAttrMap); + // evaluate basis data + cfeBasisDataStorageAdaptUniformQuad->evaluateBasisData(quadAttrAdaptive, quadRuleContainerAdaptive, basisAttrMap); + + unsigned int cellBlockSize = 10; + + // Set up the CFE Basis Data Storage for Rhs + std::shared_ptr> cfeBasisDataStorageUniformQuad = + std::make_shared> + (cfeBasisDofHandler, quadAttrGauss, basisAttrMap, cellBlockSize ,*linAlgOpContext); + // evaluate basis data + cfeBasisDataStorageUniformQuad->evaluateBasisData(quadAttrGauss, basisAttrMap); + size_type numLocallyOwnedCells = cfeBasisDofHandler->nLocallyOwnedCells(); + for(int i = 0 ; i < numLocallyOwnedCells ; i += cellBlockSize) + { + size_type cellStartId = i; + size_type cellEndId = std::min(i+cellBlockSize, numLocallyOwnedCells); + auto basisDataStorageInCellRange = + cfeBasisDataStorageUniformQuad->getBasisDataStorageInCellRange(std::make_pair(cellStartId, cellEndId)); + for(int j = 0 ; j < cellEndId - cellStartId ; j++) + { + auto basisDataStorageInCellAdaptUniform = cfeBasisDataStorageAdaptUniformQuad->getBasisDataInCell(j); + for(int k = 0 ; k < basisDataStorageInCellAdaptUniform.size() ; k++) + { + std::cout << *(basisDataStorageInCellRange.data() + dofsPerCell * nQuadPerCell * j + k) << + "\t" << *(basisDataStorageInCellAdaptUniform.data() + k) << "\n"; + } + } + } + +// // Create the enrichmentClassicalInterface object +// std::shared_ptr> +// (cfeBasisDataStorageOverlapMatrix, +// cfeBasisDataStorageRhs, +// atomSphericalDataContainer, +// atomPartitionTolerance, +// atomSymbolVec, +// atomCoordinatesVec, +// fieldName, +// linAlgOpContext, +// comm); + +// // initialize the basis +// std::shared_ptr> basisDofHandler = +// std::make_shared>( +// enrichClassIntfce, feOrder, comm); + +// std::map dofCoords; +// basisDofHandler->getBasisCenters(dofCoords); + +// std::cout << "Locally owned cells : " <nLocallyOwnedCells() << "\n"; +// std::cout << "Total Number of dofs : " << basisDofHandler->nGlobalNodes() << "\n"; + +// basisAttrMap[dftefe::basis::BasisStorageAttributes::StoreValues] = true; +// basisAttrMap[dftefe::basis::BasisStorageAttributes::StoreGradient] = false; +// basisAttrMap[dftefe::basis::BasisStorageAttributes::StoreHessian] = false; +// basisAttrMap[dftefe::basis::BasisStorageAttributes::StoreOverlap] = false; +// basisAttrMap[dftefe::basis::BasisStorageAttributes::StoreGradNiGradNj] = false; +// basisAttrMap[dftefe::basis::BasisStorageAttributes::StoreJxW] = false; + +// // Set up Adaptive quadrature for EFE Basis Data Storage +// std::shared_ptr> feBasisData = +// std::make_shared> +// (basisDofHandler, quadAttrAdaptive, basisAttrMap); + +// // evaluate basis data +// feBasisData->evaluateBasisData(quadAttrAdaptive, quadRuleContainerAdaptive, basisAttrMap); + + + dftefe::utils::mpi::MPIBarrier(comm); + + //gracefully end MPI + + int mpiFinalFlag = 0; + dftefe::utils::mpi::MPIFinalized(&mpiFinalFlag); + if(!mpiFinalFlag) + { + dftefe::utils::mpi::MPIFinalize(); + } +}