diff --git a/src/2d/bouss/amr2.f90 b/src/2d/bouss/amr2.f90
index 09e2b241b..bc0932b82 100644
--- a/src/2d/bouss/amr2.f90
+++ b/src/2d/bouss/amr2.f90
@@ -127,18 +127,18 @@ program amr2
 
     ! Local variables
     integer :: i, iaux, mw, level
-    integer :: ierr
+    integer :: ierr ! error flag for PETSc calls
     integer :: ndim, nvar, naux, mcapa1, mindim, dimensional_split
     integer :: nstart, nsteps, nv1, nx, ny, lentotsave, num_gauge_SAVE
     integer :: omp_get_max_threads, maxthreads
     real(kind=8) :: time, ratmet, cut, dtinit, dt_max
-    logical :: vtime, rest, output_t0    
+    logical :: vtime, rest, output_t0
 
     ! Timing variables
     integer(kind=8) :: clock_start, clock_finish, clock_rate, ttotal, count_max
     real(kind=8) :: ttotalcpu
     integer(kind=8) :: tick_clock_finish
-    integer, parameter :: timing_unit = 48
+    integer :: timing_unit = 0
     character(len=512) :: timing_line, timing_substr
     character(len=*), parameter :: timing_base_name = "timing."
     character(len=*), parameter :: timing_header_format =                      &
@@ -160,10 +160,12 @@ program amr2
     character(len=*), parameter :: parmfile = 'fort.parameters'
 
 #ifdef HAVE_PETSC
-!   needs to be very first line in main
-      call PetscInitialize(ierr)
-      CHKERRA(ierr);
+    !   needs to be very first line in main
+    PetscCallA(PetscOptionsSetValue(PETSC_NULL_OPTIONS,'-mpi_linear_solver_server','',ierr))
+    PetscCallA(PetscOptionsSetValue(PETSC_NULL_OPTIONS,'-mpi_linear_solver_server_view','',ierr))
+    PetscCallA(PetscInitialize(ierr))
 #endif
+    timing_unit = 48
 
     ! Open parameter and debug files
     open(dbugunit,file=dbugfile,status='unknown',form='formatted')
@@ -599,7 +601,7 @@ program amr2
         time = t0
         nstart = 0
     endif
-
+    call PetscViewerASCIIStdoutSetFileUnit(outunit,ierr)
 
     write(parmunit,*) ' '
     write(parmunit,*) '--------------------------------------------'
@@ -860,7 +862,6 @@ program amr2
     write(*,format_string)
     write(*,*)
     write(timing_unit,*)
-    close(timing_unit)
 
     if (isolver.eq. 2) then
 !     if (countIterRef == 0) then
@@ -928,12 +929,16 @@ program amr2
 
     write(outunit,"(//,' ------  end of AMRCLAW integration --------  ')")
 
-    ! Close output and debug files.
-    close(outunit)
+    ! Close debug file.
     close(dbugunit)
+    close(outunit)
 
 #ifdef HAVE_PETSC
-      call PetscFinalize(ierr)
+    call PetscViewerASCIIStdoutSetFileUnit(timing_unit,ierr)
+    call PetscFinalize(ierr)
 #endif
 
-end program amr2
+    if (timing_unit .gt. 0) then
+       close(timing_unit)
+    endif
+    end program amr2
diff --git a/src/2d/bouss/bouss_module.f90 b/src/2d/bouss/bouss_module.f90
index cd57ab121..051de2645 100644
--- a/src/2d/bouss/bouss_module.f90
+++ b/src/2d/bouss/bouss_module.f90
@@ -49,8 +49,8 @@ module bouss_module
        integer :: numColsTot
 
        ! for CRS format
-       integer, allocatable, dimension(:) :: rowPtr, cols
-       real(kind=8), allocatable, dimension(:) :: vals 
+       integer, pointer :: rowPtr(:), cols(:)
+       real(kind=8), pointer :: vals(:)
 
        ! intfCountc are the # equations (cells)  added for you as a coarse grid
        ! these are the equations under a fine grid on the first interior cells
@@ -166,7 +166,7 @@ subroutine set_bouss(rest,time,naux)
         crs = .true.
     endif
 
-    ! crs = .false.  ! uncomment this line to force CRS with SGN for testing
+    !crs = .false.  ! uncomment this line to force CRS with SGN for testing
 
     !------------------------------------------
     if (rest) then
diff --git a/src/2d/bouss/buildSparseMatrixMScoo.f90 b/src/2d/bouss/buildSparseMatrixMScoo.f90
index 8e013380c..79f7b4a3f 100644
--- a/src/2d/bouss/buildSparseMatrixMScoo.f90
+++ b/src/2d/bouss/buildSparseMatrixMScoo.f90
@@ -11,7 +11,7 @@ subroutine buildSparseMatrixMScoo(rhs,nvar,naux,levelBouss,numBoussCells)
     integer, intent(in) :: nvar, naux, levelBouss, numBoussCells
     
     !! pass in numBoussCells for this level so can dimension this array
-    real(kind=8) :: rhs(0:2*numBoussCells) 
+    real(kind=8) :: rhs(2*numBoussCells) 
     
     type(matrix_patchIndex), pointer :: mi
     type(matrix_levInfo),  pointer :: minfo
diff --git a/src/2d/bouss/buildSparseMatrixSGNcoo.f90 b/src/2d/bouss/buildSparseMatrixSGNcoo.f90
index 4e5fa75b5..5f800a1ee 100644
--- a/src/2d/bouss/buildSparseMatrixSGNcoo.f90
+++ b/src/2d/bouss/buildSparseMatrixSGNcoo.f90
@@ -16,7 +16,7 @@ subroutine buildSparseMatrixSGNcoo(q,qold,aux,soln,rhs,                    &
     integer, intent(in) :: nvar, naux, levelBouss, numBoussCells
     
     !! pass in numBoussCells for this level so can dimension these arrays
-    real(kind=8) :: soln(0:2*numBoussCells), rhs(0:2*numBoussCells) 
+    real(kind=8) :: soln(2*numBoussCells), rhs(2*numBoussCells) 
     
     type(matrix_patchIndex), pointer :: mi
     type(matrix_levInfo),  pointer :: minfo
diff --git a/src/2d/bouss/buildSparseMatrixSGNcrs.f90 b/src/2d/bouss/buildSparseMatrixSGNcrs.f90
index 0a22a7130..9d3b8a9b2 100644
--- a/src/2d/bouss/buildSparseMatrixSGNcrs.f90
+++ b/src/2d/bouss/buildSparseMatrixSGNcrs.f90
@@ -17,7 +17,7 @@ subroutine buildSparseMatrixSGNcrs(q,qold,aux,soln,rhs,rowPtr,cols,vals,   &
     integer, intent(inout) :: cols(0:24*numBoussCells)
     
     !! pass in numBoussCells for this level so can dimension these arrays
-    real(kind=8) :: soln(0:2*numBoussCells), rhs(0:2*numBoussCells) 
+    real(kind=8) :: soln(0:2*numBoussCells-1), rhs(0:2*numBoussCells-1) 
     
     type(matrix_patchIndex), pointer :: mi
     type(matrix_levInfo),  pointer :: minfo
diff --git a/src/2d/bouss/implicit_update_bouss_2Calls.f90 b/src/2d/bouss/implicit_update_bouss_2Calls.f90
index 586e95f9a..484b02013 100644
--- a/src/2d/bouss/implicit_update_bouss_2Calls.f90
+++ b/src/2d/bouss/implicit_update_bouss_2Calls.f90
@@ -12,15 +12,20 @@ subroutine implicit_update(nvar,naux,levelBouss,numBoussCells,doUpdate,time)
     use amr_module
     use topo_module, only: topo_finalized
     use bouss_module
-        
+#include <petsc/finclude/petscvec.h>
+    use petscvec
+
     implicit none
     
     integer, intent(in) :: nvar, naux, levelBouss, numBoussCells
     logical, intent(in) :: doUpdate
     
     !! pass in numBoussCells for this level so can dimension this array
-    real(kind=8) :: soln(0:2*numBoussCells)
-    real(kind=8) :: rhs(0:2*numBoussCells)
+    Vec :: v_soln,v_rhs
+    integer :: ierr
+ 
+    real(kind=8),pointer :: soln(:)
+    real(kind=8),pointer :: rhs(:)
     
     type(matrix_patchIndex), pointer :: mi
     type(matrix_levInfo),  pointer :: minfo
@@ -38,6 +43,11 @@ subroutine implicit_update(nvar,naux,levelBouss,numBoussCells,doUpdate,time)
 #ifdef WHERE_AM_I
     write(*,*) "starting implicit_update for level ",levelBouss
 #endif
+    PetscCallA(VecCreateSeq(PETSC_COMM_SELF,2*numBoussCells,v_rhs,ierr))
+    PetscCallA(VecCreateSeq(PETSC_COMM_SELF,2*numBoussCells,v_soln,ierr))
+    PetscCallA(VecGetArrayF90(v_rhs,rhs,ierr))
+    PetscCallA(VecGetArrayF90(v_soln,soln,ierr))
+    
     dt = possk(levelBouss)
     nD = numBoussCells ! shorthand for size of matrix blocks D1 to D4
     
@@ -102,7 +112,8 @@ subroutine implicit_update(nvar,naux,levelBouss,numBoussCells,doUpdate,time)
       else ! CRS format
          minfo%vals = 0
          minfo%cols = -1  ! to recognize if not overwritten with real col indices
-        call prepBuildSparseMatrixSGNcrs(soln,rhs,nvar,naux,levelBouss,numBoussCells)
+         call prepBuildSparseMatrixSGNcrs(soln,rhs,nvar,naux,levelBouss,numBoussCells)
+         CHKMEMQ
       endif
     endif
     
@@ -141,7 +152,12 @@ subroutine implicit_update(nvar,naux,levelBouss,numBoussCells,doUpdate,time)
        if (minfo%numColsTot .gt. 0 .or. minfo%matrix_nelt .gt. 0) then
           call system_clock(clock_startLinSolve,clock_rate)
           call cpu_time(cpu_startLinSolve)
-          call petsc_driver(soln,rhs,levelBouss,numBoussCells,time,topo_finalized)
+          PetscCallA(VecRestoreArrayF90(v_rhs,rhs,ierr))
+          PetscCallA(VecRestoreArrayF90(v_soln,soln,ierr))
+          CHKMEMQ;
+          call petsc_driver(v_soln,v_rhs,levelBouss,numBoussCells,time,topo_finalized)
+          PetscCallA(VecGetArrayF90(v_rhs,rhs,ierr))
+          PetscCallA(VecGetArrayF90(v_soln,soln,ierr))
           call system_clock(clock_finishLinSolve,clock_rate)
           call cpu_time(cpu_finishLinSolve)
           !write(89,*)" level ",levelBouss,"  rhs   soln"
@@ -201,6 +217,7 @@ subroutine implicit_update(nvar,naux,levelBouss,numBoussCells,doUpdate,time)
         !endif
     end do        
 !$OMP END PARALLEL DO
+    CHKMEMQ
 
     !! next loop to copy from either other grids at same level and to
     !! update ghost cell momenta with newly copied psi
@@ -238,6 +255,10 @@ subroutine implicit_update(nvar,naux,levelBouss,numBoussCells,doUpdate,time)
     write(*,*) "ending   implicit_update for level ",levelBouss
 #endif
         
+    PetscCallA(VecRestoreArrayF90(v_rhs,rhs,ierr))
+    PetscCallA(VecRestoreArrayF90(v_soln,soln,ierr))
+    PetscCallA(VecDestroy(v_rhs,ierr))
+    PetscCallA(VecDestroy(v_soln,ierr))
 
 contains
 
@@ -255,8 +276,6 @@ integer pure function iaddaux(iaux,i,j)
         iaddaux = locaux + iaux-1 + naux*((j-1+nghost)*mitot+i+nghost-1)
     end function iaddaux
 
-
-
 end subroutine implicit_update
 
 subroutine solnUpdate(valnew,valOther,nb,mitot,mjtot,nghost,nvar,dt, &
@@ -279,7 +298,7 @@ subroutine solnUpdate(valnew,valOther,nb,mitot,mjtot,nghost,nvar,dt, &
 
     integer, intent(in) :: nvar,nghost,mitot,mjtot,nD,mptr,nb,levelBouss
     logical, intent(in) :: doUpdate
-    real(kind=8), intent(in) :: soln(0:2*nD), dt, rhs(0:2*nD)
+    real(kind=8), intent(in) :: soln(2*nD), dt, rhs(2*nD)
 
     real(kind=8), intent(inout) :: valnew(nvar,mitot,mjtot)
     real(kind=8), intent(inout) :: valOther(nvar,mitot,mjtot)
@@ -415,7 +434,7 @@ subroutine solnUpdateSGN(valnew,valOther,nb,mitot,mjtot,nghost,nvar,dt, &
 
     integer, intent(in) :: nvar,naux,nghost,mitot,mjtot,nD,mptr,nb,levelBouss
     logical, intent(in) :: doUpdate
-    real(kind=8), intent(in) :: soln(0:2*nD), dt, rhs(0:2*nD)
+    real(kind=8), intent(in) :: soln(2*nD), dt, rhs(2*nD)
 
     real(kind=8), intent(inout) :: valnew(nvar,mitot,mjtot)
     real(kind=8), intent(inout) :: valOther(nvar,mitot,mjtot)
@@ -505,9 +524,9 @@ subroutine solnUpdateSGN(valnew,valOther,nb,mitot,mjtot,nghost,nvar,dt, &
                          (grav/alpha * etay(i,j) - soln(k_ij+nD))
                  else  ! CRS format,  different mapping
                   valnew(2,i,j) = valnew(2,i,j) + dt * valnew(1,i,j)*     &
-                         (grav/alpha * etax(i,j) - soln(2*(k_ij-1)))
+                         (grav/alpha * etax(i,j) - soln(2*(k_ij-1)+1))
                   valnew(3,i,j) = valnew(3,i,j) + dt * valnew(1,i,j)*     &
-                         (grav/alpha * etay(i,j) - soln(2*k_ij-1))
+                         (grav/alpha * etay(i,j) - soln(2*k_ij-1+1))
                  endif
 
                 ! save update in components 4,5 for Dirichlet BC next iteration
@@ -521,8 +540,8 @@ subroutine solnUpdateSGN(valnew,valOther,nb,mitot,mjtot,nghost,nvar,dt, &
                 else
                     !valOther(4,i,j) = soln(2*k_ij-1)
                     !valOther(5,i,j) = soln(2*k_ij)
-                    valOther(4,i,j) = soln(2*(k_ij-1))
-                    valOther(5,i,j) = soln(2*k_ij-1)
+                    valOther(4,i,j) = soln(2*(k_ij-1)+1)
+                    valOther(5,i,j) = soln(2*k_ij-1+1)
                  endif
             else  ! not a bouss cell. set to zero
                valOther(4,i,j) = 0.d0
@@ -610,8 +629,8 @@ subroutine solnUpdateSGN(valnew,valOther,nb,mitot,mjtot,nghost,nvar,dt, &
                 valnew(4,i,j) = soln(k_ij)
                 valnew(5,i,j) = soln(k_ij+nD)
               else
-                valnew(4,i,j) = soln(2*(k_ij-1))
-                valnew(5,i,j) = soln(2*k_ij-1)
+                valnew(4,i,j) = soln(2*(k_ij-1)+1)
+                valnew(5,i,j) = soln(2*k_ij-1+1)
               endif
             else
               valnew(4,i,j) = 0.d0
diff --git a/src/2d/bouss/petsc_driver.f90 b/src/2d/bouss/petsc_driver.f90
index d0974cb54..a6247ec66 100644
--- a/src/2d/bouss/petsc_driver.f90
+++ b/src/2d/bouss/petsc_driver.f90
@@ -1,4 +1,4 @@
-subroutine petsc_driver(soln,rhs_geo,levelBouss,numBoussCells,time,topo_finalized)
+subroutine petsc_driver(solution,rhs,levelBouss,numBoussCells,time,topo_finalized)
 
     ! use petsc to solve sparse linear system
     ! called this way so can allocate storage of correct size
@@ -6,18 +6,13 @@ subroutine petsc_driver(soln,rhs_geo,levelBouss,numBoussCells,time,topo_finalize
     use bouss_module
 #ifdef HAVE_PETSC
 #include <petsc/finclude/petscksp.h>
-!!#include "petscmat.h"
     use petscksp
     implicit none
     
     integer, intent(in) :: levelBouss, numBoussCells
-    real(kind=8), intent(inout) :: rhs_geo(0:2*numBoussCells)
     real(kind=8), intent(in) :: time
     logical, intent(in) :: topo_finalized
     
-    !! pass in numBoussCells for this level so can dimension this array
-    real(kind=8), intent(out) :: soln(0:2*numBoussCells)
-    
     type(matrix_levInfo),  pointer :: minfo
     
     integer :: numCores, omp_get_max_threads,i,nelt
@@ -87,6 +82,7 @@ subroutine petsc_driver(soln,rhs_geo,levelBouss,numBoussCells,time,topo_finalize
             end do
           else 
             ! using CRS sparse matrix format
+            CHKMEMQ;
             call MatCreateSeqAijWithArrays(PETSC_COMM_SELF,2*numBoussCells,  & 
                        2*numBoussCells,minfo%rowPtr,minfo%cols,  &
                        minfo%vals,Jr(levelBouss),ierr)
@@ -136,17 +132,15 @@ subroutine petsc_driver(soln,rhs_geo,levelBouss,numBoussCells,time,topo_finalize
       ! if you want to use previous soln as initial guess,
       ! set soln to old soln here
 
-      if (.not. crs) then
-        ! switch to 0-based indexing for COO triplet format
-        ! (already 0-based for CRS)
-        do i = 1, 2*numBousscells
-           rhs_geo(i-1) = rhs_geo(i) 
-        end do
-      endif 
+!      if (.not. crs) then
+!        ! switch to 0-based indexing for COO triplet format
+!        ! (already 0-based for CRS)
+!        do i = 1, 2*numBousscells
+!           rhs_geo(i-1) = rhs_geo(i) 
+!        end do
+!      endif 
       
-      call VecCreateSeqWithArray(PETSC_COMM_SELF,1,n,rhs_geo,rhs,ierr)
-      CHKERRA(ierr)
-      call VecCreateSeqWithArray(PETSC_COMM_SELF,1,n,soln,solution,ierr)
+
       CHKERRA(ierr)
 
       ! save matrices for debugging. comment out to turn off
@@ -167,23 +161,17 @@ subroutine petsc_driver(soln,rhs_geo,levelBouss,numBoussCells,time,topo_finalize
       !endif
 
       ! check if need to adjust soln back to 1 indexing
-      if (.not. crs) then ! bump both  back up
-        do i = 1, 2*numBoussCells
-          soln(2*numBoussCells-i+1) = soln(2*numBoussCells-i)
-          rhs_geo(2*numBoussCells-i+1) = rhs_geo(2*numBoussCells-i) 
-        end do
-      endif
+!      if (.not. crs) then ! bump both  back up
+!        do i = 1, 2*numBoussCells
+!          soln(2*numBoussCells-i+1) = soln(2*numBoussCells-i)
+!          rhs_geo(2*numBoussCells-i+1) = rhs_geo(2*numBoussCells-i) 
+!        end do
+!      endif
       
       !! if itnum > itmax then
       !! call KSPGetResidualNorm(ksp(levelBouss),resmax,ierr)
       CHKERRA(ierr)
       ! petsc already puts solution into my array soln so just return
-
-       ! create and destroy each step since fast
-       call VecDestroy(rhs,ierr)
-       CHKERRA(ierr)
-       call VecDestroy(solution,ierr)
-       CHKERRA(ierr)
 #endif
 
 end subroutine petsc_driver
diff --git a/src/2d/bouss/prepBuildSparseMatrixSGNcoo.f90 b/src/2d/bouss/prepBuildSparseMatrixSGNcoo.f90
index 35f7154c5..4e69106bf 100644
--- a/src/2d/bouss/prepBuildSparseMatrixSGNcoo.f90
+++ b/src/2d/bouss/prepBuildSparseMatrixSGNcoo.f90
@@ -11,7 +11,7 @@ subroutine prepBuildSparseMatrixSGNcoo(soln,rhs,nvar,naux,levelBouss,numBoussCel
     integer, intent(in) :: nvar, naux, levelBouss, numBoussCells
     
     !! pass in numBoussCells for this level so can dimension this array
-    real(kind=8) :: soln(0:2*numBoussCells), rhs(0:2*numBoussCells) 
+    real(kind=8) :: soln(2*numBoussCells), rhs(2*numBoussCells) 
     real(kind=8) fms
     
     type(matrix_patchIndex), pointer :: mi
diff --git a/src/2d/bouss/prepBuildSparseMatrixSGNcrs.f90 b/src/2d/bouss/prepBuildSparseMatrixSGNcrs.f90
index cbb9ea378..e22f21081 100644
--- a/src/2d/bouss/prepBuildSparseMatrixSGNcrs.f90
+++ b/src/2d/bouss/prepBuildSparseMatrixSGNcrs.f90
@@ -11,7 +11,7 @@ subroutine prepBuildSparseMatrixSGNcrs(soln,rhs,nvar,naux,levelBouss,numBoussCel
     integer, intent(in) :: nvar, naux, levelBouss, numBoussCells
     
     !! pass in numBoussCells for this level so can dimension this array
-    real(kind=8) :: soln(0:2*numBoussCells), rhs(0:2*numBoussCells) 
+    real(kind=8) :: soln(0:2*numBoussCells-1), rhs(0:2*numBoussCells-1) 
     
     type(matrix_patchIndex), pointer :: mi
     type(matrix_levInfo),  pointer :: minfo
diff --git a/src/2d/bouss/regrid.f b/src/2d/bouss/regrid.f90
similarity index 74%
rename from src/2d/bouss/regrid.f
rename to src/2d/bouss/regrid.f90
index 7f03b2129..e9a8c86ce 100644
--- a/src/2d/bouss/regrid.f
+++ b/src/2d/bouss/regrid.f90
@@ -1,18 +1,19 @@
-c
-c -----------------------------------------------------------
-c
+
       subroutine regrid  (nvar,lbase,cut,naux,start_time)
-c
+
       use amr_module
       use bouss_module
+#include <petsc/finclude/petscsys.h>
+      use petscsys
       implicit double precision (a-h,o-z)
       integer newnumgrids(maxlv)
       integer(kind=8) :: clock_start2, clock_finish, clock_rate
       type(matrix_patchIndex), pointer :: mi
       type(matrix_levInfo),  pointer :: minfo
+      PetscErrorCode ierr
 
-c
-c :::::::::::::::::::::::::::: REGRID :::::::::::::::::::::::::::::::
+!
+! :::::::::::::::::::::::::::: REGRID :::::::::::::::::::::::::::::::
 
 !> Flag points on each grid with a level > = lbase.
 !! cluster them, and fit new subgrids around the clusters.
@@ -21,19 +22,19 @@ subroutine regrid  (nvar,lbase,cut,naux,start_time)
 !! information to the error grid before clustering. (project)
 !! order of grid examination - all grids at the same level, then
 !! do the next coarser level.
-c
-c input parameters:
-c     lbase  = highest level that stays fixed during regridding
-c     cutoff = criteria for measuring goodness of rect. fit.
+!
+! input parameters:
+!     lbase  = highest level that stays fixed during regridding
+!     cutoff = criteria for measuring goodness of rect. fit.
 
-c local variables:
-c     lcheck = the level being examined.
-c     lfnew  = finest grid to be. will replace lfine.
+! local variables:
+!     lcheck = the level being examined.
+!     lfnew  = finest grid to be. will replace lfine.
 
-c global
-c    mstart  = start of very coarsest grids.
-c :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
-c
+! global
+!    mstart  = start of very coarsest grids.
+! :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+!
       lcheck    = min0(lfine,mxnest-1)
       lfnew     = lbase
       do  i   = 1, mxnest
@@ -41,21 +42,21 @@ subroutine regrid  (nvar,lbase,cut,naux,start_time)
         newstl(i) = 0
       end do
       time      = rnode(timemult, lstart(lbase))
-c
+!
  20   if (lcheck .lt. lbase) go to 50
           call grdfit(lbase,lcheck,nvar,naux,cut,time,start_time)
           if (newstl(lcheck+1) .eq. 0) go to 40
           lfnew = max0(lcheck + 1,lfnew)
  40       continue
           lcheck = lcheck - 1
-c
+!
       go to 20
  50   continue
-c
-c  end of level loop
-c
-c  remaining tasks left in regridding:
-c  1.  count number of new grids at each level  
+!
+!  end of level loop
+!
+!  remaining tasks left in regridding:
+!  1.  count number of new grids at each level  
       maxnumnewgrids = 0   ! max over all levels. needed for dimensioning
       do lev = lbase+1,lfnew 
           ngridcount = 0
@@ -68,22 +69,22 @@ subroutine regrid  (nvar,lbase,cut,naux,start_time)
  55       newnumgrids(lev) = ngridcount
           maxnumnewgrids = max(maxnumnewgrids,ngridcount)
       end do
-c
-c  2. interpolate storage for the new grids.  the starting pointers
-c  for each level are in newstl. also reclaim some space before new
-c  allocations.
+!
+!  2. interpolate storage for the new grids.  the starting pointers
+!  for each level are in newstl. also reclaim some space before new
+!  allocations.
       call system_clock(clock_start2,clock_rate)
       call gfixup(lbase,lfnew,nvar,naux,newnumgrids,maxnumnewgrids)
       call system_clock(clock_finish,clock_rate)
       timeGrdfit2 = timeGrdfit2 + clock_finish - clock_start2
-c
-c  3. merge data structures (newstl and lstart )
-c  finish storage allocation, reclaim space, etc. set up boundary
-c  flux conservation arrays
-c
+!
+!  3. merge data structures (newstl and lstart )
+!  finish storage allocation, reclaim space, etc. set up boundary
+!  flux conservation arrays
+!
 
-c     get rid of old bouss storage
-      if (max(lbase+1,minLevelBouss) .le. maxLevelBouss .and.
+!     get rid of old bouss storage
+      if (max(lbase+1,minLevelBouss) .le. maxLevelBouss .and.           &
      &     ibouss .gt. 0) then  ! need to redo bouss stuff
         do lev = max(lbase+1,minLevelBouss), maxLevelBouss
           minfo => matrix_info_allLevs(lev)
@@ -93,12 +94,14 @@ subroutine regrid  (nvar,lbase,cut,naux,start_time)
                deallocate(mi%mindex)
                deallocate(mi%isBouss)
             end do
-            deallocate(minfo%matrix_indices) ! get rid of old one 
+            deallocate(minfo%matrix_indices) ! get rid of old one
             ! also deallocate matrix since new one will have diff size
             if (.not. crs) then ! COO triplet format
-             deallocate(minfo%matrix_ia,minfo%matrix_ja,minfo%matrix_sa)
-            else  ! CRS format
-             deallocate(minfo%rowPtr, minfo%cols, minfo%vals)
+              deallocate(minfo%matrix_ia,minfo%matrix_ja,minfo%matrix_sa)
+            else                  ! CRS format
+              PetscCallA(PetscShmgetDeallocateArrayInt(minfo%rowPtr, ierr))
+              PetscCallA(PetscShmgetDeallocateArrayInt(minfo%cols, ierr))
+              PetscCallA(PetscShmgetDeallocateArrayScalar(minfo%vals, ierr))
             endif
             minfo%numBoussGrids = 0  ! reset for new counting
             minfo%numBoussCells = 0  ! reset for new counting
@@ -125,7 +128,7 @@ subroutine regrid  (nvar,lbase,cut,naux,start_time)
          newGrids(lbase+1:maxlv) = .true.
       endif
 
-c     set deepest bathy for each grid
+!     set deepest bathy for each grid
       if (lbase .lt. maxLevelBouss .and. ibouss.gt.0) then ! set up new bouss stuff
         do lev = lbase+1, lfine
            minfo => matrix_info_allLevs(lev)
@@ -159,19 +162,19 @@ subroutine regrid  (nvar,lbase,cut,naux,start_time)
         end do
 #endif
       endif
-c
-c  reset numgrids per level, needed for omp parallelization.
-c  note that grids may have disappeared, so next loop resets to 0
-c  if there are no grids from lfine+1 to mxnest
-c
+!
+!  reset numgrids per level, needed for omp parallelization.
+!  note that grids may have disappeared, so next loop resets to 0
+!  if there are no grids from lfine+1 to mxnest
+!
       do 72 levnew = lbase+1, mxnest
         mptr = lstart(levnew)
         ngridcount = 0
         ncells = 0
         do while (mptr .gt. 0)
            ngridcount = ngridcount + 1
-           ncells = ncells + (node(ndihi,mptr)-node(ndilo,mptr)+1)
-     .                     * (node(ndjhi,mptr)-node(ndjlo,mptr)+1)
+           ncells = ncells + (node(ndihi,mptr)-node(ndilo,mptr)+1)       &
+     &                     * (node(ndjhi,mptr)-node(ndjlo,mptr)+1)
            mptr = node(levelptr, mptr)
          end do
          if (ngridcount .ne. newnumgrids(levnew)) then
@@ -182,8 +185,8 @@ subroutine regrid  (nvar,lbase,cut,naux,start_time)
          numcells(levnew) = ncells
          avenumgrids(levnew) = avenumgrids(levnew) + ngridcount
          iregridcount(levnew) = iregridcount(levnew) + 1
-c        sort grids to first ones are the most work. this helps load
-c        balancing, but doesn't help locality
+!        sort grids to first ones are the most work. this helps load
+!        balancing, but doesn't help locality
          !commented out enxt line because now already sorted in prepnewgrids above
          ! changed 9/25/23 by mjb for better grid placement in filval par for loop 
          if (ngridcount .gt. 1) call arrangeGrids(levnew,ngridcount)
@@ -191,7 +194,7 @@ subroutine regrid  (nvar,lbase,cut,naux,start_time)
          if (verbosity_regrid .ge. levnew) then
            write(*,100) ngridcount,ncells,levnew
            write(outunit,100) ngridcount,ncells,levnew
- 100       format("there are ",i6," grids with ",i10,
+ 100       format("there are ",i6," grids with ",i10,                    &
      &            " cells at level ", i3)
          endif
 72     continue
@@ -200,14 +203,14 @@ subroutine regrid  (nvar,lbase,cut,naux,start_time)
         call prepf(level+1,nvar,naux)
         call prepc(level,nvar)
  60   continue
-c
-c      set up array of grids instead of recomputing at each step
+!
+!      set up array of grids instead of recomputing at each step
        call makeGridList(lbase)
        do levnew = lbase+1, lfine
           call makeBndryList(levnew)   ! does one level at a time
        end do
-c
-c     set up for Bouss matrices
+!
+!     set up for Bouss matrices
       if (ibouss .gt. 0) then ! 0 is SWE, otherwise setup for bouss
          do lev = lbase+1,lfine
            if (lev.le. maxLevelBouss .and. lev .ge. minLevelBouss)then
@@ -216,12 +219,12 @@ subroutine regrid  (nvar,lbase,cut,naux,start_time)
            endif
          end do
       endif
-c
+!
       return
       end
-c
-c -------------------------------------------------------------------
-c
+!
+! -------------------------------------------------------------------
+!
 !> Sort all grids at level **level**. 
 !! Put the most expensive grid in **lstart(level)** 
 !! Cost is measured by number of cells.
@@ -229,33 +232,33 @@ subroutine regrid  (nvar,lbase,cut,naux,start_time)
 !! decreases from list head to list tail
 
       subroutine arrangeGrids(level, numg)
-c
+!
       use amr_module
       implicit double precision (a-h,o-z)
       integer listgrids(numg), cost(numg), index(numg), prevptr
-c
-c   slow sort for now, putting most expensive grids first on lstart list
-c   measure cost by number of cells
-c
+!
+!   slow sort for now, putting most expensive grids first on lstart list
+!   measure cost by number of cells
+!
        mptr = lstart(level)
        do i = 1, numg
          listgrids(i) = mptr
-         cost(i) =  (node(ndihi,mptr)-node(ndilo,mptr)+3) *
-     1              (node(ndjhi,mptr)-node(ndjlo,mptr)+2)
+         cost(i) =  (node(ndihi,mptr)-node(ndilo,mptr)+3) *              &
+     &              (node(ndjhi,mptr)-node(ndjlo,mptr)+2)
          index(i) = i
          mptr = node(levelptr, mptr)
        end do
-c
-c        write(*,*)" before sorting"
-c       write(*,*) index
-c
+!
+!        write(*,*)" before sorting"
+!       write(*,*) index
+!
        call  qsorti(index, numg, cost)
 
-c       write(*,*)"after sorting"
-c       write(*,*) index
+!       write(*,*)"after sorting"
+!       write(*,*) index
 
-c qsort returns in ascending order, repack in descending order
-c grids can stay in place, just their levelptrs need to change
+! qsort returns in ascending order, repack in descending order
+! grids can stay in place, just their levelptrs need to change
        lstart(level) = listgrids(index(numg))  ! last grid is most expensive
        prevptr = listgrids(index(numg))
        do i = 1, numg-1             
diff --git a/src/2d/bouss/restrt.f b/src/2d/bouss/restrt.f
index 1457adbe6..6303e5570 100644
--- a/src/2d/bouss/restrt.f
+++ b/src/2d/bouss/restrt.f
@@ -77,12 +77,22 @@ subroutine restrt(nsteps,time,nvar,naux)
      5              timeRegridding,timeRegriddingCPU,
      6              timeValout,timeValoutCPU
 
-!     WRITE(*,*)"REMEMBER:  RESETTING Solver TIMERS For special tests"
-!     timeLinSolve = 0
-!     timeLinSolveCPU = 0.d0
-!     timePrepBuild = 0
-!     timePrepBuildCPU = 0.d0
-
+      WRITE(*,*)"REMEMBER:  RESETTING Solver TIMERS For special tests"
+      timeLinSolve = 0
+      timeLinSolveCPU = 0.d0
+      timePrepBuild = 0
+      timePrepBuildCPU = 0.d0
+      timeTick = 0
+      timeTickCPU = 0.0d0
+      timeStepgrid = 0
+      timeStepgridCPU = 0.0d0
+      timeBound = 0
+      timeBoundCPU = 0.d0
+      timeRegridding = 0
+      timeRegriddingCPU = 0.d0
+      timeValout = 0
+      timeValoutCPU = 0.d0
+      
       if (mxnest .gt. mxnold) then
         do ifg = 1, FG_num_fgrids
           fg => FG_fgrids(ifg)
diff --git a/src/2d/bouss/rpt2_geoclaw_sym.f b/src/2d/bouss/rpt2_geoclaw_sym.f
new file mode 100644
index 000000000..c241b13a3
--- /dev/null
+++ b/src/2d/bouss/rpt2_geoclaw_sym.f
@@ -0,0 +1,264 @@
+! =====================================================
+      subroutine rpt2(ixy,imp,maxm,meqn,mwaves,maux,mbc,mx,
+     &                ql,qr,aux1,aux2,aux3,asdq,bmasdq,bpasdq)
+! =====================================================
+      use geoclaw_module, only: g => grav, tol => dry_tolerance
+      use geoclaw_module, only: coordinate_system,earth_radius,deg2rad
+
+      implicit none
+!
+!     # Riemann solver in the transverse direction using an einfeldt
+!     Jacobian.
+
+!-----------------------last modified 1/10/05----------------------
+
+      integer ixy,maxm,meqn,maux,mwaves,mbc,mx,imp
+
+      double precision  ql(meqn,1-mbc:maxm+mbc)
+      double precision  qr(meqn,1-mbc:maxm+mbc)
+      double precision  asdq(meqn,1-mbc:maxm+mbc)
+      double precision  bmasdq(meqn,1-mbc:maxm+mbc)
+      double precision  bpasdq(meqn,1-mbc:maxm+mbc)
+      double precision  aux1(maux,1-mbc:maxm+mbc)
+      double precision  aux2(maux,1-mbc:maxm+mbc)
+      double precision  aux3(maux,1-mbc:maxm+mbc)
+
+      double precision  s(mwaves)
+      double precision  r(meqn,mwaves)
+      double precision  beta(mwaves)
+      double precision  abs_tol
+      double precision  hl,hr,hul,hur,hvl,hvr,vl,vr,ul,ur,bl,br
+      double precision  uhat,vhat,hhat,roe1,roe3,s1,s2,s3,s1down,s3up
+      double precision  delf1,delf2,delf3,dxdcd,dxdcu
+      double precision  dxdcm,dxdcp,topo1,topo3,eta
+
+      integer i,m,mw,mu,mv
+      
+      !write(83,*) 'rpt2, imp = ',imp
+          
+      ! initialize all components to 0:
+      bmasdq(:,:) = 0.d0
+      bpasdq(:,:) = 0.d0
+
+      abs_tol=tol
+
+      if (ixy.eq.1) then
+        mu = 2
+        mv = 3
+      else
+        mu = 3
+        mv = 2
+      endif
+
+      do i=2-mbc,mx+mbc
+
+         hl=qr(1,i-1) 
+         hr=ql(1,i) 
+         hul=qr(mu,i-1) 
+         hur=ql(mu,i) 
+         hvl=qr(mv,i-1) 
+         hvr=ql(mv,i)
+
+!===========determine velocity from momentum===========================
+       if (hl.lt.abs_tol) then
+          hl=0.d0
+          ul=0.d0
+          vl=0.d0
+       else
+          ul=hul/hl
+          vl=hvl/hl
+       endif
+
+       if (hr.lt.abs_tol) then
+          hr=0.d0
+          ur=0.d0
+          vr=0.d0
+       else
+          ur=hur/hr
+          vr=hvr/hr
+       endif
+
+       do mw=1,mwaves
+          s(mw)=0.d0
+          beta(mw)=0.d0
+          do m=1,meqn
+             r(m,mw)=0.d0
+          enddo
+       enddo
+      dxdcp = 1.d0
+      dxdcm = 1.d0
+
+      if (hl <= tol .and. hr <= tol) go to 90
+
+*      !check and see if cell that transverse waves are going in is high and dry
+       if (imp.eq.1) then
+            eta = qr(1,i-1)  + aux2(1,i-1)
+            topo1 = aux1(1,i-1)
+            topo3 = aux3(1,i-1)
+c            s1 = vl-sqrt(g*hl)
+c            s3 = vl+sqrt(g*hl)
+c            s2 = 0.5d0*(s1+s3)
+       else
+            eta = ql(1,i) + aux2(1,i)
+            topo1 = aux1(1,i)
+            topo3 = aux3(1,i)
+c            s1 = vr-sqrt(g*hr)
+c            s3 = vr+sqrt(g*hr)
+c            s2 = 0.5d0*(s1+s3)
+       endif
+       if (eta.lt.max(topo1,topo3)) go to 90
+
+      if (coordinate_system.eq.2) then
+         if (ixy.eq.2) then
+             dxdcp=(earth_radius*deg2rad)
+            dxdcm = dxdcp
+         else
+            if (imp.eq.1) then
+               dxdcp = earth_radius*cos(aux3(3,i-1))*deg2rad
+               dxdcm = earth_radius*cos(aux1(3,i-1))*deg2rad
+            else
+               dxdcp = earth_radius*cos(aux3(3,i))*deg2rad
+               dxdcm = earth_radius*cos(aux1(3,i))*deg2rad
+            endif
+         endif
+      endif
+
+c=====Determine some speeds necessary for the Jacobian=================
+c            vhat=(vr*dsqrt(hr))/(dsqrt(hr)+dsqrt(hl)) +
+c     &        (vl*dsqrt(hl))/(dsqrt(hr)+dsqrt(hl))
+c
+c            uhat=(ur*dsqrt(hr))/(dsqrt(hr)+dsqrt(hl)) +
+c     &        (ul*dsqrt(hl))/(dsqrt(hr)+dsqrt(hl))
+c            hhat=(hr+hl)/2.d0
+
+            ! Note that we used left right states to define Roe averages,
+            ! which is consistent with those used in rpn2.
+            ! But now we are computing upgoing, downgoing waves either in
+            ! cell on left (if imp==1) or on right (if imp==2) so we
+            ! should perhaps use Roe averages based on values above or below,
+            ! but these aren't available.
+
+            !roe1=vhat-dsqrt(g*hhat)
+            !roe3=vhat+dsqrt(g*hhat)
+
+            ! modified to at least use hl,vl or hr,vr properly based on imp:
+            ! (since s1 and s3 are now vertical velocities,
+            ! it made no sense to use h,v in cell i-1 for downgoing 
+            ! and cell i for upgoing)
+
+            if (imp == 1) then
+                ! asdq is leftgoing, use q from cell i-1:
+                if (hl <= tol) go to 90
+                s1 = vl-dsqrt(g*hl)
+                s3 = vl+dsqrt(g*hl)
+                s2 = vl
+                uhat = ul
+                hhat = hl
+            else
+                ! asdq is rightgoing, use q from cell i:
+                if (hr <= tol) go to 90
+                s1 = vr-dsqrt(g*hr)
+                s3 = vr+dsqrt(g*hr)
+                s2 = vr
+                uhat = ur
+                hhat = hr
+            endif
+
+            ! don't use Roe averages:
+            !s1=dmin1(roe1,s1down)
+            !s3=dmax1(roe3,s3up)
+
+            !s2=0.5d0*(s1+s3)
+
+           s(1)=s1
+           s(2)=s2
+           s(3)=s3
+c=======================Determine asdq decomposition (beta)============
+         delf1=asdq(1,i)
+         delf2=asdq(mu,i)
+         delf3=asdq(mv, i)
+
+         ! fixed bug in beta(2): uhat in place of s(2)
+         beta(1) = (s3*delf1/(s3-s1))-(delf3/(s3-s1))
+         beta(2) = -uhat*delf1 + delf2
+         beta(3) = (delf3/(s3-s1))-(s1*delf1/(s3-s1))
+c======================End =================================================
+
+c=====================Set-up eigenvectors===================================
+         r(1,1) = 1.d0
+         r(2,1) = uhat    ! fixed bug, uhat not s2
+         r(3,1) = s1
+
+         r(1,2) = 0.d0
+         r(2,2) = 1.d0
+         r(3,2) = 0.d0
+
+         r(1,3) = 1.d0
+         r(2,3) = uhat    ! fixed bug, uhat not s2
+         r(3,3) = s3
+c============================================================================
+90      continue
+c============= compute fluctuations==========================================
+
+               bmasdq(1,i)=0.0d0
+               bpasdq(1,i)=0.0d0
+               bmasdq(2,i)=0.0d0
+               bpasdq(2,i)=0.0d0
+               bmasdq(3,i)=0.0d0
+               bpasdq(3,i)=0.0d0
+               
+               do  mw=1,3
+                  print*,  s(mw)
+                  print*,  abs(s(mw))
+                  print*, g,hhat
+                  print*, dsqrt(g*hhat)
+               if ((abs(s(mw)) > 0.d0) .and. 
+     &             (abs(s(mw)) < 0.001d0*dsqrt(g*hhat))) then
+                 ! split correction symmetrically if nearly zero
+                 ! Note wave drops out if s(mw)==0 exactly, so don't split
+                 bmasdq(1,i) =bmasdq(1,i) +
+     &                        0.5d0*dxdcm*s(mw)*beta(mw)*r(1,mw)
+                 bmasdq(mu,i)=bmasdq(mu,i)+ 
+     &                        0.5d0*dxdcm*s(mw)*beta(mw)*r(2,mw)
+                 bmasdq(mv,i)=bmasdq(mv,i)+ 
+     &                        0.5d0*dxdcm*s(mw)*beta(mw)*r(3,mw)
+                 bpasdq(1,i) =bpasdq(1,i) + 
+     &                        0.5d0*dxdcp*s(mw)*beta(mw)*r(1,mw)
+                 bpasdq(mu,i)=bpasdq(mu,i)+ 
+     &                        0.5d0*dxdcp*s(mw)*beta(mw)*r(2,mw)
+                 bpasdq(mv,i)=bpasdq(mv,i)+ 
+     &                        0.5d0*dxdcp*s(mw)*beta(mw)*r(3,mw)
+               elseif (s(mw).lt.0.d0) then
+                 bmasdq(1,i) =bmasdq(1,i) + dxdcm*s(mw)*beta(mw)*r(1,mw)
+                 bmasdq(mu,i)=bmasdq(mu,i)+ dxdcm*s(mw)*beta(mw)*r(2,mw)
+                 bmasdq(mv,i)=bmasdq(mv,i)+ dxdcm*s(mw)*beta(mw)*r(3,mw)
+               elseif (s(mw).gt.0.d0) then
+                 bpasdq(1,i) =bpasdq(1,i) + dxdcp*s(mw)*beta(mw)*r(1,mw)
+                 bpasdq(mu,i)=bpasdq(mu,i)+ dxdcp*s(mw)*beta(mw)*r(2,mw)
+                 bpasdq(mv,i)=bpasdq(mv,i)+ dxdcp*s(mw)*beta(mw)*r(3,mw)
+               endif
+            enddo
+            
+        !if ((i>3) .and. (i<6)) then
+        if (.false.) then
+            ! DEBUG
+            write(83,*) 'i = ',i
+            write(83,831) s(1),s(2),s(3)
+            write(83,831) beta(1),beta(2),beta(3)
+            do m=1,3
+                write(83,831) r(m,1),r(m,2),r(m,3)
+            enddo
+            do m=1,3
+                write(83,831) asdq(m,i), bmasdq(m,i), bpasdq(m,i)
+ 831            format(3d20.12)
+            enddo
+        endif
+c========================================================================
+         enddo  ! do i loop
+c
+
+
+c
+
+      return
+      end
diff --git a/src/2d/bouss/setMatrixIndex.f90 b/src/2d/bouss/setMatrixIndex.f90
index 71d6b5892..1c5dfb38e 100644
--- a/src/2d/bouss/setMatrixIndex.f90
+++ b/src/2d/bouss/setMatrixIndex.f90
@@ -5,6 +5,8 @@ subroutine setMatrixIndex(levelBouss)
 
       use amr_module
       use bouss_module
+#include <petsc/finclude/petscsys.h>
+      use petscsys
       implicit none
 
       integer, intent(in) :: levelBouss
@@ -16,6 +18,8 @@ subroutine setMatrixIndex(levelBouss)
       type(matrix_levInfo),  pointer :: minfo
       integer bndNum, imax,imin,jmax,jmin,icount,nextSpot,max_matrix_nelt
       integer ixlo,ixhi,jxlo,jxhi,imlo,imhi,jmlo,jmhi,mtest,nbtest
+      PetscErrorCode ierr
+      PetscInt zero
 
 !
 !     traverse grids in this exact order, after arrangeGrids has been called
@@ -30,6 +34,7 @@ subroutine setMatrixIndex(levelBouss)
 !         cells at edges of grid  that still have a -1 in their stencil 
 !         will be treated as SWE points.
 !
+      zero = 0
       minfo => matrix_info_allLevs(levelBouss)
 
       k = 0  ! keep running total of number of unknowns in matrix system 
@@ -81,10 +86,14 @@ subroutine setMatrixIndex(levelBouss)
                      minfo%matrix_ja(max_matrix_nelt),     &
                      minfo%matrix_sa(max_matrix_nelt))
           else  ! CRS format
-            allocate(minfo%cols(0:max_matrix_nelt),        &
-                     minfo%vals(0:max_matrix_nelt),        &
-                     minfo%rowPtr(0:2*minfo%numBoussCells))
-          endif
+!            allocate(minfo%cols(0:max_matrix_nelt),        &
+!                     minfo%vals(0:max_matrix_nelt),        &
+!                     minfo%rowPtr(0:2*minfo%numBoussCells))
+            PetscCallA(PetscShmgetAllocateArrayInt(zero, 2*minfo%numBoussCells + 1, minfo%rowPtr, ierr))
+            PetscCallA(PetscShmgetAllocateArrayInt(zero, max_matrix_nelt, minfo%cols, ierr))
+            PetscCallA(PetscShmgetAllocateArrayScalar(zero, max_matrix_nelt, minfo%vals, ierr))
+            CHKMEMQ;
+         endif
       endif
 
 !     loop again to set boundary values from other grids