#include "wwm_functions.h"
!**********************************************************************
!* *
!**********************************************************************
!>\todo merge asparApp2Petsc_small(NNZ) and oAsparApp2Petsc_small(NNZ) into one array?
!>\todo openmp: add nowait clause.
!>\todo openmp: timing CADVXY3
!>\todo openmp: CADVXY3 copy CURTXY etc. arrays in petsc order
!>\todo openmp: try CADVXY2 with openmp instead of CADVXY3
!>\todo openmp: add a single/master parallel block for all OMP DO blocks
!>\todo openmp: how to bind all openmp threads to one physical cpu for cache efficiency?
!>\todo openmp: false sharing. many threads access the same cache line. avoid with petsc ordering?
!>\todo performance: figure out which thread needs the most (computing) time or flops/sec. filter out MPI message transfer time
!>\todo performance: check MPI message lengths/numbers with and without solver call. Is the *Assembly() call really one big problem?
!>\todo debug: add petsc finalize state
#ifdef PETSC
# ifdef MPI_PARALL_GRID
!> Data and methods to use one matrix for all nodes, frequencies and directions.
!> The matrix has a dimension of (number of nodes * frequencies * directions)^2
!>
!>
!>
!> We work here with so-called small and big matrices. A small matrix holds the values for all nodes but for only
!> one frequency and direction.
!>
!> Example with only the diagonal filled:
!>
!><pre>
!> IP 1 2 . . . MNP
!> +-------------+
!> 1 | 1 |
!> 2 | 1 |
!> . | 1 |
!> . | 1 |
!> . | 1 |
!> MNP | 1 |
!> +-------------+
!></pre>
!>
!> A big matrix holds the values for all nodes, frequencies and directions.
!> Example for MNP nodes, two frequencies and two directions. Only the diagonal is filled:
!>
!><pre>
!> IP ISS IDD
!> +-------------------------+
!> 1 1 1 | 1 |
!> 2 | 1 |
!> 2 1 | 1 |
!> 2 | 1 |
!> 2 1 1 | 1 |
!> 2 | 1 |
!> 2 1 | 1 |
!> 2 | 1 |
!> . . . | . |
!> . . . | . |
!> . . . | . |
!> MNP 2 2 | 1 |
!> +-------------------------+
!></pre>
!>
!> As you can see, the direction (IDD) is the fastest running index.
!>
!> Mapping a row/col number of a small matrix, together with the current ISS/IDD, to the row/col
!> in the big matrix is very easy:
!>
!> row/col big = IP * NUMSIG*NUMDIR + ISS*NUMDIR + IDD (IP, ISS, IDD must count from zero)
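!>
!> A quick worked example (hypothetical numbers, counting from zero as above): with NUMSIG = 2 and NUMDIR = 2,
!> node IP = 1, frequency ISS = 0 and direction IDD = 1 land in big-matrix row 1*2*2 + 0*2 + 1 = 5.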
!>
!> We store the matrix in sparse format. So there are arrays IA, JA and A (or aspar) to access the matrix entries.
!> If the sparse arrays represent a petsc matrix, they have a "_petsc_small" suffix: IA_petsc_small, JA_petsc_small.
!> And if the sparse arrays represent the big matrix in PETSC ordering, they have a "_petsc" suffix: IA_petsc, JA_petsc, aspar_petsc.
!>
!> The difficulty is to create a mapping which maps into a sparse matrix.
!> And the difficulty^2 is to map between application and PETSC ordering in sparse matrix format.
!>
!> I'll later describe how the mapping is initialized.
!>
!> Now I'll explain how to set a value into the big matrix in PETSC ordering.
!> First thing to know: there is no super easy way; all approaches are more or less cumbersome.
!>
!> You need the node number (IP), the current frequency (ISS), the current direction (IDD) and the position
!> in the aspar array for a small matrix in application ordering. The last one is usually just a counter J.
!>
!> In pseudo code \n
!> value = aspar(J) \n
!> position = aspar2petscAspar(IP, ISS, IDD, J) \n
!> bigAsparPetsc(position) = value \n
!>
!> The function aspar2petscAspar calculates the position in the big matrix petsc aspar array.
!>
!> The function aspar2petscAspar is not very fast. If you call it MNP*NUMSIG*NUMDIR times, there will be a huge overhead.
!> But you can calculate the aspar position by hand.
!>
!> Example in pseudo code
!>
!><pre>
!> DO IP = 1, MNP
!> IPpetsc = ALO2PLO(IP-1)+1
!> petscAsparPosi1 = NUMSIG*NUMDIR * IA_petsc_small(IPpetsc)
!> nConnNode = IA_petsc_small(IPpetsc+1) - IA_petsc_small(IPpetsc)
!> DO ISS = 1, NUMSIG
!> petscAsparPosi2 = petscAsparPosi1 + (ISS-1)* NUMDIR *nConnNode
!> DO IDD = 1, NUMDIR
!> petscAsparPosi3 = petscAsparPosi2 + (IDD-1)*nConnNode
!> DO I = 1, CCON(IP)
!> J = J + 1
!> petscAsparPosi4 = petscAsparPosi3 + (asparApp2Petsc_small(J) - IA_petsc_small(IPpetsc)) + 1
!> bigASPAR_petsc(petscAsparPosi4) = value
!> END DO
!> END DO !IDD
!> END DO !ISS
!> END DO !IP
!></pre>
!>
!> In words: loop over all nodes, frequencies and directions, and in every loop update the position in aspar.
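!>
!> For orientation, a minimal sketch of what a row-index helper in the spirit of toRowIndex could look like,
!> assuming 1-based IP/ISS/IDD on input and a 0-based big-matrix row on output (the actual routine lives in
!> petscpool and may differ; NUMSIG and NUMDIR are assumed to be available, e.g. from datapool):
!>
!><pre>
!>   integer function toRowIndexSketch(IP, ISS, IDD) result(row)
!>     integer, intent(in) :: IP, ISS, IDD  ! 1-based node, frequency, direction index
!>     row = (IP-1)*NUMSIG*NUMDIR + (ISS-1)*NUMDIR + (IDD-1)
!>   end function toRowIndexSketch
!></pre>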
!**********************************************************************
!* *
!**********************************************************************
MODULE PETSC_BLOCK
implicit none
#include "finclude/petscsysdef.h"
#include "finclude/petscaodef.h"
#include "finclude/petscisdef.h"
#include "finclude/petscvecdef.h"
#include "finclude/petscmatdef.h"
#include "finclude/petsckspdef.h"
! uncomment this to enable debug output such as solver time, iteration counts and additional debug messages
#define PETSC_DEBUG 1
!> number of local rows/cols big matrix (all frequency and directions)
integer :: nLocalRowBigMatrix = 0
!> number of global rows/cols big matrix (all frequency and directions)
integer :: nGlobalRowBigMatrix = 0
!> IA in CSR format in petsc local order for the big matrix
PetscInt, allocatable :: IA_petsc(:)
!> column index in CSR format in petsc local order for the big matrix
PetscInt, allocatable :: JA_petsc(:)
!> Matrix values in CSR format for the big matrix
PetscScalar, allocatable :: ASPAR_petsc(:)
!> offdiagonal IA in CSR format in petsc local order for the big matrix
PetscInt, allocatable :: oIA_petsc(:)
!> offdiagonal column index in CSR format in petsc GLOBAL order for the big matrix?? The petsc documentation says this should be in local order
PetscInt, allocatable :: oJA_petsc(:)
!> offdiagonal submatrix values in CSR format for the big matrix
PetscScalar, allocatable :: oASPAR_petsc(:)
!> for the small matrix in petsc order
integer, allocatable :: IA_petsc_small(:), oIA_petsc_small(:)
!> column index in CSR format for the small matrix in petsc local order
! map from app aspar position to petsc aspar position (small matrix)
integer, allocatable :: AsparApp2Petsc_small(:)
integer, allocatable :: oAsparApp2Petsc_small(:)
# ifdef DIRECT_METHOD
integer, allocatable :: AsparApp2Petsc(:)
integer, allocatable :: oAsparApp2Petsc(:)
integer, allocatable :: IA_Ptotal(:,:,:)
integer, allocatable :: I_DIAGtotal(:,:,:)
# endif
! Fortran quirk: it runs faster if this array is taken from the stack every time instead of from the heap at init.
! better locality ..., but stack overflow danger...
! real(kind=8), allocatable :: ASPAR(:)
! max number of adjacent nodes per node
integer :: maxNumConnNode
! cumulative J sparse counter. size MNP+1.
integer, allocatable :: Jcum(:)
contains
!> @brief Initialize Petsc. create the mappings, matrix, vectors and finally the solver and preconditioner
!**********************************************************************
!* *
!**********************************************************************
SUBROUTINE PETSC_INIT_BLOCK
USE DATAPOOL, only: MNP, CCON, NNZ, RKIND, DBG, np_global, np, npg, ne_global, iplg, ipgl, inp
USE DATAPOOL, ONLY : REFRACTION_IMPL, FREQ_SHIFT_IMPL, SOURCE_IMPL
! NUMSIG - # frequencies
! NUMDIR - # directions
use datapool, only: NUMSIG, NUMDIR, comm, ICOMP
! np_global - # nodes global
! np - # nodes local, non augmented
! npg - # ghost nodes
! npa - # nodes augmented
! nea - # elements augmented
! int::iplg(ip) global node index. local to global LUT
! llist_type::ipgl(ipgb) ipgb is a global node. global to local LUT
! int::nnp(ip) total # of surrounding nodes for node ip
! int::inp(ip, 1:nnp(ip)) list of surrounding nodes
use petscpool
use petscsys
use petscmat
! use omp_lib
IMPLICIT NONE
integer :: ierr, istat
integer :: IP, ISS, IDD
integer :: nghostBlock
integer, allocatable :: onlyGhostsBlock(:)
integer :: nOMPthreads
call MPI_Comm_rank(comm, rank, ierr)
call MPI_Comm_size(comm, nProcs, ierr)
IF (ICOMP .lt. 3) THEN
IF (REFRACTION_IMPL) THEN
CALL WWM_ABORT('You need ICOMP=3 for REFRACTION_IMPL')
END IF
IF (FREQ_SHIFT_IMPL) THEN
CALL WWM_ABORT('You need ICOMP=3 for FREQ_SHIFT_IMPL')
END IF
END IF
# ifndef DIRECT_METHOD
IF (REFRACTION_IMPL) THEN
CALL WWM_ABORT('You need DIRECT_METHOD for REFRACTION_IMPL')
END IF
IF (FREQ_SHIFT_IMPL) THEN
CALL WWM_ABORT('You need DIRECT_METHOD for FREQ_SHIFT_IMPL')
END IF
IF (SOURCE_IMPL) THEN
CALL WWM_ABORT('You need DIRECT_METHOD for SOURCE_IMPL')
END IF
# endif
# ifdef PETSC_DEBUG
call PetscPrintf(PETSC_COMM_WORLD, "PETSC_INIT_BLOCK\n", petscErr);CHKERRQ(petscErr)
!!$OMP PARALLEL
! nOMPthreads = omp_get_num_threads()
!!$OMP END PARALLEL
! write(DBG%FHNDL,*) "openmp threads", nOMPthreads
# endif
call createMappings
call createMatrix
! create ghost blocks
nghostBlock = nghost * NUMSIG * NUMDIR
allocate(onlyGhostsBlock(0:nghostBlock-1), stat=istat)
if(istat /= 0) CALL WWM_ABORT('allocation error in wwm_petsc_block 1')
nghostBlock = 0
do IP = 1, nghost
do ISS = 1, NUMSIG
do IDD = 1, NUMDIR
onlyGhostsBlock(toRowIndex(IP, ISS, IDD)) = toRowIndex(onlyGhosts(IP-1), ISS, IDD)
end do
end do
end do
! create X vector
call VecCreateGhost(PETSC_COMM_WORLD, &
& nNodesWithoutInterfaceGhosts * NUMSIG * NUMDIR, &
& np_global * NUMSIG * NUMDIR, &
& nghostBlock, &
& onlyGhostsBlock, &
& myX, petscErr);CHKERRQ(petscErr)
! call VecCreateMPI(MPI_COMM_WORLD, \
! nNodesWithoutInterfaceGhosts * NUMSIG * NUMDIR, \
! np_global * NUMSIG * NUMDIR, \
! myX, petscErr);CHKERRQ(petscErr)
! create B vector
call VecDuplicate(myX, myB, petscErr);CHKERRQ(petscErr)
call createSolver()
# ifdef PETSC_DEBUG
if(rank == 0) call printSolverTolerance(solver)
if(rank == 0) call printKSPType(Solver);
# endif
deallocate(onlyGhostsBlock, stat=istat)
if(istat /= 0) CALL WWM_ABORT('allocation error in wwm_petsc_block 2')
! create cumulative J sparse counter
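! Jcum(IP) will hold the number of element connections of all nodes before IP,
! so Jcum(IP)+I addresses the I-th element around node IP, e.g. IE_CELL(Jcum(IP)+I) in calcB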
allocate(Jcum(MNP+1), stat=istat)
if(istat /= 0) CALL WWM_ABORT('allocation error in wwm_petsc_block 3')
Jcum = 0
do IP=1, MNP
Jcum(IP+1) = Jcum(IP) + CCON(IP)
end do
END SUBROUTINE
!**********************************************************************
!* *
!**********************************************************************
!> create PETSC matrix which uses fortran arrays
SUBROUTINE createMatrix
use datapool, only: NUMSIG, NUMDIR, comm, np_global
use petscpool
use petscsys
use petscmat
implicit none
nLocalRowBigMatrix = nNodesWithoutInterfaceGhosts * NUMSIG*NUMDIR
nGlobalRowBigMatrix = np_global * NUMSIG*NUMDIR
call createCSR_petsc()
# ifndef DIRECT_METHOD
call createCSR_petsc_small()
# endif
call MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, &
& nLocalRowBigMatrix, nLocalRowBigMatrix, &
& nGlobalRowBigMatrix, nGlobalRowBigMatrix, &
& IA_petsc, JA_petsc, ASPAR_petsc, &
& oIA_petsc, oJA_petsc, oASPAR_petsc, &
& matrix, petscErr);CHKERRQ(petscErr);
! indicates that any add or insertion that would generate a new entry in the nonzero structure instead produces an error.
call MatSetOption(matrix, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, petscErr);CHKERRQ(petscErr)
! indicates that any add or insertion that would generate a new entry that has not been preallocated will instead produce an error
call MatSetOption(matrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE, petscErr);CHKERRQ(petscErr)
! indicates entries destined for other processors should be dropped, rather than stashed
call MatSetOption(matrix, MAT_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE, petscErr);CHKERRQ(petscErr)
! you know each process will only zero its own rows.
! This avoids all reductions in the zero row routines and thus improves performance for very large process counts.
call MatSetOption(matrix, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE, petscErr);CHKERRQ(petscErr)
end SUBROUTINE
!**********************************************************************
!* *
!**********************************************************************
!> create the IA, JA and ASPAR petsc arrays for the big sparse matrix
SUBROUTINE createCSR_petsc
use datapool, only: NNZ, MNE, INE, MNP, NUMSIG, NUMDIR, RKIND, DBG, iplg, JA, myrank, IOBP, LTHBOUND, LSIGBOUND, DEP, DMIN
USE DATAPOOL, ONLY : REFRACTION_IMPL, FREQ_SHIFT_IMPL
use petscpool
use algorithm, only: bubbleSort, genericData
implicit none
! running variable node number
integer :: IP = 0
! node number in petsc order
integer :: IPpetsc = 0
! running frequency
integer :: ISS = 0
! running direction
integer :: IDD = 0
! running variable
integer :: i = 0, J = 0, o_J = 0
integer :: istat, ThePos, NbRefr, NbFreq, TheVal
! number of nonzeros without interface and ghosts
integer :: nnz_new
! number of nonzeros in the offdiagonal submatrix without interface and ghosts
integer :: o_nnz_new
# ifdef DIRECT_METHOD
integer NNZint, idxpos
integer IDprev, IDnext
# endif
integer idx
type(genericData), allocatable :: toSort(:)
integer :: nToSort = 0
type(genericData), allocatable :: o_toSort(:)
integer :: o_nToSort = 0
integer :: bigMatrixRow
! calc max number of adj nodes per node
maxNumConnNode = 0
do IP = 1, MNP
if(IA_P(IP+1) - IA_P(IP) > maxNumConnNode) then
maxNumConnNode = IA_P(IP+1) - IA_P(IP)
end if
end do
! simply calc NNZ and o_NNZ for one frequency and one direction, i.e. for one small matrix,
! then multiply by NUMSIG*NUMDIR and you have the numbers for the big matrix
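! (hypothetical example: a small matrix with 1000 local nonzeros and NUMSIG=24, NUMDIR=36
! gives 1000*24*36 = 864000 nonzeros in the big matrix on this rank)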
! calc NNZ and offdiagonal NNZ
! iterate over all petsc rows and the nodes in each row
! if one is a ghost or interface node, increase the offdiagonal NNZ
nnz_new = 0
o_nnz_new = 0
do IPpetsc = 1, nNodesWithoutInterfaceGhosts
IP = PLO2ALO(IPpetsc-1)+1
do i = IA_P(IP)+1, IA_P(IP+1)
if(ALOold2ALO(JA(i)) .eq. -999) then
o_nnz_new = o_nnz_new + NUMSIG*NUMDIR
else
nnz_new = nnz_new + NUMSIG*NUMDIR
endif
end do
end do
# ifdef DIRECT_METHOD
IF (FREQ_SHIFT_IMPL) THEN
NbFreq=nNodesWithoutInterfaceGhosts
nnz_new=nnz_new + NUMDIR*(2*(NUMSIG-1))*NbFreq
maxNumConnNode = maxNumConnNode +2
END IF
IF (REFRACTION_IMPL) THEN
NbRefr=nNodesWithoutInterfaceGhosts
nnz_new=nnz_new + NUMSIG*(2*NUMDIR)*NbRefr
maxNumConnNode = maxNumConnNode + 2
END IF
NNZint=nnz_new+o_nnz_new
# endif
! we now have, for every node, its connected nodes
! iterate over them to create IA and JA
! +1 because IA in CSR format needs one extra entry (end-of-row pointer)
allocate(IA_petsc(nLocalRowBigMatrix+1), &
& JA_petsc(nnz_new), ASPAR_petsc(nnz_new), &
& oIA_petsc(nLocalRowBigMatrix+1), &
& oJA_petsc(o_nnz_new), oASPAR_petsc(o_nnz_new), &
& toSort(maxNumConnNode), o_toSort(maxNumConnNode), &
# ifdef DIRECT_METHOD
& AsparApp2Petsc(NNZint), oAsparApp2Petsc(NNZint), &
& IA_Ptotal(NUMSIG,NUMDIR,nNodesWithoutInterfaceGhosts), &
& I_DIAGtotal(NUMSIG,NUMDIR,nNodesWithoutInterfaceGhosts), &
# endif
& stat=istat)
if(istat /= 0) CALL WWM_ABORT('allocation error in wwm_petsc_block 4')
# ifdef DIRECT_METHOD
AsparApp2Petsc = -999
oAsparApp2Petsc = -999
IA_Ptotal=0
# endif
IA_petsc = 0
JA_petsc = 0
ASPAR_petsc = 0
oIA_petsc = 0
oJA_petsc = 0
oASPAR_petsc = 0
! to create IA_petsc and JA_petsc we have to iterate over all nodes, frequencies, directions and
! their connected nodes and map the node numbers to petsc order.
! After that, we sort the nodes of each row into ascending order.
! Example:
! Let adj be an array with node numbers in Application Ordering adj=[10,25,85,99]
! After mapping to petsc, the new node numbers are adj=[76,11,67,44]
! After sort: adj=[11,44,67,76]
! Do the sort with a simple bubble sort. Yes, bubble sort is very slow,
! but we have only a few numbers to sort (max 10 I assume).
! over all petsc IP rows
# ifdef DIRECT_METHOD
idxpos=0
# endif
do IPpetsc = 1, nNodesWithoutInterfaceGhosts
IP = PLO2ALO(IPpetsc-1)+1
! the number of nonzeros per row is the same for all ISS/IDD.
do ISS = 1, NUMSIG
do IDD = 1, NUMDIR
bigMatrixRow = toRowIndex(IPpetsc, ISS, IDD)
! fill with the largest number PetscInt can hold
toSort(:)%id = HUGE(0)
nToSort = 0
o_toSort(:)%id = HUGE(0)
o_nToSort = 0
! over all nodes in this row
# ifdef DIRECT_METHOD
IA_Ptotal(ISS,IDD,IPpetsc)=idxpos
WRITE(740+myrank,*) 'IS/ID/IP=', ISS, IDD, IPpetsc
WRITE(740+myrank,*) 'idxpos=', idxpos
# endif
do i = IA_P(IP)+1, IA_P(IP+1)
! found a ghost node, treat it specially
# ifdef DIRECT_METHOD
idxpos=idxpos+1
# endif
if(ALOold2ALO(JA(i)) .eq. -999) then
o_ntoSort = o_ntoSort + 1
! store the old position in ASPAR
# ifndef DIRECT_METHOD
o_toSort(o_nToSort)%userData = i
# else
o_toSort(o_nToSort)%userData = idxpos
# endif
!> \todo offdiagonal part with petsc global order? don't know why but it seems to work
ThePos=toRowIndex( AGO2PGO(iplg(JA(i))-1)+1, ISS, IDD)
o_toSort(o_nToSort)%id = ThePos
! not a ghost node
else
nToSort = nToSort + 1
ThePos=toRowIndex( ALO2PLO(JA_P(i))+1, ISS, IDD )
toSort(nToSort)%id = ThePos
# ifndef DIRECT_METHOD
toSort(nToSort)%userData = i
# else
toSort(nToSort)%userData = idxpos
# endif
end if
end do ! cols
# ifdef DIRECT_METHOD
IF (FREQ_SHIFT_IMPL) THEN
IF (ISS .gt. 1) THEN
nToSort = nToSort + 1
idxpos=idxpos+1
ThePos=toRowIndex(IPpetsc, ISS-1, IDD)
toSort(nToSort)%userData = idxpos
toSort(nToSort)%id = ThePos
END IF
IF (ISS .lt. NUMSIG) THEN
nToSort = nToSort + 1
idxpos=idxpos+1
ThePos=toRowIndex(IPpetsc, ISS+1, IDD)
toSort(nToSort)%userData = idxpos
toSort(nToSort)%id = ThePos
END IF
END IF
IF (REFRACTION_IMPL) THEN
IF (IDD == 1) THEN
IDprev=NUMDIR
ELSE
IDprev=IDD-1
END IF
IF (IDD == NUMDIR) THEN
IDnext=1
ELSE
IDnext=IDD+1
END IF
!
nToSort = nToSort + 1
idxpos=idxpos+1
ThePos=toRowIndex(IPpetsc, ISS, IDprev)
toSort(nToSort)%userData = idxpos
toSort(nToSort)%id = ThePos
!
nToSort = nToSort + 1
idxpos=idxpos+1
ThePos=toRowIndex(IPpetsc, ISS, IDnext)
toSort(nToSort)%userData = idxpos
toSort(nToSort)%id = ThePos
END IF
# endif
call bubbleSort(toSort, nToSort)
call bubbleSort(o_toSort, o_nToSort)
idx=toRowIndex(IPpetsc, ISS, IDD) +1
IA_petsc(idx + 1) = IA_petsc(idx) + nToSort
do i = 1, nToSort
J = J + 1
# ifdef DIRECT_METHOD
AsparApp2Petsc(toSort(i)%userData) = J
# endif
JA_petsc(J) = toSort(i)%id
# ifdef DIRECT_METHOD
IF (JA_petsc(J) .eq. bigMatrixRow) THEN
I_DIAGtotal(ISS,IDD,IPpetsc)=J
END IF
# endif
end do
oIA_petsc(idx + 1) = oIA_petsc(idx) + o_nToSort
do i = 1, o_nToSort
o_J = o_J + 1
# ifdef DIRECT_METHOD
oAsparApp2Petsc(o_toSort(i)%userData) = o_J
# endif
oJA_petsc(o_J) = o_toSort(i)%id
end do
end do
end do
end do
deallocate(toSort, o_toSort, stat=istat)
if(istat /= 0) CALL WWM_ABORT('allocation error in wwm_petsc_block 5')
end SUBROUTINE
!**********************************************************************
!* *
!**********************************************************************
# ifndef DIRECT_METHOD
!> create IA petsc array for the small sparse matrix
SUBROUTINE createCSR_petsc_small()
use datapool, only: NNZ, MNE, INE, MNP, RKIND, DBG, iplg, JA
use petscpool
use algorithm, only: bubbleSort, genericData
implicit none
! max number of adj nodes per node
integer :: maxNumConnNode
! running variable node number
integer :: IP = 0
! node number in petsc order
integer :: IPpetsc = 0
! running variable
integer :: i = 0, j = 0, oj = 0
integer :: istat
! number of nonzeros without interface and ghosts
integer :: nnz_new = 0
! number of nonzeros in the offdiagonal submatrix without interface and ghosts
integer :: o_nnz_new = 0
type(genericData), allocatable :: toSort(:)
integer :: nToSort = 0
type(genericData), allocatable :: o_toSort(:)
integer :: o_nToSort = 0
! calc max number of adj nodes per node
maxNumConnNode = 0
do IP = 1, MNP
if(IA_P(IP+1) - IA_P(IP) > maxNumConnNode) then
maxNumConnNode = IA_P(IP+1) - IA_P(IP)
end if
end do
! calc NNZ and offdiagonal NNZ
! iterate over all petsc rows and the nodes in each row
! if one is a ghost or interface node, increase the offdiagonal NNZ
nnz_new = 0
o_nnz_new = 0
do IPpetsc = 1, nNodesWithoutInterfaceGhosts
IP = PLO2ALO(IPpetsc-1)+1
do i = IA_P(IP)+1, IA_P(IP+1)
if(ALOold2ALO(JA(i)) .eq. -999) then
o_nnz_new = o_nnz_new + 1
else
nnz_new = nnz_new + 1
endif
end do
end do
! write(DBG%FHNDL,*) rank, "petsc_small nnz_new", nnz_new, " old", NNZ
! write(DBG%FHNDL,*) rank, "o_nnz_new", o_nnz_new
! we now have, for every node, its connected nodes
! iterate over them to create IA and JA
allocate(IA_petsc_small(nNodesWithoutInterfaceGhosts+1), &
& oIA_petsc_small(nNodesWithoutInterfaceGhosts+1), &
& toSort(maxNumConnNode), o_toSort(maxNumConnNode), &
& AsparApp2Petsc_small(NNZ), oAsparApp2Petsc_small(NNZ), &
& stat=istat)
if(istat /= 0) CALL WWM_ABORT('allocation error in wwm_petsc_block 6')
IA_petsc_small = 0
oIA_petsc_small = 0
AsparApp2Petsc_small = -999
oAsparApp2Petsc_small = -999
! to create IA_petsc and JA_petsc we have to iterate over all nodes and
! their connected nodes and map them to petsc order.
! the node numbers of the connected nodes in petsc order are not sorted.
! sort them with a simple bubble sort. Yes, bubble sort is very slow,
! but we have only a few numbers to sort (max 10 I assume).
J = 0
oJ = 0
do IPpetsc = 1, nNodesWithoutInterfaceGhosts
IP = PLO2ALO(IPpetsc-1)+1
! fill with the largest number PetscInt can hold
toSort(:)%id = HUGE(0)
nToSort = 0
o_toSort(:)%id = HUGE(0)
o_nToSort = 0
! over all nodes in this row
do i = IA_P(IP)+1, IA_P(IP+1)
! found a ghost node, treat it specially
if(ALOold2ALO(JA(i)) .eq. -999) then
o_ntoSort = o_ntoSort + 1
!> \todo offdiagonal part with petsc global order? don't know why but it seems to work
o_toSort(o_nToSort)%id = AGO2PGO(iplg(JA(i))-1)
! maybe because ALO2PLO has wrong values for offsubmatrix (ghost) nodes? so sorting is not possible
! o_toSort(o_nToSort).id = ALO2PLO(JA_P( i ))
! store the old position in ASPAR
o_toSort(o_nToSort)%userData = i
! not a ghost node
else
nToSort = nToSort + 1
! petsc local node number to sort for
toSort(nToSort)%id = ALO2PLO(JA_P( i ))
! store the old position in ASPAR
toSort(nToSort)%userData = i
end if
end do
call bubbleSort(toSort, nToSort)
call bubbleSort(o_toSort, o_nToSort)
! write the sorted cols to the mappings
do i = 1, nToSort
! in: application aspar position; out: petsc aspar position
AsparApp2Petsc_small(toSort(i)%userData) = J
J = J + 1
end do
IA_petsc_small(IPpetsc+1) = IA_petsc_small(IPpetsc) + nToSort
do i = 1, o_nToSort
oAsparApp2Petsc_small(o_toSort(i)%userData) = oJ
oJ = oJ + 1
end do
oIA_petsc_small(IPpetsc+1) = oIA_petsc_small(IPpetsc) + o_nToSort
end do
deallocate(toSort, o_toSort, stat=istat)
if(istat /= 0) CALL WWM_ABORT('allocation error in wwm_petsc_block 7')
end SUBROUTINE
# endif
!**********************************************************************
!* *
!**********************************************************************
!> copy the old solution back to the petsc myX vector
SUBROUTINE useOldSolution()
use datapool, only: NUMSIG, NUMDIR, MNP, AC2, RKIND, np, iplg
use petscsys
use petscmat
use petscpool
implicit none
PetscScalar :: eEntry
PetscInt :: rowGlobal
integer :: IP, IPglobal, IDD, ISS
! fill X vector
! iterate over all resident (and interface) nodes
! map it to petsc global ordering
! map it to block index
! and insert the value from AC2 into X vector
eEntry = 0;
call VecSet(myX, eEntry, petscErr);CHKERRQ(petscErr)
do IP = 1, np ! over all local nodes
! this is an interface node (row); ignore it and move on
if(ALOold2ALO(IP) .eq. -999) then
cycle
end if
IPglobal = AGO2PGO( iplg(IP)-1 ) + 1 ! map to petsc global order
do IDD = 1, NUMDIR
do ISS = 1, NUMSIG
rowGlobal = toRowIndex(IPglobal, ISS, IDD)
eEntry = AC2(ISS, IDD,IP)
call VecSetValue(myX, rowGlobal, eEntry, ADD_VALUES, petscErr);CHKERRQ(petscErr)
end do
end do
end do
call VecAssemblyBegin(myX, petscErr);CHKERRQ(petscErr)
call VecAssemblyEnd(myX, petscErr);CHKERRQ(petscErr)
end SUBROUTINE
!**********************************************************************
!* *
!**********************************************************************
!> calc only the rhs and fill it directly into the petsc vector myB
!> uses newer code from fluct
SUBROUTINE calcB
use datapool, only: NUMSIG, NUMDIR, MNP, INE, ONESIXTH, ONETHIRD
use datapool, only : IOBPD, IOBWB, DEP, DMIN, CCON, IE_CELL
use datapool, only : TRIA, LBCWA, LBCSP, LINHOM, IWBMNP, IOBDP
use datapool, only : IWBNDLC, WBAC, SI, ICOMP, SMETHOD
use datapool, only : PHIA, DT4A, MAXMNECON, AC2, RKIND
use datapool, only : TWO, RKIND, iplg, exchange_p2d
USE DATAPOOL, ONLY : SOURCE_IMPL
use petscpool
use petscsys
use petscvec
implicit none
integer :: IP, IDD, ISS, IPpetsc
INTEGER :: I
INTEGER :: IPGL1, IE
integer idx
! to temporarily store the element areas
real(rkind) :: TRIA03arr(MAXMNECON)
PetscScalar :: value
value = 0;
call VecSet(myB, value, petscErr);CHKERRQ(petscErr)
call VecGetArrayF90(myB, myBtemp, petscErr);CHKERRQ(petscErr)
do IP = 1, MNP
! this is an interface node (row). ignore it.
if(ALOold2ALO(IP) .eq. -999) then
cycle
end if
IPpetsc = ALO2PLO(IP-1) +1
! temporarily store all element areas connected to IP
do I = 1, CCON(IP)
IE = IE_CELL(Jcum(IP) + I)
TRIA03arr(I) = ONETHIRD * TRIA(IE)
end do
! if the node lies deep enough and the boundary flags allow it, treat all directions/frequencies for it
!if(IOBWB(IP) .EQ. 1) then
do ISS = 1, NUMSIG
do IDD = 1, NUMDIR
! if the node satisfies some boundary condition, weight the old solution with the triangle areas
idx=toRowIndex(IPpetsc, ISS, IDD) + 1
!if(IOBPD(IDD,IP) .EQ. 1) then
! value = SUM(TRIA03arr(1:CCON(IP)) * AC2(ISS, IDD,IP))
value = SUM(TRIA03arr(1:CCON(IP)) * AC2(ISS, IDD, IP))*IOBPD(IDD,IP)*IOBDP(IP)*IOBWB(IP)
! IP in Petsc local order
myBtemp(idx) = value + myBtemp(idx)
! if the node does not satisfy the boundary condition, set it to zero for this direction
!else
! myBtemp(idx) = 0
!endif
end do ! IDD
end do ! ISS
! set node to 0 for all frequency/directions
!else
! value = 0.
! do ISS = 1, NUMSIG
! do IDD = 1, NUMDIR
! myBtemp(toRowIndex(IPpetsc, ISS, IDD) + 1) = 0
! end do
! end do
!endif
end do ! IP
if (LBCWA .OR. LBCSP) then
if (LINHOM) then
do IP = 1, IWBMNP
IPGL1 = IWBNDLC(IP)
if(ALOold2ALO(IPGL1) .eq. -999) cycle ! this is an interface node (row). ignore it
IPpetsc = ALO2PLO(IPGL1-1) + 1
do ISS = 1, NUMSIG ! over all frequency
do IDD = 1, NUMDIR ! over all directions
myBtemp(toRowIndex(IPpetsc, ISS, IDD) + 1) = SI(IPGL1) * WBAC(ISS,IDD,IP)
end do ! NUMDIR
end do ! NUMSIG
end do ! IP
else ! LINHOM
do IP = 1, IWBMNP
IPGL1 = IWBNDLC(IP)
if(ALOold2ALO(IPGL1) .eq. -999) cycle ! this is an interface node (row). ignore it
IPpetsc = ALO2PLO(IPGL1-1) + 1
do ISS = 1, NUMSIG ! over all frequency
do IDD = 1, NUMDIR ! over all directions
myBtemp(toRowIndex(IPpetsc, ISS, IDD) + 1) = SI(IPGL1) * WBAC(ISS,IDD,1)
end do ! NUMDIR
end do ! NUMSIG
end do ! IP
endif ! LINHOM
end if
!AR: This is not nice, too many useless if ... endif blocks; must be cleaned and tested
if(ICOMP .GE. 2 .AND. SMETHOD .GT. 0) then ! only run this loop if source terms (wind etc.) are active
do IP = 1, MNP
if(ALOold2ALO(IP) .eq. -999) cycle ! this is an interface node (row). ignore it
if (IOBWB(IP) .EQ. 1) then
IPpetsc = ALO2PLO(IP-1) + 1
do ISS = 1, NUMSIG ! over all frequency
do IDD = 1, NUMDIR ! over all directions
if(IOBPD(IDD,IP) .EQ. 1) then
IF (SOURCE_IMPL) THEN
value = PHIA(IP,ISS,IDD) * DT4A * SI(IP) ! Add source term to the right hand side
idx=toRowIndex(IPpetsc, ISS, IDD) + 1
myBtemp(idx) = value + myBtemp(idx)
END IF
endif
end do
end do
end if
end do
endif
call VecRestoreArrayF90(myB, myBtemp, petscErr)
CHKERRQ(petscErr)
call VecAssemblyBegin(myB, petscErr);CHKERRQ(petscErr)
call VecAssemblyEnd(myB, petscErr);CHKERRQ(petscErr)
end SUBROUTINE
!**********************************************************************
!* *
!**********************************************************************
!> assembling the linear equation system
!> write directly into the petsc matrix, with openmp improvements
#ifndef DIRECT_METHOD
SUBROUTINE calcASPARomp(IP)
use datapool, only: NUMSIG, NUMDIR, MNP, INE, myrank
use datapool, only: ONESIXTH, ONETHIRD, ZERO, ONE
use datapool, only: THR, IEN, CCON, IE_CELL, POS_CELL, TRIA
use datapool, only: DT4A, POSI, ZERO, ONE, TWO
use datapool, only: IWBMNP, IWBNDLC, IOBPD, ICOMP, SMETHOD
use datapool, only: IOBWB, DEP, DMIN, MAXMNECON, TWO
use datapool, only: IOBP, I_DIAG, SI, LBCSP, LBCWA, LINHOM
use datapool, only: NP_RES, NNZ, MNE, WBAC
use datapool, only: rkind, np_global, np, npg, inp, iplg
use datapool, only: DT4F, DS_INCR, DT4D, DDIR, TAIL_ARR
use petscpool
implicit none
integer, intent(in) :: IP
integer :: IDD, ISS, IPpetsc
integer :: I
integer :: IPGL1, IE, POS
integer :: I1, I2, I3, idxpos
integer :: POS_TRICK(3,2)
real(rkind) :: DTK, TMP3
real(rkind) :: LAMBDA(2, MAXMNECON)
real(rkind) :: FL11(MAXMNECON), FL12(MAXMNECON)
real(rkind) :: FL21(MAXMNECON), FL22(MAXMNECON)
real(rkind) :: FL31(MAXMNECON), FL32(MAXMNECON)
real(rkind) :: CRFS(3, MAXMNECON), K(3, MAXMNECON)
real(rkind) :: KP(3,MAXMNECON)
real(rkind) :: KM(3, MAXMNECON)
real(rkind) :: K1
real(rkind) :: DELTAL(3,MAXMNECON)
real(rkind) :: NM(MAXMNECON)
! uncomment this for CADVXY2
! store all node numbers for CADVXY2
!> \todo MAXMNECON*3 is too much. we only need maxNumConnNode
! integer :: nodeList(MAXMNECON*3)
! number of nodes in the nodeList array
! integer :: nodeListSize
! real(kind=rkind) :: C(2, MAXMNECON*3, NUMDIR)
! uncomment this for CADVXY3
! element numbers to compute for
integer :: elementList(MAXMNECON)
! number of elements in the elementList array
integer :: elementListSize
! store the result from CADVXY3. one frequency, all directions and conn nodes from IP
! first index: x/y
! second index: conn nodes from IP. Three nodes per element. This is not optimal. Some nodes are calculated twice
! arrays C1-C3 are about 15 kB in total and fit into the cache :)
real(kind=rkind) :: C1(2, MAXMNECON, NUMDIR)
real(kind=rkind) :: C2(2, MAXMNECON, NUMDIR)
real(kind=rkind) :: C3(2, MAXMNECON, NUMDIR)
PetscScalar value1, value2, value3
! to temporarily save some values.
integer :: IEarr(MAXMNECON), POSarr(MAXMNECON)
integer :: I1arr(MAXMNECON), I2arr(MAXMNECON), I3arr(MAXMNECON)
real(kind=rkind) :: TRIA03arr(MAXMNECON)
! number of elements connected to a node
integer :: NECON
! position in aspar for the big matrix. see function aspar2petscAspar
integer :: petscAsparPosi1, petscAsparPosi2, petscAsparPosi3
integer :: petscAsparPosi4, idx
! number of connected nodes for IPpetsc
integer :: nConnNode
POS_TRICK(1,1) = 2
POS_TRICK(1,2) = 3
POS_TRICK(2,1) = 3
POS_TRICK(2,2) = 1
POS_TRICK(3,1) = 1
POS_TRICK(3,2) = 2
I = 0
IPGL1 = 0
IE = 0
POS = 0
I1 = 0
I2 = 0
I3 = 0
DTK = 0
TMP3 = 0
FL11 = 0
FL12 = 0
FL21 = 0
FL22 = 0
FL31 = 0
FL32 = 0
CRFS = 0
K1 = 0
! this is a ghost or interface node. ignore it
if(ALO2PLO(IP-1) .lt. 0) then
return
endif
IPpetsc = ALO2PLO(IP-1)+1
! update position in petsc aspar array
petscAsparPosi1 = NUMSIG*NUMDIR * IA_petsc_small(IPpetsc)
nConnNode = IA_petsc_small(IPpetsc+1) - IA_petsc_small(IPpetsc)
! uncomment this for CADVXY2
! nodeList = 0
! nodeListSize = CCON(IP) * 3
! uncomment this for CADVXY3
elementListSize = CCON(IP)
! loop over all connected elements and nodes of IP
! and temporarily save some values
! To fill the matrix we loop over MNP NUMSIG NUMDIR. The following values
! don't change in the NUMSIG NUMDIR loop so we can temporarily store them.