Actual source code: fninvsqrt.c

slepc-3.16.1 2021-11-17
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2021, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Inverse square root function  x^(-1/2)
 12: */

 14: #include <slepc/private/fnimpl.h>
 15: #include <slepcblaslapack.h>

 17: PetscErrorCode FNEvaluateFunction_Invsqrt(FN fn,PetscScalar x,PetscScalar *y)
 18: {
 20:   if (x==0.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Function not defined in the requested value");
 21: #if !defined(PETSC_USE_COMPLEX)
 22:   if (x<0.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Function not defined in the requested value");
 23: #endif
 24:   *y = 1.0/PetscSqrtScalar(x);
 25:   return(0);
 26: }

 28: PetscErrorCode FNEvaluateDerivative_Invsqrt(FN fn,PetscScalar x,PetscScalar *y)
 29: {
 31:   if (x==0.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
 32: #if !defined(PETSC_USE_COMPLEX)
 33:   if (x<0.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
 34: #endif
 35:   *y = -1.0/(2.0*PetscPowScalarReal(x,1.5));
 36:   return(0);
 37: }

 39: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Schur(FN fn,Mat A,Mat B)
 40: {
 42:   PetscBLASInt   n=0,ld,*ipiv,info;
 43:   PetscScalar    *Ba,*Wa;
 44:   PetscInt       m;
 45:   Mat            W;

 48:   FN_AllocateWorkMat(fn,A,&W);
 49:   if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
 50:   MatDenseGetArray(B,&Ba);
 51:   MatDenseGetArray(W,&Wa);
 52:   /* compute B = sqrtm(A) */
 53:   MatGetSize(A,&m,NULL);
 54:   PetscBLASIntCast(m,&n);
 55:   ld = n;
 56:   FNSqrtmSchur(fn,n,Ba,n,PETSC_FALSE);
 57:   /* compute B = A\B */
 58:   PetscMalloc1(ld,&ipiv);
 59:   PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
 60:   SlepcCheckLapackInfo("gesv",info);
 61:   PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
 62:   PetscFree(ipiv);
 63:   MatDenseRestoreArray(W,&Wa);
 64:   MatDenseRestoreArray(B,&Ba);
 65:   FN_FreeWorkMat(fn,&W);
 66:   return(0);
 67: }

 69: PetscErrorCode FNEvaluateFunctionMatVec_Invsqrt_Schur(FN fn,Mat A,Vec v)
 70: {
 72:   PetscBLASInt   n=0,ld,*ipiv,info,one=1;
 73:   PetscScalar    *Ba,*Wa;
 74:   PetscInt       m;
 75:   Mat            B,W;

 78:   FN_AllocateWorkMat(fn,A,&B);
 79:   FN_AllocateWorkMat(fn,A,&W);
 80:   MatDenseGetArray(B,&Ba);
 81:   MatDenseGetArray(W,&Wa);
 82:   /* compute B_1 = sqrtm(A)*e_1 */
 83:   MatGetSize(A,&m,NULL);
 84:   PetscBLASIntCast(m,&n);
 85:   ld = n;
 86:   FNSqrtmSchur(fn,n,Ba,n,PETSC_TRUE);
 87:   /* compute B_1 = A\B_1 */
 88:   PetscMalloc1(ld,&ipiv);
 89:   PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&one,Wa,&ld,ipiv,Ba,&ld,&info));
 90:   SlepcCheckLapackInfo("gesv",info);
 91:   PetscFree(ipiv);
 92:   MatDenseRestoreArray(W,&Wa);
 93:   MatDenseRestoreArray(B,&Ba);
 94:   MatGetColumnVector(B,v,0);
 95:   FN_FreeWorkMat(fn,&W);
 96:   FN_FreeWorkMat(fn,&B);
 97:   return(0);
 98: }

100: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_DBP(FN fn,Mat A,Mat B)
101: {
103:   PetscBLASInt   n=0;
104:   PetscScalar    *T;
105:   PetscInt       m;

108:   if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
109:   MatDenseGetArray(B,&T);
110:   MatGetSize(A,&m,NULL);
111:   PetscBLASIntCast(m,&n);
112:   FNSqrtmDenmanBeavers(fn,n,T,n,PETSC_TRUE);
113:   MatDenseRestoreArray(B,&T);
114:   return(0);
115: }

117: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_NS(FN fn,Mat A,Mat B)
118: {
120:   PetscBLASInt   n=0;
121:   PetscScalar    *T;
122:   PetscInt       m;

125:   if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
126:   MatDenseGetArray(B,&T);
127:   MatGetSize(A,&m,NULL);
128:   PetscBLASIntCast(m,&n);
129:   FNSqrtmNewtonSchulz(fn,n,T,n,PETSC_TRUE);
130:   MatDenseRestoreArray(B,&T);
131:   return(0);
132: }

134: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Sadeghi(FN fn,Mat A,Mat B)
135: {
137:   PetscBLASInt   n=0,ld,*ipiv,info;
138:   PetscScalar    *Ba,*Wa;
139:   PetscInt       m;
140:   Mat            W;

143:   FN_AllocateWorkMat(fn,A,&W);
144:   if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
145:   MatDenseGetArray(B,&Ba);
146:   MatDenseGetArray(W,&Wa);
147:   /* compute B = sqrtm(A) */
148:   MatGetSize(A,&m,NULL);
149:   PetscBLASIntCast(m,&n);
150:   ld = n;
151:   FNSqrtmSadeghi(fn,n,Ba,n);
152:   /* compute B = A\B */
153:   PetscMalloc1(ld,&ipiv);
154:   PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
155:   SlepcCheckLapackInfo("gesv",info);
156:   PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
157:   PetscFree(ipiv);
158:   MatDenseRestoreArray(W,&Wa);
159:   MatDenseRestoreArray(B,&Ba);
160:   FN_FreeWorkMat(fn,&W);
161:   return(0);
162: }

164: #if defined(PETSC_HAVE_CUDA)
165: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_NS_CUDA(FN fn,Mat A,Mat B)
166: {
168:   PetscBLASInt   n=0;
169:   PetscScalar    *Ba;
170:   PetscInt       m;

173:   if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
174:   MatDenseGetArray(B,&Ba);
175:   MatGetSize(A,&m,NULL);
176:   PetscBLASIntCast(m,&n);
177:   FNSqrtmNewtonSchulz_CUDA(fn,n,Ba,n,PETSC_TRUE);
178:   MatDenseRestoreArray(B,&Ba);
179:   return(0);
180: }

182: #if defined(PETSC_HAVE_MAGMA)
183: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_DBP_CUDAm(FN fn,Mat A,Mat B)
184: {
186:   PetscBLASInt   n=0;
187:   PetscScalar    *T;
188:   PetscInt       m;

191:   if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
192:   MatDenseGetArray(B,&T);
193:   MatGetSize(A,&m,NULL);
194:   PetscBLASIntCast(m,&n);
195:   FNSqrtmDenmanBeavers_CUDAm(fn,n,T,n,PETSC_TRUE);
196:   MatDenseRestoreArray(B,&T);
197:   return(0);
198: }

200: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Sadeghi_CUDAm(FN fn,Mat A,Mat B)
201: {
203:   PetscBLASInt   n=0,ld,*ipiv,info;
204:   PetscScalar    *Ba,*Wa;
205:   PetscInt       m;
206:   Mat            W;

209:   FN_AllocateWorkMat(fn,A,&W);
210:   if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
211:   MatDenseGetArray(B,&Ba);
212:   MatDenseGetArray(W,&Wa);
213:   /* compute B = sqrtm(A) */
214:   MatGetSize(A,&m,NULL);
215:   PetscBLASIntCast(m,&n);
216:   ld = n;
217:   FNSqrtmSadeghi_CUDAm(fn,n,Ba,n);
218:   /* compute B = A\B */
219:   PetscMalloc1(ld,&ipiv);
220:   PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
221:   SlepcCheckLapackInfo("gesv",info);
222:   PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
223:   PetscFree(ipiv);
224:   MatDenseRestoreArray(W,&Wa);
225:   MatDenseRestoreArray(B,&Ba);
226:   FN_FreeWorkMat(fn,&W);
227:   return(0);
228: }
229: #endif /* PETSC_HAVE_MAGMA */
230: #endif /* PETSC_HAVE_CUDA */

232: PetscErrorCode FNView_Invsqrt(FN fn,PetscViewer viewer)
233: {
235:   PetscBool      isascii;
236:   char           str[50];
237:   const char     *methodname[] = {
238:                   "Schur method for inv(A)*sqrtm(A)",
239:                   "Denman-Beavers (product form)",
240:                   "Newton-Schulz iteration",
241:                   "Sadeghi iteration"
242: #if defined(PETSC_HAVE_CUDA)
243:                  ,"Newton-Schulz iteration CUDA"
244: #if defined(PETSC_HAVE_MAGMA)
245:                  ,"Denman-Beavers (product form) CUDA/MAGMA",
246:                   "Sadeghi iteration CUDA/MAGMA"
247: #endif
248: #endif
249:   };
250:   const int      nmeth=sizeof(methodname)/sizeof(methodname[0]);

253:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
254:   if (isascii) {
255:     if (fn->beta==(PetscScalar)1.0) {
256:       if (fn->alpha==(PetscScalar)1.0) {
257:         PetscViewerASCIIPrintf(viewer,"  Inverse square root: x^(-1/2)\n");
258:       } else {
259:         SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
260:         PetscViewerASCIIPrintf(viewer,"  Inverse square root: (%s*x)^(-1/2)\n",str);
261:       }
262:     } else {
263:       SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE);
264:       if (fn->alpha==(PetscScalar)1.0) {
265:         PetscViewerASCIIPrintf(viewer,"  Inverse square root: %s*x^(-1/2)\n",str);
266:       } else {
267:         PetscViewerASCIIPrintf(viewer,"  Inverse square root: %s",str);
268:         PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
269:         SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
270:         PetscViewerASCIIPrintf(viewer,"*(%s*x)^(-1/2)\n",str);
271:         PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
272:       }
273:     }
274:     if (fn->method<nmeth) {
275:       PetscViewerASCIIPrintf(viewer,"  computing matrix functions with: %s\n",methodname[fn->method]);
276:     }
277:   }
278:   return(0);
279: }

281: SLEPC_EXTERN PetscErrorCode FNCreate_Invsqrt(FN fn)
282: {
284:   fn->ops->evaluatefunction          = FNEvaluateFunction_Invsqrt;
285:   fn->ops->evaluatederivative        = FNEvaluateDerivative_Invsqrt;
286:   fn->ops->evaluatefunctionmat[0]    = FNEvaluateFunctionMat_Invsqrt_Schur;
287:   fn->ops->evaluatefunctionmat[1]    = FNEvaluateFunctionMat_Invsqrt_DBP;
288:   fn->ops->evaluatefunctionmat[2]    = FNEvaluateFunctionMat_Invsqrt_NS;
289:   fn->ops->evaluatefunctionmat[3]    = FNEvaluateFunctionMat_Invsqrt_Sadeghi;
290: #if defined(PETSC_HAVE_CUDA)
291:   fn->ops->evaluatefunctionmat[4]    = FNEvaluateFunctionMat_Invsqrt_NS_CUDA;
292: #if defined(PETSC_HAVE_MAGMA)
293:   fn->ops->evaluatefunctionmat[5]    = FNEvaluateFunctionMat_Invsqrt_DBP_CUDAm;
294:   fn->ops->evaluatefunctionmat[6]    = FNEvaluateFunctionMat_Invsqrt_Sadeghi_CUDAm;
295: #endif /* PETSC_HAVE_MAGMA */
296: #endif /* PETSC_HAVE_CUDA */
297:   fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Invsqrt_Schur;
298:   fn->ops->view                      = FNView_Invsqrt;
299:   return(0);
300: }