这是一个简单的代码从书 PETSC偏微分方程:C和Python 中的数值解。它计算atan(x)
的零点。如果我在一个进程中运行它,它就能正常运行。但是,如果我在两个进程中运行,它就不会运行。我真的不知道怎么了。有人有什么想法吗?
下面是代码:
#include <petsc.h>
extern PetscErrorCode FormFunction(SNES, Vec, Vec, void*);
int main(int argc,char **argv) {
PetscErrorCode ierr;
SNES snes; // nonlinear solver
Vec x, r; // solution, residual vectors
PetscReal x0 = 2.0;
PetscInitialize(&argc,&argv,(char*)0,help);
ierr = PetscOptionsBegin(PETSC_COMM_WORLD,"","options to atan","");CHKERRQ(ierr);
ierr = PetscOptionsReal("-x0","initial value","atan.c",x0,&x0,NULL); CHKERRQ(ierr);
ierr = PetscOptionsEnd();CHKERRQ(ierr);
ierr = VecCreate(PETSC_COMM_WORLD,&x); CHKERRQ(ierr);
ierr = VecSetSizes(x,PETSC_DECIDE,1); CHKERRQ(ierr);
ierr = VecSetFromOptions(x); CHKERRQ(ierr);
ierr = VecSet(x,x0); CHKERRQ(ierr);
ierr = VecDuplicate(x,&r); CHKERRQ(ierr);
ierr = SNESCreate(PETSC_COMM_WORLD,&snes); CHKERRQ(ierr);
ierr = SNESSetFunction(snes,r,FormFunction,NULL); CHKERRQ(ierr);
ierr = SNESSetFromOptions(snes); CHKERRQ(ierr);
ierr = SNESSolve(snes,NULL,x); CHKERRQ(ierr);
ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD); CHKERRQ(ierr);
VecDestroy(&x); VecDestroy(&r); SNESDestroy(&snes);
PetscFinalize();
return 0;
}
PetscErrorCode FormFunction(SNES snes, Vec x, Vec F, void *ctx) {
PetscErrorCode ierr;
const PetscReal *ax;
PetscReal *aF;
ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr);
ierr = VecGetArray(F,&aF);CHKERRQ(ierr);
aF[0] = atan(ax[0]);
ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr);
ierr = VecRestoreArray(F,&aF);CHKERRQ(ierr);
return 0;
}
下面是makefile:
include ${PETSC_DIR}/lib/petsc/conf/variables
include ${PETSC_DIR}/lib/petsc/conf/rules
atan: atan.o chkopts;-${FLINKER} -o atan atan.o ${PETSC_LIB}; ${RM} atan.o
我用来在一个进程中运行的命令:
~/petsc/arch-darwin-c-opt/bin/mpiexec -n 1 ./atan -snes_fd
这导致:
Vec Object: 1 MPI processes
type: seq
7.55724e-16
我用来在两个进程中运行的命令:
~/petsc/arch-darwin-c-opt/bin/mpiexec -n 2 ./atan -snes_fd
在这种情况下,结果是:
[1]PETSC ERROR: ------------------------------------------------------------------------
[1]PETSC ERROR: Caught signal number 11 SEGV: Segmentation Violation, probably memory access out of range
[1]PETSC ERROR: Try option -start_in_debugger or -on_error_attach_debugger
[1]PETSC ERROR: or see https://www.mcs.anl.gov/petsc/documentation/faq.html#valgrind
[1]PETSC ERROR: or try http://valgrind.org on GNU/linux and Apple Mac OS X to find memory corruption errors
[1]PETSC ERROR: configure using --with-debugging=yes, recompile, link, and run
[1]PETSC ERROR: to get more information on the crash.
[1]PETSC ERROR: --------------------- Error Message --------------------------------------------------------------
[1]PETSC ERROR: Signal received
[1]PETSC ERROR: See https://www.mcs.anl.gov/petsc/documentation/faq.html for trouble shooting.
[1]PETSC ERROR: Petsc Release Version 3.12.4, Feb, 04, 2020
[1]PETSC ERROR: ./atan on a named e5705c1d6678 by Unknown Fri Jun 9 11:40:40 2023
[1]PETSC ERROR: Configure options --build=x86_64-linux-gnu --prefix=/usr --includedir=${prefix}/include --mandir=${prefix}/share/man --infodir=${prefix}/share/info --sysconfdir=/etc --localstatedir=/var --with-silent-rules=0 --libdir=${prefix}/lib/x86_64-linux-gnu --runstatedir=/run --with-maintainer-mode=0 --with-dependency-tracking=0 --with-debugging=0 --shared-library-extension=_real --with-shared-libraries --with-pic=1 --with-cc=mpicc --with-cxx=mpicxx --with-fc=mpif90 --with-cxx-dialect=C++11 --with-opencl=1 --with-blas-lib=-lblas --with-lapack-lib=-llapack --with-scalapack=1 --with-scalapack-lib=-lscalapack-openmpi --with-mumps=1 --with-mumps-include="[]" --with-mumps-lib="-ldmumps -lzmumps -lsmumps -lcmumps -lmumps_common -lpord" --with-suitesparse=1 --with-suitesparse-include=/usr/include/suitesparse --with-suitesparse-lib="-lumfpack -lamd -lcholmod -lklu" --with-ptscotch=1 --with-ptscotch-include=/usr/include/scotch --with-ptscotch-lib="-lptesmumps -lptscotch -lptscotcherr" --with-fftw=1 --with-fftw-include="[]" --with-fftw-lib="-lfftw3 -lfftw3_mpi" --with-superlu=1 --with-superlu-include=/usr/include/superlu --with-superlu-lib=-lsuperlu --with-superlu_dist=1 --with-superlu_dist-include=/usr/include/superlu-dist --with-superlu_dist-lib=-lsuperlu_dist --with-hdf5-include=/usr/include/hdf5/openmpi --with-hdf5-lib="-L/usr/lib/x86_64-linux-gnu/hdf5/openmpi -L/usr/lib/openmpi/lib -lhdf5 -lmpi" --CXX_LINKER_FLAGS=-Wl,--no-as-needed --with-hypre=1 --with-hypre-include=/usr/include/hypre --with-hypre-lib=-lHYPRE_core --prefix=/usr/lib/petscdir/petsc3.12/x86_64-linux-gnu-real --PETSC_ARCH=x86_64-linux-gnu-real CFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -fPIC" CXXFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -fPIC" FCFLAGS="-g -O2 -fstack-protector-strong -fPIC -ffree-line-length-0" FFLAGS="-g -O2 -fstack-protector-strong -fPIC -ffree-line-length-0" CPPFLAGS="-Wdate-time -D_FORTIFY_SOURCE=2" LDFLAGS="-Wl,-Bsymbolic-functions -Wl,-z,relro -fPIC" MAKEFLAGS=w
[1]PETSC ERROR: #1 User provided function() line 0 in unknown file
--------------------------------------------------------------------------
MPI_ABORT was invoked on rank 1 in communicator MPI_COMM_WORLD
with errorcode 59.
NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes.
You may or may not see output from other processes, depending on
exactly when Open MPI kills them.
--------------------------------------------------------------------------
[0]PETSC ERROR: ------------------------------------------------------------------------
[0]PETSC ERROR: Caught signal number 15 Terminate: Some process (or the batch system) has told this process to end
[0]PETSC ERROR: Try option -start_in_debugger or -on_error_attach_debugger
[0]PETSC ERROR: or see https://www.mcs.anl.gov/petsc/documentation/faq.html#valgrind
[0]PETSC ERROR: or try http://valgrind.org on GNU/linux and Apple Mac OS X to find memory corruption errors
[0]PETSC ERROR: configure using --with-debugging=yes, recompile, link, and run
[0]PETSC ERROR: to get more information on the crash.
[0]PETSC ERROR: --------------------- Error Message --------------------------------------------------------------
[0]PETSC ERROR: Signal received
[0]PETSC ERROR: See https://www.mcs.anl.gov/petsc/documentation/faq.html for trouble shooting.
[0]PETSC ERROR: Petsc Release Version 3.12.4, Feb, 04, 2020
[0]PETSC ERROR: ./atan on a named e5705c1d6678 by Unknown Fri Jun 9 11:40:40 2023
[0]PETSC ERROR: Configure options --build=x86_64-linux-gnu --prefix=/usr --includedir=${prefix}/include --mandir=${prefix}/share/man --infodir=${prefix}/share/info --sysconfdir=/etc --localstatedir=/var --with-silent-rules=0 --libdir=${prefix}/lib/x86_64-linux-gnu --runstatedir=/run --with-maintainer-mode=0 --with-dependency-tracking=0 --with-debugging=0 --shared-library-extension=_real --with-shared-libraries --with-pic=1 --with-cc=mpicc --with-cxx=mpicxx --with-fc=mpif90 --with-cxx-dialect=C++11 --with-opencl=1 --with-blas-lib=-lblas --with-lapack-lib=-llapack --with-scalapack=1 --with-scalapack-lib=-lscalapack-openmpi --with-mumps=1 --with-mumps-include="[]" --with-mumps-lib="-ldmumps -lzmumps -lsmumps -lcmumps -lmumps_common -lpord" --with-suitesparse=1 --with-suitesparse-include=/usr/include/suitesparse --with-suitesparse-lib="-lumfpack -lamd -lcholmod -lklu" --with-ptscotch=1 --with-ptscotch-include=/usr/include/scotch --with-ptscotch-lib="-lptesmumps -lptscotch -lptscotcherr" --with-fftw=1 --with-fftw-include="[]" --with-fftw-lib="-lfftw3 -lfftw3_mpi" --with-superlu=1 --with-superlu-include=/usr/include/superlu --with-superlu-lib=-lsuperlu --with-superlu_dist=1 --with-superlu_dist-include=/usr/include/superlu-dist --with-superlu_dist-lib=-lsuperlu_dist --with-hdf5-include=/usr/include/hdf5/openmpi --with-hdf5-lib="-L/usr/lib/x86_64-linux-gnu/hdf5/openmpi -L/usr/lib/openmpi/lib -lhdf5 -lmpi" --CXX_LINKER_FLAGS=-Wl,--no-as-needed --with-hypre=1 --with-hypre-include=/usr/include/hypre --with-hypre-lib=-lHYPRE_core --prefix=/usr/lib/petscdir/petsc3.12/x86_64-linux-gnu-real --PETSC_ARCH=x86_64-linux-gnu-real CFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -fPIC" CXXFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -fPIC" FCFLAGS="-g -O2 -fstack-protector-strong -fPIC -ffree-line-length-0" FFLAGS="-g -O2 -fstack-protector-strong -fPIC -ffree-line-length-0" CPPFLAGS="-Wdate-time -D_FORTIFY_SOURCE=2" LDFLAGS="-Wl,-Bsymbolic-functions -Wl,-z,relro -fPIC" MAKEFLAGS=w
[0]PETSC ERROR: #1 User provided function() line 0 in unknown file
[e5705c1d6678:03125] 1 more process has sent help message help-mpi-api.txt / mpi-abort
[e5705c1d6678:03125] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages
[warn] Epoll MOD(1) on fd 23 failed. Old events were 6; read change was 0 (none); write change was 2 (del); close
我使用最新的Petsc版本(我刚刚安装)在MacOS中运行它。我也在Google Colab中运行它,结果完全相同。您可以看到Colab文件here。
有人能解释一下为什么会发生这种情况吗?
1条答案
按热度按时间vjrehmav1#
这段代码,我认为是书中的c/ch 4/solns/atan. c(即练习4.4)是用牛顿法求解一维方程。因此,Vec只有一个条目。PETSc Vecs每个进程至少需要一个条目,尽管这里的问题只发生在SNESSolve()中。如果你注解掉SNESSolve()行,那么它就不会出现故障,当然你也不会解出方程。请注意,没有理由并行运行此示例。