From ce11a7e8ea83cf0e4838e56eb0ab95439e68f64f Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 4 Oct 2024 11:40:48 +0000 Subject: [PATCH 001/122] feat: adding env vars needed for multinode --- src/ansys/mapdl/core/launcher.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 349602c667f..97375fb8890 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1768,6 +1768,18 @@ def launch_mapdl( f"The machine has {machine_cores} cores. PyMAPDL is asking for {nproc} cores." ) + # Setting env vars + env_vars = update_env_vars(add_env_vars, replace_env_vars) + + if ON_SLURM: + if not env_vars: + env_vars = {} + + env_vars.setdefault("ANS_CMD_NODIAG", "TRUE") + # Passing env vars for MAPDL run on multiple nodes + env_vars.setdefault("ANS_MULTIPLE_NODES", "1") + env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") + start_parm.update( { "exec_file": exec_file, From 61ad61beb942234e19ba6041c2e4d976e5ed397c Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 4 Oct 2024 11:40:48 +0000 Subject: [PATCH 002/122] feat: adding env vars needed for multinode --- src/ansys/mapdl/core/launcher.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 349602c667f..97375fb8890 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1768,6 +1768,18 @@ def launch_mapdl( f"The machine has {machine_cores} cores. PyMAPDL is asking for {nproc} cores." 
) + # Setting env vars + env_vars = update_env_vars(add_env_vars, replace_env_vars) + + if ON_SLURM: + if not env_vars: + env_vars = {} + + env_vars.setdefault("ANS_CMD_NODIAG", "TRUE") + # Passing env vars for MAPDL run on multiple nodes + env_vars.setdefault("ANS_MULTIPLE_NODES", "1") + env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") + start_parm.update( { "exec_file": exec_file, From e9b91d4bf527818ca6fee602606d190effa520c9 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 11:18:55 +0000 Subject: [PATCH 003/122] feat: renaming hpc detection argument --- src/ansys/mapdl/core/launcher.py | 36 ++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 97375fb8890..34aebe9d507 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1089,7 +1089,7 @@ def launch_mapdl( add_env_vars: Optional[Dict[str, str]] = None, replace_env_vars: Optional[Dict[str, str]] = None, version: Optional[Union[int, str]] = None, - detect_slurm_config: bool = True, + detect_HPC: bool = True, **kwargs: Dict[str, Any], ) -> Union[MapdlGrpc, "MapdlConsole"]: """Start MAPDL locally. @@ -1118,12 +1118,15 @@ def launch_mapdl( MAPDL jobname. Defaults to ``'file'``. nproc : int, optional - Number of processors. Defaults to 2. + Number of processors. Defaults to 2. If running on an HPC cluster, + this value is adjusted to the number of CPUs allocated to the job, + unless ``detect_HPC`` is set to "false". ram : float, optional - Total size in megabytes of the workspace (memory) used for the initial allocation. - The default is ``None``, in which case 2 GB (2048 MB) is used. To force a fixed size - throughout the run, specify a negative number. + Total size in megabytes of the workspace (memory) used for the initial + allocation. The default is ``None``, in which case 2 GB (2048 MB) is + used. 
To force a fixed size throughout the run, specify a negative + number. mode : str, optional Mode to launch MAPDL. Must be one of the following: @@ -1276,6 +1279,13 @@ def launch_mapdl( export PYMAPDL_MAPDL_VERSION=22.2 + detect_HPC: bool, optional + Whether to detect if PyMAPDL is running on an HPC cluster or not. Currently + only SLURM clusters are supported. By default, it is set to true. + This option can be bypassed if the environment variable + ``PYMAPDL_ON_SLURM`` is set to "true". For more information visit + :ref:`ref_hpc_slurm`. + kwargs : dict, optional These keyword arguments are interface specific or for development purposes. See Notes for more details. @@ -1447,6 +1457,12 @@ def launch_mapdl( "ANSYSLMD_LICENSE_FILE":"1055@MYSERVER"} >>> mapdl = launch_mapdl(replace_env_vars=my_env_vars) """ + # Checking specific env var + if not nproc: + nproc = os.environ.get("PYMAPDL_NPROC", None) + if nproc: + nproc = int(nproc) + # By default ON_SLURM = os.environ.get("PYMAPDL_ON_SLURM", None) if ON_SLURM is None: @@ -1462,7 +1478,7 @@ def launch_mapdl( and bool(os.environ.get("SLURM_JOB_ID", "")) ) - if detect_slurm_config and ON_SLURM: + if detect_HPC and ON_SLURM: LOG.info("On Slurm mode.") # extracting parameters @@ -2134,7 +2150,7 @@ def get_value( # ntasks is for mpi SLURM_NTASKS = get_value("SLURM_NTASKS", kwargs) LOG.info(f"SLURM_NTASKS: {SLURM_NTASKS}") - # Sharing tasks acrros multiple nodes (DMP) + # Sharing tasks across multiple nodes (DMP) # the format of this envvar is a bit tricky. Avoiding it for the moment. 
# SLURM_TASKS_PER_NODE = int( # kwargs.pop( @@ -2178,12 +2194,6 @@ def get_value( jobname = os.environ.get("SLURM_JOB_NAME", "file") LOG.info(f"Using jobname: {jobname}") - # Checking specific env var - if not nproc: - nproc = os.environ.get("PYMAPDL_NPROC", None) - if nproc: - nproc = int(nproc) - if not nproc: ## Attempt to calculate the appropriate number of cores: # Reference: https://stackoverflow.com/a/51141287/6650211 From c714d39e6def24794d86ddd6c429430fb443fdf7 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 11:22:00 +0000 Subject: [PATCH 004/122] docs: adding documentation --- .../extended_examples/hpc/hpc_ml_ga.rst | 2 +- doc/source/user_guide/hpc/examples.rst | 6 + doc/source/user_guide/hpc/pymapdl.rst | 219 ++++++++---------- doc/source/user_guide/hpc/settings.rst | 135 +++++++++++ doc/source/user_guide/hpc/troubleshooting.rst | 110 +++++++-- doc/source/user_guide/index.rst | 2 + 6 files changed, 320 insertions(+), 154 deletions(-) create mode 100644 doc/source/user_guide/hpc/examples.rst create mode 100644 doc/source/user_guide/hpc/settings.rst diff --git a/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst b/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst index 30570b5c6b9..fb87bb7e6d4 100644 --- a/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst +++ b/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst @@ -251,7 +251,7 @@ this script. If you have problems when creating the virtual environment or accessing it from the compute nodes, - see :ref:`ref_hpc_pymapdl_job`. + see :ref:`ref_hpc_troubleshooting`. 3. Install the requirements for this example from the :download:`requirements.txt ` file. 
diff --git a/doc/source/user_guide/hpc/examples.rst b/doc/source/user_guide/hpc/examples.rst new file mode 100644 index 00000000000..bcd96ebc8b8 --- /dev/null +++ b/doc/source/user_guide/hpc/examples.rst @@ -0,0 +1,6 @@ + +Examples +======== + +For an example that uses a machine learning genetic algorithm in +an HPC system managed by SLURM scheduler, see :ref:`hpc_ml_ga_example`. diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index f921e0b9d32..c70628f3a7a 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -1,138 +1,130 @@ -.. _ref_hpc_pymapdl: - - -============================= -PyMAPDL on SLURM HPC clusters -============================= .. _ref_hpc_pymapdl_job: -Submit a PyMAPDL job -==================== +======================= +PyMAPDL on HPC Clusters +======================= -Using PyMAPDL in an HPC environment managed by SLURM scheduler has certain requirements: -* **An Ansys installation must be accessible from all the compute nodes**. - This normally implies that the ``ANSYS`` installation directory is in a - shared drive or directory. Your HPC cluster administrator - should provide you with the path to the ``ANSYS`` directory. +Introduction +============ -* **A compatible Python installation must be accessible from all the compute nodes**. - For compatible Python versions, see :ref:`ref_pymapdl_installation`. +PyMAPDL communicates with MAPDL using the gRPC protocol. +This protocol offers many advantages and features, for more information +see :ref:`ref_project_page`. +One of these features is that it is not required to have both, +PyMAPDL and MAPDL processes, running on the same machine. +This possibility opens the door to many configurations, depending +on whether you run them both or not on the HPC compute nodes. +Additionally, you might want to be able to interact with them (``interactive`` mode) +or not (``batch`` mode). 
-Additionally, you must perform a few key steps to ensure efficient job -execution and resource utilization. Subsequent topics describe these steps. +Currently, the supported configurations are: -Check the Python installation ------------------------------ +* `Submit a PyMAPDL batch job to the cluster from the entrypoint node` -The PyMAPDL Python package (``ansys-mapdl-core``) must be installed in a virtual -environment that is accessible from the compute nodes. -To see where your Python distribution is installed, use this code: +Since v0.68.5, PyMAPDL can take advantage of the tight integration +between the scheduler and MAPDL to read the job configuration and +launch an MAPDL instance that can use all the resources allocated +to that job. +For instance, if a SLURM job has allocated 8 nodes with 4 cores each, +then PyMAPDL will launch an MAPDL instance which will use 32 cores +spanning across those 8 nodes. +This behaviour can be disabled by setting the environment variable +:envvar:`PYMAPDL_ON_SLURM` or passing the argument `detect_HPC=False` +to :func:`launch_mapdl() `. -.. code-block:: console - user@machine:~$ which python3 - /usr/bin/python3 -To print the version of Python you have available, use this code: +Submit a PyMAPDL batch job to the cluster from the entrypoint node +================================================================== -.. code-block:: console +Many HPC clusters allow their users to log in to a machine using +``ssh``, ``vnc``, ``rdp``, or similar technologies and submit a job +to the cluster from there. +This entrypoint machine, sometimes known as *head node* or *entrypoint node*, +might be a virtual machine (VDI/VM). - user@machine:~$ python3 --version - Python 3.9.16 +In such cases, once the Python virtual environment with PyMAPDL is already +set and is accessible to all the compute nodes, launching a +PyMAPDL job is very easy to do using the ``sbatch`` command. +No changes are needed on a PyMAPDL script to run it on a SLURM cluster. 
-You should be aware that your machine might have installed other Python versions. -To find out if those installations are already in the ``PATH`` environment variable, -you can press the **Tab** key to use autocomplete: +First the virtual environment must be activated in the current terminal. .. code-block:: console - user@machine:~$ which python3[TAB] - python3 python3-intel64 python3.10-config python3.11 python3.12 python3.8 python3.8-intel64 python3.9-config - python3-config python3.10 python3.10-intel64 python3.11-config python3.12-config python3.8-config python3.9 - $ which python3.10 - /usr/bin/python3.10 + user@entrypoint-machine:~$ export VENV_PATH=/my/path/to/the/venv + user@entrypoint-machine:~$ source $VENV_PATH/bin/activate -You should use a Python version that is compatible with PyMAPDL. -For more information, see :ref:`ref_pymapdl_installation`. +Once the virtual environment has been activated, you can launch any Python +script if they do have the proper Python shebang (``#!/usr/bin/env python3``). -The ``which`` command returns the path where the Python executable is installed. -You can use that executable to create your own Python virtual environment in a directory -that is accessible from all the compute nodes. -For most HPC clusters, the ``/home/$user`` directory is generally available to all nodes. -You can then create the virtual environment in the ``/home/user/.venv`` directory: +For instance, to launch the following Python script ``main.py``: -.. code-block:: console +.. code-block:: python + :caption: ``main.py`` file + + #!/usr/bin/env python3 - user@machine:~$ python3 -m venv /home/user/.venv + from ansys.mapdl.core import launch_mapdl -After activating the virtual environment, you can install PyMAPDL. 
+ mapdl = launch_mapdl(run_location="/home/ubuntu/tmp/tmp/mapdl", loglevel="debug") + print(mapdl.prep7()) + print(f'Number of CPUs: {mapdl.get_value("ACTIVE", 0, "NUMCPU")}') -Install PyMAPDL ---------------- + mapdl.exit() -To install PyMAPDL on the activated virtual environment, run the following commands: +You can just run in your console: .. code-block:: console - user@machine:~$ source /home/user/.venv/bin/activate - (.venv) user@machine:~$ pip install ansys-mapdl-core - Collecting ansys-mapdl-core - Downloading ansys_mapdl_core-0.68.1-py3-none-any.whl (26.9 MB) - ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 26.9/26.9 MB 37.3 MB/s eta 0:00:00 - Collecting pexpect>=4.8.0 - Using cached pexpect-4.9.0-py2.py3-none-any.whl (63 kB) - Collecting click>=8.1.3 - ... + (venv) user@entrypoint-machine:~$ sbatch main.py -To test if this virtual environment is accessible from the compute nodes, -run this ``test.sh`` bash script: +Alternatively, you can remove the shebang from the python file and use a +Python executable call: -.. code-block:: bash +.. code-block:: console + + (venv) user@entrypoint-machine:~$ sbatch python main.py - #!/bin/bash - #SBATCH --job-name=myjob - #SBATCH --nodes=1 - #SBATCH --ntasks-per-node=4 - #SBATCH --time=01:00:00 +Additionally, you can change the number of cores used in your +job by setting the :envvar:`PYMAPDL_NPROC` to the desired value. + +.. code-block:: console - # Commands to run - echo "Testing Python!" - source /home/user/.venv/bin/activate - python -c "from ansys.mapdl import core;print(f'PyMAPDL version {core.__version__} was successfully imported.')" + (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch main.py -then you can run that script using: +You can also add ``sbatch`` options to the command: .. 
code-block:: console - user@machine:~$ srun test.sh + (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch main.py -This command might take a minute or two to complete, depending on the amount of free -resources available in the cluster. -On the console, you should see this output: -.. code-block:: text +For instance, to launch a PyMAPDL job which starts a four-core MAPDL instance +on a 10-CPU SLURM job, you can use: + +.. code-block:: bash - Testing Python! - PyMAPDL version 0.68.1 was successfully imported. + (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch --partition=qsmall --nodes=10 --ntasks-per-node=1 main.py -If you see an error in the output, see :ref:`ref_hpc_troubleshooting`, especially -:ref:`ref_python_venv_not_accesible`. -Submit a PyMAPDL job --------------------- +Using a submission script +------------------------- -To submit a PyMAPDL job, you must create two files: +In case you need to customize your job further, you can create a SLURM +submission script to submit a PyMAPDL job. +In this case, you must create two files: - Python script with the PyMAPDL code -- Bash script that activates the virtual environment and calls the Python script - -**Python script:** ``pymapdl_script.py`` +- Bash script that activates the virtual environment and calls the + Python script. .. code-block:: python + :caption: ``main.py`` python script from ansys.mapdl.core import launch_mapdl @@ -147,59 +139,30 @@ To submit a PyMAPDL job, you must create two files: mapdl.exit() -**Bash script:** ``job.sh`` - .. code-block:: bash + :caption: ``job.sh`` execution script - source /home/user/.venv/bin/activate - python pymapdl_script.py + source /home/user/.venv/bin/activate + python main.py To start the simulation, you use this code: -.. code-block:: console - - user@machine:~$ srun job.sh - - -The bash script allows you to customize the environment before running the Python script. 
-This bash script performs such tasks as creating environment variables, moving to -different directories, and printing to ensure your configuration is correct. However, -this bash script is not mandatory. -You can avoid having the ``job.sh`` bash script if the virtual environment is activated -and you pass all the environment variables to the job: - -.. code-block:: console - - user@machine:~$ source /home/user/.venv/bin/activate - (.venv) user@machine:~$ srun python pymapdl_script.py --export=ALL - - -The ``--export=ALL`` argument might not be needed, depending on the cluster configuration. -Furthermore, you can omit the Python call in the preceding command if you include the -Python shebang (``#!/usr/bin/python3``) in the first line of the ``pymapdl_script.py`` script. - -.. code-block:: console - - user@machine:~$ source /home/user/.venv/bin/activate - (.venv) user@machine:~$ srun pymapdl_script.py --export=ALL - -If you prefer to run the job in the background, you can use the ``sbatch`` -command instead of the ``srun`` command. However, in this case, the Bash file is needed: - .. code-block:: console user@machine:~$ sbatch job.sh - Submitted batch job 1 -Here is the expected output of the job: +In this case, the Python virtual environment does not need to be activated +before submission since it is activated later in the script. + +The expected output of the job is .. code-block:: text Number of CPUs: 10.0 -Examples -======== - -For an example that uses a machine learning genetic algorithm in -an HPC system managed by SLURM scheduler, see :ref:`hpc_ml_ga_example`. +The bash script allows you to customize the environment before running the +Python script. +This bash script performs tasks such as creating environment variables, +moving files to different directories, and printing to ensure your +configuration is correct. 
diff --git a/doc/source/user_guide/hpc/settings.rst b/doc/source/user_guide/hpc/settings.rst new file mode 100644 index 00000000000..f4366ab6f05 --- /dev/null +++ b/doc/source/user_guide/hpc/settings.rst @@ -0,0 +1,135 @@ +.. _ref_setting_pymapdl_on_hpc: + +=============== +Setting PyMAPDL +=============== + +Requirements +============ + +Using PyMAPDL in an HPC environment managed by SLURM scheduler has certain +requirements: + +* **An Ansys installation must be accessible from all the compute nodes**. + This normally implies that the ``ANSYS`` installation directory is in a + shared drive or directory. Your HPC cluster administrator + should provide you with the path to the ``ANSYS`` directory. + +* **A compatible Python installation must be accessible from all the compute + nodes**. + For compatible Python versions, see :ref:`ref_pymapdl_installation`. + +Additionally, you must perform a few key steps to ensure efficient job +execution and resource utilization. Subsequent topics describe these steps. + +Check the Python installation +============================= + +The PyMAPDL Python package (``ansys-mapdl-core``) must be installed in +a virtual environment that is accessible from the compute nodes. + +To see where your Python distribution is installed, use this code: + +.. code-block:: console + + user@machine:~$ which python3 + /usr/bin/python3 + +To print the version of Python you have available, use this code: + +.. code-block:: console + + user@machine:~$ python3 --version + Python 3.9.16 + +You should be aware that your machine might have other Python versions +installed. +To find out if those installations are already in the ``PATH`` environment +variable, you can press the **Tab** key to use autocomplete: + +.. 
code-block:: console + + user@machine:~$ which python3[TAB] + python3 python3-intel64 python3.10-config python3.11 python3.12 python3.8 python3.8-intel64 python3.9-config + python3-config python3.10 python3.10-intel64 python3.11-config python3.12-config python3.8-config python3.9 + $ which python3.10 + /usr/bin/python3.10 + +You should use a Python version that is compatible with PyMAPDL. +For more information, see :ref:`ref_pymapdl_installation`. + +.. warning:: + + Contact your cluster administrator if you cannot find a Python version + compatible with PyMAPDL. + + +The ``which`` command returns the path where the Python executable is +installed. +You can use that executable to create your own Python virtual environment +in a directory that is accessible from all the compute nodes. +For most HPC clusters, the ``/home/$user`` directory is generally available +to all nodes. +You can then create the virtual environment in the ``/home/user/.venv`` +directory: + +.. code-block:: console + + user@machine:~$ python3 -m venv /home/user/.venv + +After activating the virtual environment, you can install PyMAPDL. + +.. _ref_install_pymapdl_on_hpc: + +Install PyMAPDL +=============== + +To install PyMAPDL on the activated virtual environment, run the following +commands: + +.. code-block:: console + + user@machine:~$ source /home/user/.venv/bin/activate + (.venv) user@machine:~$ pip install ansys-mapdl-core + Collecting ansys-mapdl-core + Downloading ansys_mapdl_core-0.68.1-py3-none-any.whl (26.9 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 26.9/26.9 MB 37.3 MB/s eta 0:00:00 + Collecting pexpect>=4.8.0 + Using cached pexpect-4.9.0-py2.py3-none-any.whl (63 kB) + Collecting click>=8.1.3 + ... + +To test if this virtual environment is accessible from the compute nodes, +run this ``test.sh`` bash script: + +.. 
code-block:: bash + + #!/bin/bash + #SBATCH --job-name=myjob + #SBATCH --nodes=1 + #SBATCH --ntasks-per-node=4 + #SBATCH --time=01:00:00 + + # Commands to run + echo "Testing Python!" + source /home/user/.venv/bin/activate + python -c "from ansys.mapdl import core;print(f'PyMAPDL version {core.__version__} was successfully imported.')" + +then you can run that script using: + +.. code-block:: console + + user@machine:~$ srun test.sh + +This command might take a minute or two to complete, depending on the amount of +free resources available in the cluster. +On the console, you should see this output: + +.. code-block:: text + + Testing Python! + PyMAPDL version 0.68.1 was successfully imported. + +If you see an error in the output, see :ref:`ref_hpc_troubleshooting`, +especially :ref:`ref_python_venv_not_accesible`. + diff --git a/doc/source/user_guide/hpc/troubleshooting.rst b/doc/source/user_guide/hpc/troubleshooting.rst index 3a41a60537f..8f84a202df1 100644 --- a/doc/source/user_guide/hpc/troubleshooting.rst +++ b/doc/source/user_guide/hpc/troubleshooting.rst @@ -7,8 +7,19 @@ Troubleshooting Debugging jobs -------------- -- Use ``--output`` and ``--error`` directives in batch scripts to capture - standard output and error messages. +- Use ``--output`` and ``--error`` directives in batch scripts to capture + standard output and error messages to specific files. + + .. code-block:: bash + + #!/bin/bash + #SBATCH --job-name=ansys_job # Job name + #SBATCH --partition=qsmall # Specify the queue/partition name + #SBATCH --output=ansys_job.out # Standard output file + #SBATCH --error=ansys_job.err # Standard error file + + source /home/user/pymapdl/.venv/bin/activate + python /home/user/pymapdl.py - Check SLURM logs for error messages and debugging information. 
@@ -19,44 +30,90 @@ Python virtual environment is not accessible -------------------------------------------- If there is an error while testing the Python installation, it might mean that the Python environment is not accessible to the compute nodes. -For example, in the following output, PyMAPDL could not be found, meaning that the script -is not using the virtual environment (``/home/user/.venv``): +For example, given the following *bash* script `test.sh`: + +.. code-block:: bash + + source /home/user/.venv/bin/activate + python -c "from ansys.mapdl import core as pymapdl; pymapdl.report()" + +The following output is shown after running in the terminal: .. code-block:: console user@machine:~$ srun test.sh + Testing Python! Traceback (most recent call last): File "", line 1, in ImportError: No module named ansys.mapdl -This could be for a number of reasons. One of them is that the system Python distribution -used to create the virtual environment is not accessible from the compute nodes +As the output shows, PyMAPDL could not be found, meaning that either: +* The virtual environment does not have PyMAPDL installed. + See :ref:`ref_install_pymapdl_on_hpc`. +* Or the script did not activate properly the virtual environment + (``/home/user/.venv``). + +For the second reason, there could be a number of reasons. +One of them is that the system Python distribution used to create +the virtual environment is not accessible from the compute nodes due to one of these reasons: - The virtual environment has been created in a directory that is not accessible from the nodes. -- The virtual environment has been created from a Python - executable that is not available to the compute nodes. - Hence, the virtual environment is not activated. For - example, you might be creating the virtual environment - using Python 3.10, but only Python 3.8 is available - from the compute nodes. 
- -You can test which Python executable the cluster is using by starting an interactive session in -a compute node with this code: + In this case, your terminal might also show that the + ``activate`` file could not be found. + + .. code-block:: console + + user@machine:~$ srun test.sh + Testing Python! + bash: .venv/bin/activate: No such file or directory + + Depending on your terminal configuration, the above error might be sufficient + to exit the terminal process, or not. + If not, the execution will continue, and the subsequent ``python`` call will + be executed using the default Python executable. + It is very likely that the default ``python`` executable does not have + PyMAPDL installed, hence the ``ImportError`` error shown above might appear + too. + +- The virtual environment has been created from a Python executable that is + not available to the compute nodes. Hence, the virtual environment is not + activated. + For example, you might be creating the virtual environment using + Python 3.10, but only Python 3.8 is available from the compute nodes. + You can test which Python executable the cluster is using by starting an + interactive session in a compute node with this code to list all commands + that start with ``python``: .. code-block:: console user@machine:~$ srun --pty /bin/bash - user@compute_node_01:~$ compgen -c | grep python # List all commands starting with python + user@compute_node_01:~$ compgen -c | grep python .. the approach to solve this comes from: https://stackoverflow.com/questions/64188693/problem-with-python-environment-and-slurm-srun-sbatch +Note that the above approach assumes that all the nodes have similar +configuration, hence all of them should have the same Python installations +available. + +It is also convenient to be aware that environment variable modules can be +used to activate Python installations. +For more information, see :ref:`ref_envvar_modules_on_hpc`. + + +.. 
_ref_envvar_modules_on_hpc: + +Using modules to load Python +---------------------------- + Many HPC infrastructures use environment managers to load and unload -software packages using modules and environment variables. -Hence, you might want to make sure that the correct module is loaded in your script. +software packages using modules and environment variables. +Hence, you might want to make sure that the correct module is loaded in your +script. + For information on two of the most common environment managers, see the `Modules documentation `_ and `Lmod documentation `_. Check your cluster documentation to know which environment @@ -76,12 +133,14 @@ Using the Ansys-provided Python installation **For development purposes only** -In certain HPC environments the possibility of installing a different Python version -is limited for security reasons. In such cases, the Python distribution available in -the Ansys installation can be used. -This Python distribution is a customized Python (CPython) -version for Ansys products use only. Its use is **discouraged** -except for very advanced users and special use cases. +In certain HPC environments the possibility of installing a different Python +version is limited for security reasons. +In such cases, the Python distribution available in the Ansys installation +can be used. +This Python distribution is a customized Python (CPython) version for Ansys +products use only. +Its use is **discouraged** except for very advanced users and special use +cases. This Python distribution is in the following directory, where ``%MAPDL_VERSION%`` is the three-digit Ansys version: @@ -98,7 +157,8 @@ For example, here is the directory for Ansys 2024 R2: In Ansys 2024 R1 and later, the unified installer includes CPython 3.10. -Earlier versions include CPython 3.7 (``/commonfiles/CPython/3_7/linx64/Release/python``). +Earlier versions include CPython 3.7 +(``/commonfiles/CPython/3_7/linx64/Release/python``). 
Because the Ansys installation must be available to all the compute nodes to run simulations using them, this diff --git a/doc/source/user_guide/index.rst b/doc/source/user_guide/index.rst index ef12344826d..c750478f745 100644 --- a/doc/source/user_guide/index.rst +++ b/doc/source/user_guide/index.rst @@ -51,7 +51,9 @@ This section provides a general overview of PyMAPDL and how you use it. :caption: High performance computing hpc/introduction + hpc/settings hpc/pymapdl + hpc/examples hpc/troubleshooting From 492345ba5ebf1872b3abad301ab7b4169f2dfddb Mon Sep 17 00:00:00 2001 From: pyansys-ci-bot <92810346+pyansys-ci-bot@users.noreply.github.com> Date: Mon, 7 Oct 2024 11:34:40 +0000 Subject: [PATCH 005/122] chore: adding changelog file 3466.documentation.md --- doc/changelog.d/3466.documentation.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/changelog.d/3466.documentation.md diff --git a/doc/changelog.d/3466.documentation.md b/doc/changelog.d/3466.documentation.md new file mode 100644 index 00000000000..902767602d1 --- /dev/null +++ b/doc/changelog.d/3466.documentation.md @@ -0,0 +1 @@ +feat: passing tight integration env vars to mapdl \ No newline at end of file From a289dabaaab87bed43b802400d8e2c27c2efea92 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 4 Oct 2024 11:40:48 +0000 Subject: [PATCH 006/122] feat: adding env vars needed for multinode --- src/ansys/mapdl/core/launcher.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 349602c667f..97375fb8890 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1768,6 +1768,18 @@ def launch_mapdl( f"The machine has {machine_cores} cores. PyMAPDL is asking for {nproc} cores." 
) + # Setting env vars + env_vars = update_env_vars(add_env_vars, replace_env_vars) + + if ON_SLURM: + if not env_vars: + env_vars = {} + + env_vars.setdefault("ANS_CMD_NODIAG", "TRUE") + # Passing env vars for MAPDL run on multiple nodes + env_vars.setdefault("ANS_MULTIPLE_NODES", "1") + env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") + start_parm.update( { "exec_file": exec_file, From 604bbf8dd0ba21731e08b5ea757757808b2aa945 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 11:18:55 +0000 Subject: [PATCH 007/122] feat: renaming hpc detection argument --- src/ansys/mapdl/core/launcher.py | 36 ++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 97375fb8890..34aebe9d507 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1089,7 +1089,7 @@ def launch_mapdl( add_env_vars: Optional[Dict[str, str]] = None, replace_env_vars: Optional[Dict[str, str]] = None, version: Optional[Union[int, str]] = None, - detect_slurm_config: bool = True, + detect_HPC: bool = True, **kwargs: Dict[str, Any], ) -> Union[MapdlGrpc, "MapdlConsole"]: """Start MAPDL locally. @@ -1118,12 +1118,15 @@ def launch_mapdl( MAPDL jobname. Defaults to ``'file'``. nproc : int, optional - Number of processors. Defaults to 2. + Number of processors. Defaults to 2. If running on an HPC cluster, + this value is adjusted to the number of CPUs allocated to the job, + unless ``detect_HPC`` is set to "false". ram : float, optional - Total size in megabytes of the workspace (memory) used for the initial allocation. - The default is ``None``, in which case 2 GB (2048 MB) is used. To force a fixed size - throughout the run, specify a negative number. + Total size in megabytes of the workspace (memory) used for the initial + allocation. The default is ``None``, in which case 2 GB (2048 MB) is + used. 
To force a fixed size throughout the run, specify a negative + number. mode : str, optional Mode to launch MAPDL. Must be one of the following: @@ -1276,6 +1279,13 @@ def launch_mapdl( export PYMAPDL_MAPDL_VERSION=22.2 + detect_HPC: bool, optional + Whether to detect if PyMAPDL is running on an HPC cluster or not. Currently + only SLURM clusters are supported. By default, it is set to true. + This option can be bypassed if the environment variable + ``PYMAPDL_ON_SLURM`` is set to "true". For more information visit + :ref:`ref_hpc_slurm`. + kwargs : dict, optional These keyword arguments are interface specific or for development purposes. See Notes for more details. @@ -1447,6 +1457,12 @@ def launch_mapdl( "ANSYSLMD_LICENSE_FILE":"1055@MYSERVER"} >>> mapdl = launch_mapdl(replace_env_vars=my_env_vars) """ + # Checking specific env var + if not nproc: + nproc = os.environ.get("PYMAPDL_NPROC", None) + if nproc: + nproc = int(nproc) + # By default ON_SLURM = os.environ.get("PYMAPDL_ON_SLURM", None) if ON_SLURM is None: @@ -1462,7 +1478,7 @@ def launch_mapdl( and bool(os.environ.get("SLURM_JOB_ID", "")) ) - if detect_slurm_config and ON_SLURM: + if detect_HPC and ON_SLURM: LOG.info("On Slurm mode.") # extracting parameters @@ -2134,7 +2150,7 @@ def get_value( # ntasks is for mpi SLURM_NTASKS = get_value("SLURM_NTASKS", kwargs) LOG.info(f"SLURM_NTASKS: {SLURM_NTASKS}") - # Sharing tasks acrros multiple nodes (DMP) + # Sharing tasks across multiple nodes (DMP) # the format of this envvar is a bit tricky. Avoiding it for the moment.
# SLURM_TASKS_PER_NODE = int( # kwargs.pop( @@ -2178,12 +2194,6 @@ def get_value( jobname = os.environ.get("SLURM_JOB_NAME", "file") LOG.info(f"Using jobname: {jobname}") - # Checking specific env var - if not nproc: - nproc = os.environ.get("PYMAPDL_NPROC", None) - if nproc: - nproc = int(nproc) - if not nproc: ## Attempt to calculate the appropriate number of cores: # Reference: https://stackoverflow.com/a/51141287/6650211 From 1d296519e1758af6c09da85ff9ff5ecc3573a01b Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 11:22:00 +0000 Subject: [PATCH 008/122] docs: adding documentation --- .../extended_examples/hpc/hpc_ml_ga.rst | 2 +- doc/source/user_guide/hpc/pymapdl.rst | 168 +++++++++++++----- doc/source/user_guide/hpc/settings.rst | 49 +++-- doc/source/user_guide/hpc/troubleshooting.rst | 110 +++++++++--- 4 files changed, 244 insertions(+), 85 deletions(-) diff --git a/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst b/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst index 30570b5c6b9..fb87bb7e6d4 100644 --- a/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst +++ b/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst @@ -251,7 +251,7 @@ this script. If you have problems when creating the virtual environment or accessing it from the compute nodes, - see :ref:`ref_hpc_pymapdl_job`. + see :ref:`ref_hpc_troubleshooting`. 3. Install the requirements for this example from the :download:`requirements.txt ` file. diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index e0fddefa78a..c70628f3a7a 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -1,84 +1,168 @@ -.. _ref_hpc_pymapdl: +.. _ref_hpc_pymapdl_job: -============================= -PyMAPDL on SLURM HPC clusters -============================= +======================= +PyMAPDL on HPC Clusters +======================= -.. 
_ref_hpc_pymapdl_job: -Submit a PyMAPDL job -==================== +Introduction +============ -To submit a PyMAPDL job, you must create two files: +PyMAPDL communicates with MAPDL using the gRPC protocol. +This protocol offers many advantages and features, for more information +see :ref:`ref_project_page`. +One of these features is that it is not required to have both, +PyMAPDL and MAPDL processes, running on the same machine. +This possibility opens the door to many configurations, depending +on whether you run them both or not on the HPC compute nodes. +Additionally, you might be able to interact with them (``interactive`` mode) +or not (``batch`` mode). -- Python script with the PyMAPDL code -- Bash script that activates the virtual environment and calls the Python script +Currently, the supported configurations are: + +* `Submit a PyMAPDL batch job to the cluster from the entrypoint node` + + +Since v0.68.5, PyMAPDL can take advantage of the tigh integration +between the scheduler and MAPDL to read the job configuration and +launch an MAPDL instance that can use all the resources allocated +to that job. +For instance, if a SLURM job has allocated 8 nodes with 4 cores each, +then PyMAPDL will launch an MAPDL instance which will use 32 cores +spawning across those 8 nodes. +This behaviour can disabled if passing the environment variable +:envvar:`PYMAPDL_ON_SLURM` or passing the argument `detect_HPC=False` +to :func:`launch_mapdl() `. + + + +Submit a PyMAPDL batch job to the cluster from the entrypoint node +================================================================== + +Many HPC clusters allow their users to log in to a machine using +``ssh``, ``vnc``, ``rdp``, or similar technologies and submit a job +to the cluster from there. +This entrypoint machine, sometimes known as *head node* or *entrypoint node*, +might be a virtual machine (VDI/VM).
+ +In such cases, once the Python virtual environment with PyMAPDL is already +set and is accessible to all the compute nodes, launching a +PyMAPDL job is very easy to do using ``sbatch`` command. +No changes are needed on a PyMAPDL script to run it on an SLURM cluster. + +First the virtual environment must be activated in the current terminal. + +.. code-block:: console + + user@entrypoint-machine:~$ export VENV_PATH=/my/path/to/the/venv + user@entrypoint-machine:~$ source $VENV_PATH/bin/activate -**Python script:** ``pymapdl_script.py`` +Once the virtual environment has been activated, you can launch any Python +script if they do have the proper Python shebang (``#!/usr/bin/env python3``). + +For instance, to launch the following Python script ``main.py``: .. code-block:: python + :caption: ``main.py`` file + + #!/usr/bin/env python3 from ansys.mapdl.core import launch_mapdl - # Number of processors must be lower than the - # number of CPUs allocated for the job. - mapdl = launch_mapdl(nproc=10) + mapdl = launch_mapdl(run_location="/home/ubuntu/tmp/tmp/mapdl", loglevel="debug") - mapdl.prep7() - n_proc = mapdl.get_value("ACTIVE", 0, "NUMCPU") - print(f"Number of CPUs: {n_proc}") + print(mapdl.prep7()) + print(f'Number of CPUs: {mapdl.get_value("ACTIVE", 0, "NUMCPU")}') mapdl.exit() +You can just run in your console: -**Bash script:** ``job.sh`` +.. code-block:: console -.. code-block:: bash + (venv) user@entrypoint-machine:~$ sbatch main.py - source /home/user/.venv/bin/activate - python pymapdl_script.py - -To start the simulation, you use this code: +Alternatively, you can remove the shebang from the python file and use a +Python executable call: .. code-block:: console - user@machine:~$ srun job.sh + (venv) user@entrypoint-machine:~$ sbatch python main.py + +Additionally, you can can change the amount of cores used in your +job, by setting the :envvar:`PYMAPDL_NPROC` to the desired value. + +.. 
code-block:: console + (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch main.py -The bash script allows you to customize the environment before running the Python script. -This bash script performs such tasks as creating environment variables, moving to -different directories, and printing to ensure your configuration is correct. However, -this bash script is not mandatory. -You can avoid having the ``job.sh`` bash script if the virtual environment is activated -and you pass all the environment variables to the job: +You can also add ``sbatch`` options to the command: .. code-block:: console - user@machine:~$ source /home/user/.venv/bin/activate - (.venv) user@machine:~$ srun python pymapdl_script.py --export=ALL + (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch main.py -The ``--export=ALL`` argument might not be needed, depending on the cluster configuration. -Furthermore, you can omit the Python call in the preceding command if you include the -Python shebang (``#!/usr/bin/python3``) in the first line of the ``pymapdl_script.py`` script. +For instance, to launch a PyMAPDL job which start a four cores MAPDL instance +on a 10 CPUs SLURM job, you can use: -.. code-block:: console +.. code-block:: bash + + (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch --partition=qsmall --nodes=10 --ntasks-per-node=1 main.py - user@machine:~$ source /home/user/.venv/bin/activate - (.venv) user@machine:~$ srun pymapdl_script.py --export=ALL -If you prefer to run the job in the background, you can use the ``sbatch`` -command instead of the ``srun`` command. However, in this case, the Bash file is needed: +Using a submission script +------------------------- + +In case you need to customize more your job, you can create a SLURM +submission script to submit a PyMAPDL job. +In this case, you must create two files: + +- Python script with the PyMAPDL code +- Bash script that activates the virtual environment and calls the + Python script. + +.. 
code-block:: python + :caption: ``main.py`` python script + + from ansys.mapdl.core import launch_mapdl + + # Number of processors must be lower than the + # number of CPUs allocated for the job. + mapdl = launch_mapdl(nproc=10) + + mapdl.prep7() + n_proc = mapdl.get_value("ACTIVE", 0, "NUMCPU") + print(f"Number of CPUs: {n_proc}") + + mapdl.exit() + + +.. code-block:: bash + :caption: ``job.sh`` execution script + + source /home/user/.venv/bin/activate + python main.py + +To start the simulation, you use this code: .. code-block:: console user@machine:~$ sbatch job.sh - Submitted batch job 1 -Here is the expected output of the job: +In this case, the Python virtual environment does not need to be activated +before submission since it is activated later in the script. + +The expected output of the job is .. code-block:: text Number of CPUs: 10.0 + +The bash script allows you to customize the environment before running the +Python script. +This bash script performs tasks such as creating environment variables, +moving files to different directories, and printing to ensure your +configuration is correct. diff --git a/doc/source/user_guide/hpc/settings.rst b/doc/source/user_guide/hpc/settings.rst index 7f6af61c63e..f4366ab6f05 100644 --- a/doc/source/user_guide/hpc/settings.rst +++ b/doc/source/user_guide/hpc/settings.rst @@ -7,14 +7,16 @@ Setting PyMAPDL Requirements ============ -Using PyMAPDL in an HPC environment managed by SLURM scheduler has certain requirements: +Using PyMAPDL in an HPC environment managed by SLURM scheduler has certain +requirements: * **An Ansys installation must be accessible from all the compute nodes**. This normally implies that the ``ANSYS`` installation directory is in a shared drive or directory. Your HPC cluster administrator should provide you with the path to the ``ANSYS`` directory. -* **A compatible Python installation must be accessible from all the compute nodes**. 
+* **A compatible Python installation must be accessible from all the compute + nodes**. For compatible Python versions, see :ref:`ref_pymapdl_installation`. Additionally, you must perform a few key steps to ensure efficient job @@ -23,8 +25,8 @@ execution and resource utilization. Subsequent topics describe these steps. Check the Python installation ============================= -The PyMAPDL Python package (``ansys-mapdl-core``) must be installed in a virtual -environment that is accessible from the compute nodes. +The PyMAPDL Python package (``ansys-mapdl-core``) must be installed in +a virtual environment that is accessible from the compute nodes. To see where your Python distribution is installed, use this code: @@ -40,9 +42,10 @@ To print the version of Python you have available, use this code: user@machine:~$ python3 --version Python 3.9.16 -You should be aware that your machine might have installed other Python versions. -To find out if those installations are already in the ``PATH`` environment variable, -you can press the **Tab** key to use autocomplete: +You should be aware that your machine might have other Python versions +installed. +To find out if those installations are already in the ``PATH`` environment +variable, you can press the **Tab** key to use autocomplete: .. code-block:: console @@ -55,11 +58,20 @@ you can press the **Tab** key to use autocomplete: You should use a Python version that is compatible with PyMAPDL. For more information, see :ref:`ref_pymapdl_installation`. -The ``which`` command returns the path where the Python executable is installed. -You can use that executable to create your own Python virtual environment in a directory -that is accessible from all the compute nodes. -For most HPC clusters, the ``/home/$user`` directory is generally available to all nodes. -You can then create the virtual environment in the ``/home/user/.venv`` directory: +.. 
warning:: + + Contact your cluster administrator if you cannot find a Python version + compatible with PyMAPDL. + + +The ``which`` command returns the path where the Python executable is +installed. +You can use that executable to create your own Python virtual environment +in a directory that is accessible from all the compute nodes. +For most HPC clusters, the ``/home/$user`` directory is generally available +to all nodes. +You can then create the virtual environment in the ``/home/user/.venv`` +directory: .. code-block:: console @@ -67,11 +79,13 @@ You can then create the virtual environment in the ``/home/user/.venv`` director After activating the virtual environment, you can install PyMAPDL. +.. _ref_install_pymapdl_on_hpc: Install PyMAPDL =============== -To install PyMAPDL on the activated virtual environment, run the following commands: +To install PyMAPDL on the activated virtual environment, run the following +commands: .. code-block:: console @@ -107,8 +121,8 @@ then you can run that script using: user@machine:~$ srun test.sh -This command might take a minute or two to complete, depending on the amount of free -resources available in the cluster. +This command might take a minute or two to complete, depending on the amount of +free resources available in the cluster. On the console, you should see this output: .. code-block:: text @@ -116,5 +130,6 @@ On the console, you should see this output: Testing Python! PyMAPDL version 0.68.1 was successfully imported. -If you see an error in the output, see :ref:`ref_hpc_troubleshooting`, especially -:ref:`ref_python_venv_not_accesible`. +If you see an error in the output, see :ref:`ref_hpc_troubleshooting`, +especially :ref:`ref_python_venv_not_accesible`. 
+ diff --git a/doc/source/user_guide/hpc/troubleshooting.rst b/doc/source/user_guide/hpc/troubleshooting.rst index 3a41a60537f..8f84a202df1 100644 --- a/doc/source/user_guide/hpc/troubleshooting.rst +++ b/doc/source/user_guide/hpc/troubleshooting.rst @@ -7,8 +7,19 @@ Troubleshooting Debugging jobs -------------- -- Use ``--output`` and ``--error`` directives in batch scripts to capture - standard output and error messages. +- Use ``--output`` and ``--error`` directives in batch scripts to capture + standard output and error messages to specific files. + + .. code-block:: bash + + #!/bin/bash + #SBATCH --job-name=ansys_job # Job name + #SBATCH --partition=qsmall # Specify the queue/partition name + #SBATCH --output=ansys_job.out # Standard output file + #SBATCH --error=ansys_job.err # Standard error file + + source /home/user/pymapdl/.venv/bin/activate + python /home/user/pymapdl.py - Check SLURM logs for error messages and debugging information. @@ -19,44 +30,90 @@ Python virtual environment is not accessible -------------------------------------------- If there is an error while testing the Python installation, it might mean that the Python environment is not accessible to the compute nodes. -For example, in the following output, PyMAPDL could not be found, meaning that the script -is not using the virtual environment (``/home/user/.venv``): +For example, given the following *bash* script ``test.sh``: + +.. code-block:: bash + + source /home/user/.venv/bin/activate + python -c "from ansys.mapdl import core as pymapdl; pymapdl.report()" + +The following output is shown after running in the terminal: .. code-block:: console user@machine:~$ srun test.sh + Testing Python! Traceback (most recent call last): File "", line 1, in ImportError: No module named ansys.mapdl -This could be for a number of reasons.
One of them is that the system Python distribution -used to create the virtual environment is not accessible from the compute nodes +As the output shows, PyMAPDL could not be found, meaning that either: +* The virtual environment does not have PyMAPDL installed. + See :ref:`ref_install_pymapdl_on_hpc`. +* Or the script did not activate properly the virtual environment + (``/home/user/.venv``). + +For the second reason, there could be a number of reasons. +One of them is that the system Python distribution used to create +the virtual environment is not accessible from the compute nodes due to one of these reasons: - The virtual environment has been created in a directory that is not accessible from the nodes. -- The virtual environment has been created from a Python - executable that is not available to the compute nodes. - Hence, the virtual environment is not activated. For - example, you might be creating the virtual environment - using Python 3.10, but only Python 3.8 is available - from the compute nodes. - -You can test which Python executable the cluster is using by starting an interactive session in -a compute node with this code: + In this case, your terminal might also show that the + ``activate`` file could not be found. + + .. code-block:: console + + user@machine:~$ srun test.sh + Testing Python! + bash: .venv/bin/activate: No such file or directory + + Depending on your terminal configuration, the above error might be sufficient + to exit the terminal process, or not. + If not, the execution will continue, and the subsequent ``python`` call will + be executed using the default python executable. + It is very likely that the default ``python`` executable does not have + PyMAPDL installed, hence the ``ImportError`` error showed above might appear + too. + +- The virtual environment has been created from a Python executable that is + not available to the compute nodes. Hence, the virtual environment is not + activated. 
+ For example, you might be creating the virtual environment using + Python 3.10, but only Python 3.8 is available from the compute nodes. + You can test which Python executable the cluster is using by starting an + interactive session in a compute node with this code to list all commands + that start with ``python``: .. code-block:: console user@machine:~$ srun --pty /bin/bash - user@compute_node_01:~$ compgen -c | grep python # List all commands starting with python + user@compute_node_01:~$ compgen -c | grep python .. the approach to solve this comes from: https://stackoverflow.com/questions/64188693/problem-with-python-environment-and-slurm-srun-sbatch +It should be noticed the above approach assumes that all the nodes have similar +configuration, hence all of them should have the same Python installations +available. + +It is also convenient to be aware that environment variable modules can be +used to activate Python installations. +For more information, see :ref:`ref_envvar_modules_on_hpc`. + + +.. _ref_envvar_modules_on_hpc: + +Using modules to load Python +---------------------------- + Many HPC infrastructures use environment managers to load and unload -software packages using modules and environment variables. -Hence, you might want to make sure that the correct module is loaded in your script. +software packages using modules and environment variables. +Hence, you might want to make sure that the correct module is loaded in your +script. + For information on two of the most common environment managers, see the `Modules documentation `_ and `Lmod documentation `_. Check your cluster documentation to know which environment @@ -76,12 +133,14 @@ Using the Ansys-provided Python installation **For development purposes only** -In certain HPC environments the possibility of installing a different Python version -is limited for security reasons. In such cases, the Python distribution available in -the Ansys installation can be used.
-This Python distribution is a customized Python (CPython) -version for Ansys products use only. Its use is **discouraged** -except for very advanced users and special use cases. +In certain HPC environments the possibility of installing a different Python +version is limited for security reasons. +In such cases, the Python distribution available in the Ansys installation +can be used. +This Python distribution is a customized Python (CPython) version for Ansys +products use only. +Its use is **discouraged** except for very advanced users and special use +cases. This Python distribution is in the following directory, where ``%MAPDL_VERSION%`` is the three-digit Ansys version: @@ -98,7 +157,8 @@ For example, here is the directory for Ansys 2024 R2: In Ansys 2024 R1 and later, the unified installer includes CPython 3.10. -Earlier versions include CPython 3.7 (``/commonfiles/CPython/3_7/linx64/Release/python``). +Earlier versions include CPython 3.7 +(``/commonfiles/CPython/3_7/linx64/Release/python``). 
Because the Ansys installation must be available to all the compute nodes to run simulations using them, this From 96929a8f6302bfb855bbae492fa2e463cf923321 Mon Sep 17 00:00:00 2001 From: pyansys-ci-bot <92810346+pyansys-ci-bot@users.noreply.github.com> Date: Mon, 7 Oct 2024 11:34:40 +0000 Subject: [PATCH 009/122] chore: adding changelog file 3466.documentation.md --- doc/changelog.d/3466.documentation.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/changelog.d/3466.documentation.md diff --git a/doc/changelog.d/3466.documentation.md b/doc/changelog.d/3466.documentation.md new file mode 100644 index 00000000000..902767602d1 --- /dev/null +++ b/doc/changelog.d/3466.documentation.md @@ -0,0 +1 @@ +feat: passing tight integration env vars to mapdl \ No newline at end of file From 6ab1d65bc376ae823e79a39cbe043f74f10446f9 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 12:36:55 +0000 Subject: [PATCH 010/122] fix: vale issues --- doc/source/user_guide/hpc/pymapdl.rst | 16 ++++++++-------- doc/source/user_guide/hpc/troubleshooting.rst | 14 +++++++------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index c70628f3a7a..f24134509f9 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -29,9 +29,9 @@ between the scheduler and MAPDL to read the job configuration and launch an MAPDL instance that can use all the resources allocated to that job. For instance, if a SLURM job has allocated 8 nodes with 4 cores each, -then PyMAPDL will launch an MAPDL instance which will use 32 cores +then PyMAPDL launches an MAPDL instance which uses 32 cores spawning across those 8 nodes. 
-This behaviour can disabled if passing the environment variable +This behaviour can turn off if passing the environment variable :envvar:`PYMAPDL_ON_SLURM` or passing the argument `detect_HPC=False` to :func:`launch_mapdl() `. @@ -73,7 +73,7 @@ For instance, to launch the following Python script ``main.py``: mapdl = launch_mapdl(run_location="/home/ubuntu/tmp/tmp/mapdl", loglevel="debug") print(mapdl.prep7()) - print(f'Number of CPUs: {mapdl.get_value("ACTIVE", 0, "NUMCPU")}') + print(f'Number of CPU: {mapdl.get_value("ACTIVE", 0, "NUMCPU")}') mapdl.exit() @@ -90,7 +90,7 @@ Python executable call: (venv) user@entrypoint-machine:~$ sbatch python main.py -Additionally, you can can change the amount of cores used in your +Additionally, you can change the amount of cores used in your job, by setting the :envvar:`PYMAPDL_NPROC` to the desired value. .. code-block:: console @@ -105,7 +105,7 @@ You can also add ``sbatch`` options to the command: For instance, to launch a PyMAPDL job which start a four cores MAPDL instance -on a 10 CPUs SLURM job, you can use: +on a 10 CPU SLURM job, you can use: .. code-block:: bash @@ -129,12 +129,12 @@ In this case, you must create two files: from ansys.mapdl.core import launch_mapdl # Number of processors must be lower than the - # number of CPUs allocated for the job. + # number of CPU allocated for the job. mapdl = launch_mapdl(nproc=10) mapdl.prep7() n_proc = mapdl.get_value("ACTIVE", 0, "NUMCPU") - print(f"Number of CPUs: {n_proc}") + print(f"Number of CPU: {n_proc}") mapdl.exit() @@ -158,7 +158,7 @@ The expected output of the job is .. 
code-block:: text - Number of CPUs: 10.0 + Number of CPU: 10.0 The bash script allows you to customize the environment before running the diff --git a/doc/source/user_guide/hpc/troubleshooting.rst b/doc/source/user_guide/hpc/troubleshooting.rst index 8f84a202df1..9e79d0fc8ba 100644 --- a/doc/source/user_guide/hpc/troubleshooting.rst +++ b/doc/source/user_guide/hpc/troubleshooting.rst @@ -70,13 +70,13 @@ due to one of these reasons: Testing Python! bash: .venv/bin/activate: No such file or directory - Depending on your terminal configuration, the above error might be sufficient - to exit the terminal process, or not. - If not, the execution will continue, and the subsequent ``python`` call will - be executed using the default python executable. + Depending on your terminal configuration, the preceding error might be + sufficient to exit the terminal process, or not. + If not, the execution continues, and the subsequent ``python`` call is + executed using the default python executable. It is very likely that the default ``python`` executable does not have - PyMAPDL installed, hence the ``ImportError`` error showed above might appear - too. + PyMAPDL installed, hence the ``ImportError`` error showed preceding might + appear too. - The virtual environment has been created from a Python executable that is not available to the compute nodes. Hence, the virtual environment is not @@ -95,7 +95,7 @@ due to one of these reasons: .. the approach to solve this comes from: https://stackoverflow.com/questions/64188693/problem-with-python-environment-and-slurm-srun-sbatch -It should be noticed the above approach assumes that all the nodes have similar +It should be noticed the preceding approach assumes that all the nodes have similar configuration, hence all of them should have the same Python installations available. 
From e45d2e5d4fb97359605f445f462fa4b9cf76515a Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:35:36 +0200 Subject: [PATCH 011/122] chore: To fix sphinx build Squashed commit of the following: commit c1d1a3ea278e6461bcc91e1c965f6e6a46d00bc3 Author: German <28149841+germa89@users.noreply.github.com> Date: Mon Oct 7 15:33:19 2024 +0200 ci: retrigger CICD commit b7b5c30a422413d203a31f5a29b7e57f93a0ab08 Author: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon Oct 7 13:31:55 2024 +0000 ci: auto fixes from pre-commit.com hooks. for more information, see https://pre-commit.ci commit 32a1c0203fc5101f429aafafba26a28cc06bf24c Author: Revathy Venugopal <104772255+Revathyvenugopal162@users.noreply.github.com> Date: Mon Oct 7 15:31:24 2024 +0200 fix: add suggestions Co-authored-by: German <28149841+germa89@users.noreply.github.com> commit 575a219ef8b135b234f2ec5f24a9585298845eca Merge: f2afe139f be1be2e2c Author: Revathyvenugopal162 Date: Mon Oct 7 15:09:01 2024 +0200 Merge branch 'fix/add-build-cheatsheet-as-env-varaible' of https://github.com/ansys/pymapdl into fix/add-build-cheatsheet-as-env-varaible commit f2afe139f693f4f1979506662c514692280487a9 Author: Revathyvenugopal162 Date: Mon Oct 7 15:08:58 2024 +0200 fix: precommit commit be1be2e2ca4f8736db0b180ab3d8cc6bff696412 Author: pyansys-ci-bot <92810346+pyansys-ci-bot@users.noreply.github.com> Date: Mon Oct 7 13:07:35 2024 +0000 chore: adding changelog file 3468.fixed.md commit f052a4dba77cb586be59232d2627d7814077f094 Author: Revathyvenugopal162 Date: Mon Oct 7 15:05:56 2024 +0200 fix: add build cheatsheet as env variable within doc-build --- .github/workflows/ci.yml | 1 + doc/changelog.d/3468.fixed.md | 1 + doc/source/conf.py | 10 +++++++--- 3 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 doc/changelog.d/3468.fixed.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 30f13961753..543cc91b08c 
100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,6 +34,7 @@ env: MAPDL_PACKAGE: ghcr.io/ansys/mapdl ON_CI: True PYTEST_ARGUMENTS: '-vvv -ra --durations=10 --maxfail=3 --reruns 3 --reruns-delay 4 --cov=ansys.mapdl.core --cov-report=html' + BUILD_CHEATSHEET: True # Following env vars when changed will "reset" the mentioned cache, # by changing the cache file name. It is rendered as ...-v%RESET_XXX%-... diff --git a/doc/changelog.d/3468.fixed.md b/doc/changelog.d/3468.fixed.md new file mode 100644 index 00000000000..ab369c1e41b --- /dev/null +++ b/doc/changelog.d/3468.fixed.md @@ -0,0 +1 @@ +fix: add ``build cheatsheet`` as env variable within doc-build \ No newline at end of file diff --git a/doc/source/conf.py b/doc/source/conf.py index f874f6f4c74..90bcebb79da 100755 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -311,13 +311,17 @@ "json_url": f"https://{cname}/versions.json", "version_match": switcher_version, }, - "cheatsheet": { +} + +BUILD_CHEATSHEET = os.environ.get("BUILD_CHEATSHEET", "true").lower() == "true" + +if BUILD_CHEATSHEET: + html_theme_options["cheatsheet"] = { "file": "cheat_sheet/cheat_sheet.qmd", "title": "PyMAPDL cheat sheet", "version": f"v{version}", "pages": ["getting_started/learning"], - }, -} + } html_context = { "display_github": True, # Integrate GitHub From bb2b90afbbb07fbd86069618d27f3276c32db726 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:56:36 +0200 Subject: [PATCH 012/122] docs: expanding a bit troubleshooting advices and small format fix --- doc/source/user_guide/hpc/pymapdl.rst | 11 ++++--- doc/source/user_guide/hpc/troubleshooting.rst | 31 ++++++++++++++++--- doc/source/user_guide/troubleshoot.rst | 1 + 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index f24134509f9..070cfdcda09 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ 
b/doc/source/user_guide/hpc/pymapdl.rst @@ -21,7 +21,7 @@ or not (``batch`` mode). Currently, the supported configurations are: -* `Submit a PyMAPDL batch job to the cluster from the entrypoint node` +* :ref:`ref_pymapdl_batch_in_cluster_hpc` Since v0.68.5, PyMAPDL can take advantage of the tigh integration @@ -36,6 +36,7 @@ This behaviour can turn off if passing the environment variable to :func:`launch_mapdl() `. +.. _ref_pymapdl_batch_in_cluster_hpc: Submit a PyMAPDL batch job to the cluster from the entrypoint node ================================================================== @@ -64,7 +65,7 @@ script if they do have the proper Python shebang (``#!/usr/bin/env python3``). For instance, to launch the following Python script ``main.py``: .. code-block:: python - :caption: ``main.py`` file + :caption: main.py #!/usr/bin/env python3 @@ -107,7 +108,7 @@ You can also add ``sbatch`` options to the command: For instance, to launch a PyMAPDL job which start a four cores MAPDL instance on a 10 CPU SLURM job, you can use: -.. code-block:: bash +.. code-block:: console (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch --partition=qsmall --nodes=10 --ntasks-per-node=1 main.py @@ -124,7 +125,7 @@ In this case, you must create two files: Python script. .. code-block:: python - :caption: ``main.py`` python script + :caption: main.py from ansys.mapdl.core import launch_mapdl @@ -140,7 +141,7 @@ In this case, you must create two files: .. code-block:: bash - :caption: ``job.sh`` execution script + :caption: job.sh source /home/user/.venv/bin/activate python main.py diff --git a/doc/source/user_guide/hpc/troubleshooting.rst b/doc/source/user_guide/hpc/troubleshooting.rst index 9e79d0fc8ba..528c00fea9d 100644 --- a/doc/source/user_guide/hpc/troubleshooting.rst +++ b/doc/source/user_guide/hpc/troubleshooting.rst @@ -22,6 +22,26 @@ Debugging jobs python /home/user/pymapdl.py - Check SLURM logs for error messages and debugging information. 
+- It is also a good idea to print the environment variables in your bash script, using + ``printenv``. Additionally, you can filter them using ``grep``. + + .. code-block:: bash + + #!/bin/bash + #SBATCH --job-name=ansys_job # Job name + #SBATCH --partition=qsmall # Specify the queue/partition name + #SBATCH --output=ansys_job.out # Standard output file + #SBATCH --error=ansys_job.err # Standard error file + + printenv | grep "PYMAPDL" # Print env vars that contain 'PYMAPDL' + printenv | grep "SLURM" # Print env vars that contain 'SLURM' + source /home/user/pymapdl/.venv/bin/activate + python /home/user/pymapdl.py + +- Use PyMAPDL logging to print out valuable information. To activate this, see + :ref:`ref_debug_pymapdl`. + +- In case you need more help, visit :ref:`ref_troubleshooting`. .. _ref_python_venv_not_accesible: @@ -49,8 +69,10 @@ The following output is shown after running in the terminal: ImportError: No module named ansys.mapdl As the output shows, PyMAPDL could not be found, meaning that either: + * The virtual environment does not have PyMAPDL installed. See :ref:`ref_install_pymapdl_on_hpc`. + * Or the script did not activate properly the virtual environment (``/home/user/.venv``). @@ -59,10 +81,9 @@ One of them is that the system Python distribution used to create the virtual environment is not accessible from the compute nodes due to one of these reasons: -- The virtual environment has been created in a - directory that is not accessible from the nodes. - In this case, your terminal might also show that the - ``activate`` file could not be found. +- The virtual environment has been created in a directory that is + not accessible from the nodes. In this case, your terminal might + also show that the ``activate`` file could not be found. ..
code-block:: console @@ -176,6 +197,8 @@ the compute nodes: user@machine:~$ export PY_PATH=/ansys_inc/v241/commonfiles/CPython/3_10/linx64/Release/Python + This path needs to be adapted to where Ansys is installed and also which version is used. + #. For only Ansys 2024 R1 and earlier, patch the ``PATH`` and ``LD_LIBRARY_PATH`` environment variables: diff --git a/doc/source/user_guide/troubleshoot.rst b/doc/source/user_guide/troubleshoot.rst index 54cf12d0c70..74a2b63f35b 100644 --- a/doc/source/user_guide/troubleshoot.rst +++ b/doc/source/user_guide/troubleshoot.rst @@ -8,6 +8,7 @@ Troubleshooting PyMAPDL To help you resolve any problems that you might have when using PyMAPDL, some of the most common problems and frequently asked questions are posted here. +.. _ref_debug_pymapdl: Debug in PyMAPDL ---------------- From 330f33c30a966eeda24aed4bd5caabb30d97511e Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 14:03:57 +0000 Subject: [PATCH 013/122] docs: fix vale --- doc/source/user_guide/hpc/pymapdl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index f24134509f9..5a3faa99625 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -24,7 +24,7 @@ Currently, the supported configurations are: * `Submit a PyMAPDL batch job to the cluster from the entrypoint node` -Since v0.68.5, PyMAPDL can take advantage of the tigh integration +Since v0.68.5, PyMAPDL can take advantage of the tight integration between the scheduler and MAPDL to read the job configuration and launch an MAPDL instance that can use all the resources allocated to that job. 
From ac54f2c987817093698f5cd3d0c101e637038d29 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 16:42:24 +0200 Subject: [PATCH 014/122] fix: nproc tests --- src/ansys/mapdl/core/launcher.py | 1 + tests/test_launcher.py | 23 +++++++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 34aebe9d507..18fa51c27e2 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -2295,4 +2295,5 @@ def pack_parameters(locals_var): dict_["start_instance"] = locals_var["start_instance"] dict_["version"] = locals_var["version"] dict_["additional_switches"] = locals_var["additional_switches"] + dict_["nproc"] = locals_var["nproc"] return dict_ diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 5be0570c311..1b66a598633 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -604,7 +604,6 @@ def test_deprecate_verbose(): ), pytest.param( { - "PYMAPDL_NPROC": 5, "SLURM_JOB_NAME": "myawesomejob", "SLURM_NTASKS": 2, "SLURM_CPUS_PER_TASK": 2, @@ -613,12 +612,11 @@ def test_deprecate_verbose(): "SLURM_MEM_PER_NODE": None, "SLURM_NODELIST": None, }, - {"nproc": 5, "jobname": "myawesomejob"}, - id="Testing PYMAPDL_NPROC and SLURM_JOB_NAME", + {"nproc": 4, "jobname": "myawesomejob"}, + id="Testing SLURM_JOB_NAME", ), pytest.param( { - "PYMAPDL_NPROC": 5, "SLURM_JOB_NAME": "myawesomejob", "SLURM_NTASKS": 2, "SLURM_CPUS_PER_TASK": 2, @@ -628,8 +626,8 @@ def test_deprecate_verbose(): "SLURM_NODELIST": None, "PYMAPDL_MAPDL_EXEC": "asdf/qwer/poiu", }, - {"nproc": 5, "jobname": "myawesomejob", "exec_file": "asdf/qwer/poiu"}, - id="Testing PYMAPDL_NPROC and SLURM_JOB_NAME", + {"nproc": 4, "jobname": "myawesomejob", "exec_file": "asdf/qwer/poiu"}, + id="Testing PYMAPDL_MAPDL_EXEC and SLURM_JOB_NAME", ), ), indirect=["set_env_var_context"], @@ -782,3 +780,16 @@ def test_ip_and_start_instance( 
assert options["ip"] == ip else: assert options["ip"] in (LOCALHOST, "0.0.0.0") + + +def test_nproc_envvar(monkeypatch): + monkeypatch.setenv("PYMAPDL_NPROC", 10) + args = launch_mapdl(_debug_no_launch=True) + assert args["nproc"] == 10 + + +@pytest.mark.parametrize("nproc,result", [[None, 2], [5, 5]]) +def test_nproc(monkeypatch, nproc, result): + monkeypatch.delenv("PYMAPDL_START_INSTANCE") + args = launch_mapdl(nproc=nproc, _debug_no_launch=True) + assert args["nproc"] == result From 6985ee4adbc37528adea01bc1bfab48234172c33 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 4 Oct 2024 11:40:48 +0000 Subject: [PATCH 015/122] feat: adding env vars needed for multinode --- src/ansys/mapdl/core/launcher.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 349602c667f..97375fb8890 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1768,6 +1768,18 @@ def launch_mapdl( f"The machine has {machine_cores} cores. PyMAPDL is asking for {nproc} cores." 
) + # Setting env vars + env_vars = update_env_vars(add_env_vars, replace_env_vars) + + if ON_SLURM: + if not env_vars: + env_vars = {} + + env_vars.setdefault("ANS_CMD_NODIAG", "TRUE") + # Passing env vars for MAPDL run on multiple nodes + env_vars.setdefault("ANS_MULTIPLE_NODES", "1") + env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") + start_parm.update( { "exec_file": exec_file, From 03a05e61b821b91a29f12f2c053524e13fe2347f Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 11:18:55 +0000 Subject: [PATCH 016/122] feat: renaming hpc detection argument --- src/ansys/mapdl/core/launcher.py | 36 ++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 97375fb8890..34aebe9d507 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1089,7 +1089,7 @@ def launch_mapdl( add_env_vars: Optional[Dict[str, str]] = None, replace_env_vars: Optional[Dict[str, str]] = None, version: Optional[Union[int, str]] = None, - detect_slurm_config: bool = True, + detect_HPC: bool = True, **kwargs: Dict[str, Any], ) -> Union[MapdlGrpc, "MapdlConsole"]: """Start MAPDL locally. @@ -1118,12 +1118,15 @@ def launch_mapdl( MAPDL jobname. Defaults to ``'file'``. nproc : int, optional - Number of processors. Defaults to 2. + Number of processors. Defaults to 2. If running on an HPC cluster, + this value is adjusted to the number of CPUs allocated to the job, + unless ``detect_HPC`` is set to "false". ram : float, optional - Total size in megabytes of the workspace (memory) used for the initial allocation. - The default is ``None``, in which case 2 GB (2048 MB) is used. To force a fixed size - throughout the run, specify a negative number. + Total size in megabytes of the workspace (memory) used for the initial + allocation. The default is ``None``, in which case 2 GB (2048 MB) is + used. 
To force a fixed size throughout the run, specify a negative + number. mode : str, optional Mode to launch MAPDL. Must be one of the following: @@ -1276,6 +1279,13 @@ def launch_mapdl( export PYMAPDL_MAPDL_VERSION=22.2 + detect_HPC: bool, optional + Whether to detect if PyMAPDL is running on an HPC cluster or not. Currently + only SLURM clusters are supported. By default, it is set to true. + This option can be bypassed if the environment variable + ``PYMAPDL_ON_SLURM`` is set to "true". For more information visit + :ref:`ref_hpc_slurm`. + kwargs : dict, optional These keyword arguments are interface specific or for development purposes. See Notes for more details. @@ -1447,6 +1457,12 @@ def launch_mapdl( "ANSYSLMD_LICENSE_FILE":"1055@MYSERVER"} >>> mapdl = launch_mapdl(replace_env_vars=my_env_vars) """ + # Checking specific env var + if not nproc: + nproc = os.environ.get("PYMAPDL_NPROC", None) + if nproc: + nproc = int(nproc) + # By default ON_SLURM = os.environ.get("PYMAPDL_ON_SLURM", None) if ON_SLURM is None: @@ -1462,7 +1478,7 @@ def launch_mapdl( and bool(os.environ.get("SLURM_JOB_ID", "")) ) - if detect_slurm_config and ON_SLURM: + if detect_HPC and ON_SLURM: LOG.info("On Slurm mode.") # extracting parameters @@ -2134,7 +2150,7 @@ def get_value( # ntasks is for mpi SLURM_NTASKS = get_value("SLURM_NTASKS", kwargs) LOG.info(f"SLURM_NTASKS: {SLURM_NTASKS}") - # Sharing tasks acrros multiple nodes (DMP) + # Sharing tasks across multiple nodes (DMP) # the format of this envvar is a bit tricky. Avoiding it for the moment.
# SLURM_TASKS_PER_NODE = int( # kwargs.pop( @@ -2178,12 +2194,6 @@ def get_value( jobname = os.environ.get("SLURM_JOB_NAME", "file") LOG.info(f"Using jobname: {jobname}") - # Checking specific env var - if not nproc: - nproc = os.environ.get("PYMAPDL_NPROC", None) - if nproc: - nproc = int(nproc) - if not nproc: ## Attempt to calculate the appropriate number of cores: # Reference: https://stackoverflow.com/a/51141287/6650211 From d9e3b0d7ad0cd81fce5f256d71c5afbf289b9b8a Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 11:22:00 +0000 Subject: [PATCH 017/122] docs: adding documentation --- .../extended_examples/hpc/hpc_ml_ga.rst | 2 +- doc/source/user_guide/hpc/pymapdl.rst | 168 +++++++++++++----- doc/source/user_guide/hpc/settings.rst | 49 +++-- doc/source/user_guide/hpc/troubleshooting.rst | 110 +++++++++--- 4 files changed, 244 insertions(+), 85 deletions(-) diff --git a/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst b/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst index 30570b5c6b9..fb87bb7e6d4 100644 --- a/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst +++ b/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst @@ -251,7 +251,7 @@ this script. If you have problems when creating the virtual environment or accessing it from the compute nodes, - see :ref:`ref_hpc_pymapdl_job`. + see :ref:`ref_hpc_troubleshooting`. 3. Install the requirements for this example from the :download:`requirements.txt ` file. diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index e0fddefa78a..c70628f3a7a 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -1,84 +1,168 @@ -.. _ref_hpc_pymapdl: +.. _ref_hpc_pymapdl_job: -============================= -PyMAPDL on SLURM HPC clusters -============================= +======================= +PyMAPDL on HPC Clusters +======================= -.. 
_ref_hpc_pymapdl_job: -Submit a PyMAPDL job -==================== +Introduction +============ -To submit a PyMAPDL job, you must create two files: +PyMAPDL communicates with MAPDL using the gRPC protocol. +This protocol offers many advantages and features, for more information +see :ref:`ref_project_page`. +One of these features is that it is not required to have both, +PyMAPDL and MAPDL processes, running on the same machine. +This possibility opens the door to many configurations, depending +on whether you run them both or not on the HPC compute nodes. +Additionally, you might be able to interact with them (``interactive`` mode) +or not (``batch`` mode). -- Python script with the PyMAPDL code -- Bash script that activates the virtual environment and calls the Python script +Currently, the supported configurations are: + +* `Submit a PyMAPDL batch job to the cluster from the entrypoint node` + + +Since v0.68.5, PyMAPDL can take advantage of the tigh integration +between the scheduler and MAPDL to read the job configuration and +launch an MAPDL instance that can use all the resources allocated +to that job. +For instance, if a SLURM job has allocated 8 nodes with 4 cores each, +then PyMAPDL will launch an MAPDL instance which will use 32 cores +spawning across those 8 nodes. +This behaviour can disabled if passing the environment variable +:envvar:`PYMAPDL_ON_SLURM` or passing the argument `detect_HPC=False` +to :func:`launch_mapdl() `. + + + +Submit a PyMAPDL batch job to the cluster from the entrypoint node +================================================================== + +Many HPC clusters allow their users to login in a machine using +``ssh``, ``vnc``, ``rdp``, or similar technologies and submit a job +to the cluster from there. +This entrypoint machine, sometimes known as *head node* or *entrypoint node*, +might be a virtual machine (VDI/VM).
+ +In such cases, once the Python virtual environment with PyMAPDL is already +set and is accessible to all the compute nodes, launching a +PyMAPDL job is very easy to do using ``sbatch`` command. +No changes are needed on a PyMAPDL script to run it on an SLURM cluster. + +First the virtual environment must be activated in the current terminal. + +.. code-block:: console + + user@entrypoint-machine:~$ export VENV_PATH=/my/path/to/the/venv + user@entrypoint-machine:~$ source $VENV_PATH/bin/activate -**Python script:** ``pymapdl_script.py`` +Once the virtual environment has been activated, you can launch any Python +script if they do have the proper Python shebang (``#!/usr/bin/env python3``). + +For instance, to launch the following Python script ``main.py``: .. code-block:: python + :caption: ``main.py`` file + + #!/usr/bin/env python3 from ansys.mapdl.core import launch_mapdl - # Number of processors must be lower than the - # number of CPUs allocated for the job. - mapdl = launch_mapdl(nproc=10) + mapdl = launch_mapdl(run_location="/home/ubuntu/tmp/tmp/mapdl", loglevel="debug") - mapdl.prep7() - n_proc = mapdl.get_value("ACTIVE", 0, "NUMCPU") - print(f"Number of CPUs: {n_proc}") + print(mapdl.prep7()) + print(f'Number of CPUs: {mapdl.get_value("ACTIVE", 0, "NUMCPU")}') mapdl.exit() +You can just run in your console: -**Bash script:** ``job.sh`` +.. code-block:: console -.. code-block:: bash + (venv) user@entrypoint-machine:~$ sbatch main.py - source /home/user/.venv/bin/activate - python pymapdl_script.py - -To start the simulation, you use this code: +Alternatively, you can remove the shebang from the python file and use a +Python executable call: .. code-block:: console - user@machine:~$ srun job.sh + (venv) user@entrypoint-machine:~$ sbatch python main.py + +Additionally, you can can change the amount of cores used in your +job, by setting the :envvar:`PYMAPDL_NPROC` to the desired value. + +.. 
code-block:: console + (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch main.py -The bash script allows you to customize the environment before running the Python script. -This bash script performs such tasks as creating environment variables, moving to -different directories, and printing to ensure your configuration is correct. However, -this bash script is not mandatory. -You can avoid having the ``job.sh`` bash script if the virtual environment is activated -and you pass all the environment variables to the job: +You can also add ``sbatch`` options to the command: .. code-block:: console - user@machine:~$ source /home/user/.venv/bin/activate - (.venv) user@machine:~$ srun python pymapdl_script.py --export=ALL + (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch main.py -The ``--export=ALL`` argument might not be needed, depending on the cluster configuration. -Furthermore, you can omit the Python call in the preceding command if you include the -Python shebang (``#!/usr/bin/python3``) in the first line of the ``pymapdl_script.py`` script. +For instance, to launch a PyMAPDL job which start a four cores MAPDL instance +on a 10 CPUs SLURM job, you can use: -.. code-block:: console +.. code-block:: bash + + (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch --partition=qsmall --nodes=10 --ntasks-per-node=1 main.py - user@machine:~$ source /home/user/.venv/bin/activate - (.venv) user@machine:~$ srun pymapdl_script.py --export=ALL -If you prefer to run the job in the background, you can use the ``sbatch`` -command instead of the ``srun`` command. However, in this case, the Bash file is needed: +Using a submission script +------------------------- + +In case you need to customize more your job, you can create a SLURM +submission script to submit a PyMAPDL job. +In this case, you must create two files: + +- Python script with the PyMAPDL code +- Bash script that activates the virtual environment and calls the + Python script. + +.. 
code-block:: python + :caption: ``main.py`` python script + + from ansys.mapdl.core import launch_mapdl + + # Number of processors must be lower than the + # number of CPUs allocated for the job. + mapdl = launch_mapdl(nproc=10) + + mapdl.prep7() + n_proc = mapdl.get_value("ACTIVE", 0, "NUMCPU") + print(f"Number of CPUs: {n_proc}") + + mapdl.exit() + + +.. code-block:: bash + :caption: ``job.sh`` execution script + + source /home/user/.venv/bin/activate + python main.py + +To start the simulation, you use this code: .. code-block:: console user@machine:~$ sbatch job.sh - Submitted batch job 1 -Here is the expected output of the job: +In this case, the Python virtual environment does not need to be activated +before submission since it is activated later in the script. + +The expected output of the job is .. code-block:: text Number of CPUs: 10.0 + +The bash script allows you to customize the environment before running the +Python script. +This bash script performs tasks such as creating environment variables, +moving files to different directories, and printing to ensure your +configuration is correct. diff --git a/doc/source/user_guide/hpc/settings.rst b/doc/source/user_guide/hpc/settings.rst index 7f6af61c63e..f4366ab6f05 100644 --- a/doc/source/user_guide/hpc/settings.rst +++ b/doc/source/user_guide/hpc/settings.rst @@ -7,14 +7,16 @@ Setting PyMAPDL Requirements ============ -Using PyMAPDL in an HPC environment managed by SLURM scheduler has certain requirements: +Using PyMAPDL in an HPC environment managed by SLURM scheduler has certain +requirements: * **An Ansys installation must be accessible from all the compute nodes**. This normally implies that the ``ANSYS`` installation directory is in a shared drive or directory. Your HPC cluster administrator should provide you with the path to the ``ANSYS`` directory. -* **A compatible Python installation must be accessible from all the compute nodes**. 
+* **A compatible Python installation must be accessible from all the compute + nodes**. For compatible Python versions, see :ref:`ref_pymapdl_installation`. Additionally, you must perform a few key steps to ensure efficient job @@ -23,8 +25,8 @@ execution and resource utilization. Subsequent topics describe these steps. Check the Python installation ============================= -The PyMAPDL Python package (``ansys-mapdl-core``) must be installed in a virtual -environment that is accessible from the compute nodes. +The PyMAPDL Python package (``ansys-mapdl-core``) must be installed in +a virtual environment that is accessible from the compute nodes. To see where your Python distribution is installed, use this code: @@ -40,9 +42,10 @@ To print the version of Python you have available, use this code: user@machine:~$ python3 --version Python 3.9.16 -You should be aware that your machine might have installed other Python versions. -To find out if those installations are already in the ``PATH`` environment variable, -you can press the **Tab** key to use autocomplete: +You should be aware that your machine might have other Python versions +installed. +To find out if those installations are already in the ``PATH`` environment +variable, you can press the **Tab** key to use autocomplete: .. code-block:: console @@ -55,11 +58,20 @@ you can press the **Tab** key to use autocomplete: You should use a Python version that is compatible with PyMAPDL. For more information, see :ref:`ref_pymapdl_installation`. -The ``which`` command returns the path where the Python executable is installed. -You can use that executable to create your own Python virtual environment in a directory -that is accessible from all the compute nodes. -For most HPC clusters, the ``/home/$user`` directory is generally available to all nodes. -You can then create the virtual environment in the ``/home/user/.venv`` directory: +.. 
warning:: + + Contact your cluster administrator if you cannot find a Python version + compatible with PyMAPDL. + + +The ``which`` command returns the path where the Python executable is +installed. +You can use that executable to create your own Python virtual environment +in a directory that is accessible from all the compute nodes. +For most HPC clusters, the ``/home/$user`` directory is generally available +to all nodes. +You can then create the virtual environment in the ``/home/user/.venv`` +directory: .. code-block:: console @@ -67,11 +79,13 @@ You can then create the virtual environment in the ``/home/user/.venv`` director After activating the virtual environment, you can install PyMAPDL. +.. _ref_install_pymapdl_on_hpc: Install PyMAPDL =============== -To install PyMAPDL on the activated virtual environment, run the following commands: +To install PyMAPDL on the activated virtual environment, run the following +commands: .. code-block:: console @@ -107,8 +121,8 @@ then you can run that script using: user@machine:~$ srun test.sh -This command might take a minute or two to complete, depending on the amount of free -resources available in the cluster. +This command might take a minute or two to complete, depending on the amount of +free resources available in the cluster. On the console, you should see this output: .. code-block:: text @@ -116,5 +130,6 @@ On the console, you should see this output: Testing Python! PyMAPDL version 0.68.1 was successfully imported. -If you see an error in the output, see :ref:`ref_hpc_troubleshooting`, especially -:ref:`ref_python_venv_not_accesible`. +If you see an error in the output, see :ref:`ref_hpc_troubleshooting`, +especially :ref:`ref_python_venv_not_accesible`. 
+ diff --git a/doc/source/user_guide/hpc/troubleshooting.rst b/doc/source/user_guide/hpc/troubleshooting.rst index 3a41a60537f..8f84a202df1 100644 --- a/doc/source/user_guide/hpc/troubleshooting.rst +++ b/doc/source/user_guide/hpc/troubleshooting.rst @@ -7,8 +7,19 @@ Troubleshooting Debugging jobs -------------- -- Use ``--output`` and ``--error`` directives in batch scripts to capture - standard output and error messages. +- Use ``--output`` and ``--error`` directives in batch scripts to capture + standard output and error messages to specific files. + + .. code-block:: bash + + #!/bin/bash + #SBATCH --job-name=ansys_job # Job name + #SBATCH --partition=qsmall # Specify the queue/partition name + #SBATCH --output=ansys_job.out # Standard output file + #SBATCH --error=ansys_job.err # Standard error file + + source /home/user/pymapdl/.venv/bin/activate + python /home/user/pymapdl.py - Check SLURM logs for error messages and debugging information. @@ -19,44 +30,90 @@ Python virtual environment is not accessible -------------------------------------------- If there is an error while testing the Python installation, it might mean that the Python environment is not accessible to the compute nodes. -For example, in the following output, PyMAPDL could not be found, meaning that the script -is not using the virtual environment (``/home/user/.venv``): +For example, given the following *bash* script `test.sh`: + +.. code-block:: bash + + source /home/user/.venv/bin/activate + python -c "from ansys.mapdl import core as pymapdl; pymapdl.report()" + +The following output is shown after running in the terminal: .. code-block:: console user@machine:~$ srun test.sh + Testing Python! Traceback (most recent call last): File "", line 1, in ImportError: No module named ansys.mapdl -This could be for a number of reasons.
One of them is that the system Python distribution -used to create the virtual environment is not accessible from the compute nodes +As the output shows, PyMAPDL could not be found, meaning that either: +* The virtual environment does not have PyMAPDL installed. + See :ref:`ref_install_pymapdl_on_hpc`. +* Or the script did not activate properly the virtual environment + (``/home/user/.venv``). + +For the second reason, there could be a number of reasons. +One of them is that the system Python distribution used to create +the virtual environment is not accessible from the compute nodes due to one of these reasons: - The virtual environment has been created in a directory that is not accessible from the nodes. -- The virtual environment has been created from a Python - executable that is not available to the compute nodes. - Hence, the virtual environment is not activated. For - example, you might be creating the virtual environment - using Python 3.10, but only Python 3.8 is available - from the compute nodes. - -You can test which Python executable the cluster is using by starting an interactive session in -a compute node with this code: + In this case, your terminal might also show that the + ``activate`` file could not be found. + + .. code-block:: console + + user@machine:~$ srun test.sh + Testing Python! + bash: .venv/bin/activate: No such file or directory + + Depending on your terminal configuration, the above error might be sufficient + to exit the terminal process, or not. + If not, the execution will continue, and the subsequent ``python`` call will + be executed using the default python executable. + It is very likely that the default ``python`` executable does not have + PyMAPDL installed, hence the ``ImportError`` error showed above might appear + too. + +- The virtual environment has been created from a Python executable that is + not available to the compute nodes. Hence, the virtual environment is not + activated. 
+ For example, you might be creating the virtual environment using + Python 3.10, but only Python 3.8 is available from the compute nodes. + You can test which Python executable the cluster is using by starting an + interactive session in a compute node with this code to list all commands + that start with ``python``: .. code-block:: console user@machine:~$ srun --pty /bin/bash - user@compute_node_01:~$ compgen -c | grep python # List all commands starting with python + user@compute_node_01:~$ compgen -c | grep python .. the approach to solve this comes from: https://stackoverflow.com/questions/64188693/problem-with-python-environment-and-slurm-srun-sbatch +It should be noticed the above approach assumes that all the nodes have similar +configuration, hence all of them should have the same Python installations +available. + +It is also convenient to be aware that environment variable modules can be +used to activate Python installations. +For more information, see :ref:`ref_envvar_modules_on_hpc`. + + +.. _ref_envvar_modules_on_hpc: + +Using modules to load Python +---------------------------- + Many HPC infrastructures use environment managers to load and unload -software packages using modules and environment variables. -Hence, you might want to make sure that the correct module is loaded in your script. +software packages using modules and environment variables. +Hence, you might want to make sure that the correct module is loaded in your +script. + For information on two of the most common environment managers, see the `Modules documentation `_ and `Lmod documentation `_. Check your cluster documentation to know which environment @@ -76,12 +133,14 @@ Using the Ansys-provided Python installation **For development purposes only** -In certain HPC environments the possibility of installing a different Python version -is limited for security reasons. In such cases, the Python distribution available in -the Ansys installation can be used.
-This Python distribution is a customized Python (CPython) -version for Ansys products use only. Its use is **discouraged** -except for very advanced users and special use cases. +In certain HPC environments the possibility of installing a different Python +version is limited for security reasons. +In such cases, the Python distribution available in the Ansys installation +can be used. +This Python distribution is a customized Python (CPython) version for Ansys +products use only. +Its use is **discouraged** except for very advanced users and special use +cases. This Python distribution is in the following directory, where ``%MAPDL_VERSION%`` is the three-digit Ansys version: @@ -98,7 +157,8 @@ For example, here is the directory for Ansys 2024 R2: In Ansys 2024 R1 and later, the unified installer includes CPython 3.10. -Earlier versions include CPython 3.7 (``/commonfiles/CPython/3_7/linx64/Release/python``). +Earlier versions include CPython 3.7 +(``/commonfiles/CPython/3_7/linx64/Release/python``). 
Because the Ansys installation must be available to all the compute nodes to run simulations using them, this From 34bcfc4531c450eb92510abda3ad049a47ce3c6b Mon Sep 17 00:00:00 2001 From: pyansys-ci-bot <92810346+pyansys-ci-bot@users.noreply.github.com> Date: Mon, 7 Oct 2024 11:34:40 +0000 Subject: [PATCH 018/122] chore: adding changelog file 3466.documentation.md --- doc/changelog.d/3466.documentation.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/changelog.d/3466.documentation.md diff --git a/doc/changelog.d/3466.documentation.md b/doc/changelog.d/3466.documentation.md new file mode 100644 index 00000000000..902767602d1 --- /dev/null +++ b/doc/changelog.d/3466.documentation.md @@ -0,0 +1 @@ +feat: passing tight integration env vars to mapdl \ No newline at end of file From 3bc1cc672bb61c8275ee3d1079a697dc168c283d Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 12:36:55 +0000 Subject: [PATCH 019/122] fix: vale issues --- doc/source/user_guide/hpc/pymapdl.rst | 16 ++++++++-------- doc/source/user_guide/hpc/troubleshooting.rst | 14 +++++++------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index c70628f3a7a..f24134509f9 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -29,9 +29,9 @@ between the scheduler and MAPDL to read the job configuration and launch an MAPDL instance that can use all the resources allocated to that job. For instance, if a SLURM job has allocated 8 nodes with 4 cores each, -then PyMAPDL will launch an MAPDL instance which will use 32 cores +then PyMAPDL launches an MAPDL instance which uses 32 cores spawning across those 8 nodes. 
-This behaviour can disabled if passing the environment variable +This behaviour can be turned off by passing the environment variable :envvar:`PYMAPDL_ON_SLURM` or passing the argument `detect_HPC=False` to :func:`launch_mapdl() `. @@ -73,7 +73,7 @@ For instance, to launch the following Python script ``main.py``: mapdl = launch_mapdl(run_location="/home/ubuntu/tmp/tmp/mapdl", loglevel="debug") print(mapdl.prep7()) - print(f'Number of CPUs: {mapdl.get_value("ACTIVE", 0, "NUMCPU")}') + print(f'Number of CPU: {mapdl.get_value("ACTIVE", 0, "NUMCPU")}') mapdl.exit() @@ -90,7 +90,7 @@ Python executable call: (venv) user@entrypoint-machine:~$ sbatch python main.py -Additionally, you can can change the amount of cores used in your +Additionally, you can change the amount of cores used in your job, by setting the :envvar:`PYMAPDL_NPROC` to the desired value. .. code-block:: console @@ -105,7 +105,7 @@ You can also add ``sbatch`` options to the command: For instance, to launch a PyMAPDL job which start a four cores MAPDL instance -on a 10 CPUs SLURM job, you can use: +on a 10 CPU SLURM job, you can use: .. code-block:: bash @@ -129,12 +129,12 @@ In this case, you must create two files: from ansys.mapdl.core import launch_mapdl # Number of processors must be lower than the - # number of CPUs allocated for the job. + # number of CPU allocated for the job. mapdl = launch_mapdl(nproc=10) mapdl.prep7() n_proc = mapdl.get_value("ACTIVE", 0, "NUMCPU") - print(f"Number of CPUs: {n_proc}") + print(f"Number of CPU: {n_proc}") mapdl.exit() @@ -158,7 +158,7 @@ The expected output of the job is ..
code-block:: text - Number of CPUs: 10.0 + Number of CPU: 10.0 The bash script allows you to customize the environment before running the diff --git a/doc/source/user_guide/hpc/troubleshooting.rst b/doc/source/user_guide/hpc/troubleshooting.rst index 8f84a202df1..9e79d0fc8ba 100644 --- a/doc/source/user_guide/hpc/troubleshooting.rst +++ b/doc/source/user_guide/hpc/troubleshooting.rst @@ -70,13 +70,13 @@ due to one of these reasons: Testing Python! bash: .venv/bin/activate: No such file or directory - Depending on your terminal configuration, the above error might be sufficient - to exit the terminal process, or not. - If not, the execution will continue, and the subsequent ``python`` call will - be executed using the default python executable. + Depending on your terminal configuration, the preceding error might be + sufficient to exit the terminal process, or not. + If not, the execution continues, and the subsequent ``python`` call is + executed using the default python executable. It is very likely that the default ``python`` executable does not have - PyMAPDL installed, hence the ``ImportError`` error showed above might appear - too. + PyMAPDL installed, hence the ``ImportError`` error shown previously might + appear too. - The virtual environment has been created from a Python executable that is not available to the compute nodes. Hence, the virtual environment is not @@ -95,7 +95,7 @@ due to one of these reasons: .. the approach to solve this comes from: https://stackoverflow.com/questions/64188693/problem-with-python-environment-and-slurm-srun-sbatch -It should be noticed the above approach assumes that all the nodes have similar +It should be noted that the preceding approach assumes that all the nodes have similar configuration, hence all of them should have the same Python installations available.
From 0f1606bfed514f0e556207e97c03a8f7c840bac9 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 14:03:57 +0000 Subject: [PATCH 020/122] docs: fix vale --- doc/source/user_guide/hpc/pymapdl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index f24134509f9..5a3faa99625 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -24,7 +24,7 @@ Currently, the supported configurations are: * `Submit a PyMAPDL batch job to the cluster from the entrypoint node` -Since v0.68.5, PyMAPDL can take advantage of the tigh integration +Since v0.68.5, PyMAPDL can take advantage of the tight integration between the scheduler and MAPDL to read the job configuration and launch an MAPDL instance that can use all the resources allocated to that job. From 89552c9b4eae3eedf658e84f6e30d47816208c65 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:56:36 +0200 Subject: [PATCH 021/122] docs: expanding a bit troubleshooting advices and small format fix --- doc/source/user_guide/hpc/pymapdl.rst | 11 ++++--- doc/source/user_guide/hpc/troubleshooting.rst | 31 ++++++++++++++++--- doc/source/user_guide/troubleshoot.rst | 1 + 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index 5a3faa99625..322621948b7 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -21,7 +21,7 @@ or not (``batch`` mode). 
Currently, the supported configurations are: -* `Submit a PyMAPDL batch job to the cluster from the entrypoint node` +* :ref:`ref_pymapdl_batch_in_cluster_hpc` Since v0.68.5, PyMAPDL can take advantage of the tight integration @@ -36,6 +36,7 @@ This behaviour can turn off if passing the environment variable to :func:`launch_mapdl() `. +.. _ref_pymapdl_batch_in_cluster_hpc: Submit a PyMAPDL batch job to the cluster from the entrypoint node ================================================================== @@ -64,7 +65,7 @@ script if they do have the proper Python shebang (``#!/usr/bin/env python3``). For instance, to launch the following Python script ``main.py``: .. code-block:: python - :caption: ``main.py`` file + :caption: main.py #!/usr/bin/env python3 @@ -107,7 +108,7 @@ You can also add ``sbatch`` options to the command: For instance, to launch a PyMAPDL job which start a four cores MAPDL instance on a 10 CPU SLURM job, you can use: -.. code-block:: bash +.. code-block:: console (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch --partition=qsmall --nodes=10 --ntasks-per-node=1 main.py @@ -124,7 +125,7 @@ In this case, you must create two files: Python script. .. code-block:: python - :caption: ``main.py`` python script + :caption: main.py from ansys.mapdl.core import launch_mapdl @@ -140,7 +141,7 @@ In this case, you must create two files: .. code-block:: bash - :caption: ``job.sh`` execution script + :caption: job.sh source /home/user/.venv/bin/activate python main.py diff --git a/doc/source/user_guide/hpc/troubleshooting.rst b/doc/source/user_guide/hpc/troubleshooting.rst index 9e79d0fc8ba..528c00fea9d 100644 --- a/doc/source/user_guide/hpc/troubleshooting.rst +++ b/doc/source/user_guide/hpc/troubleshooting.rst @@ -22,6 +22,26 @@ Debugging jobs python /home/user/pymapdl.py - Check SLURM logs for error messages and debugging information. +- It is also good idea to print the environment variables in your bash script, using + ``printenv``. 
Additionally, you can filter them using ``grep``. + + .. code-block:: bash + + #!/bin/bash + #SBATCH --job-name=ansys_job # Job name + #SBATCH --partition=qsmall # Specify the queue/partition name + #SBATCH --output=ansys_job.out # Standard output file + #SBATCH --error=ansys_job.err # Standard error file + + printenv | grep "PYMAPDL" # Print env vars which contains 'PYMAPDL' + printenv | grep "SLURM" # Print env vars which contains 'SLURM' + source /home/user/pymapdl/.venv/bin/activate + python /home/user/pymapdl.py + +- Use PyMAPDL logging to printout valuable information. To activate this, see + :ref:`ref_debug_pymapdl`. + +- In case you need more help, visit :ref:`ref_troubleshooting`. .. _ref_python_venv_not_accesible: @@ -49,8 +69,10 @@ The following output is shown after running in the terminal: ImportError: No module named ansys.mapdl As the output shows, PyMAPDL could not be found, meaning that either: + * The virtual environment does not have PyMAPDL installed. See :ref:`ref_install_pymapdl_on_hpc`. + * Or the script did not activate properly the virtual environment (``/home/user/.venv``). @@ -59,10 +81,9 @@ One of them is that the system Python distribution used to create the virtual environment is not accessible from the compute nodes due to one of these reasons: -- The virtual environment has been created in a - directory that is not accessible from the nodes. - In this case, your terminal might also show that the - ``activate`` file could not be found. +- The virtual environment has been created in a directory that is + not accessible from the nodes. In this case, your terminal might + also show that the ``activate`` file could not be found. .. code-block:: console @@ -176,6 +197,8 @@ the compute nodes: user@machine:~$ export PY_PATH=/ansys_inc/v241/commonfiles/CPython/3_10/linx64/Release/Python + This path needs to be adapted to where Ansys is installed and also which version is used. + #. 
For only Ansys 2024 R1 and earlier, patch the ``PATH`` and ``LD_LIBRARY_PATH`` environment variables: diff --git a/doc/source/user_guide/troubleshoot.rst b/doc/source/user_guide/troubleshoot.rst index 54cf12d0c70..74a2b63f35b 100644 --- a/doc/source/user_guide/troubleshoot.rst +++ b/doc/source/user_guide/troubleshoot.rst @@ -8,6 +8,7 @@ Troubleshooting PyMAPDL To help you resolve any problems that you might have when using PyMAPDL, some of the most common problems and frequently asked questions are posted here. +.. _ref_debug_pymapdl: Debug in PyMAPDL ---------------- From c3c6506760a5bf1f09d92fc33d044ddcfc229e77 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 16:42:24 +0200 Subject: [PATCH 022/122] fix: nproc tests --- src/ansys/mapdl/core/launcher.py | 1 + tests/test_launcher.py | 23 +++++++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 34aebe9d507..18fa51c27e2 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -2295,4 +2295,5 @@ def pack_parameters(locals_var): dict_["start_instance"] = locals_var["start_instance"] dict_["version"] = locals_var["version"] dict_["additional_switches"] = locals_var["additional_switches"] + dict_["nproc"] = locals_var["nproc"] return dict_ diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 5be0570c311..1b66a598633 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -604,7 +604,6 @@ def test_deprecate_verbose(): ), pytest.param( { - "PYMAPDL_NPROC": 5, "SLURM_JOB_NAME": "myawesomejob", "SLURM_NTASKS": 2, "SLURM_CPUS_PER_TASK": 2, @@ -613,12 +612,11 @@ def test_deprecate_verbose(): "SLURM_MEM_PER_NODE": None, "SLURM_NODELIST": None, }, - {"nproc": 5, "jobname": "myawesomejob"}, - id="Testing PYMAPDL_NPROC and SLURM_JOB_NAME", + {"nproc": 4, "jobname": "myawesomejob"}, + id="Testing SLURM_JOB_NAME", ), 
pytest.param( { - "PYMAPDL_NPROC": 5, "SLURM_JOB_NAME": "myawesomejob", "SLURM_NTASKS": 2, "SLURM_CPUS_PER_TASK": 2, @@ -628,8 +626,8 @@ def test_deprecate_verbose(): "SLURM_NODELIST": None, "PYMAPDL_MAPDL_EXEC": "asdf/qwer/poiu", }, - {"nproc": 5, "jobname": "myawesomejob", "exec_file": "asdf/qwer/poiu"}, - id="Testing PYMAPDL_NPROC and SLURM_JOB_NAME", + {"nproc": 4, "jobname": "myawesomejob", "exec_file": "asdf/qwer/poiu"}, + id="Testing PYMAPDL_MAPDL_EXEC and SLURM_JOB_NAME", ), ), indirect=["set_env_var_context"], @@ -782,3 +780,16 @@ def test_ip_and_start_instance( assert options["ip"] == ip else: assert options["ip"] in (LOCALHOST, "0.0.0.0") + + +def test_nproc_envvar(monkeypatch): + monkeypatch.setenv("PYMAPDL_NPROC", 10) + args = launch_mapdl(_debug_no_launch=True) + assert args["nproc"] == 10 + + +@pytest.mark.parametrize("nproc,result", [[None, 2], [5, 5]]) +def test_nproc(monkeypatch, nproc, result): + monkeypatch.delenv("PYMAPDL_START_INSTANCE") + args = launch_mapdl(nproc=nproc, _debug_no_launch=True) + assert args["nproc"] == result From db963c4a111b493da90d451192a34572bc30856c Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 17:37:08 +0200 Subject: [PATCH 023/122] revert: "chore: To fix sphinx build" This reverts commit e45d2e5d4fb97359605f445f462fa4b9cf76515a. 
--- .github/workflows/ci.yml | 1 - doc/changelog.d/3468.fixed.md | 1 - doc/source/conf.py | 3 ++- 3 files changed, 2 insertions(+), 3 deletions(-) delete mode 100644 doc/changelog.d/3468.fixed.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 543cc91b08c..30f13961753 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,7 +34,6 @@ env: MAPDL_PACKAGE: ghcr.io/ansys/mapdl ON_CI: True PYTEST_ARGUMENTS: '-vvv -ra --durations=10 --maxfail=3 --reruns 3 --reruns-delay 4 --cov=ansys.mapdl.core --cov-report=html' - BUILD_CHEATSHEET: True # Following env vars when changed will "reset" the mentioned cache, # by changing the cache file name. It is rendered as ...-v%RESET_XXX%-... diff --git a/doc/changelog.d/3468.fixed.md b/doc/changelog.d/3468.fixed.md deleted file mode 100644 index ab369c1e41b..00000000000 --- a/doc/changelog.d/3468.fixed.md +++ /dev/null @@ -1 +0,0 @@ -fix: add ``build cheatsheet`` as env variable within doc-build \ No newline at end of file diff --git a/doc/source/conf.py b/doc/source/conf.py index d4e73a7a0b4..040d57eb7ee 100755 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -321,7 +321,8 @@ "title": "PyMAPDL cheat sheet", "version": f"v{version}", "pages": ["getting_started/learning"], - } + }, +} html_context = { "display_github": True, # Integrate GitHub From 1e315196b49ea731e97da031096c55a01e01eb0d Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:45:37 +0000 Subject: [PATCH 024/122] docs: clarifying where everything is running. --- doc/source/user_guide/hpc/pymapdl.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index 322621948b7..75468bfb871 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -49,7 +49,9 @@ might be a virtual machine (VDI/VM). 
In such cases, once the Python virtual environment with PyMAPDL is already set and is accessible to all the compute nodes, launching a -PyMAPDL job is very easy to do using ``sbatch`` command. +PyMAPDL job from the entrypoint is very easy to do using the ``sbatch`` command. +Using the ``sbatch`` command, the PyMAPDL script runs and launches an MAPDL instance on +the compute nodes. No changes are needed on a PyMAPDL script to run it on an SLURM cluster. First the virtual environment must be activated in the current terminal. From 5c7967c843a0596adbfad872b7f74f60f748f92a Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Tue, 8 Oct 2024 08:51:19 +0200 Subject: [PATCH 025/122] docs: expanding bash example --- doc/source/user_guide/hpc/pymapdl.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index 75468bfb871..6f66ec52ca9 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -145,7 +145,20 @@ In this case, you must create two files: ..
code-block:: bash :caption: job.sh + #!/bin/bash + # Set SLURM options + #SBATCH --job-name=ansys_job # Job name + #SBATCH --partition=qsmall # Specify the queue/partition name + #SBATCH --nodes=5 # Number of nodes + #SBATCH --ntasks-per-node=2 # Number of tasks (cores) per node + #SBATCH --time=04:00:00 # Set a time limit for the job (optional but recommended) + + # Set env vars + export MY_ENV_VAR=VALUE + + # Activating Python virtual environment source /home/user/.venv/bin/activate + # Calling Python script python main.py To start the simulation, you use this code: From 880a6b8e971f71536e0739e7de4abcf90b29184b Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Tue, 15 Oct 2024 13:30:46 +0200 Subject: [PATCH 026/122] tests: fix --- tests/test_launcher.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index dee9aa73603..684cfa47631 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -24,9 +24,9 @@ import os import tempfile +from unittest.mock import patch import warnings -import psutil import pytest from ansys.mapdl import core as pymapdl @@ -479,13 +479,6 @@ def test_launching_on_busy_port(mapdl): launch_mapdl(port=mapdl.port) -@requires("local") -def test_cpu_checks(): - machine_cores = psutil.cpu_count(logical=False) - with pytest.raises(NotEnoughResources): - launch_mapdl(nproc=machine_cores + 2) - - def test_fail_channel_port(): with pytest.raises(ValueError): launch_mapdl(channel="something", port="something") @@ -783,14 +776,24 @@ def test_ip_and_start_instance( assert options["ip"] in (LOCALHOST, "0.0.0.0") +def mycpucount(**kwargs): + return 10 # faking 10 cores + + def test_nproc_envvar(monkeypatch): monkeypatch.setenv("PYMAPDL_NPROC", 10) args = launch_mapdl(_debug_no_launch=True) assert args["nproc"] == 10 -@pytest.mark.parametrize("nproc,result", [[None, 2], [5, 5]]) -def test_nproc(monkeypatch, nproc, 
result): - monkeypatch.delenv("PYMAPDL_START_INSTANCE") - args = launch_mapdl(nproc=nproc, _debug_no_launch=True) - assert args["nproc"] == result +@pytest.mark.parametrize("nproc", [None, 5, 9, 15]) +@patch("psutil.cpu_count", mycpucount) +def test_nproc(monkeypatch, nproc): + monkeypatch.delenv("PYMAPDL_START_INSTANCE", False) + + if nproc and nproc > mycpucount(): + with pytest.raises(NotEnoughResources): + launch_mapdl(nproc=nproc, _debug_no_launch=True) + else: + args = launch_mapdl(nproc=nproc, _debug_no_launch=True) + assert args["nproc"] == (nproc or 2) From 7514c31a0cb470b57a4f930371131932a23d954a Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 17 Oct 2024 10:28:43 +0200 Subject: [PATCH 027/122] docs: adding `PYMAPDL_NPROC` to env var section --- doc/source/user_guide/mapdl.rst | 179 ++++++++++++++++++-------------- 1 file changed, 101 insertions(+), 78 deletions(-) diff --git a/doc/source/user_guide/mapdl.rst b/doc/source/user_guide/mapdl.rst index bfc59931b5e..3b967f08522 100644 --- a/doc/source/user_guide/mapdl.rst +++ b/doc/source/user_guide/mapdl.rst @@ -1097,83 +1097,106 @@ Environment variables ===================== There are several PyMAPDL-specific environment variables that can be -used to control the behavior or launching of PyMAPDL and MAPDL. +used to control the default behavior of PyMAPDL or launching MAPDL. + +It should be mentioned that these environment variables do not have +priority over the arguments given in the corresponding functions. +For instance: + +.. code-block:: console + + user@machine:~$ export PYMAPDL_PORT=50052 + user@machine:~$ python -c "from ansys.mapdl.core import launch_mapdl; mapdl=launch_mapdl(port=60053)" + +The above command will launch an MAPDL instance on the port 60053, +because the argument ``port`` has priority over the environment +variable :envvar:`PYMAPDL_PORT`. 
+ These are described in the following table: -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`PYMAPDL_START_INSTANCE` | Override the behavior of the | -| | :func:`ansys.mapdl.core.launcher.launch_mapdl` function | -| | to only attempt to connect to existing | -| | instances of PyMAPDL. Generally used | -| | in combination with ``PYMAPDL_PORT``. | -| | | -| | **Example:** | -| | | -| | .. code:: console | -| | | -| | export PYMAPDL_START_INSTANCE=True | -| | | -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`PYMAPDL_PORT` | Default port for PyMAPDL to connect to. | -| | | -| | **Example:** | -| | | -| | .. code:: console | -| | | -| | export PYMAPDL_PORT=50052 | -| | | -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`PYMAPDL_IP` | Default IP for PyMAPDL to connect to. | -| | | -| | **Example:** | -| | | -| | .. code:: console | -| | | -| | export PYMAPDL_IP=123.45.67.89 | -| | | -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`ANSYSLMD_LICENSE_FILE` | License file or IP address with port in the format | -| | ``PORT@IP``. Do not confuse with the ``IP`` and | -| | ``PORT`` where the MAPDL instance is running, which | -| | are specified using :envvar:`PYMAPDL_IP` and | -| | :envvar:`PYMAPDL_PORT`. | -| | This is helpful for supplying licensing for | -| | Docker. | -| | | -| | **Example:** | -| | | -| | .. code:: console | -| | | -| | export ANSYSLMD_LICENSE_FILE=1055@123.45.67.89 | -| | | -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`PYMAPDL_MAPDL_EXEC` | Executable path from where to launch MAPDL | -| | instances. | -| | | -| | **Example:** | -| | | -| | .. 
code:: console | -| | | -| | export PYMAPDL_MAPDL_EXEC=/ansys_inc/v241/ansys/bin/mapdl | -| | | -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`PYMAPDL_MAPDL_VERSION` | Default MAPDL version to launch in case there | -| | are several versions availables. | -| | | -| | **Example:** | -| | | -| | .. code:: console | -| | | -| | export PYMAPDL_MAPDL_VERSION=22.2 | -| | | -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`PYMAPDL_ON_SLURM` | With this environment variable set to ``FALSE``, you can avoid | -| | PyMAPDL from detecting that it is running on a SLURM HPC cluster. | -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`PYMAPDL_MAX_MESSAGE_LENGTH` | Maximum gRPC message length. If your | -| | connection terminates when running | -| | PRNSOL or NLIST, raise this. In bytes, | -| | defaults to 256 MB. | -| | | -| | Only for developing purposes. | -+---------------------------------------+---------------------------------------------------------------------+ ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_START_INSTANCE` | Override the behavior of the | +| | :func:`ansys.mapdl.core.launcher.launch_mapdl` function | +| | to only attempt to connect to existing | +| | instances of PyMAPDL. Generally used | +| | in combination with ``PYMAPDL_PORT``. | +| | | +| | **Example:** | +| | | +| | .. code-block:: console | +| | | +| | user@machine:~$ export PYMAPDL_START_INSTANCE=True | +| | | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_PORT` | Default port for PyMAPDL to connect to. | +| | | +| | **Example:** | +| | | +| | .. 
code-block:: console | +| | | +| | user@machine:~$ export PYMAPDL_PORT=50052 | +| | | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_IP` | Default IP for PyMAPDL to connect to. | +| | | +| | **Example:** | +| | | +| | .. code-block:: console | +| | | +| | user@machine:~$ export PYMAPDL_IP=123.45.67.89 | +| | | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_NPROC` | Default number of cores for MAPDL to use. | +| | | +| | **Example:** | +| | | +| | .. code-block:: console | +| | | +| | user@machine:~$ export PYMAPDL_NPROC=10 | +| | | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`ANSYSLMD_LICENSE_FILE` | License file or IP address with port in the format | +| | ``PORT@IP``. Do not confuse with the ``IP`` and | +| | ``PORT`` where the MAPDL instance is running, which | +| | are specified using :envvar:`PYMAPDL_IP` and | +| | :envvar:`PYMAPDL_PORT`. | +| | This is helpful for supplying licensing for | +| | Docker. | +| | | +| | **Example:** | +| | | +| | .. code-block:: console | +| | | +| | user@machine:~$ export ANSYSLMD_LICENSE_FILE=1055@123.45.89 | +| | | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_MAPDL_EXEC` | Executable path from where to launch MAPDL | +| | instances. | +| | | +| | **Example:** | +| | | +| | .. code-block:: console | +| | | +| | user@machine:~$ export PYMAPDL_MAPDL_EXEC=/ansys_inc/v241/ansys/bin/mapdl | +| | | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_MAPDL_VERSION` | Default MAPDL version to launch in case there | +| | are several versions availables. 
| +| | | +| | **Example:** | +| | | +| | .. code-block:: console | +| | | +| | user@machine:~$ export PYMAPDL_MAPDL_VERSION=22.2 | +| | | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_ON_SLURM` | With this environment variable set to ``FALSE``, you can avoid | +| | PyMAPDL from detecting that it is running on a SLURM HPC cluster. | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_MAX_MESSAGE_LENGTH` | Maximum gRPC message length. If your | +| | connection terminates when running | +| | PRNSOL or NLIST, raise this. In bytes, | +| | defaults to 256 MB. | +| | | +| | Only for developing purposes. | ++---------------------------------------+----------------------------------------------------------------------------------+ From 4ccb1468c030fa08be28a2ec27e73adee5fee7b6 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 17 Oct 2024 10:54:04 +0200 Subject: [PATCH 028/122] feat: adding 'pymapdl_proc' to non-slurm run. Adding tests too. --- src/ansys/mapdl/core/launcher.py | 18 ++++++++++------- tests/test_launcher.py | 33 ++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 2c647234988..83d74453be9 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -2330,20 +2330,24 @@ def get_cpus(args: Dict[str, Any]): # Bypassing number of processors checks because VDI/VNC might have # different number of processors than the cluster compute nodes. 
+ # Also the CPUs are set in `get_slurm_options` if args["ON_SLURM"]: return # Setting number of processors machine_cores = psutil.cpu_count(logical=False) + # Some machines only have 1 core + min_cpus = machine_cores if machine_cores < 2 else 2 + if not args["nproc"]: - # Some machines only have 1 core - args["nproc"] = machine_cores if machine_cores < 2 else 2 - else: - if machine_cores < int(args["nproc"]): - raise NotEnoughResources( - f"The machine has {machine_cores} cores. PyMAPDL is asking for {args['nproc']} cores." - ) + # Check the env var `PYMAPDL_NPROC` + args["nproc"] = int(os.environ.get("PYMAPDL_NPROC", min_cpus)) + + if machine_cores < int(args["nproc"]): + raise NotEnoughResources( + f"The machine has {machine_cores} cores. PyMAPDL is asking for {args['nproc']} cores." + ) def remove_err_files(run_location, jobname): diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 2849aed0ab0..e7dc8b48430 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -44,6 +44,7 @@ force_smp_in_student, generate_mapdl_launch_command, generate_start_parameters, + get_cpus, get_exec_file, get_run_location, get_slurm_options, @@ -1112,3 +1113,35 @@ def test_launch_grpc(tmpdir): assert isinstance(kwags["stdin"], type(subprocess.DEVNULL)) assert isinstance(kwags["stdout"], type(subprocess.PIPE)) assert isinstance(kwags["stderr"], type(subprocess.PIPE)) + + +@patch("psutil.cpu_count", lambda *args, **kwags: 5) +@pytest.mark.parametrize("arg", [None, 3, 10]) +@pytest.mark.parametrize("env", [None, 3, 10]) +def test_get_cpus(monkeypatch, arg, env): + if env: + monkeypatch.setenv("PYMAPDL_NPROC", env) + + context = NullContext() + cores_machine = psutil.cpu_count(logical=False) # it is patched + + if (arg and arg > cores_machine) or (arg is None and env and env > cores_machine): + context = pytest.raises(NotEnoughResources) + + args = {"nproc": arg, "ON_SLURM": False} + with context: + get_cpus(args) + + if arg: + assert args["nproc"] == arg + elif 
env: + assert args["nproc"] == env + else: + assert args["nproc"] == 2 + + +@patch("psutil.cpu_count", lambda *args, **kwags: 1) +def test_get_cpus_min(): + args = {"nproc": None, "ON_SLURM": False} + get_cpus(args) + assert args["nproc"] == 1 From fdf00d180764549c1f033b97d4363f042db52cf3 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 17 Oct 2024 11:04:08 +0200 Subject: [PATCH 029/122] docs: fix vale issue --- doc/source/user_guide/mapdl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/mapdl.rst b/doc/source/user_guide/mapdl.rst index 3b967f08522..b3169e4dd55 100644 --- a/doc/source/user_guide/mapdl.rst +++ b/doc/source/user_guide/mapdl.rst @@ -1108,7 +1108,7 @@ For instance: user@machine:~$ export PYMAPDL_PORT=50052 user@machine:~$ python -c "from ansys.mapdl.core import launch_mapdl; mapdl=launch_mapdl(port=60053)" -The above command will launch an MAPDL instance on the port 60053, +The above command launches an MAPDL instance on the port 60053, because the argument ``port`` has priority over the environment variable :envvar:`PYMAPDL_PORT`. 
From 4aa477d1ec6b7208c05d7e682b5721b3a5aa92c2 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 17 Oct 2024 11:19:02 +0200 Subject: [PATCH 030/122] docs: fix vale issue --- doc/source/user_guide/mapdl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/mapdl.rst b/doc/source/user_guide/mapdl.rst index b3169e4dd55..d60d8e610d8 100644 --- a/doc/source/user_guide/mapdl.rst +++ b/doc/source/user_guide/mapdl.rst @@ -1108,7 +1108,7 @@ For instance: user@machine:~$ export PYMAPDL_PORT=50052 user@machine:~$ python -c "from ansys.mapdl.core import launch_mapdl; mapdl=launch_mapdl(port=60053)" -The above command launches an MAPDL instance on the port 60053, +The preceding command launches an MAPDL instance on the port 60053, because the argument ``port`` has priority over the environment variable :envvar:`PYMAPDL_PORT`. From 4dadc1d15e45b41b4ae169e5074659528648e7ce Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 17 Oct 2024 11:20:53 +0200 Subject: [PATCH 031/122] fix: replacing env var name --- tests/test_launcher.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 4d516d904fc..3f4acfa113c 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -694,17 +694,17 @@ def test_slurm_ram(monkeypatch, ram, expected, context): @pytest.mark.parametrize("slurm_env_var", ["True", "false", ""]) @pytest.mark.parametrize("slurm_job_name", ["True", "false", ""]) @pytest.mark.parametrize("slurm_job_id", ["True", "false", ""]) -@pytest.mark.parametrize("detect_slurm_config", [True, False, None]) +@pytest.mark.parametrize("detect_HPC", [True, False, None]) def test_is_on_slurm( - monkeypatch, slurm_env_var, slurm_job_name, slurm_job_id, detect_slurm_config + monkeypatch, slurm_env_var, slurm_job_name, slurm_job_id, detect_HPC ): monkeypatch.setenv("PYMAPDL_ON_SLURM", 
slurm_env_var) monkeypatch.setenv("SLURM_JOB_NAME", slurm_job_name) monkeypatch.setenv("SLURM_JOB_ID", slurm_job_id) - flag = is_on_slurm(args={"detect_slurm_config": detect_slurm_config}) + flag = is_on_slurm(args={"detect_HPC": detect_HPC}) - if detect_slurm_config is not True: + if detect_HPC is not True: assert not flag else: @@ -720,7 +720,7 @@ def test_is_on_slurm( if ON_LOCAL: assert ( launch_mapdl( - detect_slurm_config=detect_slurm_config, + detect_HPC=detect_HPC, _debug_no_launch=True, )["ON_SLURM"] == flag From 5de0ab531dca7652ab24f6b3b4d596b197f0c7aa Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 17 Oct 2024 12:00:48 +0200 Subject: [PATCH 032/122] feat: first 'launch_mapdl_on_cluster` draft --- src/ansys/mapdl/core/launcher.py | 171 +++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 4370e0dc1a6..2ce21a9ba12 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -2383,3 +2383,174 @@ def remove_err_files(run_location, jobname): f'"{run_location}"' ) raise error + + +def launch_mapdl_on_cluster(): + + ######################################## + # Processing arguments + # -------------------- + # + # packing arguments + args = pack_arguments(locals()) # packs args and kwargs + + check_kwargs(args) # check if passing wrong arguments + + pre_check_args(args) + + # SLURM settings + if is_on_slurm(args): + LOG.info("On Slurm mode.") + + # extracting parameters + get_slurm_options(args, kwargs) + + get_cpus(args) + + get_start_instance_arg(args) + + get_ip(args) + + args["port"] = get_port(args["port"], args["start_instance"]) + + get_exec_file(args) + + args["version"] = get_version(args["version"], exec_file) + + if args["start_instance"]: + ######################################## + # Local adjustments + # ----------------- + # + # Only when starting MAPDL (aka Local) + + 
get_run_location(args) + + # verify lock file does not exist + check_lock_file(args["run_location"], args["jobname"], args["override"]) + + # remove err file so we can track its creation + # (as way to check if MAPDL started or not) + remove_err_files(args["run_location"], args["jobname"]) + + if _HAS_ATP and not args["_debug_no_launch"]: + version = version_from_path("mapdl", args["exec_file"]) + args["mode"] = check_mode(args["mode"], version) + + args["mode"] = "grpc" + + LOG.debug(f"Using mode {args['mode']}") + + args["additional_switches"] = set_license_switch( + args["license_type"], args["additional_switches"] + ) + + env_vars = update_env_vars(args["add_env_vars"], args["replace_env_vars"]) + + ######################################## + # Context specific launching adjustments + # -------------------------------------- + # + if args["start_instance"]: + # Assuming that if login node is ubuntu, the computation ones + # are also ubuntu. + env_vars = configure_ubuntu(env_vars) + + # Set compatible MPI + args["additional_switches"] = set_MPI_additional_switches( + args["additional_switches"], + args["exec_file"], + force_intel=args["force_intel"], + ) + + LOG.debug(f"Using additional switches {args['additional_switches']}.") + + start_parm = generate_start_parameters(args) + + if args["ON_SLURM"]: + env_vars.setdefault("ANS_MULTIPLE_NODES", "1") + env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") + + # Early exit for debugging. + if args["_debug_no_launch"]: + # Early exit, just for testing + return args # type: ignore + + ######################################## + # Sphinx docs adjustments + # ----------------------- + # + # special handling when building the gallery outside of CI. This + # creates an instance of mapdl the first time. 
+ if pymapdl.BUILDING_GALLERY: # pragma: no cover + return create_gallery_instances(args, start_parm) + + ######################################## + # Local launching + # --------------- + # + # Check the license server + if args["license_server_check"]: + LOG.debug("Checking license server.") + lic_check = LicenseChecker(timeout=args["start_timeout"]) + lic_check.start() + + LOG.debug("Starting MAPDL") + + cmd = generate_mapdl_launch_command( + exec_file=args["exec_file"], + jobname=args["jobname"], + nproc=args["nproc"], + ram=args["ram"], + port=args["port"], + additional_switches=args["additional_switches"], + ) + + try: + # TODO: wrap the launch_grpc with sbatch + process = launch_grpc( + cmd=cmd, run_location=args["run_location"], env_vars=env_vars + ) + except Exception as exception: + LOG.error("An error occurred when launching MAPDL.") + raise exception + + # TODO: A way to check if the job is ready. + + if args["just_launch"]: + out = [args["ip"], args["port"]] + if hasattr(process, "pid"): + out += [process.pid] + return out + + try: + mapdl = MapdlGrpc( + cleanup_on_exit=args["cleanup_on_exit"], + loglevel=args["loglevel"], + set_no_abort=args["set_no_abort"], + remove_temp_dir_on_exit=args["remove_temp_dir_on_exit"], + log_apdl=args["log_apdl"], + process=process, + use_vtk=args["use_vtk"], + **start_parm, + ) + + # Setting launched property + mapdl._launched = True + mapdl._env_vars = env_vars + + except Exception as exception: + # Failed to launch for some reason. 
Check if failure was due + # to the license check + if args["license_server_check"]: + LOG.debug("Checking license server.") + lic_check.check() + + raise exception + + # Stopping license checker + if args["license_server_check"]: + LOG.debug("Stopping license server check.") + lic_check.is_connected = True + + return mapdl From fec31136be38dfb9ab9623ed98e104aad05b0fe8 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 17 Oct 2024 16:22:57 +0000 Subject: [PATCH 033/122] feat: added arguments to 'launch_mapdl_on_cluster'. Added also properties `hostname`, `jobid` and `_mapdl_on_slurm`. --- src/ansys/mapdl/core/launcher.py | 95 ++++++++++++++++++++++++++++-- src/ansys/mapdl/core/mapdl_core.py | 2 + src/ansys/mapdl/core/mapdl_grpc.py | 61 ++++++++++++++++--- src/ansys/mapdl/core/misc.py | 4 ++ 4 files changed, 149 insertions(+), 13 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 2ce21a9ba12..0f3731f5791 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -2385,7 +2385,34 @@ def remove_err_files(run_location, jobname): raise error -def launch_mapdl_on_cluster(): +def launch_mapdl_on_cluster( + exec_file: Optional[str] = None, + run_location: Optional[str] = None, + jobname: str = "file", + *, + nproc: Optional[int] = None, + ram: Optional[Union[int, str]] = None, + mode: Optional[str] = None, + override: bool = False, + loglevel: str = "ERROR", + additional_switches: str = "", + start_timeout: int = 90, + port: Optional[int] = None, + cleanup_on_exit: bool = True, + start_instance: Optional[bool] = None, + ip: Optional[str] = None, + clear_on_connect: bool = True, + log_apdl: Optional[Union[bool, str]] = None, + remove_temp_dir_on_exit: bool = False, + license_server_check: bool = False, + license_type: Optional[bool] = None, + print_com: bool = False, + add_env_vars: Optional[Dict[str, str]] = None, + replace_env_vars: Optional[Dict[str, 
str]] = None, + version: Optional[Union[int, str]] = None, + detect_HPC: bool = True, + **kwargs: Dict[str, Any], +): ######################################## # Processing arguments @@ -2467,9 +2494,8 @@ def launch_mapdl_on_cluster(): start_parm = generate_start_parameters(args) - if args["ON_SLURM"]: - env_vars.setdefault("ANS_MULTIPLE_NODES", "1") - env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") + env_vars.setdefault("ANS_MULTIPLE_NODES", "1") + env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") # Early exit for debugging. if args["_debug_no_launch"]: @@ -2506,16 +2532,28 @@ def launch_mapdl_on_cluster(): additional_switches=args["additional_switches"], ) + cmd = f"""sbatch --export='ALL' --wrap '{cmd}'""" + try: # TODO: wrap the launch_grpc with sbatch process = launch_grpc( cmd=cmd, run_location=args["run_location"], env_vars=env_vars ) + + out = process.stdout.read().decode() + if "Submitted batch job" not in out: + raise MapdlDidNotStart("PyMAPDL failed to submit the sbatch job.") + except Exception as exception: LOG.error("An error occurred when launching MAPDL.") raise exception # TODO: A way to check if the job is ready. + jobid = get_jobid(out) + batch_host = get_hostname_host_cluster(jobid) + start_parm["ip"] = batch_host + start_parm["hostname"] = batch_host + start_parm["jobid"] = jobid if args["just_launch"]: out = [args["ip"], args["port"]] @@ -2554,3 +2592,52 @@ def launch_mapdl_on_cluster(): lic_check.is_connected = True return mapdl + + +def get_hostname_host_cluster(job_id): + cmd = f"scontrol show jobid -dd {job_id}".split() + LOG.debug(f"Executing the command '{cmd}'") + + ready = False + time_start = time.time() + timeout = 30 # second + + while not ready: + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + ) + stdout = proc.stdout.read().decode() + + if time.time() > time_start + timeout: + raise MapdlDidNotStart("The Job didn't start on time.") + + if "JobState=RUNNING" not in stdout: + LOG.debug("The job is not ready yet. 
Waiting...") + time.sleep(1) + else: + ready = True + + LOG.debug(f"The 'scontrol' command returned:\n{stdout}") + batchhost = stdout.split("BatchHost=")[1].splitlines()[0] + LOG.debug(f"Batchhost: {batchhost}") + + # we should validate + batchhost_ip = socket.gethostbyname(batchhost) + LOG.debug(f"Batchhost IP: {batchhost_ip}") + + return batchhost + + +def get_jobid(out: str) -> int: + """Extract the jobid from a command output""" + job_id = out.strip().split(" ")[-1] + + try: + job_id = int(job_id) + except ValueError: + LOG.error(f"The console output does not seems to have a valid jobid:\n{out}") + raise ValueError("PyMAPDL could not retrieve the job id.") + + LOG.debug(f"The job id is: {job_id}") + return job_id diff --git a/src/ansys/mapdl/core/mapdl_core.py b/src/ansys/mapdl/core/mapdl_core.py index 2bfffcad692..7deb2c9db0b 100644 --- a/src/ansys/mapdl/core/mapdl_core.py +++ b/src/ansys/mapdl/core/mapdl_core.py @@ -167,7 +167,9 @@ "additional_switches", "check_parameter_names", "exec_file", + "hostname", "ip", + "jobid", "jobname", "nproc", "override", diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index 72f4b9e0541..eb63405eb3e 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -31,11 +31,12 @@ import pathlib import re import shutil +import socket from subprocess import Popen import tempfile import threading import time -from typing import TYPE_CHECKING, List, Literal, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union from uuid import uuid4 from warnings import warn import weakref @@ -84,6 +85,7 @@ from ansys.mapdl.core.misc import ( check_valid_ip, last_created, + only_numbers_and_dots, random_string, run_as_prep7, supress_logging, @@ -360,10 +362,13 @@ def __init__( "If `channel` is specified, neither `port` nor `ip` can be specified." 
) if ip is None: - ip = "127.0.0.1" + # We use if here to avoid having ip= '' + if start_parm.get("ip"): + ip: str = start_parm.pop("ip") + else: + ip: str = "127.0.0.1" # port and ip are needed to setup the log - if port is None: from ansys.mapdl.core.launcher import MAPDL_DEFAULT_PORT @@ -371,6 +376,11 @@ def __init__( self._port: int = int(port) + if not only_numbers_and_dots(ip): + # it is a hostname + self._hostname = ip + ip = socket.gethostbyname(ip) + check_valid_ip(ip) self._ip: str = ip @@ -393,7 +403,7 @@ def __init__( self._cleanup: bool = cleanup_on_exit self.remove_temp_dir_on_exit: bool = remove_temp_dir_on_exit self._jobname: str = start_parm.get("jobname", "file") - self._path: str = start_parm.get("run_location", None) + self._path: Optional[str] = start_parm.get("run_location", None) self._busy: bool = False # used to check if running a command on the server self._local: bool = ip in ["127.0.0.1", "127.0.1.1", "localhost"] if "local" in start_parm: # pragma: no cover # allow this to be overridden @@ -424,9 +434,14 @@ def __init__( self._mapdl_process: Popen = start_parm.pop("process", None) # saving for later use (for example open_gui) - start_parm["ip"] = ip start_parm["port"] = port - self._start_parm = start_parm + self._start_parm: Dict[str, Any] = start_parm + + # Storing HPC related stuff + self._jobid: int = start_parm.get("jobid") + self._hostname: str = start_parm.get("hostname") + self._mapdl_on_slurm: bool = bool(self._jobid) + self.finish_job_on_exit: bool = start_parm.get("finish_job_on_exit", True) # Queueing the stds if self._mapdl_process: @@ -997,6 +1012,17 @@ def ip(self): """Return the MAPDL gRPC instance IP.""" return self._ip + @property + def hostname(self): + """Return the hostname of the machine MAPDL is running in.""" + return self._hostname + + @property + def jobid(self): + """Returns the job id where the MAPDL is running in. 
+ This is only applicable if MAPDL is running on an HPC cluster.""" + return self._jobid + @protect_grpc def _send_command(self, cmd: str, mute: bool = False) -> Optional[str]: """Send a MAPDL command and return the response as a string""" @@ -1074,9 +1100,10 @@ def exit(self, save=False, force=False, **kwargs): mapdl_path = self.directory # caching if self._exited is None: self._log.debug("'self._exited' is none.") - return # Some edge cases the class object is not completely initialized but the __del__ method - # is called when exiting python. So, early exit here instead an error in the following - # self.directory command. + return # Some edge cases the class object is not completely + # initialized but the __del__ method + # is called when exiting python. So, early exit here instead an + # error in the following self.directory command. # See issue #1796 elif self._exited: # Already exited. @@ -1123,6 +1150,10 @@ def exit(self, save=False, force=False, **kwargs): # No cover: The CI is working with a single MAPDL instance self._remote_instance.delete() + if self._mapdl_on_slurm: + self.kill_job(self.jobid) + self._log.debug("Job has been cancel.") + self._remove_temp_dir_on_exit(mapdl_path) if self._local and self._port in _LOCAL_PORTS: @@ -3698,3 +3729,15 @@ def get_file_name(path): shutil.copy(file_name, target_dir) return os.path.basename(target_dir) + + def kill_job(self, jobid: int) -> None: + cmd = ["scancel", f"{jobid}"] + # to ensure the job is stopped properly, let's issue the scancel twice. 
+ for i in range(2): + Popen(cmd) + + def __del__(self): + if self._mapdl_on_slurm and self.finish_job_on_exit: + self.exit() + else: + super().__del__() diff --git a/src/ansys/mapdl/core/misc.py b/src/ansys/mapdl/core/misc.py index ffbd46ac1c4..86f593b9488 100644 --- a/src/ansys/mapdl/core/misc.py +++ b/src/ansys/mapdl/core/misc.py @@ -1307,3 +1307,7 @@ def get_active_branch_name(): kind = f"release/{'.'.join(pymapdl.__version__.split('.')[:2])}" return kind + + +def only_numbers_and_dots(s): + return bool(re.fullmatch(r"[0-9.]+", s)) From de403fd21924bb707f4fb03bed42b458cc1df9c1 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 17 Oct 2024 18:17:08 +0000 Subject: [PATCH 034/122] feat: better error messages. Created 'generate_sbatch_command'. --- src/ansys/mapdl/core/launcher.py | 120 ++++++++++++++++++++++++----- src/ansys/mapdl/core/mapdl_core.py | 1 + 2 files changed, 103 insertions(+), 18 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 0f3731f5791..a940f03e66f 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -407,7 +407,7 @@ def generate_mapdl_launch_command( ] command_parm = [ - each for each in command_parm if command_parm + each for each in command_parm if each.strip() ] # cleaning empty args. 
command = " ".join(command_parm) @@ -445,8 +445,23 @@ def launch_grpc( # disable all MAPDL pop-up errors: env_vars.setdefault("ANS_CMD_NODIAG", "TRUE") + if "sbatch" in cmd: + header = "Running an MAPDL instance on the Cluster:" + shell = os.name != "nt" + cmd_ = " ".join(cmd) + else: + header = "Running an MAPDL instance" + shell = False # To prevent shell injection + cmd_ = cmd + LOG.info( - f"Running a local instance in {run_location} with the following command: '{cmd}'" + "\n============" + "\n============" + f"{header}:\nLocation:\n{run_location}\n" + f"Command:\n{' '.join(cmd)}\n" + f"Env vars:\n{env_vars}" + "\n============" + "\n============" ) if os.name == "nt": @@ -458,8 +473,8 @@ def launch_grpc( LOG.debug("MAPDL starting in background.") process = subprocess.Popen( - cmd, - shell=os.name != "nt", + cmd_, + shell=shell, # It does not work without shell. cwd=run_location, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, @@ -2532,25 +2547,35 @@ def launch_mapdl_on_cluster( additional_switches=args["additional_switches"], ) - cmd = f"""sbatch --export='ALL' --wrap '{cmd}'""" + cmd = generate_sbatch_command(cmd, sbatch_args=args.get("sbatch_args")) + jobid = None try: # TODO: wrap the launch_grpc with sbatch process = launch_grpc( cmd=cmd, run_location=args["run_location"], env_vars=env_vars ) - out = process.stdout.read().decode() - if "Submitted batch job" not in out: - raise MapdlDidNotStart("PyMAPDL failed to submit the sbatch job.") + stdout = process.stdout.read().decode() + if "Submitted batch job" not in stdout: + stderr = process.stderr.read().decode() + raise MapdlDidNotStart( + f"PyMAPDL failed to submit the sbatch job:\n{stderr}" + ) + + jobid = get_jobid(stdout) + batch_host = get_hostname_host_cluster(jobid) except Exception as exception: LOG.error("An error occurred when launching MAPDL.") + + if start_parm.get("finish_job_on_exit", True) and jobid: + LOG.debug(f"Killing HPC job with id: {jobid}") + subprocess.Popen(["scancel", str(jobid)]) 
+ raise exception # TODO: A way to check if the job is ready. - jobid = get_jobid(out) - batch_host = get_hostname_host_cluster(jobid) start_parm["ip"] = batch_host start_parm["hostname"] = batch_host start_parm["jobid"] = jobid @@ -2594,14 +2619,14 @@ def launch_mapdl_on_cluster( return mapdl -def get_hostname_host_cluster(job_id): +def get_hostname_host_cluster(job_id: int) -> str: cmd = f"scontrol show jobid -dd {job_id}".split() LOG.debug(f"Executing the command '{cmd}'") ready = False time_start = time.time() timeout = 30 # second - + counter = 0 while not ready: proc = subprocess.Popen( cmd, @@ -2610,16 +2635,30 @@ def get_hostname_host_cluster(job_id): stdout = proc.stdout.read().decode() if time.time() > time_start + timeout: - raise MapdlDidNotStart("The Job didn't start on time.") + state = stdout.split("JobState=")[1].split(" ")[0] + try: + hostname_msg = f"The BatchHost for this job is '{get_hostname_from_scontrol(stdout)}'" + except (IndexError, AttributeError): + hostname_msg = f"PyMAPDL couldn't get the BatchHost hostname" + raise MapdlDidNotStart( + f"The HPC job (id: {job_id}) didn't start on time. " + f"The job state is '{state}'. " + f"{hostname_msg}. " + "You can check more information by issuing in your console:\n" + f" scontrol show jobid -dd {job_id}" + ) if "JobState=RUNNING" not in stdout: - LOG.debug("The job is not ready yet. Waiting...") + counter += 1 time.sleep(1) + if (counter % 3 + 1) == 0: # print every 3 seconds. Skipping the first. + LOG.debug("The job is not ready yet. Waiting...") + print("The job is not ready yet. 
Waiting...") else: ready = True LOG.debug(f"The 'scontrol' command returned:\n{stdout}") - batchhost = stdout.split("BatchHost=")[1].splitlines()[0] + batchhost = get_hostname_from_scontrol(stdout) LOG.debug(f"Batchhost: {batchhost}") # we should validate @@ -2629,15 +2668,60 @@ def get_hostname_host_cluster(job_id): return batchhost -def get_jobid(out: str) -> int: +def get_jobid(stdout: str) -> int: """Extract the jobid from a command output""" - job_id = out.strip().split(" ")[-1] + job_id = stdout.strip().split(" ")[-1] try: job_id = int(job_id) except ValueError: - LOG.error(f"The console output does not seems to have a valid jobid:\n{out}") + LOG.error(f"The console output does not seems to have a valid jobid:\n{stdout}") raise ValueError("PyMAPDL could not retrieve the job id.") LOG.debug(f"The job id is: {job_id}") return job_id + + +def generate_sbatch_command( + cmd: Union[str, List[str]], sbatch_args: Optional[Union[str, Dict[str, str]]] +) -> List[str]: + """Generate sbatch command for a given MAPDL launch command.""" + + def add_minus(arg: str): + if not arg: + return "" + + arg = str(arg) + + if not arg.startswith("-"): + if len(arg) == 1: + arg = f"-{arg}" + else: + arg = f"--{arg}" + return arg + + if sbatch_args: + if isinstance(sbatch_args, dict): + sbatch_args = " ".join( + [f"{add_minus(key)}='{value}'" for key, value in sbatch_args.items()] + ) + else: + sbatch_args = "" + + if "wrap" in sbatch_args: + raise ValueError( + "The sbatch argument 'wrap' is used by PyMAPDL to submit the job." + "Hence you cannot use it as sbatch argument." 
+ ) + LOG.debug(f"The additional sbatch arguments are: {sbatch_args}") + + if isinstance(cmd, list): + cmd = " ".join(cmd) + + cmd = ["sbatch", sbatch_args, "--wrap", f"'{cmd}'"] + cmd = [each for each in cmd if bool(each)] + return cmd + + +def get_hostname_from_scontrol(stdout: str) -> str: + return stdout.split("BatchHost=")[1].splitlines()[0] diff --git a/src/ansys/mapdl/core/mapdl_core.py b/src/ansys/mapdl/core/mapdl_core.py index 7deb2c9db0b..9609aef507f 100644 --- a/src/ansys/mapdl/core/mapdl_core.py +++ b/src/ansys/mapdl/core/mapdl_core.py @@ -167,6 +167,7 @@ "additional_switches", "check_parameter_names", "exec_file", + "finish_job_on_exit", "hostname", "ip", "jobid", From d8348c4de9abcd575f5e18ce87fc285cd8347c5f Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 17 Oct 2024 18:38:24 +0000 Subject: [PATCH 035/122] refactor: rename 'detect_HPC' to 'detect_hpc'. Introducing 'launch_on_hpc'. --- doc/source/user_guide/hpc/pymapdl.rst | 3 +- doc/source/user_guide/mapdl.rst | 2 +- src/ansys/mapdl/core/launcher.py | 79 ++++++++++++++++----------- tests/test_launcher.py | 12 ++-- 4 files changed, 55 insertions(+), 41 deletions(-) diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index 6f66ec52ca9..63d1e3ca1b5 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -32,7 +32,8 @@ For instance, if a SLURM job has allocated 8 nodes with 4 cores each, then PyMAPDL launches an MAPDL instance which uses 32 cores spawning across those 8 nodes. This behaviour can turn off if passing the environment variable -:envvar:`PYMAPDL_ON_SLURM` or passing the argument `detect_HPC=False` +:envvar:`PYMAPDL_RUNNING_ON_SLURM` with ``'false'`` value +or passing the argument `detect_hpc=False` to :func:`launch_mapdl() `. 
diff --git a/doc/source/user_guide/mapdl.rst b/doc/source/user_guide/mapdl.rst index d60d8e610d8..1899d2f77cb 100644 --- a/doc/source/user_guide/mapdl.rst +++ b/doc/source/user_guide/mapdl.rst @@ -1190,7 +1190,7 @@ These are described in the following table: | | user@machine:~$ export PYMAPDL_MAPDL_VERSION=22.2 | | | | +---------------------------------------+----------------------------------------------------------------------------------+ -| :envvar:`PYMAPDL_ON_SLURM` | With this environment variable set to ``FALSE``, you can avoid | +| :envvar:`PYMAPDL_RUNNING_ON_SLURM` | With this environment variable set to ``FALSE``, you can avoid | | | PyMAPDL from detecting that it is running on a SLURM HPC cluster. | +---------------------------------------+----------------------------------------------------------------------------------+ | :envvar:`PYMAPDL_MAX_MESSAGE_LENGTH` | Maximum gRPC message length. If your | diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index a940f03e66f..308b9f113d8 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -115,7 +115,7 @@ "add_env_vars", "replace_env_vars", "version", - "detect_HPC", + "detect_hpc", "set_no_abort", "force_intel" # Non documented args @@ -996,7 +996,8 @@ def launch_mapdl( add_env_vars: Optional[Dict[str, str]] = None, replace_env_vars: Optional[Dict[str, str]] = None, version: Optional[Union[int, str]] = None, - detect_HPC: bool = True, + detect_hpc: bool = True, + launch_on_hpc: bool = False, **kwargs: Dict[str, Any], ) -> Union[MapdlGrpc, "MapdlConsole"]: """Start MAPDL locally. @@ -1027,7 +1028,7 @@ def launch_mapdl( nproc : int, optional Number of processors. Defaults to 2. If running on an HPC cluster, this value is adjusted to the number of CPUs allocated to the job, - unless ``detect_HPC`` is set to "false". + unless ``detect_hpc`` is set to "false". 
ram : float, optional Total size in megabytes of the workspace (memory) used for the initial @@ -1074,9 +1075,10 @@ def launch_mapdl( port : int Port to launch MAPDL gRPC on. Final port will be the first port available after (or including) this port. Defaults to - 50052. You can also override the port default with the - environment variable ``PYMAPDL_PORT=`` - This argument has priority over the environment variable. + 50052. You can also provide this value through the environment variable + :envvar:`PYMAPDL_PORT`. For instance ``PYMAPDL_PORT=50053``. + However the argument (if specified) has precedence over the environment + variable. If this environment variable is empty, it is as it is not set. cleanup_on_exit : bool, optional Exit MAPDL when python exits or the mapdl Python instance is @@ -1085,9 +1087,11 @@ def launch_mapdl( start_instance : bool, optional When False, connect to an existing MAPDL instance at ``ip`` and ``port``, which default to ip ``'127.0.0.1'`` at port 50052. - Otherwise, launch a local instance of MAPDL. You can also - override the default behavior of this keyword argument with - the environment variable ``PYMAPDL_START_INSTANCE=FALSE``. + Otherwise, launch a local instance of MAPDL. You can also + provide this value through the environment variable + :envvar:`PYMAPDL_START_INSTANCE`. + However the argument (if specified) has precedence over the environment + variable. If this environment variable is empty, it is as it is not set. ip : str, optional Used only when ``start_instance`` is ``False``. If provided, @@ -1095,10 +1099,11 @@ def launch_mapdl( ``PYMAPDL_START_INSTANCE``) is ``True`` then, an exception is raised. Specify the IP address of the MAPDL instance to connect to. You can also provide a hostname as an alternative to an IP address. - Defaults to ``'127.0.0.1'``. You can also override the - default behavior of this keyword argument with the - environment variable ``PYMAPDL_IP=``. 
If this environment variable - is empty, it is as it is not set. + Defaults to ``'127.0.0.1'``. + You can also provide this value through the environment variable + :envvar:`PYMAPDL_IP`. For instance ``PYMAPDL_IP=123.45.67.89``. + However the argument (if specified) has precedence over the environment + variable. If this environment variable is empty, it is as it is not set. clear_on_connect : bool, optional Defaults to ``True``, giving you a fresh environment when @@ -1156,27 +1161,35 @@ def launch_mapdl( floats (i.e. ``version=22.2``). To retrieve the available installed versions, use the function :meth:`ansys.tools.path.path.get_available_ansys_installations`. + You can also provide this value through the environment variable + :envvar:`PYMAPDL_MAPDL_VERSION`. + For instance ``PYMAPDL_MAPDL_VERSION=22.2``. + However the argument (if specified) has precedence over the environment + variable. If this environment variable is empty, it is as it is not set. - .. note:: - - The default version can be also set through the environment variable - ``PYMAPDL_MAPDL_VERSION``. For example: - - .. code:: console - - export PYMAPDL_MAPDL_VERSION=22.2 - - detect_HPC: bool, optional + detect_hpc: bool, optional Whether detect if PyMAPDL is running on an HPC cluster or not. Currently only SLURM clusters are supported. By detaul, it is set to true. This option can be bypassed if the environment variable - ``PYMAPDL_ON_SLURM`` is set to "true". For more information visit - :ref:`ref_hpc_slurm`. + ``PYMAPDL_RUNNING_ON_HPC`` is set to "false". + For more information visit :ref:`ref_hpc_slurm`. + + launch_on_hpc: bool, optional + If ``True``, it uses the implemented scheduler (SLURM only) to launch + an MAPDL instance on the HPC. In this case you can pass the argument + 'scheduler_options' to ``launch_mapdl` to specify arguments as a + string or as a dictionary. + For more information visit :ref:`ref_hpc_slurm`. 
kwargs : dict, optional These keyword arguments are interface specific or for development purposes. See Notes for more details. + scheduler_options : :class:`str`, :class:`dict` + Use it to specify options to the scheduler run command. It can be a + string or a dictionary with arguments and its values (both as strings). + For more information visit :ref:`ref_hpc_slurm`. + set_no_abort : :class:`bool` *(Development use only)* Sets MAPDL to not abort at the first error within /BATCH mode. @@ -1437,7 +1450,7 @@ def launch_mapdl( cleanup_on_exit=args["cleanup_on_exit"], version=args["version"] ) - if args["ON_SLURM"]: + if args["RUNNING_ON_HPC"]: env_vars.setdefault("ANS_MULTIPLE_NODES", "1") env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") @@ -1923,18 +1936,18 @@ def pack_arguments(locals_): def is_on_slurm(args: Dict[str, Any]) -> bool: - args["ON_SLURM"] = os.environ.get("PYMAPDL_ON_SLURM", "True") + args["RUNNING_ON_HPC"] = os.environ.get("PYMAPDL_RUNNING_ON_HPC", "True") - is_flag_false = args["ON_SLURM"].lower() == "false" + is_flag_false = args["RUNNING_ON_HPC"].lower() == "false" # Let's require the following env vars to exist to go into slurm mode. - args["ON_SLURM"] = bool( - args["detect_HPC"] + args["RUNNING_ON_HPC"] = bool( + args["detect_hpc"] and not is_flag_false # default is true and os.environ.get("SLURM_JOB_NAME") and os.environ.get("SLURM_JOB_ID") ) - return args["ON_SLURM"] + return args["RUNNING_ON_HPC"] def generate_start_parameters(args: Dict[str, Any]) -> Dict[str, Any]: @@ -2360,7 +2373,7 @@ def get_cpus(args: Dict[str, Any]): # Bypassing number of processors checks because VDI/VNC might have # different number of processors than the cluster compute nodes. 
# Also the CPUs are set in `get_slurm_options` - if args["ON_SLURM"]: + if args["RUNNING_ON_HPC"]: return # Setting number of processors @@ -2425,7 +2438,7 @@ def launch_mapdl_on_cluster( add_env_vars: Optional[Dict[str, str]] = None, replace_env_vars: Optional[Dict[str, str]] = None, version: Optional[Union[int, str]] = None, - detect_HPC: bool = True, + detect_hpc: bool = True, **kwargs: Dict[str, Any], ): diff --git a/tests/test_launcher.py b/tests/test_launcher.py index d553320f6bf..af581b00fd2 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -695,17 +695,17 @@ def test_slurm_ram(monkeypatch, ram, expected, context): @pytest.mark.parametrize("slurm_env_var", ["True", "false", ""]) @pytest.mark.parametrize("slurm_job_name", ["True", "false", ""]) @pytest.mark.parametrize("slurm_job_id", ["True", "false", ""]) -@pytest.mark.parametrize("detect_HPC", [True, False, None]) +@pytest.mark.parametrize("detect_hpc", [True, False, None]) def test_is_on_slurm( - monkeypatch, slurm_env_var, slurm_job_name, slurm_job_id, detect_HPC + monkeypatch, slurm_env_var, slurm_job_name, slurm_job_id, detect_hpc ): - monkeypatch.setenv("PYMAPDL_ON_SLURM", slurm_env_var) + monkeypatch.setenv("PYMAPDL_RUNNING_ON_SLURM", slurm_env_var) monkeypatch.setenv("SLURM_JOB_NAME", slurm_job_name) monkeypatch.setenv("SLURM_JOB_ID", slurm_job_id) - flag = is_on_slurm(args={"detect_HPC": detect_HPC}) + flag = is_on_slurm(args={"detect_hpc": detect_hpc}) - if detect_HPC is not True: + if detect_hpc is not True: assert not flag else: @@ -721,7 +721,7 @@ def test_is_on_slurm( if ON_LOCAL: assert ( launch_mapdl( - detect_HPC=detect_HPC, + detect_hpc=detect_hpc, _debug_no_launch=True, )["ON_SLURM"] == flag From 7a6f7f0f10ebfeda5eca383fba7f940bbf2639ff Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 17 Oct 2024 19:28:52 +0000 Subject: [PATCH 036/122] refactor: move all the functionality to launch_mapdl --- src/ansys/mapdl/core/launcher.py | 
199 ++++++++++++++++++----------- src/ansys/mapdl/core/mapdl_core.py | 2 + src/ansys/mapdl/core/mapdl_grpc.py | 3 +- 3 files changed, 125 insertions(+), 79 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 308b9f113d8..7d1c3bae34f 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -92,37 +92,38 @@ ALLOWABLE_VERSION_INT = tuple(SUPPORTED_ANSYS_VERSIONS.keys()) ALLOWABLE_LAUNCH_MAPDL_ARGS = [ - "exec_file", - "run_location", - "jobname", - "nproc", - "ram", - "mode", - "override", - "loglevel", + "add_env_vars", "additional_switches", - "start_timeout", - "port", "cleanup_on_exit", - "start_instance", - "ip", "clear_on_connect", - "log_apdl", - "remove_temp_dir_on_exit", + "detect_hpc", + "exec_file", + "force_intel" "ip", + "jobname", + "launch_on_hpc", "license_server_check", "license_type", + "log_apdl", + "loglevel", + "mode", + "nproc", + "override", + "port", "print_com", - "add_env_vars", + "ram", + "remove_temp_dir_on_exit", "replace_env_vars", - "version", - "detect_hpc", + "run_location", + "scheduler_options", "set_no_abort", - "force_intel" + "start_instance", + "start_timeout", + "version", # Non documented args - "use_vtk", + "_debug_no_launch", "just_launch", "on_pool", - "_debug_no_launch", + "use_vtk", ] ON_WSL = os.name == "posix" and ( @@ -1369,7 +1370,7 @@ def launch_mapdl( pre_check_args(args) # SLURM settings - if is_on_slurm(args): + if is_running_on_slurm(args): LOG.info("On Slurm mode.") # extracting parameters @@ -1416,13 +1417,18 @@ def launch_mapdl( args["license_type"], args["additional_switches"] ) - env_vars = update_env_vars(args["add_env_vars"], args["replace_env_vars"]) + env_vars: Dict[str, str] = update_env_vars( + args["add_env_vars"], args["replace_env_vars"] + ) ######################################## # Context specific launching adjustments # -------------------------------------- # if args["start_instance"]: + # ON HPC: + # Assuming that if 
login node is ubuntu, the computation ones + # are also ubuntu. env_vars = configure_ubuntu(env_vars) # Set SMP by default if student version is used. @@ -1450,7 +1456,7 @@ def launch_mapdl( cleanup_on_exit=args["cleanup_on_exit"], version=args["version"] ) - if args["RUNNING_ON_HPC"]: + if args["RUNNING_ON_HPC"] or args["launch_on_hpc"]: env_vars.setdefault("ANS_MULTIPLE_NODES", "1") env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") @@ -1499,43 +1505,71 @@ def launch_mapdl( lic_check = LicenseChecker(timeout=args["start_timeout"]) lic_check.start() - try: - LOG.debug("Starting MAPDL") - if args["mode"] == "console": - from ansys.mapdl.core.mapdl_console import MapdlConsole + LOG.debug("Starting MAPDL") + if args["mode"] == "console": + from ansys.mapdl.core.mapdl_console import MapdlConsole - mapdl = MapdlConsole( - loglevel=args["loglevel"], - log_apdl=args["log_apdl"], - use_vtk=args["use_vtk"], - **start_parm, - ) + mapdl = MapdlConsole( + loglevel=args["loglevel"], + log_apdl=args["log_apdl"], + use_vtk=args["use_vtk"], + **start_parm, + ) - elif args["mode"] == "grpc": + elif args["mode"] == "grpc": - cmd = generate_mapdl_launch_command( - exec_file=args["exec_file"], - jobname=args["jobname"], - nproc=args["nproc"], - ram=args["ram"], - port=args["port"], - additional_switches=args["additional_switches"], - ) + cmd = generate_mapdl_launch_command( + exec_file=args["exec_file"], + jobname=args["jobname"], + nproc=args["nproc"], + ram=args["ram"], + port=args["port"], + additional_switches=args["additional_switches"], + ) + if args["launch_on_hpc"]: + cmd = generate_sbatch_command(cmd, sbatch_args=args.get("sbatch_args")) + + try: process = launch_grpc( cmd=cmd, run_location=args["run_location"], env_vars=env_vars ) - check_mapdl_launch( - process, args["run_location"], args["start_timeout"], cmd - ) + if args["launch_on_hpc"]: + check_mapdl_launch_on_hpc(process, start_parm) + else: + # Local mapdl launch check + check_mapdl_launch( + process, 
args["run_location"], args["start_timeout"], cmd + ) + + except Exception as exception: + LOG.error("An error occurred when launching MAPDL.") + + jobid: int = args.get("jobid", "Not found") + + if ( + args["launch_on_hpc"] + and start_parm.get("finish_job_on_exit", True) + and jobid not in ["Not found", None] + ): + + LOG.debug(f"Killing HPC job with id: {jobid}") + subprocess.Popen(["scancel", str(jobid)]) - if args["just_launch"]: - out = [args["ip"], args["port"]] - if hasattr(process, "pid"): - out += [process.pid] - return out + if args["license_server_check"]: + LOG.debug("Checking license server.") + lic_check.check() + raise exception + + if args["just_launch"]: + out = [args["ip"], args["port"]] + if hasattr(process, "pid"): + out += [process.pid] + return out + + try: mapdl = MapdlGrpc( cleanup_on_exit=args["cleanup_on_exit"], loglevel=args["loglevel"], @@ -1547,23 +1581,13 @@ def launch_mapdl( **start_parm, ) - # Setting launched property - mapdl._launched = True - mapdl._env_vars = env_vars + # Setting launched property + mapdl._launched = True + mapdl._env_vars = env_vars - except Exception as exception: - # Failed to launch for some reason. 
Check if failure was due - # to the license check - if args["license_server_check"]: - LOG.debug("Checking license server.") - lic_check.check() - - raise exception - - # Stopping license checker - if args["license_server_check"]: - LOG.debug("Stopping license server check.") - lic_check.is_connected = True + except Exception as exception: + LOG.error("An error occurred when connecting to MAPDL.") + raise exception return mapdl @@ -1930,11 +1954,13 @@ def pack_arguments(locals_): args["_debug_no_launch"] = locals_.get( "_debug_no_launch", locals_["kwargs"].get("_debug_no_launch", None) ) + args.setdefault("launch_on_hpc", False) + args.setdefault("ip", None) return args -def is_on_slurm(args: Dict[str, Any]) -> bool: +def is_running_on_slurm(args: Dict[str, Any]) -> bool: args["RUNNING_ON_HPC"] = os.environ.get("PYMAPDL_RUNNING_ON_HPC", "True") @@ -2386,7 +2412,7 @@ def get_cpus(args: Dict[str, Any]): # Check the env var `PYMAPDL_NPROC` args["nproc"] = int(os.environ.get("PYMAPDL_NPROC", min_cpus)) - if machine_cores < int(args["nproc"]): + if not args["launch_on_hpc"] and machine_cores < int(args["nproc"]): raise NotEnoughResources( f"The machine has {machine_cores} cores. PyMAPDL is asking for {args['nproc']} cores." 
) @@ -2454,7 +2480,7 @@ def launch_mapdl_on_cluster( pre_check_args(args) # SLURM settings - if is_on_slurm(args): + if is_running_on_slurm(args): LOG.info("On Slurm mode.") # extracting parameters @@ -2569,15 +2595,16 @@ def launch_mapdl_on_cluster( cmd=cmd, run_location=args["run_location"], env_vars=env_vars ) - stdout = process.stdout.read().decode() - if "Submitted batch job" not in stdout: - stderr = process.stderr.read().decode() - raise MapdlDidNotStart( - f"PyMAPDL failed to submit the sbatch job:\n{stderr}" - ) + def check_mapdl_launch_on_hpc(process, args): + stdout = process.stdout.read().decode() + if "Submitted batch job" not in stdout: + stderr = process.stderr.read().decode() + raise MapdlDidNotStart( + f"PyMAPDL failed to submit the sbatch job:\n{stderr}" + ) - jobid = get_jobid(stdout) - batch_host = get_hostname_host_cluster(jobid) + jobid = get_jobid(stdout) + batch_host = get_hostname_host_cluster(jobid) except Exception as exception: LOG.error("An error occurred when launching MAPDL.") @@ -2650,7 +2677,8 @@ def get_hostname_host_cluster(job_id: int) -> str: if time.time() > time_start + timeout: state = stdout.split("JobState=")[1].split(" ")[0] try: - hostname_msg = f"The BatchHost for this job is '{get_hostname_from_scontrol(stdout)}'" + host = get_hostname_from_scontrol(stdout) + hostname_msg = f"The BatchHost for this job is '{host}'" except (IndexError, AttributeError): hostname_msg = f"PyMAPDL couldn't get the BatchHost hostname" raise MapdlDidNotStart( @@ -2738,3 +2766,18 @@ def add_minus(arg: str): def get_hostname_from_scontrol(stdout: str) -> str: return stdout.split("BatchHost=")[1].splitlines()[0] + + +def check_mapdl_launch_on_hpc(process: subprocess.Popen, start_parm: Dict[str, str]): + stdout = process.stdout.read().decode() + if "Submitted batch job" not in stdout: + stderr = process.stderr.read().decode() + raise MapdlDidNotStart(f"PyMAPDL failed to submit the sbatch job:\n{stderr}") + + jobid = get_jobid(stdout) + batch_host 
= get_hostname_host_cluster(jobid) + batch_ip = socket.gethostbyname(batch_host) + + start_parm["ip"] = batch_ip + start_parm["hostname"] = batch_host + start_parm["jobid"] = jobid diff --git a/src/ansys/mapdl/core/mapdl_core.py b/src/ansys/mapdl/core/mapdl_core.py index 9609aef507f..a129b5c954f 100644 --- a/src/ansys/mapdl/core/mapdl_core.py +++ b/src/ansys/mapdl/core/mapdl_core.py @@ -166,12 +166,14 @@ _ALLOWED_START_PARM = [ "additional_switches", "check_parameter_names", + "env_vars", "exec_file", "finish_job_on_exit", "hostname", "ip", "jobid", "jobname", + "launch_on_hpc", "nproc", "override", "port", diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index eb63405eb3e..2c35fe22e8e 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -415,6 +415,7 @@ def __init__( self.__server_version: Optional[str] = None self._state: Optional[grpc.Future] = None self._timeout: int = timeout + self._env_vars: Dict[str, str] = start_parm.get("env_vars", {}) self._pids: List[Union[int, None]] = [] self._channel_state: grpc.ChannelConnectivity = ( grpc.ChannelConnectivity.CONNECTING @@ -1152,7 +1153,7 @@ def exit(self, save=False, force=False, **kwargs): if self._mapdl_on_slurm: self.kill_job(self.jobid) - self._log.debug("Job has been cancel.") + self._log.debug(f"Job (id: {self.jobid}) has been cancel.") self._remove_temp_dir_on_exit(mapdl_path) From d0c3f2553cd2b2198e5a0f66c9ffa91614018f89 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 17 Oct 2024 19:35:20 +0000 Subject: [PATCH 037/122] feat: launched is fixed now in 'launcher' silently. 
--- src/ansys/mapdl/core/launcher.py | 19 +++++++++++++++++-- src/ansys/mapdl/core/mapdl_core.py | 3 ++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 7d1c3bae34f..3ef48958a2b 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1473,6 +1473,7 @@ def launch_mapdl( LOG.debug( f"Connecting to an existing instance of MAPDL at {args['ip']}:{args['port']}" ) + start_parm["launched"] = False mapdl = MapdlGrpc( cleanup_on_exit=False, @@ -1507,6 +1508,10 @@ def launch_mapdl( LOG.debug("Starting MAPDL") if args["mode"] == "console": + ######################################## + # Launch MAPDL on console mode + # ---------------------------- + # from ansys.mapdl.core.mapdl_console import MapdlConsole mapdl = MapdlConsole( @@ -1517,7 +1522,10 @@ def launch_mapdl( ) elif args["mode"] == "grpc": - + ######################################## + # Launch MAPDL with gRPC + # ---------------------- + # cmd = generate_mapdl_launch_command( exec_file=args["exec_file"], jobname=args["jobname"], @@ -1528,9 +1536,11 @@ def launch_mapdl( ) if args["launch_on_hpc"]: + # wrapping command if on HPC cmd = generate_sbatch_command(cmd, sbatch_args=args.get("sbatch_args")) try: + # process = launch_grpc( cmd=cmd, run_location=args["run_location"], env_vars=env_vars ) @@ -1569,6 +1579,10 @@ def launch_mapdl( out += [process.pid] return out + ######################################## + # Connect to MAPDL using gRPC + # --------------------------- + # try: mapdl = MapdlGrpc( cleanup_on_exit=args["cleanup_on_exit"], @@ -1583,7 +1597,6 @@ def launch_mapdl( # Setting launched property mapdl._launched = True - mapdl._env_vars = env_vars except Exception as exception: LOG.error("An error occurred when connecting to MAPDL.") @@ -2012,6 +2025,8 @@ def generate_start_parameters(args: Dict[str, Any]) -> Dict[str, Any]: start_parm["override"] = args["override"] start_parm["timeout"] = 
args["start_timeout"] + start_parm["launched"] = True + LOG.debug(f"Using start parameters {start_parm}") return start_parm diff --git a/src/ansys/mapdl/core/mapdl_core.py b/src/ansys/mapdl/core/mapdl_core.py index a129b5c954f..2a9feb2ef5c 100644 --- a/src/ansys/mapdl/core/mapdl_core.py +++ b/src/ansys/mapdl/core/mapdl_core.py @@ -167,6 +167,7 @@ "additional_switches", "check_parameter_names", "env_vars", + "launched", "exec_file", "finish_job_on_exit", "hostname", @@ -249,7 +250,7 @@ def __init__( self._response = None self._mode = None self._mapdl_process = None - self._launched: bool = False + self._launched: bool = start_parm.get("launched", False) self._stderr = None self._stdout = None self._file_type_for_plots = file_type_for_plots From bd4606c4e93aa3888b7ecca9b17967c1294ce825 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:39:23 +0200 Subject: [PATCH 038/122] refactor: using `PYMAPDL_RUNNING_ON_HPC` as env var. Fixing bugs and tests --- doc/source/user_guide/hpc/pymapdl.rst | 2 +- doc/source/user_guide/mapdl.rst | 2 +- src/ansys/mapdl/core/launcher.py | 15 ++++++----- src/ansys/mapdl/core/mapdl_grpc.py | 11 +++++--- tests/test_launcher.py | 38 ++++++--------------------- 5 files changed, 26 insertions(+), 42 deletions(-) diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index 63d1e3ca1b5..b39a02fa8ce 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -32,7 +32,7 @@ For instance, if a SLURM job has allocated 8 nodes with 4 cores each, then PyMAPDL launches an MAPDL instance which uses 32 cores spawning across those 8 nodes. This behaviour can turn off if passing the environment variable -:envvar:`PYMAPDL_RUNNING_ON_SLURM` with ``'false'`` value +:envvar:`PYMAPDL_RUNNING_ON_HPC` with ``'false'`` value or passing the argument `detect_hpc=False` to :func:`launch_mapdl() `. 
diff --git a/doc/source/user_guide/mapdl.rst b/doc/source/user_guide/mapdl.rst index 1899d2f77cb..9a150478ae0 100644 --- a/doc/source/user_guide/mapdl.rst +++ b/doc/source/user_guide/mapdl.rst @@ -1190,7 +1190,7 @@ These are described in the following table: | | user@machine:~$ export PYMAPDL_MAPDL_VERSION=22.2 | | | | +---------------------------------------+----------------------------------------------------------------------------------+ -| :envvar:`PYMAPDL_RUNNING_ON_SLURM` | With this environment variable set to ``FALSE``, you can avoid | +| :envvar:`PYMAPDL_RUNNING_ON_HPC` | With this environment variable set to ``FALSE``, you can avoid | | | PyMAPDL from detecting that it is running on a SLURM HPC cluster. | +---------------------------------------+----------------------------------------------------------------------------------+ | :envvar:`PYMAPDL_MAX_MESSAGE_LENGTH` | Maximum gRPC message length. If your | diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 3ef48958a2b..9bf0fd8fb53 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -99,6 +99,7 @@ "detect_hpc", "exec_file", "force_intel" "ip", + "ip", "jobname", "launch_on_hpc", "license_server_check", @@ -1456,7 +1457,7 @@ def launch_mapdl( cleanup_on_exit=args["cleanup_on_exit"], version=args["version"] ) - if args["RUNNING_ON_HPC"] or args["launch_on_hpc"]: + if args["running_on_hpc"] or args["launch_on_hpc"]: env_vars.setdefault("ANS_MULTIPLE_NODES", "1") env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") @@ -1975,18 +1976,18 @@ def pack_arguments(locals_): def is_running_on_slurm(args: Dict[str, Any]) -> bool: - args["RUNNING_ON_HPC"] = os.environ.get("PYMAPDL_RUNNING_ON_HPC", "True") + args["running_on_hpc"] = os.environ.get("PYMAPDL_running_on_hpc", "True") - is_flag_false = args["RUNNING_ON_HPC"].lower() == "false" + is_flag_false = args["running_on_hpc"].lower() == "false" # Let's require the following env vars to exist to go 
into slurm mode. - args["RUNNING_ON_HPC"] = bool( + args["running_on_hpc"] = bool( args["detect_hpc"] and not is_flag_false # default is true and os.environ.get("SLURM_JOB_NAME") and os.environ.get("SLURM_JOB_ID") ) - return args["RUNNING_ON_HPC"] + return args["running_on_hpc"] def generate_start_parameters(args: Dict[str, Any]) -> Dict[str, Any]: @@ -2414,7 +2415,7 @@ def get_cpus(args: Dict[str, Any]): # Bypassing number of processors checks because VDI/VNC might have # different number of processors than the cluster compute nodes. # Also the CPUs are set in `get_slurm_options` - if args["RUNNING_ON_HPC"]: + if args["running_on_hpc"]: return # Setting number of processors @@ -2427,7 +2428,7 @@ def get_cpus(args: Dict[str, Any]): # Check the env var `PYMAPDL_NPROC` args["nproc"] = int(os.environ.get("PYMAPDL_NPROC", min_cpus)) - if not args["launch_on_hpc"] and machine_cores < int(args["nproc"]): + if not args.get("launch_on_hpc", False) and machine_cores < int(args["nproc"]): raise NotEnoughResources( f"The machine has {machine_cores} cores. PyMAPDL is asking for {args['nproc']} cores." 
) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index 2c35fe22e8e..6350275eee7 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -441,7 +441,7 @@ def __init__( # Storing HPC related stuff self._jobid: int = start_parm.get("jobid") self._hostname: str = start_parm.get("hostname") - self._mapdl_on_slurm: bool = bool(self._jobid) + self._mapdl_on_hpc: bool = bool(self._jobid) self.finish_job_on_exit: bool = start_parm.get("finish_job_on_exit", True) # Queueing the stds @@ -1151,7 +1151,7 @@ def exit(self, save=False, force=False, **kwargs): # No cover: The CI is working with a single MAPDL instance self._remote_instance.delete() - if self._mapdl_on_slurm: + if self._mapdl_on_hpc and self.finish_job_on_exit: self.kill_job(self.jobid) self._log.debug(f"Job (id: {self.jobid}) has been cancel.") @@ -3738,7 +3738,12 @@ def kill_job(self, jobid: int) -> None: Popen(cmd) def __del__(self): - if self._mapdl_on_slurm and self.finish_job_on_exit: + # For some reason, some tests do not seem this attribute. 
+ if ( + hasattr(self, "_mapdl_on_hpc") + and self._mapdl_on_hpc + and self.finish_job_on_exit + ): self.exit() else: super().__del__() diff --git a/tests/test_launcher.py b/tests/test_launcher.py index af581b00fd2..381c3179063 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -28,6 +28,7 @@ from unittest.mock import patch import warnings +import psutil import pytest from ansys.mapdl import core as pymapdl @@ -49,7 +50,7 @@ get_slurm_options, get_start_instance, get_version, - is_on_slurm, + is_running_on_slurm, launch_grpc, launch_mapdl, remove_err_files, @@ -696,14 +697,14 @@ def test_slurm_ram(monkeypatch, ram, expected, context): @pytest.mark.parametrize("slurm_job_name", ["True", "false", ""]) @pytest.mark.parametrize("slurm_job_id", ["True", "false", ""]) @pytest.mark.parametrize("detect_hpc", [True, False, None]) -def test_is_on_slurm( +def test_is_running_on_slurm( monkeypatch, slurm_env_var, slurm_job_name, slurm_job_id, detect_hpc ): - monkeypatch.setenv("PYMAPDL_RUNNING_ON_SLURM", slurm_env_var) + monkeypatch.setenv("PYMAPDL_RUNNING_ON_HPC", slurm_env_var) monkeypatch.setenv("SLURM_JOB_NAME", slurm_job_name) monkeypatch.setenv("SLURM_JOB_ID", slurm_job_id) - flag = is_on_slurm(args={"detect_hpc": detect_hpc}) + flag = is_running_on_slurm(args={"detect_hpc": detect_hpc}) if detect_hpc is not True: assert not flag @@ -881,31 +882,8 @@ def test_ip_and_start_instance( assert options["ip"] in (LOCALHOST, "0.0.0.0", "127.0.0.1") -def mycpucount(**kwargs): - return 10 # faking 10 cores - - -def test_nproc_envvar(monkeypatch): - monkeypatch.setenv("PYMAPDL_NPROC", 10) - args = launch_mapdl(_debug_no_launch=True) - assert args["nproc"] == 10 - - -@pytest.mark.parametrize("nproc", [None, 5, 9, 15]) -@patch("psutil.cpu_count", mycpucount) -def test_nproc(monkeypatch, nproc): - monkeypatch.delenv("PYMAPDL_START_INSTANCE", False) - - if nproc and nproc > mycpucount(): - with pytest.raises(NotEnoughResources): - launch_mapdl(nproc=nproc, 
_debug_no_launch=True) - else: - args = launch_mapdl(nproc=nproc, _debug_no_launch=True) - assert args["nproc"] == (nproc or 2) - - @patch("os.name", "nt") -@patch("psutil.cpu_count", mycpucount) +@patch("psutil.cpu_count", lambda *args, **kwargs: 10) def test_generate_mapdl_launch_command_windows(): assert os.name == "nt" # Checking mocking is properly done @@ -1137,7 +1115,7 @@ def test_get_cpus(monkeypatch, arg, env): if (arg and arg > cores_machine) or (arg is None and env and env > cores_machine): context = pytest.raises(NotEnoughResources) - args = {"nproc": arg, "ON_SLURM": False} + args = {"nproc": arg, "RUNNING_ON_HPC": False} with context: get_cpus(args) @@ -1151,6 +1129,6 @@ def test_get_cpus(monkeypatch, arg, env): @patch("psutil.cpu_count", lambda *args, **kwags: 1) def test_get_cpus_min(): - args = {"nproc": None, "ON_SLURM": False} + args = {"nproc": None, "RUNNING_ON_HPC": False} get_cpus(args) assert args["nproc"] == 1 From 75280c62bbf31787541c813ce2197992af5f6f14 Mon Sep 17 00:00:00 2001 From: pyansys-ci-bot <92810346+pyansys-ci-bot@users.noreply.github.com> Date: Fri, 18 Oct 2024 08:44:22 +0000 Subject: [PATCH 039/122] chore: adding changelog file 3497.documentation.md [dependabot-skip] --- doc/changelog.d/3497.documentation.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/changelog.d/3497.documentation.md diff --git a/doc/changelog.d/3497.documentation.md b/doc/changelog.d/3497.documentation.md new file mode 100644 index 00000000000..9034ad66062 --- /dev/null +++ b/doc/changelog.d/3497.documentation.md @@ -0,0 +1 @@ +feat: support for launching an MAPDL instance in an SLURM HPC cluster \ No newline at end of file From f0757021ee58fecea648100f368165d39e521cf1 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:30:11 +0200 Subject: [PATCH 040/122] refactor: rename to `scheduler_args` --- src/ansys/mapdl/core/launcher.py | 24 +++++++++++++----------- 1 file changed, 13 
insertions(+), 11 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 9bf0fd8fb53..025d030d20d 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1538,7 +1538,9 @@ def launch_mapdl( if args["launch_on_hpc"]: # wrapping command if on HPC - cmd = generate_sbatch_command(cmd, sbatch_args=args.get("sbatch_args")) + cmd = generate_sbatch_command( + cmd, scheduler_args=args.get("scheduler_args") + ) try: # @@ -2602,7 +2604,7 @@ def launch_mapdl_on_cluster( additional_switches=args["additional_switches"], ) - cmd = generate_sbatch_command(cmd, sbatch_args=args.get("sbatch_args")) + cmd = generate_sbatch_command(cmd, scheduler_args=args.get("scheduler_args")) jobid = None try: @@ -2740,7 +2742,7 @@ def get_jobid(stdout: str) -> int: def generate_sbatch_command( - cmd: Union[str, List[str]], sbatch_args: Optional[Union[str, Dict[str, str]]] + cmd: Union[str, List[str]], scheduler_args: Optional[Union[str, Dict[str, str]]] ) -> List[str]: """Generate sbatch command for a given MAPDL launch command.""" @@ -2757,25 +2759,25 @@ def add_minus(arg: str): arg = f"--{arg}" return arg - if sbatch_args: - if isinstance(sbatch_args, dict): - sbatch_args = " ".join( - [f"{add_minus(key)}='{value}'" for key, value in sbatch_args.items()] + if scheduler_args: + if isinstance(scheduler_args, dict): + scheduler_args = " ".join( + [f"{add_minus(key)}='{value}'" for key, value in scheduler_args.items()] ) else: - sbatch_args = "" + scheduler_args = "" - if "wrap" in sbatch_args: + if "wrap" in scheduler_args: raise ValueError( "The sbatch argument 'wrap' is used by PyMAPDL to submit the job." "Hence you cannot use it as sbatch argument." 
) - LOG.debug(f"The additional sbatch arguments are: {sbatch_args}") + LOG.debug(f"The additional sbatch arguments are: {scheduler_args}") if isinstance(cmd, list): cmd = " ".join(cmd) - cmd = ["sbatch", sbatch_args, "--wrap", f"'{cmd}'"] + cmd = ["sbatch", scheduler_args, "--wrap", f"'{cmd}'"] cmd = [each for each in cmd if bool(each)] return cmd From 31366168402cfac0aa1c48dac3fed43752882b02 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:41:57 +0200 Subject: [PATCH 041/122] fix: launching issues --- src/ansys/mapdl/core/launcher.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 025d030d20d..02c2454bac1 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -320,7 +320,7 @@ def generate_mapdl_launch_command( ram: Optional[int] = None, port: int = MAPDL_DEFAULT_PORT, additional_switches: str = "", -) -> str: +) -> List[str]: """Generate the command line to start MAPDL in gRPC mode. 
Parameters @@ -357,7 +357,7 @@ def generate_mapdl_launch_command( Returns ------- - str + List[str] Command """ @@ -414,11 +414,11 @@ def generate_mapdl_launch_command( command = " ".join(command_parm) LOG.debug(f"Generated command: {command}") - return command + return command_parm def launch_grpc( - cmd: str, + cmd: List[str], run_location: str = None, env_vars: Optional[Dict[str, str]] = None, ) -> subprocess.Popen: @@ -447,10 +447,11 @@ def launch_grpc( # disable all MAPDL pop-up errors: env_vars.setdefault("ANS_CMD_NODIAG", "TRUE") + cmd_string = " ".join(cmd) if "sbatch" in cmd: header = "Running an MAPDL instance on the Cluster:" shell = os.name != "nt" - cmd_ = " ".join(cmd) + cmd_ = cmd_string else: header = "Running an MAPDL instance" shell = False # To prevent shell injection @@ -460,7 +461,7 @@ def launch_grpc( "\n============" "\n============" f"{header}:\nLocation:\n{run_location}\n" - f"Command:\n{' '.join(cmd)}\n" + f"Command:\n{cmd_string}\n" f"Env vars:\n{env_vars}" "\n============" "\n============" @@ -468,7 +469,7 @@ def launch_grpc( if os.name == "nt": # getting tmp file name - tmp_inp = cmd.split()[cmd.split().index("-i") + 1] + tmp_inp = cmd_string.split()[cmd_string.split().index("-i") + 1] with open(os.path.join(run_location, tmp_inp), "w") as f: f.write("FINISH\r\n") LOG.debug(f"Writing temporary input file: {tmp_inp} with 'FINISH' command.") From 80b96d5c6b6ce4d46e9469854853e8a81a68ed9b Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:13:41 +0000 Subject: [PATCH 042/122] fix: tests --- src/ansys/mapdl/core/launcher.py | 9 +++--- tests/test_launcher.py | 53 ++++++++++++++++++++++++++++---- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 02c2454bac1..1a0be08bba1 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -411,10 +411,12 @@ def 
generate_mapdl_launch_command( command_parm = [ each for each in command_parm if each.strip() ] # cleaning empty args. - command = " ".join(command_parm) + + command = " ".join(command_parm[1:]).split(" ") + command.insert(0, f"{exec_file}") LOG.debug(f"Generated command: {command}") - return command_parm + return command def launch_grpc( @@ -1978,8 +1980,7 @@ def pack_arguments(locals_): def is_running_on_slurm(args: Dict[str, Any]) -> bool: - - args["running_on_hpc"] = os.environ.get("PYMAPDL_running_on_hpc", "True") + args["running_on_hpc"] = os.environ.get("PYMAPDL_RUNNING_ON_HPC", "True") is_flag_false = args["running_on_hpc"].lower() == "false" diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 381c3179063..bcb5456010d 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -724,7 +724,7 @@ def test_is_running_on_slurm( launch_mapdl( detect_hpc=detect_hpc, _debug_no_launch=True, - )["ON_SLURM"] + )["running_on_hpc"] == flag ) @@ -903,7 +903,27 @@ def test_generate_mapdl_launch_command_windows(): additional_switches=additional_switches, ) - assert f'"{exec_file}" ' in cmd + assert isinstance(cmd, list) + + assert f"{exec_file}" in cmd + assert "-j" in cmd + assert f"{jobname}" in cmd + assert "-port" in cmd + assert f"{port}" in cmd + assert "-m" in cmd + assert f"{ram*1024}" in cmd + assert "-np" in cmd + assert f"{nproc}" in cmd + assert "-grpc" in cmd + assert f"{additional_switches}" in cmd + assert "-b" in cmd + assert "-i" in cmd + assert ".__tmp__.inp" in cmd + assert "-o" in cmd + assert ".__tmp__.out" in cmd + + cmd = " ".join(cmd) + assert f"{exec_file} " in cmd assert f" -j {jobname} " in cmd assert f" -port {port} " in cmd assert f" -m {ram*1024} " in cmd @@ -933,7 +953,28 @@ def test_generate_mapdl_launch_command_linux(): additional_switches=additional_switches, ) - assert f'"{exec_file}" ' in cmd + assert isinstance(cmd, list) + + assert f"{exec_file}" in cmd + assert "-j" in cmd + assert f"{jobname}" in cmd + assert 
"-port" in cmd + assert f"{port}" in cmd + assert "-m" in cmd + assert f"{ram*1024}" in cmd + assert "-np" in cmd + assert f"{nproc}" in cmd + assert "-grpc" in cmd + assert f"{additional_switches}" in cmd + + assert "-b" not in cmd + assert "-i" not in cmd + assert ".__tmp__.inp" not in cmd + assert "-o" not in cmd + assert ".__tmp__.out" not in cmd + + cmd = " ".join(cmd) + assert f"{exec_file} " in cmd assert f" -j {jobname} " in cmd assert f" -port {port} " in cmd assert f" -m {ram*1024} " in cmd @@ -1084,7 +1125,7 @@ def fake_subprocess_open(*args, **kwargs): @patch("os.name", "nt") @patch("subprocess.Popen", fake_subprocess_open) def test_launch_grpc(tmpdir): - cmd = "ansys.exe -b -i my_input.inp -o my_output.inp" + cmd = "ansys.exe -b -i my_input.inp -o my_output.inp".split(" ") run_location = str(tmpdir) kwags = launch_grpc(cmd, run_location) @@ -1115,7 +1156,7 @@ def test_get_cpus(monkeypatch, arg, env): if (arg and arg > cores_machine) or (arg is None and env and env > cores_machine): context = pytest.raises(NotEnoughResources) - args = {"nproc": arg, "RUNNING_ON_HPC": False} + args = {"nproc": arg, "running_on_hpc": False} with context: get_cpus(args) @@ -1129,6 +1170,6 @@ def test_get_cpus(monkeypatch, arg, env): @patch("psutil.cpu_count", lambda *args, **kwags: 1) def test_get_cpus_min(): - args = {"nproc": None, "RUNNING_ON_HPC": False} + args = {"nproc": None, "running_on_hpc": False} get_cpus(args) assert args["nproc"] == 1 From e9ba446ec495c42a4442712678dc5ab66b98ed0d Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:56:56 +0200 Subject: [PATCH 043/122] docs: formatting changes. 
--- src/ansys/mapdl/core/launcher.py | 118 +++++++++++++++++-------------- 1 file changed, 63 insertions(+), 55 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 1a0be08bba1..1759b1eec95 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -327,7 +327,7 @@ def generate_mapdl_launch_command( ---------- exec_file : str, optional The location of the MAPDL executable. Will use the cached - location when left at the default ``None``. + location when left at the default :class:`None`. jobname : str, optional MAPDL jobname. Defaults to ``'file'``. @@ -337,7 +337,7 @@ def generate_mapdl_launch_command( ram : float, optional Total size in megabytes of the workspace (memory) used for the initial allocation. - The default is ``None``, in which case 2 GB (2048 MB) is used. To force a fixed size + The default is :class:`None`, in which case 2 GB (2048 MB) is used. To force a fixed size throughout the run, specify a negative number. port : int @@ -700,8 +700,8 @@ def get_start_instance(start_instance: Optional[Union[bool, str]] = None) -> boo Returns ------- bool - ``True`` when the ``PYMAPDL_START_INSTANCE`` environment variable is - true, ``False`` when PYMAPDL_START_INSTANCE is false. If unset, + :class:`True` when the ``PYMAPDL_START_INSTANCE`` environment variable is + true, :class:`False` when PYMAPDL_START_INSTANCE is false. If unset, returns ``start_instance``. Raises @@ -881,7 +881,7 @@ def set_MPI_additional_switches( exec_path : str Path to the MAPDL executable. force_intel : bool, optional - Force the usage of intelmpi. The default is ``False``. + Force the usage of intelmpi. The default is :class:`False`. Returns ------- @@ -1011,17 +1011,15 @@ def launch_mapdl( ---------- exec_file : str, optional The location of the MAPDL executable. 
Will use the cached - location when left at the default ``None`` and no environment + location when left at the default :class:`None` and no environment variable is set. - .. note:: + The executable path can be also set through the environment variable + :envvar:`PYMAPDL_MAPDL_EXEC`. For example: - The executable path can be also set through the environment variable - ``PYMAPDL_MAPDL_EXEC``. For example: + .. code:: console - .. code:: console - - export PYMAPDL_MAPDL_EXEC=/ansys_inc/v211/ansys/bin/mapdl + export PYMAPDL_MAPDL_EXEC=/ansys_inc/v211/ansys/bin/mapdl run_location : str, optional MAPDL working directory. Defaults to a temporary working @@ -1031,13 +1029,13 @@ def launch_mapdl( MAPDL jobname. Defaults to ``'file'``. nproc : int, optional - Number of processors. Defaults to 2. If running on an HPC cluster, + Number of processors. Defaults to ``2``. If running on an HPC cluster, this value is adjusted to the number of CPUs allocated to the job, - unless ``detect_hpc`` is set to "false". + unless the argument ``detect_hpc`` is set to ``"false"``. ram : float, optional Total size in megabytes of the workspace (memory) used for the initial - allocation. The default is ``None``, in which case 2 GB (2048 MB) is + allocation. The default is :class:`None`, in which case 2 GB (2048 MB) is used. To force a fixed size throughout the run, specify a negative number. @@ -1080,7 +1078,7 @@ def launch_mapdl( port : int Port to launch MAPDL gRPC on. Final port will be the first port available after (or including) this port. Defaults to - 50052. You can also provide this value through the environment variable + ``50052``. You can also provide this value through the environment variable :envvar:`PYMAPDL_PORT`. For instance ``PYMAPDL_PORT=50053``. However the argument (if specified) has precedence over the environment variable. If this environment variable is empty, it is as it is not set. @@ -1090,8 +1088,8 @@ def launch_mapdl( garbage collected. 
start_instance : bool, optional - When False, connect to an existing MAPDL instance at ``ip`` - and ``port``, which default to ip ``'127.0.0.1'`` at port 50052. + When :class:`False`, connect to an existing MAPDL instance at ``ip`` + and ``port``, which default to ip ``'127.0.0.1'`` at port ``50052``. Otherwise, launch a local instance of MAPDL. You can also provide this value through the environment variable :envvar:`PYMAPDL_START_INSTANCE`. @@ -1099,21 +1097,22 @@ def launch_mapdl( variable. If this environment variable is empty, it is as it is not set. ip : str, optional - Used only when ``start_instance`` is ``False``. If provided, - and ``start_instance`` (or its correspondent environment variable - ``PYMAPDL_START_INSTANCE``) is ``True`` then, an exception is raised. Specify the IP address of the MAPDL instance to connect to. You can also provide a hostname as an alternative to an IP address. Defaults to ``'127.0.0.1'``. + Used only when ``start_instance`` is :class:`False`. If this argument + is provided, and ``start_instance`` (or its correspondent environment + variable :envvar:`PYMAPDL_START_INSTANCE`) is :class:`True` then, an + exception is raised. You can also provide this value through the environment variable :envvar:`PYMAPDL_IP`. For instance ``PYMAPDL_IP=123.45.67.89``. However the argument (if specified) has precedence over the environment variable. If this environment variable is empty, it is as it is not set. clear_on_connect : bool, optional - Defaults to ``True``, giving you a fresh environment when + Defaults to :class:`True`, giving you a fresh environment when connecting to MAPDL. When if ``start_instance`` is specified - it defaults to ``False``. + it defaults to :class:`False`. log_apdl : str, optional Enables logging every APDL command to the local disk. This @@ -1123,45 +1122,46 @@ def launch_mapdl( ``log_apdl='pymapdl_log.txt'``). By default this is disabled. 
remove_temp_dir_on_exit : bool, optional - When ``run_location`` is ``None``, this launcher creates a new MAPDL + When ``run_location`` is :class:`None`, this launcher creates a new MAPDL working directory within the user temporary directory, obtainable with ``tempfile.gettempdir()``. When this parameter is - ``True``, this directory will be deleted when MAPDL is exited. Default - ``False``. + :class:`True`, this directory will be deleted when MAPDL is exited. + Default to :class:`False`. If you change the working directory, PyMAPDL does not delete the original working directory nor the new one. license_server_check : bool, optional Check if the license server is available if MAPDL fails to - start. Only available on ``mode='grpc'``. Defaults ``False``. + start. Only available on ``mode='grpc'``. Defaults :class:`False`. license_type : str, optional Enable license type selection. You can input a string for its license name (for example ``'meba'`` or ``'ansys'``) or its description ("enterprise solver" or "enterprise" respectively). You can also use legacy licenses (for example ``'aa_t_a'``) but it will - also raise a warning. If it is not used (``None``), no specific license - will be requested, being up to the license server to provide a specific - license type. Default is ``None``. + also raise a warning. If it is not used (:class:`None`), no specific + license will be requested, being up to the license server to provide a + specific license type. Default is :class:`None`. print_com : bool, optional Print the command ``/COM`` arguments to the standard output. - Default ``False``. + Default :class:`False`. add_env_vars : dict, optional The provided dictionary will be used to extend the MAPDL process environment variables. If you want to control all of the environment - variables, use the argument ``replace_env_vars``. Defaults to ``None``. + variables, use the argument ``replace_env_vars``. + Defaults to :class:`None`. 
replace_env_vars : dict, optional The provided dictionary will be used to replace all the MAPDL process environment variables. It replace the system environment variables which otherwise would be used in the process. To just add some environment variables to the MAPDL - process, use ``add_env_vars``. Defaults to ``None``. + process, use ``add_env_vars``. Defaults to :class:`None`. version : float, optional - Version of MAPDL to launch. If ``None``, the latest version is used. + Version of MAPDL to launch. If :class:`None`, the latest version is used. Versions can be provided as integers (i.e. ``version=222``) or floats (i.e. ``version=22.2``). To retrieve the available installed versions, use the function @@ -1172,24 +1172,26 @@ def launch_mapdl( However the argument (if specified) has precedence over the environment variable. If this environment variable is empty, it is as it is not set. - detect_hpc: bool, optional + detect_hpc : bool, optional Whether detect if PyMAPDL is running on an HPC cluster or not. Currently only SLURM clusters are supported. By detaul, it is set to true. This option can be bypassed if the environment variable - ``PYMAPDL_RUNNING_ON_HPC`` is set to "false". + :envvar:`PYMAPDL_RUNNING_ON_HPC` is set to ``"false"``. For more information visit :ref:`ref_hpc_slurm`. - launch_on_hpc: bool, optional - If ``True``, it uses the implemented scheduler (SLURM only) to launch + launch_on_hpc : bool, optional + If :class:`True`, it uses the implemented scheduler (SLURM only) to launch an MAPDL instance on the HPC. In this case you can pass the argument - 'scheduler_options' to ``launch_mapdl` to specify arguments as a - string or as a dictionary. + '`scheduler_options`' to + :func:`launch_mapdl() ` + to specify arguments as a string or as a dictionary. For more information visit :ref:`ref_hpc_slurm`. - kwargs : dict, optional + kwargs : dict, Optional These keyword arguments are interface specific or for development purposes. 
See Notes for more details. + scheduler_options : :class:`str`, :class:`dict` Use it to specify options to the scheduler run command. It can be a string or a dictionary with arguments and its values (both as strings). @@ -1198,14 +1200,15 @@ def launch_mapdl( set_no_abort : :class:`bool` *(Development use only)* Sets MAPDL to not abort at the first error within /BATCH mode. - Defaults to ``True``. + Defaults to :class:`True`. force_intel : :class:`bool` *(Development use only)* Forces the use of Intel message pass interface (MPI) in versions between - Ansys 2021R0 and 2022R2, where because of VPNs issues this MPI is deactivated - by default. See :ref:`vpn_issues_troubleshooting` for more information. - Defaults to ``False``. + Ansys 2021R0 and 2022R2, where because of VPNs issues this MPI is + deactivated by default. + See :ref:`vpn_issues_troubleshooting` for more information. + Defaults to :class:`False`. Returns ------- @@ -1319,8 +1322,10 @@ def launch_mapdl( Enables shared-memory parallelism. See the Parallel Processing Guide for more information. + **PyPIM** + If the environment is configured to use `PyPIM `_ - and ``start_instance`` is ``True``, then starting the instance will be delegated to PyPIM. + and ``start_instance`` is :class:`True`, then starting the instance will be delegated to PyPIM. In this event, most of the options will be ignored and the server side configuration will be used. @@ -1449,16 +1454,19 @@ def launch_mapdl( LOG.debug(f"Using additional switches {args['additional_switches']}.") - start_parm = generate_start_parameters(args) + # Delegating to PyPIM + if _HAS_PIM and exec_file is None and pypim.is_configured(): + # Start MAPDL with PyPIM if the environment is configured for it + # and the user did not pass a directive on how to launch it. + LOG.info( + "Starting MAPDL remotely. The startup configuration will be ignored." 
+ ) - if _HAS_PIM and exec_file is None and pypim.is_configured(): - # Start MAPDL with PyPIM if the environment is configured for it - # and the user did not pass a directive on how to launch it. - LOG.info("Starting MAPDL remotely. The startup configuration will be ignored.") + return launch_remote_mapdl( + cleanup_on_exit=args["cleanup_on_exit"], version=args["version"] + ) - return launch_remote_mapdl( - cleanup_on_exit=args["cleanup_on_exit"], version=args["version"] - ) + start_parm = generate_start_parameters(args) if args["running_on_hpc"] or args["launch_on_hpc"]: env_vars.setdefault("ANS_MULTIPLE_NODES", "1") @@ -1614,7 +1622,7 @@ def launch_mapdl( def check_mode(mode: ALLOWABLE_MODES, version: ALLOWABLE_VERSION_INT): """Check if the MAPDL server mode matches the allowable version - If ``None``, the newest mode will be selected. + If :class:`None`, the newest mode will be selected. Returns a value from ``ALLOWABLE_MODES``. """ From 993f7ffa5c66ea5e7e2e7f21ed302b41370dd95b Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 18 Oct 2024 17:10:52 +0200 Subject: [PATCH 044/122] docs: more cosmetic changes. --- src/ansys/mapdl/core/launcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 1759b1eec95..d098bb2f89c 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1226,8 +1226,8 @@ def launch_mapdl( **Additional switches** These are the MAPDL switch options as of 2020R2 applicable for - running MAPDL as a service via gRPC. Excluded switches such as - ``"-j"`` either not applicable or are set via keyword arguments. + running MAPDL as a service via gRPC. Excluded switches not applicable or + are set via keyword arguments such as ``"-j"`` . \\-acc Enables the use of GPU hardware. 
See GPU From 03aab7e28531ab43d9afeaa51c1b4c885dd210e0 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 18 Oct 2024 17:12:04 +0200 Subject: [PATCH 045/122] tests: adding 'launch_grpc' testing. --- tests/test_launcher.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index bcb5456010d..7a36dfa1c04 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -1123,24 +1123,34 @@ def fake_subprocess_open(*args, **kwargs): @patch("os.name", "nt") +@pytest.mark.parametrize("launch_on_hpc", [None, False, True]) @patch("subprocess.Popen", fake_subprocess_open) -def test_launch_grpc(tmpdir): - cmd = "ansys.exe -b -i my_input.inp -o my_output.inp".split(" ") +def test_launch_grpc(tmpdir, launch_on_hpc): + if launch_on_hpc: + cmd = ["sbatch", "--wrap", "'ansys.exe -b -i my_input.inp -o my_output.inp'"] + else: + cmd = "ansys.exe -b -i my_input.inp -o my_output.inp".split(" ") run_location = str(tmpdir) - kwags = launch_grpc(cmd, run_location) + kwargs = launch_grpc(cmd, run_location) inp_file = os.path.join(run_location, "my_input.inp") assert os.path.exists(inp_file) with open(inp_file, "r") as fid: assert "FINISH" in fid.read() - assert cmd == kwags["cmd"] - assert not kwags["shell"] - assert "TRUE" == kwags["env"].pop("ANS_CMD_NODIAG") - assert not kwags["env"] - assert isinstance(kwags["stdin"], type(subprocess.DEVNULL)) - assert isinstance(kwags["stdout"], type(subprocess.PIPE)) - assert isinstance(kwags["stderr"], type(subprocess.PIPE)) + if launch_on_hpc: + assert "sbatch" in kwargs["cmd"] + assert "--wrap" in kwargs["cmd"] + assert " ".join(cmd) == kwargs["cmd"] + else: + assert cmd == kwargs["cmd"] + + assert not kwargs["shell"] + assert "TRUE" == kwargs["env"].pop("ANS_CMD_NODIAG") + assert not kwargs["env"] + assert isinstance(kwargs["stdin"], type(subprocess.DEVNULL)) + assert isinstance(kwargs["stdout"], 
type(subprocess.PIPE)) + assert isinstance(kwargs["stderr"], type(subprocess.PIPE)) @patch("psutil.cpu_count", lambda *args, **kwags: 5) @@ -1173,3 +1183,6 @@ def test_get_cpus_min(): args = {"nproc": None, "running_on_hpc": False} get_cpus(args) assert args["nproc"] == 1 + + +# def test_launch_grpc_slurm() From 22953a55b74b6409fb4ed746a817464d1ccf58d1 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 21 Oct 2024 09:59:48 +0200 Subject: [PATCH 046/122] tests: adding some unit tests --- src/ansys/mapdl/core/launcher.py | 11 ++- tests/test_launcher.py | 117 ++++++++++++++++++++++++++++++- 2 files changed, 125 insertions(+), 3 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index d098bb2f89c..f030e0df49e 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -2767,7 +2767,11 @@ def add_minus(arg: str): arg = f"-{arg}" else: arg = f"--{arg}" - return arg + elif not arg.startswith("--") and len(arg) > 2: + # missing one "-" for a long argument + arg = f"-{arg}" + + return arg if scheduler_args: if isinstance(scheduler_args, dict): @@ -2800,7 +2804,10 @@ def check_mapdl_launch_on_hpc(process: subprocess.Popen, start_parm: Dict[str, s stdout = process.stdout.read().decode() if "Submitted batch job" not in stdout: stderr = process.stderr.read().decode() - raise MapdlDidNotStart(f"PyMAPDL failed to submit the sbatch job:\n{stderr}") + raise MapdlDidNotStart( + "PyMAPDL failed to submit the sbatch job:\n" + f"stdout:\n{stdout}\nstderr:\n{stderr}" + ) jobid = get_jobid(stdout) batch_host = get_hostname_host_cluster(jobid) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 7a36dfa1c04..361b1cfc3fa 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -25,6 +25,7 @@ import os import subprocess import tempfile +from time import sleep from unittest.mock import patch import warnings @@ -33,6 +34,7 @@ from ansys.mapdl import core as 
pymapdl from ansys.mapdl.core.errors import ( + MapdlDidNotStart, NotEnoughResources, PortAlreadyInUseByAnMAPDLInstance, ) @@ -41,8 +43,10 @@ LOCALHOST, _is_ubuntu, _parse_ip_route, + check_mapdl_launch_on_hpc, force_smp_in_student, generate_mapdl_launch_command, + generate_sbatch_command, generate_start_parameters, get_cpus, get_exec_file, @@ -92,6 +96,27 @@ start_timeout = 30 # Seconds +def get_fake_process(message_stdout, message_stderr="", time_sleep=0): + class stdout: + def read(self): + return message_stdout.encode() + + class stderr: + def read(self): + return message_stderr.encode() + + class myprocess: + pass + + process = myprocess() + process.stdout = stdout() + process.stderr = stderr() + + sleep(time_sleep) + + return process + + @pytest.fixture def fake_local_mapdl(mapdl): """Fixture to execute asserts before and after a test is run""" @@ -1185,4 +1210,94 @@ def test_get_cpus_min(): assert args["nproc"] == 1 -# def test_launch_grpc_slurm() +@pytest.mark.parametrize( + "scheduler_args", + [None, "-N 10", {"N": 10, "nodes": 10, "-tasks": 3, "--ntask-per-node": 2}], +) +def test_generate_sbatch_command(scheduler_args): + cmd = [ + "/ansys_inc/v242/ansys/bin/ansys242", + "-j", + "myjob", + "-np", + "10", + "-m", + "1024", + "-port", + "50052", + "-my_add=switch", + ] + + cmd_post = generate_sbatch_command(cmd, scheduler_args) + + assert cmd_post[0] == "sbatch" + if scheduler_args: + if isinstance(scheduler_args, dict): + assert ( + cmd_post[1] == "-N='10' --nodes='10' --tasks='3' --ntask-per-node='2'" + ) + else: + assert cmd_post[1] == scheduler_args + + assert cmd_post[-2] == "--wrap" + assert cmd_post[-1] == f"""'{" ".join(cmd)}'""" + + +@pytest.mark.parametrize( + "scheduler_args", [None, "--wrap '/bin/bash", {"--wrap": "/bin/bash", "nodes": 10}] +) +def test_generate_sbatch_wrap_in_arg(scheduler_args): + cmd = ["/ansys_inc/v242/ansys/bin/ansys242", "-grpc"] + if scheduler_args: + context = pytest.raises( + ValueError, + match="The sbatch argument 
'wrap' is used by PyMAPDL to submit the job.", + ) + else: + context = NullContext() + + with context: + cmd_post = generate_sbatch_command(cmd, scheduler_args) + + +def myfakegethostbyname(*args, **kwargs): + return "mycoolhostname" + + +def myfakegethostbynameIP(*args, **kwargs): + return "123.45.67.89" + + +@pytest.mark.parametrize( + "message_stdout, message_stderr", + [ + ["Submitted batch job 1001", ""], + ["Submission failed", "Something very bad happened"], + ], +) +@patch("socket.gethostbyname", myfakegethostbynameIP) +@patch("ansys.mapdl.core.launcher.get_hostname_host_cluster", myfakegethostbyname) +def test_check_mapdl_launch_on_hpc(message_stdout, message_stderr): + + process = get_fake_process(message_stdout, message_stderr) + + start_parm = {} + if "Submitted batch job" in message_stdout: + context = NullContext() + + else: + context = pytest.raises( + MapdlDidNotStart, + match=f"stdout:\n{message_stdout}\nstderr:\n{message_stderr}", + ) + + with context: + check_mapdl_launch_on_hpc(process, start_parm) + + if "Submitted batch job" in message_stdout: + start_parm["ip"] == "123.45.67.89" + start_parm["hostname"] == "mycoolhostname" + start_parm["jobid"] == 1001 + + +# def test_get_hostname_host_cluster() From 60bf932184c30afa614b57147304e01e15fe2371 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:09:33 +0200 Subject: [PATCH 047/122] fix: unit tests --- tests/test_launcher.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index d553320f6bf..63ea33d1d32 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -28,6 +28,7 @@ from unittest.mock import patch import warnings +import psutil import pytest from ansys.mapdl import core as pymapdl @@ -885,6 +886,7 @@ def mycpucount(**kwargs): return 10 # faking 10 cores +@patch("psutil.cpu_count", mycpucount) def test_nproc_envvar(monkeypatch): monkeypatch.setenv("PYMAPDL_NPROC", 10) args = 
launch_mapdl(_debug_no_launch=True) From d027edde88f8cd8fb8313637289063a4a8c11e71 Mon Sep 17 00:00:00 2001 From: pyansys-ci-bot <92810346+pyansys-ci-bot@users.noreply.github.com> Date: Mon, 21 Oct 2024 09:19:50 +0000 Subject: [PATCH 048/122] chore: adding changelog file 3466.documentation.md [dependabot-skip] --- doc/changelog.d/3466.documentation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/changelog.d/3466.documentation.md b/doc/changelog.d/3466.documentation.md index 902767602d1..7c211ed0d19 100644 --- a/doc/changelog.d/3466.documentation.md +++ b/doc/changelog.d/3466.documentation.md @@ -1 +1 @@ -feat: passing tight integration env vars to mapdl \ No newline at end of file +docs: documenting using pymapdl on clusters \ No newline at end of file From 83a1d797c627c88229108e36117ca77f38af48aa Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 21 Oct 2024 12:42:30 +0200 Subject: [PATCH 049/122] fix: adding missing import --- src/ansys/mapdl/core/misc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ansys/mapdl/core/misc.py b/src/ansys/mapdl/core/misc.py index 254c91f8a14..e4a13f011e4 100644 --- a/src/ansys/mapdl/core/misc.py +++ b/src/ansys/mapdl/core/misc.py @@ -29,6 +29,7 @@ from pathlib import Path import platform import random +import re import socket import string import tempfile From 6fb698da9d41cde3351148ab37df166c42fe2dea Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 21 Oct 2024 13:24:03 +0200 Subject: [PATCH 050/122] refactoring: `check_mapdl_launch_on_hpc` and addressing codacity issues --- src/ansys/mapdl/core/launcher.py | 79 +++++++++++++++++++++--------- src/ansys/mapdl/core/mapdl_grpc.py | 2 +- tests/test_launcher.py | 7 +-- 3 files changed, 60 insertions(+), 28 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 9a0a3914f82..79e76375150 100644 --- 
a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -2614,16 +2614,8 @@ def launch_mapdl_on_cluster( cmd=cmd, run_location=args["run_location"], env_vars=env_vars ) - def check_mapdl_launch_on_hpc(process, args): - stdout = process.stdout.read().decode() - if "Submitted batch job" not in stdout: - stderr = process.stderr.read().decode() - raise MapdlDidNotStart( - f"PyMAPDL failed to submit the sbatch job:\n{stderr}" - ) - - jobid = get_jobid(stdout) - batch_host = get_hostname_host_cluster(jobid) + jobid = check_mapdl_launch_on_hpc(process, start_parm) + get_job_info(jobid=jobid, start_parm=start_parm) except Exception as exception: LOG.error("An error occurred when launching MAPDL.") @@ -2634,11 +2626,6 @@ def check_mapdl_launch_on_hpc(process, args): raise exception - # TODO: A way to check if the job is ready. - start_parm["ip"] = batch_host - start_parm["hostname"] = batch_host - start_parm["jobid"] = jobid - if args["just_launch"]: out = [args["ip"], args["port"]] if hasattr(process, "pid"): @@ -2678,13 +2665,12 @@ def check_mapdl_launch_on_hpc(process, args): return mapdl -def get_hostname_host_cluster(job_id: int) -> str: +def get_hostname_host_cluster(job_id: int, timeout: int = 30) -> str: cmd = f"scontrol show jobid -dd {job_id}".split() LOG.debug(f"Executing the command '{cmd}'") ready = False time_start = time.time() - timeout = 30 # second counter = 0 while not ready: proc = subprocess.Popen( @@ -2699,9 +2685,9 @@ def get_hostname_host_cluster(job_id: int) -> str: host = get_hostname_from_scontrol(stdout) hostname_msg = f"The BatchHost for this job is '{host}'" except (IndexError, AttributeError): - hostname_msg = f"PyMAPDL couldn't get the BatchHost hostname" + hostname_msg = "PyMAPDL couldn't get the BatchHost hostname" raise MapdlDidNotStart( - f"The HPC job (id: {job_id}) didn't start on time. " + f"The HPC job (id: {job_id}) didn't start on time (timeout={timeout}). " f"The job state is '{state}'. " f"{hostname_msg}. 
" "You can check more information by issuing in your console:\n" @@ -2725,7 +2711,7 @@ def get_hostname_host_cluster(job_id: int) -> str: batchhost_ip = socket.gethostbyname(batchhost) LOG.debug(f"Batchhost IP: {batchhost_ip}") - return batchhost + return batchhost, batchhost_ip def get_jobid(stdout: str) -> int: @@ -2791,7 +2777,32 @@ def get_hostname_from_scontrol(stdout: str) -> str: return stdout.split("BatchHost=")[1].splitlines()[0] -def check_mapdl_launch_on_hpc(process: subprocess.Popen, start_parm: Dict[str, str]): +def check_mapdl_launch_on_hpc( + process: subprocess.Popen, start_parm: Dict[str, str] +) -> int: + """Check if the job is ready on the HPC + + Check if the job has been successfully submitted, and additionally, it does + retrieve the BathcHost hostname which is the IP to connect to using the gRPC + interface. + + Parameters + ---------- + process : subprocess.Popen + Process used to submit the job. The stdout is read from there. + start_parm : Dict[str, str] + To store the job ID, the BatchHost hostname and IP into. + + Returns + ------- + int : + The jobID + + Raises + ------ + MapdlDidNotStart + The job submission failed. + """ stdout = process.stdout.read().decode() if "Submitted batch job" not in stdout: stderr = process.stderr.read().decode() @@ -2800,9 +2811,29 @@ def check_mapdl_launch_on_hpc(process: subprocess.Popen, start_parm: Dict[str, s f"stdout:\n{stdout}\nstderr:\n{stderr}" ) - jobid = get_jobid(stdout) - batch_host = get_hostname_host_cluster(jobid) - batch_ip = socket.gethostbyname(batch_host) + return get_jobid(stdout) + + +def get_job_info(jobid: int, start_parm: Dict[str, str], timeout: int = 30): + """Get job info like BatchHost IP and hostname + + Get BatchHost hostname and ip and stores them in the start_parm argument + + Parameters + ---------- + jobid : int + Job ID + start_parm : Dict[str, str] + Starting parameters for MAPDL. + timeout : int + Timeout for checking if the job is ready. 
Default checks for + 'start_instance' key in the 'start_parm' argument, if none + is found, it passes :class:`None` to + :func:`ansys.mapdl.core.launcher.get_hostname_host_cluster`. + """ + timeout = timeout or start_parm.get("start_instance") + + batch_host, batch_ip = get_hostname_host_cluster(jobid, timeout=timeout) start_parm["ip"] = batch_ip start_parm["hostname"] = batch_host diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index fc70b0449c6..0564df9e200 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -3730,7 +3730,7 @@ def get_file_name(path): def kill_job(self, jobid: int) -> None: cmd = ["scancel", f"{jobid}"] # to ensure the job is stopped properly, let's issue the scancel twice. - for i in range(2): + for _ in range(2): Popen(cmd) def __del__(self): diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 361b1cfc3fa..cff271a8c5a 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -1258,6 +1258,7 @@ def test_generate_sbatch_wrap_in_arg(scheduler_args): with context: cmd_post = generate_sbatch_command(cmd, scheduler_args) + assert cmd[0] in cmd_post[-1] def myfakegethostbyname(*args, **kwargs): @@ -1295,9 +1296,9 @@ def test_check_mapdl_launch_on_hpc(message_stdout, message_stderr): check_mapdl_launch_on_hpc(process, start_parm) if "Submitted batch job" in message_stdout: - start_parm["ip"] == "123.45.67.89" - start_parm["hostname"] == "mycoolhostname" - start_parm["jobid"] == 1001 + assert start_parm["ip"] == "123.45.67.89" + assert start_parm["hostname"] == "mycoolhostname" + assert start_parm["jobid"] == 1001 # def test_get_hostname_host_cluster() From 524c4b4ccd2232bead55c68bbf659ebdfc28ed19 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 21 Oct 2024 16:41:58 +0200 Subject: [PATCH 051/122] fix: test --- tests/test_launcher.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git 
a/tests/test_launcher.py b/tests/test_launcher.py index cff271a8c5a..58217e35140 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -1293,12 +1293,4 @@ def test_check_mapdl_launch_on_hpc(message_stdout, message_stderr): ) with context: - check_mapdl_launch_on_hpc(process, start_parm) - - if "Submitted batch job" in message_stdout: - assert start_parm["ip"] == "123.45.67.89" - assert start_parm["hostname"] == "mycoolhostname" - assert start_parm["jobid"] == 1001 - - -# def test_get_hostname_host_cluster() + assert check_mapdl_launch_on_hpc(process, start_parm) == 1001 From 58549fbeb28d96f0aa9e5324070ed665a1e3a8ff Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 09:39:16 +0200 Subject: [PATCH 052/122] refactor: exit method. Externalising to _exit_mapdl function. --- src/ansys/mapdl/core/mapdl_grpc.py | 58 +++++++++++++++--------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index 0564df9e200..804f0673183 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -32,7 +32,7 @@ import re import shutil import socket -from subprocess import Popen +import subprocess import tempfile import threading import time @@ -428,7 +428,7 @@ def __init__( self._subscribe_to_channel() # connect and validate to the channel - self._mapdl_process: Popen = start_parm.pop("process", None) + self._mapdl_process: subprocess.Popen = start_parm.pop("process", None) # saving for later use (for example open_gui) start_parm["port"] = port @@ -1126,36 +1126,42 @@ def exit(self, save=False, force=False, **kwargs): self._log.info("Ignoring exit due as BUILDING_GALLERY=True") return + # Actually exiting MAPDL instance self._exiting = True - - if not kwargs.pop("fake_exit", False): - # This cannot/should not be faked - if self._local: - self._cache_pids() # Recache processes - - if os.name == "nt": - 
self._kill_server() - self._close_process() - self._remove_lock_file(mapdl_path) - else: - self._kill_server() - + self._exit_mapdl(path=mapdl_path) self._exited = True - self._exiting = False + # Exiting HPC job + if self._mapdl_on_hpc and self.finish_job_on_exit: + self.kill_job(self.jobid) + self._log.debug(f"Job (id: {self.jobid}) has been cancel.") + + # Exiting remov if self._remote_instance: # pragma: no cover # No cover: The CI is working with a single MAPDL instance self._remote_instance.delete() - if self._mapdl_on_hpc and self.finish_job_on_exit: - self.kill_job(self.jobid) - self._log.debug(f"Job (id: {self.jobid}) has been cancel.") + self._exiting = False + # Post-kill tasks self._remove_temp_dir_on_exit(mapdl_path) if self._local and self._port in pymapdl._LOCAL_PORTS: pymapdl._LOCAL_PORTS.remove(self._port) + def _exit_mapdl(self, path: str = None) -> None: + """Exit MAPDL and remove the lock file in `path`""" + # This cannot/should not be faked + if self._local: + self._cache_pids() # Recache processes + + if os.name == "nt": + self._kill_server() + self._close_process() + self._remove_lock_file(path) + else: + self._kill_server() + def _remove_temp_dir_on_exit(self, path=None): """Removes the temporary directory created by the launcher. @@ -3730,16 +3736,8 @@ def get_file_name(path): def kill_job(self, jobid: int) -> None: cmd = ["scancel", f"{jobid}"] # to ensure the job is stopped properly, let's issue the scancel twice. - for _ in range(2): - Popen(cmd) + subprocess.Popen(cmd) def __del__(self): - # For some reason, some tests do not seem this attribute. 
- if ( - hasattr(self, "_mapdl_on_hpc") - and self._mapdl_on_hpc - and self.finish_job_on_exit - ): - self.exit() - else: - super().__del__() + """In case the object is deleted""" + self.exit() From 327538e05bcedb0a69b19c498068de9b55afcabd Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 09:40:08 +0200 Subject: [PATCH 053/122] fix: not running all tests. --- src/ansys/mapdl/core/helpers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/ansys/mapdl/core/helpers.py b/src/ansys/mapdl/core/helpers.py index edb29704abf..308520e5d01 100644 --- a/src/ansys/mapdl/core/helpers.py +++ b/src/ansys/mapdl/core/helpers.py @@ -32,9 +32,7 @@ def is_installed(package_name: str) -> bool: """Check if a package is installed""" - - if os.name == "nt": - package_name = package_name.replace("-", ".") + package_name = package_name.replace("-", ".") try: importlib.import_module(package_name) From d8f77a94d367bb972a1d91f94d3e424963be5fd2 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 09:41:54 +0200 Subject: [PATCH 054/122] tests: adding test to __del__. 
--- tests/test_mapdl.py | 47 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tests/test_mapdl.py b/tests/test_mapdl.py index 909860f461a..6cf9db41925 100644 --- a/tests/test_mapdl.py +++ b/tests/test_mapdl.py @@ -28,6 +28,7 @@ import shutil import tempfile import time +from unittest.mock import patch import grpc import numpy as np @@ -2460,3 +2461,49 @@ def test_no_flush_stored(mapdl): assert not mapdl._store_commands assert mapdl._stored_commands == [] + + +# @requires("gprc") +@pytest.mark.parametrize("ip", ["123.45.67.89", "myhostname"]) +@patch( + "ansys.mapdl.core.mapdl_grpc.MapdlGrpc._multi_connect", lambda *args, **kwargs: True +) +@patch("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._run", lambda *args, **kwargs: "") +@patch( + "ansys.mapdl.core.mapdl_grpc.MapdlGrpc._create_channel", lambda *args, **kwargs: "" +) +@patch( + "ansys.mapdl.core.mapdl_grpc.MapdlGrpc._subscribe_to_channel", + lambda *args, **kwargs: "", +) +@patch( + "ansys.mapdl.core.mapdl_grpc.MapdlGrpc._run_at_connect", lambda *args, **kwargs: "" +) +@patch("socket.gethostbyname", lambda *args, **kwargs: "123.45.67.99") +def test_ip_hostname_in_start_parm(ip): + start_parm = { + "ip": ip, + "local": False, + "set_no_abort": False, + "hostname": "myhost", + "jobid": 1001, + } + + mapdl = pymapdl.Mapdl(disable_run_at_connect=False, **start_parm) + + if ip == "myhostname": + assert mapdl.ip == "123.45.67.99" + else: + assert mapdl.ip == ip + + assert mapdl.hostname == "myhost" + assert mapdl.hostname == 1001 + + +@patch("ansys.mapdl.core.Mapdl.__init__", lambda *args, **kwargs: None) +def test_delete_mapdl_object(mapdl): + mapdl_b = pymapdl.Mapdl() + + with patch("ansys.mapdl.core.Mapdl.exit") as mock_popen: + del mapdl_b + mock_popen.assert_called_once() From 775c893173a2bc4f5d1fde55f6167dc6b93c46a9 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 13:13:33 +0200 Subject: [PATCH 055/122] refactor: 
patching exit to avoid raising exception. I need to fix this later better. --- src/ansys/mapdl/core/mapdl_grpc.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index 804f0673183..44423f8e454 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -1067,7 +1067,10 @@ def _threaded_heartbeat(self): except Exception: continue + # Placing logging in the exit method raises exceptions when + # this is triggered by "__del__" @protect_from(ValueError, "I/O operation on closed file.") + @protect_from(AttributeError, "'MapdlGrpc' object has no attribute '_log'") def exit(self, save=False, force=False, **kwargs): """Exit MAPDL. @@ -1090,9 +1093,10 @@ def exit(self, save=False, force=False, **kwargs): >>> mapdl.exit() """ # check if permitted to start (and hence exit) instances - self._log.debug( - f"Exiting MAPLD gRPC instance {self.ip}:{self.port} on '{self._path}'." - ) + if hasattr(self, "_log"): + self._log.debug( + f"Exiting MAPLD gRPC instance {self.ip}:{self.port} on '{self._path}'." + ) mapdl_path = self.directory # caching if self._exited is None: @@ -3741,3 +3745,6 @@ def kill_job(self, jobid: int) -> None: def __del__(self): """In case the object is deleted""" self.exit() + # Adding super call per: + # https://docs.python.org/3/reference/datamodel.html#object.__del__ + super().__del__() From 5e14addbe28df5ec96925ed21f507ed8ce0dd62b Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 13:18:05 +0200 Subject: [PATCH 056/122] refactor: not asking for version or checking exec_file path if 'launch_on_hpc' is true. 
--- src/ansys/mapdl/core/launcher.py | 54 +++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 79e76375150..7dafd22aa83 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -23,6 +23,7 @@ """Module for launching MAPDL locally or connecting to a remote instance with gRPC.""" import atexit +from functools import wraps import os import platform from queue import Empty, Queue @@ -61,7 +62,21 @@ import ansys.platform.instancemanagement as pypim if _HAS_ATP: - from ansys.tools.path import find_ansys, get_ansys_path, version_from_path + from ansys.tools.path import find_ansys, get_ansys_path + from ansys.tools.path import version_from_path as _version_from_path + + @wraps(_version_from_path) + def version_from_path(*args, **kwargs): + """Wrap ansys.tool.path.version_from_path to raise a warning if the + executable couldn't be found""" + if kwargs.pop("launch_on_hpc", False): + try: + return _version_from_path(*args, **kwargs) + except RuntimeError: + warnings.warn("PyMAPDL could not find the ANSYS executable. 
") + else: + return _version_from_path(*args, **kwargs) + if TYPE_CHECKING: # pragma: no cover from ansys.mapdl.core.mapdl_console import MapdlConsole @@ -352,11 +367,6 @@ def generate_mapdl_launch_command( Command """ - # verify version - if _HAS_ATP: - if version_from_path("mapdl", exec_file) < 202: - raise VersionError("The MAPDL gRPC interface requires MAPDL 20.2 or later") - cpu_sw = "-np %d" % nproc if ram: @@ -1386,7 +1396,9 @@ def launch_mapdl( get_exec_file(args) - args["version"] = get_version(args["version"], exec_file) + args["version"] = get_version( + args["version"], args.get("exec_file"), launch_on_hpc=args["launch_on_hpc"] + ) if args["start_instance"]: ######################################## @@ -1404,9 +1416,8 @@ def launch_mapdl( # (as way to check if MAPDL started or not) remove_err_files(args["run_location"], args["jobname"]) - if _HAS_ATP and not args["_debug_no_launch"]: - version = version_from_path("mapdl", args["exec_file"]) - args["mode"] = check_mode(args["mode"], version) + # Check for a valid connection mode + args["mode"] = check_mode(args["mode"], args["version"]) if not args["mode"]: args["mode"] = "grpc" @@ -1510,7 +1521,7 @@ def launch_mapdl( lic_check.start() LOG.debug("Starting MAPDL") - if args["mode"] == "console": + if args["mode"] == "console": # pragma: no cover ######################################## # Launch MAPDL on console mode # ---------------------------- @@ -1610,13 +1621,16 @@ def launch_mapdl( return mapdl -def check_mode(mode: ALLOWABLE_MODES, version: ALLOWABLE_VERSION_INT): +def check_mode(mode: ALLOWABLE_MODES, version: Optional[int] = None): """Check if the MAPDL server mode matches the allowable version If :class:`None`, the newest mode will be selected. Returns a value from ``ALLOWABLE_MODES``. 
""" + if not version: + return mode + if isinstance(mode, str): mode = mode.lower() if mode == "grpc": @@ -2158,6 +2172,7 @@ def get_port(port: Optional[int] = None, start_instance: Optional[bool] = None) def get_version( version: Optional[Union[str, int]] = None, exec_file: Optional[str] = None, + launch_on_hpc: bool = False, ) -> Optional[int]: """Get MAPDL version @@ -2180,6 +2195,14 @@ def get_version( version = os.getenv("PYMAPDL_MAPDL_VERSION") if not version: + # verify version + if exec_file and _HAS_ATP: + version = version_from_path("mapdl", exec_file, launch_on_hpc=launch_on_hpc) + if version and version < 202: + raise VersionError( + "The MAPDL gRPC interface requires MAPDL 20.2 or later" + ) + # Early exit return @@ -2287,8 +2310,7 @@ def get_exec_file(args: Dict[str, Any]) -> None: FileNotFoundError Invalid MAPDL executable """ - - args["exec_file"] = os.getenv("PYMAPDL_MAPDL_EXEC", args.get("exec_file")) + args["exec_file"] = args.get("exec_file") or os.getenv("PYMAPDL_MAPDL_EXEC") if not args["start_instance"] and args["exec_file"] is None: # 'exec_file' is not needed if the instance is not going to be launch @@ -2318,7 +2340,9 @@ def get_exec_file(args: Dict[str, Any]) -> None: "'exec_file' argument." 
) else: # verify ansys exists at this location - if not os.path.isfile(args["exec_file"]): + if not args.get("launch_on_hpc", False) and not os.path.isfile( + args["exec_file"] + ): raise FileNotFoundError( f'Invalid MAPDL executable at "{args["exec_file"]}"\n' "Enter one manually using exec_file=" From b577f6457ba34fa282eabc350693b290d92134f1 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 13:27:21 +0200 Subject: [PATCH 057/122] tests: increasing coverage --- tests/test_launcher.py | 71 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 58217e35140..611c04621d9 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -1294,3 +1294,74 @@ def test_check_mapdl_launch_on_hpc(message_stdout, message_stderr): with context: assert check_mapdl_launch_on_hpc(process, start_parm) == 1001 + + +@patch("ansys.mapdl.core.Mapdl._exit_mapdl", lambda *args, **kwargs: None) +def test_exit_job(mapdl): + # Setting to exit + mapdl._mapdl_on_hpc = True + mapdl.finish_job_on_exit = True + mapdl.remove_temp_dir_on_exit = False + + mapdl._jobid = 1001 + assert mapdl.jobid == 1001 + + with patch("subprocess.Popen") as mock_popen: + mapdl.exit(force=True) + mock_popen.assert_called_once_with(["scancel", "1001"]) + + mapdl._exited = False + + +@patch( + "ansys.tools.path.path._get_application_path", + lambda *args, **kwargs: "path/to/mapdl/executable", +) +@patch("ansys.tools.path.path._mapdl_version_from_path", lambda *args, **kwargs: 242) +def test_launch_on_hpc_found_ansys(monkeypatch): + monkeypatch.delenv("PYMAPDL_START_INSTANCE", False) + + with patch("ansys.mapdl.core.launcher.launch_grpc") as mock_launch_grpc: + mock_launch_grpc.return_value = get_fake_process("Submitted batch job 1001") + mapdl = launch_mapdl( + launch_on_hpc=True, + ) + + mock_launch_grpc.assert_called_once() + cmd = 
mock_launch_grpc.call_args_list[0][1]["cmd"] + env_vars = mock_launch_grpc.call_args_list[0][1]["env_vars"] + + assert "sbatch" in cmd + assert "--wrap" in cmd + assert "path/to/mapdl/executable" in cmd[-1] + assert "-grpc" in cmd[-1] + + assert env_vars.get("ANS_MULTIPLE_NODES") == "1" + assert env_vars.get("HYDRA_BOOTSTRAP") == "slurm" + + +def test_launch_on_hpc_not_found_ansys(monkeypatch): + monkeypatch.delenv("PYMAPDL_START_INSTANCE", False) + exec_file = "path/to/mapdl/v242/executable/ansys242" + with patch("ansys.mapdl.core.launcher.launch_grpc") as mock_launch_grpc: + mock_launch_grpc.return_value = get_fake_process("Submitted batch job 1001") + + with pytest.warns( + UserWarning, match="PyMAPDL could not find the ANSYS executable." + ): + mapdl = launch_mapdl( + launch_on_hpc=True, + exec_file=exec_file, + ) + + mock_launch_grpc.assert_called_once() + cmd = mock_launch_grpc.call_args_list[0][1]["cmd"] + env_vars = mock_launch_grpc.call_args_list[0][1]["env_vars"] + + assert "sbatch" in cmd + assert "--wrap" in cmd + assert exec_file in cmd[-1] + assert "-grpc" in cmd[-1] + + assert env_vars.get("ANS_MULTIPLE_NODES") == "1" + assert env_vars.get("HYDRA_BOOTSTRAP") == "slurm" From ee60582a3c708c16336a319c1493545b7f55ae15 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 17:23:44 +0200 Subject: [PATCH 058/122] test: adding stack for patching MAPDL launching. 
--- src/ansys/mapdl/core/misc.py | 11 +++++++++++ tests/conftest.py | 24 ++++++++++++++++++++++++ tests/test_mapdl.py | 27 ++++++--------------------- 3 files changed, 41 insertions(+), 21 deletions(-) diff --git a/src/ansys/mapdl/core/misc.py b/src/ansys/mapdl/core/misc.py index e4a13f011e4..1afad96b7de 100644 --- a/src/ansys/mapdl/core/misc.py +++ b/src/ansys/mapdl/core/misc.py @@ -872,3 +872,14 @@ def get_active_branch_name(): def only_numbers_and_dots(s): return bool(re.fullmatch(r"[0-9.]+", s)) + + +def stack(*decorators): + """Stack multiple decorators on top of each other""" + + def deco(f): + for dec in reversed(decorators): + f = dec(f) + return f + + return deco diff --git a/tests/conftest.py b/tests/conftest.py index b0434fe3087..700fcb67636 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,6 +28,7 @@ import subprocess from sys import platform import time +from unittest.mock import patch from _pytest.terminal import TerminalReporter # for terminal customization import psutil @@ -644,6 +645,29 @@ def mapdl(request, tmpdir_factory): ) +# Necessary patches to patch Mapdl launch +def func_which_returns(return_=None): + return lambda *args, **kwargs: return_ + + +# Methods to patch in MAPDL when launching +_meth_patch_MAPDL_launch = ( + # method, and its return + ("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._connect", func_which_returns(True)), + ("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._run", func_which_returns("")), + ("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._create_channel", func_which_returns("")), + ( + "ansys.mapdl.core.mapdl_grpc.MapdlGrpc._subscribe_to_channel", + func_which_returns(""), + ), + ("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._run_at_connect", func_which_returns("")), + ("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._exit_mapdl", func_which_returns(None)), + ("socket.gethostbyname", func_which_returns("123.45.67.99")), +) + +PATCH_MAPDL_START = [patch(method, ret) for method, ret in _meth_patch_MAPDL_launch] + + @pytest.fixture(scope="function") 
def set_env_var(request, monkeypatch): """Set an environment variable from given requests, this fixture must be used with `parametrize`""" diff --git a/tests/test_mapdl.py b/tests/test_mapdl.py index 6cf9db41925..1fe55797b4f 100644 --- a/tests/test_mapdl.py +++ b/tests/test_mapdl.py @@ -35,7 +35,7 @@ import psutil import pytest -from conftest import VALID_PORTS, has_dependency +from conftest import PATCH_MAPDL_START, VALID_PORTS, has_dependency if has_dependency("pyvista"): from pyvista import MultiBlock @@ -54,7 +54,7 @@ ) from ansys.mapdl.core.launcher import launch_mapdl from ansys.mapdl.core.mapdl_grpc import SESSION_ID_NAME -from ansys.mapdl.core.misc import random_string +from ansys.mapdl.core.misc import random_string, stack from conftest import IS_SMP, ON_CI, ON_LOCAL, QUICK_LAUNCH_SWITCHES, requires # Path to files needed for examples @@ -2463,23 +2463,8 @@ def test_no_flush_stored(mapdl): assert mapdl._stored_commands == [] -# @requires("gprc") @pytest.mark.parametrize("ip", ["123.45.67.89", "myhostname"]) -@patch( - "ansys.mapdl.core.mapdl_grpc.MapdlGrpc._multi_connect", lambda *args, **kwargs: True -) -@patch("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._run", lambda *args, **kwargs: "") -@patch( - "ansys.mapdl.core.mapdl_grpc.MapdlGrpc._create_channel", lambda *args, **kwargs: "" -) -@patch( - "ansys.mapdl.core.mapdl_grpc.MapdlGrpc._subscribe_to_channel", - lambda *args, **kwargs: "", -) -@patch( - "ansys.mapdl.core.mapdl_grpc.MapdlGrpc._run_at_connect", lambda *args, **kwargs: "" -) -@patch("socket.gethostbyname", lambda *args, **kwargs: "123.45.67.99") +@stack(*PATCH_MAPDL_START) def test_ip_hostname_in_start_parm(ip): start_parm = { "ip": ip, @@ -2497,13 +2482,13 @@ def test_ip_hostname_in_start_parm(ip): assert mapdl.ip == ip assert mapdl.hostname == "myhost" - assert mapdl.hostname == 1001 + del mapdl @patch("ansys.mapdl.core.Mapdl.__init__", lambda *args, **kwargs: None) def test_delete_mapdl_object(mapdl): mapdl_b = pymapdl.Mapdl() - with 
patch("ansys.mapdl.core.Mapdl.exit") as mock_popen: + with patch("ansys.mapdl.core.Mapdl.exit") as mock_exit: del mapdl_b - mock_popen.assert_called_once() + mock_exit.assert_called_once() From f7f95726a0aaf890efbac32ea80d4aa48f76ae8e Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 17:25:20 +0200 Subject: [PATCH 059/122] refactor: to allow more coverage --- src/ansys/mapdl/core/launcher.py | 71 ++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 7dafd22aa83..6488adb0fd0 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1562,7 +1562,8 @@ def launch_mapdl( ) if args["launch_on_hpc"]: - check_mapdl_launch_on_hpc(process, start_parm) + start_parm["jobid"] = check_mapdl_launch_on_hpc(process, start_parm) + get_job_info(start_parm=start_parm) else: # Local mapdl launch check check_mapdl_launch( @@ -1572,7 +1573,7 @@ def launch_mapdl( except Exception as exception: LOG.error("An error occurred when launching MAPDL.") - jobid: int = args.get("jobid", "Not found") + jobid: int = start_parm.get("jobid", "Not found") if ( args["launch_on_hpc"] @@ -1581,7 +1582,7 @@ def launch_mapdl( ): LOG.debug(f"Killing HPC job with id: {jobid}") - subprocess.Popen(["scancel", str(jobid)]) + kill_job(jobid) if args["license_server_check"]: LOG.debug("Checking license server.") @@ -1611,9 +1612,6 @@ def launch_mapdl( **start_parm, ) - # Setting launched property - mapdl._launched = True - except Exception as exception: LOG.error("An error occurred when connecting to MAPDL.") raise exception @@ -2639,14 +2637,14 @@ def launch_mapdl_on_cluster( ) jobid = check_mapdl_launch_on_hpc(process, start_parm) - get_job_info(jobid=jobid, start_parm=start_parm) + get_job_info(start_parm=start_parm, jobid=jobid) except Exception as exception: LOG.error("An error occurred when launching MAPDL.") 
if start_parm.get("finish_job_on_exit", True) and jobid: LOG.debug(f"Killing HPC job with id: {jobid}") - subprocess.Popen(["scancel", str(jobid)]) + kill_job(jobid) raise exception @@ -2690,26 +2688,39 @@ def launch_mapdl_on_cluster( def get_hostname_host_cluster(job_id: int, timeout: int = 30) -> str: - cmd = f"scontrol show jobid -dd {job_id}".split() - LOG.debug(f"Executing the command '{cmd}'") + options = f"show jobid -dd {job_id}".split() + LOG.debug(f"Executing the command 'scontrol {' '.join(options)}'") ready = False time_start = time.time() counter = 0 while not ready: - proc = subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - ) + proc = send_scontrol(options) + stdout = proc.stdout.read().decode() + if "JobState=RUNNING" not in stdout: + counter += 1 + time.sleep(1) + if (counter % 3 + 1) == 0: # print every 3 seconds. Skipping the first. + LOG.debug("The job is not ready yet. Waiting...") + print("The job is not ready yet. Waiting...") + else: + ready = True + break + + # Exit by raising exception if time.time() > time_start + timeout: state = stdout.split("JobState=")[1].split(" ")[0] + + # Trying to get the hostname from the last valid message try: host = get_hostname_from_scontrol(stdout) hostname_msg = f"The BatchHost for this job is '{host}'" except (IndexError, AttributeError): hostname_msg = "PyMAPDL couldn't get the BatchHost hostname" + + # Raising exception raise MapdlDidNotStart( f"The HPC job (id: {job_id}) didn't start on time (timeout={timeout}). " f"The job state is '{state}'. " @@ -2718,15 +2729,6 @@ def get_hostname_host_cluster(job_id: int, timeout: int = 30) -> str: f" scontrol show jobid -dd {job_id}" ) - if "JobState=RUNNING" not in stdout: - counter += 1 - time.sleep(1) - if (counter % 3 + 1) == 0: # print every 3 seconds. Skipping the first. - LOG.debug("The job is not ready yet. Waiting...") - print("The job is not ready yet. 
Waiting...") - else: - ready = True - LOG.debug(f"The 'scontrol' command returned:\n{stdout}") batchhost = get_hostname_from_scontrol(stdout) LOG.debug(f"Batchhost: {batchhost}") @@ -2838,17 +2840,19 @@ def check_mapdl_launch_on_hpc( return get_jobid(stdout) -def get_job_info(jobid: int, start_parm: Dict[str, str], timeout: int = 30): +def get_job_info( + start_parm: Dict[str, str], jobid: Optional[int] = None, timeout: int = 30 +): """Get job info like BatchHost IP and hostname Get BatchHost hostname and ip and stores them in the start_parm argument Parameters ---------- - jobid : int - Job ID start_parm : Dict[str, str] Starting parameters for MAPDL. + jobid : int + Job ID timeout : int Timeout for checking if the job is ready. Default checks for 'start_instance' key in the 'start_parm' argument, if none @@ -2857,8 +2861,23 @@ def get_job_info(jobid: int, start_parm: Dict[str, str], timeout: int = 30): """ timeout = timeout or start_parm.get("start_instance") + jobid = jobid or start_parm["jobid"] + batch_host, batch_ip = get_hostname_host_cluster(jobid, timeout=timeout) start_parm["ip"] = batch_ip start_parm["hostname"] = batch_host start_parm["jobid"] = jobid + + +def kill_job(jobid: int): + """Kill SLURM job""" + subprocess.Popen(["scancel", str(jobid)]) + + +def send_scontrol(args: List[str]): + args.insert(0, "scontrol") + return subprocess.Popen( + args, + stdout=subprocess.PIPE, + ) From 7479173214fed80064f70f9d5f8a9c01c4f2a002 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 17:26:46 +0200 Subject: [PATCH 060/122] feat: avoid checking the underlying processes when running on HPC --- src/ansys/mapdl/core/mapdl_grpc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index 44423f8e454..fa5129ca226 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -402,8 +402,8 @@ def 
__init__( self._path: Optional[str] = start_parm.get("run_location", None) self._busy: bool = False # used to check if running a command on the server self._local: bool = ip in ["127.0.0.1", "127.0.1.1", "localhost"] - if "local" in start_parm: # pragma: no cover # allow this to be overridden - self._local: bool = start_parm["local"] + self._local: bool = start_parm.get("local", True) + self._launched: bool = start_parm.get("launched", True) self._health_response_queue: Optional["Queue"] = None self._exiting: bool = False self._exited: Optional[bool] = None @@ -441,7 +441,7 @@ def __init__( self.finish_job_on_exit: bool = start_parm.get("finish_job_on_exit", True) # Queueing the stds - if self._mapdl_process: + if not self._mapdl_on_hpc and self._mapdl_process: self._create_process_stds_queue() try: From 338e8ac64d7ef5b67579ffb461f2b1d2c31f9d1d Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 17:27:30 +0200 Subject: [PATCH 061/122] tests: increasing coverage --- tests/test_launcher.py | 174 +++++++++++++++++++++++++++++++++-------- 1 file changed, 143 insertions(+), 31 deletions(-) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 611c04621d9..9e1fef9d00d 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -63,7 +63,14 @@ update_env_vars, ) from ansys.mapdl.core.licensing import LICENSES -from conftest import ON_LOCAL, QUICK_LAUNCH_SWITCHES, NullContext, requires +from ansys.mapdl.core.misc import stack +from conftest import ( + ON_LOCAL, + PATCH_MAPDL_START, + QUICK_LAUNCH_SWITCHES, + NullContext, + requires, +) try: from ansys.tools.path import ( @@ -1318,50 +1325,155 @@ def test_exit_job(mapdl): lambda *args, **kwargs: "path/to/mapdl/executable", ) @patch("ansys.tools.path.path._mapdl_version_from_path", lambda *args, **kwargs: 242) -def test_launch_on_hpc_found_ansys(monkeypatch): +@stack(*PATCH_MAPDL_START) +@patch("ansys.mapdl.core.launcher.launch_grpc") 
+@patch("ansys.mapdl.core.mapdl_grpc.MapdlGrpc.kill_job") +@patch("ansys.mapdl.core.launcher.send_scontrol") +def test_launch_on_hpc_found_ansys(mck_ssctrl, mck_del, mck_launch_grpc, monkeypatch): monkeypatch.delenv("PYMAPDL_START_INSTANCE", False) - with patch("ansys.mapdl.core.launcher.launch_grpc") as mock_launch_grpc: - mock_launch_grpc.return_value = get_fake_process("Submitted batch job 1001") + mck_launch_grpc.return_value = get_fake_process("Submitted batch job 1001") + mck_ssctrl.return_value = get_fake_process( + "a long scontrol...\nJobState=RUNNING\n...\nBatchHost=myhostname\n...\nin message" + ) + + mapdl_a = launch_mapdl( + launch_on_hpc=True, + ) + mapdl_a.exit() + + mck_launch_grpc.assert_called_once() + cmd = mck_launch_grpc.call_args_list[0][1]["cmd"] + env_vars = mck_launch_grpc.call_args_list[0][1]["env_vars"] + + assert "sbatch" in cmd + assert "--wrap" in cmd + assert "path/to/mapdl/executable" in cmd[-1] + assert "-grpc" in cmd[-1] + + assert env_vars.get("ANS_MULTIPLE_NODES") == "1" + assert env_vars.get("HYDRA_BOOTSTRAP") == "slurm" + + mck_ssctrl.assert_called_once() + assert "show" in mck_ssctrl.call_args[0][0] + assert "1001" in mck_ssctrl.call_args[0][0] + + mck_del.assert_called_once() + + +@stack(*PATCH_MAPDL_START) +@patch("ansys.mapdl.core.mapdl_grpc.MapdlGrpc.kill_job") +@patch("ansys.mapdl.core.launcher.launch_grpc") +@patch("ansys.mapdl.core.launcher.send_scontrol") +def test_launch_on_hpc_not_found_ansys(mck_sc, mck_lgrpc, mck_kj, monkeypatch): + monkeypatch.delenv("PYMAPDL_START_INSTANCE", False) + exec_file = "path/to/mapdl/v242/executable/ansys242" + + mck_lgrpc.return_value = get_fake_process("Submitted batch job 1001") + mck_kj.return_value = None + mck_sc.return_value = get_fake_process( + "a long scontrol...\nJobState=RUNNING\n...\nBatchHost=myhostname\n...\nin message" + ) + + with pytest.warns( + UserWarning, match="PyMAPDL could not find the ANSYS executable." 
+ ): mapdl = launch_mapdl( launch_on_hpc=True, + exec_file=exec_file, ) + mapdl.exit() + + mck_lgrpc.assert_called_once() + cmd = mck_lgrpc.call_args_list[0][1]["cmd"] + env_vars = mck_lgrpc.call_args_list[0][1]["env_vars"] + + assert "sbatch" in cmd + assert "--wrap" in cmd + assert exec_file in cmd[-1] + assert "-grpc" in cmd[-1] - mock_launch_grpc.assert_called_once() - cmd = mock_launch_grpc.call_args_list[0][1]["cmd"] - env_vars = mock_launch_grpc.call_args_list[0][1]["env_vars"] + assert env_vars.get("ANS_MULTIPLE_NODES") == "1" + assert env_vars.get("HYDRA_BOOTSTRAP") == "slurm" - assert "sbatch" in cmd - assert "--wrap" in cmd - assert "path/to/mapdl/executable" in cmd[-1] - assert "-grpc" in cmd[-1] + mck_sc.assert_called_once() + assert "show" in mck_sc.call_args[0][0] + assert "1001" in mck_sc.call_args[0][0] - assert env_vars.get("ANS_MULTIPLE_NODES") == "1" - assert env_vars.get("HYDRA_BOOTSTRAP") == "slurm" + mck_kj.assert_called_once() -def test_launch_on_hpc_not_found_ansys(monkeypatch): +def test_launch_on_hpc_exception_launch_mapdl(monkeypatch): monkeypatch.delenv("PYMAPDL_START_INSTANCE", False) exec_file = "path/to/mapdl/v242/executable/ansys242" + + process = get_fake_process("ERROR") + with patch("ansys.mapdl.core.launcher.launch_grpc") as mock_launch_grpc: - mock_launch_grpc.return_value = get_fake_process("Submitted batch job 1001") + with patch("ansys.mapdl.core.launcher.kill_job") as mock_popen: - with pytest.warns( - UserWarning, match="PyMAPDL could not find the ANSYS executable." 
- ): - mapdl = launch_mapdl( - launch_on_hpc=True, - exec_file=exec_file, - ) + mock_launch_grpc.return_value = process + + with pytest.raises( + Exception, match="PyMAPDL failed to submit the sbatch job:" + ): + mapdl = launch_mapdl( + launch_on_hpc=True, + exec_file=exec_file, + ) + + mock_launch_grpc.assert_called_once() + cmd = mock_launch_grpc.call_args_list[0][1]["cmd"] + env_vars = mock_launch_grpc.call_args_list[0][1]["env_vars"] + + assert "sbatch" in cmd + assert "--wrap" in cmd + assert exec_file in cmd[-1] + assert "-grpc" in cmd[-1] + + assert env_vars.get("ANS_MULTIPLE_NODES") == "1" + assert env_vars.get("HYDRA_BOOTSTRAP") == "slurm" + + # Popen wi + mock_popen.assert_not_called() + + +def test_launch_on_hpc_exception_successfull_sbatch(monkeypatch): + monkeypatch.delenv("PYMAPDL_START_INSTANCE", False) + exec_file = "path/to/mapdl/v242/executable/ansys242" + + def raise_exception(*args, **kwargs): + raise Exception("Fake exception when launching MAPDL") + + process_launch_grpc = get_fake_process("Submitted batch job 1001") + + process_scontrol = get_fake_process("Submitted batch job 1001") + process_scontrol.stdout.read = raise_exception + + with patch("ansys.mapdl.core.launcher.launch_grpc") as mock_launch_grpc: + with patch("ansys.mapdl.core.launcher.send_scontrol") as mock_scontrol: + with patch("ansys.mapdl.core.launcher.kill_job") as mock_kill_job: + + mock_launch_grpc.return_value = process_launch_grpc + mock_scontrol.return_value = process_scontrol + + with pytest.raises( + Exception, match="Fake exception when launching MAPDL" + ): + mapdl = launch_mapdl( + launch_on_hpc=True, + exec_file=exec_file, + ) + + mock_launch_grpc.assert_called_once() + cmd = mock_launch_grpc.call_args_list[0][1]["cmd"] + env_vars = mock_launch_grpc.call_args_list[0][1]["env_vars"] - mock_launch_grpc.assert_called_once() - cmd = mock_launch_grpc.call_args_list[0][1]["cmd"] - env_vars = mock_launch_grpc.call_args_list[0][1]["env_vars"] + 
mock_scontrol.assert_called_once() + args = mock_scontrol.call_args_list[0][0][0] - assert "sbatch" in cmd - assert "--wrap" in cmd - assert exec_file in cmd[-1] - assert "-grpc" in cmd[-1] + assert "show" in args + assert "jobid" in args + assert "1001" in args - assert env_vars.get("ANS_MULTIPLE_NODES") == "1" - assert env_vars.get("HYDRA_BOOTSTRAP") == "slurm" + mock_kill_job.assert_called_once() From cf45184dc72a1b189b700ebad33d71bdc6eba921 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 17:29:43 +0200 Subject: [PATCH 062/122] chore: adding coverage to default pytesting. Adding _commands for checking coverage. --- pyproject.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 480246e92e9..47aa7cf2c36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,7 +120,7 @@ pymapdl_convert_script = "ansys.mapdl.core.cli:old_pymapdl_convert_script_entry_ pymapdl = "ansys.mapdl.core.cli:main" [tool.pytest.ini_options] -addopts = "-ra -vvv --maxfail=10" +addopts = "-ra -vvv --maxfail=10 --cov=ansys.mapdl.core --cov-report=html" junit_family = "legacy" filterwarnings = [ "ignore::FutureWarning", @@ -148,8 +148,6 @@ src_paths = ["doc", "src", "tests"] [tool.coverage.run] source = ["ansys/pymapdl"] omit = [ - # omit commands - "ansys/mapdl/core/_commands/*", # ignore legacy interfaces "ansys/mapdl/core/mapdl_console.py", "ansys/mapdl/core/jupyter.py", From 715e3a7d7abefc15ca819857095c68284dbfd59f Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 19:18:12 +0200 Subject: [PATCH 063/122] fix: remote launcher --- src/ansys/mapdl/core/launcher.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index b58d598d1fa..1db551517c9 100644 --- a/src/ansys/mapdl/core/launcher.py +++ 
b/src/ansys/mapdl/core/launcher.py @@ -1475,17 +1475,19 @@ def launch_mapdl( LOG.debug(f"Using additional switches {args['additional_switches']}.") - # Delegating to PyPIM - if _HAS_PIM and exec_file is None and pypim.is_configured(): - # Start MAPDL with PyPIM if the environment is configured for it - # and the user did not pass a directive on how to launch it. - LOG.info( - "Starting MAPDL remotely. The startup configuration will be ignored." - ) + ######################################## + # PyPIM connection + # ---------------- + # Delegating to PyPIM if applicable + # + if _HAS_PIM and exec_file is None and pypim.is_configured(): + # Start MAPDL with PyPIM if the environment is configured for it + # and the user did not pass a directive on how to launch it. + LOG.info("Starting MAPDL remotely. The startup configuration will be ignored.") - return launch_remote_mapdl( - cleanup_on_exit=args["cleanup_on_exit"], version=args["version"] - ) + return launch_remote_mapdl( + cleanup_on_exit=args["cleanup_on_exit"], version=args["version"] + ) start_parm = generate_start_parameters(args) From 8ae518ed8205e8f930e72e58222565daa9846c98 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 17:27:58 +0000 Subject: [PATCH 064/122] fix: raising exceptions in __del__ method --- src/ansys/mapdl/core/mapdl_grpc.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index d62b6eeeae8..e159a6cc2f6 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -3748,7 +3748,14 @@ def kill_job(self, jobid: int) -> None: def __del__(self): """In case the object is deleted""" - self.exit() + try: + self.exit() + except Exception as e: + pass + # Adding super call per: # https://docs.python.org/3/reference/datamodel.html#object.__del__ - super().__del__() + try: + super().__del__() + except Exception as e: + 
pass From c7b9ede802b427a00274cfa4fb989ee151a967de Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 18:03:28 +0000 Subject: [PATCH 065/122] fix: weird missing reference (import) when exiting --- src/ansys/mapdl/core/mapdl_grpc.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index e159a6cc2f6..262192e2f96 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -1097,6 +1097,8 @@ def exit(self, save=False, force=False, **kwargs): >>> mapdl.exit() """ # check if permitted to start (and hence exit) instances + from ansys.mapdl import core as pymapdl + if hasattr(self, "_log"): self._log.debug( f"Exiting MAPLD gRPC instance {self.ip}:{self.port} on '{self._path}'." @@ -1127,9 +1129,8 @@ def exit(self, save=False, force=False, **kwargs): if not get_start_instance(): self._log.info("Ignoring exit due to PYMAPDL_START_INSTANCE=False") return - # or building the gallery - from ansys.mapdl import core as pymapdl + # or building the gallery if pymapdl.BUILDING_GALLERY: self._log.info("Ignoring exit due as BUILDING_GALLERY=True") return @@ -1144,7 +1145,7 @@ def exit(self, save=False, force=False, **kwargs): self.kill_job(self.jobid) self._log.debug(f"Job (id: {self.jobid}) has been cancel.") - # Exiting remov + # Exiting remote instances if self._remote_instance: # pragma: no cover # No cover: The CI is working with a single MAPDL instance self._remote_instance.delete() From 7e12b2ea190a99f05af6de3a9d8e80f8b3f4ece8 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 20:05:02 +0200 Subject: [PATCH 066/122] chore/making sure we regress to the right state after the tests --- tests/conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 700fcb67636..3cd91cccbcc 100644 --- 
a/tests/conftest.py +++ b/tests/conftest.py @@ -439,7 +439,8 @@ def run_before_and_after_tests( yield # this is where the testing happens assert prev == mapdl.is_local - assert not mapdl.exited + assert not mapdl.exited, "MAPDL is exited after the test. It should have not!" + assert not mapdl._mapdl_on_hpc, "Mapdl class is on HPC mode. It should not!" make_sure_not_instances_are_left_open() From 7407411c2f2b3454e18bd51b17342e1e7431df20 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 20:05:27 +0200 Subject: [PATCH 067/122] test: fix test --- tests/test_launcher.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 41150df7b36..bc1f4852970 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -1306,20 +1306,30 @@ def test_check_mapdl_launch_on_hpc(message_stdout, message_stderr): @patch("ansys.mapdl.core.Mapdl._exit_mapdl", lambda *args, **kwargs: None) -def test_exit_job(mapdl): +@patch("ansys.mapdl.core.mapdl_grpc.MapdlGrpc.kill_job") +def test_exit_job(mock_popen, mapdl): # Setting to exit mapdl._mapdl_on_hpc = True mapdl.finish_job_on_exit = True + prev_rem = mapdl.remove_temp_dir_on_exit mapdl.remove_temp_dir_on_exit = False + mock_popen.return_value = lambda *args, **kwargs: True + mapdl._jobid = 1001 assert mapdl.jobid == 1001 - with patch("subprocess.Popen") as mock_popen: - mapdl.exit(force=True) - mock_popen.assert_called_once_with(["scancel", "1001"]) + mapdl.exit(force=True) + # Returning to state + mapdl._jobid = None mapdl._exited = False + mapdl._mapdl_on_hpc = False + mapdl.finish_job_on_exit = False + mapdl.remove_temp_dir_on_exit = prev_rem + + # Checking + mock_popen.assert_called_once_with(1001) @patch( From a61f649b228adc80b685ad4954a091baed2dfe6f Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 24 Oct 2024 10:11:40 +0200 Subject: [PATCH 068/122] 
fix: not checking the mode --- src/ansys/mapdl/core/launcher.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 18a4cb0494d..a1d39e79ed4 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1434,8 +1434,8 @@ def launch_mapdl( # (as way to check if MAPDL started or not) remove_err_files(args["run_location"], args["jobname"]) - # Check for a valid connection mode - args["mode"] = check_mode(args["mode"], args["version"]) + # Check for a valid connection mode + args["mode"] = check_mode(args["mode"], args["version"]) if not args["mode"]: args["mode"] = "grpc" @@ -1649,7 +1649,9 @@ def check_mode(mode: ALLOWABLE_MODES, version: Optional[int] = None): Returns a value from ``ALLOWABLE_MODES``. """ - if not version: + if not mode and not version: + return "grpc" + elif not version: return mode if isinstance(mode, str): From a14da83fcdc69fc8faaceec1ec487fbb363191b0 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 24 Oct 2024 13:08:28 +0200 Subject: [PATCH 069/122] refactor: reorg ip section on init. Adding better str representation to MapdlGrpc --- src/ansys/mapdl/core/mapdl_grpc.py | 43 +++++++++++++++++------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index 262192e2f96..abc5f97f91f 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -361,28 +361,33 @@ def __init__( "If `channel` is specified, neither `port` nor `ip` can be specified." 
) if ip is None: - # We use if here to avoid having ip= '' - if start_parm.get("ip"): - ip: str = start_parm.pop("ip") - else: - ip: str = "127.0.0.1" - - # port and ip are needed to setup the log - if port is None: - from ansys.mapdl.core.launcher import MAPDL_DEFAULT_PORT - - port = MAPDL_DEFAULT_PORT - - self._port: int = int(port) + ip = start_parm.pop("ip", None) or "127.0.0.1" + # setting hostname if not only_numbers_and_dots(ip): # it is a hostname self._hostname = ip ip = socket.gethostbyname(ip) + else: + # it is an IP + self._hostname = ( + "localhost" + if ip in ["127.0.0.1", "127.0.1.1", "localhost"] + else socket.gethostbyaddr(ip)[0] + ) check_valid_ip(ip) self._ip: str = ip + # port and ip are needed to setup the log + if port is None: + from ansys.mapdl.core.launcher import MAPDL_DEFAULT_PORT + + port = MAPDL_DEFAULT_PORT + + self._port: int = int(port) + start_parm["port"] = self._port # store for `open_gui` + super().__init__( loglevel=loglevel, log_apdl=log_apdl, @@ -404,7 +409,6 @@ def __init__( self._jobname: str = start_parm.get("jobname", "file") self._path: Optional[str] = start_parm.get("run_location", None) self._busy: bool = False # used to check if running a command on the server - self._local: bool = ip in ["127.0.0.1", "127.0.1.1", "localhost"] self._local: bool = start_parm.get("local", True) self._launched: bool = start_parm.get("launched", True) self._health_response_queue: Optional["Queue"] = None @@ -434,12 +438,10 @@ def __init__( self._mapdl_process: subprocess.Popen = start_parm.pop("process", None) # saving for later use (for example open_gui) - start_parm["port"] = port self._start_parm: Dict[str, Any] = start_parm # Storing HPC related stuff self._jobid: int = start_parm.get("jobid") - self._hostname: str = start_parm.get("hostname") self._mapdl_on_hpc: bool = bool(self._jobid) self.finish_job_on_exit: bool = start_parm.get("finish_job_on_exit", True) @@ -2938,9 +2940,14 @@ def __str__(self): en = stats.find("*** PrePro") 
product = "\n".join(stats[st:en].splitlines()[1:]).strip() - info = f"Product: {product}\n" + info = f"Mapdl\n" + info += f"-----\n" + info += f"PyMAPDL Version: {__version__}\n" + info += f"Interface: grpc\n" + info += f"Product: {product}\n" info += f"MAPDL Version: {self.version}\n" - info += f"ansys.mapdl Version: {__version__}\n" + info += f"Running on: {self.hostname}\n" + info += f" ({self.ip})" return info @supress_logging From b17ec37c3c7869fa69d4123d39bb5b93e2c3e1c1 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 24 Oct 2024 18:23:29 +0200 Subject: [PATCH 070/122] feat: avoid killing MAPDL if not `finish_job_on_exit`. Adding also a property for `finish_job_on_exit`. --- src/ansys/mapdl/core/mapdl_grpc.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index abc5f97f91f..fb5b5ceb02b 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -1026,6 +1026,12 @@ def jobid(self): This is only applicable if MAPDL is running on an HPC cluster.""" return self._jobid + @property + def mapdl_on_hpc(self): + """Returns :class:`True` if the MAPDL instance has been run using + a scheduler""" + return self._mapdl_on_hpc + @protect_grpc def _send_command(self, cmd: str, mute: bool = False) -> Optional[str]: """Send a MAPDL command and return the response as a string""" @@ -1138,14 +1144,15 @@ def exit(self, save=False, force=False, **kwargs): return # Actually exiting MAPDL instance - self._exiting = True - self._exit_mapdl(path=mapdl_path) - self._exited = True - - # Exiting HPC job - if self._mapdl_on_hpc and self.finish_job_on_exit: - self.kill_job(self.jobid) - self._log.debug(f"Job (id: {self.jobid}) has been cancel.") + if finish_job_on_exit: + self._exiting = True + self._exit_mapdl(path=mapdl_path) + self._exited = True + + # Exiting HPC job + if self._mapdl_on_hpc: + 
self.kill_job(self.jobid) + self._log.debug(f"Job (id: {self.jobid}) has been cancel.") # Exiting remote instances if self._remote_instance: # pragma: no cover From b8898a88f702d837bcfb55407711d86f55d05ae0 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 24 Oct 2024 18:25:14 +0200 Subject: [PATCH 071/122] feat: raising error if specifying IP when `launch_on_hpc`. --- src/ansys/mapdl/core/launcher.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index a1d39e79ed4..9cae997bd86 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -2449,6 +2449,17 @@ def pre_check_args(args): if args["exec_file"] and args["version"]: raise ValueError("Cannot specify both ``exec_file`` and ``version``.") + if args["launch_on_hpc"] and args["ip"]: + raise ValueError( + "PyMAPDL cannot ensure a specific IP will be used when launching " + "MAPDL on a cluster. Hence the 'ip' argument is not compatible. " + "If you want to connect to an already started MAPDL instance, " + "just connect normally as you would with a remote instance. " + "For example:\n\n" + ">>> mapdl = launch_mapdl(start_instance=False, ip='123.45.67.89')\n\n" + "where '123.45.67.89' is the IP of the machine where MAPDL is running." + ) + def get_cpus(args: Dict[str, Any]): """Get number of CPUs From 24cf555cf230e707c0d11172ab90570f2a333401 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 24 Oct 2024 18:29:46 +0200 Subject: [PATCH 072/122] feat: increasing grpc error handling options to 3s or 5 attempts. 
--- src/ansys/mapdl/core/errors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ansys/mapdl/core/errors.py b/src/ansys/mapdl/core/errors.py index 19a09e22ba6..ebbffcfa53b 100644 --- a/src/ansys/mapdl/core/errors.py +++ b/src/ansys/mapdl/core/errors.py @@ -307,9 +307,9 @@ def wrapper(*args, **kwargs): old_handler = signal.signal(signal.SIGINT, handler) # Capture gRPC exceptions - n_attempts = 3 - initial_backoff = 0.05 - multiplier_backoff = 3 + n_attempts = 5 + initial_backoff = 0.1 + multiplier_backoff = 2 i_attemps = 0 From 4614b6ebaac6d2ca3d9e68cd9cb41afd90651f56 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 24 Oct 2024 19:18:33 +0000 Subject: [PATCH 073/122] feat: renaming to scheduler_options. Using variable default start_timeout. Raise an exception if scheduler options are given, but not nproc. Fix scontrol call. --- src/ansys/mapdl/core/launcher.py | 72 ++++++++++++++++++++++---------- tests/test_launcher.py | 21 +++++----- 2 files changed, 60 insertions(+), 33 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index a1d39e79ed4..02c038b2992 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -473,8 +473,8 @@ def launch_grpc( LOG.info( "\n============" - "\n============" - f"{header}:\nLocation:\n{run_location}\n" + "\n============\n" + f"{header}\nLocation:\n{run_location}\n" f"Command:\n{cmd_string}\n" f"Env vars:\n{env_vars}" "\n============" @@ -1007,7 +1007,7 @@ def launch_mapdl( override: bool = False, loglevel: str = "ERROR", additional_switches: str = "", - start_timeout: int = 45, + start_timeout: Optional[int] = None, port: Optional[int] = None, cleanup_on_exit: bool = True, start_instance: Optional[bool] = None, @@ -1093,7 +1093,8 @@ def launch_mapdl( section for additional details. start_timeout : float, optional - Maximum allowable time to connect to the MAPDL server. 
+ Maximum allowable time to connect to the MAPDL server. By default it is + 45 seconds, however, it is increased to 90 seconds if running on HPC. port : int Port to launch MAPDL gRPC on. Final port will be the first @@ -1572,7 +1573,7 @@ def launch_mapdl( if args["launch_on_hpc"]: # wrapping command if on HPC cmd = generate_sbatch_command( - cmd, scheduler_args=args.get("scheduler_args") + cmd, scheduler_options=args.get("scheduler_options") ) try: @@ -1887,7 +1888,8 @@ def get_value( SLURM_CPUS_PER_TASK = get_value("SLURM_CPUS_PER_TASK", kwargs) LOG.info(f"SLURM_CPUS_PER_TASK: {SLURM_CPUS_PER_TASK}") - # Set to value of the --ntasks option, if specified. See SLURM_NTASKS. Included for backwards compatibility. + # Set to value of the --ntasks option, if specified. See SLURM_NTASKS. + # Included for backwards compatibility. SLURM_NPROCS = get_value("SLURM_NPROCS", kwargs) LOG.info(f"SLURM_NPROCS: {SLURM_NPROCS}") @@ -2436,7 +2438,7 @@ def check_kwargs(args: Dict[str, Any]): raise ValueError(f"The following arguments are not recognized: {ms_}") -def pre_check_args(args): +def pre_check_args(args: dict[str, Any]): if args["start_instance"] and args["ip"] and not args["on_pool"]: raise ValueError( "When providing a value for the argument 'ip', the argument " @@ -2449,6 +2451,21 @@ def pre_check_args(args): if args["exec_file"] and args["version"]: raise ValueError("Cannot specify both ``exec_file`` and ``version``.") + # Setting timeout + if args["start_timeout"] is None: + if args["launch_on_hpc"]: + args["start_timeout"] = 90 + else: + args["start_timeout"] = 45 + + # Raising warning + if args.get("scheduler_options") and args.get("nproc", None) is None: + raise ValueError( + "PyMAPDL does not read the number of cores from the 'scheduler_options'. " + "Hence you need to specify the number of cores you want to use using " + "the argument 'nproc' in 'launch_mapdl'." 
+ ) + def get_cpus(args: Dict[str, Any]): """Get number of CPUs @@ -2654,7 +2671,7 @@ def launch_mapdl_on_cluster( additional_switches=args["additional_switches"], ) - cmd = generate_sbatch_command(cmd, scheduler_args=args.get("scheduler_args")) + cmd = generate_sbatch_command(cmd, scheduler_options=args.get("scheduler_options")) jobid = None try: @@ -2717,8 +2734,8 @@ def launch_mapdl_on_cluster( def get_hostname_host_cluster(job_id: int, timeout: int = 30) -> str: - options = f"show jobid -dd {job_id}".split() - LOG.debug(f"Executing the command 'scontrol {' '.join(options)}'") + options = f"show jobid -dd {job_id}" + LOG.debug(f"Executing the command 'scontrol {options}'") ready = False time_start = time.time() @@ -2766,6 +2783,9 @@ def get_hostname_host_cluster(job_id: int, timeout: int = 30) -> str: batchhost_ip = socket.gethostbyname(batchhost) LOG.debug(f"Batchhost IP: {batchhost_ip}") + LOG.info( + f"Job {job_id} successfully allocated and running in '{batchhost}'({batchhost_ip})" + ) return batchhost, batchhost_ip @@ -2784,7 +2804,7 @@ def get_jobid(stdout: str) -> int: def generate_sbatch_command( - cmd: Union[str, List[str]], scheduler_args: Optional[Union[str, Dict[str, str]]] + cmd: Union[str, List[str]], scheduler_options: Optional[Union[str, Dict[str, str]]] ) -> List[str]: """Generate sbatch command for a given MAPDL launch command.""" @@ -2805,25 +2825,28 @@ def add_minus(arg: str): return arg - if scheduler_args: - if isinstance(scheduler_args, dict): - scheduler_args = " ".join( - [f"{add_minus(key)}='{value}'" for key, value in scheduler_args.items()] + if scheduler_options: + if isinstance(scheduler_options, dict): + scheduler_options = " ".join( + [ + f"{add_minus(key)}='{value}'" + for key, value in scheduler_options.items() + ] ) else: - scheduler_args = "" + scheduler_options = "" - if "wrap" in scheduler_args: + if "wrap" in scheduler_options: raise ValueError( "The sbatch argument 'wrap' is used by PyMAPDL to submit the job." 
"Hence you cannot use it as sbatch argument." ) - LOG.debug(f"The additional sbatch arguments are: {scheduler_args}") + LOG.debug(f"The additional sbatch arguments are: {scheduler_options}") if isinstance(cmd, list): cmd = " ".join(cmd) - cmd = ["sbatch", scheduler_args, "--wrap", f"'{cmd}'"] + cmd = ["sbatch", scheduler_options, "--wrap", f"'{cmd}'"] cmd = [each for each in cmd if bool(each)] return cmd @@ -2866,7 +2889,9 @@ def check_mapdl_launch_on_hpc( f"stdout:\n{stdout}\nstderr:\n{stderr}" ) - return get_jobid(stdout) + jobid = get_jobid(stdout) + LOG.info(f"HPC job successfully submitted. JobID: {jobid}") + return jobid def get_job_info( @@ -2904,9 +2929,10 @@ def kill_job(jobid: int): subprocess.Popen(["scancel", str(jobid)]) -def send_scontrol(args: List[str]): - args.insert(0, "scontrol") +def send_scontrol(args: str): + cmd = f"scontrol {args}".split(" ") return subprocess.Popen( - args, + cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, ) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index bc1f4852970..7d28ce36b92 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -1220,10 +1220,10 @@ def test_get_cpus_min(): @pytest.mark.parametrize( - "scheduler_args", + "scheduler_options", [None, "-N 10", {"N": 10, "nodes": 10, "-tasks": 3, "--ntask-per-node": 2}], ) -def test_generate_sbatch_command(scheduler_args): +def test_generate_sbatch_command(scheduler_options): cmd = [ "/ansys_inc/v242/ansys/bin/ansys242", "-j", @@ -1237,27 +1237,28 @@ def test_generate_sbatch_command(scheduler_args): "-my_add=switch", ] - cmd_post = generate_sbatch_command(cmd, scheduler_args) + cmd_post = generate_sbatch_command(cmd, scheduler_options) assert cmd_post[0] == "sbatch" - if scheduler_args: - if isinstance(scheduler_args, dict): + if scheduler_options: + if isinstance(scheduler_options, dict): assert ( cmd_post[1] == "-N='10' --nodes='10' --tasks='3' --ntask-per-node='2'" ) else: - assert cmd_post[1] == scheduler_args + assert cmd_post[1] 
== scheduler_options assert cmd_post[-2] == "--wrap" assert cmd_post[-1] == f"""'{" ".join(cmd)}'""" @pytest.mark.parametrize( - "scheduler_args", [None, "--wrap '/bin/bash", {"--wrap": "/bin/bash", "nodes": 10}] + "scheduler_options", + [None, "--wrap '/bin/bash", {"--wrap": "/bin/bash", "nodes": 10}], ) -def test_generate_sbatch_wrap_in_arg(scheduler_args): +def test_generate_sbatch_wrap_in_arg(scheduler_options): cmd = ["/ansys_inc/v242/ansys/bin/ansys242", "-grpc"] - if scheduler_args: + if scheduler_options: context = pytest.raises( ValueError, match="The sbatch argument 'wrap' is used by PyMAPDL to submit the job.", @@ -1266,7 +1267,7 @@ def test_generate_sbatch_wrap_in_arg(scheduler_args): context = NullContext() with context: - cmd_post = generate_sbatch_command(cmd, scheduler_args) + cmd_post = generate_sbatch_command(cmd, scheduler_options) assert cmd[0] in cmd_post[-1] From dc801f9a9cdd912d5c675420e4a94b79983fe6f7 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 24 Oct 2024 19:20:42 +0000 Subject: [PATCH 074/122] refactor: added types --- src/ansys/mapdl/core/mapdl_grpc.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index 262192e2f96..51725386638 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -994,36 +994,42 @@ def _run(self, cmd: str, verbose: bool = False, mute: Optional[bool] = None) -> return response.strip() @property - def busy(self): + def busy(self) -> bool: """True when MAPDL gRPC server is executing a command.""" return self._busy @property - def exiting(self): + def exiting(self) -> bool: """Returns true if the MAPDL instance is exiting.""" return self._exiting @property - def port(self): + def port(self) -> int: """Returns the MAPDL gRPC instance port.""" return self._port @property - def ip(self): + def ip(self) -> str: """Return the MAPDL 
gRPC instance IP.""" return self._ip @property - def hostname(self): + def hostname(self) -> str: """Return the hostname of the machine MAPDL is running in.""" return self._hostname @property - def jobid(self): + def jobid(self) -> int: """Returns the job id where the MAPDL is running in. This is only applicable if MAPDL is running on an HPC cluster.""" return self._jobid + @property + def mapdl_on_hpc(self) -> bool: + """Returns :class:`True` if the MAPDL instance has been launched using + an scheduler.""" + return self._mapdl_on_hpc + @protect_grpc def _send_command(self, cmd: str, mute: bool = False) -> Optional[str]: """Send a MAPDL command and return the response as a string""" From ffb3ea813aee12f012d369a217c627c4f9834988 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 24 Oct 2024 19:21:17 +0000 Subject: [PATCH 075/122] refactor: launcher args order --- src/ansys/mapdl/core/launcher.py | 69 ++++++++++++++------------------ 1 file changed, 29 insertions(+), 40 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 9cae997bd86..4e14086ae97 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -297,9 +297,8 @@ def get_process_at_port(port) -> Optional[psutil.Process]: """Get the process (psutil.Process) running at the given port""" for proc in psutil.process_iter(): try: - connections = proc.connections( - kind="inet" - ) # just to check if we can access the + # just to check if we can access the port + connections = proc.net_connections() except psutil.AccessDenied: continue except psutil.NoSuchProcess: @@ -559,7 +558,7 @@ def check_mapdl_launch( msg = ( str(e) + f"\nRun location: {run_location}" - + f"\nCommand line used: {cmd}\n\n" + + f"\nCommand line used: {' '.join(cmd)}\n\n" ) terminal_output = "\n".join(_get_std_output(std_queue=stdout_queue)).strip() @@ -1412,12 +1411,6 @@ def launch_mapdl( args["port"] = get_port(args["port"], 
args["start_instance"]) - get_exec_file(args) - - args["version"] = get_version( - args["version"], args.get("exec_file"), launch_on_hpc=args["launch_on_hpc"] - ) - if args["start_instance"]: ######################################## # Local adjustments @@ -1425,6 +1418,23 @@ def launch_mapdl( # # Only when starting MAPDL (aka Local) + get_exec_file(args) + + args["version"] = get_version( + args["version"], args.get("exec_file"), launch_on_hpc=args["launch_on_hpc"] + ) + + # Check for a valid connection mode + args["mode"] = check_mode(args["mode"], args["version"]) + + args["additional_switches"] = set_license_switch( + args["license_type"], args["additional_switches"] + ) + + env_vars: Dict[str, str] = update_env_vars( + args["add_env_vars"], args["replace_env_vars"] + ) + get_run_location(args) # verify lock file does not exist @@ -1434,22 +1444,6 @@ def launch_mapdl( # (as way to check if MAPDL started or not) remove_err_files(args["run_location"], args["jobname"]) - # Check for a valid connection mode - args["mode"] = check_mode(args["mode"], args["version"]) - - if not args["mode"]: - args["mode"] = "grpc" - - LOG.debug(f"Using mode {args['mode']}") - - args["additional_switches"] = set_license_switch( - args["license_type"], args["additional_switches"] - ) - - env_vars: Dict[str, str] = update_env_vars( - args["add_env_vars"], args["replace_env_vars"] - ) - ######################################## # Context specific launching adjustments # -------------------------------------- @@ -1697,6 +1691,7 @@ def check_mode(mode: ALLOWABLE_MODES, version: Optional[int] = None): warnings.warn("MAPDL as a service has not been tested on MAPDL < v13") mode = "console" + LOG.debug(f"Using mode {mode}") return mode @@ -2577,6 +2572,14 @@ def launch_mapdl_on_cluster( args["version"] = get_version(args["version"], exec_file) + args["mode"] = check_mode(args["mode"], args["version"]) + + args["additional_switches"] = set_license_switch( + args["license_type"], 
args["additional_switches"] + ) + + env_vars = update_env_vars(args["add_env_vars"], args["replace_env_vars"]) + if args["start_instance"]: ######################################## # Local adjustments @@ -2593,20 +2596,6 @@ def launch_mapdl_on_cluster( # (as way to check if MAPDL started or not) remove_err_files(args["run_location"], args["jobname"]) - if _HAS_ATP and not args["_debug_no_launch"]: - version = version_from_path("mapdl", args["exec_file"]) - args["mode"] = check_mode(args["mode"], version) - - args["mode"] = "grpc" - - LOG.debug(f"Using mode {args['mode']}") - - args["additional_switches"] = set_license_switch( - args["license_type"], args["additional_switches"] - ) - - env_vars = update_env_vars(args["add_env_vars"], args["replace_env_vars"]) - ######################################## # Context specific launching adjustments # -------------------------------------- From 519d4bbc37c0441af402bee07c46885129c69b42 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 24 Oct 2024 19:23:32 +0000 Subject: [PATCH 076/122] refactor: tests --- src/ansys/mapdl/core/mapdl_grpc.py | 2 +- tests/conftest.py | 4 ++++ tests/test_launcher.py | 5 +++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index fb5b5ceb02b..12bbdbf2774 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -1144,7 +1144,7 @@ def exit(self, save=False, force=False, **kwargs): return # Actually exiting MAPDL instance - if finish_job_on_exit: + if self.finish_job_on_exit: self._exiting = True self._exit_mapdl(path=mapdl_path) self._exited = True diff --git a/tests/conftest.py b/tests/conftest.py index 3cd91cccbcc..76dd3fc2f4b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -441,6 +441,7 @@ def run_before_and_after_tests( assert prev == mapdl.is_local assert not mapdl.exited, "MAPDL is exited after the test. 
It should have not!" assert not mapdl._mapdl_on_hpc, "Mapdl class is on HPC mode. It should not!" + assert mapdl.finish_job_on_exit, "Mapdl class should finish the job!" make_sure_not_instances_are_left_open() @@ -624,6 +625,8 @@ def mapdl(request, tmpdir_factory): if START_INSTANCE: mapdl._local = True mapdl._exited = False + # mapdl.finish_job_on_exit = True + assert mapdl.finish_job_on_exit mapdl.exit(save=True, force=True) assert mapdl._exited assert "MAPDL exited" in str(mapdl) @@ -664,6 +667,7 @@ def func_which_returns(return_=None): ("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._run_at_connect", func_which_returns("")), ("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._exit_mapdl", func_which_returns(None)), ("socket.gethostbyname", func_which_returns("123.45.67.99")), + ("socket.gethostbyaddr", func_which_returns("mapdlhostname")), ) PATCH_MAPDL_START = [patch(method, ret) for method, ret in _meth_patch_MAPDL_launch] diff --git a/tests/test_launcher.py b/tests/test_launcher.py index bc1f4852970..7bc30776328 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -242,6 +242,7 @@ def test_license_type_additional_switch(mapdl, license_name): assert f"-p {license_name}" in args["additional_switches"] +@stack(*PATCH_MAPDL_START) @requires("ansys-tools-path") def test_license_type_dummy(mapdl): dummy_license_type = "dummy" @@ -250,7 +251,7 @@ def test_license_type_dummy(mapdl): match="Still PyMAPDL will try to use it but in older MAPDL versions you might experience", ): launch_mapdl( - start_instance=False, + start_instance=True, port=mapdl.port + 1, additional_switches=f" -p {dummy_license_type} " + QUICK_LAUNCH_SWITCHES, start_timeout=start_timeout, @@ -1325,7 +1326,7 @@ def test_exit_job(mock_popen, mapdl): mapdl._jobid = None mapdl._exited = False mapdl._mapdl_on_hpc = False - mapdl.finish_job_on_exit = False + mapdl.finish_job_on_exit = True mapdl.remove_temp_dir_on_exit = prev_rem # Checking From 71feaadd4355521c5746b9c8933dd0d3673e2e46 Mon Sep 17 00:00:00 
2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 24 Oct 2024 21:39:20 +0200 Subject: [PATCH 077/122] fix: reusing connection attr. --- src/ansys/mapdl/core/launcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index c32cd277acc..21da5432aad 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -298,7 +298,7 @@ def get_process_at_port(port) -> Optional[psutil.Process]: for proc in psutil.process_iter(): try: # just to check if we can access the port - connections = proc.net_connections() + connections = proc.connections() except psutil.AccessDenied: continue except psutil.NoSuchProcess: From 6461a2b8f2dd8fb1e5588db0614514dfade933a1 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 24 Oct 2024 19:50:52 +0000 Subject: [PATCH 078/122] fix: pass start_timeout to `get_job_info`. --- src/ansys/mapdl/core/launcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 3eeea7fdb5e..410efb97342 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1581,7 +1581,7 @@ def launch_mapdl( if args["launch_on_hpc"]: start_parm["jobid"] = check_mapdl_launch_on_hpc(process, start_parm) - get_job_info(start_parm=start_parm) + get_job_info(start_parm=start_parm, timeout=args["start_timeout"]) else: # Local mapdl launch check check_mapdl_launch( From 8fb5103b4b3272e91069250cb2438e492e684035 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Thu, 24 Oct 2024 20:05:16 +0000 Subject: [PATCH 079/122] fix: test --- src/ansys/mapdl/core/launcher.py | 22 ++++++++++------------ tests/test_launcher.py | 1 + 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 
3eeea7fdb5e..ea65c4f7f00 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1404,10 +1404,10 @@ def launch_mapdl( # extracting parameters get_slurm_options(args, kwargs) - get_cpus(args) - get_start_instance_arg(args) + get_cpus(args) + get_ip(args) args["port"] = get_port(args["port"], args["start_instance"]) @@ -1425,9 +1425,6 @@ def launch_mapdl( args["version"], args.get("exec_file"), launch_on_hpc=args["launch_on_hpc"] ) - # Check for a valid connection mode - args["mode"] = check_mode(args["mode"], args["version"]) - args["additional_switches"] = set_license_switch( args["license_type"], args["additional_switches"] ) @@ -1445,6 +1442,9 @@ def launch_mapdl( # (as way to check if MAPDL started or not) remove_err_files(args["run_location"], args["jobname"]) + # Check for a valid connection mode + args["mode"] = check_mode(args["mode"], args["version"]) + ######################################## # Context specific launching adjustments # -------------------------------------- @@ -1646,13 +1646,11 @@ def check_mode(mode: ALLOWABLE_MODES, version: Optional[int] = None): """ if not mode and not version: return "grpc" - elif not version: - return mode if isinstance(mode, str): mode = mode.lower() if mode == "grpc": - if version < 211: + if version and version < 211: if version < 202 and os.name == "nt": raise VersionError( "gRPC mode requires MAPDL 2020R2 or newer " "on Windows." @@ -1663,7 +1661,7 @@ def check_mode(mode: ALLOWABLE_MODES, version: Optional[int] = None): elif mode == "console": if os.name == "nt": raise ValueError("Console mode requires Linux.") - if version >= 211: + if version and version >= 211: warnings.warn( "Console mode not recommended in MAPDL 2021R1 or newer.\n" "Recommend using gRPC mode instead." 
@@ -1675,9 +1673,9 @@ def check_mode(mode: ALLOWABLE_MODES, version: Optional[int] = None): ) else: # auto-select based on best version - if version >= 211: + if version and version >= 211: mode = "grpc" - elif version == 202 and os.name == "nt": + elif version and version == 202 and os.name == "nt": # Windows supports it as of 2020R2 mode = "grpc" else: @@ -1688,7 +1686,7 @@ def check_mode(mode: ALLOWABLE_MODES, version: Optional[int] = None): ) mode = "console" - if version < 130: + if version and version < 130: warnings.warn("MAPDL as a service has not been tested on MAPDL < v13") mode = "console" diff --git a/tests/test_launcher.py b/tests/test_launcher.py index df14b3d69c3..d536c31f15c 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -1334,6 +1334,7 @@ def test_exit_job(mock_popen, mapdl): mock_popen.assert_called_once_with(1001) +@requires("ansys-tools-path") @patch( "ansys.tools.path.path._get_application_path", lambda *args, **kwargs: "path/to/mapdl/executable", From 00b1faa98e66d6570433f8ec0a5209629a8b38ba Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 09:00:01 +0000 Subject: [PATCH 080/122] fix: test --- tests/conftest.py | 9 ++++++++- tests/test_mapdl.py | 7 ++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 76dd3fc2f4b..6b2d226d6df 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -667,7 +667,14 @@ def func_which_returns(return_=None): ("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._run_at_connect", func_which_returns("")), ("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._exit_mapdl", func_which_returns(None)), ("socket.gethostbyname", func_which_returns("123.45.67.99")), - ("socket.gethostbyaddr", func_which_returns("mapdlhostname")), + ( + "socket.gethostbyaddr", + func_which_returns( + [ + "mapdlhostname", + ] + ), + ), ) PATCH_MAPDL_START = [patch(method, ret) for method, ret in _meth_patch_MAPDL_launch] diff --git 
a/tests/test_mapdl.py b/tests/test_mapdl.py index 1fe55797b4f..8b66984aaa3 100644 --- a/tests/test_mapdl.py +++ b/tests/test_mapdl.py @@ -2470,18 +2470,19 @@ def test_ip_hostname_in_start_parm(ip): "ip": ip, "local": False, "set_no_abort": False, - "hostname": "myhost", "jobid": 1001, } - mapdl = pymapdl.Mapdl(disable_run_at_connect=False, **start_parm) + with patch("socket.gethostbyaddr") as mck_sock: + mck_sock.return_value = ("myhostname",) + mapdl = pymapdl.Mapdl(disable_run_at_connect=False, **start_parm) if ip == "myhostname": assert mapdl.ip == "123.45.67.99" else: assert mapdl.ip == ip - assert mapdl.hostname == "myhost" + assert mapdl.hostname == "myhostname" del mapdl From 64f6e98f44f0c8f62dd4dbbadac13e62d3e0f98c Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 09:11:27 +0000 Subject: [PATCH 081/122] tests: not requiring warning if on minimal since ATP is not present. --- .github/workflows/ci.yml | 2 ++ tests/test_launcher.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 14e8de1f915..266a1c75d61 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -774,6 +774,8 @@ jobs: env: ON_LOCAL: true ON_UBUNTU: true + TESTING_MINIMAL: true + steps: - name: "Install Git and checkout project" diff --git a/tests/test_launcher.py b/tests/test_launcher.py index d536c31f15c..4ecf60fe157 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -68,6 +68,7 @@ ON_LOCAL, PATCH_MAPDL_START, QUICK_LAUNCH_SWITCHES, + TESTING_MINIMAL, NullContext, requires, ) @@ -1390,9 +1391,14 @@ def test_launch_on_hpc_not_found_ansys(mck_sc, mck_lgrpc, mck_kj, monkeypatch): "a long scontrol...\nJobState=RUNNING\n...\nBatchHost=myhostname\n...\nin message" ) - with pytest.warns( - UserWarning, match="PyMAPDL could not find the ANSYS executable." 
- ): + if TESTING_MINIMAL: + context = NullContext() + else: + context = pytest.warns( + UserWarning, match="PyMAPDL could not find the ANSYS executable." + ) + + with context: mapdl = launch_mapdl( launch_on_hpc=True, exec_file=exec_file, From 55e09fc966cb53a8d2261bf786b4b46067c9aaf3 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 09:28:58 +0000 Subject: [PATCH 082/122] feat: simplifying directory property --- src/ansys/mapdl/core/mapdl_core.py | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_core.py b/src/ansys/mapdl/core/mapdl_core.py index b5008cf7175..4e56bac2178 100644 --- a/src/ansys/mapdl/core/mapdl_core.py +++ b/src/ansys/mapdl/core/mapdl_core.py @@ -287,7 +287,6 @@ def __init__( self._krylov = None self._on_docker = None self._platform = None - self._path_cache = None # Cache self._print_com: bool = print_com # print the command /COM input. # Start_parameters @@ -505,17 +504,11 @@ def directory(self) -> str: a warning. """ # always attempt to cache the path - i = 0 - while (not self._path and i > 5) or i == 0: - try: - self._path = self.inquire("", "DIRECTORY") - except Exception as e: # pragma: no cover - logger.warning( - f"Failed to get the directory due to the following error: {e}" - ) - i += 1 - if not self._path: # pragma: no cover - time.sleep(0.1) + + try: + self._path = self.inquire("", "DIRECTORY") + except MapdlExitedError: + return self._path # os independent path format if self._path: # self.inquire might return ''. @@ -523,14 +516,10 @@ def directory(self) -> str: # new line to fix path issue, see #416 self._path = repr(self._path)[1:-1] else: # pragma: no cover - if self._path_cache: - return self._path_cache - else: - raise IOError( - f"The directory returned by /INQUIRE is not valid ('{self._path}')." - ) + raise MapdlRuntimeError( + f"The directory returned by /INQUIRE is not valid ('{self._path}')." 
+ ) - self._path_cache = self._path # update return self._path @directory.setter @@ -538,6 +527,7 @@ def directory(self) -> str: def directory(self, path: Union[str, pathlib.Path]) -> None: """Change the directory using ``Mapdl.cwd``""" self.cwd(path) + self._path = path @property def exited(self): From 837e331cdf7636b5fe4d38c5388fdccc925c9185 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 09:31:35 +0000 Subject: [PATCH 083/122] feat: using cached version of directory. --- src/ansys/mapdl/core/mapdl_grpc.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index 217c067c3c1..b65aaeb57f1 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -1112,7 +1112,7 @@ def exit(self, save=False, force=False, **kwargs): f"Exiting MAPLD gRPC instance {self.ip}:{self.port} on '{self._path}'." ) - mapdl_path = self.directory # caching + mapdl_path = self._path # using cached version if self._exited is None: self._log.debug("'self._exited' is none.") return # Some edge cases the class object is not completely @@ -1152,7 +1152,8 @@ def exit(self, save=False, force=False, **kwargs): # Exiting HPC job if self._mapdl_on_hpc: self.kill_job(self.jobid) - self._log.debug(f"Job (id: {self.jobid}) has been cancel.") + if hasattr(self, "_log"): + self._log.debug(f"Job (id: {self.jobid}) has been cancel.") # Exiting remote instances if self._remote_instance: # pragma: no cover @@ -3764,7 +3765,7 @@ def kill_job(self, jobid: int) -> None: def __del__(self): """In case the object is deleted""" try: - self.exit() + self.exit(force=True) except Exception as e: pass From 0ad85129da7db85983a3fd8d1ed0de2f068af94e Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 09:28:58 +0000 Subject: [PATCH 084/122] feat: simplifying directory property --- 
src/ansys/mapdl/core/mapdl_core.py | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_core.py b/src/ansys/mapdl/core/mapdl_core.py index c380cda7823..62b7154fd6f 100644 --- a/src/ansys/mapdl/core/mapdl_core.py +++ b/src/ansys/mapdl/core/mapdl_core.py @@ -281,7 +281,6 @@ def __init__( self._krylov = None self._on_docker = None self._platform = None - self._path_cache = None # Cache self._print_com: bool = print_com # print the command /COM input. # Start_parameters @@ -499,17 +498,11 @@ def directory(self) -> str: a warning. """ # always attempt to cache the path - i = 0 - while (not self._path and i > 5) or i == 0: - try: - self._path = self.inquire("", "DIRECTORY") - except Exception as e: # pragma: no cover - logger.warning( - f"Failed to get the directory due to the following error: {e}" - ) - i += 1 - if not self._path: # pragma: no cover - time.sleep(0.1) + + try: + self._path = self.inquire("", "DIRECTORY") + except MapdlExitedError: + return self._path # os independent path format if self._path: # self.inquire might return ''. @@ -517,14 +510,10 @@ def directory(self) -> str: # new line to fix path issue, see #416 self._path = repr(self._path)[1:-1] else: # pragma: no cover - if self._path_cache: - return self._path_cache - else: - raise IOError( - f"The directory returned by /INQUIRE is not valid ('{self._path}')." - ) + raise MapdlRuntimeError( + f"The directory returned by /INQUIRE is not valid ('{self._path}')." 
+ ) - self._path_cache = self._path # update return self._path @directory.setter @@ -532,6 +521,7 @@ def directory(self) -> str: def directory(self, path: Union[str, pathlib.Path]) -> None: """Change the directory using ``Mapdl.cwd``""" self.cwd(path) + self._path = path @property def exited(self): From 07a4cf989e86ef95d68f037ac6da37d171a9936c Mon Sep 17 00:00:00 2001 From: pyansys-ci-bot <92810346+pyansys-ci-bot@users.noreply.github.com> Date: Fri, 25 Oct 2024 09:35:22 +0000 Subject: [PATCH 085/122] chore: adding changelog file 3517.miscellaneous.md [dependabot-skip] --- doc/changelog.d/3517.miscellaneous.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/changelog.d/3517.miscellaneous.md diff --git a/doc/changelog.d/3517.miscellaneous.md b/doc/changelog.d/3517.miscellaneous.md new file mode 100644 index 00000000000..649e2025246 --- /dev/null +++ b/doc/changelog.d/3517.miscellaneous.md @@ -0,0 +1 @@ +feat: simplifying directory setter property \ No newline at end of file From 4c6d12265acde6756f1518eea1f31cab12c31354 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 11:59:39 +0200 Subject: [PATCH 086/122] test: adding test --- src/ansys/mapdl/core/mapdl_core.py | 3 +- tests/test_mapdl.py | 59 +++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_core.py b/src/ansys/mapdl/core/mapdl_core.py index 62b7154fd6f..9e9f5930077 100644 --- a/src/ansys/mapdl/core/mapdl_core.py +++ b/src/ansys/mapdl/core/mapdl_core.py @@ -502,7 +502,8 @@ def directory(self) -> str: try: self._path = self.inquire("", "DIRECTORY") except MapdlExitedError: - return self._path + # Let's return the cached path + pass # os independent path format if self._path: # self.inquire might return ''. 
diff --git a/tests/test_mapdl.py b/tests/test_mapdl.py index 909860f461a..5fa9ee6ee88 100644 --- a/tests/test_mapdl.py +++ b/tests/test_mapdl.py @@ -28,6 +28,7 @@ import shutil import tempfile import time +from unittest.mock import patch import grpc import numpy as np @@ -49,6 +50,7 @@ IncorrectWorkingDirectory, MapdlCommandIgnoredError, MapdlConnectionError, + MapdlExitedError, MapdlRuntimeError, ) from ansys.mapdl.core.launcher import launch_mapdl @@ -67,36 +69,26 @@ PORT1 = 50090 DEPRECATED_COMMANDS = [ + "edadapt", + "edale", "edasmp", "edbound", + "edbvis", "edbx", + "edcadapt", "edcgen", "edclist", "edcmore", "edcnstr", "edcontact", - "edcrb", - "edcurve", - "eddbl", - "eddc", - "edipart", - "edlcs", - "edmp", - "ednb", - "edndtsd", - "ednrot", - "edpart", - "edpc", - "edsp", - "edweld", - "edadapt", - "edale", - "edbvis", - "edcadapt", "edcpu", + "edcrb", "edcsc", "edcts", + "edcurve", "eddamp", + "eddbl", + "eddc", "eddrelax", "eddump", "edenergy", @@ -106,10 +98,18 @@ "edhist", "edhtime", "edint", + "edipart", "edis", + "edlcs", "edload", + "edmp", + "ednb", + "edndtsd", + "ednrot", "edopt", "edout", + "edpart", + "edpc", "edpl", "edpvel", "edrc", @@ -119,10 +119,12 @@ "edrun", "edshell", "edsolv", + "edsp", "edstart", "edterm", "edtp", "edvel", + "edweld", "edwrite", "rexport", ] @@ -2460,3 +2462,24 @@ def test_no_flush_stored(mapdl): assert not mapdl._store_commands assert mapdl._stored_commands == [] + + +def test_directory_setter(mapdl): + # Testing edge cases + prev_path = mapdl._path + + with patch( + "ansys.mapdl.core.Mapdl.inquire", side_effect=MapdlExitedError("mocked error") + ) as mck_inquire: + + assert prev_path == mapdl.directory + + mck_inquire.assert_called_once() + + mapdl._path = None + with pytest.raises( + MapdlRuntimeError, match="The directory returned by /INQUIRE is not valid" + ): + mapdl.directory + + mapdl._path = prev_path From f0a1423d75c69ca282dec367ebd200864e11029b Mon Sep 17 00:00:00 2001 From: German 
<28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 12:25:07 +0200 Subject: [PATCH 087/122] feat: caching directory in cwd --- src/ansys/mapdl/core/mapdl_extended.py | 3 +-- src/ansys/mapdl/core/mapdl_grpc.py | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_extended.py b/src/ansys/mapdl/core/mapdl_extended.py index 0e5c859eb8f..058e1aacd10 100644 --- a/src/ansys/mapdl/core/mapdl_extended.py +++ b/src/ansys/mapdl/core/mapdl_extended.py @@ -357,11 +357,10 @@ def cwd(self, *args, **kwargs): output = super().cwd(*args, mute=False, **kwargs) if output is not None: - if "*** WARNING ***" in output: + if "*** WARNING ***" in output or not self.directory: raise IncorrectWorkingDirectory( "\n" + "\n".join(output.splitlines()[1:]) ) - return output @wraps(_MapdlCore.list) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index b65aaeb57f1..c99490e240a 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -924,6 +924,11 @@ def _run_at_connect(self): self.numvar(200, mute=True) self.inquire("", "DIRECTORY") + + # Caching directory + if not self.directory: + self.directory # try again + self.show(self._file_type_for_plots) self.version # Caching version self.file_type_for_plots # Setting /show,png and caching it. 
From faed3403d7babd06eebdf12d555e4b6122852f80 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 12:26:16 +0200 Subject: [PATCH 088/122] refactor: mapdl patcher --- tests/conftest.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 6b2d226d6df..98f04a33f76 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -650,26 +650,29 @@ def mapdl(request, tmpdir_factory): # Necessary patches to patch Mapdl launch -def func_which_returns(return_=None): +def _returns(return_=None): return lambda *args, **kwargs: return_ # Methods to patch in MAPDL when launching +def _patch_method(method): + return "ansys.mapdl.core.mapdl_grpc.MapdlGrpc." + method + + _meth_patch_MAPDL_launch = ( # method, and its return - ("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._connect", func_which_returns(True)), - ("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._run", func_which_returns("")), - ("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._create_channel", func_which_returns("")), - ( - "ansys.mapdl.core.mapdl_grpc.MapdlGrpc._subscribe_to_channel", - func_which_returns(""), - ), - ("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._run_at_connect", func_which_returns("")), - ("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._exit_mapdl", func_which_returns(None)), - ("socket.gethostbyname", func_which_returns("123.45.67.99")), + (_patch_method("_connect"), _returns(True)), + (_patch_method("_run"), _returns("")), + (_patch_method("_create_channel"), _returns("")), + (_patch_method("inquire"), _returns("/home/simulation")), + (_patch_method("_subscribe_to_channel"), _returns("")), + (_patch_method("_run_at_connect"), _returns("")), + (_patch_method("_exit_mapdl"), _returns(None)), + # non-mapdl methods + ("socket.gethostbyname", _returns("123.45.67.99")), ( "socket.gethostbyaddr", - func_which_returns( + _returns( [ "mapdlhostname", ] From f3438b592a99ee731709095123095175fa8747ab Mon Sep 17 
00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 12:25:07 +0200 Subject: [PATCH 089/122] feat: caching directory in cwd --- src/ansys/mapdl/core/mapdl_extended.py | 3 +-- src/ansys/mapdl/core/mapdl_grpc.py | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_extended.py b/src/ansys/mapdl/core/mapdl_extended.py index 0e5c859eb8f..058e1aacd10 100644 --- a/src/ansys/mapdl/core/mapdl_extended.py +++ b/src/ansys/mapdl/core/mapdl_extended.py @@ -357,11 +357,10 @@ def cwd(self, *args, **kwargs): output = super().cwd(*args, mute=False, **kwargs) if output is not None: - if "*** WARNING ***" in output: + if "*** WARNING ***" in output or not self.directory: raise IncorrectWorkingDirectory( "\n" + "\n".join(output.splitlines()[1:]) ) - return output @wraps(_MapdlCore.list) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index ea0aac63d88..4da72eec3b3 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -906,6 +906,11 @@ def _run_at_connect(self): self.numvar(200, mute=True) self.inquire("", "DIRECTORY") + + # Caching directory + if not self.directory: + self.directory # try again + self.show(self._file_type_for_plots) self.version # Caching version self.file_type_for_plots # Setting /show,png and caching it. From 6c7f718457bd8ea1281d6373a69305cb70f49b88 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 12:34:04 +0200 Subject: [PATCH 090/122] feat: caching directory for sure. 
--- src/ansys/mapdl/core/mapdl_extended.py | 4 +++- tests/test_mapdl.py | 10 ++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/ansys/mapdl/core/mapdl_extended.py b/src/ansys/mapdl/core/mapdl_extended.py index 058e1aacd10..b282b514bb2 100644 --- a/src/ansys/mapdl/core/mapdl_extended.py +++ b/src/ansys/mapdl/core/mapdl_extended.py @@ -357,10 +357,12 @@ def cwd(self, *args, **kwargs): output = super().cwd(*args, mute=False, **kwargs) if output is not None: - if "*** WARNING ***" in output or not self.directory: + if "*** WARNING ***" in output: raise IncorrectWorkingDirectory( "\n" + "\n".join(output.splitlines()[1:]) ) + + self.directory # caching return output @wraps(_MapdlCore.list) diff --git a/tests/test_mapdl.py b/tests/test_mapdl.py index 5fa9ee6ee88..f23842197a0 100644 --- a/tests/test_mapdl.py +++ b/tests/test_mapdl.py @@ -2483,3 +2483,13 @@ def test_directory_setter(mapdl): mapdl.directory mapdl._path = prev_path + + +def test_cwd_changing_directory(mapdl): + prev_path = mapdl._path + mapdl._path = None + + mapdl.cwd(prev_path) + + assert mapdl._path == prev_path + assert mapdl.directory == prev_path From d2e70be598da3dbbffd53988722f59f450d72345 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 12:48:08 +0200 Subject: [PATCH 091/122] feat: caching dir at the cwd level. 
--- src/ansys/mapdl/core/mapdl_core.py | 1 - src/ansys/mapdl/core/mapdl_extended.py | 14 ++++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_core.py b/src/ansys/mapdl/core/mapdl_core.py index 9e9f5930077..0971b713484 100644 --- a/src/ansys/mapdl/core/mapdl_core.py +++ b/src/ansys/mapdl/core/mapdl_core.py @@ -522,7 +522,6 @@ def directory(self) -> str: def directory(self, path: Union[str, pathlib.Path]) -> None: """Change the directory using ``Mapdl.cwd``""" self.cwd(path) - self._path = path @property def exited(self): diff --git a/src/ansys/mapdl/core/mapdl_extended.py b/src/ansys/mapdl/core/mapdl_extended.py index b282b514bb2..ad1e8ab5576 100644 --- a/src/ansys/mapdl/core/mapdl_extended.py +++ b/src/ansys/mapdl/core/mapdl_extended.py @@ -37,6 +37,7 @@ CommandDeprecated, ComponentDoesNotExits, IncorrectWorkingDirectory, + MapdlCommandIgnoredError, MapdlRuntimeError, ) from ansys.mapdl.core.mapdl_core import _MapdlCore @@ -354,15 +355,12 @@ def mpread(self, fname="", ext="", lib="", **kwargs): @wraps(_MapdlCore.cwd) def cwd(self, *args, **kwargs): """Wraps cwd.""" - output = super().cwd(*args, mute=False, **kwargs) + try: + output = super().cwd(*args, mute=False, **kwargs) + except MapdlCommandIgnoredError as e: + raise IncorrectWorkingDirectory(e.args[0]) - if output is not None: - if "*** WARNING ***" in output: - raise IncorrectWorkingDirectory( - "\n" + "\n".join(output.splitlines()[1:]) - ) - - self.directory # caching + self._path = args[0] # caching return output @wraps(_MapdlCore.list) From 393d70d6e7b4e8a77bdc4957cb9e76ec0648decd Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 13:26:12 +0200 Subject: [PATCH 092/122] feat: retry mechanism inside /INQUIRE --- src/ansys/mapdl/core/mapdl_extended.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/ansys/mapdl/core/mapdl_extended.py b/src/ansys/mapdl/core/mapdl_extended.py 
index ad1e8ab5576..f7af20a0b56 100644 --- a/src/ansys/mapdl/core/mapdl_extended.py +++ b/src/ansys/mapdl/core/mapdl_extended.py @@ -1422,8 +1422,16 @@ def inquire(self, strarray="", func="", arg1="", arg2=""): raise ValueError( f"The arguments (strarray='{strarray}', func='{func}') are not valid." ) + response = "" + n_try = 3 + i_try = 0 + while i_try < n_try and not response: + response = self.run(f"/INQUIRE,{strarray},{func},{arg1},{arg2}", mute=False) + i_try += 1 + else: + if not response: + raise MapdlRuntimeError("/INQUIRE command didn't return a response.") - response = self.run(f"/INQUIRE,{strarray},{func},{arg1},{arg2}", mute=False) if func.upper() in [ "ENV", "TITLE", From 51f528b86b1e34a9dcbdca9537dc0577ff9d5477 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 13:27:14 +0200 Subject: [PATCH 093/122] feat: changing exception message --- src/ansys/mapdl/core/mapdl_core.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_core.py b/src/ansys/mapdl/core/mapdl_core.py index 0971b713484..3bf1fe8a385 100644 --- a/src/ansys/mapdl/core/mapdl_core.py +++ b/src/ansys/mapdl/core/mapdl_core.py @@ -497,8 +497,7 @@ def directory(self) -> str: accessible, ``cwd`` (:func:`MapdlBase.cwd`) will raise a warning. """ - # always attempt to cache the path - + # Inside inquire there is already a retry mechanisim try: self._path = self.inquire("", "DIRECTORY") except MapdlExitedError: @@ -510,9 +509,9 @@ def directory(self) -> str: self._path = self._path.replace("\\", "/") # new line to fix path issue, see #416 self._path = repr(self._path)[1:-1] - else: # pragma: no cover + else: raise MapdlRuntimeError( - f"The directory returned by /INQUIRE is not valid ('{self._path}')." + f"MAPDL could provide a path using /INQUIRE or the cached path ('{self._path}')." 
) return self._path From bc7a0055ef3e6e47606d5e6f597779006f380344 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 13:28:11 +0200 Subject: [PATCH 094/122] feat: adding tests --- tests/test_mapdl.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_mapdl.py b/tests/test_mapdl.py index f23842197a0..4f5be64dad7 100644 --- a/tests/test_mapdl.py +++ b/tests/test_mapdl.py @@ -2476,9 +2476,10 @@ def test_directory_setter(mapdl): mck_inquire.assert_called_once() - mapdl._path = None + mapdl._path = "" with pytest.raises( - MapdlRuntimeError, match="The directory returned by /INQUIRE is not valid" + MapdlRuntimeError, + match="MAPDL could provide a path using /INQUIRE or the cached path", ): mapdl.directory From 87711b78c9dcf4bbfc8c0ab6dbe72d6c2c1ed3b7 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 13:30:23 +0200 Subject: [PATCH 095/122] feat: caching directory --- src/ansys/mapdl/core/mapdl_grpc.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index 4da72eec3b3..120ba7b804f 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -905,11 +905,8 @@ def _run_at_connect(self): with self.run_as_routine("POST26"): self.numvar(200, mute=True) - self.inquire("", "DIRECTORY") - # Caching directory - if not self.directory: - self.directory # try again + self.directory self.show(self._file_type_for_plots) self.version # Caching version From b2faf9342a7484fecddc03fcd53800aadd87b235 Mon Sep 17 00:00:00 2001 From: pyansys-ci-bot <92810346+pyansys-ci-bot@users.noreply.github.com> Date: Fri, 25 Oct 2024 11:36:14 +0000 Subject: [PATCH 096/122] chore: adding changelog file 3517.added.md [dependabot-skip] --- doc/changelog.d/3517.added.md | 1 + doc/changelog.d/3517.miscellaneous.md | 1 - 2 files changed, 1 
insertion(+), 1 deletion(-) create mode 100644 doc/changelog.d/3517.added.md delete mode 100644 doc/changelog.d/3517.miscellaneous.md diff --git a/doc/changelog.d/3517.added.md b/doc/changelog.d/3517.added.md new file mode 100644 index 00000000000..d245230f949 --- /dev/null +++ b/doc/changelog.d/3517.added.md @@ -0,0 +1 @@ +refactor: simplifying directory setter property \ No newline at end of file diff --git a/doc/changelog.d/3517.miscellaneous.md b/doc/changelog.d/3517.miscellaneous.md deleted file mode 100644 index 649e2025246..00000000000 --- a/doc/changelog.d/3517.miscellaneous.md +++ /dev/null @@ -1 +0,0 @@ -feat: simplifying directory setter property \ No newline at end of file From f29405888ae67e5a53975e488ef2c438998dd7f0 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 13:36:23 +0200 Subject: [PATCH 097/122] refactor: avoid else in while. --- src/ansys/mapdl/core/mapdl_extended.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_extended.py b/src/ansys/mapdl/core/mapdl_extended.py index f7af20a0b56..b2834c5d46e 100644 --- a/src/ansys/mapdl/core/mapdl_extended.py +++ b/src/ansys/mapdl/core/mapdl_extended.py @@ -1428,9 +1428,9 @@ def inquire(self, strarray="", func="", arg1="", arg2=""): while i_try < n_try and not response: response = self.run(f"/INQUIRE,{strarray},{func},{arg1},{arg2}", mute=False) i_try += 1 - else: - if not response: - raise MapdlRuntimeError("/INQUIRE command didn't return a response.") + + if not response: + raise MapdlRuntimeError("/INQUIRE command didn't return a response.") if func.upper() in [ "ENV", From 6811cc2351c0f2f32b3f798e79889e4e57a13d59 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 14:08:02 +0200 Subject: [PATCH 098/122] refactor: using a temporary variable to avoid overwrite self._path Raise error if empty response only if non_interactive mode. 
--- src/ansys/mapdl/core/mapdl_core.py | 13 ++++++++----- src/ansys/mapdl/core/mapdl_extended.py | 19 +++++++++++-------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_core.py b/src/ansys/mapdl/core/mapdl_core.py index 3bf1fe8a385..6c47400cb89 100644 --- a/src/ansys/mapdl/core/mapdl_core.py +++ b/src/ansys/mapdl/core/mapdl_core.py @@ -498,18 +498,21 @@ def directory(self) -> str: a warning. """ # Inside inquire there is already a retry mechanisim + path = None try: - self._path = self.inquire("", "DIRECTORY") + path = self.inquire("", "DIRECTORY") except MapdlExitedError: # Let's return the cached path pass # os independent path format - if self._path: # self.inquire might return ''. - self._path = self._path.replace("\\", "/") + if path: # self.inquire might return ''. + path = path.replace("\\", "/") # new line to fix path issue, see #416 - self._path = repr(self._path)[1:-1] - else: + path = repr(path)[1:-1] + self._path = path + + elif not self._path: raise MapdlRuntimeError( f"MAPDL could provide a path using /INQUIRE or the cached path ('{self._path}')." ) diff --git a/src/ansys/mapdl/core/mapdl_extended.py b/src/ansys/mapdl/core/mapdl_extended.py index b2834c5d46e..92ea2f89aa5 100644 --- a/src/ansys/mapdl/core/mapdl_extended.py +++ b/src/ansys/mapdl/core/mapdl_extended.py @@ -1430,15 +1430,18 @@ def inquire(self, strarray="", func="", arg1="", arg2=""): i_try += 1 if not response: - raise MapdlRuntimeError("/INQUIRE command didn't return a response.") - - if func.upper() in [ - "ENV", - "TITLE", - ]: # the output is multiline, we just need the last line. - response = response.splitlines()[-1] + if not self._store_commands: + raise MapdlRuntimeError("/INQUIRE command didn't return a response.") + else: + if func.upper() in [ + "ENV", + "TITLE", + ]: # the output is multiline, we just need the last line. 
+ response = response.splitlines()[-1] + else: + response = response.split("=")[1].strip() - return response.split("=")[1].strip() + return response @wraps(_MapdlCore.parres) def parres(self, lab="", fname="", ext="", **kwargs): From 4b9648f54b2786ce2d72341973236060735786bc Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 14:26:59 +0200 Subject: [PATCH 099/122] fix: not keeping state between tests --- tests/conftest.py | 1 + tests/test_mapdl.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index b0434fe3087..703bd3a36da 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -439,6 +439,7 @@ def run_before_and_after_tests( assert prev == mapdl.is_local assert not mapdl.exited + assert not mapdl.ignore_errors make_sure_not_instances_are_left_open() diff --git a/tests/test_mapdl.py b/tests/test_mapdl.py index 4f5be64dad7..506c0bbabea 100644 --- a/tests/test_mapdl.py +++ b/tests/test_mapdl.py @@ -469,7 +469,7 @@ def test_error(mapdl): mapdl.a(0, 0, 0, 0) -def test_ignore_error(mapdl): +def test_ignore_errors(mapdl): mapdl.ignore_errors = False assert not mapdl.ignore_errors mapdl.ignore_errors = True @@ -480,8 +480,8 @@ def test_ignore_error(mapdl): out = mapdl._run("A, 0, 0, 0") assert "*** ERROR ***" in out - mapdl.ignore_error = False - assert mapdl.ignore_error is False + mapdl.ignore_errors = False + assert mapdl.ignore_errors is False @requires("grpc") From ed714bd87da601a5ba0a5b433be5571ed9dd3305 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 14:33:07 +0200 Subject: [PATCH 100/122] fix: making sure the state is reset between tests --- tests/test_mapdl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_mapdl.py b/tests/test_mapdl.py index 506c0bbabea..c3812a17f6b 100644 --- a/tests/test_mapdl.py +++ b/tests/test_mapdl.py @@ -1728,6 +1728,7 @@ def test_on_docker(mapdl): 
def test_deprecation_allow_ignore_warning(mapdl): with pytest.warns(DeprecationWarning, match="'allow_ignore' is being deprecated"): mapdl.allow_ignore = True + mapdl.ignore_errors = False def test_deprecation_allow_ignore_errors_mapping(mapdl): From ed2eb770d372125119658bb9cf583e015c012450 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 14:49:18 +0200 Subject: [PATCH 101/122] fix: warning when exiting. --- src/ansys/mapdl/core/mapdl_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ansys/mapdl/core/mapdl_core.py b/src/ansys/mapdl/core/mapdl_core.py index 6c47400cb89..f9664ad6a80 100644 --- a/src/ansys/mapdl/core/mapdl_core.py +++ b/src/ansys/mapdl/core/mapdl_core.py @@ -2311,7 +2311,7 @@ def __del__(self): self.exit() except Exception as e: try: # logger might be closed - if self._log is not None: + if hasattr(self, "_log") and self._log is not None: self._log.error("exit: %s", str(e)) except ValueError: pass From 521473f5aea08841e2eafd50d31200b5619c8bc0 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 14:50:34 +0200 Subject: [PATCH 102/122] fix: test --- src/ansys/mapdl/core/mapdl_extended.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_extended.py b/src/ansys/mapdl/core/mapdl_extended.py index 185ea885bd8..fa8ce48de9a 100644 --- a/src/ansys/mapdl/core/mapdl_extended.py +++ b/src/ansys/mapdl/core/mapdl_extended.py @@ -1440,8 +1440,8 @@ def inquire(self, strarray="", func="", arg1="", arg2="", **kwargs): "TITLE", ]: # the output is multiline, we just need the last line. 
response = response.splitlines()[-1] - else: - response = response.split("=")[1].strip() + + response = response.split("=")[1].strip() return response From 0aa6d0c3c44fda823eaf4aeb88128877ed115a54 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 18:00:08 +0200 Subject: [PATCH 103/122] feat: using a trimmed version for delete. --- src/ansys/mapdl/core/mapdl_core.py | 1 + src/ansys/mapdl/core/mapdl_grpc.py | 40 ++++++++++++++---------------- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_core.py b/src/ansys/mapdl/core/mapdl_core.py index c3b661ded90..2a59332b9a8 100644 --- a/src/ansys/mapdl/core/mapdl_core.py +++ b/src/ansys/mapdl/core/mapdl_core.py @@ -185,6 +185,7 @@ "process", "ram", "run_location", + "start_instance", "start_timeout", "timeout", ] diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index c2476345739..20739e6bbc8 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -139,6 +139,13 @@ } +def get_start_instance(*args, **kwargs) -> bool: + """Wraps get_start_instance to avoid circular imports""" + from ansys.mapdl.core.launcher import get_start_instance + + return get_start_instance(*args, **kwargs) + + def chunk_raw(raw, save_as): with io.BytesIO(raw) as f: while True: @@ -408,6 +415,9 @@ def __init__( self.remove_temp_dir_on_exit: bool = remove_temp_dir_on_exit self._jobname: str = start_parm.get("jobname", "file") self._path: Optional[str] = start_parm.get("run_location", None) + self._start_instance: Optional[str] = ( + start_parm.get("start_instance") or get_start_instance() + ) self._busy: bool = False # used to check if running a command on the server self._local: bool = start_parm.get("local", True) self._launched: bool = start_parm.get("launched", True) @@ -1132,11 +1142,8 @@ def exit(self, save=False, force=False, **kwargs): self.save() if not force: - # lazy import here 
to avoid circular import - from ansys.mapdl.core.launcher import get_start_instance - # ignore this method if PYMAPDL_START_INSTANCE=False - if not get_start_instance(): + if not self._start_instance: self._log.info("Ignoring exit due to PYMAPDL_START_INSTANCE=False") return @@ -1218,16 +1225,8 @@ def _kill_server(self): """ if self._exited: - self._log.debug("MAPDL server already exited") return - try: - self._log.debug("Killing MAPDL server") - except ValueError: - # It might throw ValueError: I/O operation on closed file. - # if the logger already exited. - pass - if ( self._version and self._version >= 24.2 ): # We can't use the non-cached version because of recursion error. @@ -3766,14 +3765,11 @@ def kill_job(self, jobid: int) -> None: def __del__(self): """In case the object is deleted""" - try: - self.exit(force=True) - except Exception as e: - pass + if not self._start_instance: + return - # Adding super call per: - # https://docs.python.org/3/reference/datamodel.html#object.__del__ - try: - super().__del__() - except Exception as e: - pass + self._exit_mapdl(path=self._path) + + # Exiting HPC job + if self._mapdl_on_hpc: + self.kill_job(self.jobid) From a46022785ec66ab6632c8d72d62039a3607023cc Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 18:21:47 +0200 Subject: [PATCH 104/122] refactor: test to pass --- src/ansys/mapdl/core/mapdl_extended.py | 1 + tests/test_mapdl.py | 34 +++++++++----------------- 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_extended.py b/src/ansys/mapdl/core/mapdl_extended.py index fa8ce48de9a..94f2808fd43 100644 --- a/src/ansys/mapdl/core/mapdl_extended.py +++ b/src/ansys/mapdl/core/mapdl_extended.py @@ -1422,6 +1422,7 @@ def inquire(self, strarray="", func="", arg1="", arg2="", **kwargs): raise ValueError( f"The arguments (strarray='{strarray}', func='{func}') are not valid." 
) + response = "" n_try = 3 i_try = 0 diff --git a/tests/test_mapdl.py b/tests/test_mapdl.py index 43e7a03bae6..ac587f7451d 100644 --- a/tests/test_mapdl.py +++ b/tests/test_mapdl.py @@ -1932,32 +1932,22 @@ def test_igesin_whitespace(mapdl, cleared, tmpdir): assert int(n_ent[0]) > 0 -def test_save_on_exit(mapdl, cleared): - with mapdl.non_interactive: - mapdl.exit(save=True, fake_exit=True) - mapdl._exited = False # avoiding set exited on the class. - - lines = "\n".join(mapdl._stored_commands.copy()) - assert "SAVE" in lines.upper() - - mapdl._stored_commands = [] # resetting - mapdl.prep7() - - mapdl.prep7() +@pytest.mark.parametrize("save", [None, True, False]) +@patch("ansys.mapdl.core.Mapdl.save") +@patch("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._exit_mapdl") +def test_save_on_exit(mck_exit, mck_save, mapdl, cleared, save): + mck_exit.return_value = None -def test_save_on_exit_not(mapdl, cleared): - with mapdl.non_interactive: - mapdl.exit(save=False, fake_exit=True) - mapdl._exited = False # avoiding set exited on the class. - - lines = "\n".join(mapdl._stored_commands.copy()) - assert "SAVE" not in lines.upper() + mapdl.exit(save=save) + mapdl._exited = False # avoiding set exited on the class. - mapdl._stored_commands = [] # resetting - mapdl.prep7() + if save: + mck_save.assert_called_once() + else: + mck_save.assert_not_called() - mapdl.prep7() + assert mapdl.prep7() def test_input_strings_inside_non_interactive(mapdl, cleared): From 21236e3bc5283308fbc5edce39e405061c0019e2 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 19:00:47 +0200 Subject: [PATCH 105/122] refactor: removing all cleaning from __del__ except ending HPC job. 
--- src/ansys/mapdl/core/mapdl_grpc.py | 3 +-- tests/test_mapdl.py | 9 --------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index 20739e6bbc8..6758a332abf 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -3765,11 +3765,10 @@ def kill_job(self, jobid: int) -> None: def __del__(self): """In case the object is deleted""" + # We are just going to escape early if needed, and kill the HPC job. if not self._start_instance: return - self._exit_mapdl(path=self._path) - # Exiting HPC job if self._mapdl_on_hpc: self.kill_job(self.jobid) diff --git a/tests/test_mapdl.py b/tests/test_mapdl.py index ac587f7451d..87498500569 100644 --- a/tests/test_mapdl.py +++ b/tests/test_mapdl.py @@ -2478,15 +2478,6 @@ def test_ip_hostname_in_start_parm(ip): del mapdl -@patch("ansys.mapdl.core.Mapdl.__init__", lambda *args, **kwargs: None) -def test_delete_mapdl_object(mapdl): - mapdl_b = pymapdl.Mapdl() - - with patch("ansys.mapdl.core.Mapdl.exit") as mock_exit: - del mapdl_b - mock_exit.assert_called_once() - - def test_directory_setter(mapdl): # Testing edge cases prev_path = mapdl._path From 72985195e2f112576bf37265f357c44dc517be6b Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 20:49:50 +0200 Subject: [PATCH 106/122] refactor: changing `detect_hpc` with `running_on_hpc`. Simplifying `launch_mapdl_on_cluster`. 
--- doc/source/user_guide/hpc/pymapdl.rst | 2 +- src/ansys/mapdl/core/launcher.py | 278 +++++++------------------- tests/test_launcher.py | 46 ++++- 3 files changed, 115 insertions(+), 211 deletions(-) diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index d67b0311fc6..7ce40eff53c 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -31,7 +31,7 @@ then PyMAPDL launches an MAPDL instance which uses 32 cores spawning across those 8 nodes. This behaviour can turn off if passing the :envvar:`PYMAPDL_RUNNING_ON_HPC` environment variable -with ``'false'`` value or passing the `detect_hpc=False` argument +with ``'false'`` value or passing the `running_on_hpc=False` argument to :func:`launch_mapdl() `. diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index a0e1b60b188..fdfbed04400 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -105,7 +105,7 @@ def version_from_path(*args, **kwargs): "additional_switches", "cleanup_on_exit", "clear_on_connect", - "detect_hpc", + "running_on_hpc", "exec_file", "force_intel" "ip", "ip", @@ -157,6 +157,15 @@ def version_from_path(*args, **kwargs): Be aware of possible errors or unexpected behavior with this configuration. """ +LAUNCH_ON_HCP_ERROR_MESSAGE_IP = ( + "PyMAPDL cannot ensure a specific IP will be used when launching " + "MAPDL on a cluster. Hence the 'ip' argument is not compatible. " + "If you want to connect to an already started MAPDL instance, " + "just connect normally as you would with a remote instance. " + "For example:\n\n" + ">>> mapdl = launch_mapdl(start_instance=False, ip='123.45.67.89')\n\n" + "where '123.45.67.89' is the IP of the machine where MAPDL is running." 
+) GALLERY_INSTANCE = [None] @@ -1020,7 +1029,7 @@ def launch_mapdl( add_env_vars: Optional[Dict[str, str]] = None, replace_env_vars: Optional[Dict[str, str]] = None, version: Optional[Union[int, str]] = None, - detect_hpc: bool = True, + running_on_hpc: bool = True, launch_on_hpc: bool = False, **kwargs: Dict[str, Any], ) -> Union[MapdlGrpc, "MapdlConsole"]: @@ -1050,7 +1059,7 @@ def launch_mapdl( nproc : int, optional Number of processors. Defaults to ``2``. If running on an HPC cluster, this value is adjusted to the number of CPUs allocated to the job, - unless the argument ``detect_hpc`` is set to ``"false"``. + unless the argument ``running_on_hpc`` is set to ``"false"``. ram : float, optional Total size in megabytes of the workspace (memory) used for the initial @@ -1192,7 +1201,7 @@ def launch_mapdl( However the argument (if specified) has precedence over the environment variable. If this environment variable is empty, it is as it is not set. - detect_hpc: bool, optional + running_on_hpc: bool, optional Whether detect if PyMAPDL is running on an HPC cluster. Currently only SLURM clusters are supported. By default, it is set to true. This option can be bypassed if the :envvar:`PYMAPDL_RUNNING_ON_HPC` @@ -1469,6 +1478,10 @@ def launch_mapdl( LOG.debug(f"Using additional switches {args['additional_switches']}.") + if args["running_on_hpc"] or args["launch_on_hpc"]: + env_vars.setdefault("ANS_MULTIPLE_NODES", "1") + env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") + ######################################## # PyPIM connection # ---------------- @@ -1485,10 +1498,6 @@ def launch_mapdl( start_parm = generate_start_parameters(args) - if args["running_on_hpc"] or args["launch_on_hpc"]: - env_vars.setdefault("ANS_MULTIPLE_NODES", "1") - env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") - # Early exit for debugging. 
if args["_debug_no_launch"]: # Early exit, just for testing @@ -2003,6 +2012,7 @@ def pack_arguments(locals_): "just_launch", locals_["kwargs"].get("just_launch", None) ) args["on_pool"] = locals_.get("on_pool", locals_["kwargs"].get("on_pool", None)) + args["_debug_no_launch"] = locals_.get( "_debug_no_launch", locals_["kwargs"].get("_debug_no_launch", None) ) @@ -2013,13 +2023,13 @@ def pack_arguments(locals_): def is_running_on_slurm(args: Dict[str, Any]) -> bool: - args["running_on_hpc"] = os.environ.get("PYMAPDL_RUNNING_ON_HPC", "True") + running_on_hpc_env_var = os.environ.get("PYMAPDL_RUNNING_ON_HPC", "True") - is_flag_false = args["running_on_hpc"].lower() == "false" + is_flag_false = running_on_hpc_env_var.lower() == "false" # Let's require the following env vars to exist to go into slurm mode. args["running_on_hpc"] = bool( - args["detect_hpc"] + args["running_on_hpc"] and not is_flag_false # default is true and os.environ.get("SLURM_JOB_NAME") and os.environ.get("SLURM_JOB_ID") @@ -2445,15 +2455,7 @@ def pre_check_args(args: dict[str, Any]): raise ValueError("Cannot specify both ``exec_file`` and ``version``.") if args["launch_on_hpc"] and args["ip"]: - raise ValueError( - "PyMAPDL cannot ensure a specific IP will be used when launching " - "MAPDL on a cluster. Hence the 'ip' argument is not compatible. " - "If you want to connect to an already started MAPDL instance, " - "just connect normally as you would with a remote instance. " - "For example:\n\n" - ">>> mapdl = launch_mapdl(start_instance=False, ip='123.45.67.89')\n\n" - "where '123.45.67.89' is the IP of the machine where MAPDL is running." 
- ) + raise ValueError(LAUNCH_ON_HCP_ERROR_MESSAGE_IP) # Setting timeout if args["start_timeout"] is None: @@ -2529,206 +2531,72 @@ def remove_err_files(run_location, jobname): def launch_mapdl_on_cluster( - exec_file: Optional[str] = None, - run_location: Optional[str] = None, - jobname: str = "file", + nproc: int, *, - nproc: Optional[int] = None, - ram: Optional[Union[int, str]] = None, - mode: Optional[str] = None, - override: bool = False, - loglevel: str = "ERROR", - additional_switches: str = "", - start_timeout: int = 90, - port: Optional[int] = None, - cleanup_on_exit: bool = True, - start_instance: Optional[bool] = None, - ip: Optional[str] = None, - clear_on_connect: bool = True, - log_apdl: Optional[Union[bool, str]] = None, - remove_temp_dir_on_exit: bool = False, - license_server_check: bool = False, - license_type: Optional[bool] = None, - print_com: bool = False, - add_env_vars: Optional[Dict[str, str]] = None, - replace_env_vars: Optional[Dict[str, str]] = None, - version: Optional[Union[int, str]] = None, - detect_hpc: bool = True, - **kwargs: Dict[str, Any], -): - - ######################################## - # Processing arguments - # -------------------- - # - # packing arguments - args = pack_arguments(locals()) # packs args and kwargs - - check_kwargs(args) # check if passing wrong arguments - - pre_check_args(args) - - # SLURM settings - if is_running_on_slurm(args): - LOG.info("On Slurm mode.") - - # extracting parameters - get_slurm_options(args, kwargs) - - get_cpus(args) - - get_start_instance_arg(args) - - get_ip(args) - - args["port"] = get_port(args["port"], args["start_instance"]) - - get_exec_file(args) - - args["version"] = get_version(args["version"], exec_file) - - args["mode"] = check_mode(args["mode"], args["version"]) - - args["additional_switches"] = set_license_switch( - args["license_type"], args["additional_switches"] - ) - - env_vars = update_env_vars(args["add_env_vars"], args["replace_env_vars"]) - - if 
args["start_instance"]: - ######################################## - # Local adjustments - # ----------------- - # - # Only when starting MAPDL (aka Local) - - get_run_location(args) - - # verify lock file does not exist - check_lock_file(args["run_location"], args["jobname"], args["override"]) - - # remove err file so we can track its creation - # (as way to check if MAPDL started or not) - remove_err_files(args["run_location"], args["jobname"]) - - ######################################## - # Context specific launching adjustments - # -------------------------------------- - # - if args["start_instance"]: - # Assuming that if login node is ubuntu, the computation ones - # are also ubuntu. - env_vars = configure_ubuntu(env_vars) - - # Set compatible MPI - args["additional_switches"] = set_MPI_additional_switches( - args["additional_switches"], - args["exec_file"], - force_intel=args["force_intel"], - ) - - LOG.debug(f"Using additional switches {args['additional_switches']}.") + scheduler_options: Union[str, Dict[str, str]] = None, + **launch_mapdl_args: Dict[str, Any], +) -> MapdlGrpc: + """Launch MAPDL on a HPC cluster - start_parm = generate_start_parameters(args) + Launches an interactive MAPDL instance on an HPC cluster. - env_vars.setdefault("ANS_MULTIPLE_NODES", "1") - env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") + Parameters + ---------- + nproc : int + Number of CPUs to be used in the simulation. - # Early exit for debugging. - if args["_debug_no_launch"]: - # Early exit, just for testing - return args # type: ignore + scheduler_options : Dict[str, str], optional + A string or dictionary specifying the job configuration for the + scheduler. For example ``scheduler_options = "-N 10"``. - ######################################## - # Sphinx docs adjustments - # ----------------------- - # - # special handling when building the gallery outside of CI. This - # creates an instance of mapdl the first time. 
- if pymapdl.BUILDING_GALLERY: # pragma: no cover - return create_gallery_instances(args, start_parm) + Returns + ------- + MapdlGrpc + Mapdl instance running on the HPC cluster. - ######################################## - # Local launching - # --------------- - # - # Check the license server - if args["license_server_check"]: - LOG.debug("Checking license server.") - lic_check = LicenseChecker(timeout=args["start_timeout"]) - lic_check.start() + Examples + -------- + Run a job with 10 nodes and 2 tasks per node: - LOG.debug("Starting MAPDL") + >>> from ansys.mapdl.core import launch_mapdl + >>> scheduler_options = {"nodes": 10, "ntasks-per-node": 2} + >>> mapdl = launch_mapdl( + launch_on_hpc=True, + nproc=20, + scheduler_options=scheduler_options + ) - cmd = generate_mapdl_launch_command( - exec_file=args["exec_file"], - jobname=args["jobname"], - nproc=args["nproc"], - ram=args["ram"], - port=args["port"], - additional_switches=args["additional_switches"], - ) + Raises + ------ + ValueError + _description_ + ValueError + _description_ + ValueError + _description_ + """ - cmd = generate_sbatch_command(cmd, scheduler_options=args.get("scheduler_options")) + # Processing the arguments + launch_mapdl_args["launch_on_hpc"] = True - jobid = None - try: - process = launch_grpc( - cmd=cmd, - run_location=args["run_location"], - env_vars=env_vars, - launch_on_hpc=args.get("launch_on_hpc"), + if launch_mapdl_args.get("mode", "grpc") != "grpc": + raise ValueError( + "The only mode allowed for launch MAPDL on an HPC cluster is gRPC." 
) - jobid = check_mapdl_launch_on_hpc(process, start_parm) - get_job_info(start_parm=start_parm, jobid=jobid) - - except Exception as exception: - LOG.error("An error occurred when launching MAPDL.") - - if start_parm.get("finish_job_on_exit", True) and jobid: - LOG.debug(f"Killing HPC job with id: {jobid}") - kill_job(jobid) - - raise exception - - if args["just_launch"]: - out = [args["ip"], args["port"]] - if hasattr(process, "pid"): - out += [process.pid] - return out + if launch_mapdl_args.get("ip"): + raise ValueError(LAUNCH_ON_HCP_ERROR_MESSAGE_IP) - try: - mapdl = MapdlGrpc( - cleanup_on_exit=args["cleanup_on_exit"], - loglevel=args["loglevel"], - set_no_abort=args["set_no_abort"], - remove_temp_dir_on_exit=args["remove_temp_dir_on_exit"], - log_apdl=args["log_apdl"], - process=process, - use_vtk=args["use_vtk"], - **start_parm, + if not launch_mapdl_args.get("start_instance", True): + raise ValueError( + "The 'start_instance' argument must be 'True' when launching on HPC." ) - # Setting launched property - mapdl._launched = True - mapdl._env_vars = env_vars - - except Exception as exception: - # Failed to launch for some reason. 
Check if failure was due - # to the license check - if args["license_server_check"]: - LOG.debug("Checking license server.") - lic_check.check() - - raise exception - - # Stopping license checker - if args["license_server_check"]: - LOG.debug("Stopping license server check.") - lic_check.is_connected = True - - return mapdl + return launch_mapdl( + nproc=nproc, + scheduler_options=scheduler_options, + **launch_mapdl_args, + ) def get_hostname_host_cluster(job_id: int, timeout: int = 30) -> str: diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 4ecf60fe157..b3e3868e33a 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -57,6 +57,7 @@ is_running_on_slurm, launch_grpc, launch_mapdl, + launch_mapdl_on_cluster, remove_err_files, set_license_switch, set_MPI_additional_switches, @@ -730,17 +731,17 @@ def test_slurm_ram(monkeypatch, ram, expected, context): @pytest.mark.parametrize("slurm_env_var", ["True", "false", ""]) @pytest.mark.parametrize("slurm_job_name", ["True", "false", ""]) @pytest.mark.parametrize("slurm_job_id", ["True", "false", ""]) -@pytest.mark.parametrize("detect_hpc", [True, False, None]) +@pytest.mark.parametrize("running_on_hpc", [True, False, None]) def test_is_running_on_slurm( - monkeypatch, slurm_env_var, slurm_job_name, slurm_job_id, detect_hpc + monkeypatch, slurm_env_var, slurm_job_name, slurm_job_id, running_on_hpc ): monkeypatch.setenv("PYMAPDL_RUNNING_ON_HPC", slurm_env_var) monkeypatch.setenv("SLURM_JOB_NAME", slurm_job_name) monkeypatch.setenv("SLURM_JOB_ID", slurm_job_id) - flag = is_running_on_slurm(args={"detect_hpc": detect_hpc}) + flag = is_running_on_slurm(args={"running_on_hpc": running_on_hpc}) - if detect_hpc is not True: + if running_on_hpc is not True: assert not flag else: @@ -756,7 +757,7 @@ def test_is_running_on_slurm( if ON_LOCAL: assert ( launch_mapdl( - detect_hpc=detect_hpc, + running_on_hpc=running_on_hpc, _debug_no_launch=True, )["running_on_hpc"] == flag @@ -1498,3 +1499,38 @@ def 
raise_exception(*args, **kwargs): assert "1001" in args mock_kill_job.assert_called_once() + + +@pytest.mark.parametrize( + "args,context", + [ + [ + {"nproc": 10, "mode": "console"}, + pytest.raises( + ValueError, + match="The only mode allowed for launch MAPDL on an HPC cluster is gRPC.", + ), + ], + [ + {"nproc": 10, "ip": "123.11.22.33"}, + pytest.raises( + ValueError, + match="PyMAPDL cannot ensure a specific IP will be used when launching MAPDL on a cluster", + ), + ], + [ + {"nproc": 10, "start_instance": False}, + pytest.raises( + ValueError, + match="The 'start_instance' argument must be 'True' when launching on HPC.", + ), + ], + [{"nproc": 10}, NullContext()], + ], +) +@patch("ansys.mapdl.core.launcher.launch_mapdl", lambda *args, **kwargs: kwargs) +def test_launch_mapdl_on_cluster_exceptions(args, context): + with context: + ret = launch_mapdl_on_cluster(**args) + assert ret["launch_on_hpc"] + assert ret["nproc"] == 10 From 737757ff79752252ebb20b70b9318fdef014ad60 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Fri, 25 Oct 2024 20:53:42 +0200 Subject: [PATCH 107/122] docs: adding-sbatch-support (#3513) * docs: expanding a bit the `PyMAPDL on HPC clusters` section * docs: adding info about launching MAPDL in HPC. * chore: adding changelog file 3513.documentation.md [dependabot-skip] * fix: vale issues * docs: changing the name to `scheduler_options`. Add warning about adding nproc. * fix: vale issues * docs: apply suggestions from Kathy code review Co-authored-by: Kathy Pippert <84872299+PipKat@users.noreply.github.com> * docs: adding CPUs. 
--------- Co-authored-by: pyansys-ci-bot <92810346+pyansys-ci-bot@users.noreply.github.com> Co-authored-by: Kathy Pippert <84872299+PipKat@users.noreply.github.com> --- doc/changelog.d/3513.documentation.md | 1 + .../hpc/launch_mapdl_entrypoint.rst | 234 ++++++++++++++++++ doc/source/user_guide/hpc/pymapdl.rst | 59 +++-- doc/source/user_guide/mapdl.rst | 1 + .../config/vocabularies/ANSYS/accept.txt | 1 + 5 files changed, 280 insertions(+), 16 deletions(-) create mode 100644 doc/changelog.d/3513.documentation.md create mode 100644 doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst diff --git a/doc/changelog.d/3513.documentation.md b/doc/changelog.d/3513.documentation.md new file mode 100644 index 00000000000..b2f60a9a079 --- /dev/null +++ b/doc/changelog.d/3513.documentation.md @@ -0,0 +1 @@ +docs: adding-sbatch-support \ No newline at end of file diff --git a/doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst b/doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst new file mode 100644 index 00000000000..09efdc20b72 --- /dev/null +++ b/doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst @@ -0,0 +1,234 @@ + +.. _ref_pymapdl_interactive_in_cluster_hpc: + +.. _ref_pymapdl_interactive_in_cluster_hpc_from_login: + + +Interactive MAPDL instance launched from the login node +======================================================= + +Starting the instance +--------------------- + +If you are already logged in a login node, you can launch an MAPDL instance as a SLURM job and +connect to it. +To accomplish this, run these commands in your login node. + +.. code:: pycon + + >>> from ansys.mapdl.core import launch_mapdl + >>> mapdl = launch_mapdl(launch_on_hpc=True) + +PyMAPDL submits a job to the scheduler using the appropriate commands. +In case of SLURM, it uses the ``sbatch`` command with the ``--wrap`` argument +to pass the MAPDL command line to start. 
+Other scheduler arguments can be specified using the ``scheduler_options`` +argument as a Python :class:`dict`: + +.. code:: pycon + + >>> from ansys.mapdl.core import launch_mapdl + >>> scheduler_options = {"nodes": 10, "ntasks-per-node": 2} + >>> mapdl = launch_mapdl(launch_on_hpc=True, nproc=20, scheduler_options=scheduler_options) + + +.. note:: + PyMAPDL cannot infer the number of CPUs that you are requesting from the scheduler. + Hence, you must specify this value using the ``nproc`` argument. + +The double minus (``--``) common in the long version of some scheduler commands +is added automatically if PyMAPDL detects that it is missing and the specified +command is more than 1 character in length. +For instance, the ``ntasks-per-node`` argument is submitted as ``--ntasks-per-node``. + +Or, a single Python string (:class:`str`) is submitted: + +.. code:: pycon + + >>> from ansys.mapdl.core import launch_mapdl + >>> scheduler_options = "-N 10" + >>> mapdl = launch_mapdl(launch_on_hpc=True, scheduler_options=scheduler_options) + +.. warning:: + Because PyMAPDL is already using the ``--wrap`` argument, this argument + cannot be used again. + +The values of each scheduler argument are wrapped in single quotes (`'`). +This might cause parsing issues that can cause the job to fail after successful +submission. + +PyMAPDL passes all the environment variables of the +user to the new job and to the MAPDL instance. +This is usually convenient because many environmental variables are +needed to run the job or MAPDL command. +For instance, the license server is normally stored in the :envvar:`ANSYSLMD_LICENSE_FILE` environment variable. +If you prefer not to pass these environment variables to the job, use the SLURM argument +``--export`` to specify the desired environment variables. +For more information, see `SLURM documentation `_. 
+ + +Working with the instance +------------------------- + +Once the :class:`Mapdl ` object has been created, +it does not differ from a normal :class:`Mapdl ` +instance. +You can retrieve the IP of the MAPDL instance as well as its hostname: + +.. code:: pycon + + >>> mapdl.ip + '123.45.67.89' + >>> mapdl.hostname + 'node0' + +You can also retrieve the job ID: + +.. code:: pycon + + >>> mapdl.jobid + 10001 + +If you want to check whether the instance has been launched using a scheduler, +you can use the :attr:`mapdl_on_hpc ` +attribute: + +.. code:: pycon + + >>> mapdl.mapdl_on_hpc + True + + +Sharing files +^^^^^^^^^^^^^ + +Most of the HPC clusters share the login node filesystem with the compute nodes, +which means that you do not need to do extra work to upload or download files to the MAPDL +instance. You only need to copy them to the location where MAPDL is running. +You can obtain this location with the +:attr:`directory ` attribute. + +If no location is specified in the :func:`launch_mapdl() ` +function, then a temporary location is selected. +It is a good idea to set the ``run_location`` argument to a directory that is accessible +from all the compute nodes. +Normally anything under ``/home/user`` is available to all compute nodes. +If you are unsure where you should launch MAPDL, contact your cluster administrator. + +Additionally, you can use methods like the :meth:`upload ` +and :meth:`download ` to +upload and download files to and from the MAPDL instance respectively. +You do not need ``ssh`` or another similar connection. +However, for large files, you might want to consider alternatives. + + +Exiting MAPDL +------------- + +Exiting MAPDL, either intentionally or unintentionally, stops the job. +This behavior occurs because MAPDL is the main process of the job. Thus, when finished, +the scheduler considers the job done. + +To exit MAPDL, you can use the :meth:`exit() ` method. 
+This method exits MAPDL and sends a signal to the scheduler to cancel the job. + +.. code-block:: python + + mapdl.exit() + +When the Python process you are running PyMAPDL on finishes without errors, and you have not +issued the :meth:`exit() ` method, the garbage collector +kills the MAPDL instance and its job. This is intended to save resources. + +If you prefer that the job is not killed, set the following attribute in the +:class:`Mapdl ` class: + +.. code-block:: python + + mapdl.finish_job_on_exit = False + + +In this case, you should set a timeout in your job to avoid having the job +running longer than needed. + + +Handling crashes on an HPC +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If MAPDL crashes while running on an HPC, the job finishes right away. +In this case, PyMAPDL disconnects from MAPDL. +PyMAPDL tries to reconnect to the MAPDL instance up to 5 times, waiting +for up to 5 seconds. +If unsuccessful, you might get an error like this: + +.. code-block:: text + + MAPDL server connection terminated unexpectedly while running: + /INQUIRE,,DIRECTORY,, + called by: + _send_command + + Suggestions: + MAPDL *might* have died because it executed a not-allowed command or ran out of memory. + Check the MAPDL command output for more details. 
+ Open an issue on GitHub if you need assistance: https://github.com/ansys/pymapdl/issues + Error: + failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:50052: Failed to connect to remote host: connect: Connection refused (111) + Full error: + <_InactiveRpcError of RPC that terminated with: + status = StatusCode.UNAVAILABLE + details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:50052: Failed to connect to remote host: connect: Connection refused (111)" + debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-10-24T08:25:04.054559811+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:50052: Failed to connect to remote host: connect: Connection refused (111)"}" + > + +The data of that job is available at :attr:`directory `. +You should set the run location using the ``run_location`` argument. + +While handling this exception, PyMAPDL also cancels the job to avoid resources leaking. +Therefore, the only option is to start a new instance by launching a new job using +the :func:`launch_mapdl() ` function. + +User case on a SLURM cluster +---------------------------- + +Assume a user wants to start a remote MAPDL instance in an HPC cluster +to interact with it. +The user would like to request 10 nodes, and 1 task per node (to avoid clashes +between MAPDL instances). +The user would like to also request 64 GB of RAM. +Because of administration logistics, the user must use the machines in +the ``supercluster01`` partition. +To make PyMAPDL launch an instance like that on SLURM, run the following code: + +.. 
code-block:: python + + from ansys.mapdl.core import launch_mapdl + from ansys.mapdl.core.examples import vmfiles + + scheduler_options = { + "nodes": 10, + "ntasks-per-node": 1, + "partition": "supercluster01", + "memory": 64, + } + mapdl = launch_mapdl(launch_on_hpc=True, nproc=10, scheduler_options=scheduler_options) + + num_cpu = mapdl.get_value("ACTIVE", 0, "NUMCPU") # It should be equal to 10 + + mapdl.clear() # Not strictly needed. + mapdl.prep7() + + # Run an MAPDL script + mapdl.input(vmfiles["vm1"]) + + # Let's solve again to get the solve printout + mapdl.solution() + output = mapdl.solve() + print(output) + + mapdl.exit() # Kill the MAPDL instance + + +PyMAPDL automatically sets MAPDL to read the job configuration (including machines, +number of CPUs, and memory), which allows MAPDL to use all the resources allocated +to that job. diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index 7ce40eff53c..9382cbf87be 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -19,36 +19,34 @@ on whether or not you run them both on the HPC compute nodes. Additionally, you might be able interact with them (``interactive`` mode) or not (``batch`` mode). -For information on supported configurations, see :ref:`ref_pymapdl_batch_in_cluster_hpc`. +PyMAPDL takes advantage of HPC clusters to launch MAPDL instances +with increased resources. +PyMAPDL automatically sets these MAPDL instances to read the +scheduler job configuration (which includes machines, number +of CPUs, and memory), which allows MAPDL to use all the resources +allocated to that job. +For more information, see :ref:`ref_tight_integration_hpc`. +The following configurations are supported: -Since v0.68.5, PyMAPDL can take advantage of the tight integration -between the scheduler and MAPDL to read the job configuration and -launch an MAPDL instance that can use all the resources allocated -to that job. 
-For instance, if a SLURM job has allocated 8 nodes with 4 cores each, -then PyMAPDL launches an MAPDL instance which uses 32 cores -spawning across those 8 nodes. -This behaviour can turn off if passing the -:envvar:`PYMAPDL_RUNNING_ON_HPC` environment variable -with ``'false'`` value or passing the `running_on_hpc=False` argument -to :func:`launch_mapdl() `. +* :ref:`ref_pymapdl_batch_in_cluster_hpc`. +* :ref:`ref_pymapdl_interactive_in_cluster_hpc_from_login` .. _ref_pymapdl_batch_in_cluster_hpc: -Submit a PyMAPDL batch job to the cluster from the entrypoint node -================================================================== +Batch job submission from the login node +======================================== Many HPC clusters allow their users to log into a machine using ``ssh``, ``vnc``, ``rdp``, or similar technologies and then submit a job to the cluster from there. -This entrypoint machine, sometimes known as the *head node* or *entrypoint node*, +This login machine, sometimes known as the *head node* or *entrypoint node*, might be a virtual machine (VDI/VM). In such cases, once the Python virtual environment with PyMAPDL is already set and is accessible to all the compute nodes, launching a -PyMAPDL job from the entrypoint node is very easy to do using the ``sbatch`` command. +PyMAPDL job from the login node is very easy to do using the ``sbatch`` command. When the ``sbatch`` command is used, PyMAPDL runs and launches an MAPDL instance in the compute nodes. No changes are needed on a PyMAPDL script to run it on an SLURM cluster. @@ -99,6 +97,8 @@ job by setting the :envvar:`PYMAPDL_NPROC` environment variable to the desired v (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch main.py +For more applicable environment variables, see :ref:`ref_environment_variables`. + You can also add ``sbatch`` options to the command: .. 
code-block:: console @@ -182,3 +182,30 @@ This bash script performs tasks such as creating environment variables, moving files to different directories, and printing to ensure your configuration is correct. + +.. include:: launch_mapdl_entrypoint.rst + + + +.. _ref_tight_integration_hpc: + +Tight integration between MAPDL and the HPC scheduler +===================================================== + +Since v0.68.5, PyMAPDL can take advantage of the tight integration +between the scheduler and MAPDL to read the job configuration and +launch an MAPDL instance that can use all the resources allocated +to that job. +For instance, if a SLURM job has allocated 8 nodes with 4 cores each, +then PyMAPDL launches an MAPDL instance that uses 32 cores +spanning across those 8 nodes. + +This behavior can be turned off by passing the +:envvar:`PYMAPDL_RUNNING_ON_HPC` environment variable +with a ``'false'`` value or passing the ``detect_hpc=False`` argument +to the :func:`launch_mapdl() ` function. + +Alternatively, you can override these settings by either specifying +custom settings in the :func:`launch_mapdl() ` +function's arguments or using specific environment variables. +For more information, see :ref:`ref_environment_variables`. diff --git a/doc/source/user_guide/mapdl.rst b/doc/source/user_guide/mapdl.rst index 2baa8f3aece..fceee2a4e3b 100644 --- a/doc/source/user_guide/mapdl.rst +++ b/doc/source/user_guide/mapdl.rst @@ -1092,6 +1092,7 @@ are unsupported. 
| * ``LSWRITE`` | |:white_check_mark:| Available (Internally running in :attr:`Mapdl.non_interactive `) | |:white_check_mark:| Available | |:exclamation:| Only in :attr:`Mapdl.non_interactive ` | | +---------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------+ +.. _ref_environment_variables: Environment variables ===================== diff --git a/doc/styles/config/vocabularies/ANSYS/accept.txt b/doc/styles/config/vocabularies/ANSYS/accept.txt index 0d27d173313..583fb27fac1 100644 --- a/doc/styles/config/vocabularies/ANSYS/accept.txt +++ b/doc/styles/config/vocabularies/ANSYS/accept.txt @@ -53,6 +53,7 @@ CentOS7 Chao ci container_layout +CPUs datas delet Dependabot From 78fe1bd10329c25322932ff26c96bb84599b73fd Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 28 Oct 2024 10:16:47 +0100 Subject: [PATCH 108/122] feat: avoid exceptions on `__del__` --- src/ansys/mapdl/core/mapdl_grpc.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index 6758a332abf..029e2659ef2 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -2949,10 +2949,10 @@ def __str__(self): en = stats.find("*** PrePro") product = "\n".join(stats[st:en].splitlines()[1:]).strip() - info = f"Mapdl\n" - info += f"-----\n" + info = "Mapdl\n" + info += "-----\n" info += f"PyMAPDL Version: {__version__}\n" - info += f"Interface: grpc\n" + info += "Interface: grpc\n" info += f"Product: {product}\n" info += f"MAPDL Version: {self.version}\n" info += f"Running on: {self.hostname}\n" @@ 
-3766,9 +3766,15 @@ def kill_job(self, jobid: int) -> None: def __del__(self): """In case the object is deleted""" # We are just going to escape early if needed, and kill the HPC job. - if not self._start_instance: - return + # The garbage collector remove attributes before we can evaluate this. + try: + # Exiting HPC job + if self._mapdl_on_hpc: + self.kill_job(self.jobid) + + if not self._start_instance: + return - # Exiting HPC job - if self._mapdl_on_hpc: - self.kill_job(self.jobid) + except Exception as e: + # This is on clean up. + pass From 9bcdc9ddda6b7b8fe900da45c63f7755dc0887fe Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 28 Oct 2024 10:17:43 +0100 Subject: [PATCH 109/122] tests: adding tests for get_port and get_ip --- tests/test_launcher.py | 77 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index b3e3868e33a..3c4eb9db2de 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -50,6 +50,8 @@ generate_start_parameters, get_cpus, get_exec_file, + get_ip, + get_port, get_run_location, get_slurm_options, get_start_instance, @@ -1439,7 +1441,7 @@ def test_launch_on_hpc_exception_launch_mapdl(monkeypatch): with pytest.raises( Exception, match="PyMAPDL failed to submit the sbatch job:" ): - mapdl = launch_mapdl( + launch_mapdl( launch_on_hpc=True, exec_file=exec_file, ) @@ -1482,14 +1484,25 @@ def raise_exception(*args, **kwargs): with pytest.raises( Exception, match="Fake exception when launching MAPDL" ): - mapdl = launch_mapdl( + launch_mapdl( launch_on_hpc=True, exec_file=exec_file, + replace_env_vars={"myenvvar": "myenvvarvalue"}, ) mock_launch_grpc.assert_called_once() cmd = mock_launch_grpc.call_args_list[0][1]["cmd"] - env_vars = mock_launch_grpc.call_args_list[0][1]["env_vars"] + + assert "sbatch" in cmd + assert "--wrap" in cmd + assert exec_file in cmd[-1] + assert "-grpc" in cmd[-1] + 
+ assert mock_launch_grpc.call_args_list[0][1]["env_vars"] == { + "ANS_MULTIPLE_NODES": "1", + "HYDRA_BOOTSTRAP": "slurm", + "myenvvar": "myenvvarvalue", + } mock_scontrol.assert_called_once() args = mock_scontrol.call_args_list[0][0][0] @@ -1534,3 +1547,61 @@ def test_launch_mapdl_on_cluster_exceptions(args, context): ret = launch_mapdl_on_cluster(**args) assert ret["launch_on_hpc"] assert ret["nproc"] == 10 + + +@patch( + "socket.gethostbyname", + lambda *args, **kwargs: "123.45.67.89" if args[0] != LOCALHOST else LOCALHOST, +) +@pytest.mark.parametrize( + "ip,ip_env", + [[None, None], [None, "123.45.67.89"], ["123.45.67.89", "111.22.33.44"]], +) +def test_get_ip(monkeypatch, ip, ip_env): + monkeypatch.delenv("PYMAPDL_IP", False) + if ip_env: + monkeypatch.setenv("PYMAPDL_IP", ip_env) + args = {"ip": ip} + + get_ip(args) + + if ip: + assert args["ip"] == ip + else: + if ip_env: + assert args["ip"] == ip_env + else: + assert args["ip"] == LOCALHOST + + +@pytest.mark.parametrize( + "port,port_envvar,start_instance,port_busy,result", + ( + [None, None, True, False, 50052], # Standard case + [None, None, True, True, 50055], # Busy port case, not sure why it is not 50054 + [None, 50053, True, True, 50053], + [None, 50053, False, False, 50053], + [50054, 50053, True, False, 50054], + [50054, 50053, True, False, 50054], + [50054, None, False, False, 50054], + ), +) +@patch("ansys.mapdl.core._LOCAL_PORTS", []) +def test_get_port(monkeypatch, port, port_envvar, start_instance, port_busy, result): + # Settings + monkeypatch.delenv("PYMAPDL_PORT", False) + if port_envvar: + monkeypatch.setenv("PYMAPDL_PORT", port_envvar) + + # Testing + if port_busy: + # Success after the second retry, it should go up to 2. + # But for some reason, it goes up 3. 
+ side_effect = [True, True, False] + else: + side_effect = [False] + + context = patch("ansys.mapdl.core.launcher.port_in_use", side_effect=side_effect) + + with context: + assert get_port(port, start_instance) == result From c2c666e03199f770f9d4625925c07559318ed3e8 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 28 Oct 2024 10:18:34 +0100 Subject: [PATCH 110/122] feat: using a submitter function for grouping. --- src/ansys/mapdl/core/launcher.py | 55 ++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index fdfbed04400..41bca4ec03a 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -198,10 +198,8 @@ def _is_ubuntu() -> bool: # args value is controlled by the library. # awk is not a partial path - Bandit false positive. # Excluding bandit check. - proc = subprocess.Popen( - ["awk", "-F=", "/^NAME/{print $2}", "/etc/os-release"], - stdout=subprocess.PIPE, - ) # nosec B603 B607 + proc = submitter(["awk", "-F=", "/^NAME/{print $2}", "/etc/os-release"]) + if "ubuntu" in proc.stdout.read().decode().lower(): return True @@ -502,20 +500,15 @@ def launch_grpc( ) LOG.debug("MAPDL starting in background.") - - # cmd is controlled by the library with generate_mapdl_launch_command. - # Excluding bandit check. - process = subprocess.Popen( + return submitter( cmd_, shell=shell, # sbatch does not work without shell. 
cwd=run_location, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - env=env_vars, - ) # nosec B603 - - return process + env_vars=env_vars, + ) def check_mapdl_launch( @@ -2792,13 +2785,47 @@ def get_job_info( def kill_job(jobid: int): """Kill SLURM job""" - subprocess.Popen(["scancel", str(jobid)]) + submitter(["scancel", str(jobid)]) def send_scontrol(args: str): cmd = f"scontrol {args}".split(" ") + return submitter(cmd) + + +def submitter( + cmd: Union[str, List[str]], + *, + executable: str = None, + shell: bool = False, + cwd: str = None, + stdin: subprocess.PIPE = None, + stdout: subprocess.PIPE = None, + stderr: subprocess.PIPE = None, + env_vars: dict[str, str] = None, +): + + if executable: + if isinstance(cmd, list): + cmd = [executable] + cmd + else: + cmd = [executable, cmd] + + if not stdin: + stdin = subprocess.DEVNULL + if not stdout: + stdout = subprocess.PIPE + if not stderr: + stderr = subprocess.PIPE + + # cmd is controlled by the library with generate_mapdl_launch_command. + # Excluding bandit check. return subprocess.Popen( cmd, + shell=shell, # sbatch does not work without shell. + cwd=cwd, + stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - ) + env=env_vars, + ) # nosec B603 B607 From 89e510d263559bd1e9877b0fd4d50d1a91ce2d54 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 28 Oct 2024 10:26:45 +0100 Subject: [PATCH 111/122] tests: attempting clean exit --- src/ansys/mapdl/core/mapdl_grpc.py | 8 +++++++- tests/conftest.py | 7 +++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index 029e2659ef2..3e80ea3e868 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -3769,7 +3769,13 @@ def __del__(self): # The garbage collector remove attributes before we can evaluate this. 
try: # Exiting HPC job - if self._mapdl_on_hpc: + if ( + hasattr(self, "_mapdl_on_hpc") + and self._mapdl_on_hpc + and hasattr(self, "finish_job_on_exit") + and self.finish_job_on_exit + ): + self.kill_job(self.jobid) if not self._start_instance: diff --git a/tests/conftest.py b/tests/conftest.py index a8805433f7c..2e0ea904340 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -438,6 +438,7 @@ def run_before_and_after_tests( yield # this is where the testing happens + # Check resetting state assert prev == mapdl.is_local assert not mapdl.exited, "MAPDL is exited after the test. It should have not!" assert not mapdl._mapdl_on_hpc, "Mapdl class is on HPC mode. It should not!" @@ -626,8 +627,7 @@ def mapdl(request, tmpdir_factory): if START_INSTANCE: mapdl._local = True mapdl._exited = False - # mapdl.finish_job_on_exit = True - assert mapdl.finish_job_on_exit + assert not mapdl.finish_job_on_exit mapdl.exit(save=True, force=True) assert mapdl._exited assert "MAPDL exited" in str(mapdl) @@ -643,6 +643,9 @@ def mapdl(request, tmpdir_factory): with pytest.raises(MapdlExitedError): mapdl._send_command_stream("/PREP7") + # Delete Mapdl object + del mapdl + SpacedPaths = namedtuple( "SpacedPaths", From b047e12315af60e616bd40a95fd623fcb205750f Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 28 Oct 2024 11:25:09 +0100 Subject: [PATCH 112/122] feat: externalising to function getting the batchhost --- src/ansys/mapdl/core/launcher.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 41bca4ec03a..6fd7133fa41 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -2616,11 +2616,15 @@ def get_hostname_host_cluster(job_id: int, timeout: int = 30) -> str: # Exit by raising exception if time.time() > time_start + timeout: - state = stdout.split("JobState=")[1].split(" ")[0] + state = 
get_state_from_scontrol(stdout) # Trying to get the hostname from the last valid message try: host = get_hostname_from_scontrol(stdout) + if not host: + # If string is empty, go to the exception clause. + raise IndexError() + hostname_msg = f"The BatchHost for this job is '{host}'" except (IndexError, AttributeError): hostname_msg = "PyMAPDL couldn't get the BatchHost hostname" @@ -2711,7 +2715,11 @@ def add_minus(arg: str): def get_hostname_from_scontrol(stdout: str) -> str: - return stdout.split("BatchHost=")[1].splitlines()[0] + return stdout.split("BatchHost=")[1].splitlines()[0].strip() + + +def get_state_from_scontrol(stdout: str) -> str: + return stdout.split("JobState=")[1].splitlines()[0].strip() def check_mapdl_launch_on_hpc( From db0c394537d0f5bdddec49f2fcace1c7823c5484 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 28 Oct 2024 11:26:31 +0100 Subject: [PATCH 113/122] tests: increasing coverage --- tests/test_launcher.py | 103 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 3c4eb9db2de..a4f52be0588 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -37,6 +37,7 @@ MapdlDidNotStart, NotEnoughResources, PortAlreadyInUseByAnMAPDLInstance, + VersionError, ) from ansys.mapdl.core.launcher import ( _HAS_ATP, @@ -50,7 +51,9 @@ generate_start_parameters, get_cpus, get_exec_file, + get_hostname_host_cluster, get_ip, + get_jobid, get_port, get_run_location, get_slurm_options, @@ -1118,6 +1121,14 @@ def test_get_run_location_no_access(tmpdir): {"exec_file": True, "version": True}, "Cannot specify both ``exec_file`` and ``version``.", ], + [ + {"scheduler_options": True}, + "PyMAPDL does not read the number of cores from the 'scheduler_options'.", + ], + [ + {"launch_on_hpc": True, "ip": "111.22.33.44"}, + "PyMAPDL cannot ensure a specific IP will be used when launching", + ], ], ) def test_pre_check_args(args, 
match): @@ -1605,3 +1616,95 @@ def test_get_port(monkeypatch, port, port_envvar, start_instance, port_busy, res with context: assert get_port(port, start_instance) == result + + +@pytest.mark.parametrize("stdout", ["Submitted batch job 1001", "Something bad"]) +def test_get_jobid(stdout): + if "1001" in stdout: + context = NullContext() + else: + context = pytest.raises( + ValueError, match="PyMAPDL could not retrieve the job id" + ) + + with context: + jobid = get_jobid(stdout) + assert jobid == 1001 + + +@patch("socket.gethostbyname", lambda *args, **kwargs: "111.22.33.44") +@pytest.mark.parametrize( + "jobid,timeout,time_to_stop,state,hostname, hostname_msg, raises", + [ + [1001, 30, 2, "RUNNING", "myhostname", "BatchHost=myhostname", None], + [ + 1002, + 2, + 3, + "CONFIGURING", + "otherhostname", + "BatchHost=otherhostname", + MapdlDidNotStart, + ], + [1002, 2, 3, "CONFIGURING", "", "BatchHost=", MapdlDidNotStart], + [1002, 2, 3, "CONFIGURING", None, "Batch", MapdlDidNotStart], + ], +) +def test_get_hostname_host_cluster( + jobid, timeout, time_to_stop, state, hostname, hostname_msg, raises +): + def fake_proc(*args, **kwargs): + assert f"show jobid -dd {jobid}" == args[0] + return get_fake_process( + f"a long scontrol...\nJobState={state}\n...\n{hostname_msg}\n...\nin message", + "", + time_to_stop, + ) + + with patch("ansys.mapdl.core.launcher.send_scontrol", fake_proc) as mck_sc: + + if raises: + context = pytest.raises(raises) + else: + context = NullContext() + + with context as excinfo: + batchhost, batchhost_ip = get_hostname_host_cluster( + job_id=jobid, timeout=timeout + ) + + if raises: + assert f"The HPC job (id: {jobid})" in excinfo.value.args[0] + assert f"(timeout={timeout})." in excinfo.value.args[0] + assert f"The job state is '{state}'. 
" in excinfo.value.args[0] + + if hostname: + assert f"The BatchHost for this job is '{hostname}'" + else: + assert ( + "PyMAPDL couldn't get the BatchHost hostname" + in excinfo.value.args[0] + ) + + else: + assert batchhost == "myhostname" + assert batchhost_ip == "111.22.33.44" + + +@patch("ansys.tools.path.path._mapdl_version_from_path", lambda *args, **kwargs: 201) +@patch("ansys.mapdl.core._HAS_ATP", True) +def test_get_version_version_error(monkeypatch): + monkeypatch.delenv("PYMAPDL_MAPDL_VERSION", False) + + with pytest.raises( + VersionError, match="The MAPDL gRPC interface requires MAPDL 20.2 or later" + ): + get_version(None, "/path/to/executable") + + +@pytest.mark.parametrize("version", [211, 221, 232]) +def test_get_version_env_var(monkeypatch, version): + monkeypatch.setenv("PYMAPDL_MAPDL_VERSION", version) + + assert version == get_version(None) + assert version != get_version(241) From aac941d74ca6db621e0e15de20c4193500c9d855 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 28 Oct 2024 11:58:48 +0100 Subject: [PATCH 114/122] tests: fix --- tests/test_launcher.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index a4f52be0588..da4f4283a50 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -1509,11 +1509,11 @@ def raise_exception(*args, **kwargs): assert exec_file in cmd[-1] assert "-grpc" in cmd[-1] - assert mock_launch_grpc.call_args_list[0][1]["env_vars"] == { - "ANS_MULTIPLE_NODES": "1", - "HYDRA_BOOTSTRAP": "slurm", - "myenvvar": "myenvvarvalue", - } + envvars = mock_launch_grpc.call_args_list[0][1]["env_vars"] + + assert envvars["ANS_MULTIPLE_NODES"] == "1" + assert envvars["HYDRA_BOOTSTRAP"] == "slurm" + assert envvars["myenvvar"] == "myenvvarvalue" mock_scontrol.assert_called_once() args = mock_scontrol.call_args_list[0][0][0] @@ -1691,6 +1691,7 @@ def fake_proc(*args, **kwargs): assert batchhost_ip == 
"111.22.33.44" +@requires("ansys-tools-path") @patch("ansys.tools.path.path._mapdl_version_from_path", lambda *args, **kwargs: 201) @patch("ansys.mapdl.core._HAS_ATP", True) def test_get_version_version_error(monkeypatch): From c5f54a51524b6c11dd53e2ccf892e8fa6c49852b Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 28 Oct 2024 14:07:07 +0100 Subject: [PATCH 115/122] fix: doc builds --- .github/workflows/ci.yml | 1 - doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst | 5 ----- doc/source/user_guide/hpc/pymapdl.rst | 6 +++++- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 266a1c75d61..f7833d2f1a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -775,7 +775,6 @@ jobs: ON_LOCAL: true ON_UBUNTU: true TESTING_MINIMAL: true - steps: - name: "Install Git and checkout project" diff --git a/doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst b/doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst index 09efdc20b72..f6f05a3de03 100644 --- a/doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst +++ b/doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst @@ -1,9 +1,4 @@ -.. _ref_pymapdl_interactive_in_cluster_hpc: - -.. _ref_pymapdl_interactive_in_cluster_hpc_from_login: - - Interactive MAPDL instance launched from the login node ======================================================= diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index 9382cbf87be..76d27f75eed 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -183,8 +183,12 @@ moving files to different directories, and printing to ensure your configuration is correct. -.. include:: launch_mapdl_entrypoint.rst +.. _ref_pymapdl_interactive_in_cluster_hpc: + +.. _ref_pymapdl_interactive_in_cluster_hpc_from_login: + +.. include:: launch_mapdl_entrypoint.rst .. 
_ref_tight_integration_hpc: From 30632217853bdb57422331ef0ffe6850ba6aaa97 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 28 Oct 2024 17:57:17 +0100 Subject: [PATCH 116/122] tests: increasing coverage --- src/ansys/mapdl/core/launcher.py | 6 +- tests/test_launcher.py | 114 +++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 2 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 6fd7133fa41..10016c3beb6 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -1658,7 +1658,9 @@ def check_mode(mode: ALLOWABLE_MODES, version: Optional[int] = None): "gRPC mode requires MAPDL 2020R2 or newer " "on Windows." ) elif os.name == "posix": - raise VersionError("gRPC mode requires MAPDL 2021R1 or newer.") + raise VersionError( + "gRPC mode requires MAPDL 2021R1 or newer on Linux." + ) elif mode == "console": if os.name == "nt": @@ -1671,7 +1673,7 @@ def check_mode(mode: ALLOWABLE_MODES, version: Optional[int] = None): else: raise ValueError( f'Invalid MAPDL server mode "{mode}".\n\n' - f"Use one of the following modes:\n{ALLOWABLE_MODES}" + f"Use one of the following modes: {','.join(ALLOWABLE_MODES)}" ) else: # auto-select based on best version diff --git a/tests/test_launcher.py b/tests/test_launcher.py index da4f4283a50..2aaf0cc0fb8 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -45,6 +45,7 @@ _is_ubuntu, _parse_ip_route, check_mapdl_launch_on_hpc, + check_mode, force_smp_in_student, generate_mapdl_launch_command, generate_sbatch_command, @@ -60,10 +61,12 @@ get_start_instance, get_version, is_running_on_slurm, + kill_job, launch_grpc, launch_mapdl, launch_mapdl_on_cluster, remove_err_files, + send_scontrol, set_license_switch, set_MPI_additional_switches, update_env_vars, @@ -1709,3 +1712,114 @@ def test_get_version_env_var(monkeypatch, version): assert version == get_version(None) assert version != get_version(241) 
+ + +@pytest.mark.parametrize( + "mode, version, osname, context, res", + [ + [None, None, None, NullContext(), "grpc"], # default + [ + "grpc", + 201, + "nt", + pytest.raises( + VersionError, match="gRPC mode requires MAPDL 2020R2 or newer on Window" + ), + None, + ], + [ + "grpc", + 202, + "posix", + pytest.raises( + VersionError, match="gRPC mode requires MAPDL 2021R1 or newer on Linux." + ), + None, + ], + ["grpc", 212, "nt", NullContext(), "grpc"], + ["grpc", 221, "posix", NullContext(), "grpc"], + ["grpc", 221, "nt", NullContext(), "grpc"], + [ + "console", + 221, + "nt", + pytest.raises(ValueError, match="Console mode requires Linux."), + None, + ], + [ + "console", + 221, + "posix", + pytest.warns( + UserWarning, + match="Console mode not recommended in MAPDL 2021R1 or newer.", + ), + "console", + ], + [ + "nomode", + 221, + "posix", + pytest.raises(ValueError, match=f'Invalid MAPDL server mode "nomode"'), + None, + ], + [None, 211, "posix", NullContext(), "grpc"], + [None, 211, "nt", NullContext(), "grpc"], + [None, 202, "nt", NullContext(), "grpc"], + [ + None, + 201, + "nt", + pytest.raises(VersionError, match="Running MAPDL as a service requires"), + None, + ], + [None, 202, "posix", NullContext(), "console"], + [None, 201, "posix", NullContext(), "console"], + [ + None, + 110, + "posix", + pytest.warns( + UserWarning, + match="MAPDL as a service has not been tested on MAPDL < v13", + ), + "console", + ], + [ + None, + 110, + "nt", + pytest.raises(VersionError, match="Running MAPDL as a service requires"), + None, + ], + ], +) +def test_check_mode(mode, version, osname, context, res): + with patch("os.name", osname): + with context: + assert res == check_mode(mode, version) + + +@pytest.mark.parametrize("jobid", [1001, 2002]) +@patch("subprocess.Popen", lambda *args, **kwargs: None) +def test_kill_job(jobid): + with patch("ansys.mapdl.core.launcher.submitter") as mck_sub: + assert kill_job(jobid) is None + mck_sub.assert_called_once() + arg = 
mck_sub.call_args_list[0][0][0] + assert arg[0] == "scancel" + assert arg[1] == str(jobid) + + +@pytest.mark.parametrize("jobid", [1001, 2002]) +@patch("ansys.mapdl.core.launcher.submitter", fake_subprocess_open) # return command +def test_send_scontrol(jobid): + with patch("ansys.mapdl.core.launcher.submitter") as mck_sub: + args = f"my args {jobid}" + assert send_scontrol(args) + + mck_sub.assert_called_once() + arg = mck_sub.call_args_list[0][0][0] + assert " ".join(arg) == f"scontrol my args {jobid}" + assert "scontrol" in arg + assert f"{jobid}" in arg From cddb76be5c5b1b2fcc07dd9c8dc6075cd23236b3 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 28 Oct 2024 18:17:55 +0100 Subject: [PATCH 117/122] fix: not passing args --- src/ansys/mapdl/core/launcher.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 10016c3beb6..ae36861c3a2 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -2831,11 +2831,11 @@ def submitter( # cmd is controlled by the library with generate_mapdl_launch_command. # Excluding bandit check. return subprocess.Popen( - cmd, + args=cmd, shell=shell, # sbatch does not work without shell. 
cwd=cwd, - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + stdin=stdin, + stdout=stdout, + stderr=stderr, env=env_vars, ) # nosec B603 B607 From a08b3780aeec1152cc468babcea9f930d9034646 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 28 Oct 2024 18:21:35 +0100 Subject: [PATCH 118/122] tests: increase coverage --- tests/test_launcher.py | 73 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 2aaf0cc0fb8..f5cea4f31f8 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -69,6 +69,7 @@ send_scontrol, set_license_switch, set_MPI_additional_switches, + submitter, update_env_vars, ) from ansys.mapdl.core.licensing import LICENSES @@ -1823,3 +1824,75 @@ def test_send_scontrol(jobid): assert " ".join(arg) == f"scontrol my args {jobid}" assert "scontrol" in arg assert f"{jobid}" in arg + + +@pytest.mark.parametrize( + "cmd,executable,shell,cwd,stdin,stdout,stderr,envvars", + [ + ["mycmd", None, True, "my_cwd", None, None, None, None], + [["my", "cmd"], None, True, "my_cwd", None, None, None, None], + [ + "mycmd", + "exec", + False, + "my_other_cwd", + "other_obj", + "other_obj", + "other_obj", + {"aaa": 1}, + ], + [ + ["my", "cmd"], + "exec", + False, + "my_single_cwd", + "other_obj", + "other_obj", + "other_obj", + {"a": "b", "b": "c"}, + ], + ], +) +def test_submitter(cmd, executable, shell, cwd, stdin, stdout, stderr, envvars): + def return_everything(*arg, **kwags): + return arg, kwags + + with patch("subprocess.Popen", return_everything) as mck_popen: + args, kwargs = submitter( + cmd=cmd, + executable=executable, + shell=shell, + cwd=cwd, + stdin=stdin, + stdout=stdout, + stderr=stderr, + env_vars=envvars, + ) + + if executable: + if isinstance(cmd, str): + assert kwargs["args"] == [executable, cmd] + else: # list + assert kwargs["args"] == [executable] + cmd + else: + assert 
kwargs["args"] == cmd + + assert kwargs["shell"] == shell + assert kwargs["cwd"] == cwd + + if stdin: + assert kwargs["stdin"] == stdin + else: + assert isinstance(kwargs["stdin"], type(subprocess.DEVNULL)) + + if stdout: + assert kwargs["stdout"] == stdout + else: + assert isinstance(kwargs["stdout"], type(subprocess.PIPE)) + + if stderr: + assert kwargs["stderr"] == stderr + else: + assert isinstance(kwargs["stderr"], type(subprocess.PIPE)) + + assert kwargs["env"] == envvars From e3cbf0129d6f500fa80a45074eeaf821665e6c93 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Mon, 28 Oct 2024 18:25:40 +0100 Subject: [PATCH 119/122] fix: tests --- tests/test_launcher.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index f5cea4f31f8..ea86036cf86 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -1171,14 +1171,9 @@ def test_remove_err_files_fail(tmpdir): # testing on windows to account for temp file -def fake_subprocess_open(*args, **kwargs): - kwargs["cmd"] = args[0] - return kwargs - - @patch("os.name", "nt") @pytest.mark.parametrize("launch_on_hpc", [None, False, True]) -@patch("subprocess.Popen", fake_subprocess_open) +@patch("subprocess.Popen", lambda *args, **kwargs: kwargs) def test_launch_grpc(tmpdir, launch_on_hpc): if launch_on_hpc: cmd = ["sbatch", "--wrap", "'ansys.exe -b -i my_input.inp -o my_output.inp'"] @@ -1190,11 +1185,11 @@ def test_launch_grpc(tmpdir, launch_on_hpc): inp_file = os.path.join(run_location, "my_input.inp") if launch_on_hpc: - assert "sbatch" in kwargs["cmd"] - assert "--wrap" in kwargs["cmd"] - assert " ".join(cmd) == kwargs["cmd"] + assert "sbatch" in kwargs["args"] + assert "--wrap" in kwargs["args"] + assert " ".join(cmd) == kwargs["args"] else: - assert cmd == kwargs["cmd"] + assert cmd == kwargs["args"] assert os.path.exists(inp_file) with open(inp_file, "r") as fid: assert "FINISH" in 
fid.read() @@ -1813,7 +1808,9 @@ def test_kill_job(jobid): @pytest.mark.parametrize("jobid", [1001, 2002]) -@patch("ansys.mapdl.core.launcher.submitter", fake_subprocess_open) # return command +@patch( + "ansys.mapdl.core.launcher.submitter", lambda *args, **kwargs: kwargs +) # return command def test_send_scontrol(jobid): with patch("ansys.mapdl.core.launcher.submitter") as mck_sub: args = f"my args {jobid}" From 3b799a256dd03a8f17e0d520da4857b14a38f9bc Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Tue, 29 Oct 2024 10:26:58 +0100 Subject: [PATCH 120/122] fix: fixture --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 2e0ea904340..384bd314955 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -627,7 +627,7 @@ def mapdl(request, tmpdir_factory): if START_INSTANCE: mapdl._local = True mapdl._exited = False - assert not mapdl.finish_job_on_exit + assert mapdl.finish_job_on_exit mapdl.exit(save=True, force=True) assert mapdl._exited assert "MAPDL exited" in str(mapdl) From f89df704b1ccf8cd41448956f9d62a09fe305afc Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:32:49 +0100 Subject: [PATCH 121/122] ci: uploading bandit reports as artifact. 
--- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f7833d2f1a9..f89de19bdcf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -147,6 +147,7 @@ jobs: token: ${{ secrets.PYANSYS_CI_BOT_TOKEN }} python-package-name: ${{ env.PACKAGE_NAME }} dev-mode: ${{ github.ref != 'refs/heads/main' }} + upload-reports: True docs-build: name: "Build documentation" From ac202b46aaed25af885b03675ca1eb73ba9b083a Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:35:01 +0100 Subject: [PATCH 122/122] docs: adding descriptor to phrase --- doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst b/doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst index f6f05a3de03..d3044c920df 100644 --- a/doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst +++ b/doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst @@ -77,7 +77,7 @@ You can retrieve the IP of the MAPDL instance as well as its hostname: >>> mapdl.hostname 'node0' -You can also retrieve the job ID: +You can also retrieve the SLURM job ID: .. code:: pycon