From 2e73bf965833eb0ecf27cff71404d6155ddd47bd Mon Sep 17 00:00:00 2001 From: <> Date: Mon, 31 Jul 2023 12:18:06 +0000 Subject: [PATCH] Deployed 9e211ca with MkDocs version: 1.5.1 --- 404.html | 2 +- access/index.html | 2 +- access/project/index.html | 2 +- access/ssh/index.html | 2 +- access/virtualmachines-vdi/index.html | 2 +- bespoke/eddash/index.html | 2 +- bespoke/eddash/jhub-git/index.html | 2 +- bespoke/eddash/safe-registration/index.html | 2 +- bespoke/eddash/workshops/index.html | 2 +- bespoke/index.html | 2 +- faq/index.html | 2 +- index.html | 2 +- known-issues/index.html | 2 +- overview/acknowledgements/index.html | 2 +- overview/contacts/index.html | 2 +- overview/index.html | 2 +- .../network-access-controls/index.html | 2 +- safe-haven-services/overview/index.html | 2 +- .../safe-haven-access/index.html | 4 +- .../index.html | 2 +- .../index.html | 2 +- .../L3_submitting_scripts_to_slurm/index.html | 2 +- .../index.html | 2 +- .../L5_parallelised_r_analysis/index.html | 2 +- .../using-the-hpc-cluster/index.html | 2 +- .../virtual-desktop-connections/index.html | 2 +- search/search_index.json | 2 +- services/cs2/access/index.html | 2 +- services/cs2/index.html | 2 +- services/cs2/run/index.html | 2 +- services/datacatalogue/docs/index.html | 2 +- services/datacatalogue/index.html | 2 +- services/datacatalogue/metadata/index.html | 2 +- services/datacatalogue/quickstart/index.html | 2 +- services/datacatalogue/tutorial/index.html | 2 +- services/gpuservice/index.html | 2 +- services/gpuservice/policies/index.html | 2 +- .../training/L1_getting_started/index.html | 44 ++++--- .../index.html | 9 +- .../L3_running_a_pytorch_task/index.html | 11 +- .../training/L4_template_workflow/index.html | 2 +- services/index.html | 2 +- services/jhub/docs/index.html | 2 +- services/jhub/index.html | 2 +- services/jhub/quickstart/index.html | 2 +- services/jhub/tutorial/index.html | 2 +- services/mft/index.html | 2 +- services/mft/quickstart/index.html | 2 +- services/mft/sftp/index.html | 2 +- services/mft/using-the-mft/index.html | 2 +- services/rstudioserver/docs/index.html | 2 +- services/rstudioserver/index.html | 2 +- services/rstudioserver/quickstart/index.html | 2 +- services/rstudioserver/tutorial/index.html | 2 +- services/ultra2/access/index.html | 2 +- services/ultra2/index.html | 2 +- services/ultra2/run/index.html | 2 +- services/virtualmachines/docs/index.html | 2 +- services/virtualmachines/flavours/index.html | 62 ++++----- services/virtualmachines/policies/index.html | 2 +- .../virtualmachines/quickstart/index.html | 2 +- sitemap.xml | 120 +++++++++--------- sitemap.xml.gz | Bin 780 -> 781 bytes status/index.html | 30 ++--- 64 files changed, 211 insertions(+), 181 deletions(-) diff --git a/404.html b/404.html index 129ee9422..38a5fab8a 100644 --- a/404.html +++ b/404.html @@ -11,7 +11,7 @@ - + diff --git a/access/index.html b/access/index.html index 18e112864..4f5e413fc 100644 --- a/access/index.html +++ b/access/index.html @@ -17,7 +17,7 @@ - + diff --git a/access/project/index.html b/access/project/index.html index 310c5c8e2..daccc03f3 100644 --- a/access/project/index.html +++ b/access/project/index.html @@ -17,7 +17,7 @@ - + diff --git a/access/ssh/index.html b/access/ssh/index.html index 7675671d6..908cce785 100644 --- a/access/ssh/index.html +++ b/access/ssh/index.html @@ -17,7 +17,7 @@ - + diff --git a/access/virtualmachines-vdi/index.html b/access/virtualmachines-vdi/index.html index 568947081..97a09027c 100644 --- a/access/virtualmachines-vdi/index.html +++ 
b/access/virtualmachines-vdi/index.html @@ -17,7 +17,7 @@ - + diff --git a/bespoke/eddash/index.html b/bespoke/eddash/index.html index e6b9a6c98..2ae099f15 100644 --- a/bespoke/eddash/index.html +++ b/bespoke/eddash/index.html @@ -13,7 +13,7 @@ - + diff --git a/bespoke/eddash/jhub-git/index.html b/bespoke/eddash/jhub-git/index.html index c6815fd9e..67a53017e 100644 --- a/bespoke/eddash/jhub-git/index.html +++ b/bespoke/eddash/jhub-git/index.html @@ -17,7 +17,7 @@ - + diff --git a/bespoke/eddash/safe-registration/index.html b/bespoke/eddash/safe-registration/index.html index 04b008ec7..52267b646 100644 --- a/bespoke/eddash/safe-registration/index.html +++ b/bespoke/eddash/safe-registration/index.html @@ -17,7 +17,7 @@ - + diff --git a/bespoke/eddash/workshops/index.html b/bespoke/eddash/workshops/index.html index 711b1e1fb..d2a7768f5 100644 --- a/bespoke/eddash/workshops/index.html +++ b/bespoke/eddash/workshops/index.html @@ -17,7 +17,7 @@ - + diff --git a/bespoke/index.html b/bespoke/index.html index 4be92130d..00a656d05 100644 --- a/bespoke/index.html +++ b/bespoke/index.html @@ -13,7 +13,7 @@ - + diff --git a/faq/index.html b/faq/index.html index 931068df1..d840ac835 100644 --- a/faq/index.html +++ b/faq/index.html @@ -15,7 +15,7 @@ - + diff --git a/index.html b/index.html index d3e35cbfa..b125bcec0 100644 --- a/index.html +++ b/index.html @@ -17,7 +17,7 @@ - + diff --git a/known-issues/index.html b/known-issues/index.html index 321591de1..7a590216f 100644 --- a/known-issues/index.html +++ b/known-issues/index.html @@ -17,7 +17,7 @@ - + diff --git a/overview/acknowledgements/index.html b/overview/acknowledgements/index.html index fae279dbc..bc8bff755 100644 --- a/overview/acknowledgements/index.html +++ b/overview/acknowledgements/index.html @@ -17,7 +17,7 @@ - + diff --git a/overview/contacts/index.html b/overview/contacts/index.html index 2c626b64e..4d8b31076 100644 --- a/overview/contacts/index.html +++ b/overview/contacts/index.html @@ -17,7 +17,7 @@ - + diff --git a/overview/index.html b/overview/index.html index 3c19e0a74..a17ff3567 100644 --- a/overview/index.html +++ b/overview/index.html @@ -17,7 +17,7 @@ - + diff --git a/safe-haven-services/network-access-controls/index.html b/safe-haven-services/network-access-controls/index.html index 1b4c1f8ae..b406fb795 100644 --- a/safe-haven-services/network-access-controls/index.html +++ b/safe-haven-services/network-access-controls/index.html @@ -17,7 +17,7 @@ - + diff --git a/safe-haven-services/overview/index.html b/safe-haven-services/overview/index.html index 8cb4ff014..52d6e5403 100644 --- a/safe-haven-services/overview/index.html +++ b/safe-haven-services/overview/index.html @@ -17,7 +17,7 @@ - + diff --git a/safe-haven-services/safe-haven-access/index.html b/safe-haven-services/safe-haven-access/index.html index fe747bf3a..9fa027ec2 100644 --- a/safe-haven-services/safe-haven-access/index.html +++ b/safe-haven-services/safe-haven-access/index.html @@ -17,7 +17,7 @@ - + @@ -1441,7 +1441,7 @@

Safe Haven Service Access

-

Safe Haven services are accessed from a registered network connection address using a browser. The service URL will be "https://shs.epcc.ed.ac.uk/<service>" where <service> is the Safe Haven service name.

+

Safe Haven services are accessed from a registered network connection address using a browser. The service URL will be "https://shs.epcc.ed.ac.uk/<service>" where <service> is the Safe Haven service name.

The Safe Haven access process is in three stages from multi-factor authentication to project desktop login.

Researchers who are active in many research projects and in more than one Safe Haven will need to pay attention to the service they connect to, the project desktop they login to, and the accounts and identities they are using.

Safe Haven Login

diff --git a/safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/index.html b/safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/index.html index 46813a47c..c958a7145 100644 --- a/safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/index.html +++ b/safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/index.html @@ -17,7 +17,7 @@ - + diff --git a/safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/index.html b/safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/index.html index 347ae931a..9ee63f4a4 100644 --- a/safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/index.html +++ b/safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/index.html @@ -17,7 +17,7 @@ - + diff --git a/safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/index.html b/safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/index.html index b93478378..51f0ae8e6 100644 --- a/safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/index.html +++ b/safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/index.html @@ -17,7 +17,7 @@ - + diff --git a/safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/index.html b/safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/index.html index eaa0dff25..7beed48b5 100644 --- a/safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/index.html +++ b/safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/index.html @@ -17,7 +17,7 @@ - + diff --git a/safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/index.html b/safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/index.html index d28ace794..4f8e31072 100644 --- a/safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/index.html +++ b/safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/index.html @@ -17,7 +17,7 @@ - + diff --git a/safe-haven-services/using-the-hpc-cluster/index.html b/safe-haven-services/using-the-hpc-cluster/index.html index db96f521b..15c7d4f39 100644 --- a/safe-haven-services/using-the-hpc-cluster/index.html +++ b/safe-haven-services/using-the-hpc-cluster/index.html @@ -17,7 +17,7 @@ - + diff --git a/safe-haven-services/virtual-desktop-connections/index.html b/safe-haven-services/virtual-desktop-connections/index.html index 54731b60f..514f4c565 100644 --- a/safe-haven-services/virtual-desktop-connections/index.html +++ b/safe-haven-services/virtual-desktop-connections/index.html @@ -17,7 +17,7 @@ - + diff --git a/search/search_index.json b/search/search_index.json index 9ccca8d3d..22fc11efb 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"EIDF User Documentation","text":"

The Edinburgh International Data Facility (EIDF) is built and operated by EPCC at the University of Edinburgh. EIDF is a place to store, find and work with data of all kinds. You can find more information on the service and the research it supports on the EIDF website.

For more information or for support with our services, please email eidf@epcc.ed.ac.uk in the first instance.

"},{"location":"#what-the-documentation-covers","title":"What the documentation covers","text":"

This documentation gives more in-depth coverage of current EIDF services. It is aimed primarily at developers or power users.

"},{"location":"#contributing-to-the-documentation","title":"Contributing to the documentation","text":"

The source for this documentation is publicly available in the EIDF documentation Github repository so that anyone can contribute to improve the documentation for the service. Contributions can be in the form of improvements or additions to the content and/or addition of Issues providing suggestions for how it can be improved.

Full details of how to contribute can be found in the README.md file of the repository.

This documentation set is a work in progress.

"},{"location":"#credits","title":"Credits","text":"

This documentation draws on the ARCHER2 National Supercomputing Service documentation.

"},{"location":"access/","title":"Accessing EIDF","text":"

Some EIDF services are accessed via a Web browser and some by \"traditional\" command-line ssh.

All EIDF services use the EPCC SAFE service management back end, to ensure compatibility with other EPCC high-performance computing services.

"},{"location":"access/#web-access-to-virtual-machines","title":"Web Access to Virtual Machines","text":"

The Virtual Desktop VM service is browser-based, providing a virtual desktop interface (Apache Guacamole) for \"desktop-in-a-browser\" access. Applications to use the VM service are made through the EIDF Portal.

EIDF Portal: how to ask to join an existing EIDF project and how to apply for a new project

VDI access to virtual machines: how to connect to the virtual desktop interface.

"},{"location":"access/#ssh-access-to-virtual-machines","title":"SSH Access to Virtual Machines","text":"

Users with the appropriate permissions can also use ssh to log in to Virtual Desktop VMs.

"},{"location":"access/#ssh-access-to-computing-services","title":"SSH Access to Computing Services","text":"

Includes access to the following services:

To log in to most command-line services with ssh you should use the username and password you obtained from SAFE when you applied for access, along with the SSH Key you registered when creating the account. You can then log in to the host following the appropriately linked instructions above.
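As a rough illustration only (the key path and hostname below are placeholders rather than a specific EIDF service), a command-line login typically looks like this:

ssh -i ~/.ssh/id_ed25519_eidf [username]@[service_host]\n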

"},{"location":"access/project/","title":"EIDF Portal","text":"

Projects using the Virtual Desktop cloud service are accessed via the EIDF Portal.

The EIDF Portal uses EPCC's SAFE service management software to manage user accounts across all EPCC services. To log in to the Portal you will first be redirected to the SAFE log on page. If you do not have a SAFE account, follow the instructions in the SAFE documentation on how to register and receive your password.

"},{"location":"access/project/#how-to-request-to-join-a-project","title":"How to request to join a project","text":"

Log in to the EIDF Portal and navigate to \"Projects\" and choose \"Request access\". Select the project that you want to join in the \"Project\" dropdown list - you can search for the project name or the project code, e.g. \"eidf0123\".

Now you have to wait for your PI or project manager to accept your request to register.

"},{"location":"access/project/#how-to-apply-for-a-project-as-a-principal-investigator","title":"How to apply for a project as a Principal Investigator","text":""},{"location":"access/project/#create-a-new-project-application","title":"Create a new project application","text":"

Navigate to the EIDF Portal and log in via SAFE if necessary (see above).

Once you have logged in click on \"Applications\" in the menu and choose \"New Application\".

  1. Fill in the Application Title - this will be the name of the project once it is approved.
  2. Choose a start date and an end date for your project.
  3. Click \"Create\" to create your project application.

Once the application has been created, you will see an overview of the form you are required to fill in. You can revisit the application at any time by clicking on \"Applications\" and choosing \"Your applications\" to display all your current and past applications and their status, or follow the link https://portal.eidf.ac.uk/proposal/.

"},{"location":"access/project/#populate-a-project-application","title":"Populate a project application","text":"

Fill in each section of the application as required:

You can edit and save each section separately and revisit the application at a later time.

"},{"location":"access/project/#datasets","title":"Datasets","text":"

You are required to fill in a \"Dataset\" form for each dataset that you are planning to store and process as part of your project.

We are required to ensure that projects involving \"sensitive\" data have the necessary permissions in place. The answers to these questions will enable us to decide what additional documentation we may need, and whether your project may need to be set up in an independently governed Safe Haven. There may be some projects we are simply unable to host for data protection reasons.

"},{"location":"access/project/#resource-requirements","title":"Resource Requirements","text":"

Add an estimate for each size and type of VM that is required.

"},{"location":"access/project/#submission","title":"Submission","text":"

When you are happy with your application, click \"Submit\". If any required fields are missing, they are highlighted and your submission will fail.

Once your submission is successful, the application status is marked as \"Submitted\" and you will have to wait while the EIDF approval team considers your application. You may be contacted if there are any questions regarding your application or if further information is required, and you will be notified of the outcome of your application.

"},{"location":"access/project/#approved-project","title":"Approved Project","text":"

If your application was approved, refer to Data Science Virtual Desktops: Quickstart for how to view your project and to Data Science Virtual Desktops: Managing VMs for how to manage a project and how to create virtual machines and user accounts.

"},{"location":"access/ssh/","title":"SSH Access to Virtual Machines using the EIDF-Gateway Jump Host","text":"

The EIDF-Gateway is an SSH gateway suitable for accessing EIDF Services via a console or terminal. As the gateway cannot be 'landed on', a user can only pass through it, so the destination (the VM IP) has to be known for the connection to work. Users connect to their VM through the jump host using their given accounts.

"},{"location":"access/ssh/#generating-and-adding-an-ssh-key","title":"Generating and Adding an SSH Key","text":"

In order to make use of the EIDF-Gateway, your EIDF account needs an SSH-Key associated with it. If you added one while creating your EIDF account, you can skip this step.

"},{"location":"access/ssh/#check-for-an-existing-ssh-key","title":"Check for an existing SSH Key","text":"

To check if you have an SSH Key associated with your account:

  1. Login to the Portal
  2. Select 'Your Projects'
  3. Select your project name
  4. Select your username

If there is an entry under 'Credentials', then you're all set up. If not, you'll need to generate an SSH-Key. To do this:

"},{"location":"access/ssh/#generate-a-new-ssh-key","title":"Generate a new SSH Key","text":"
  1. Open a new window of whatever terminal you will use to SSH to EIDF.
  2. Generate a new SSH Key: $ ssh-keygen
  3. Input the directory and filename of the key. It's recommended to make this something like 'eidf-gateway' so it's easier to identify later.
  4. Press enter to finish generating the key (see the example below)
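As a minimal sketch, the filename (and optionally the key type) can also be given directly on the command line; the 'eidf-gateway' name and the ed25519 key type here are just examples:

ssh-keygen -t ed25519 -f ~/.ssh/eidf-gateway\n

This writes the private key to ~/.ssh/eidf-gateway and the public key to ~/.ssh/eidf-gateway.pub; the .pub file is the one to upload in the next step.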
"},{"location":"access/ssh/#adding-the-new-ssh-key-to-your-account-via-the-portal","title":"Adding the new SSH Key to your account via the Portal","text":"
  1. Login into the Portal
  2. Select 'Your Projects'
  3. Select the relevant project
  4. Select your username
  5. Select the plus button under 'Credentials'
  6. Select 'Choose File' to upload the PUBLIC (.pub) ssh key generated in the last step, or open the .pub file you just created and copy its contents into the text box.
  7. Click 'Upload Credential'. It should look something like this:
"},{"location":"access/ssh/#adding-a-new-ssh-key-via-safe","title":"Adding a new SSH Key via SAFE","text":"

    This should not be necessary for most users, so only follow this process if you have an issue or have been told to by the EPCC Helpdesk. If you need to add an SSH Key directly to SAFE, you can follow this guide. However, select your '[username]@EIDF' login account, not 'Archer2' as specified in that guide.

    "},{"location":"access/ssh/#using-the-ssh-key-to-access-eidf-windows-and-linux","title":"Using the SSH-Key to access EIDF - Windows and Linux","text":"
    1. From your local terminal, import the SSH Key you generated above: $ ssh-add [sshkey]
    2. This should return \"Identity added [Path to SSH Key]\" if successful. You can then follow the steps below to access your VM. (A short example of importing the key is shown just after this list.)
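    If the SSH agent is not already running, or your key has a non-default name, a minimal sketch (assuming the 'eidf-gateway' filename suggested earlier) is:

    eval \"$(ssh-agent -s)\"   # start an agent if one is not already running\nssh-add ~/.ssh/eidf-gateway\n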
    "},{"location":"access/ssh/#accessing-from-windows","title":"Accessing from Windows","text":"

    Windows will require the installation of the OpenSSH Client or MobaXTerm to use SSH. PuTTY can also be used but won\u2019t be covered in this tutorial.

    "},{"location":"access/ssh/#installing-and-using-openssh","title":"Installing and using OpenSSH","text":"
    1. Click the \u2018Start\u2019 button at the bottom of the screen
    2. Click the \u2018Settings\u2019 cog icon
    3. Search in the top bar \u2018Add or Remove Programs\u2019 and select the entry
    4. Select the \u2018Optional Features\u2019 blue text link
    5. If \u2018OpenSSH Client\u2019 is not under \u2018Installed Features\u2019, click the \u2018Add a Feature\u2019 button
    6. Search \u2018OpenSSH Client\u2019
    7. Select the check box next to \u2018OpenSSH Client\u2019 and click \u2018Install\u2019
    8. Once this is installed, you can reach your VM by opening CMD and running: $ ssh -J [username]@eidf-gateway.epcc.ed.ac.uk [username]@[vm_ip]
    "},{"location":"access/ssh/#installing-mobaxterm","title":"Installing MobaXTerm","text":"
    1. Download MobaXTerm from https://mobaxterm.mobatek.net/
    2. Once installed click the \u2018Session\u2019 button in the top left corner
    3. Click \u2018SSH\u2019
    4. In the \u2018Remote Host\u2019 section, specify the VM IP
    5. Click the \u2018Network Settings\u2019 Tab
    6. Click the \u2018SSH Gateway (jump host)\u2019 button in the middle
    7. Under Gateway Host, specify: eidf-gateway.epcc.ed.ac.uk
    8. Under Username, specify your username
    9. Click \u2018OK\u2019
    10. Click \u2018OK\u2019 to launch the session
    11. For the EIDF-Gateway and VM login prompts, use your password
    "},{"location":"access/ssh/#accessing-from-macoslinux","title":"Accessing From MacOS/Linux","text":"

    OpenSSH is usually installed by default on Linux and macOS, so you can access the gateway natively from the terminal. The '-J' flag is used to specify that we will access the second specified host by jumping through the first specified host, as in the example below.

    ssh -J [username]@jumphost [username]@target\n

    To access EIDF Services:

    ssh -J [username]@eidf-gateway.epcc.ed.ac.uk [username]@[vm_ip]\n
    "},{"location":"access/ssh/#password-resets-via-the-eidf-gateway","title":"Password Resets via the EIDF-Gateway","text":"

    You will have to connect to your VM via SSH before you can log in with RDP, as your initial password needs to be reset, which can only be done via SSH. You can reset your password through the SSH Gateway by connecting to it directly:

    ssh [username]@eidf-gateway.epcc.ed.ac.uk\n

    Your first attempt to log in to your account using the SSH Gateway will prompt you for your initial password (provided in the portal) like a normal login. If this is successful you will be prompted to choose a new password. You will be asked for your initial password again, followed by two entries of your new password. This will reset your account password for both the gateway and the VM. Once this reset has been completed, the session will disconnect and you can log in via SSH again using the newly set password.

    You will not be able to directly connect to the gateway again, so to connect to your VM, jump through the SSH Gateway:

    ssh -J [username]@eidf-gateway.epcc.ed.ac.uk [username]@[vm_ip]\n
    "},{"location":"access/virtualmachines-vdi/","title":"Virtual Machines (VMs) and the EIDF Virtual Desktop Interface (VDI)","text":"

    Using the EIDF VDI, members of EIDF projects can connect to VMs that they have been granted access to. The EIDF VDI is a web portal that displays the connections to VMs a user has available to them, and then those connections can be easily initiated by clicking on them in the user interface. Once connected to the target VM, all interactions are mediated through the user's web browser by the EIDF VDI.

    "},{"location":"access/virtualmachines-vdi/#login-to-the-eidf-vdi","title":"Login to the EIDF VDI","text":"

    Once your membership request to join the appropriate EIDF project has been approved, you will be able to login to the EIDF VDI at https://eidf-vdi.epcc.ed.ac.uk/vdi.

    Authentication to the VDI is provided by SAFE, so if you do not have an active web browser session in SAFE, you will be redirected to the SAFE log on page. If you do not have a SAFE account, follow the instructions in the SAFE documentation on how to register and receive your password.

    "},{"location":"access/virtualmachines-vdi/#navigating-the-eidf-vdi","title":"Navigating the EIDF VDI","text":"

    After you have been authenticated through SAFE and logged into the EIDF VDI, if you have multiple connections available to you that have been associated with your user (typically in the case of research projects), you will be presented with the VDI home screen as shown below:

    VDI home page with list of available VM connections

    Adding connections

    Note that if a project manager has added a new connection for you it may not appear in the list of connections immediately. You must log out and log in again to refresh your connections list.

    "},{"location":"access/virtualmachines-vdi/#connecting-to-a-vm","title":"Connecting to a VM","text":"

    If you have only one connection associated with your VDI user account (typically in the case of workshops), you will be automatically connected to the target VM's virtual desktop. Once you are connected to the VM, you will be asked for your username and password as shown below (if you are participating in a workshop, then you may not be asked for credentials).

    VM virtual desktop connection user account login screen

    Once your credentials have been accepted, you will be connected to your VM's desktop environment. For instance, the screenshot below shows a resulting connection to a Xubuntu 20.04 VM with the Xfce desktop environment.

    VM virtual desktop

    "},{"location":"access/virtualmachines-vdi/#vdi-features-for-the-virtual-desktop","title":"VDI Features for the Virtual Desktop","text":"

    The EIDF VDI is an instance of the Apache Guacamole clientless remote desktop gateway. Since the connection to your VM virtual desktop is entirely managed through Guacamole in the web browser, there are some additional features to be aware of that may assist you when using the VDI.

    "},{"location":"access/virtualmachines-vdi/#the-vdi-menu","title":"The VDI Menu","text":"

    The Guacamole menu is a sidebar which is hidden until explicitly shown. On a desktop or other device which has a hardware keyboard, you can show this menu by pressing <Ctrl> + <Alt> + <Shift> on a Windows PC client, or <Ctrl> + <Command> + <Shift> on a Mac client. To hide the menu, you press the same key combination once again. The menu provides various options, including:

    "},{"location":"access/virtualmachines-vdi/#clipboard-copy-and-paste-functionality","title":"Clipboard Copy and Paste Functionality","text":"

    After you have activated the Guacamole menu using the key combination above, at the top of the menu is a text area labeled \u201cclipboard\u201d along with some basic instructions:

    Text copied/cut within Guacamole will appear here. Changes to the text below will affect the remote clipboard.

    The text area functions as an interface between the remote clipboard and the local clipboard. Text from the local clipboard can be pasted into the text area, causing that text to be sent to the clipboard of the remote desktop. Similarly, if you copy or cut text within the remote desktop, you will see that text within the text area, and can manually copy it into the local clipboard if desired.

    You can use the standard keyboard shortcuts to copy text from your client PC or Mac to the Guacamole menu clipboard, then again copy that text from the Guacamole menu clipboard into an application or CLI terminal on the VM's remote desktop. An example of using the copy and paste clipboard is shown in the screenshot below.

    The EIDF VDI Clipboard

    "},{"location":"access/virtualmachines-vdi/#keyboard-language-and-layout-settings","title":"Keyboard Language and Layout Settings","text":"

    For users who do not have standard English (UK) keyboard layouts, key presses can have unexpected translations as they are transmitted to your VM. Please contact the EIDF helpdesk at eidf@epcc.ed.ac.uk if you are experiencing difficulties with your keyboard mapping, and we will help to resolve this by changing some settings in the Guacamole VDI connection configuration.

    "},{"location":"access/virtualmachines-vdi/#further-information","title":"Further information","text":""},{"location":"bespoke/","title":"Bespoke Services","text":"

    Ed-DaSH

    "},{"location":"bespoke/eddash/","title":"EIDFWorkshops","text":"

    Ed-DaSH Notebook Service

    Ed-DaSH Virtual Machines

    JupyterHub Notebook Service Access

    "},{"location":"bespoke/eddash/jhub-git/","title":"EIDF JupyterHub Notebook Service Access","text":"

    Using the EIDF JupyterHub, users can access a range of services including standard interactive Python notebooks as well as RStudio Server.

    "},{"location":"bespoke/eddash/jhub-git/#ed-dash-workshops","title":"Ed-DaSH Workshops","text":""},{"location":"bespoke/eddash/jhub-git/#accessing","title":"Accessing","text":"

    Access to the EIDF JupyterHub is authenticated through GitHub, so you must have an account on https://github.com and that account must be a member of the appropriate organization in GitHub. Please ask your project admin or workshop instructor for the workshop GitHub organization details. Please follow the relevant steps listed below to prepare.

    1. If you do not have a GitHub account associated with the email you registered for the workshop with, follow the steps described in Step 1: Creating a GitHub Account
    2. If you do already have a GitHub account associated with the email address you registered for the workshop with, follow the steps described in Step 2: Registering with the Workshop GitHub Organization
    "},{"location":"bespoke/eddash/jhub-git/#step-1-creating-a-github-account","title":"Step 1: Creating a GitHub Account","text":"
    1. Visit https://github.com/signup in your browser
    2. Enter the email address that you used to register for the workshop
    3. Complete the remaining steps of the GitHub registration process
    4. Send an email to ed-dash-support@mlist.is.ed.ac.uk from your GitHub registered email address, including your GitHub username, and ask for an invitation to the workshop GitHub organization
    5. Wait for an email from GitHub inviting you to join the organization, then follow the steps in Step 2: Registering with the Workshop GitHub Organization
    "},{"location":"bespoke/eddash/jhub-git/#step-2-registering-with-the-workshop-github-organization","title":"Step 2: Registering With the Workshop GitHub Organization","text":"
    1. If you already have a GitHub account associated with the email address that you registered for the workshop with, you should have received an email inviting you to join the relevant GitHub organization. If you have not, email ed-dash-support@mlist.is.ed.ac.uk from your GitHub registered email address, including your GitHub username, and ask for an invitation to the workshop GitHub organization
    2. Once you have been invited to the GitHub organization, you will receive an email with the invitation; click on the button as shown Invitation to join the workshop GitHub organization
    3. Clicking on the button in the email will open a new web page with another form as shown below Form to accept the invitation to join the GitHub organization
    4. Again, click on the button to confirm, then the Ed-DaSH-Training GitHub organization page will open
    "},{"location":"bespoke/eddash/safe-registration/","title":"Accessing","text":"

    In order to access the EIDF VDI and connect to EIDF data science cloud VMs, you need to have an active SAFE account. If you already have a SAFE account, you can skip ahead to the Request Project Membership instructions. Otherwise, follow the Register Account in EPCC SAFE instructions immediately below to create the account.

    Info

    Please also see Register and Join a project in the SAFE documentation for more information.

    "},{"location":"bespoke/eddash/safe-registration/#step-1-register-account-in-epcc-safe","title":"Step 1: Register Account in EPCC SAFE","text":"
    1. Go to SAFE signup and complete the registration form
      1. Mandatory fields are: Email, Nationality, First name, Last name, Institution for reporting, Department, and Gender
      2. Your Email should be the one you used to register for the EIDF service (or Ed-DaSH workshop)
      3. If you are unsure, enter 'University of Edinburgh' for Institution for reporting and 'EIDF' for Department SAFE registration form
    2. Submit the form, then accept the SAFE Acceptable Use policy on the next page SAFE User Access Agreement
    3. After you have completed the registration form and accepted the policy, you will receive an email from support@archer2.ac.uk with a password reset URL
    4. Visit the link in the email and generate a new password, then submit the form
    5. You will now be logged into your new account in SAFE
    "},{"location":"bespoke/eddash/safe-registration/#step-2-request-project-membership","title":"Step 2: Request Project Membership","text":"
    1. While logged into SAFE, select the \u2018Request Access\u2019 menu item from the 'Projects' menu in the top menu bar
    2. This will open the 'Apply for project membership' page
    3. Enter the appropriate project ID into the \u2018Project\u2019 field and click the \u2018Next\u2019 button Apply for project membership in SAFE
    4. In the 'Access route' drop down field that appears, select 'Request membership' (not 'Request machine account') Request project membership in SAFE
    5. The project owner will then receive notification of the application and accept your request
    "},{"location":"bespoke/eddash/workshops/","title":"Workshop Setup","text":"

    Please follow the instructions in JupyterHub Notebook Service Access to arrange access to the EIDF Notebook service before continuing. The table below provides the login URL and the relevant GitHub organization to register with.

    Workshop | Login URL | GitHub Organization
    Ed-DaSH Introduction to Statistics | https://secure.epcc.ed.ac.uk/ed-dash-hub | Ed-DaSH-Training
    Ed-DaSH High-Dimensional Statistics | https://secure.epcc.ed.ac.uk/ed-dash-hub | Ed-DaSH-Training
    Ed-DaSH Introduction to Machine Learning with Python | https://secure.epcc.ed.ac.uk/ed-dash-hub | Ed-DaSH-Training
    N8 CIR Introduction to Artificial Neural Networks in Python | https://secure.epcc.ed.ac.uk/ed-dash-hub | Ed-DaSH-Training

    Please follow the sequence of instructions described in the sections below to get ready for the workshop:

    1. Step 1: Accessing the EIDF Notebook Service for the First Time
    2. Step 2: Login to EIDF JupyterHub
    3. Step 3: Creating a New R Script
    "},{"location":"bespoke/eddash/workshops/#step-1-accessing-the-eidf-notebook-service-for-the-first-time","title":"Step 1: Accessing the EIDF Notebook Service for the First Time","text":"

    We will be using the Notebook service provided by the Edinburgh International Data Facility (EIDF). Follow the steps listed below to gain access.

    Warning

    If you are receiving an error response such as '403: Forbidden' when you try to access https://secure.epcc.ed.ac.uk/ed-dash-hub, please send an email to ed-dash-support@mlist.is.ed.ac.uk to request access and also include your IP address which you can find by visiting https://whatismyipaddress.com/ in your browser. Please be aware that if you are accessing the service from outside of the UK, your access might be blocked until you have emailed us with your IP address.

    1. Click on the button
    2. You will be asked to sign in to GitHub, as shown in the form below GitHub sign in form for access to EIDF Notebook Service
    3. Enter your GitHub credentials, or click on the \u2018Create an account\u2019 link if you do not already have one, and follow the prerequisite instructions to register with GitHub and join the workshop organization
    4. Click on the \u2018Sign in\u2019 button
    5. On the next page, you will be asked whether to authorize the workshop organization to access your GitHub account as shown below GitHub form requesting authorization for the workshop organization
    6. Click on the button
    7. At this point, you will receive an email to the email address that you registered with in GitHub, stating that \u201cA third-party OAuth application has been added to your account\u201d for the workshop
    8. If you receive a \u2018403 : Forbidden\u2019 error message on the next screen (if you did not already do so as in step 4 of the prerequisites section) send an email to ed-dash-support@mlist.is.ed.ac.uk from your GitHub registered email address, including your GitHub username, and ask for an invitation to the workshop organization. Otherwise, skip to the next step. N.B. If you are accessing the service from outside of the UK, you may see this error; if so, please contact ed-dash-support@mlist.is.ed.ac.uk to enable access
    9. If you receive a \u2018400 : Bad Request\u2019 error message, you need to accept the invitation that has been emailed to you to join the workshop organization as in the prerequisite instructions
    "},{"location":"bespoke/eddash/workshops/#step-2-login-to-the-eidf-notebook-service","title":"Step 2: Login to the EIDF Notebook Service","text":"

    Now that you have completed registration with the workshop GitHub organization, you can access the workshop RStudio Server in EIDF.

    1. Return to the https://secure.epcc.ed.ac.uk/ed-dash-hub
    2. You will be presented with a choice of server as a list of radio buttons. Select the appropriate one as labelled for your workshop and press the orange 'Start' button
    3. You will now be redirected to the hub spawn pending page for your individual server instance
    4. You will see a message stating that your server is launching. If the page has not updated after 10 seconds, simply refresh the page with the <CTRL> + R or <F5> keys in Windows, or <CMD> + R in macOS
    5. Finally, you will be redirected to either the RStudio Server if it's a statistics workshop, or the Jupyter Lab dashboard otherwise, as shown in the screenshots below The RStudio Server UI The Jupyter Lab Dashboard
    "},{"location":"bespoke/eddash/workshops/#step-3-creating-a-new-r-script","title":"Step 3: Creating a New R Script","text":"

    Follow these quickstart instructions to create your first R script in RStudio Server!

    "},{"location":"faq/","title":"FAQ","text":""},{"location":"faq/#eidf-frequently-asked-questions","title":"EIDF Frequently Asked Questions","text":""},{"location":"faq/#how-do-i-contact-the-eidf-helpdesk","title":"How do I contact the EIDF Helpdesk?","text":"

    Submit a query in the EIDF Portal by selecting \"Submit a Support Request\" in the \"Help and Support\" menu and filling in this form.

    You can also email us at eidf@epcc.ed.ac.uk.

    "},{"location":"faq/#how-do-i-request-more-resources-for-my-project-can-i-extend-my-project","title":"How do I request more resources for my project? Can I extend my project?","text":"

    Submit a support request: In the form select the project that your request relates to and select \"EIDF Project extension: duration and quota\" from the dropdown list of categories. Then enter the new quota or extension date in the description text box below and submit the request.

    The EIDF approval team will consider the extension and you will be notified of the outcome.

    "},{"location":"faq/#new-vms-and-vdi-connections","title":"New VMs and VDI connections","text":"

    My project manager gave me access to a VM but the connection doesn't show up in the VDI connections list?

    This may happen when a machine/VM was added to your connections list while you were logged in to the VDI. Please refresh the connections list by logging out and logging in again, and the new connections should appear.

    "},{"location":"faq/#non-default-ssh-keys","title":"Non-default SSH Keys","text":"

    I have different SSH keys for the SSH gateway and my VM, or I use a key which does not have the default name (~/.ssh/id_rsa) and I cannot login.

    The command syntax shown in our SSH documentation (using the -J <username>@eidf-gateway stanza) makes assumptions about SSH keys and their naming. You should try the full version of the command:

    ssh -o ProxyCommand=\"ssh -i ~/.ssh/<gateway_private_key> -W %h:%p <gateway_username>@eidf-gateway.epcc.ed.ac.uk\" -i ~/.ssh/<vm_private_key> <vm_username>@<vm_ip>\n

    Note that for the majority of users, gateway_username and vm_username are the same, as are gateway_private_key and vm_private_key.
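    An alternative that avoids typing the long command each time is a block in your ~/.ssh/config file. This is a minimal sketch using standard OpenSSH options and the same placeholder names as above; 'eidf-vm' is just an example alias:

    Host eidf-gateway\n    HostName eidf-gateway.epcc.ed.ac.uk\n    User <gateway_username>\n    IdentityFile ~/.ssh/<gateway_private_key>\n\nHost eidf-vm\n    HostName <vm_ip>\n    User <vm_username>\n    IdentityFile ~/.ssh/<vm_private_key>\n    ProxyJump eidf-gateway\n

    With this in place, ssh eidf-vm picks up the right key for each hop.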

    "},{"location":"faq/#username-policy","title":"Username Policy","text":"

    I already have an EIDF username for project Y, can I use this for project X?

    We mandate that every username must be unique across our estate. EPCC machines, including EIDF services such as the SDF and DSC VMs and HPC services such as Cirrus, require you to create a new machine account with a unique username for each project you work on. Usernames cannot be used on multiple projects, even if the previous project has finished. However, some projects span multiple machines so you may be able to log in to multiple machines with the same username.

    "},{"location":"known-issues/","title":"Known Issues","text":""},{"location":"known-issues/#virtual-desktops","title":"Virtual desktops","text":"

    No known issues.

    "},{"location":"overview/","title":"A Unique Service for Academia and Industry","text":"

    The Edinburgh International Data Facility (EIDF) is a growing set of data and compute services developed to support the Data Driven Innovation Programme at the University of Edinburgh.

    Our goal is to support learners, researchers and innovators across the spectrum, with services from data discovery through simple learn-as-you-play-with-data notebooks to GPU-enabled machine-learning platforms for driving AI application development.

    "},{"location":"overview/#eidf-and-the-data-driven-innovation-initiative","title":"EIDF and the Data-Driven Innovation Initiative","text":"

    Launched at the end of 2018, the Data-Driven Innovation (DDI) programme is one of six funded within the Edinburgh & South-East Scotland City Region Deal. The DDI programme aims to make Edinburgh the \u201cData Capital of Europe\u201d, with ambitious targets to support, enhance and improve talent, research, commercial adoption and entrepreneurship across the region through better use of data.

    The programme targets ten industry sectors, with interactions managed through five DDI Hubs: the Bayes Centre, the Usher Institute, Edinburgh Futures Institute, the National Robotarium, and Easter Bush. The activities of these Hubs are underpinned by EIDF.

    "},{"location":"overview/acknowledgements/","title":"Acknowledging EIDF","text":"

    If you make use of EIDF services in your work, we encourage you to acknowledge us in any publications.

    Acknowledgement of using the facility in publications can be used as an identifiable metric to evaluate the scientific support provided, and helps promote the impact of the wider DDI Programme.

    We encourage our users to ensure that an acknowledgement of EIDF is included in the relevant section of their manuscript. We would suggest:

    This work was supported by the Edinburgh International Data Facility (EIDF) and the Data-Driven Innovation Programme at the University of Edinburgh.

    "},{"location":"overview/contacts/","title":"Contact","text":"

    The Edinburgh International Data Facility is located at the Advanced Computing Facility of EPCC, the supercomputing centre based at the University of Edinburgh.

    "},{"location":"overview/contacts/#email-us","title":"Email us","text":"

    Email EIDF: eidf@epcc.ed.ac.uk

    "},{"location":"overview/contacts/#sign-up","title":"Sign up","text":"

    Join our mailing list to receive updates about EIDF.

    "},{"location":"safe-haven-services/network-access-controls/","title":"Safe Haven Network Access Controls","text":"

    The TRE Safe Haven services are protected against open, global access by IPv4 source address filtering. These network access controls ensure that connections are permitted only from Safe Haven controller partner networks and collaborating research institutions.

    The network access controls are managed by the Safe Haven service controllers who instruct EPCC to add and remove the IPv4 addresses allowed to connect to the service gateway. Researchers must connect to the Safe Haven service by first connecting to their institution or corporate VPN and then connecting to the Safe Haven.

    The Safe Haven IG controller and research project co-ordination teams must submit and confirm IPv4 address filter changes to their service help desk via email.

    "},{"location":"safe-haven-services/overview/","title":"Safe Haven Services","text":"

    The EIDF Trusted Research Environment (TRE) hosts several Safe Haven services that enable researchers to work with sensitive data in a secure environment. These services are operated by EPCC in partnership with Safe Haven controllers who manage the Information Governance (IG) appropriate for the research activities and the data access of their Safe Haven service.

    It is the responsibility of EPCC as the Safe Haven operator to design, implement and administer the technical controls required to deliver the Safe Haven security regime demanded by the Safe Haven controller.

    The role of the Safe Haven controller is to satisfy the needs of the researchers and the data suppliers. The controller is responsible for guaranteeing the confidentiality needs of the data suppliers and matching these with the availability needs of the researchers.

    The service offers secure data sharing and analysis environments allowing researchers access to sensitive data under the terms and conditions prescribed by the data providers. The service prioritises the requirements of the data provider over the demands of the researcher and is an academic TRE operating under the guidance of the Five Safes framework.

    The TRE has dedicated, private cloud infrastructure at EPCC's Advanced Computing Facility (ACF) data centre and has its own HPC cluster and high-performance file systems. When a new Safe Haven service is commissioned in the TRE it is created in a new virtual private cloud providing the Safe Haven service controller with an independent IG domain separate from other Safe Havens in the TRE. All TRE service infrastructure and all TRE project data are hosted at ACF.

    If you have any questions about the EIDF TRE or about Safe Haven services, please contact us.

    "},{"location":"safe-haven-services/safe-haven-access/","title":"Safe Haven Service Access","text":"

    Safe Haven services are accessed from a registered network connection address using a browser. The service URL will be \"https://shs.epcc.ed.ac.uk/<service>\" where <service> is the Safe Haven service name.

    The Safe Haven access process has three stages, from multi-factor authentication to project desktop login.

    Researchers who are active in many research projects and in more than one Safe Haven will need to pay attention to the service they connect to, the project desktop they login to, and the accounts and identities they are using.

    "},{"location":"safe-haven-services/safe-haven-access/#safe-haven-login","title":"Safe Haven Login","text":"

    The first step in the process prompts the user for a Safe Haven username and then for a session PIN code sent via SMS text to the mobile number registered for the username.

    Valid PIN code entry allows the user access to all of the Safe Haven service remote desktop gateways for up to 24 hours without entry of a new PIN code. A user who has successfully entered a PIN code once can access shs.epcc.ed.ac.uk/haven1 and shs.epcc.ed.ac.uk/haven2 without repeating PIN code identity verification.

    When a valid PIN code is accepted, the user is prompted to accept the service use terms and conditions.

    Registration of the user mobile phone number is managed by the Safe Haven IG controller and research project co-ordination teams by submitting and confirming user account changes through the dedicated service help desk via email.

    "},{"location":"safe-haven-services/safe-haven-access/#remote-desktop-gateway-login","title":"Remote Desktop Gateway Login","text":"

    The second step in the access process is for the user to login to the Safe Haven service remote desktop gateway so that a project desktop connection can be chosen. The user is prompted for a Safe Haven service account identity.

    VDI Safe Haven Service Login Page

    Safe Haven accounts are managed by the Safe Haven IG controller and research project co-ordination teams by submitting and confirming user account changes through the dedicated service help desk via email.

    "},{"location":"safe-haven-services/safe-haven-access/#project-desktop-connection","title":"Project Desktop Connection","text":"

    The third stage in the process is to select the virtual connection from those available on the account's home page. An example home page is shown below offering two connection options to the same virtual machine. Remote desktop connections will have an _rdp suffix and SSH terminal connections have an _ssh suffix. The most recently used connections are shown as screen thumbnails at the top of the page and all the connections available to the user are shown in a tree list below this.

    VM connections available home page

    The remote desktop gateway software used in the Safe Haven services in the TRE is the Apache Guacamole web application. Users new to this application can find the user manual here. It is recommended that users read this short guide, but note that the data sharing features such as copy and paste, connection sharing, and file transfers are disabled on all connections in the TRE Safe Havens.

    A remote desktop or SSH connection is used to access data provided for a specific research project. If a researcher is working on multiple projects within a Safe Haven they can only log in to one project at a time. Some connections may allow the user to log in to any project and some connections will only allow the user to log in to one specific project. This depends on project IG restrictions specified by the Safe Haven and project controllers.

    Project desktop accounts are managed by the Safe Haven IG controller and research project co-ordination teams by submitting and confirming user account changes through the dedicated service help desk via email.

    "},{"location":"safe-haven-services/using-the-hpc-cluster/","title":"Using the TRE HPC Cluster","text":""},{"location":"safe-haven-services/using-the-hpc-cluster/#introduction","title":"Introduction","text":"

    The TRE HPC system, also called the SuperDome Flex, is a single-node, large-memory HPC system. It is provided for compute- and data-intensive workloads that require more CPU, memory, and better IO performance than can be provided by the project VMs, which have the performance equivalent of small rack-mount servers.

    "},{"location":"safe-haven-services/using-the-hpc-cluster/#specifications","title":"Specifications","text":"

    The system is an HPE SuperDome Flex configured with 1152 hyper-threaded cores (576 physical cores) and 18TB of memory, of which 17TB is available to users. User home and project data directories are on network mounted storage pods running the BeeGFS parallel filesystem. This storage is built in blocks of 768TB per pod. Multiple pods are available in the TRE for use by the HPC system and the total storage available will vary depending on the project configuration.

    The HPC system runs Red Hat Enterprise Linux, which is not the same flavour of Linux as the Ubuntu distribution running on the desktop VMs. However, most jobs in the TRE run Python and R, and there are few issues moving between the two versions of Linux. Use of virtual environments is strongly encouraged to ensure there are no differences between the desktop and HPC runtimes.
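    A typical pattern is sketched below; the 'my_venv' name matches the example job script further down this page, the packages are illustrative, and package installation may need to go through whatever mechanism your Safe Haven provides:

    # create the environment once, on the HPC system\npython3 -m venv ${HOME}/my_venv\n# activate it in every session and batch job that uses it\nsource ${HOME}/my_venv/bin/activate\n# install the packages the project needs (illustrative examples)\npip install --upgrade pip\npip install numpy pandas\n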

    "},{"location":"safe-haven-services/using-the-hpc-cluster/#software-management","title":"Software Management","text":"

    All system level software installed and configured on the TRE HPC system is managed by the TRE admin team. Software installation requests may be made by the Safe Haven IG controllers, research project co-ordinators, and researchers by submitting change requests through the dedicated service help desk via email.

    Minor software changes will be made as soon as admin effort can be allocated. Major changes are likely to be scheduled for the TRE monthly maintenance session on the first Thursday of each month.

    "},{"location":"safe-haven-services/using-the-hpc-cluster/#hpc-login","title":"HPC Login","text":"

    Login to the HPC system is from the project VM using SSH and is not direct from the VDI. The HPC cluster accounts are the same accounts used on the project VMs, with the same username and password. All project data access on the HPC system is private to the project accounts as it is on the VMs, but it is important to understand that the TRE HPC cluster is shared by projects in other TRE Safe Havens.

    To log in to the HPC cluster from the project VMs, use ssh shs-sdf01 from an xterm. If you wish to avoid entering the account password for every SSH session or remote command execution, you can use SSH key authentication by following the SSH key configuration instructions here. SSH key passphrases are not strictly enforced within the Safe Haven but are strongly encouraged.
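    A minimal sketch of that key setup, run from the project VM (the ed25519 key type and the filename are only examples):

    # generate a key pair on the project VM\nssh-keygen -t ed25519 -f ~/.ssh/id_ed25519_sdf\n# install the public key on the HPC system (asks for your account password once)\nssh-copy-id -i ~/.ssh/id_ed25519_sdf.pub shs-sdf01\n# later logins can then use the key\nssh -i ~/.ssh/id_ed25519_sdf shs-sdf01\n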

    "},{"location":"safe-haven-services/using-the-hpc-cluster/#running-jobs","title":"Running Jobs","text":"

    To use the HPC system fully and fairly, all jobs must be run using the SLURM job manager. More information about SLURM, running batch jobs and running interactive jobs can be found here. Please read this carefully before using the cluster if you have not used SLURM before. The SLURM site also has a set of useful tutorials on HPC clusters and job scheduling.

    All analysis and processing jobs must be run via SLURM. SLURM manages access to all the cores on the system beyond the first 32. If SLURM is not used and programs are run directly from the command line, then there are only 32 cores available, and these are shared by the other users. Normal code development, short test runs, and debugging can be done from the command line without using SLURM.

    There is only one node

    The HPC system is a single node with all cores sharing all the available memory. SLURM jobs should always specify '#SBATCH --nodes=1' to run correctly.

    SLURM email alerts for job status change events are not supported in the TRE.
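    Assuming a batch script saved as my_job.slurm (the filename is arbitrary; the Python and MPI scripts below are examples of such scripts), the usual submit-and-monitor cycle looks like this:

    sbatch my_job.slurm      # submit the batch script; prints the job ID\nsqueue -u $USER          # list your pending and running jobs\nscancel <jobid>          # cancel a job if needed\nsrun --pty bash          # start an interactive session via SLURM\n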

    "},{"location":"safe-haven-services/using-the-hpc-cluster/#resource-limits","title":"Resource Limits","text":"

    There are no resource constraints imposed on the default SLURM partition at present. There are user limits (see the output of ulimit -a). If a project has a requirement for more than 200 cores, more than 4TB of memory, or an elapsed runtime of more than 96 hours, a resource reservation request should be made by the researchers through email to the service help desk.

    There are no storage quotas enforced in the HPC cluster storage at present. The project storage requirements are negotiated, and space allocated before the project accounts are released. Storage use is monitored, and guidance will be issued before quotas are imposed on projects.

    The HPC system is managed in the spirit of utilising it as fully as possible and as fairly as possible. This approach works best when researchers are aware of their project workload demands and cooperate rather than compete for cluster resources.

    "},{"location":"safe-haven-services/using-the-hpc-cluster/#python-jobs","title":"Python Jobs","text":"

    A basic script to run a Python job in a virtual environment is shown below.

    #!/bin/bash\n#\n#SBATCH --export=ALL                  # Job inherits all env vars\n#SBATCH --job-name=my_job_name        # Job name\n#SBATCH --mem=512G                    # Job memory request\n#SBATCH --output=job-%j.out           # Standard output file\n#SBATCH --error=job-%j.err            # Standard error file\n#SBATCH --nodes=1                     # Run on a single node\n#SBATCH --ntasks=1                    # Run one task per node\n#SBATCH --time=02:00:00               # Time limit hrs:min:sec\n#SBATCH --partition standard          # Run on partition (queue)\n\npwd\nhostname\ndate \"+DATE: %d/%m/%Y TIME: %H:%M:%S\"\necho \"Running job on a single CPU core\"\n\n# Create the job\u2019s virtual environment\nsource ${HOME}/my_venv/bin/activate\n\n# Run the job code\npython3 ${HOME}/my_job.py\n\ndate \"+DATE: %d/%m/%Y TIME: %H:%M:%S\"\n
    "},{"location":"safe-haven-services/using-the-hpc-cluster/#mpi-jobs","title":"MPI Jobs","text":"

    An example script for a multi-process MPI job is shown below. The system currently supports MPICH MPI.

    #!/bin/bash\n#\n#SBATCH --export=ALL\n#SBATCH --job-name=mpi_test\n#SBATCH --output=job-%j.out\n#SBATCH --error=job-%j.err\n#SBATCH --nodes=1\n#SBATCH --ntasks-per-node=5\n#SBATCH --cpus-per-task=1\n#SBATCH --time=05:00\n#SBATCH --partition standard\n\necho \"Submitted MPICH MPI job\"\necho \"Running on host ${HOSTNAME}\"\necho \"Using ${SLURM_NTASKS_PER_NODE} tasks per node\"\necho \"Using ${SLURM_CPUS_PER_TASK} cpus per task\"\nlet mpi_threads=${SLURM_NTASKS_PER_NODE}*${SLURM_CPUS_PER_TASK}\necho \"Using ${mpi_threads} MPI threads\"\n\n# load MPICH MPI module\nmodule purge\nmodule load mpi/mpich-x86_64\n\n# run mpi program\nmpirun ${HOME}/test_mpi\n
    "},{"location":"safe-haven-services/using-the-hpc-cluster/#managing-files-and-data","title":"Managing Files and Data","text":"

    There are three file systems to manage in the VM and HPC environment.

    1. The desktop VM /home file system. This can only be used when you login to the VM remote desktop. This file system is local to the VM and is not backed up.
    2. The HPC system /home file system. This can only be used when you login to the HPC system using SSH from the desktop VM. This file system is local to the HPC cluster and is not backed up.
    3. The project file and data space in the /safe_data file system. This file system can only be used when you login to a VM remote desktop session. This file system is backed up.

    The /safe_data file system with the project data cannot be used by the HPC system. The /safe_data file system has restricted access and relatively slow IO performance compared to the parallel BeeGFS file system storage on the HPC system.

    The process to use the TRE HPC service is to copy and synchronise the project code and data files on the /safe_data file system with the HPC /home file system before and after login sessions and job runs on the HPC cluster. Assuming all the code and data required for the job is in a directory 'current_wip' on the project VM, the workflow is as follows:

    1. Copy project code and data to the HPC cluster (from the desktop VM) rsync -avPz -e ssh /safe_data/my_project/current_wip shs-sdf01:
    2. Run jobs/tests/analysis ssh shs-sdf01, cd current_wip, sbatch/srun my_job
    3. Copy any changed project code and data back to /safe_data (from the desktop VM) rsync -avPz -e ssh shs-sdf01:current_wip /safe_data/my_project
    4. Optionally delete the code and data from the HPC cluster working directory.
    "},{"location":"safe-haven-services/virtual-desktop-connections/","title":"Virtual Machine Connections","text":"

    Sessions on project VMs may be either remote desktop (RDP) logins or SSH terminal logins. Most users will prefer to use the remote desktop connections, but the SSH terminal connection is useful when remote network performance is poor, and it must be used for account password changes.

    "},{"location":"safe-haven-services/virtual-desktop-connections/#first-time-login-and-account-password-changes","title":"First Time Login and Account Password Changes","text":"

    Account Password Changes

    Note that first time account login cannot be through RDP as a password change is required. Password reset logins must be SSH terminal sessions as password changes can only be made through SSH connections.

    "},{"location":"safe-haven-services/virtual-desktop-connections/#connecting-to-a-remote-ssh-session","title":"Connecting to a Remote SSH Session","text":"

    When a VM SSH connection is selected the browser screen becomes a text terminal and the user is prompted to \"Login as: \" with a project account name, and then prompted for the account password. This connection type is equivalent to a standard xterm SSH session.

    "},{"location":"safe-haven-services/virtual-desktop-connections/#connecting-to-a-remote-desktop-session","title":"Connecting to a Remote Desktop Session","text":"

    Remote desktop connections work best by first placing the browser in Full Screen mode and leaving it in this mode for the entire duration of the Safe Haven session.

    When a VM RDP connection is selected the browser screen becomes a remote desktop presenting the login screen shown below.

    VM virtual desktop connection user account login screen

    Once the project account credentials have been accepted, a remote desktop similar to the one shown below is presented. The default VM environment in the TRE is Ubuntu 22.04 with the Xfce desktop.

    VM virtual desktop

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/","title":"Accessing the Superdome Flex inside the EPCC Trusted Research Environment","text":""},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/#what-is-the-superdome-flex","title":"What is the Superdome Flex?","text":"

    The Superdome Flex (SDF) is a high-performance computing cluster manufactured by Hewlett Packard Enterprise. It has been designed to handle multi-core, high-memory tasks in environments where security is paramount. The hardware specifications of the SDF within the Trusted Research Environment (TRE) are as follows:

    The software specifications of the SDF are:

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/#key-point","title":"Key Point","text":"

    The SDF is within the TRE. Therefore, the same restrictions apply, i.e. the SDF is isolated from the internet (no downloading code from public GitHub repos) and copying/recording/extracting anything on the SDF outside of the TRE is strictly prohibited unless through approved processes.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/#accessing-the-sdf","title":"Accessing the SDF","text":"

    Users can only access the SDF by ssh-ing into it via their VM desktop.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/#hello-world","title":"Hello world","text":"
    **** On the VM desktop terminal ****\n\nssh shs-sdf01\n<Enter VM password>\n\necho \"Hello World\"\n\nexit\n
    "},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/#sdf-vs-vm-file-systems","title":"SDF vs VM file systems","text":"

    The SDF file system is separate from the VM file system, which is again separate from the project data space. Files need to be transferred between the three systems for any analysis to be completed within the SDF.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/#example-showing-separate-sdf-and-vm-file-systems","title":"Example showing separate SDF and VM file systems","text":"
    **** On the VM desktop terminal ****\n\ncd ~\ntouch test.txt\nls\n\nssh shs-sdf01\n<Enter VM password>\n\nls # test.txt is not here\nexit\n\nscp test.txt shs-sdf01:/home/<USERNAME>/\n\nssh shs-sdf01\n<Enter VM password>\n\nls # test.txt is here\n
    "},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/#example-copying-data-between-project-data-space-and-sdf","title":"Example copying data between project data space and SDF","text":"

    Transferring and synchronising data sets between the project data space and the SDF is easier with the rsync command (rather than manually checking and copying files/folders with scp). rsync only transfers files that differ between the two targets; more details are in its manual.

    **** On the VM desktop terminal ****\n\nman rsync # check instructions for using rsync\n\nrsync -avPz -e ssh /safe_data/my_project/ shs-sdf01:/home/<USERNAME>/my_project/ # sync project folder and SDF home folder\n\nssh shs-sdf01\n<Enter VM password>\n\n*** Conduct analysis on SDF ***\n\nexit\n\nrsync -avPz -e ssh /safe_data/my_project/current_wip shs-sdf01:/home/<USERNAME>/my_project/ # re-synchronise project folder and SDF home folder\n\n*** Optionally remove the project folder on SDF ***\n
    "},{"location":"safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/","title":"Running R/Python Scripts","text":"

    Running analysis scripts on the SDF is slightly different to running scripts on the Desktop VMs. The Linux distribution differs between the two with the SDF using Red Hat Enterprise Linux (RHEL) and the Desktop VMs using Ubuntu. Therefore, it is highly advisable to use virtual environments (e.g. conda environments) to complete any analysis and aid the transition between the two distributions. Conda should run out of the box on the Desktop VMs, but some configuration is required on the SDF.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/#setting-up-conda-environments-on-you-first-connection-to-the-sdf","title":"Setting up conda environments on you first connection to the SDF","text":"
    *** SDF Terminal ***\n\nconda activate base # Test conda environment\n\n# The conda command will not be found yet. There is no need to install conda, it just needs initialising.\n\neval \"$(/opt/anaconda3/bin/conda shell.bash hook)\" # Tells your terminal where conda is\n\nconda init # changes your .bashrc file so conda is automatically available in the future\n\nconda config --set auto_activate_base false # stop conda base from being activated on startup\n\npython # note python version\n\nexit()\n

    The base conda environment is now available, but note that the Python interpreter and gcc compiler it provides are not the latest versions (Python 3.9.7 and gcc 7.5.0).

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/#getting-an-up-to-date-python-version","title":"Getting an up-to-date python version","text":"

    In order to get an up-to-date python version we first need to use an updated gcc version. Fortunately, conda has an updated gcc toolset that can be installed.

    *** SDF Terminal ***\n\nconda activate base # If conda isn't already active\n\nconda create -n python-v3.11 gcc_linux-64=11.2.0 python=3.11.3\n\nconda activate python-v3.11\n\npython\n\nexit()\n
    "},{"location":"safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/#running-r-scripts-on-the-sdf","title":"Running R scripts on the SDF","text":"

    The default version of R available on the SDF is v4.1.2. Alternative R versions can be installed using conda, similarly to the python conda environment above.

    conda create -n r-v4.3 gcc_linux-64=11.2.0 r-base=4.3\n\nconda activate r-v4.3\n\nR\n\nq()\n
    "},{"location":"safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/#final-points","title":"Final points","text":""},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/","title":"Submitting Scripts to Slurm","text":""},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#what-is-slurm","title":"What is Slurm?","text":"

    Slurm is a workload manager that schedules jobs submitted to a shared resource. Slurm is a well-developed tool that can manage large computing clusters, such as ARCHER2, with thousands of users each with different priorities and allocated computing hours. Inside the TRE, Slurm is used to help ensure all users of the SDF get equitable access. Therefore, users who are submitting jobs with high resource requirements (>80 cores, >1TB of memory) may have to wait longer for resource allocation to enable users with lower resource demands to continue their work.

    Slurm is currently set up so all users have equal priority and there is no limit to the total number of CPU hours allocated to a user per month. However, there are limits to the maximum amount of resources that can be allocated to an individual job. Jobs that require more than 200 cores, more than 4TB of memory, or an elapsed runtime of more than 96 hours will be rejected. If users need to submit jobs with large resource demand, they need to submit a resource reservation request by emailing their project's service desk.
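
    As an illustration, a minimal sketch of a job header that stays within these limits (the values are arbitrary examples for a partial #SBATCH header, not recommendations):

    #SBATCH --nodes=1\n#SBATCH --ntasks=100        # at most 200 cores without a reservation\n#SBATCH --mem=1T            # at most 4TB of memory without a reservation\n#SBATCH --time=48:00:00     # at most 96 hours elapsed time without a reservation\n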

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#why-do-you-need-to-use-slurm","title":"Why do you need to use Slurm?","text":"

    The SDF is a resource shared across all projects within the TRE and all users should have equal opportunity to use the SDF to complete resource-intense tasks appropriate to their projects. Users of the SDF are required to consider the needs of the wider community by:

    Users can develop code, complete test runs, and debug from the SDF command line without using Slurm. However, only 32 of the 512 cores are accessible without submitting a job request to Slurm. These cores are accessible to all users simultaneously.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#slurm-basics","title":"Slurm basics","text":"

    Slurm revolves around four main entities: nodes, partitions, jobs and job steps. Nodes and partitions are relevant for more complex distributed computing clusters so Slurm can allocate appropriate resources to jobs across multiple pieces of hardware. Jobs are requests for resources and job steps are what need to be completed once the resources have been allocated (completed in sequence or parallel). Job steps can be further broken down into tasks.

    There are four key commands for Slurm users:

    More details on these functions (and several not mentioned here) can be seen on the Slurm website.
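
    As a quick reference, a minimal sketch of typical usage of the core Slurm commands used in this lesson, plus scancel for cancelling a job (the script name and <jobid> placeholder are illustrative):

    sbatch my_job_script.sh        # submit a batch job script to the queue\nsrun --ntasks 1 echo \"hello\"   # submit a job and run a job step interactively\nsqueue -u $USER                # list your queued and running jobs\nscancel <jobid>                # cancel a queued or running job\n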

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#submitting-a-simple-job","title":"Submitting a simple job","text":"
    *** SDF Terminal ***\n\nsqueue -u $USER # Check if there are jobs already queued or running for you\n\nsrun --job-name=my_first_slurm_job --nodes 1 --ntasks 10 --cpus-per-task 2 echo 'Hello World'\n\nsqueue -u $USER --state=CD # List all completed jobs\n

    In this instance, the srun command completes two steps: job submission and job step execution. First, it submits a job request to be allocated 20 CPUs (2 CPUs for each of the 10 tasks). Once the resources are available, it executes the job step consisting of 10 tasks, each running the echo 'Hello World' command.

    srun accepts a wide variety of options to specify the resources required to complete its job step. Within the SDF, you must always request 1 node (as there is only one node) and never use the --exclusive option (as no one will have exclusive access to this shared resource). Notice that running srun blocks your terminal from accepting any more commands and the output from each task in the job step, i.e. Hello World in the above example, outputs to your terminal. We will compare this to running an sbatch command.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#submitting-a-batch-job","title":"Submitting a batch job","text":"

    Batch jobs are incredibly useful because they run in the background without blocking your terminal. Batch jobs also output the results to a log file rather than straight to your terminal. This allows you to check a job was completed successfully at a later time so you can move on to other things whilst waiting for a job to complete.

    A batch job can be submitted to Slurm by passing a job script to the sbatch command. The first few lines of a job script outline the resources to be requested as part of the job. The remainder of a job script consists of one or more srun commands outlining the job steps that need to be completed (in sequence or parallel) once the resources are available. There are numerous options for defining the resource requirements of a job including:

    More information on the various options is available in the sbatch documentation.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#example-job-script","title":"Example Job Script","text":"
    #!/usr/bin/env bash\n#SBATCH -J HelloWorld\n#SBATCH --nodes=1\n#SBATCH --tasks-per-node=10\n#SBATCH --cpus-per-task=2\n#SBATCH --output=example_job_script.log\n\n# Run echo tasks in sequence\n\nsrun --ntasks 5 --cpus-per-task 2 echo \"Series Task A. Time: \" $(date +\"%H:%M:%S\")\n\nsrun --ntasks 5 --cpus-per-task 2 echo \"Series Task B. Time: \" $(date +\"%H:%M:%S\")\n\n# Run echo tasks in parallel with the ampersand character\n\nsrun --exclusive --ntasks 5 --cpus-per-task 2 echo \"Parallel Task A. Time: \" $(date +\"%H:%M:%S\") &\n\nsrun --exclusive --ntasks 5 --cpus-per-task 2 echo \"Parallel Task B. Time: \" $(date +\"%H:%M:%S\")\n\n# Wait for any background job steps to finish\nwait\n
    "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#example-job-script-submission","title":"Example job script submission","text":"
    *** SDF Terminal ***\n\nnano example_job_script.sh\n\n*** Copy example job script above ***\n\nsbatch example_job_script.sh\n\nsqueue -u $USER -i 5 # report the queue status every 5 seconds (Ctrl-C to stop)\n\n*** Wait for the batch job to be completed ***\n\ncat example_job_script.log # The series tasks should be grouped together and the parallel tasks interspersed.\n

    The example batch job is intended to show two things: 1) the usefulness of the sbatch command and 2) the versatility of a job script. As the sbatch command allows you to submit scripts and check their outcome at your own discretion, it is the most common way of interacting with Slurm. Meanwhile, the job script format allows you to specify one global resource request and break it up into multiple job steps with different resource demands that can be completed in parallel or in sequence.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#submitting-pythonr-code-to-slurm","title":"Submitting python/R code to Slurm","text":"

    Although submitting job steps containing python/R analysis scripts can be done with srun directly, as below, it is more common to submit bash scripts that call the analysis scripts after setting up the environment (i.e. after calling conda activate).

    **** Python code job submission ****\n\nsrun --job-name=my_first_python_job --nodes 1 --ntasks 10 --cpus-per-task 2 --mem 10G python3 example_script.py\n\n**** R code job submission ****\n\nsrun --job-name=my_first_r_job --nodes 1 --ntasks 10 --cpus-per-task 2 --mem 10G Rscript example_script.R\n
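
    A minimal sketch of such a bash wrapper script is shown below; it assumes the python-v3.11 environment created in the previous lesson, the conda path used earlier in this tutorial, and an illustrative script name and resource request:

    #!/usr/bin/env bash\n#SBATCH -J my_python_job\n#SBATCH --nodes=1\n#SBATCH --ntasks=1\n#SBATCH --cpus-per-task=4\n#SBATCH --mem=10G\n\n# Set up conda in the non-interactive batch shell and activate the environment\neval \"$(/opt/anaconda3/bin/conda shell.bash hook)\"\nconda activate python-v3.11\n\n# Run the analysis script as a job step\nsrun python3 example_script.py\n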
    "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#signposting","title":"Signposting","text":"

    Useful websites for learning more about Slurm:

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/","title":"Parallelised Python analysis with Dask","text":"

    This lesson is adapted from a workshop introducing users to running python scripts on ARCHER2 as developed by Adrian Jackson.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#introduction","title":"Introduction","text":"

    Python does not have native support for parallelisation. Python contains a Global Interpreter Lock (GIL) which means the python interpreter only allows one thread to execute at a time. The advantage of the GIL is that C libraries can be easily integrated into Python scripts without checking if they are thread-safe. However, this means that most common python modules cannot be easily parallelised. Fortunately, there are now several re-implementations of common python modules that work around the GIL and are therefore parallelisable. Dask is a python module that contains a parallelised version of the pandas data frame as well as a general format for parallelising any python code.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#dask","title":"Dask","text":"

    Dask enables thread-safe parallelised python execution by creating task graphs (a graph of the dependencies of the inputs and outputs of each function) and then deducing which ones can be run separately. This lesson introduces some general concepts required for programming using Dask. There are also some exercises with example answers to help you write your first parallelised python scripts.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#arrays-data-frames-and-bags","title":"Arrays, data frames and bags","text":"

    Dask contains three data objects to enable parallelised analysis of large data sets in a way familiar to most python programmers. If the same operations are being applied to a large data set then Dask can split up the data set and apply the operations in parallel. The three data objects that Dask can easily split up are arrays, data frames and bags.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#example-dask-array","title":"Example Dask array","text":"

    You may need to install dask or create a new conda environment that includes it.

    conda create -n dask-env gcc_linux-64=11.2.0 python=3.11.3 dask\n\nconda activate dask-env\n

    Try running the following Python using dask:

    import dask.array as da\n\nx = da.random.random((10000, 10000), chunks=(1000, 1000))\n\nprint(x)\n\nprint(x.compute())\n\nprint(x.sum())\n\nprint(x.sum().compute())\n

    This should demonstrate that dask makes it straightforward to implement simple parallelism, but is also lazy in that it does not compute anything until you force it to with the .compute() function.

    You can also try out dask DataFrames, using the following code:

    import dask.dataframe as dd\n\ndf = dd.read_csv('surveys.csv')\n\ndf.head()\ndf.tail()\n\ndf.weight.max().compute()\n

    You can try using different blocksizes when reading in the csv file and then undertaking an operation on the data, as follows. Experiment with varying blocksizes, although you should be aware that making your blocksize too small is likely to cause poor performance (the blocksize affects the number of bytes read in at each operation).

    df = dd.read_csv('surveys.csv', blocksize=\"10000\")\ndf.weight.max().compute()\n

    You can also experiment with Dask Bags to see how that functionality works:

    import dask.bag as db\nfrom operator import add\nb = db.from_sequence([1, 2, 3, 4, 5], npartitions=2)\nprint(b.compute())\nprint(b.fold(add).compute()) # reduce the bag across its partitions using the add operator\n
    "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#dask-delayed","title":"Dask Delayed","text":"

    Dask delayed lets you construct your own task graphs/parallelism from Python functions. You can find out more about dask delayed from the dask documentation. Try parallelising the code below using the .delayed function or the @delayed decorator; an example answer can be found here.

    def inc(x):\n    return x + 1\n\ndef double(x):\n    return x * 2\n\ndef add(x, y):\n    return x + y\n\ndata = [1, 2, 3, 4, 5]\n\noutput = []\nfor x in data:\n    a = inc(x)\n    b = double(x)\n    c = add(a, b)\n    output.append(c)\n\ntotal = sum(output)\n\nprint(total)\n
    "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#mandelbrot-exercise","title":"Mandelbrot Exercise","text":"

    The code below calculates the members of a Mandelbrot set using Python functions:

    import sys\nimport time\nimport numpy as np\nimport matplotlib.pyplot as plt\n\ndef mandelbrot(h, w, maxit=20, r=2):\n    \"\"\"Returns an image of the Mandelbrot fractal of size (h,w).\"\"\"\n    start = time.time()\n\n    x = np.linspace(-2.5, 1.5, 4*h+1)\n\n    y = np.linspace(-1.5, 1.5, 3*w+1)\n\n    A, B = np.meshgrid(x, y)\n\n    C = A + B*1j\n\n    z = np.zeros_like(C)\n\n    divtime = maxit + np.zeros(z.shape, dtype=int)\n\n    for i in range(maxit):\n        z = z**2 + C\n        diverge = abs(z) > r # who is diverging\n        div_now = diverge & (divtime == maxit) # who is diverging now\n        divtime[div_now] = i # note when\n        z[diverge] = r # avoid diverging too much\n\n    end = time.time()\n\n    return divtime, end-start\n\nh = 2000\nw = 2000\n\nmandelbrot_space, time = mandelbrot(h, w)\n\nplt.imshow(mandelbrot_space)\n\nprint(time)\n

    Your task is to parallelise this code using Dask Array functionality. Using the base python code above, extend it with Dask Array for the main arrays in the computation. Remember you need to specify a chunk size with Dask Arrays, and you will also need to call compute at some point to force Dask to actually undertake the computation. Note, depending on where you run this you may not see any actual speed up of the computation. You need access to extra resources (compute cores) for the calculation to go faster. If in doubt, submit a python script of your solution to the SDF compute nodes to see if you see speed up there. If you are struggling with this parallelisation exercise, there is a solution available for you here.
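
    If you want to check for speed-up on the compute nodes, one possible submission is sketched below (the script name and resource numbers are illustrative; the dask-env environment is the one created above):

    conda activate dask-env\n\nsrun --job-name=dask_mandelbrot --nodes 1 --ntasks 1 --cpus-per-task 16 --mem 64G python3 dask_mandelbrot.py\n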

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#pi-exercise","title":"Pi Exercise","text":"

    The code below calculates Pi using a function that can split it up into chunks and calculate each chunk separately. Currently it uses a single chunk to produce the final value of Pi, but that can be changed by calling pi_chunk multiple times with different inputs. This is not necessarily the most efficient method for calculating Pi in serial, but it does enable parallelisation of the calculation of Pi using multiple copies of pi_chunk called simultaneously.

    import time\nimport sys\n\n# Calculate pi in chunks\n\n# n     - total number of steps to be undertaken across all chunks\n# lower - the lowest number of this chunk\n# upper - the upper limit of this chunk such that i < upper\n\ndef pi_chunk(n, lower, upper):\n    step = 1.0 / n\n    p = step * sum(4.0/(1.0 + ((i + 0.5) * (i + 0.5) * step * step)) for i in range(lower, upper))\n    return p\n\n# Number of slices\n\nnum_steps = 10000000\n\nprint(\"Calculating PI using:\\n \" + str(num_steps) + \" slices\")\n\nstart = time.time()\n\n# Calculate using a single chunk containing all steps\n\np = pi_chunk(num_steps, 1, num_steps)\n\nstop = time.time()\n\nprint(\"Obtained value of Pi: \" + str(p))\n\nprint(\"Time taken: \" + str(stop - start) + \" seconds\")\n

    For this exercise, your task is to implement the above code on the SDF, and then parallelise it using Dask. There are a number of different ways you could parallelise this using Dask, but we suggest using the Futures map functionality to run the pi_chunk function on a range of different inputs. Futures map has the following definition:

    Client.map(func, *iterables[, key, workers, ...])\n

    Where func is the function you want to run and the subsequent arguments are inputs to that function. To utilise this for the Pi calculation, you will first need to set up and configure a Dask Client, and also create and populate lists or vectors of inputs to be passed to the pi_chunk function for each function run that Dask launches.

    If you run Dask with processes then it is possible that you will get errors about forking processes, such as these:

        An attempt has been made to start a new process before the current process has finished its bootstrapping phase.\n    This probably means that you are not using fork to start your child processes and you have forgotten to use the proper idiom in the main module:\n

    In that case you need to encapsulate your code within a main function, using something like this:

    if __name__ == \"__main__\":\n

    If you are struggling with this exercise then there is a solution available for you here.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#signposting","title":"Signposting","text":""},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/","title":"Parallelised R Analysis","text":"

    This lesson is adapted from a workshop introducing users to running R scripts on ARCHER2 as developed by Adrian Jackson.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/#introduction","title":"Introduction","text":"

    In this exercise we are going to try different methods of parallelising R on the SDF. This will include single node parallelisation functionality (e.g. using threads or processes to use cores within a single node), and distributed memory functionality that enables the parallelisation of R programs across multiple nodes.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/#example-parallelised-r-code","title":"Example parallelised R code","text":"

    You may need to activate an R conda environment.

    conda activate r-v4.3\n

    Try running the following R script using R on the SDF login node:

    n <- 8*2048\nA <- matrix( rnorm(n*n), ncol=n, nrow=n )\nB <- matrix( rnorm(n*n), ncol=n, nrow=n )\nC <- A %*% B\n

    You can run this as follows on the SDF (assuming you have saved the above code into a file named matrix.R):

    Rscript ./matrix.R\n

    You can check the resources used by R when running on the login node using this command:

    top -u $USER\n

    If you run the R script in the background using &, as follows, you can then monitor your run using the top command. You may notice when you run your program that at points R uses many more resources than a single core can provide, as demonstrated below:
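
    For example (a minimal sketch of the backgrounded run described above):

    Rscript ./matrix.R &\n\ntop -u $USER\n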

        PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND\n    178357 adrianj 20 0 15.542 0.014t 13064 R 10862 2.773 9:01.66 R\n

    In the example above it can be seen that R is using 10862% of a single core, i.e. the equivalent of more than 100 cores. This is an example of R using automatic parallelisation. You can experiment with controlling the automatic parallelisation using the OMP_NUM_THREADS variable to restrict the number of cores available to R. Try using the following values:

    export OMP_NUM_THREADS=8\n\nexport OMP_NUM_THREADS=4\n\nexport OMP_NUM_THREADS=2\n

    You may also notice that not all of the R script is parallelised. Only the actual matrix multiplication is undertaken in parallel; the initialisation/creation of the matrices is done in serial.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/#parallelisation-with-datatables","title":"Parallelisation with data.tables","text":"

    We can also experiment with the implicit parallelism in other libraries, such as data.table. You will first need to install this library on the SDF. To do this you can simply run the following command:

    install.packages(\"data.table\")\n

    Once you have installed data.table you can experiment with the following code:

    library(data.table)\nvenue_data <- data.table( ID = 1:50000000,\nCapacity = sample(100:1000, size = 50000000, replace = T), Code = sample(LETTERS, 50000000, replace = T),\nCountry = rep(c(\"England\",\"Scotland\",\"Wales\",\"NorthernIreland\"), length.out = 50000000))\nsystem.time(venue_data[, mean(Capacity), by = Country])\n

    This creates some random data in a large data table and then performs a calculation on it. Try running R with varying numbers of threads to see what impact that has on performance. Remember, you can vary the number of threads R uses by setting OMP_NUM_THREADS= before you run R. If you want to try easily varying the number of threads you can save the above code into a script and run it using Rscript, changing OMP_NUM_THREADS each time you run it, e.g.:

    export OMP_NUM_THREADS=1\n\nRscript ./data_table_test.R\n\nexport OMP_NUM_THREADS=2\n\nRscript ./data_table_test.R\n

    The elapsed time that is printed out when the calculation is run represents how long the script/program took to run. It\u2019s important to bear in mind that, as with the matrix multiplication exercise, not everything will be parallelised. Creating the data table is done in serial so does not benefit from the addition of more threads.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/#loop-and-function-parallelism","title":"Loop and function parallelism","text":"

    R provides a number of different functions to run loops or functions in parallel. One of the most common approaches is to use the {X}apply functions:

    For example:

    res <- lapply(1:3, function(i) {\nsqrt(i)*sqrt(i*2)\n})\n

    The {X}apply functionality supports iteration over a dataset without requiring a loop to be constructed. However, the functions outlined above do not exploit parallelism, even though there is potential for parallelisation in many operations that utilise them.

    There are a number of mechanisms that can be used to implement parallelism using the {X}apply functions. One of the simplest is using the parallel library, and the mclapply function:

    library(parallel)\nres <- mclapply(1:3, function(i) {\nsqrt(i)\n})\n

    Try experimenting with the above functions on large numbers of iterations, both with lapply and mclapply. Can you achieve better performance using the MC_CORES environment variable to specify how many parallel processes R uses to complete these calculations? The default on the SDF is 2 cores, but you can increase this in the same way we did for OMP_NUM_THREADS, e.g.:

    export MC_CORES=16\n

    Try different numbers of iterations of the functions (e.g. change 1:3 in the code to something much larger), and different numbers of parallel processes, e.g.:

    export MC_CORES=2\n\nexport MC_CORES=8\n\nexport MC_CORES=16\n

    If you have separate functions then the above approach will provide a simple method for parallelising using the resources within a single node. However, if your functionality is more loop-based, then you may not wish to have to package this up into separate functions to parallelise.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/#parallelisation-with-foreach","title":"Parallelisation with foreach","text":"

    The foreach package can be used to parallelise loops as well as functions. Consider a loop of the following form:

    main_list <- c()\nfor (i in 1:3) {\nmain_list <- c(main_list, sqrt(i))\n}\n

    This can be converted to foreach functionality as follows:

    main_list <- c()\nlibrary(foreach)\nforeach(i=1:3) %do% {\nmain_list <- c(main_list, sqrt(i))\n}\n

    Whilst this approach does not significantly change the performance or functionality of the code, it does let us then exploit parallel functionality in foreach. The %do% can be replaced with a %dopar% which will execute the code in parallel.

    To test this out we\u2019re going to try an example using the randomForest library. We can now run the following code in R:

    library(foreach)\nlibrary(randomForest)\nx <- matrix(runif(50000), 1000)\ny <- gl(2, 500)\nrf <- foreach(ntree=rep(250, 4), .combine=combine) %do%\nrandomForest(x, y, ntree=ntree)\nprint(rf)\n

    Implement the above code and run it with system.time to see how long it takes. Once you have done this you can change the %do% to a %dopar% and re-run. Does this provide any performance benefits?

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/#parallelisation-with-doparallel","title":"Parallelisation with doParallel","text":"

    To exploit the parallelism with dopar we need to provide parallel execution functionality and configure it to use extra cores on the system. One method to do this is using the doParallel package.

    library(doParallel)\nregisterDoParallel(8)\n

    Does this now improve performance when running the randomForest example? Experiment with different numbers of workers by changing the number set in registerDoParallel(8) to see what kind of performance you can get. Note, you may also need to change the number of clusters used in the foreach, e.g. what is specified in the rep(250, 4) part of the code, to enable more than 4 different sets to be run at once if using more than 4 workers. The number of parallel workers you can use depends on the hardware you have access to, the number of workers you specify when you set up your parallel backend, and the number of chunks of work you have to distribute with your foreach configuration.

    "},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/#cluster-parallelism","title":"Cluster parallelism","text":"

    It is possible to use different parallel backends for foreach. The one we have used in the example above creates new worker processes to provide the parallelism, but you can also use larger numbers of workers through a parallel cluster, e.g.:

    my.cluster <- parallel::makeCluster(8)\nregisterDoParallel(cl = my.cluster)\n

    By default makeCluster creates a socket cluster, where each worker is a new independent process. This can enable running the same R program across a range of systems, as it works on Linux and Windows (and other clients). However, you can also fork the existing R process to create your new workers, e.g.:

    cl <- makeCluster(5, type=\"FORK\")\n

    This saves you from having to create the variables or objects that were set up in the R program/script prior to the creation of the cluster, as they are automatically copied to the workers when using this forking mode. However, it is limited to Linux style systems and cannot scale beyond a single node.

    Once you have finished using a parallel cluster you should shut it down to free up computational resources, using stopCluster, e.g.:

    stopCluster(cl)\n

    When using clusters without the forking approach, you need to distribute objects and variables from the main process to the workers using the clusterExport function, e.g.:

    library(parallel)\nvariableA <- 10\nvariableB <- 20\nmySum <- function(x) variableA + variableB + x\ncl <- makeCluster(4)\nres <- try(parSapply(cl=cl, 1:40, mySum))\n

    The program above will fail because variableA and variableB are not present on the cluster workers. Try the above on the SDF and see what result you get.

    To fix this issue you can modify the program using clusterExport to send variableA and variableB to the workers, prior to running the parSapply e.g.:

    clusterExport(cl=cl, c('variableA', 'variableB'))\n
    "},{"location":"services/","title":"EIDF Services","text":""},{"location":"services/#computing-services","title":"Computing Services","text":"

    Data Science Virtual Desktops

    Managed File Transfer

    Managed JupyterHub

    Cerebras CS-2

    Ultra2

    "},{"location":"services/#data-management-services","title":"Data Management Services","text":"

    Data Catalogue

    "},{"location":"services/cs2/","title":"Cerebras CS-2","text":"

    Get Access

    Running codes

    "},{"location":"services/cs2/access/","title":"Cerebras CS-2","text":""},{"location":"services/cs2/access/#getting-access","title":"Getting Access","text":"

    Access to the Cerebras CS-2 system is currently by arrangement with EPCC. Please email eidf@epcc.ed.ac.uk with a short description of the work you would like to perform.

    "},{"location":"services/cs2/run/","title":"Cerebras CS-2","text":""},{"location":"services/cs2/run/#introduction","title":"Introduction","text":"

    The Cerebras CS-2 system is attached to the SDF-CS1 (Ultra2) system which serves as a host, provides access to files, the SLURM batch system etc.

    "},{"location":"services/cs2/run/#login","title":"Login","text":"

    To login to the host system, use the username and password you obtain from SAFE, along with the SSH Key you registered when creating the account. You can then login directly to the host via: ssh <username>@sdf-cs1.epcc.ed.ac.uk

    "},{"location":"services/cs2/run/#running-jobs","title":"Running Jobs","text":"

    All jobs must be run via SLURM to avoid inconveniencing other users of the system. The csrun_cpu and csrun_wse scripts themselves contain calls to srun to work with the SLURM system, so note the omission of srun in the below examples. Users can either copy these files from /home/y26/shared/bin to their own home directory should they wish, or use the centrally supplied version. In either case, ensure they are in your PATH before execution, eg:

    export PATH=$PATH:/home/y26/shared/bin\n
    "},{"location":"services/cs2/run/#run-on-the-host","title":"Run on the host","text":"

    Jobs can be run on the host system (eg simulations, test scripts) using the csrun_cpu wrapper. Here is the example from the Cerebras documentation on PyTorch. Note that this assumes csrun_cpu is in your path.

    #!/bin/bash\n#SBATCH --job-name=Example        # Job name\n#SBATCH --cpus-per-task=2         # Request 2 cores\n#SBATCH --output=example_%j.log   # Standard output and error log\n#SBATCH --time=01:00:00           # Set time limit for this job to 1 hour\n\ncsrun_cpu python-pt run.py --mode train --compile_only --params configs/<name-of-the-params-file.yaml>\n
    "},{"location":"services/cs2/run/#run-on-the-cs-2","title":"Run on the CS-2","text":"

    The following will run the above PyTorch example on the CS-2 - note the --cs_ip argument with port number passed in via the command line, and the inclusion of the --gres option to request use of the CS-2 via SLURM.

    #!/bin/bash\n#SBATCH --job-name=Example        # Job name\n#SBATCH --tasks-per-node=8        # There is only one node on SDF-CS1\n#SBATCH --cpus-per-task=16        # Each cpu is a core\n#SBATCH --gres=cs:1               # Request CS-2 system\n#SBATCH --output=example_%j.log   # Standard output and error log\n#SBATCH --time=01:00:00           # Set time limit for this job to 1 hour\n\n\ncsrun_wse python-pt run.py --mode train --cs_ip 172.24.102.121:9000 --params configs/<name-of-the-params-file.yaml>\n
    "},{"location":"services/datacatalogue/","title":"EIDF Data Catalogue Information","text":"

    QuickStart

    Tutorial

    Documentation

    Metadata information

    "},{"location":"services/datacatalogue/docs/","title":"Service Documentation","text":""},{"location":"services/datacatalogue/docs/#metadata","title":"Metadata","text":"

    For more information on metadata, please read the following: Metadata

    "},{"location":"services/datacatalogue/docs/#online-support","title":"Online support","text":""},{"location":"services/datacatalogue/metadata/","title":"EIDF Metadata Information","text":""},{"location":"services/datacatalogue/metadata/#what-is-fair","title":"What is FAIR?","text":"

    FAIR stands for Findable, Accessible, Interoperable, and Reusable, and helps emphasise the best practices with publishing and sharing data (more details: FAIR Principles)

    "},{"location":"services/datacatalogue/metadata/#what-is-metadata","title":"What is metadata?","text":"

    Metadata is data about data, to help describe the dataset. Common metadata fields are things like the title of the dataset, who produced it, where it was generated (if relevant), when it was generated, and some key words describing it.

    "},{"location":"services/datacatalogue/metadata/#what-is-ckan","title":"What is CKAN?","text":"

    CKAN is a metadata catalogue - i.e. it is a database for metadata rather than data. This will help with all aspects of FAIR:

    "},{"location":"services/datacatalogue/metadata/#what-metadata-will-we-need-to-provide","title":"What metadata will we need to provide?","text":""},{"location":"services/datacatalogue/metadata/#why-do-i-need-to-use-a-controlled-vocabulary","title":"Why do I need to use a controlled vocabulary?","text":"

    Using a standard vocabulary (such as the FAST Vocabulary) has many benefits:

    All of these advantages mean that we, as a project, don't need to think about this - there is no need to reinvent the wheel when other institutes (e.g. National Libraries) have already created suitable vocabularies. You might recognise WorldCat - it is an organisation which manages a global catalogue of ~18000 libraries world-wide, so they are in a good position to generate a comprehensive vocabulary of academic topics!

    "},{"location":"services/datacatalogue/metadata/#what-about-licensing-what-does-cc-by-sa-40-mean","title":"What about licensing? (What does CC-BY-SA 4.0 mean?)","text":"

    The R in FAIR stands for reusable - more specifically it includes this subphrase: \"(Meta)data are released with a clear and accessible data usage license\". This means that we have to tell anyone else who uses the data what they're allowed to do with it - and, under the FAIR philosophy, more freedom is better.

    CC-BY-SA 4.0 allows anyone to remix, adapt, and build upon your work (even for commercial purposes), as long as they credit you and license their new creations under the identical terms. It also explicitly includes Sui Generis Database Rights, giving rights to the curation of a database even if you don't have the rights to the items in a database (e.g. a Spotify playlist, even though you don't own the rights to each track).

    Human readable summary: Creative Commons 4.0 Human Readable. Full legal code: Creative Commons 4.0 Legal Code.

    "},{"location":"services/datacatalogue/metadata/#im-stuck-how-do-i-get-help","title":"I'm stuck! How do I get help?","text":"

    Contact the EIDF Service Team via eidf@epcc.ed.ac.uk

    "},{"location":"services/datacatalogue/quickstart/","title":"Quickstart","text":""},{"location":"services/datacatalogue/quickstart/#accessing","title":"Accessing","text":""},{"location":"services/datacatalogue/quickstart/#first-task","title":"First Task","text":""},{"location":"services/datacatalogue/quickstart/#further-information","title":"Further information","text":""},{"location":"services/datacatalogue/tutorial/","title":"Tutorial","text":""},{"location":"services/datacatalogue/tutorial/#first-query","title":"First Query","text":""},{"location":"services/gpuservice/","title":"Overview","text":"

    The EIDF GPU Service (EIDFGPUS) uses Nvidia A100 GPUs as accelerators.

    Full Nvidia A100 GPUs are connected to 40GB of dynamic memory.

    Multi-Instance GPU (MIG) technology allows multiple tasks or users to share the same GPU (similar to CPU threading).

    There are two types of MIG GPU inside the EIDFGPUS: the Nvidia A100 3G.20GB and the Nvidia A100 1G.5GB, which equate to ~1/2 and ~1/7 of a full Nvidia A100 40GB GPU respectively.

    The current specification of the EIDFGPUS is:

    The EIDFGPUS is managed using Kubernetes, with up to 8 GPUs on a single node.

    "},{"location":"services/gpuservice/#service-access","title":"Service Access","text":"

    Users should have an EIDF account - EIDF Accounts.

    Project Leads can have access to the EIDFGPUS added to their project either during the project application process or through a request to the EIDF helpdesk.

    Each project will be given a namespace to operate in and a kubeconfig file in a Virtual Machine on the EIDF DSC - information on access to VMs is available here.

    "},{"location":"services/gpuservice/#project-quotas","title":"Project Quotas","text":"

    A standard project namespace has the following initial quota (subject to ongoing review):

    Note that these quotas are the maximum use by a single project, and that during periods of high usage Kubernetes Jobs may be queued waiting for resources to become available on the cluster.

    "},{"location":"services/gpuservice/#additional-service-policy-information","title":"Additional Service Policy Information","text":"

    Additional information on service policies can be found here.

    "},{"location":"services/gpuservice/#eidf-gpu-service-tutorial","title":"EIDF GPU Service Tutorial","text":"

    This tutorial teaches users how to submit tasks to the EIDFGPUS, but it is not a comprehensive overview of Kubernetes.

    Lesson: Getting started with Kubernetes. Objectives: a. What is Kubernetes? b. How to send a task to a GPU node. c. How to define the GPU resources needed. Lesson: Requesting persistent volumes with Kubernetes. Objectives: a. What is a persistent volume? b. How to request a PV resource. Lesson: Running a PyTorch task. Objectives: a. Accessing a PyTorch container. b. Submitting a PyTorch task to the cluster. c. Inspecting the results."},{"location":"services/gpuservice/#further-reading-and-help","title":"Further Reading and Help","text":""},{"location":"services/gpuservice/policies/","title":"GPU Service Policies","text":""},{"location":"services/gpuservice/policies/#namespaces","title":"Namespaces","text":"

    Each project will be given a namespace which will have an applied quota.

    Default Quota:

    "},{"location":"services/gpuservice/policies/#kubeconfig","title":"Kubeconfig","text":"

    Each project will be assigned a kubeconfig file for access to the service which will allow operation in the assigned namespace and access to exposed service operators, for example the GPU and CephRBD operators.

    "},{"location":"services/gpuservice/policies/#kubernetes-job-time-to-live","title":"Kubernetes Job Time to Live","text":"

    All Kubernetes Jobs submitted to the service will have a Time to Live (TTL) applied via \"spec.ttlSecondsAfterFinished\" automatically. The default TTL for jobs using the service will be 1 week (604800 seconds). A completed job (in success or error state) will be deleted from the service once one week has elapsed after execution has completed. This will reduce excessive object accumulation on the service.

    Note: This policy is automated and does not require users to change their job specifications.

    "},{"location":"services/gpuservice/training/L1_getting_started/","title":"Getting started with Kubernetes","text":""},{"location":"services/gpuservice/training/L1_getting_started/#introduction","title":"Introduction","text":"

    Kubernetes (K8s) is a systems administration tool originally developed by Google to orchestrate the deployment, scaling, and management of containerised applications.

    Nvidia have created drivers to officially support clusters of Nvidia GPUs managed by K8s.

    Using K8s to manage the EIDFGPUS provides two key advantages:

    "},{"location":"services/gpuservice/training/L1_getting_started/#interacting-with-a-k8s-cluster","title":"Interacting with a K8s cluster","text":"

    An overview of the key components of a K8s container can be seen on the Kubernetes docs website.

    The primary component of a K8s cluster is a pod.

    A pod is a set of one or more containers (and their storage volumes) that share resources.

    Users define the resource requirements of a pod (i.e. number/type of GPU) and the containers to be run in the pod by writing a yaml file.

    The pod definition yaml file is sent to the cluster using the K8s API and is assigned to an appropriate node to be run.

    A node is a unit of the cluster, e.g. a group of GPUs or virtual GPUs.

    Multiple pods can be defined and maintained using several different methods depending on purpose: deployments, services and jobs; see the K8s docs for more details.

    Users interact with the K8s API using the kubectl (short for kubernetes control) commands. Some of the kubectl commands are restricted on the EIDF cluster in order to ensure project details are not shared across namespaces. Useful commands are:
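
    As a quick reference, a minimal sketch of the kubectl commands used in the remainder of this tutorial:

    kubectl create -f <yaml file>                # submit a resource definition (e.g. a pod) to the cluster\nkubectl get pods                             # list the pods in your namespace\nkubectl logs <pod name>                      # view the output of a pod\nkubectl delete pod <pod name>                # delete a pod\nkubectl cp <local path> <pod name>:<path>    # copy files between the login node and a pod\n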

    "},{"location":"services/gpuservice/training/L1_getting_started/#creating-your-first-pod","title":"Creating your first pod","text":"

    Nvidia have several prebuilt docker images to perform different tasks on their GPU hardware.

    The list of docker images is available on their website.

    This example uses their CUDA sample code simulating nbody interactions.

    Note how you specify the use of a GPU by setting limits: nvidia.com/gpu: 1.

    1. Open an editor of your choice and create the file test_NBody.yml
    2. Copy the following into the file:

      apiVersion: v1\nkind: Pod\nmetadata:\n  generateName: first-pod-\nspec:\n  restartPolicy: OnFailure\n  containers:\n  - name: cudasample\n    image: nvcr.io/nvidia/k8s/cuda-sample:nbody-cuda11.7.1\n    args: [\"-benchmark\", \"-numbodies=512000\", \"-fp64\", \"-fullscreen\"]\n    resources:\n      limits:\n        nvidia.com/gpu: 1\n
    3. Save the file and exit the editor

    4. Run kubectl create -f test_NBody.yml
    5. This will output something like:

      pod/first-pod-7gdtb created\n
    6. Run kubectl get pods

    7. This will output something like:

      pi-tt9kq                                                          0/1     Completed   0              24h\nfirst-pod-24n7n                                                   0/1     Completed   0              24h\nfirst-pod-2j5tc                                                   0/1     Completed   0              24h\nfirst-pod-2kjbx                                                   0/1     Completed   0              24h\nsample-2mnvg                                                      0/1     Completed   0              24h\nsample-4sng2                                                      0/1     Completed   0              24h\nsample-5h6sr                                                      0/1     Completed   0              24h\nsample-6bqql                                                      0/1     Completed   0              24h\nfirst-pod-7gdtb                                                   0/1     Completed   0              39s\nsample-8dnht                                                      0/1     Completed   0              24h\nsample-8pxz4                                                      0/1     Completed   0              24h\nsample-bphjx                                                      0/1     Completed   0              24h\nsample-cp97f                                                      0/1     Completed   0              24h\nsample-gcbbb                                                      0/1     Completed   0              24h\nsample-hdlrr                                                      0/1     Completed   0              24h\n
    8. View the logs of the pod you ran kubectl logs first-pod-7gdtb

    9. This will output something like:

      Run \"nbody -benchmark [-numbodies=<numBodies>]\" to measure performance.\n    -fullscreen       (run n-body simulation in fullscreen mode)\n-fp64             (use double precision floating point values for simulation)\n-hostmem          (stores simulation data in host memory)\n-benchmark        (run benchmark to measure performance)\n-numbodies=<N>    (number of bodies (>= 1) to run in simulation)\n-device=<d>       (where d=0,1,2.... for the CUDA device to use)\n-numdevices=<i>   (where i=(number of CUDA devices > 0) to use for simulation)\n-compare          (compares simulation results running once on the default GPU and once on the CPU)\n-cpu              (run n-body simulation on the CPU)\n-tipsy=<file.bin> (load a tipsy model file for simulation)\n\nNOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.\n\n> Fullscreen mode\n> Simulation data stored in video memory\n> Double precision floating point simulation\n> 1 Devices used for simulation\nGPU Device 0: \"Ampere\" with compute capability 8.0\n\n> Compute 8.0 CUDA device: [NVIDIA A100-SXM4-40GB]\nnumber of bodies = 512000\n512000 bodies, total time for 10 iterations: 10570.778 ms\n= 247.989 billion interactions per second\n= 7439.679 double-precision GFLOP/s at 30 flops per interaction\n
    10. Delete your pod with kubectl delete pod first-pod-7gdtb

    "},{"location":"services/gpuservice/training/L1_getting_started/#specifying-gpu-requirements","title":"Specifying GPU requirements","text":"

    If you create multiple pods with the same yaml file and compare their log files you may notice the CUDA device may differ from Compute 8.0 CUDA device: [NVIDIA A100-SXM4-40GB].

    This is because K8s is allocating the pod to any free node, irrespective of whether that node contains a full Nvidia A100 or a MIG instance of an Nvidia A100.

    The GPU resource request can be made more specific by adding the GPU product type the pod requires to the node selector:

    "},{"location":"services/gpuservice/training/L1_getting_started/#example-yaml-file","title":"Example yaml file","text":"
    apiVersion: v1\nkind: Pod\nmetadata:\n  generateName: first-pod-\nspec:\n  restartPolicy: OnFailure\n  containers:\n  - name: cudasample\n    image: nvcr.io/nvidia/k8s/cuda-sample:nbody-cuda11.7.1\n    args: [\"-benchmark\", \"-numbodies=512000\", \"-fp64\", \"-fullscreen\"]\n    resources:\n      limits:\n        nvidia.com/gpu: 1\n  nodeSelector:\n    nvidia.com/gpu.product: NVIDIA-A100-SXM4-40GB-MIG-1g.5gb\n
    "},{"location":"services/gpuservice/training/L2_requesting_persistent_volumes/","title":"Requesting Persistent Volumes With Kubernetes","text":"

    Pods in the K8s EIDFGPUS are intentionally ephemeral.

    They only last as long as required to complete the task that they were created for.

    Keeping pods ephemeral ensures the cluster resources are released for other users to request.

    However, this means the default storage volumes within a pod are temporary.

    If multiple pods require access to the same large data set or they output large files, then computationally costly file transfers need to be included in every pod instance.

    Instead, K8s allows you to request persistent volumes that can be mounted to multiple pods to share files or collate outputs.

    These persistent volumes remain even if the pods they are mounted to are deleted, updated or crash.

    "},{"location":"services/gpuservice/training/L2_requesting_persistent_volumes/#submitting-a-persistent-volume-claim","title":"Submitting a Persistent Volume Claim","text":"

    Before a persistent volume can be mounted to a pod, the required storage resources need to be requested and reserved for your namespace.

    A PersistentVolumeClaim (PVC) needs to be submitted to K8s to request the storage resources.

    The storage resources are held on a Ceph server which can accept requests of up to 100 TiB. Currently, each PVC can only be accessed by one pod at a time; this limitation is being addressed in further development of the EIDFGPUS. At this stage, pods can therefore mount the same PVC in sequence, but not concurrently.

    Example PVCs can be seen on the Kubernetes documentation page.

    All PVCs on the EIDFGPUS must use the csi-rbd-sc storage class.

    "},{"location":"services/gpuservice/training/L2_requesting_persistent_volumes/#example-persistentvolumeclaim","title":"Example PersistentVolumeClaim","text":"
    kind: PersistentVolumeClaim\napiVersion: v1\nmetadata:\nname: test-ceph-pvc\nspec:\naccessModes:\n- ReadWriteOnce\nresources:\nrequests:\nstorage: 2Gi\nstorageClassName: csi-rbd-sc\n

    You create the persistent volume claim by passing the yaml file to kubectl in the same way as a pod specification yaml: kubectl create -f <PVC specification yaml>. Once it has been created successfully you can interact with it using the standard kubectl commands, for example as shown below.
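    These are standard kubectl commands, shown here against the test-ceph-pvc claim defined above; only delete the claim once its data is no longer needed:

    kubectl create -f <PVC specification yaml>\nkubectl get pvc test-ceph-pvc\nkubectl describe pvc test-ceph-pvc\nkubectl delete pvc test-ceph-pvc\n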

    "},{"location":"services/gpuservice/training/L2_requesting_persistent_volumes/#mounting-a-persistent-volume-to-a-pod","title":"Mounting a persistent Volume to a Pod","text":"

    Introducing a persistent volume to a pod requires the addition of a volumeMount option to the container and a volume option linking to the PVC in the pod specification yaml.

    "},{"location":"services/gpuservice/training/L2_requesting_persistent_volumes/#example-pod-specification-yaml-with-mounted-persistent-volume","title":"Example pod specification yaml with mounted persistent volume","text":"
    apiVersion: v1\nkind: Pod\nmetadata:\nname: test-ceph-pvc-pod\nspec:\ncontainers:\n- name: trial\nimage: busybox\ncommand: [\"sleep\", \"infinity\"]\nvolumeMounts:\n- mountPath: /mnt/ceph_rbd\nname: volume\nvolumes:\n- name: volume\npersistentVolumeClaim:\nclaimName: test-ceph-pvc\n
    "},{"location":"services/gpuservice/training/L2_requesting_persistent_volumes/#accessing-the-persistent-volume-outside-a-pod","title":"Accessing the persistent volume outside a pod","text":"

    To move files in/out of the persistent volume from outside a pod you can use the kubectl cp command.

    *** On Login Node ***\nkubectl cp /home/data/test_data.csv test-ceph-pvc-pod:/mnt/ceph_rbd\n
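    Files can be copied back out of the persistent volume in the same way by reversing the source and destination; the results.csv filename below is only a placeholder:

    *** On Login Node ***\nkubectl cp test-ceph-pvc-pod:/mnt/ceph_rbd/results.csv /home/data/results.csv\n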

    For more complex file transfers and synchronisation, create a low resource pod with the persistent volume mounted.

    The rsync command can be adapted to manage file transfers into the mounted PV, following the approach in this GitHub repo.
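    As a simpler alternative, a tar stream can be piped through kubectl exec for bulk transfers. This is a sketch that assumes tar is available inside the container image (it is included in busybox) and reuses the test-ceph-pvc-pod example above:

    tar cf - -C /home/data . | kubectl exec -i test-ceph-pvc-pod -- tar xf - -C /mnt/ceph_rbd\n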

    "},{"location":"services/gpuservice/training/L2_requesting_persistent_volumes/#clean-up","title":"Clean up","text":"
    kubectl delete pod test-ceph-pvc-pod\n\nkubectl delete pvc test-ceph-pvc\n
    "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/","title":"Running a PyTorch task","text":"

    In the following lesson, we\u2019ll build an NLP neural network and train it using the EIDFGPUS.

    The model was taken from the PyTorch Tutorials.

    The lesson will be split into three parts:

    "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#load-training-data-and-ml-code-into-a-persistent-volume","title":"Load training data and ML code into a persistent volume","text":""},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#create-a-persistent-volume","title":"Create a persistent volume","text":"

    Request storage from the Ceph server by submitting a PVC to K8s (example PVC spec yaml below).

    kubectl create -f <pvc-spec-yaml>\n
    "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#example-pytorch-persistentvolumeclaim","title":"Example PyTorch PersistentVolumeClaim","text":"
    kind: PersistentVolumeClaim\napiVersion: v1\nmetadata:\nname: pytorch-pvc\nspec:\naccessModes:\n- ReadWriteOnce\nresources:\nrequests:\nstorage: 2Gi\nstorageClassName: csi-rbd-sc\n
    "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#transfer-codedata-to-persistent-volume","title":"Transfer code/data to persistent volume","text":"
    1. Check PVC has been created

      kubectl get pvc <pv-name>\n
    2. Create a lightweight pod with PV mounted (example pod below)

      kubectl create -f lightweight-pod.yaml\n
    3. Download the pytorch code

      wget https://github.com/EPCCed/eidf-docs/raw/main/docs/services/gpuservice/training/resources/example_pytorch_code.py\n
    4. Copy python script into the PV

      kubectl cp example_pytorch_code.py lightweight-pod:/mnt/ceph_rbd/\n
    5. Check files were transferred successfully

      kubectl exec lightweight-pod -- ls /mnt/ceph_rbd\n
    6. Delete lightweight pod

      kubectl delete pod lightweight-pod\n
    "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#example-lightweight-pod-specification","title":"Example lightweight pod specification","text":"
    apiVersion: v1\nkind: Pod\nmetadata:\nname: lightweight-pod\nspec:\ncontainers:\n- name: data-loader\nimage: busybox\ncommand: [\"sleep\", \"infinity\"]\nresources:\nrequests:\ncpu: 1\nvolumeMounts:\n- mountPath: /mnt/ceph_rbd\nname: volume\nvolumes:\n- name: volume\npersistentVolumeClaim:\nclaimName: pytorch-pvc\n
    "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#creating-a-pod-with-a-pytorch-container","title":"Creating a pod with a PyTorch container","text":"

    We will use the pre-made PyTorch Docker image available on Docker Hub to run the PyTorch ML model.

    The PyTorch container will be held within a pod that has the persistent volume mounted and has access to a MIG GPU.

    Submit the specification file to K8s to create the pod.

    kubectl create -f <pytorch-pod-yaml>\n
    "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#example-pytorch-pod-specification-file","title":"Example PyTorch Pod Specification File","text":"
    apiVersion: v1\nkind: Pod\nmetadata:\nname: pytorch-pod\nspec:\nrestartPolicy: Never\ncontainers:\n- name: pytorch-con\nimage: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel\ncommand: [\"python3\"]\nargs: [\"/mnt/ceph_rbd/example_pytorch_code.py\"]\nvolumeMounts:\n- mountPath: /mnt/ceph_rbd\nname: volume\nresources:\nlimits:\nnvidia.com/gpu: 1\nnodeSelector:\nnvidia.com/gpu.product: NVIDIA-A100-SXM4-40GB-MIG-1g.5gb\nvolumes:\n- name: volume\npersistentVolumeClaim:\nclaimName: pytorch-pvc\n
    "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#reviewing-the-results-of-the-pytorch-model","title":"Reviewing the results of the PyTorch model","text":"

    This is not intended to be an introduction to PyTorch; please see the online tutorial for details about the model.

    1. Check model ran to completion

      kubectl logs <pytorch-pod-name>\n
    2. Spin up lightweight pod to retrieve results

      kubectl create -f lightweight-pod.yaml\n
    3. Copy trained model back to the head node

      kubectl cp lightweight-pod:/mnt/ceph_rbd/model.pth model.pth\n
    "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#clean-up","title":"Clean up","text":"
    kubectl delete pod pytorch-pod\n\nkubectl delete pvc pytorch-pvc\n
    "},{"location":"services/gpuservice/training/L4_template_workflow/","title":"Template workflow","text":""},{"location":"services/jhub/","title":"EIDF Jupyterhub","text":"

    QuickStart

    Tutorial

    Documentation

    "},{"location":"services/jhub/docs/","title":"Service Documentation","text":""},{"location":"services/jhub/docs/#online-support","title":"Online support","text":""},{"location":"services/jhub/quickstart/","title":"Quickstart","text":""},{"location":"services/jhub/quickstart/#accessing","title":"Accessing","text":""},{"location":"services/jhub/quickstart/#first-task","title":"First Task","text":""},{"location":"services/jhub/quickstart/#further-information","title":"Further information","text":""},{"location":"services/jhub/tutorial/","title":"Tutorial","text":""},{"location":"services/jhub/tutorial/#first-notebook","title":"First notebook","text":""},{"location":"services/mft/","title":"MFT","text":""},{"location":"services/mft/quickstart/","title":"Managed File Transfer","text":""},{"location":"services/mft/quickstart/#getting-to-the-mft","title":"Getting to the MFT","text":"

    The EIDF MFT can be accessed at https://eidf-mft.epcc.ed.ac.uk

    "},{"location":"services/mft/quickstart/#how-it-works","title":"How it works","text":"

    The MFT provides a 'drop' zone for the project. All users in a given project will have access to the same shared transfer area. They will have the ability to upload, download, and delete files from the project's transfer area. This area is linked to a directory within the project's space on the shared backend storage.

    Uploaded files are owned by the Linux user 'nobody' and by the group ID of the project they are uploaded to. They have the permissions: owner = rw, group = r, others = r.

    Once the file is opened on the VM, the user that opened it will become the owner and they can make further changes.

    "},{"location":"services/mft/quickstart/#gaining-access-to-the-mft","title":"Gaining access to the MFT","text":"

    By default a project does not have access to the MFT; this has to be enabled. Currently this can be done by the PI sending a request to the EIDF Helpdesk. Once the project is enabled within the MFT, every user within the project will be able to log into the MFT using their usual EIDF credentials.

    "},{"location":"services/mft/sftp/","title":"SFTP","text":"

    Coming Soon

    "},{"location":"services/mft/using-the-mft/","title":"Using the MFT Web Portal","text":""},{"location":"services/mft/using-the-mft/#logging-in","title":"Logging in","text":"

    When you reach the MFT home page you can log in using your usual VM project credentials.

    You will then be asked what type of session you would like to start. Select New Web Client or Web Client and continue.

    "},{"location":"services/mft/using-the-mft/#file-ingress","title":"File Ingress","text":"

    Once logged in, all files currently in the project's transfer directory will be displayed. Click the 'Upload' button under the 'Home' title to open the dialogue for file upload. You can then drag and drop files in, or click 'Browse' to find them locally.

    Once uploaded, the file will be immediately accessible from the project area, and can be used within any EIDF service which has the filesystem mounted.

    "},{"location":"services/mft/using-the-mft/#file-egress","title":"File Egress","text":"

    File egress works the other way round: by placing a file into the project transfer directory on the VM, it becomes available in the MFT portal for download.

    "},{"location":"services/mft/using-the-mft/#file-management","title":"File Management","text":"

    Directories can be created within the project transfer directory, for example 'Import' and 'Export', to allow for better file management. A file deleted from either the MFT portal or from the VM itself is removed from both, as the two locations point at the same file. It is only stored in one place, so changes made in either location apply to the file everywhere.

    "},{"location":"services/rstudioserver/","title":"EIDF R Studio Server","text":"

    QuickStart

    Tutorial

    Documentation

    "},{"location":"services/rstudioserver/docs/","title":"Service Documentation","text":""},{"location":"services/rstudioserver/docs/#online-support","title":"Online support","text":""},{"location":"services/rstudioserver/quickstart/","title":"Quickstart","text":""},{"location":"services/rstudioserver/quickstart/#accessing","title":"Accessing","text":""},{"location":"services/rstudioserver/quickstart/#first-task","title":"First Task","text":""},{"location":"services/rstudioserver/quickstart/#creating-a-new-r-script","title":"Creating a New R Script","text":"

    Your RStudio Server session has been initialised now. If you are participating in a workshop, then all the packages and data required for the workshop have been loaded into the workspace. All that remains is to create a new R script to contain your code!

    1. In the RStudio Server UI, open the File menu item at the far left of the main menu bar at the top of the page
    2. Hover over the \u2018New File\u2019 sub-menu item, then select \u2018R Script\u2019 from the expanded menu
    3. A new window pane will appear in the UI as shown below, and you are now ready to start adding the R code to your script! RStudio Server UI screen with new script
    "},{"location":"services/rstudioserver/quickstart/#further-information","title":"Further information","text":""},{"location":"services/rstudioserver/tutorial/","title":"Tutorial","text":""},{"location":"services/rstudioserver/tutorial/#first-notebook","title":"First notebook","text":""},{"location":"services/ultra2/","title":"Ultra2 Large Memory System","text":"

    Get Access

    Running codes

    "},{"location":"services/ultra2/access/","title":"Ultra2 Large Memory System","text":""},{"location":"services/ultra2/access/#getting-access","title":"Getting Access","text":"

    Access to the Ultra2 system (also referred to as the SDF-CS1 system) is currently by arrangement with EPCC. Please email eidf@epcc.ed.ac.uk with a short description of the work you would like to perform.

    "},{"location":"services/ultra2/run/","title":"Ultra2 High Memory System","text":""},{"location":"services/ultra2/run/#introduction","title":"Introduction","text":"

    The Ultra2 system (also called the SDF-CS1 system) is a single logical CPU system based at EPCC. It is suitable for running jobs which require large volumes of non-distributed memory (as opposed to a cluster).

    "},{"location":"services/ultra2/run/#specifications","title":"Specifications","text":"

    The system is an HPE SuperDome Flex containing 576 individual cores in an SMT-1 arrangement (1 thread per core). It has 18 TB of memory available to users. Home directories are network mounted from the EIDF e1000 Lustre filesystem, although some local NVMe storage is available for temporary file storage during runs.

    "},{"location":"services/ultra2/run/#login","title":"Login","text":"

    To log in to the host system, use the username and password you obtained from SAFE, along with the SSH key you registered when creating the account. You can then log in directly to the host via: ssh <username>@sdf-cs1.epcc.ed.ac.uk

    "},{"location":"services/ultra2/run/#software","title":"Software","text":"

    The primary software provided is Intel's oneAPI suite, containing MPI compilers and runtimes, debuggers and the VTune performance analyser. Standard GNU compilers are also available. The oneAPI suite can be loaded by sourcing the shell script:

    source  /opt/intel/oneapi/setvars.sh\n
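    To confirm the oneAPI environment has loaded correctly, a quick check is to locate mpirun and print its version (the exact paths and version strings will differ):

    which mpirun\nmpirun --version\n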
    "},{"location":"services/ultra2/run/#running-jobs","title":"Running Jobs","text":"

    All jobs must be run via SLURM to avoid inconveniencing other users of the system. Users should not run jobs directly. Note that the system has one logical processor with a large number of threads and thus appears to SLURM as a single node. This is intentional.

    "},{"location":"services/ultra2/run/#queue-limits","title":"Queue limits","text":"

    We kindly request that users limit their maximum total running job size to 288 cores and 4 TB of memory, whether that be within a single job or divided across a number of jobs. This may be enforced via SLURM in the future.

    "},{"location":"services/ultra2/run/#mpi-jobs","title":"MPI jobs","text":"

    An example script to run a multi-process MPI \"Hello world\" program is shown below.

    #!/usr/bin/env bash\n#SBATCH -J HelloWorld\n#SBATCH --nodes=1\n#SBATCH --tasks-per-node=4\n#SBATCH --nodelist=sdf-cs1\n#SBATCH --partition=standard\n##SBATCH --exclusive\n\n\necho \"Running on host ${HOSTNAME}\"\necho \"Using ${SLURM_NTASKS_PER_NODE} tasks per node\"\necho \"Using ${SLURM_CPUS_PER_TASK} cpus per task\"\nlet mpi_threads=${SLURM_NTASKS_PER_NODE}*${SLURM_CPUS_PER_TASK}\necho \"Using ${mpi_threads} MPI threads\"\n\n# Source oneAPI to ensure mpirun available\nif [[ -z \"${SETVARS_COMPLETED}\" ]]; then\nsource /opt/intel/oneapi/setvars.sh\nfi\n\n# mpirun invocation for Intel suite.\nmpirun -n ${mpi_threads} ./helloworld.exe\n
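    Assuming the script above is saved as, for example, helloworld.slurm, it can be submitted and monitored with the standard SLURM commands:

    sbatch helloworld.slurm\nsqueue -u $USER\n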
    "},{"location":"services/virtualmachines/docs/","title":"Service Documentation","text":""},{"location":"services/virtualmachines/docs/#project-management-guide","title":"Project Management Guide","text":""},{"location":"services/virtualmachines/docs/#required-member-permissions","title":"Required Member Permissions","text":"

    VMs and user accounts can only be managed by project members with Cloud Admin permissions. This includes the principal investigator (PI) of the project and all project managers (PMs). Through SAFE, the PI can designate project managers, and the PI and PMs can grant a project member the Cloud Admin role:

    1. Click \"Manage Project in SAFE\" at the bottom of the project page (opens a new tab)
    2. On the project management page in SAFE, scroll down to \"Manage Members\"
    3. Click Add project manager or Set member permissions

    For details please refer to the SAFE documentation: How can I designate a user as a project manager?

    "},{"location":"services/virtualmachines/docs/#create-a-vm","title":"Create a VM","text":"

    To create a new VM:

    1. Select the project from the list of your projects, e.g. eidfxxx
    2. Click on the 'New Machine' button
    3. Complete the 'Create Machine' form as follows:

      1. Provide an appropriate name, e.g. dev-01. The project code will be prepended automatically to your VM name, in this case your VM would be named eidfxxx-dev-01.
      2. Select a suitable operating system
      3. Select a machine specification that is suitable
      4. Choose the required disk size (in GB) or leave blank for the default
      5. Tick the checkbox \"Configure RDP access\" if you would like to install RDP and configure VDI connections via RDP for your VM.
      6. Select the package installations from the software catalogue drop-down list, or \"None\" if you don't require any pre-installed packages
    4. Click on 'Create'

    5. You should see the new VM listed under the 'Machines' table on the project page and the status as 'Creating'
    6. Wait while the job to launch the VM completes. This may take up to 10 minutes, depending on the configuration you requested. You have to reload the page to see updates.
    7. Once the job has completed successfully the status shows as 'Active' in the list of machines.

    Before you press Create, ensure that the machine size selected (number of CPUs and RAM) does not exceed your remaining quota; otherwise the request will fail.

    In the list of 'Machines' on the project page in the portal, click on the name of the new VM to see its configuration and properties, including the machine specification, its 10.24.*.* IP address and any configured VDI connections.

    "},{"location":"services/virtualmachines/docs/#quota-and-usage","title":"Quota and Usage","text":"

    Each project has a quota for the number of instances, total number of vCPUs, total RAM and storage. You will not be able to create a VM if it exceeds the quota.

    You can view and refresh the project usage compared to the quota in a table near the bottom of the project page. This table will be updated automatically when VMs are created or removed, and you can refresh it manually by pressing the \"Refresh\" button at the top of the table.

    Please contact the helpdesk if your quota requirements have changed.

    "},{"location":"services/virtualmachines/docs/#add-a-user-account","title":"Add a user account","text":"

    User accounts allow project members to log in to the VMs in a project. The Project PI and project managers manage user accounts for each member of the project. Users usually use one account (username and password) to log in to all the VMs they can access in the same project; however, a user may have multiple accounts in a project, for example for different roles.

    1. From the project page in the portal click on the 'Create account' button under the 'Project Accounts' table at the bottom
    2. Complete the 'Create User Account' form as follows:

      1. Choose 'Account user name': this could be something sensible like the first and last names concatenated (or initials) together with the project name. The username is unique across all EPCC systems so the user will not be able to reuse this name in another project once it has been assigned.
      2. Select the project member from the 'Account owner' drop-down field
      3. Click 'Create'

    The new account is allocated a temporary password which the account owner can view in their account details.

    "},{"location":"services/virtualmachines/docs/#adding-access-to-the-vm-for-a-user","title":"Adding Access to the VM for a User","text":"

    User accounts can be granted or denied access to existing VMs.

    1. Click 'Manage' next to an existing user account in the 'Project Accounts' table on the project page, or click on the account name and then 'Manage' on the account details page
    2. Select the checkboxes in the column \"Access\" for the VMs to which this account should have access or uncheck the ones without access
    3. Click the 'Update' button
    4. After a few minutes, the job to give them access to the selected VMs will complete and the account status will show as \"Active\".

    If a user is already logged in to the VDI at https://eidf-vdi.epcc.ed.ac.uk/vdi, newly added connections may not appear in their connections list immediately. They must log out and log in again to refresh the connection information, or wait until the login token expires and is refreshed automatically - this might take a while.

    If a user only has one connection available in the VDI they will be automatically directed to the VM with the default connection.

    "},{"location":"services/virtualmachines/docs/#sudo-permissions","title":"Sudo permissions","text":"

    A project manager or PI may also grant sudo permissions to users on selected VMs. Management of sudo permissions must be requested in the project application - if it was not requested or the request was denied the functionality described below is not available.

    1. Click 'Manage' next to an existing user account in the 'Project Accounts' table on the project page
    2. Select the checkboxes in the column \"Sudo\" for the VMs on which this account is granted sudo permissions or uncheck to remove permissions
    3. Make sure \"Access\" is also selected for the sudo VMs to allow login
    4. Click the 'Update' button

    After a few minutes, the job to give the user account sudo permissions on the selected VMs will complete. On the account detail page a \"sudo\" badge will appear next to the selected VMs.

    Please contact the helpdesk if sudo permission management is required but is not available in your project.

    "},{"location":"services/virtualmachines/docs/#first-login","title":"First login","text":"

    A new user account is allocated a temporary password which the user must reset before they can log in for the first time. The password reset will not work when logging in via RDP - they must use an SSH connection, either in the VDI or via an SSH gateway.

    The user can view the temporary password in their account details page.

    "},{"location":"services/virtualmachines/docs/#updating-an-existing-machine","title":"Updating an existing machine","text":""},{"location":"services/virtualmachines/docs/#adding-rdp-access","title":"Adding RDP Access","text":"

    If you did not select RDP access when you created the VM you can add it later:

    1. Open the VM details page by selecting the name on the project page
    2. Click on 'Configure RDP'
    3. The configuration job runs for a few minutes.

    Once the RDP job is completed, all users that are allowed to access the VM will also be permitted to use the RDP connection.

    "},{"location":"services/virtualmachines/docs/#software-catalogue","title":"Software catalogue","text":"

    You can install packages from the software catalogue at a later time, even if you didn't select a package when first creating the machine.

    1. Open the VM details page by selecting the name on the project page
    2. Click on 'Software Catalogue'
    3. Select the configuration you wish to install and press 'Submit'
    4. The configuration job runs for a few minutes.
    "},{"location":"services/virtualmachines/flavours/","title":"Flavours","text":"

    These are the current Virtual Machine (VM) flavours (configurations) available on the Virtual Desktop cloud service. Note that all VMs are built and configured using the EIDF Portal by PIs/Cloud Admins of projects, except GPU flavours which must be requested via the helpdesk or the support request form.

    Flavour Name        vCPUs  DRAM in GB  Pinned Cores  GPU
    general.v2.tiny     1      2           No            No
    general.v2.small    2      4           No            No
    general.v2.medium   4      8           No            No
    general.v2.large    8      16          No            No
    general.v2.xlarge   16     32          No            No
    capability.v2.8cpu  8      112         Yes           No
    capability.v2.16cpu 16     224         Yes           No
    capability.v2.32cpu 32     448         Yes           No
    capability.v2.48cpu 48     672         Yes           No
    capability.v2.64cpu 64     896         Yes           No
    gpu.v1.8cpu         8      128         Yes           Yes
    gpu.v1.16cpu        16     256         Yes           Yes
    gpu.v1.32cpu        32     512         Yes           Yes
    gpu.v1.48cpu        48     768         Yes           Yes
    "},{"location":"services/virtualmachines/policies/","title":"EIDF Data Science Cloud Policies","text":""},{"location":"services/virtualmachines/policies/#end-of-life-policy-for-user-accounts-and-projects","title":"End of Life Policy for User Accounts and Projects","text":""},{"location":"services/virtualmachines/policies/#what-happens-when-an-account-or-project-is-no-longer-required-or-a-user-leaves-a-project","title":"What happens when an account or project is no longer required, or a user leaves a project","text":"

    These situations are most likely to come about during one of the following scenarios:

    1. The retirement of a project (usually one month after the project ends)
    2. A Principal Investigator (PI) tidying up a project by requesting the removal of user(s) no longer working on the project
    3. A user wishing their own account to be removed
    4. A failure by a user to respond to the annual request to verify their email address held in the SAFE

    For each user account involved, assuming the relevant consent is given, the next step can be summarised as one of the following actions:

    It will be possible to have the account re-activated up until resources are removed (as outlined above); after this time it will be necessary to re-apply.

    A user's right to use EIDF is granted by a project. Our policy is to treat the account and associated data as the property of the PI as the owner of the project and its resources. It is the user's responsibility to ensure that any data they store on the EIDF DSC is handled appropriately and to copy off anything that they wish to keep to an appropriate location.

    A project manager or the PI can revoke a user's access to accounts within their project at any time, by locking, removing or re-owning the account as appropriate.

    A user may give up access to an account and return it to the control of the project at any time.

    When a project is due to end, the PI will receive notification of the closure of the project and its accounts one month before all project accounts and DSC resources (VMs, data volumes) are closed and cleaned or removed.

    "},{"location":"services/virtualmachines/quickstart/","title":"Quickstart","text":"

    Projects using the Virtual Desktop cloud service are accessed via the EIDF Portal.

    Authentication is provided by SAFE, so if you do not have an active web browser session in SAFE, you will be redirected to the SAFE log on page. If you do not have a SAFE account, follow the instructions in the SAFE documentation on how to register and receive your password.

    "},{"location":"services/virtualmachines/quickstart/#accessing-your-projects","title":"Accessing your projects","text":"
    1. Log into the portal at https://portal.eidf.ac.uk/. The login will redirect you to the SAFE.

    2. View the projects that you have access to at https://portal.eidf.ac.uk/project/

    "},{"location":"services/virtualmachines/quickstart/#joining-a-project","title":"Joining a project","text":"
    1. Navigate to https://portal.eidf.ac.uk/project/ and click the link to \"Request access\", or choose \"Request Access\" in the \"Project\" menu.

    2. Select the project that you want to join in the \"Project\" dropdown list - you can search for the project name or the project code, e.g. \"eidf0123\".

    Now you have to wait for your PI or project manager to accept your request to join.

    "},{"location":"services/virtualmachines/quickstart/#accessing-a-vm","title":"Accessing a VM","text":"
    1. View your user accounts on the project page.

    2. Click on an account name to view details of the VMs that are you allowed to access with this account, and look up the temporary password allocated to the account.

    3. Follow the link to the Guacamole login or log in directly at https://eidf-vdi.epcc.ed.ac.uk/vdi/. Please see the VDI guide for more information.

    4. Choose the SSH connection to log in for the first time. You will be asked to reset the password.

    Warning

    Do not use RDP to log in for the first time, as you have to reset your password. Always use SSH to log in to the VM for the first time. This can be done either via the VDI or the EIDF-Gateway Jump Host as described here.

    "},{"location":"services/virtualmachines/quickstart/#further-information","title":"Further information","text":"

    Managing VMs: Project management guide to creating, configuring and removing VMs and managing user accounts in the portal.

    Virtual Desktop Interface: Working with the VDI interface.

    "},{"location":"status/","title":"EIDF Service Status","text":"

    The table below represents the broad status of each EIDF service.

    Service Status EIDF Portal VM SSH Gateway VM VDI Gateway Virtual Desktops Cerebras CS-2 SuperDome Flex (SDF-CS1 / Ultra2)"},{"location":"status/#maintenance-sessions","title":"Maintenance Sessions","text":"

    There will be a service outage on the 3rd Thursday of every month from 9am to 5pm. We keep maintenance downtime to a minimum on the service but do occasionally need to perform essential work on the system. Maintenance sessions are used to ensure that:

    The service will be returned to service ahead of 5pm if all the work is completed early.

    "}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"EIDF User Documentation","text":"

    The Edinburgh International Data Facility (EIDF) is built and operated by EPCC at the University of Edinburgh. EIDF is a place to store, find and work with data of all kinds. You can find more information on the service and the research it supports on the EIDF website.

    For more information or for support with our services, please email eidf@epcc.ed.ac.uk in the first instance.

    "},{"location":"#what-the-documentation-covers","title":"What the documentation covers","text":"

    This documentation gives more in-depth coverage of current EIDF services. It is aimed primarily at developers or power users.

    "},{"location":"#contributing-to-the-documentation","title":"Contributing to the documentation","text":"

    The source for this documentation is publicly available in the EIDF documentation Github repository so that anyone can contribute to improve the documentation for the service. Contributions can be in the form of improvements or additions to the content and/or addition of Issues providing suggestions for how it can be improved.

    Full details of how to contribute can be found in the README.md file of the repository.

    This documentation set is a work in progress.

    "},{"location":"#credits","title":"Credits","text":"

    This documentation draws on the ARCHER2 National Supercomputing Service documentation.

    "},{"location":"access/","title":"Accessing EIDF","text":"

    Some EIDF services are accessed via a Web browser and some by \"traditional\" command-line ssh.

    All EIDF services use the EPCC SAFE service management back end, to ensure compatibility with other EPCC high-performance computing services.

    "},{"location":"access/#web-access-to-virtual-machines","title":"Web Access to Virtual Machines","text":"

    The Virtual Desktop VM service is browser-based, providing a virtual desktop interface (Apache Guacamole) for \"desktop-in-a-browser\" access. Applications to use the VM service are made through the EIDF Portal.

    EIDF Portal: how to ask to join an existing EIDF project and how to apply for a new project

    VDI access to virtual machines: how to connect to the virtual desktop interface.

    "},{"location":"access/#ssh-access-to-virtual-machines","title":"SSH Access to Virtual Machines","text":"

    Users with the appropriate permissions can also use ssh to log in to Virtual Desktop VMs.

    "},{"location":"access/#ssh-access-to-computing-services","title":"SSH Access to Computing Services","text":"

    Includes access to the following services:

    To log in to most command-line services with ssh you should use the username and password you obtained from SAFE when you applied for access, along with the SSH key you registered when creating the account. You can then log in to the host following the appropriately linked instructions above.

    "},{"location":"access/project/","title":"EIDF Portal","text":"

    Projects using the Virtual Desktop cloud service are accessed via the EIDF Portal.

    The EIDF Portal uses EPCC's SAFE service management software to manage user accounts across all EPCC services. To log in to the Portal you will first be redirected to the SAFE log on page. If you do not have a SAFE account, follow the instructions in the SAFE documentation on how to register and receive your password.

    "},{"location":"access/project/#how-to-request-to-join-a-project","title":"How to request to join a project","text":"

    Log in to the EIDF Portal and navigate to \"Projects\" and choose \"Request access\". Select the project that you want to join in the \"Project\" dropdown list - you can search for the project name or the project code, e.g. \"eidf0123\".

    Now you have to wait for your PI or project manager to accept your request to register.

    "},{"location":"access/project/#how-to-apply-for-a-project-as-a-principal-investigator","title":"How to apply for a project as a Principal Investigator","text":""},{"location":"access/project/#create-a-new-project-application","title":"Create a new project application","text":"

    Navigate to the EIDF Portal and log in via SAFE if necessary (see above).

    Once you have logged in click on \"Applications\" in the menu and choose \"New Application\".

    1. Fill in the Application Title - this will be the name of the project once it is approved.
    2. Choose a start date and an end date for your project.
    3. Click \"Create\" to create your project application.

    Once the application has been created you see an overview of the form you are required to fill in. You can revisit the application at any time by clicking on \"Applications\" and choosing \"Your applications\" to display all your current and past applications and their status, or follow the link https://portal.eidf.ac.uk/proposal/.

    "},{"location":"access/project/#populate-a-project-application","title":"Populate a project application","text":"

    Fill in each section of the application as required:

    You can edit and save each section separately and revisit the application at a later time.

    "},{"location":"access/project/#datasets","title":"Datasets","text":"

    You are required to fill in a \"Dataset\" form for each dataset that you are planning to store and process as part of your project.

    We are required to ensure that projects involving \"sensitive\" data have the necessary permissions in place. The answers to these questions will enable us to decide what additional documentation we may need, and whether your project may need to be set up in an independently governed Safe Haven. There may be some projects we are simply unable to host for data protection reasons.

    "},{"location":"access/project/#resource-requirements","title":"Resource Requirements","text":"

    Add an estimate for each size and type of VM that is required.

    "},{"location":"access/project/#submission","title":"Submission","text":"

    When you are happy with your application, click \"Submit\". If there are missing fields that are required these are highlighted and your submission will fail.

    Once your submission is successful the application status is marked as \"Submitted\", and you then have to wait while the EIDF approval team considers your application. You may be contacted if there are any questions regarding your application or if further information is required, and you will be notified of the outcome of your application.

    "},{"location":"access/project/#approved-project","title":"Approved Project","text":"

    If your application was approved, refer to Data Science Virtual Desktops: Quickstart for how to view your project, and to Data Science Virtual Desktops: Managing VMs for how to manage a project and how to create virtual machines and user accounts.

    "},{"location":"access/ssh/","title":"SSH Access to Virtual Machines using the EIDF-Gateway Jump Host","text":"

    The EIDF-Gateway is an SSH gateway suitable for accessing EIDF Services via a console or terminal. As the gateway cannot be 'landed' on, a user can only pass through it and so the destination (the VM IP) has to be known for the service to work. Users connect to their VM through the jump host using their given accounts.

    "},{"location":"access/ssh/#generating-and-adding-an-ssh-key","title":"Generating and Adding an SSH Key","text":"

    In order to make use of the EIDF-Gateway, your EIDF account needs an SSH-Key associated with it. If you added one while creating your EIDF account, you can skip this step.

    "},{"location":"access/ssh/#check-for-an-existing-ssh-key","title":"Check for an existing SSH Key","text":"

    To check if you have an SSH Key associated with your account:

    1. Login to the Portal
    2. Select 'Your Projects'
    3. Select your project name
    4. Select your username

    If there is an entry under 'Credentials', then you're all set up. If not, you'll need to generate an SSH key; to do this:

    "},{"location":"access/ssh/#generate-a-new-ssh-key","title":"Generate a new SSH Key","text":"
    1. Open a new window of whatever terminal you will use to SSH to EIDF.
    2. Generate a new SSH Key: $ ssh-keygen
    3. Input the directory and filename of the key. It's recommended to name it something like 'eidf-gateway' so it's easier to identify later (see the example command after this list)
    4. Press enter to finish generating the key
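    For example, a minimal sketch of the command (the ed25519 key type and the eidf-gateway filename are only suggestions):

    ssh-keygen -t ed25519 -f ~/.ssh/eidf-gateway\n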
    "},{"location":"access/ssh/#adding-the-new-ssh-key-to-your-account-via-the-portal","title":"Adding the new SSH Key to your account via the Portal","text":"
    1. Log in to the Portal
    2. Select 'Your Projects'
    3. Select the relevant project
    4. Select your username
    5. Select the plus button under 'Credentials'
    6. Select 'Choose File' to upload the PUBLIC (.pub) ssh key generated in the last step, or open the .pub file you just created and copy its contents into the text box.
    7. Click 'Upload Credential' It should look something like this:
    8. "},{"location":"access/ssh/#adding-a-new-ssh-key-via-safe","title":"Adding a new SSH Key via SAFE","text":"

      This should not be necessary for most users, so only follow this process if you have an issue or have been told to by the EPCC Helpdesk. If you need to add an SSH Key directly to SAFE, you can follow this guide. However, select your '[username]@EIDF' login account, not 'Archer2' as specified in that guide.

      "},{"location":"access/ssh/#using-the-ssh-key-to-access-eidf-windows-and-linux","title":"Using the SSH-Key to access EIDF - Windows and Linux","text":"
      1. From your local terminal, import the SSH Key you generated above: $ ssh-add [sshkey]
      2. This should return \"Identity added [Path to SSH Key]\" if successful. You can then follow the steps below to access your VM.
      "},{"location":"access/ssh/#accessing-from-windows","title":"Accessing from Windows","text":"

    Windows will require the installation of the OpenSSH Client or MobaXTerm to use SSH. PuTTY can also be used but won\u2019t be covered in this tutorial.

      "},{"location":"access/ssh/#installing-and-using-openssh","title":"Installing and using OpenSSH","text":"
      1. Click the \u2018Start\u2019 button at the bottom of the screen
      2. Click the \u2018Settings\u2019 cog icon
      3. Search in the top bar \u2018Add or Remove Programs\u2019 and select the entry
      4. Select the \u2018Optional Features\u2019 blue text link
      5. If \u2018OpenSSH Client\u2019 is not under \u2018Installed Features\u2019, click the \u2018Add a Feature\u2019 button
      6. Search \u2018OpenSSH Client\u2019
      7. Select the check box next to \u2018OpenSSH Client\u2019 and click \u2018Install\u2019
      8. Once this is installed, you can reach your VM by opening CMD and running: $ ssh -J [username]@eidf-gateway.epcc.ed.ac.uk [username]@[vm_ip]
      "},{"location":"access/ssh/#installing-mobaxterm","title":"Installing MobaXTerm","text":"
      1. Download MobaXTerm from https://mobaxterm.mobatek.net/
      2. Once installed click the \u2018Session\u2019 button in the top left corner
      3. Click \u2018SSH\u2019
      4. In the \u2018Remote Host\u2019 section, specify the VM IP
      5. Click the \u2018Network Settings\u2019 Tab
      6. Click the \u2018SSH Gateway (jump host)\u2019 button in the middle
      7. Under Gateway Host, specify: eidf-gateway.epcc.ed.ac.uk
      8. Under Username, specify your username
      9. Click \u2018OK\u2019
      10. Click \u2018OK\u2019 to launch the session
      11. For the EIDF-Gateway and VM login prompts, use your password
      "},{"location":"access/ssh/#accessing-from-macoslinux","title":"Accessing From MacOS/Linux","text":"

    OpenSSH is usually installed by default on Linux and macOS, so you can access the gateway natively from the terminal. The '-J' flag is used to specify that we will access the second specified host by jumping through the first specified host, as in the example below.

      ssh -J [username]@jumphost [username]@target\n

      To access EIDF Services:

      ssh -J [username]@eidf-gateway.epcc.ed.ac.uk [username]@[vm_ip]\n
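    If you connect regularly, an entry in your local SSH client configuration can save typing the -J flag each time. The sketch below is only an example: the eidf-vm host alias, the key path and the bracketed placeholders are assumptions to be replaced with your own details.

    # ~/.ssh/config entry (sketch) - replace the placeholders with your own username, VM IP and key path\nHost eidf-vm\n    HostName [vm_ip]\n    User [username]\n    IdentityFile ~/.ssh/eidf-gateway\n    ProxyJump [username]@eidf-gateway.epcc.ed.ac.uk\n

    With this entry in place the VM can be reached with ssh eidf-vm.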
      "},{"location":"access/ssh/#password-resets-via-the-eidf-gateway","title":"Password Resets via the EIDF-Gateway","text":"

    You will have to connect to your VM via SSH before you can log in with RDP, as your initial password needs to be reset, which can only be done via SSH. You can reset your password through the SSH Gateway by connecting to it directly:

      ssh [username]@eidf-gateway.epcc.ed.ac.uk\n

    Your first attempt to log in to your account using the SSH Gateway will prompt you for your initial password (provided in the portal) like a normal login. If this is successful you will be asked to choose a new password: you will be asked for your initial password again, followed by two entries of your new password. This will reset the password for your account for both the gateway and the VM. Once this reset has been completed, the session will disconnect and you can log in via SSH again using the newly set password.

      You will not be able to directly connect to the gateway again, so to connect to your VM, jump through the SSH Gateway:

      ssh -J [username]@eidf-gateway.epcc.ed.ac.uk [username]@[vm_ip]\n
      "},{"location":"access/virtualmachines-vdi/","title":"Virtual Machines (VMs) and the EIDF Virtual Desktop Interface (VDI)","text":"

      Using the EIDF VDI, members of EIDF projects can connect to VMs that they have been granted access to. The EIDF VDI is a web portal that displays the connections to VMs a user has available to them, and then those connections can be easily initiated by clicking on them in the user interface. Once connected to the target VM, all interactions are mediated through the user's web browser by the EIDF VDI.

      "},{"location":"access/virtualmachines-vdi/#login-to-the-eidf-vdi","title":"Login to the EIDF VDI","text":"

      Once your membership request to join the appropriate EIDF project has been approved, you will be able to login to the EIDF VDI at https://eidf-vdi.epcc.ed.ac.uk/vdi.

    Authentication to the VDI is provided by SAFE, so if you do not have an active web browser session in SAFE, you will be redirected to the SAFE log on page. If you do not have a SAFE account, follow the instructions in the SAFE documentation on how to register and receive your password.

      "},{"location":"access/virtualmachines-vdi/#navigating-the-eidf-vdi","title":"Navigating the EIDF VDI","text":"

      After you have been authenticated through SAFE and logged into the EIDF VDI, if you have multiple connections available to you that have been associated with your user (typically in the case of research projects), you will be presented with the VDI home screen as shown below:

      VDI home page with list of available VM connections

      Adding connections

      Note that if a project manager has added a new connection for you it may not appear in the list of connections immediately. You must log out and log in again to refresh your connections list.

      "},{"location":"access/virtualmachines-vdi/#connecting-to-a-vm","title":"Connecting to a VM","text":"

      If you have only one connection associated with your VDI user account (typically in the case of workshops), you will be automatically connected to the target VM's virtual desktop. Once you are connected to the VM, you will be asked for your username and password as shown below (if you are participating in a workshop, then you may not be asked for credentials)

      VM virtual desktop connection user account login screen

      Once your credentials have been accepted, you will be connected to your VM's desktop environment. For instance, the screenshot below shows a resulting connection to a Xubuntu 20.04 VM with the Xfce desktop environment.

      VM virtual desktop

      "},{"location":"access/virtualmachines-vdi/#vdi-features-for-the-virtual-desktop","title":"VDI Features for the Virtual Desktop","text":"

      The EIDF VDI is an instance of the Apache Guacamole clientless remote desktop gateway. Since the connection to your VM virtual desktop is entirely managed through Guacamole in the web browser, there are some additional features to be aware of that may assist you when using the VDI.

      "},{"location":"access/virtualmachines-vdi/#the-vdi-menu","title":"The VDI Menu","text":"

      The Guacamole menu is a sidebar which is hidden until explicitly shown. On a desktop or other device which has a hardware keyboard, you can show this menu by pressing <Ctrl> + <Alt> + <Shift> on a Windows PC client, or <Ctrl> + <Command> + <Shift> on a Mac client. To hide the menu, you press the same key combination once again. The menu provides various options, including:

      "},{"location":"access/virtualmachines-vdi/#clipboard-copy-and-paste-functionality","title":"Clipboard Copy and Paste Functionality","text":"

      After you have activated the Guacamole menu using the key combination above, at the top of the menu is a text area labeled \u201cclipboard\u201d along with some basic instructions:

      Text copied/cut within Guacamole will appear here. Changes to the text below will affect the remote clipboard.

      The text area functions as an interface between the remote clipboard and the local clipboard. Text from the local clipboard can be pasted into the text area, causing that text to be sent to the clipboard of the remote desktop. Similarly, if you copy or cut text within the remote desktop, you will see that text within the text area, and can manually copy it into the local clipboard if desired.

      You can use the standard keyboard shortcuts to copy text from your client PC or Mac to the Guacamole menu clipboard, then again copy that text from the Guacamole menu clipboard into an application or CLI terminal on the VM's remote desktop. An example of using the copy and paste clipboard is shown in the screenshot below.

      The EIDF VDI Clipboard

      "},{"location":"access/virtualmachines-vdi/#keyboard-language-and-layout-settings","title":"Keyboard Language and Layout Settings","text":"

      For users who do not have standard English (UK) keyboard layouts, key presses can have unexpected translations as they are transmitted to your VM. Please contact the EIDF helpdesk at eidf@epcc.ed.ac.uk if you are experiencing difficulties with your keyboard mapping, and we will help to resolve this by changing some settings in the Guacamole VDI connection configuration.

      "},{"location":"access/virtualmachines-vdi/#further-information","title":"Further information","text":""},{"location":"bespoke/","title":"Bespoke Services","text":"

      Ed-DaSH

      "},{"location":"bespoke/eddash/","title":"EIDFWorkshops","text":"

      Ed-DaSH Notebook Service

      Ed-DaSH Virtual Machines

      JupyterHub Notebook Service Access

      "},{"location":"bespoke/eddash/jhub-git/","title":"EIDF JupyterHub Notebook Service Access","text":"

      Using the EIDF JupyterHub, users can access a range of services including standard interactive Python notebooks as well as RStudio Server.

      "},{"location":"bespoke/eddash/jhub-git/#ed-dash-workshops","title":"Ed-DaSH Workshops","text":""},{"location":"bespoke/eddash/jhub-git/#accessing","title":"Accessing","text":"

    Access to the EIDF JupyterHub is authenticated through GitHub, so you must have an account on https://github.com and that account must be a member of the appropriate GitHub organization. Please ask your project admin or workshop instructor for the workshop GitHub organization details. Please follow the relevant steps listed below to prepare.

      1. If you do not have a GitHub account associated with the email you registered for the workshop with, follow the steps described in Step 1: Creating a GitHub Account
      2. If you do already have a GitHub account associated with the email address you registered for the workshop with, follow the steps described in Step 2: Registering with the Workshop GitHub Organization
      "},{"location":"bespoke/eddash/jhub-git/#step-1-creating-a-github-account","title":"Step 1: Creating a GitHub Account","text":"
      1. Visit https://github.com/signup in your browser
      2. Enter the email address that you used to register for the workshop
      3. Complete the remaining steps of the GitHub registration process
      4. Send an email to ed-dash-support@mlist.is.ed.ac.uk from your GitHub registered email address, including your GitHub username, and ask for an invitation to the workshop GitHub organization
      5. Wait for an email from GitHub inviting you to join the organization, then follow the steps in Step 2: Registering with the Workshop GitHub Organization
      "},{"location":"bespoke/eddash/jhub-git/#step-2-registering-with-the-workshop-github-organization","title":"Step 2: Registering With the Workshop GitHub Organization","text":"
      1. If you already have a GitHub account associated with the email address that you registered for the workshop with, you should have received an email inviting you to join the relevant GitHub organization. If you have not, email ed-dash-support@mlist.is.ed.ac.uk from your GitHub registered email address, including your GitHub username, and ask for an invitation to the workshop GitHub organization
      2. Once you have been invited to the GitHub organization, you will receive an email with the invitation; click on the button as shown Invitation to join the workshop GitHub organization
      3. Clicking on the button in the email will open a new web page with another form as shown below Form to accept the invitation to join the GitHub organization
      4. Again, click on the button to confirm, then the Ed-DaSH-Training GitHub organization page will open
      "},{"location":"bespoke/eddash/safe-registration/","title":"Accessing","text":"

      In order to access the EIDF VDI and connect to EIDF data science cloud VMs, you need to have an active SAFE account. If you already have a SAFE account, you can skip ahead to the Request Project Membership instructions. Otherwise, follow the Register Account in EPCC SAFE instructions immediately below to create the account.

      Info

      Please also see Register and Join a project in the SAFE documentation for more information.

      "},{"location":"bespoke/eddash/safe-registration/#step-1-register-account-in-epcc-safe","title":"Step 1: Register Account in EPCC SAFE","text":"
      1. Go to SAFE signup and complete the registration form
        1. Mandatory fields are: Email, Nationality, First name, Last name, Institution for reporting, Department, and Gender
        2. Your Email should be the one you used to register for the EIDF service (or Ed-DaSH workshop)
        3. If you are unsure, enter 'University of Edinburgh' for Institution for reporting and 'EIDF' for Department SAFE registration form
      2. Submit the form, then accept the SAFE Acceptable Use policy on the next page SAFE User Access Agreement
      3. After you have completed the registration form and accepted the policy, you will receive an email from support@archer2.ac.uk with a password reset URL
      4. Visit the link in the email and generate a new password, then submit the form
      5. You will now be logged into your new account in SAFE
      "},{"location":"bespoke/eddash/safe-registration/#step-2-request-project-membership","title":"Step 2: Request Project Membership","text":"
      1. While logged into SAFE, select the \u2018Request Access\u2019 menu item from the 'Projects' menu in the top menu bar
      2. This will open the 'Apply for project membership' page
      3. Enter the appropriate project ID into the \u2018Project\u2019 field and click the \u2018Next\u2019 button Apply for project membership in SAFE
      4. In the 'Access route' drop down field that appears, select 'Request membership' (not 'Request machine account') Request project membership in SAFE
      5. The project owner will then receive notification of the application and accept your request
      "},{"location":"bespoke/eddash/workshops/","title":"Workshop Setup","text":"

      Please follow the instructions in JupyterHub Notebook Service Access to arrange access to the EIDF Notebook service before continuing. The table below provides the login URL and the relevant GitHub organization to register with.

      Workshop                                                      Login URL                                   GitHub Organization
      Ed-DaSH Introduction to Statistics                            https://secure.epcc.ed.ac.uk/ed-dash-hub    Ed-DaSH-Training
      Ed-DaSH High-Dimensional Statistics                           https://secure.epcc.ed.ac.uk/ed-dash-hub    Ed-DaSH-Training
      Ed-DaSH Introduction to Machine Learning with Python          https://secure.epcc.ed.ac.uk/ed-dash-hub    Ed-DaSH-Training
      N8 CIR Introduction to Artificial Neural Networks in Python   https://secure.epcc.ed.ac.uk/ed-dash-hub    Ed-DaSH-Training

      Please follow the sequence of instructions described in the sections below to get ready for the workshop:

      1. Step 1: Accessing the EIDF Notebook Service for the First Time
      2. Step 2: Login to EIDF JupyterHub
      3. Step 3: Creating a New R Script
      "},{"location":"bespoke/eddash/workshops/#step-1-accessing-the-eidf-notebook-service-for-the-first-time","title":"Step 1: Accessing the EIDF Notebook Service for the First Time","text":"

      We will be using the Notebook service provided by the Edinburgh International Data Facility (EIDF). Follow the steps listed below to gain access.

      Warning

      If you are receiving an error response such as '403: Forbidden' when you try to access https://secure.epcc.ed.ac.uk/ed-dash-hub, please send an email to ed-dash-support@mlist.is.ed.ac.uk to request access and also include your IP address which you can find by visiting https://whatismyipaddress.com/ in your browser. Please be aware that if you are accessing the service from outside of the UK, your access might be blocked until you have emailed us with your IP address.

      1. Click on the button
      2. You will be asked to sign in to GitHub, as shown in the form below GitHub sign in form for access to EIDF Notebook Service
      3. Enter your GitHub credentials, or click on the \u2018Create an account\u2019 link if you do not already have one, and follow the prerequisite instructions to register with GitHub and join the workshop organization
      4. Click on the \u2018Sign in\u2019 button
      5. On the next page, you will be asked whether to authorize the workshop organization to access your GitHub account as shown below GitHub form requesting authorization for the workshop organization
      6. Click on the button
      7. At this point, you will receive an email to the email address that you registered with in GitHub, stating that \u201cA third-party OAuth application has been added to your account\u201d for the workshop
      8. If you receive a \u2018403 : Forbidden\u2019 error message on the next screen (if you did not already do so as in step 4 of the prerequisites section) send an email to ed-dash-support@mlist.is.ed.ac.uk from your GitHub registered email address, including your GitHub username, and ask for an invitation to the workshop organization. Otherwise, skip to the next step. N.B. If you are accessing the service from outside of the UK, you may see this error; if so, please contact ed-dash-support@mlist.is.ed.ac.uk to enable access
      9. If you receive a \u2018400 : Bad Request\u2019 error message, you need to accept the invitation that has been emailed to you to join the workshop organization as in the prerequisite instructions
      "},{"location":"bespoke/eddash/workshops/#step-2-login-to-the-eidf-notebook-service","title":"Step 2: Login to the EIDF Notebook Service","text":"

      Now that you have completed registration with the workshop GitHub organization, you can access the workshop RStudio Server in EIDF.

      1. Return to https://secure.epcc.ed.ac.uk/ed-dash-hub
      2. You will be presented with a choice of server as a list of radio buttons. Select the appropriate one as labelled for your workshop and press the orange 'Start' button
      3. You will now be redirected to the hub spawn pending page for your individual server instance
      4. You will see a message stating that your server is launching. If the page has not updated after 10 seconds, simply refresh the page with the <CTRL> + R or <F5> keys in Windows, or <CMD> + R in macOS
      5. Finally, you will be redirected to either the RStudio Server if it's a statistics workshop, or the Jupyter Lab dashboard otherwise, as shown in the screenshots below The RStudio Server UI The Jupyter Lab Dashboard
      "},{"location":"bespoke/eddash/workshops/#step-3-creating-a-new-r-script","title":"Step 3: Creating a New R Script","text":"

      Follow these quickstart instructions to create your first R script in RStudio Server!

      "},{"location":"faq/","title":"FAQ","text":""},{"location":"faq/#eidf-frequently-asked-questions","title":"EIDF Frequently Asked Questions","text":""},{"location":"faq/#how-do-i-contact-the-eidf-helpdesk","title":"How do I contact the EIDF Helpdesk?","text":"

      Submit a query in the EIDF Portal by selecting \"Submit a Support Request\" in the \"Help and Support\" menu and filling in this form.

      You can also email us at eidf@epcc.ed.ac.uk.

      "},{"location":"faq/#how-do-i-request-more-resources-for-my-project-can-i-extend-my-project","title":"How do I request more resources for my project? Can I extend my project?","text":"

      Submit a support request: In the form select the project that your request relates to and select \"EIDF Project extension: duration and quota\" from the dropdown list of categories. Then enter the new quota or extension date in the description text box below and submit the request.

      The EIDF approval team will consider the extension and you will be notified of the outcome.

      "},{"location":"faq/#new-vms-and-vdi-connections","title":"New VMs and VDI connections","text":"

      My project manager gave me access to a VM but the connection doesn't show up in the VDI connections list?

      This may happen when a machine/VM was added to your connections list while you were logged in to the VDI. Please refresh the connections list by logging out and logging in again, and the new connections should appear.

      "},{"location":"faq/#non-default-ssh-keys","title":"Non-default SSH Keys","text":"

      I have different SSH keys for the SSH gateway and my VM, or I use a key which does not have the default name (~/.ssh/id_rsa) and I cannot login.

      The command syntax shown in our SSH documentation (using the -J <username>@eidf-gateway stanza) makes assumptions about SSH keys and their naming. You should try the full version of the command:

      ssh -o ProxyCommand=\"ssh -i ~/.ssh/<gateway_private_key> -W %h:%p <gateway_username>@eidf-gateway.epcc.ed.ac.uk\" -i ~/.ssh/<vm_private_key> <vm_username>@<vm_ip>\n

      Note that for the majority of users, gateway_username and vm_username are the same, as are gateway_private_key and vm_private_key
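
      If you connect to the same VM regularly, an alternative is to put the gateway and VM details in your SSH client configuration so that a short alias performs the two-hop connection for you. The sketch below is illustrative only; it uses the same placeholders as the command above plus a hypothetical alias my-eidf-vm, so adjust the names, key paths and VM address for your own accounts.

      # Append host entries to your SSH client config (illustrative values)\ncat >> ~/.ssh/config << 'EOF'\nHost eidf-gateway\n    HostName eidf-gateway.epcc.ed.ac.uk\n    User <gateway_username>\n    IdentityFile ~/.ssh/<gateway_private_key>\n\nHost my-eidf-vm\n    HostName <vm_ip>\n    User <vm_username>\n    IdentityFile ~/.ssh/<vm_private_key>\n    ProxyJump eidf-gateway\nEOF\n\n# The VM can then be reached with\nssh my-eidf-vm\n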

      "},{"location":"faq/#username-policy","title":"Username Policy","text":"

      I already have an EIDF username for project Y, can I use this for project X?

      We mandate that every username must be unique across our estate. EPCC machines, including EIDF services such as the SDF and DSC VMs and HPC services such as Cirrus, require you to create a new machine account with a unique username for each project you work on. Usernames cannot be used on multiple projects, even if the previous project has finished. However, some projects span multiple machines, so you may be able to login to multiple machines with the same username.

      "},{"location":"known-issues/","title":"Known Issues","text":""},{"location":"known-issues/#virtual-desktops","title":"Virtual desktops","text":"

      No known issues.

      "},{"location":"overview/","title":"A Unique Service for Academia and Industry","text":"

      The Edinburgh International Data Facility (EIDF) is a growing set of data and compute services developed to support the Data Driven Innovation Programme at the University of Edinburgh.

      Our goal is to support learners, researchers and innovators across the spectrum, with services from data discovery through simple learn-as-you-play-with-data notebooks to GPU-enabled machine-learning platforms for driving AI application development.

      "},{"location":"overview/#eidf-and-the-data-driven-innovation-initiative","title":"EIDF and the Data-Driven Innovation Initiative","text":"

      Launched at the end of 2018, the Data-Driven Innovation (DDI) programme is one of six funded within the Edinburgh & South-East Scotland City Region Deal. The DDI programme aims to make Edinburgh the \u201cData Capital of Europe\u201d, with ambitious targets to support, enhance and improve talent, research, commercial adoption and entrepreneurship across the region through better use of data.

      The programme targets ten industry sectors, with interactions managed through five DDI Hubs: the Bayes Centre, the Usher Institute, Edinburgh Futures Institute, the National Robotarium, and Easter Bush. The activities of these Hubs are underpinned by EIDF.

      "},{"location":"overview/acknowledgements/","title":"Acknowledging EIDF","text":"

      If you make use of EIDF services in your work, we encourage you to acknowledge us in any publications.

      Acknowledgement of using the facility in publications can be used as an identifiable metric to evaluate the scientific support provided, and helps promote the impact of the wider DDI Programme.

      We encourage our users to ensure that an acknowledgement of EIDF is included in the relevant section of their manuscript. We would suggest:

      This work was supported by the Edinburgh International Data Facility (EIDF) and the Data-Driven Innovation Programme at the University of Edinburgh.

      "},{"location":"overview/contacts/","title":"Contact","text":"

      The Edinburgh International Data Facility is located at the Advanced Computing Facility of EPCC, the supercomputing centre based at the University of Edinburgh.

      "},{"location":"overview/contacts/#email-us","title":"Email us","text":"

      Email EIDF: eidf@epcc.ed.ac.uk

      "},{"location":"overview/contacts/#sign-up","title":"Sign up","text":"

      Join our mailing list to receive updates about EIDF.

      "},{"location":"safe-haven-services/network-access-controls/","title":"Safe Haven Network Access Controls","text":"

      The TRE Safe Haven services are protected against open, global access by IPv4 source address filtering. These network access controls ensure that connections are permitted only from Safe Haven controller partner networks and collaborating research institutions.

      The network access controls are managed by the Safe Haven service controllers who instruct EPCC to add and remove the IPv4 addresses allowed to connect to the service gateway. Researchers must connect to the Safe Haven service by first connecting to their institution or corporate VPN and then connecting to the Safe Haven.

      The Safe Haven IG controller and research project co-ordination teams must submit and confirm IPv4 address filter changes to their service help desk via email.

      "},{"location":"safe-haven-services/overview/","title":"Safe Haven Services","text":"

      The EIDF Trusted Research Environment (TRE) hosts several Safe Haven services that enable researchers to work with sensitive data in a secure environment. These services are operated by EPCC in partnership with Safe Haven controllers who manage the Information Governance (IG) appropriate for the research activities and the data access of their Safe Haven service.

      It is the responsibility of EPCC as the Safe Haven operator to design, implement and administer the technical controls required to deliver the Safe Haven security regime demanded by the Safe Haven controller.

      The role of the Safe Haven controller is to satisfy the needs of the researchers and the data suppliers. The controller is responsible for guaranteeing the confidentiality needs of the data suppliers and matching these with the availability needs of the researchers.

      The service offers secure data sharing and analysis environments allowing researchers access to sensitive data under the terms and conditions prescribed by the data providers. The service prioritises the requirements of the data provider over the demands of the researcher and is an academic TRE operating under the guidance of the Five Safes framework.

      The TRE has dedicated, private cloud infrastructure at EPCC's Advanced Computing Facility (ACF) data centre and has its own HPC cluster and high-performance file systems. When a new Safe Haven service is commissioned in the TRE it is created in a new virtual private cloud providing the Safe Haven service controller with an independent IG domain separate from other Safe Havens in the TRE. All TRE service infrastructure and all TRE project data are hosted at ACF.

      If you have any questions about the EIDF TRE or about Safe Haven services, please contact us.

      "},{"location":"safe-haven-services/safe-haven-access/","title":"Safe Haven Service Access","text":"

      Safe Haven services are accessed from a registered network connection address using a browser. The service URL will be \"https://shs.epcc.ed.ac.uk/<service>\" where <service> is the Safe Haven service name.

      The Safe Haven access process has three stages, from multi-factor authentication through to project desktop login.

      Researchers who are active in many research projects and in more than one Safe Haven will need to pay attention to the service they connect to, the project desktop they login to, and the accounts and identities they are using.

      "},{"location":"safe-haven-services/safe-haven-access/#safe-haven-login","title":"Safe Haven Login","text":"

      The first step in the process prompts the user for a Safe Haven username and then for a session PIN code sent via SMS text to the mobile number registered for the username.

      Valid PIN code entry allows the user access to all of the Safe Haven service remote desktop gateways for up to 24 hours without entry of a new PIN code. A user who has successfully entered a PIN code once can access shs.epcc.ed.ac.uk/haven1 and shs.epcc.ed.ac.uk/haven2 without repeating PIN code identity verification.

      When a valid PIN code is accepted, the user is prompted to accept the service use terms and conditions.

      Registration of the user mobile phone number is managed by the Safe Haven IG controller and research project co-ordination teams by submitting and confirming user account changes through the dedicated service help desk via email.

      "},{"location":"safe-haven-services/safe-haven-access/#remote-desktop-gateway-login","title":"Remote Desktop Gateway Login","text":"

      The second step in the access process is for the user to login to the Safe Haven service remote desktop gateway so that a project desktop connection can be chosen. The user is prompted for a Safe Haven service account identity.

      VDI Safe Haven Service Login Page

      Safe Haven accounts are managed by the Safe Haven IG controller and research project co-ordination teams by submitting and confirming user account changes through the dedicated service help desk via email.

      "},{"location":"safe-haven-services/safe-haven-access/#project-desktop-connection","title":"Project Desktop Connection","text":"

      The third stage in the process is to select the virtual connection from those available on the account's home page. An example home page is shown below offering two connection options to the same virtual machine. Remote desktop connections will have an _rdp suffix and SSH terminal connections have an _ssh suffix. The most recently used connections are shown as screen thumbnails at the top of the page and all the connections available to the user are shown in a tree list below this.

      VM connections available home page

      The remote desktop gateway software used in the Safe Haven services in the TRE is the Apache Guacamole web application. Users new to this application can find the user manual here. It is recommended that users read this short guide, but note that the data sharing features such as copy and paste, connection sharing, and file transfers are disabled on all connections in the TRE Safe Havens.

      A remote desktop or SSH connection is used to access data provided for a specific research project. If a researcher is working on multiple projects within a Safe Haven they can only login to one project at a time. Some connections may allow the user to login to any project and some connections will only allow the user to login into one specific project. This depends on project IG restrictions specified by the Safe Haven and project controllers.

      Project desktop accounts are managed by the Safe Haven IG controller and research project co-ordination teams by submitting and confirming user account changes through the dedicated service help desk via email.

      "},{"location":"safe-haven-services/using-the-hpc-cluster/","title":"Using the TRE HPC Cluster","text":""},{"location":"safe-haven-services/using-the-hpc-cluster/#introduction","title":"Introduction","text":"

      The TRE HPC system, also called the SuperDome Flex, is a single node, large memory HPC system. It is provided for compute and data intensive workloads that require more CPU, memory, and better IO performance than can be provided by the project VMs, which have the performance equivalent of small rack mount servers.

      "},{"location":"safe-haven-services/using-the-hpc-cluster/#specifications","title":"Specifications","text":"

      The system is an HPE SuperDome Flex configured with 1152 hyper-threaded cores (576 physical cores) and 18TB of memory, of which 17TB is available to users. User home and project data directories are on network mounted storage pods running the BeeGFS parallel filesystem. This storage is built in blocks of 768TB per pod. Multiple pods are available in the TRE for use by the HPC system and the total storage available will vary depending on the project configuration.

      The HPC system runs Red Hat Enterprise Linux, which is not the same flavour of Linux as the Ubuntu distribution running on the desktop VMs. However, most jobs in the TRE run Python and R, and there are few issues moving between the two versions of Linux. Use of virtual environments is strongly encouraged to ensure there are no differences between the desktop and HPC runtimes.

      "},{"location":"safe-haven-services/using-the-hpc-cluster/#software-management","title":"Software Management","text":"

      All system level software installed and configured on the TRE HPC system is managed by the TRE admin team. Software installation requests may be made by the Safe Haven IG controllers, research project co-ordinators, and researchers by submitting change requests through the dedicated service help desk via email.

      Minor software changes will be made as soon as admin effort can be allocated. Major changes are likely to be scheduled for the TRE monthly maintenance session on the first Thursday of each month.

      "},{"location":"safe-haven-services/using-the-hpc-cluster/#hpc-login","title":"HPC Login","text":"

      Login to the HPC system is from the project VM using SSH and is not direct from the VDI. The HPC cluster accounts are the same accounts used on the project VMs, with the same username and password. All project data access on the HPC system is private to the project accounts as it is on the VMs, but it is important to understand that the TRE HPC cluster is shared by projects in other TRE Safe Havens.

      To login to the HPC cluster from the project VMs use ssh shs-sdf01 from an xterm. If you wish to avoid entry of the account password for every SSH session or remote command execution you can use SSH key authentication by following the SSH key configuration instructions here. SSH key passphrases are not strictly enforced within the Safe Haven but are strongly encouraged.
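
      As a minimal sketch of that key setup (assuming the standard OpenSSH client tools are available on the project VM; the key file name is just an example), you might do something like the following from the VM:

      # Generate a key pair on the project VM (choose a passphrase when prompted)\nssh-keygen -t ed25519 -f ~/.ssh/id_ed25519\n\n# Copy the public key to your HPC account (enter the account password once)\nssh-copy-id -i ~/.ssh/id_ed25519.pub shs-sdf01\n\n# Subsequent logins should no longer prompt for the account password\nssh shs-sdf01\n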

      "},{"location":"safe-haven-services/using-the-hpc-cluster/#running-jobs","title":"Running Jobs","text":"

      To use the HPC system fully and fairly, all jobs must be run using the SLURM job manager. More information about SLURM, running batch jobs and running interactive jobs can be found here. Please read this carefully before using the cluster if you have not used SLURM before. The SLURM site also has a set of useful tutorials on HPC clusters and job scheduling.

      All analysis and processing jobs must be run via SLURM. SLURM manages access to all the cores on the system beyond the first 32. If SLURM is not used and programs are run directly from the command line, then there are only 32 cores available, and these are shared by the other users. Normal code development, short test runs, and debugging can be done from the command line without using SLURM.

      There is only one node

      The HPC system is a single node with all cores sharing all the available memory. SLURM jobs should always specify '#SBATCH --nodes=1' to run correctly.

      SLURM email alerts for job status change events are not supported in the TRE.

      "},{"location":"safe-haven-services/using-the-hpc-cluster/#resource-limits","title":"Resource Limits","text":"

      There are no resource constraints imposed on the default SLURM partition at present. There are user limits (see the output of ulimit -a). If a project has a requirement for more than 200 cores, more than 4TB of memory, or an elapsed runtime of more than 96 hours, a resource reservation request should be made by the researchers through email to the service help desk.

      There are no storage quotas enforced in the HPC cluster storage at present. The project storage requirements are negotiated, and space allocated before the project accounts are released. Storage use is monitored, and guidance will be issued before quotas are imposed on projects.

      The HPC system is managed in the spirit of utilising it as fully as possible and as fairly as possible. This approach works best when researchers are aware of their project workload demands and cooperate rather than compete for cluster resources.

      "},{"location":"safe-haven-services/using-the-hpc-cluster/#python-jobs","title":"Python Jobs","text":"

      A basic script to run a Python job in a virtual environment is shown below.

      #!/bin/bash\n#\n#SBATCH --export=ALL                  # Job inherits all env vars\n#SBATCH --job-name=my_job_name        # Job name\n#SBATCH --mem=512G                    # Job memory request\n#SBATCH --output=job-%j.out           # Standard output file\n#SBATCH --error=job-%j.err            # Standard error file\n#SBATCH --nodes=1                     # Run on a single node\n#SBATCH --ntasks=1                    # Run one task per node\n#SBATCH --time=02:00:00               # Time limit hrs:min:sec\n#SBATCH --partition standard          # Run on partition (queue)\n\npwd\nhostname\ndate \"+DATE: %d/%m/%Y TIME: %H:%M:%S\"\necho \"Running job on a single CPU core\"\n\n# Create the job\u2019s virtual environment\nsource ${HOME}/my_venv/bin/activate\n\n# Run the job code\npython3 ${HOME}/my_job.py\n\ndate \"+DATE: %d/%m/%Y TIME: %H:%M:%S\"\n
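
      Assuming the script above is saved as my_python_job.sh (a hypothetical file name), it could be submitted and checked along these lines:

      sbatch my_python_job.sh   # submit the batch job; Slurm prints the new job ID\n\nsqueue -u $USER           # check the job state while it is queued or running\n\ncat job-<jobid>.out       # inspect the output file named by the --output directive above\n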
      "},{"location":"safe-haven-services/using-the-hpc-cluster/#mpi-jobs","title":"MPI Jobs","text":"

      An example script for a multi-process MPI example is shown. The system currently supports MPICH MPI.

      #!/bin/bash\n#\n#SBATCH --export=ALL\n#SBATCH --job-name=mpi_test\n#SBATCH --output=job-%j.out\n#SBATCH --error=job-%j.err\n#SBATCH --nodes=1\n#SBATCH --ntasks-per-node=5\n#SBATCH --cpus-per-task=1             # set explicitly so SLURM_CPUS_PER_TASK is defined below\n#SBATCH --time=05:00\n#SBATCH --partition standard\n\necho \"Submitted MPICH MPI job\"\necho \"Running on host ${HOSTNAME}\"\necho \"Using ${SLURM_NTASKS_PER_NODE} tasks per node\"\necho \"Using ${SLURM_CPUS_PER_TASK} cpus per task\"\nlet mpi_threads=${SLURM_NTASKS_PER_NODE}*${SLURM_CPUS_PER_TASK}\necho \"Using ${mpi_threads} MPI threads\"\n\n# load the MPICH MPI module\nmodule purge\nmodule load mpi/mpich-x86_64\n\n# run the MPI program\nmpirun ${HOME}/test_mpi\n
      "},{"location":"safe-haven-services/using-the-hpc-cluster/#managing-files-and-data","title":"Managing Files and Data","text":"

      There are three file systems to manage in the VM and HPC environment.

      1. The desktop VM /home file system. This can only be used when you login to the VM remote desktop. This file system is local to the VM and is not backed up.
      2. The HPC system /home file system. This can only be used when you login to the HPC system using SSH from the desktop VM. This file system is local to the HPC cluster and is not backed up.
      3. The project file and data space in the /safe_data file system. This file system can only be used when you login to a VM remote desktop session. This file system is backed up.

      The /safe_data file system with the project data cannot be used by the HPC system. The /safe_data file system has restricted access and a relatively slow IO performance compared to the parallel BeeGFS file system storage on the HPC system.

      The process to use the TRE HPC service is to copy and synchronise the project code and data files on the /safe_data file system with the HPC /home file system before and after login sessions and job runs on the HPC cluster. Assuming all the code and data required for the job is in a directory 'current_wip' on the project VM, the workflow is as follows (a consolidated sketch of the same commands is shown after the list):

      1. Copy project code and data to the HPC cluster (from the desktop VM) rsync -avPz -e ssh /safe_data/my_project/current_wip shs-sdf01:
      2. Run jobs/tests/analysis ssh shs-sdf01, cd current_wip, sbatch/srun my_job
      3. Copy any changed project code and data back to /safe_data (from the desktop VM) rsync -avPz -e ssh shs-sdf01:current_wip /safe_data/my_project
      4. Optionally delete the code and data from the HPC cluster working directory.
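
      Putting these steps together, a minimal sketch of the round trip (using the same 'current_wip' directory and a placeholder job script name my_job.sh) looks like this, run from the desktop VM unless noted otherwise:

      # 1. Push code and data from /safe_data to the HPC cluster\nrsync -avPz -e ssh /safe_data/my_project/current_wip shs-sdf01:\n\n# 2. Log in to the HPC cluster and run the work under SLURM\nssh shs-sdf01\ncd current_wip\nsbatch my_job.sh\nexit\n\n# 3. Pull any changed code and data back to /safe_data\nrsync -avPz -e ssh shs-sdf01:current_wip /safe_data/my_project\n\n# 4. Optionally remove the working copy on the HPC cluster\nssh shs-sdf01 rm -r current_wip\n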
      "},{"location":"safe-haven-services/virtual-desktop-connections/","title":"Virtual Machine Connections","text":"

      Sessions on project VMs may be either remote desktop (RDP) logins or SSH terminal logins. Most users will prefer to use the remote desktop connections, but the SSH terminal connection is useful when remote network performance is poor and it must be used for account password changes.

      "},{"location":"safe-haven-services/virtual-desktop-connections/#first-time-login-and-account-password-changes","title":"First Time Login and Account Password Changes","text":"

      Account Password Changes

      Note that first time account login cannot be through RDP as a password change is required. Password reset logins must be SSH terminal sessions as password changes can only be made through SSH connections.

      "},{"location":"safe-haven-services/virtual-desktop-connections/#connecting-to-a-remote-ssh-session","title":"Connecting to a Remote SSH Session","text":"

      When a VM SSH connection is selected the browser screen becomes a text terminal and the user is prompted to \"Login as: \" with a project account name, and then prompted for the account password. This connection type is equivalent to a standard xterm SSH session.

      "},{"location":"safe-haven-services/virtual-desktop-connections/#connecting-to-a-remote-desktop-session","title":"Connecting to a Remote Desktop Session","text":"

      Remote desktop connections work best by first placing the browser in Full Screen mode and leaving it in this mode for the entire duration of the Safe Haven session.

      When a VM RDP connection is selected the browser screen becomes a remote desktop presenting the login screen shown below.

      VM virtual desktop connection user account login screen

      Once the project account credentials have been accepted, a remote desktop similar to the one shown below is presented. The default VM environment in the TRE is Ubuntu 22.04 with the Xfce desktop.

      VM virtual desktop

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/","title":"Accessing the Superdome Flex inside the EPCC Trusted Research Environment","text":""},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/#what-is-the-superdome-flex","title":"What is the Superdome Flex?","text":"

      The Superdome Flex (SDF) is a high-performance computing cluster manufactured by Hewlett Packard Enterprise. It has been designed to handle multi-core, high-memory tasks in environments where security is paramount. The hardware specifications of the SDF within the Trusted Research Environment (TRE) are as follows:

      The software specification of the SDF is:

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/#key-point","title":"Key Point","text":"

      The SDF is within the TRE. Therefore, the same restrictions apply, i.e. the SDF is isolated from the internet (no downloading code from public GitHub repos) and copying/recording/extracting anything on the SDF outside of the TRE is strictly prohibited unless through approved processes.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/#accessing-the-sdf","title":"Accessing the SDF","text":"

      Users can only access the SDF by ssh-ing into it via their VM desktop.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/#hello-world","title":"Hello world","text":"
      **** On the VM desktop terminal ****\n\nssh shs-sdf01\n<Enter VM password>\n\necho \"Hello World\"\n\nexit\n
      "},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/#sdf-vs-vm-file-systems","title":"SDF vs VM file systems","text":"

      The SDF file system is separate from the VM file system, which is again separate from the project data space. Files need to be transferred between the three systems for any analysis to be completed within the SDF.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/#example-showing-separate-sdf-and-vm-file-systems","title":"Example showing separate SDF and VM file systems","text":"
      **** On the VM desktop terminal ****\n\ncd ~\ntouch test.txt\nls\n\nssh shs-sdf01\n<Enter VM password>\n\nls # test.txt is not here\nexit\n\nscp test.txt shs-sdf01:/home/<USERNAME>/\n\nssh shs-sdf01\n<Enter VM password>\n\nls # test.txt is here\n
      "},{"location":"safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/#example-copying-data-between-project-data-space-and-sdf","title":"Example copying data between project data space and SDF","text":"

      Transferring and synchronising data sets between the project data space and the SDF is easier with the rsync command (rather than manually checking and copying files/folders with scp). rsync only transfers files that are different between the two targets; see its manual for more details.

      **** On the VM desktop terminal ****\n\nman rsync # check instructions for using rsync\n\nrsync -avPz -e ssh /safe_data/my_project/ shs-sdf01:/home/<USERNAME>/my_project/ # sync project folder and SDF home folder\n\nssh shs-sdf01\n<Enter VM password>\n\n*** Conduct analysis on SDF ***\n\nexit\n\nrsync -avPz -e ssh /safe_data/my_project/current_wip shs-sdf01:/home/<USERNAME>/my_project/ # re-synchronise the project folder and the SDF home folder\n\n*** Optionally remove the project folder on SDF ***\n
      "},{"location":"safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/","title":"Running R/Python Scripts","text":"

      Running analysis scripts on the SDF is slightly different to running scripts on the Desktop VMs. The Linux distribution differs between the two with the SDF using Red Hat Enterprise Linux (RHEL) and the Desktop VMs using Ubuntu. Therefore, it is highly advisable to use virtual environments (e.g. conda environments) to complete any analysis and aid the transition between the two distributions. Conda should run out of the box on the Desktop VMs, but some configuration is required on the SDF.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/#setting-up-conda-environments-on-you-first-connection-to-the-sdf","title":"Setting up conda environments on you first connection to the SDF","text":"
      *** SDF Terminal ***\n\nconda activate base # Test conda environment\n\n# Conda command will not be found. There is no need to install!\n\neval \"$(/opt/anaconda3/bin/conda shell.bash hook)\" # Tells your terminal where conda is\n\nconda init # changes your .bashrc file so conda is automatically available in the future\n\nconda config --set auto_activate_base false # stop conda base from being activated on startup\n\npython # note python version\n\nexit()\n

      The base conda environment is now available, but note that the Python interpreter and gcc compiler it provides are not the latest versions (Python 3.9.7 and gcc 7.5.0).

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/#getting-an-up-to-date-python-version","title":"Getting an up-to-date python version","text":"

      In order to get an up-to-date python version we first need to use an updated gcc version. Fortunately, conda has an updated gcc toolset that can be installed.

      *** SDF Terminal ***\n\nconda activate base # If conda isn't already active\n\nconda create -n python-v3.11 gcc_linux-64=11.2.0 python=3.11.3\n\nconda activate python-v3.11\n\npython\n\nexit()\n
      "},{"location":"safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/#running-r-scripts-on-the-sdf","title":"Running R scripts on the SDF","text":"

      The default version of R available on the SDF is v4.1.2. Alternative R versions can be installed using conda similar to the python conda environment above.

      conda create -n r-v4.3 gcc_linux-64=11.2.0 r-base=4.3\n\nconda activate r-v4.3\n\nR\n\nq()\n
      "},{"location":"safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/#final-points","title":"Final points","text":""},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/","title":"Submitting Scripts to Slurm","text":""},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#what-is-slurm","title":"What is Slurm?","text":"

      Slurm is a workload manager that schedules jobs submitted to a shared resource. Slurm is a well-developed tool that can manage large computing clusters, such as ARCHER2, with thousands of users each with different priorities and allocated computing hours. Inside the TRE, Slurm is used to help ensure all users of the SDF get equitable access. Therefore, users who are submitting jobs with high resource requirements (>80 cores, >1TB of memory) may have to wait longer for resource allocation to enable users with lower resource demands to continue their work.

      Slurm is currently set up so all users have equal priority and there is no limit to the total number of CPU hours allocated to a user per month. However, there are limits to the maximum amount of resources that can be allocated to an individual job. Jobs that require more than 200 cores, more than 4TB of memory, or an elapsed runtime of more than 96 hours will be rejected. If users need to submit jobs with large resource demand, they need to submit a resource reservation request by emailing their project's service desk.
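
      As a concrete illustration of these limits, a job whose batch script requests resources along the lines of the (hypothetical) header below stays within the per-job maximums and so would not be rejected:

      #SBATCH --nodes=1            # the SDF is a single node\n#SBATCH --ntasks=100         # within the 200-core limit\n#SBATCH --mem=2T             # within the 4TB memory limit\n#SBATCH --time=48:00:00      # within the 96-hour runtime limit\n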

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#why-do-you-need-to-use-slurm","title":"Why do you need to use Slurm?","text":"

      The SDF is a resource shared across all projects within the TRE and all users should have equal opportunity to use the SDF to complete resource-intense tasks appropriate to their projects. Users of the SDF are required to consider the needs of the wider community by:

      Users can develop code, complete test runs, and debug from the SDF command line without using Slurm. However, only 32 of the 512 cores are accessible without submitting a job request to Slurm. These cores are accessible to all users simultaneously.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#slurm-basics","title":"Slurm basics","text":"

      Slurm revolves around four main entities: nodes, partitions, jobs and job steps. Nodes and partitions are relevant for more complex distributed computing clusters so Slurm can allocate appropriate resources to jobs across multiple pieces of hardware. Jobs are requests for resources and job steps are what need to be completed once the resources have been allocated (completed in sequence or parallel). Job steps can be further broken down into tasks.

      There are four key commands for Slurm users:

      More details on these functions (and several not mentioned here) can be seen on the Slurm website.
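
      For day-to-day use, the Slurm client commands most users need are sbatch, srun, squeue and scancel (all standard Slurm tools); a quick sketch of each is shown below.

      sbatch my_job_script.sh    # submit a job script to the queue and return immediately\n\nsrun --ntasks 1 hostname   # request resources and run a single job step interactively\n\nsqueue -u $USER            # list your queued and running jobs\n\nscancel <jobid>            # cancel a queued or running job\n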

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#submitting-a-simple-job","title":"Submitting a simple job","text":"
      *** SDF Terminal ***\n\nsqueue -u $USER # Check if there are jobs already queued or running for you\n\nsrun --job-name=my_first_slurm_job --nodes 1 --ntasks 10 --cpus-per-task 2 echo 'Hello World'\n\nsqueue -u $USER --state=CD # List all completed jobs\n

      In this instance, the srun command completes two steps: job submission and job step execution. First, it submits a job request to be allocated 10 CPUs (1 CPU for each of the 10 tasks). Once the resources are available, it executes the job step, consisting of 10 tasks each running the echo 'Hello World' command.

      srun accepts a wide variety of options to specify the resources required to complete its job step. Within the SDF, you must always request 1 node (as there is only one node) and never use the --exclusive option (as no one will have exclusive access to this shared resource). Notice that running srun blocks your terminal from accepting any more commands, and the output from each task in the job step, i.e. Hello World in the above example, is printed to your terminal. We will compare this to running an sbatch command.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#submitting-a-batch-job","title":"Submitting a batch job","text":"

      Batch jobs are incredibly useful because they run in the background without blocking your terminal. Batch jobs also output the results to a log file rather than straight to your terminal. This allows you to check a job was completed successfully at a later time so you can move on to other things whilst waiting for a job to complete.

      A batch job can be submitted to Slurm by passing a job script to the sbatch command. The first few lines of a job script outline the resources to be requested as part of the job. The remainder of a job script consists of one or more srun commands outlining the job steps that need to be completed (in sequence or parallel) once the resources are available. There are numerous options for defining the resource requirements of a job including:

      More information on the various options is in the sbatch documentation.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#example-job-script","title":"Example Job Script","text":"
      #!/usr/bin/env bash\n#SBATCH -J HelloWorld\n#SBATCH --nodes=1\n#SBATCH --tasks-per-node=10\n#SBATCH --cpus-per-task=2\n#SBATCH --output=example_job_script.log   # log file read in the submission example below\n\n# Run echo tasks in sequence\n\nsrun --ntasks 5 --cpus-per-task 2 echo \"Series Task A. Time: \" $(date +\"%H:%M:%S\")\n\nsrun --ntasks 5 --cpus-per-task 2 echo \"Series Task B. Time: \" $(date +\"%H:%M:%S\")\n\n# Run echo tasks in parallel with the ampersand character\n\nsrun --exclusive --ntasks 5 --cpus-per-task 2 echo \"Parallel Task A. Time: \" $(date +\"%H:%M:%S\") &\n\nsrun --exclusive --ntasks 5 --cpus-per-task 2 echo \"Parallel Task B. Time: \" $(date +\"%H:%M:%S\")\n\n# Wait for any background job steps to complete\nwait\n
      "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#example-job-script-submission","title":"Example job script submission","text":"
      *** SDF Terminal ***\n\nnano example_job_script.sh\n\n*** Copy example job script above ***\n\nsbatch example_job_script.sh\n\nsqueue -u $USER -i 5 # report the queue every 5 seconds; press Ctrl-C to stop\n\n*** Wait for the batch job to be completed ***\n\ncat example_job_script.log # The series tasks should be grouped together and the parallel tasks interspersed.\n

      The example batch job is intended to show two things: 1) the usefulness of the sbatch command and 2) the versatility of a job script. As the sbatch command allows you to submit scripts and check their outcome at your own discretion, it is the most common way of interacting with Slurm. Meanwhile, the job script allows you to specify one global resource request and break it up into multiple job steps with different resource demands that can be completed in parallel or in sequence.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#submitting-pythonr-code-to-slurm","title":"Submitting python/R code to Slurm","text":"

      Although submitting job steps containing python/R analysis scripts can be done with srun directly, as below, it is more common to submit bash scripts that call the analysis scripts after setting up the environment (i.e. after calling conda activate).

      **** Python code job submission ****\n\nsrun --job-name=my_first_python_job --nodes 1 --ntasks 10 --cpus-per-task 2 --mem 10G python3 example_script.py\n\n**** R code job submission ****\n\nsrun --job-name=my_first_r_job --nodes 1 --ntasks 10 --cpus-per-task 2 --mem 10G Rscript example_script.R\n
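
      In practice that wrapper is usually a short batch script submitted with sbatch, so the environment is set up on the allocated resources rather than on the shared login cores. The sketch below assumes the python-v3.11 conda environment created in the earlier lesson and a hypothetical analysis script example_script.py; save it as, say, run_analysis.sh and submit it with sbatch run_analysis.sh.

      #!/usr/bin/env bash\n#SBATCH --job-name=conda_python_job\n#SBATCH --nodes=1\n#SBATCH --ntasks=1\n#SBATCH --cpus-per-task=2\n#SBATCH --mem=10G\n#SBATCH --time=01:00:00\n#SBATCH --output=job-%j.out\n#SBATCH --error=job-%j.err\n\n# Make conda available in the non-interactive batch shell (path as used earlier in this tutorial)\neval \"$(/opt/anaconda3/bin/conda shell.bash hook)\"\n\n# Activate the analysis environment\nconda activate python-v3.11\n\n# Run the analysis script as the job step\nsrun python3 example_script.py\n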
      "},{"location":"safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/#signposting","title":"Signposting","text":"

      Useful websites for learning more about Slurm:

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/","title":"Parallelised Python analysis with Dask","text":"

      This lesson is adapted from a workshop introducing users to running python scripts on ARCHER2 as developed by Adrian Jackson.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#introduction","title":"Introduction","text":"

      Python has limited native support for parallelisation. Python contains a Global Interpreter Lock (GIL), which means the Python interpreter only allows one thread to execute at a time. The advantage of the GIL is that C libraries can be easily integrated into Python scripts without checking if they are thread-safe. However, this means that most common Python modules cannot be easily parallelised. Fortunately, there are now several re-implementations of common Python modules that work around the GIL and are therefore parallelisable. Dask is a Python module that contains a parallelised version of the pandas data frame as well as a general framework for parallelising any Python code.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#dask","title":"Dask","text":"

      Dask enables thread-safe parallelised python execution by creating task graphs (a graph of the dependencies of the inputs and outputs of each function) and then deducing which ones can be run separately. This lesson introduces some general concepts required for programming using Dask. There are also some exercises with example answers to help you write your first parallelised python scripts.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#arrays-data-frames-and-bags","title":"Arrays, data frames and bags","text":"

      Dask contains three data objects to enable parallelised analysis of large data sets in a way familiar to most python programmers. If the same operations are being applied to a large data set then Dask can split up the data set and apply the operations in parallel. The three data objects that Dask can easily split up are:

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#example-dask-array","title":"Example Dask array","text":"

      You may need to install dask or create a new conda environment that includes it.

      conda create -n dask-env gcc_linux-64=11.2.0 python=3.11.3 dask\n\nconda activate dask-env\n

      Try running the following Python using dask:

      import dask.array as da\n\nx = da.random.random((10000, 10000), chunks=(1000, 1000))\n\nprint(x)\n\nprint(x.compute())\n\nprint(x.sum())\n\nprint(x.sum().compute())\n

      This should demonstrate that Dask makes it straightforward to implement simple parallelism, but is also lazy in that it does not compute anything until you force it to with the .compute() function.

      You can also try out dask DataFrames, using the following code:

      import dask.dataframe as dd\n\ndf = dd.read_csv('surveys.csv')\n\ndf.head()\ndf.tail()\n\ndf.weight.max().compute()\n

      You can try using different blocksizes when reading in the csv file, and then undertaking an operation on the data, as follows. Experiment with varying blocksizes, although you should be aware that making your block size too small is likely to cause poor performance (the blocksize affects the number of bytes read in at each operation).

      df = dd.read_csv('surveys.csv', blocksize=\"10000\")\ndf.weight.max().compute()\n

      You can also experiment with Dask Bags to see how that functionality works:

      import dask.bag as db\nfrom operator import add\nb = db.from_sequence([1, 2, 3, 4, 5], npartitions=2)\nprint(b.compute())\n
      "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#dask-delayed","title":"Dask Delayed","text":"

      Dask delayed lets you construct your own task graphs/parallelism from Python functions. You can find out more about dask delayed from the dask documentation. Try parallelising the code below using the .delayed function or the @delayed decorator; an example answer can be found here.

      def inc(x):\n    return x + 1\n\ndef double(x):\n    return x * 2\n\ndef add(x, y):\n    return x + y\n\ndata = [1, 2, 3, 4, 5]\n\noutput = []\nfor x in data:\n    a = inc(x)\n    b = double(x)\n    c = add(a, b)\n    output.append(c)\n\ntotal = sum(output)\n\nprint(total)\n
      "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#mandelbrot-exercise","title":"Mandelbrot Exercise","text":"

      The code below calculates the members of a Mandelbrot set using Python functions:

      import sys\nimport time\nimport numpy as np\nimport matplotlib.pyplot as plt\n\ndef mandelbrot(h, w, maxit=20, r=2):\n    \"\"\"Returns an image of the Mandelbrot fractal of size (h,w).\"\"\"\n    start = time.time()\n\n    x = np.linspace(-2.5, 1.5, 4*h+1)\n\n    y = np.linspace(-1.5, 1.5, 3*w+1)\n\n    A, B = np.meshgrid(x, y)\n\n    C = A + B*1j\n\n    z = np.zeros_like(C)\n\n    divtime = maxit + np.zeros(z.shape, dtype=int)\n\n    for i in range(maxit):\n        z = z**2 + C\n        diverge = abs(z) > r # who is diverging\n        div_now = diverge & (divtime == maxit) # who is diverging now\n        divtime[div_now] = i # note when\n        z[diverge] = r # avoid diverging too much\n\n    end = time.time()\n\n    return divtime, end-start\n\nh = 2000\nw = 2000\n\nmandelbrot_space, time = mandelbrot(h, w)\n\nplt.imshow(mandelbrot_space)\n\nprint(time)\n

      Your task is to parallelise this code using Dask Array functionality. Using the base python code above, extend it with Dask Array for the main arrays in the computation. Remember you need to specify a chunk size with Dask Arrays, and you will also need to call compute at some point to force Dask to actually undertake the computation. Note, depending on where you run this you may not see any actual speed up of the computation. You need access to extra resources (compute cores) for the calculation to go faster. If in doubt, submit a python script of your solution to the SDF compute nodes to see if you see speed up there. If you are struggling with this parallelisation exercise, there is a solution available for you here.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#pi-exercise","title":"Pi Exercise","text":"

      The code below calculates Pi using a function that can split it up into chunks and calculate each chunk separately. Currently it uses a single chunk to produce the final value of Pi, but that can be changed by calling pi_chunk multiple times with different inputs. This is not necessarily the most efficient method for calculating Pi in serial, but it does enable parallelisation of the calculation of Pi using multiple copies of pi_chunk called simultaneously.

      import time\nimport sys\n\n# Calculate pi in chunks\n\n# n     - total number of steps to be undertaken across all chunks\n# lower - the lowest number of this chunk\n# upper - the upper limit of this chunk such that i < upper\n\ndef pi_chunk(n, lower, upper):\n    step = 1.0 / n\n    p = step * sum(4.0/(1.0 + ((i + 0.5) * (i + 0.5) * step * step)) for i in range(lower, upper))\n    return p\n\n# Number of slices\n\nnum_steps = 10000000\n\nprint(\"Calculating PI using:\\n \" + str(num_steps) + \" slices\")\n\nstart = time.time()\n\n# Calculate using a single chunk containing all steps\n\np = pi_chunk(num_steps, 1, num_steps)\n\nstop = time.time()\n\nprint(\"Obtained value of Pi: \" + str(p))\n\nprint(\"Time taken: \" + str(stop - start) + \" seconds\")\n

      For this exercise, your task is to implement the above code on the SDF, and then parallelise it using Dask. There are a number of different ways you could parallelise this using Dask, but we suggest using the Futures map functionality to run the pi_chunk function on a range of different inputs. Futures map has the following definition:

      Client.map(func, *iterables[, key, workers, ...])\n

      Where func is the function you want to run, and the subsequent arguments are inputs to that function. To utilise this for the Pi calculation, you will first need to set up and configure a Dask Client to use, and also create and populate lists or vectors of inputs to be passed to the pi_chunk function for each function run that Dask launches.

      If you run Dask with processes then it is possible that you will get errors about forking processes, such as these:

          An attempt has been made to start a new process before the current process has finished its bootstrapping phase.\n    This probably means that you are not using fork to start your child processes and you have forgotten to use the proper idiom in the main module:\n

      In that case you need to encapsulate your code within a main function, using something like this:

      if __name__ == \"__main__\":\n

      If you are struggling with this exercise then there is a solution available for you here.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/#signposting","title":"Signposting","text":""},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/","title":"Parallelised R Analysis","text":"

      This lesson is adapted from a workshop introducing users to running R scripts on ARCHER2 as developed by Adrian Jackson.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/#introduction","title":"Introduction","text":"

      In this exercise we are going to try different methods of parallelising R on the SDF. This will include single node parallelisation functionality (e.g. using threads or processes to use cores within a single node), and distributed memory functionality that enables the parallelisation of R programs across multiple nodes.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/#example-parallelised-r-code","title":"Example parallelised R code","text":"

      You may need to activate an R conda environment.

      conda activate r-v4.2\n

      Try running the following R script using R on the SDF login node:

      n <- 8*2048\nA <- matrix( rnorm(n*n), ncol=n, nrow=n )\nB <- matrix( rnorm(n*n), ncol=n, nrow=n )\nC <- A %*% B\n

      You can run this as follows on the SDF (assuming you have saved the above code into a file named matrix.R):

      Rscript ./matrix.R\n

      You can check the resources used by R when running on the login node using this command:

      top -u $USER\n

      If you run the R script in the background using &, as follows, you can then monitor your run using the top command. You may notice when you run your program that at points R uses many more resources than a single core can provide, as demonstrated below:
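
      For example (reusing the matrix.R script and the top command from above):

      Rscript ./matrix.R &   # run the script in the background\n\ntop -u $USER           # monitor its CPU usage; press q to quit top\n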

          PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND\n    178357 adrianj 20 0 15.542 0.014t 13064 R 10862 2.773 9:01.66 R\n

      In the example above it can be seen that R is using 10862% of a single core, i.e. the equivalent of more than 100 cores. This is an example of R using automatic parallelisation. You can experiment with controlling the automatic parallelisation using the OMP_NUM_THREADS variable to restrict the number of cores available to R. Try using the following values:

      export OMP_NUM_THREADS=8\n\nexport OMP_NUM_THREADS=4\n\nexport OMP_NUM_THREADS=2\n

      You may also notice that not all of the R script is parallelised. Only the actual matrix multiplication is undertaken in parallel; the initialisation/creation of the matrices is done in serial.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/#parallelisation-with-datatables","title":"Parallelisation with data.tables","text":"

      We can also experiment with the implicit parallelism in other libraries, such as data.table. You will first need to install this library on the SDF. To do this you can simply run the following command:

      install.packages(\"data.table\")\n

      Once you have installed data.table you can experiment with the following code:

      library(data.table)\nvenue_data <- data.table( ID = 1:50000000,\nCapacity = sample(100:1000, size = 50000000, replace = T), Code = sample(LETTERS, 50000000, replace = T),\nCountry = rep(c(\"England\",\"Scotland\",\"Wales\",\"NorthernIreland\"), 50000000))\nsystem.time(venue_data[, mean(Capacity), by = Country])\n

      This creates some random data in a large data table and then performs a calculation on it. Try running R with varying numbers of threads to see what impact that has on performance. Remember, you can vary the number of threads R uses by setting OMP_NUM_THREADS= before you run R. If you want to try easily varying the number of threads you can save the above code into a script and run it using Rscript, changing OMP_NUM_THREADS each time you run it, e.g.:

      export OMP_NUM_THREADS=1\n\nRscript ./data_table_test.R\n\nexport OMP_NUM_THREADS=2\n\nRscript ./data_table_test.R\n

      The elapsed time that is printed out when the calculation is run represents how long the script/program took to run. It\u2019s important to bear in mind that, as with the matrix multiplication exercise, not everything will be parallelised. Creating the data table is done in serial so does not benefit from the addition of more threads.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/#loop-and-function-parallelism","title":"Loop and function parallelism","text":"

      R provides a number of different functions to run loops or functions in parallel. One of the most common approaches is to use the {X}apply functions:

      For example:

      res <- lapply(1:3, function(i) {\nsqrt(i)*sqrt(i*2)\n})\n

      The {X}apply functionality supports iteration over a dataset without requiring a loop to be constructed. However, the functions outlined above do not exploit parallelism, even though there is potential to parallelise many of the operations that utilise them.

      There are a number of mechanisms that can be used to implement parallelism using the {X}apply functions. One of the simplest is using the parallel library, and the mclapply function:

      library(parallel)\nres <- mclapply(1:3, function(i) {\nsqrt(i)\n})\n

      Try experimenting with the above functions on large numbers of iterations, both with lapply and mclapply. Can you achieve better performance using the MC_CORES environment variable to specify how many parallel processes R uses to complete these calculations? The default on the SDF is 2 cores, but you can increase this in the same way we did for OMP_NUM_THREADS, e.g.:

      export MC_CORES=16\n

      Try different numbers of iterations of the functions (e.g. change 1:3 in the code to something much larger), and different numbers of parallel processes, e.g.:

      export MC_CORES=2\n\nexport MC_CORES=8\n\nexport MC_CORES=16\n

      If you have separate functions then the above approach will provide a simple method for parallelising using the resources within a single node. However, if your functionality is more loop-based, then you may not wish to have to package this up into separate functions to parallelise.

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/#parallelisation-with-foreach","title":"Parallelisation with foreach","text":"

      The foreach package can be used to parallelise loops as well as functions. Consider a loop of the following form:

      main_list <- c()\nfor (i in 1:3) {\nmain_list <- c(main_list, sqrt(i))\n}\n

      This can be converted to foreach functionality as follows:

      main_list <- c()\nlibrary(foreach)\nforeach(i=1:3) %do% {\nmain_list <- c(main_list, sqrt(i))\n}\n

      Whilst this approach does not significantly change the performance or functionality of the code, it does let us then exploit parallel functionality in foreach. The %do% can be replaced with a %dopar% which will execute the code in parallel.
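      Note that when the loop body runs on parallel workers, side effects such as appending to main_list in the parent session are not guaranteed to be visible; the usual pattern is to let foreach collect the iteration results itself via its .combine argument. A minimal sketch:

      library(foreach)

      # foreach returns the value of each iteration; .combine = c concatenates them,
      # replacing the manual main_list <- c(main_list, ...) accumulation
      main_list <- foreach(i = 1:3, .combine = c) %do% {
        sqrt(i)
      }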

      To test this out we\u2019re going to try an example using the randomForest library. We can now run the following code in R:

      library(foreach)\nlibrary(randomForest)\nx <- matrix(runif(50000), 1000)\ny <- gl(2, 500)\nrf <- foreach(ntree=rep(250, 4), .combine=combine) %do%\nrandomForest(x, y, ntree=ntree)\nprint(rf)\n

      Implement the above code and run with a system.time to see how long it takes. Once you have done this you can change the %do% to a %dopar% and re-run. Does this provide any performance benefits?

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/#parallelisation-with-doparallel","title":"Parallelisation with doParallel","text":"

      To exploit the parallelism with dopar we need to provide parallel execution functionality and configure it to use extra cores on the system. One method to do this is using the doParallel package.

      library(doParallel)\nregisterDoParallel(8)\n

      Does this now improve performance when running the randomForest example? Experiment with different numbers of workers by changing the number set in registerDoParallel(8) to see what kind of performance you can get. Note, you may also need to change the number of chunks of work used in the foreach, e.g. what is specified in the rep(250, 4) part of the code, to enable more than 4 different sets to be run at once if using more than 4 workers. The number of parallel workers you can use depends on the hardware you have access to, the number of workers you specify when you set up your parallel backend, and the number of chunks of work you have to distribute with your foreach configuration.
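      Putting these pieces together, a sketch of the experiment described above might look like the following (the choice of 8 workers and the split into 8 chunks of 125 trees are illustrative, not prescriptive):

      library(doParallel)
      library(foreach)
      library(randomForest)

      registerDoParallel(8)   # register 8 worker processes

      x <- matrix(runif(50000), 1000)
      y <- gl(2, 500)

      # 8 chunks of 125 trees keeps all 8 workers busy (1000 trees in total)
      system.time(
        rf <- foreach(ntree = rep(125, 8), .combine = combine,
                      .packages = "randomForest") %dopar%
          randomForest(x, y, ntree = ntree)
      )

      stopImplicitCluster()   # release the workers when finished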

      "},{"location":"safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/#cluster-parallelism","title":"Cluster parallelism","text":"

      It is possible to use different parallel backends for foreach. The one we have used in the example above creates new worker processes to provide the parallelism, but you can also use larger numbers of workers through a parallel cluster, e.g.:

      my.cluster <- parallel::makeCluster(8)\nregisterDoParallel(cl = my.cluster)\n

      By default makeCluster creates a socket cluster, where each worker is a new independent process. This can enable running the same R program across a range of systems, as it works on Linux and Windows (and other clients). However, you can also fork the existing R process to create your new workers, e.g.:

      cl <- makeCluster(5, type=\"FORK\")\n

      This saves you from having to export the variables or objects that were set up in the R program/script prior to the creation of the cluster, as they are automatically copied to the workers when using this forking mode. However, it is limited to Linux-style systems and cannot scale beyond a single node.

      Once you have finished using a parallel cluster you should shut it down to free up computational resources, using stopCluster, e.g.:

      stopCluster(cl)\n

      When using clusters without the forking approach, you need to distribute objects and variables from the main process to the workers using the clusterExport function, e.g.:

      library(parallel)\nvariableA <- 10\nvariableB <- 20\nmySum <- function(x) variableA + variableB + x\ncl <- makeCluster(4)\nres <- try(parSapply(cl=cl, 1:40, mySum))\n

      The program above will fail because variableA and variableB are not present on the cluster workers. Try the above on the SDF and see what result you get.

      To fix this issue you can modify the program using clusterExport to send variableA and variableB to the workers, prior to running the parSapply e.g.:

      clusterExport(cl=cl, c('variableA', 'variableB'))\n
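      Putting it all together, a corrected version of the example might look like the sketch below; remember to stop the cluster once you are done with it:

      library(parallel)

      variableA <- 10
      variableB <- 20
      mySum <- function(x) variableA + variableB + x

      cl <- makeCluster(4)

      # Copy the required objects to the workers before calling parSapply
      clusterExport(cl = cl, c('variableA', 'variableB'))

      res <- parSapply(cl = cl, 1:40, mySum)
      print(res)

      stopCluster(cl)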
      "},{"location":"services/","title":"EIDF Services","text":""},{"location":"services/#computing-services","title":"Computing Services","text":"

      Data Science Virtual Desktops

      Managed File Transfer

      Managed JupyterHub

      Cerebras CS-2

      Ultra2

      "},{"location":"services/#data-management-services","title":"Data Management Services","text":"

      Data Catalogue

      "},{"location":"services/cs2/","title":"Cerebras CS-2","text":"

      Get Access

      Running codes

      "},{"location":"services/cs2/access/","title":"Cerebras CS-2","text":""},{"location":"services/cs2/access/#getting-access","title":"Getting Access","text":"

      Access to the Cerebras CS-2 system is currently by arrangement with EPCC. Please email eidf@epcc.ed.ac.uk with a short description of the work you would like to perform.

      "},{"location":"services/cs2/run/","title":"Cerebras CS-2","text":""},{"location":"services/cs2/run/#introduction","title":"Introduction","text":"

      The Cerebras CS-2 system is attached to the SDF-CS1 (Ultra2) system which serves as a host, provides access to files, the SLURM batch system etc.

      "},{"location":"services/cs2/run/#login","title":"Login","text":"

      To login to the host system, use the username and password you obtain from SAFE, along with the SSH Key you registered when creating the account. You can then login directly to the host via: ssh <username>@sdf-cs1.epcc.ed.ac.uk

      "},{"location":"services/cs2/run/#running-jobs","title":"Running Jobs","text":"

      All jobs must be run via SLURM to avoid inconveniencing other users of the system. The csrun_cpu and csrun_wse scripts themselves contain calls to srun to work with the SLURM system, so note the omission of srun in the below examples. Users can either copy these files from /home/y26/shared/bin to their own home directory should they wish, or use the centrally supplied versions. In either case, ensure they are in your PATH before execution, e.g.:

      export PATH=$PATH:/home/y26/shared/bin\n
      "},{"location":"services/cs2/run/#run-on-the-host","title":"Run on the host","text":"

      Jobs can be run on the host system (eg simulations, test scripts) using the csrun_cpu wrapper. Here is the example from the Cerebras documentation on PyTorch. Note that this assumes csrun_cpu is in your path.

      #!/bin/bash\n#SBATCH --job-name=Example        # Job name\n#SBATCH --cpus-per-task=2         # Request 2 cores\n#SBATCH --output=example_%j.log   # Standard output and error log\n#SBATCH --time=01:00:00           # Set time limit for this job to 1 hour\n\ncsrun_cpu python-pt run.py --mode train --compile_only --params configs/<name-of-the-params-file.yaml>\n
      "},{"location":"services/cs2/run/#run-on-the-cs-2","title":"Run on the CS-2","text":"

      The following will run the above PyTorch example on the CS-2 - note the --cs_ip argument with port number passed in via the command line, and the inclusion of the --gres option to request use of the CS-2 via SLURM.

      #!/bin/bash\n#SBATCH --job-name=Example        # Job name\n#SBATCH --tasks-per-node=8        # There is only one node on SDF-CS1\n#SBATCH --cpus-per-task=16        # Each cpu is a core\n#SBATCH --gres=cs:1               # Request CS-2 system\n#SBATCH --output=example_%j.log   # Standard output and error log\n#SBATCH --time=01:00:00           # Set time limit for this job to 1 hour\n\n\ncsrun_wse python-pt run.py --mode train --cs_ip 172.24.102.121:9000 --params configs/<name-of-the-params-file.yaml>\n
      "},{"location":"services/datacatalogue/","title":"EIDF Data Catalogue Information","text":"

      QuickStart

      Tutorial

      Documentation

      Metadata information

      "},{"location":"services/datacatalogue/docs/","title":"Service Documentation","text":""},{"location":"services/datacatalogue/docs/#metadata","title":"Metadata","text":"

      For more information on metadata, please read the following: Metadata

      "},{"location":"services/datacatalogue/docs/#online-support","title":"Online support","text":""},{"location":"services/datacatalogue/metadata/","title":"EIDF Metadata Information","text":""},{"location":"services/datacatalogue/metadata/#what-is-fair","title":"What is FAIR?","text":"

      FAIR stands for Findable, Accessible, Interoperable, and Reusable, and helps emphasise the best practices with publishing and sharing data (more details: FAIR Principles)

      "},{"location":"services/datacatalogue/metadata/#what-is-metadata","title":"What is metadata?","text":"

      Metadata is data about data, helping to describe the dataset. Common metadata fields are things like the title of the dataset, who produced it, where it was generated (if relevant), when it was generated, and some keywords describing it.

      "},{"location":"services/datacatalogue/metadata/#what-is-ckan","title":"What is CKAN?","text":"

      CKAN is a metadata catalogue - i.e. it is a database for metadata rather than data. This will help with all aspects of FAIR:

      "},{"location":"services/datacatalogue/metadata/#what-metadata-will-we-need-to-provide","title":"What metadata will we need to provide?","text":""},{"location":"services/datacatalogue/metadata/#why-do-i-need-to-use-a-controlled-vocabulary","title":"Why do I need to use a controlled vocabulary?","text":"

      Using a standard vocabulary (such as the FAST Vocabulary) has many benefits:

      All of these advantages mean that we, as a project, don't need to think about this - there is no need to reinvent the wheel when other institutes (e.g. National Libraries) have already created one. You might recognise WorldCat - it is an organisation which manages a global catalogue of ~18000 libraries world-wide, so they are in a good position to generate a comprehensive vocabulary of academic topics!

      "},{"location":"services/datacatalogue/metadata/#what-about-licensing-what-does-cc-by-sa-40-mean","title":"What about licensing? (What does CC-BY-SA 4.0 mean?)","text":"

      The R in FAIR stands for reusable - more specifically it includes this subphrase: \"(Meta)data are released with a clear and accessible data usage license\". This means that we have to tell anyone else who uses the data what they're allowed to do with it - and, under the FAIR philosophy, more freedom is better.

      CC-BY-SA 4.0 allows anyone to remix, adapt, and build upon your work (even for commercial purposes), as long as they credit you and license their new creations under the identical terms. It also explicitly includes Sui Generis Database Rights, giving rights to the curation of a database even if you don't have the rights to the items in a database (e.g. a Spotify playlist, even though you don't own the rights to each track).

      Human readable summary: Creative Commons 4.0 Human Readable Full legal code: Creative Commons 4.0 Legal Code

      "},{"location":"services/datacatalogue/metadata/#im-stuck-how-do-i-get-help","title":"I'm stuck! How do I get help?","text":"

      Contact the EIDF Service Team via eidf@epcc.ed.ac.uk

      "},{"location":"services/datacatalogue/quickstart/","title":"Quickstart","text":""},{"location":"services/datacatalogue/quickstart/#accessing","title":"Accessing","text":""},{"location":"services/datacatalogue/quickstart/#first-task","title":"First Task","text":""},{"location":"services/datacatalogue/quickstart/#further-information","title":"Further information","text":""},{"location":"services/datacatalogue/tutorial/","title":"Tutorial","text":""},{"location":"services/datacatalogue/tutorial/#first-query","title":"First Query","text":""},{"location":"services/gpuservice/","title":"Overview","text":"

      The EIDF GPU Service (EIDFGPUS) uses Nvidia A100 GPUs as accelerators.

      Full Nvidia A100 GPUs are connected to 40GB of dynamic memory.

      Multi-Instance GPU (MIG) instances allow multiple tasks or users to share the same GPU (similar to CPU threading).

      There are two types of MIG GPUs inside the EIDFGPUS: the Nvidia A100 3G.20GB GPUs and the Nvidia A100 1G.5GB GPUs, which equate to ~1/2 and ~1/7 of a full Nvidia A100 40GB GPU respectively.

      The current specification of the EIDFGPUS is:

      The EIDFGPUS is managed using Kubernetes, with up to 8 GPUs on a single node.

      "},{"location":"services/gpuservice/#service-access","title":"Service Access","text":"

      Users should have an EIDF account - EIDF Accounts.

      Project Leads can have access to the EIDFGPUS added to their project during the project application process or through a request to the EIDF helpdesk.

      Each project will be given a namespace to operate in and a kubeconfig file in a Virtual Machine on the EIDF DSC - information on access to VMs is available here.

      "},{"location":"services/gpuservice/#project-quotas","title":"Project Quotas","text":"

      A standard project namespace has the following initial quota (subject to ongoing review):

      Note that these quotas are the maximum use by a single project, and that during periods of high usage Kubernetes Jobs may be queued waiting for resources to become available on the cluster.

      "},{"location":"services/gpuservice/#additional-service-policy-information","title":"Additional Service Policy Information","text":"

      Additional information on service policies can be found here.

      "},{"location":"services/gpuservice/#eidf-gpu-service-tutorial","title":"EIDF GPU Service Tutorial","text":"

      This tutorial teaches users how to submit tasks to the EIDFGPUS, but it is not a comprehensive overview of Kubernetes.

      Lesson Objective Getting started with Kubernetes a. What is Kubernetes?b. How to send a task to a GPU node.c. How to define the GPU resources needed. Requesting persistent volumes with Kubernetes a. What is a persistent volume? b. How to request a PV resource. Running a PyTorch task a. Accessing a Pytorch container.b. Submitting a PyTorch task to the cluster.c. Inspecting the results."},{"location":"services/gpuservice/#further-reading-and-help","title":"Further Reading and Help","text":""},{"location":"services/gpuservice/policies/","title":"GPU Service Policies","text":""},{"location":"services/gpuservice/policies/#namespaces","title":"Namespaces","text":"

      Each project will be given a namespace which will have an applied quota.

      Default Quota:

      "},{"location":"services/gpuservice/policies/#kubeconfig","title":"Kubeconfig","text":"

      Each project will be assigned a kubeconfig file for access to the service which will allow operation in the assigned namespace and access to exposed service operators, for example the GPU and CephRBD operators.

      "},{"location":"services/gpuservice/policies/#kubernetes-job-time-to-live","title":"Kubernetes Job Time to Live","text":"

      All Kubernetes Jobs submitted to the service will have a Time to Live (TTL) applied via \"spec.ttlSecondsAfterFinished\" automatically. The default TTL for jobs using the service will be 1 week (604800 seconds). A completed job (in success or error state) will be deleted from the service once one week has elapsed after execution has completed. This will reduce excessive object accumulation on the service.

      Note: This policy is automated and does not require users to change their job specifications.
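      For illustration only, the automatically applied field sits in the Job specification as sketched below (the job name and container are placeholders); you do not need to add it yourself:

      apiVersion: batch/v1
      kind: Job
      metadata:
        name: example-job
      spec:
        ttlSecondsAfterFinished: 604800   # applied automatically by the service (1 week)
        template:
          spec:
            restartPolicy: Never
            containers:
            - name: example
              image: busybox
              command: ["echo", "hello"]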

      "},{"location":"services/gpuservice/training/L1_getting_started/","title":"Getting started with Kubernetes","text":""},{"location":"services/gpuservice/training/L1_getting_started/#introduction","title":"Introduction","text":"

      Kubernetes (K8s) is a systems administration tool originally developed by Google to orchestrate the deployment, scaling, and management of containerised applications.

      Nvidia have created drivers to officially support clusters of Nvidia GPUs managed by K8s.

      Using K8s to manage the EIDFGPUS provides two key advantages:

      "},{"location":"services/gpuservice/training/L1_getting_started/#interacting-with-a-k8s-cluster","title":"Interacting with a K8s cluster","text":"

      An overview of the key components of a K8s container can be seen on the Kubernetes docs website.

      The primary component of a K8s cluster is a pod.

      A pod is a set of one or more containers (and their storage volumes) that share resources.

      Users define the resource requirements of a pod (i.e. number/type of GPU) and the containers to be run in the pod by writing a yaml file.

      The pod definition yaml file is sent to the cluster using the K8s API and is assigned to an appropriate node to be run.

      A node is a unit of the cluster, e.g. a group of GPUs or virtual GPUs.

      Multiple pods can be defined and maintained using several different methods depending on purpose: deployments, services and jobs; see the K8s docs for more details.

      Users interact with the K8s API using the kubectl (short for kubernetes control) commands. Some of the kubectl commands are restricted on the EIDF cluster in order to ensure project details are not shared across namespaces. Useful commands are:
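      For reference, the commands used most often in this tutorial follow the patterns below (a reminder rather than an exhaustive list of what is permitted on the cluster):

      # Submit a resource (pod, job, persistent volume claim, ...) defined in a yaml file
      kubectl create -f <yaml-file>

      # List the pods in your namespace and their current status
      kubectl get pods

      # Show the detailed state and recent events of a single pod
      kubectl describe pod <pod-name>

      # Print the stdout/stderr produced by a pod's containers
      kubectl logs <pod-name>

      # Remove a pod once you are finished with it
      kubectl delete pod <pod-name>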

      "},{"location":"services/gpuservice/training/L1_getting_started/#creating-your-first-pod","title":"Creating your first pod","text":"

      Nvidia have several prebuilt docker images to perform different tasks on their GPU hardware.

      The list of docker images is available on their website.

      This example uses their CUDA sample code simulating nbody interactions.

      1. Open an editor of your choice and create the file test_NBody.yml
      2. Copy the following into the file:

      The pod resources are defined with the requests and limits tags.

      Resources defined in the requests tags are the minimum possible resources required for the pod to run.

      If a pod is assigned to an unused node then it may use resources beyond those requested.

      This may allow the task within the pod to run faster, but it also runs the risk of unnecessarily blocking off resources for future pod requests.

      The limits tag specifies the maximum resources that can be assigned to a pod.

      The EIDFGPUS cluster requires all pods to have requests and limits tags for cpu and memory resources in order to be accepted.

      Finally, it is optional to define GPU resources, but only the limits tag is used to specify the use of a GPU: limits: nvidia.com/gpu: 1.

      apiVersion: v1\nkind: Pod\nmetadata:\ngenerateName: first-pod-\nspec:\nrestartPolicy: OnFailure\ncontainers:\n- name: cudasample\nimage: nvcr.io/nvidia/k8s/cuda-sample:nbody-cuda11.7.1\nargs: [\"-benchmark\", \"-numbodies=512000\", \"-fp64\", \"-fullscreen\"]\nresources:\nrequests:\ncpu: 2\nmemory: \"1Gi\"\nlimits:\ncpu: 4\nmemory: \"4Gi\"\nnvidia.com/gpu: 1\n
      1. Save the file and exit the editor
      2. Run `kubectl create -f test_NBody.yml`
      3. This will output something like:

        pod/first-pod-7gdtb created\n
      4. Run kubectl get pods

      5. This will output something like:

        pi-tt9kq                                                          0/1     Completed   0              24h\nfirst-pod-24n7n                                                   0/1     Completed   0              24h\nfirst-pod-2j5tc                                                   0/1     Completed   0              24h\nfirst-pod-2kjbx                                                   0/1     Completed   0              24h\nsample-2mnvg                                                      0/1     Completed   0              24h\nsample-4sng2                                                      0/1     Completed   0              24h\nsample-5h6sr                                                      0/1     Completed   0              24h\nsample-6bqql                                                      0/1     Completed   0              24h\nfirst-pod-7gdtb                                                   0/1     Completed   0              39s\nsample-8dnht                                                      0/1     Completed   0              24h\nsample-8pxz4                                                      0/1     Completed   0              24h\nsample-bphjx                                                      0/1     Completed   0              24h\nsample-cp97f                                                      0/1     Completed   0              24h\nsample-gcbbb                                                      0/1     Completed   0              24h\nsample-hdlrr                                                      0/1     Completed   0              24h\n
      6. View the logs of the pod you ran: kubectl logs first-pod-7gdtb

      7. This will output something like:

        Run \"nbody -benchmark [-numbodies=<numBodies>]\" to measure performance.\n    -fullscreen       (run n-body simulation in fullscreen mode)\n-fp64             (use double precision floating point values for simulation)\n-hostmem          (stores simulation data in host memory)\n-benchmark        (run benchmark to measure performance)\n-numbodies=<N>    (number of bodies (>= 1) to run in simulation)\n-device=<d>       (where d=0,1,2.... for the CUDA device to use)\n-numdevices=<i>   (where i=(number of CUDA devices > 0) to use for simulation)\n-compare          (compares simulation results running once on the default GPU and once on the CPU)\n-cpu              (run n-body simulation on the CPU)\n-tipsy=<file.bin> (load a tipsy model file for simulation)\n\nNOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.\n\n> Fullscreen mode\n> Simulation data stored in video memory\n> Double precision floating point simulation\n> 1 Devices used for simulation\nGPU Device 0: \"Ampere\" with compute capability 8.0\n\n> Compute 8.0 CUDA device: [NVIDIA A100-SXM4-40GB]\nnumber of bodies = 512000\n512000 bodies, total time for 10 iterations: 10570.778 ms\n= 247.989 billion interactions per second\n= 7439.679 double-precision GFLOP/s at 30 flops per interaction\n
      8. Delete your pod with kubectl delete pod first-pod-7gdtb

      "},{"location":"services/gpuservice/training/L1_getting_started/#specifying-gpu-requirements","title":"Specifying GPU requirements","text":"

      If you create multiple pods with the same yaml file and compare their log files you may notice the CUDA device may differ from Compute 8.0 CUDA device: [NVIDIA A100-SXM4-40GB].

      This is because K8s is allocating the pod to any free node irrespective of whether that node contains a full 40GB Nvidia A100 or a GPU instance from a MIG Nvidia A100.

      The GPU resource request can be more specific by adding the type of product the pod is requesting to the node selector:

      "},{"location":"services/gpuservice/training/L1_getting_started/#example-yaml-file","title":"Example yaml file","text":"
      apiVersion: v1\nkind: Pod\nmetadata:\ngenerateName: first-pod-\nspec:\nrestartPolicy: OnFailure\ncontainers:\n- name: cudasample\nimage: nvcr.io/nvidia/k8s/cuda-sample:nbody-cuda11.7.1\nargs: [\"-benchmark\", \"-numbodies=512000\", \"-fp64\", \"-fullscreen\"]\nresources:\nrequests:\ncpu: 2\nmemory: \"1Gi\"\nlimits:\ncpu: 4\nmemory: \"4Gi\"\nnvidia.com/gpu: 1\nnodeSelector:\nnvidia.com/gpu.product: NVIDIA-A100-SXM4-40GB-MIG-1g.5gb\n
      "},{"location":"services/gpuservice/training/L2_requesting_persistent_volumes/","title":"Requesting Persistent Volumes With Kubernetes","text":"

      Pods in the K8s EIDFGPUS are intentionally ephemeral.

      They only last as long as required to complete the task that they were created for.

      Keeping pods ephemeral ensures the cluster resources are released for other users to request.

      However, this means the default storage volumes within a pod are temporary.

      If multiple pods require access to the same large data set or they output large files, then computationally costly file transfers need to be included in every pod instance.

      Instead, K8s allows you to request persistent volumes that can be mounted to multiple pods to share files or collate outputs.

      These persistent volumes will remain even if the pods they are mounted to are deleted, updated or crash.

      "},{"location":"services/gpuservice/training/L2_requesting_persistent_volumes/#submitting-a-persistent-volume-claim","title":"Submitting a Persistent Volume Claim","text":"

      Before a persistent volume can be mounted to a pod, the required storage resources need to be requested and reserved to your namespace.

      A PersistentVolumeClaim (PVC) needs to be submitted to K8s to request the storage resources.

      The storage resources are held on a Ceph server which can accept requests of up to 100 TiB. Currently, each PVC can only be accessed by one pod at a time; this limitation is being addressed in further development of the EIDFGPUS. This means that, at this stage, pods can mount the same PVC in sequence, but not concurrently.

      Example PVCs can be seen on the Kubernetes documentation page.

      All PVCs on the EIDFGPUS must use the csi-rbd-sc storage class.

      "},{"location":"services/gpuservice/training/L2_requesting_persistent_volumes/#example-persistentvolumeclaim","title":"Example PersistentVolumeClaim","text":"
      kind: PersistentVolumeClaim\napiVersion: v1\nmetadata:\nname: test-ceph-pvc\nspec:\naccessModes:\n- ReadWriteOnce\nresources:\nrequests:\nstorage: 2Gi\nstorageClassName: csi-rbd-sc\n

      You create a persistent volume by passing the yaml file to kubectl, just as with a pod specification yaml: kubectl create -f <PVC specification yaml>. Once you have successfully created a persistent volume you can interact with it using the standard kubectl commands:
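      For example, assuming the claim above named test-ceph-pvc has been submitted:

      # Check whether the claim is Pending or Bound
      kubectl get pvc test-ceph-pvc

      # Inspect the claim in detail, including recent events
      kubectl describe pvc test-ceph-pvc

      # Delete the claim and release the storage when it is no longer needed
      kubectl delete pvc test-ceph-pvc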

      "},{"location":"services/gpuservice/training/L2_requesting_persistent_volumes/#mounting-a-persistent-volume-to-a-pod","title":"Mounting a persistent Volume to a Pod","text":"

      Introducing a persistent volume to a pod requires the addition of a volumeMount option to the container and a volume option linking to the PVC in the pod specification yaml.

      "},{"location":"services/gpuservice/training/L2_requesting_persistent_volumes/#example-pod-specification-yaml-with-mounted-persistent-volume","title":"Example pod specification yaml with mounted persistent volume","text":"
      apiVersion: v1\nkind: Pod\nmetadata:\nname: test-ceph-pvc-pod\nspec:\ncontainers:\n- name: trial\nimage: busybox\ncommand: [\"sleep\", \"infinity\"]\nresources:\nrequests:\ncpu: 1\nmemory: \"1Gi\"\nlimits:\ncpu: 1\nmemory: \"1Gi\"\nvolumeMounts:\n- mountPath: /mnt/ceph_rbd\nname: volume\nvolumes:\n- name: volume\npersistentVolumeClaim:\nclaimName: test-ceph-pvc\n
      "},{"location":"services/gpuservice/training/L2_requesting_persistent_volumes/#accessing-the-persistent-volume-outside-a-pod","title":"Accessing the persistent volume outside a pod","text":"

      To move files in/out of the persistent volume from outside a pod you can use the kubectl cp command.

      *** On Login Node ***\nkubectl cp /home/data/test_data.csv test-ceph-pvc-pod:/mnt/ceph_rbd\n

      For more complex file transfers and synchronisation, create a low resource pod with the persistent volume mounted.

      The bash command rsync can be adapted to manage file transfers into the mounted PV, following this GitHub repo.

      "},{"location":"services/gpuservice/training/L2_requesting_persistent_volumes/#clean-up","title":"Clean up","text":"
      kubectl delete pod test-ceph-pvc-pod\n\nkubectl delete pvc test-ceph-pvc\n
      "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/","title":"Running a PyTorch task","text":"

      In the following lesson, we\u2019ll build an NLP neural network and train it using the EIDFGPUS.

      The model was taken from the PyTorch Tutorials.

      The lesson will be split into three parts:

      "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#load-training-data-and-ml-code-into-a-persistent-volume","title":"Load training data and ML code into a persistent volume","text":""},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#create-a-persistent-volume","title":"Create a persistent volume","text":"

      Request storage from the Ceph server by submitting a PVC to K8s (example pvc spec yaml below).

      kubectl create -f <pvc-spec-yaml>\n
      "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#example-pytorch-persistentvolumeclaim","title":"Example PyTorch PersistentVolumeClaim","text":"
      kind: PersistentVolumeClaim\napiVersion: v1\nmetadata:\nname: pytorch-pvc\nspec:\naccessModes:\n- ReadWriteOnce\nresources:\nrequests:\nstorage: 2Gi\nstorageClassName: csi-rbd-sc\n
      "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#transfer-codedata-to-persistent-volume","title":"Transfer code/data to persistent volume","text":"
      1. Check PVC has been created

        kubectl get pvc <pv-name>\n
      2. Create a lightweight pod with PV mounted (example pod below)

        kubectl create -f lightweight-pod.yaml\n
      3. Download the pytorch code

        wget https://github.com/EPCCed/eidf-docs/raw/main/docs/services/gpuservice/training/resources/example_pytorch_code.py\n
      4. Copy python script into the PV

        kubectl cp example_pytorch_code.py lightweight-pod:/mnt/ceph_rbd/\n
      5. Check files were transferred successfully

        kubectl exec lightweight-pod -- ls /mnt/ceph_rbd\n
      6. Delete lightweight pod

        kubectl delete pod lightweight-pod\n
      "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#example-lightweight-pod-specification","title":"Example lightweight pod specification","text":"
      apiVersion: v1\nkind: Pod\nmetadata:\nname: lightweight-pod\nspec:\ncontainers:\n- name: data-loader\nimage: busybox\ncommand: [\"sleep\", \"infinity\"]\nresources:\nrequests:\ncpu: 1\nmemory: \"1Gi\"\nlimits:\ncpu: 1\nmemory: \"1Gi\"\nvolumeMounts:\n- mountPath: /mnt/ceph_rbd\nname: volume\nvolumes:\n- name: volume\npersistentVolumeClaim:\nclaimName: pytorch-pvc\n
      "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#creating-a-pod-with-a-pytorch-container","title":"Creating a pod with a PyTorch container","text":"

      We will use the pre-made PyTorch Docker image available on Docker Hub to run the PyTorch ML model.

      The PyTorch container will be held within a pod that has the persistent volume mounted and has access to a MIG GPU.

      Submit the specification file to K8s to create the pod.

      kubectl create -f <pytorch-pod-yaml>\n
      "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#example-pytorch-pod-specification-file","title":"Example PyTorch Pod Specification File","text":"
      apiVersion: v1\nkind: Pod\nmetadata:\nname: pytorch-pod\nspec:\nrestartPolicy: Never\ncontainers:\n- name: pytorch-con\nimage: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel\ncommand: [\"python3\"]\nargs: [\"/mnt/ceph_rbd/example_pytorch_code.py\"]\nvolumeMounts:\n- mountPath: /mnt/ceph_rbd\nname: volume\nresources:\nrequests:\ncpu: 2\nmemory: \"1Gi\"\nlimits:\ncpu: 4\nmemory: \"4Gi\"\nnvidia.com/gpu: 1\nnodeSelector:\nnvidia.com/gpu.product: NVIDIA-A100-SXM4-40GB-MIG-1g.5gb\nvolumes:\n- name: volume\npersistentVolumeClaim:\nclaimName: pytorch-pvc\n
      "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#reviewing-the-results-of-the-pytorch-model","title":"Reviewing the results of the PyTorch model","text":"

      This is not intended to be an introduction to PyTorch, please see the online tutorial for details about the model.

      1. Check model ran to completion

        kubectl logs <pytorch-pod-name>\n
      2. Spin up lightweight pod to retrieve results

        kubectl create -f lightweight-pod.yaml\n
      3. Copy trained model back to the head node

        kubectl cp lightweight-pod:mnt/ceph_rbd/model.pth model.pth\n
      "},{"location":"services/gpuservice/training/L3_running_a_pytorch_task/#clean-up","title":"Clean up","text":"
      kubectl delete pod pytorch-pod\n\nkubectl delete pvc pytorch-pvc\n
      "},{"location":"services/gpuservice/training/L4_template_workflow/","title":"Template workflow","text":""},{"location":"services/jhub/","title":"EIDF Jupyterhub","text":"

      QuickStart

      Tutorial

      Documentation

      "},{"location":"services/jhub/docs/","title":"Service Documentation","text":""},{"location":"services/jhub/docs/#online-support","title":"Online support","text":""},{"location":"services/jhub/quickstart/","title":"Quickstart","text":""},{"location":"services/jhub/quickstart/#accessing","title":"Accessing","text":""},{"location":"services/jhub/quickstart/#first-task","title":"First Task","text":""},{"location":"services/jhub/quickstart/#further-information","title":"Further information","text":""},{"location":"services/jhub/tutorial/","title":"Tutorial","text":""},{"location":"services/jhub/tutorial/#first-notebook","title":"First notebook","text":""},{"location":"services/mft/","title":"MFT","text":""},{"location":"services/mft/quickstart/","title":"Managed File Transfer","text":""},{"location":"services/mft/quickstart/#getting-to-the-mft","title":"Getting to the MFT","text":"

      The EIDF MFT can be accessed at https://eidf-mft.epcc.ed.ac.uk

      "},{"location":"services/mft/quickstart/#how-it-works","title":"How it works","text":"

      The MFT provides a 'drop' zone for the project. All users in a given project will have access to the same shared transfer area. They will have the ability to upload, download, and delete files from the project's transfer area. This area is linked to a directory within the projects space on the shared backend storage.

      Files which are uploaded are owned by the Linux user 'nobody' and the group ID of whatever project the file is being uploaded to. They have the permissions: Owner = rw, Group = r, Others = r.

      Once the file is opened on the VM, the user that opened it will become the owner and they can make further changes.

      "},{"location":"services/mft/quickstart/#gaining-access-to-the-mft","title":"Gaining access to the MFT","text":"

      By default a project won't have access to the MFT; this has to be enabled. Currently this can be done by the PI sending a request to the EIDF Helpdesk. Once the project is enabled within the MFT, every user within the project will be able to log into the MFT using their usual EIDF credentials.

      "},{"location":"services/mft/sftp/","title":"SFTP","text":"

      Coming Soon

      "},{"location":"services/mft/using-the-mft/","title":"Using the MFT Web Portal","text":""},{"location":"services/mft/using-the-mft/#logging-in","title":"Logging in","text":"

      When you reach the MFT home page you can log in using your usual VM project credentials.

      You will then be asked what type of session you would like to start. Select New Web Client or Web Client and continue.

      "},{"location":"services/mft/using-the-mft/#file-ingress","title":"File Ingress","text":"

      Once logged in, all files currently in the project's transfer directory will be displayed. Click the 'Upload' button under the 'Home' title to open the dialogue for file upload. You can then drag and drop files in, or click 'Browse' to find them locally.

      Once uploaded, the file will be immediately accessible from the project area, and can be used within any EIDF service which has the filesystem mounted.

      "},{"location":"services/mft/using-the-mft/#file-egress","title":"File Egress","text":"

      File egress can be done in the reverse way. By placing the file into the project transfer directory, it will become available in the MFT portal.

      "},{"location":"services/mft/using-the-mft/#file-management","title":"File Management","text":"

      Directories can be created within the project transfer directory, for example 'Import' and 'Export', to allow for better file management. Files deleted from either the MFT portal or from the VM itself will be removed from both, as both locations point at the same file. It is only stored in one place, so changes made in either location apply to that single copy.

      "},{"location":"services/rstudioserver/","title":"EIDF R Studio Server","text":"

      QuickStart

      Tutorial

      Documentation

      "},{"location":"services/rstudioserver/docs/","title":"Service Documentation","text":""},{"location":"services/rstudioserver/docs/#online-support","title":"Online support","text":""},{"location":"services/rstudioserver/quickstart/","title":"Quickstart","text":""},{"location":"services/rstudioserver/quickstart/#accessing","title":"Accessing","text":""},{"location":"services/rstudioserver/quickstart/#first-task","title":"First Task","text":""},{"location":"services/rstudioserver/quickstart/#creating-a-new-r-script","title":"Creating a New R Script","text":"

      Your RStudio Server session has been initialised now. If you are participating in a workshop, then all the packages and data required for the workshop have been loaded into the workspace. All that remains is to create a new R script to contain your code!

      1. In the RStudio Server UI, open the File menu item at the far left of the main menu bar at the top of the page
      2. Hover over the \u2018New File\u2019 sub-menu item, then select \u2018R Script\u2019 from the expanded menu
      3. A new window pane will appear in the UI as shown below, and you are now ready to start adding the R code to your script! RStudio Server UI screen with new script
      "},{"location":"services/rstudioserver/quickstart/#further-information","title":"Further information","text":""},{"location":"services/rstudioserver/tutorial/","title":"Tutorial","text":""},{"location":"services/rstudioserver/tutorial/#first-notebook","title":"First notebook","text":""},{"location":"services/ultra2/","title":"Ultra2 Large Memory System","text":"

      Get Access

      Running codes

      "},{"location":"services/ultra2/access/","title":"Ultra2 Large Memory System","text":""},{"location":"services/ultra2/access/#getting-access","title":"Getting Access","text":"

      Access to the Ultra2 system (also referred to as the SDF-CS1 system) is currently by arrangement with EPCC. Please email eidf@epcc.ed.ac.uk with a short description of the work you would like to perform.

      "},{"location":"services/ultra2/run/","title":"Ultra2 High Memory System","text":""},{"location":"services/ultra2/run/#introduction","title":"Introduction","text":"

      The Ultra2 system (also called the SDF-CS1 system) is a single logical CPU system based at EPCC. It is suitable for running jobs which require large volumes of non-distributed memory (as opposed to a cluster).

      "},{"location":"services/ultra2/run/#specifications","title":"Specifications","text":"

      The system is an HPE SuperDome Flex containing 576 individual cores in an SMT-1 arrangement (1 thread per core). The system has 18TB of memory available to users. Home directories are network mounted from the EIDF e1000 Lustre filesystem, although some local NVMe storage is available for temporary file storage during runs.

      "},{"location":"services/ultra2/run/#login","title":"Login","text":"

      To login to the host system, use the username and password you obtain from SAFE, along with the SSH Key you registered when creating the account. You can then login directly to the host via: ssh <username>@sdf-cs1.epcc.ed.ac.uk

      "},{"location":"services/ultra2/run/#software","title":"Software","text":"

      The primary software provided is Intel's OneAPI suite containing MPI compilers and runtimes, debuggers and the VTune performance analyser. Standard GNU compilers are also available. The OneAPI suite can be loaded by sourcing the shell script:

      source  /opt/intel/oneapi/setvars.sh\n
      "},{"location":"services/ultra2/run/#running-jobs","title":"Running Jobs","text":"

      All jobs must be run via SLURM to avoid inconveniencing other users of the system. Users should not run jobs directly. Note that the system has one logical processor with a large number of threads and thus appears to SLURM as a single node. This is intentional.

      "},{"location":"services/ultra2/run/#queue-limits","title":"Queue limits","text":"

      We kindly request that users limit their maximum total running job size to 288 cores and 4TB of memory, whether that be within a single job or divided across a number of jobs. This may be enforced via SLURM in the future.

      "},{"location":"services/ultra2/run/#mpi-jobs","title":"MPI jobs","text":"

      An example script to run a multi-process MPI \"Hello world\" example is shown.

      #!/usr/bin/env bash\n#SBATCH -J HelloWorld\n#SBATCH --nodes=1\n#SBATCH --tasks-per-node=4\n#SBATCH --nodelist=sdf-cs1\n#SBATCH --partition=standard\n##SBATCH --exclusive\n\n\necho \"Running on host ${HOSTNAME}\"\necho \"Using ${SLURM_NTASKS_PER_NODE} tasks per node\"\necho \"Using ${SLURM_CPUS_PER_TASK} cpus per task\"\nlet mpi_threads=${SLURM_NTASKS_PER_NODE}*${SLURM_CPUS_PER_TASK}\necho \"Using ${mpi_threads} MPI threads\"\n\n# Source oneAPI to ensure mpirun available\nif [[ -z \"${SETVARS_COMPLETED}\" ]]; then\nsource /opt/intel/oneapi/setvars.sh\nfi\n\n# mpirun invocation for Intel suite.\nmpirun -n ${mpi_threads} ./helloworld.exe\n
      "},{"location":"services/virtualmachines/docs/","title":"Service Documentation","text":""},{"location":"services/virtualmachines/docs/#project-management-guide","title":"Project Management Guide","text":""},{"location":"services/virtualmachines/docs/#required-member-permissions","title":"Required Member Permissions","text":"

      VMs and user accounts can only be managed by project members with Cloud Admin permissions. This includes the principal investigator (PI) of the project and all project managers (PM). Through SAFE the PI can designate project managers and the PI and PMs can grant a project member the Cloud Admin role:

      1. Click \"Manage Project in SAFE\" at the bottom of the project page (opens a new tab)
      2. On the project management page in SAFE, scroll down to \"Manage Members\"
      3. Click Add project manager or Set member permissions

      For details please refer to the SAFE documentation: How can I designate a user as a project manager?

      "},{"location":"services/virtualmachines/docs/#create-a-vm","title":"Create a VM","text":"

      To create a new VM:

      1. Select the project from the list of your projects, e.g. eidfxxx
      2. Click on the 'New Machine' button
      3. Complete the 'Create Machine' form as follows:

        1. Provide an appropriate name, e.g. dev-01. The project code will be prepended automatically to your VM name, in this case your VM would be named eidfxxx-dev-01.
        2. Select a suitable operating system
        3. Select a machine specification that is suitable
        4. Choose the required disk size (in GB) or leave blank for the default
        5. Tick the checkbox \"Configure RDP access\" if you would like to install RDP and configure VDI connections via RDP for your VM.
        6. Select the package installations from the software catalogue drop-down list, or \"None\" if you don't require any pre-installed packages
      4. Click on 'Create'

      5. You should see the new VM listed under the 'Machines' table on the project page and the status as 'Creating'
      6. Wait while the job to launch the VM completes. This may take up to 10 minutes, depending on the configuration you requested. You have to reload the page to see updates.
      7. Once the job has completed successfully the status shows as 'Active' in the list of machines.

      You may wish to ensure that the machine size selected (number of CPUs and RAM) does not exceed your remaining quota before you press Create, otherwise the request will fail.

      In the list of 'Machines' in the project page in the portal, click on the name of new VM to see the configuration and properties, including the machine specification, its 10.24.*.* IP address and any configured VDI connections.

      "},{"location":"services/virtualmachines/docs/#quota-and-usage","title":"Quota and Usage","text":"

      Each project has a quota for the number of instances, total number of vCPUs, total RAM and storage. You will not be able to create a VM if it exceeds the quota.

      You can view and refresh the project usage compared to the quota in a table near the bottom of the project page. This table will be updated automatically when VMs are created or removed, and you can refresh it manually by pressing the \"Refresh\" button at the top of the table.

      Please contact the helpdesk if your quota requirements have changed.

      "},{"location":"services/virtualmachines/docs/#add-a-user-account","title":"Add a user account","text":"

      User accounts allow project members to log in to the VMs in a project. The Project PI and project managers manage user accounts for each member of the project. Users usually use one account (username and password) to log in to all the VMs in the same project that they can access; however, a user may have multiple accounts in a project, for example for different roles.

      1. From the project page in the portal click on the 'Create account' button under the 'Project Accounts' table at the bottom
      2. Complete the 'Create User Account' form as follows:

        1. Choose 'Account user name': this could be something sensible like the first and last names concatenated (or initials) together with the project name. The username is unique across all EPCC systems so the user will not be able to reuse this name in another project once it has been assigned.
        2. Select the project member from the 'Account owner' drop-down field
        3. Click 'Create'

      The new account is allocated a temporary password which the account owner can view in their account details.

      "},{"location":"services/virtualmachines/docs/#adding-access-to-the-vm-for-a-user","title":"Adding Access to the VM for a User","text":"

      User accounts can be granted or denied access to existing VMs.

      1. Click 'Manage' next to an existing user account in the 'Project Accounts' table on the project page, or click on the account name and then 'Manage' on the account details page
      2. Select the checkboxes in the column \"Access\" for the VMs to which this account should have access or uncheck the ones without access
      3. Click the 'Update' button
      4. After a few minutes, the job to give them access to the selected VMs will complete and the account status will show as \"Active\".

      If a user is logged in already to the VDI at https://eidf-vdi.epcc.ed.ac.uk/vdi newly added connections may not appear in their connections list immediately. They must log out and log in again to refresh the connection information, or wait until the login token expires and is refreshed automatically - this might take a while.

      If a user only has one connection available in the VDI they will be automatically directed to the VM with the default connection.

      "},{"location":"services/virtualmachines/docs/#sudo-permissions","title":"Sudo permissions","text":"

      A project manager or PI may also grant sudo permissions to users on selected VMs. Management of sudo permissions must be requested in the project application - if it was not requested or the request was denied the functionality described below is not available.

      1. Click 'Manage' next to an existing user account in the 'Project Accounts' table on the project page
      2. Select the checkboxes in the column \"Sudo\" for the VMs on which this account is granted sudo permissions or uncheck to remove permissions
      3. Make sure \"Access\" is also selected for the sudo VMs to allow login
      4. Click the 'Update' button

      After a few minutes, the job to give the user account sudo permissions on the selected VMs will complete. On the account detail page a \"sudo\" badge will appear next to the selected VMs.

      Please contact the helpdesk if sudo permission management is required but is not available in your project.

      "},{"location":"services/virtualmachines/docs/#first-login","title":"First login","text":"

      A new user account is allocated a temporary password which the user must reset before they can log in for the first time. The password reset will not work when logging in via RDP - they must use an SSH connection, either in the VDI or via an SSH gateway.

      The user can view the temporary password in their account details page.

      "},{"location":"services/virtualmachines/docs/#updating-an-existing-machine","title":"Updating an existing machine","text":""},{"location":"services/virtualmachines/docs/#adding-rdp-access","title":"Adding RDP Access","text":"

      If you did not select RDP access when you created the VM you can add it later:

      1. Open the VM details page by selecting the name on the project page
      2. Click on 'Configure RDP'
      3. The configuration job runs for a few minutes.

      Once the RDP job is completed, all users that are allowed to access the VM will also be permitted to use the RDP connection.

      "},{"location":"services/virtualmachines/docs/#software-catalogue","title":"Software catalogue","text":"

      You can install packages from the software catalogue at a later time, even if you didn't select a package when first creating the machine.

      1. Open the VM details page by selecting the name on the project page
      2. Click on 'Software Catalogue'
      3. Select the configuration you wish to install and press 'Submit'
      4. The configuration job runs for a few minutes.
      "},{"location":"services/virtualmachines/flavours/","title":"Flavours","text":"

      These are the current Virtual Machine (VM) flavours (configurations) available on the Virtual Desktop cloud service. Note that all VMs are built and configured using the EIDF Portal by PIs/Cloud Admins of projects, except GPU flavours which must be requested via the helpdesk or the support request form.

      Flavour Name vCPUs DRAM in GB Pinned Cores GPU general.v2.tiny 1 2 No No general.v2.small 2 4 No No general.v2.medium 4 8 No No general.v2.large 8 16 No No general.v2.xlarge 16 32 No No capability.v2.8cpu 8 112 Yes No capability.v2.16cpu 16 224 Yes No capability.v2.32cpu 32 448 Yes No capability.v2.48cpu 48 672 Yes No capability.v2.64cpu 64 896 Yes No gpu.v1.8cpu 8 128 Yes Yes gpu.v1.16cpu 16 256 Yes Yes gpu.v1.32cpu 32 512 Yes Yes gpu.v1.48cpu 48 768 Yes Yes"},{"location":"services/virtualmachines/policies/","title":"EIDF Data Science Cloud Policies","text":""},{"location":"services/virtualmachines/policies/#end-of-life-policy-for-user-accounts-and-projects","title":"End of Life Policy for User Accounts and Projects","text":""},{"location":"services/virtualmachines/policies/#what-happens-when-an-account-or-project-is-no-longer-required-or-a-user-leaves-a-project","title":"What happens when an account or project is no longer required, or a user leaves a project","text":"

      These situations are most likely to come about during one of the following scenarios:

      1. The retirement of a project (usually one month after project end)
      2. A Principal Investigator (PI) tidying up a project requesting the removal of user(s) no longer working on the project
      3. A user wishing their own account to be removed
      4. A failure by a user to respond to the annual request to verify their email address held in the SAFE

      For each user account involved, assuming the relevant consent is given, the next step can be summarised as one of the following actions:

      It will be possible to have the account re-activated up until resources are removed (as outlined above); after this time it will be necessary to re-apply.

      A user's right to use EIDF is granted by a project. Our policy is to treat the account and associated data as the property of the PI as the owner of the project and its resources. It is the user's responsibility to ensure that any data they store on the EIDF DSC is handled appropriately and to copy off anything that they wish to keep to an appropriate location.

      A project manager or the PI can revoke a user's access to accounts within their project at any time, by locking, removing or re-owning the account as appropriate.

      A user may give up access to an account and return it to the control of the project at any time.

      When a project is due to end, the PI will receive notification of the closure of the project and its accounts one month before all project accounts and DSC resources (VMs, data volumes) are closed and cleaned or removed.

      "},{"location":"services/virtualmachines/quickstart/","title":"Quickstart","text":"

      Projects using the Virtual Desktop cloud service are accessed via the EIDF Portal.

      Authentication is provided by SAFE, so if you do not have an active web browser session in SAFE, you will be redirected to the SAFE log on page. If you do not have a SAFE account, follow the instructions in the SAFE documentation on how to register and receive your password.

      "},{"location":"services/virtualmachines/quickstart/#accessing-your-projects","title":"Accessing your projects","text":"
      1. Log into the portal at https://portal.eidf.ac.uk/. The login will redirect you to the SAFE.

      2. View the projects that you have access to at https://portal.eidf.ac.uk/project/

      "},{"location":"services/virtualmachines/quickstart/#joining-a-project","title":"Joining a project","text":"
      1. Navigate to https://portal.eidf.ac.uk/project/ and click the link to \"Request access\", or choose \"Request Access\" in the \"Project\" menu.

      2. Select the project that you want to join in the \"Project\" dropdown list - you can search for the project name or the project code, e.g. \"eidf0123\".

      Now you have to wait for your PI or project manager to accept your request to join.

      "},{"location":"services/virtualmachines/quickstart/#accessing-a-vm","title":"Accessing a VM","text":"
      1. View your user accounts on the project page.

      2. Click on an account name to view details of the VMs that you are allowed to access with this account, and look up the temporary password allocated to the account.

      3. Follow the link to the Guacamole login or log in directly at https://eidf-vdi.epcc.ed.ac.uk/vdi/. Please see the VDI guide for more information.

      4. Choose the SSH connection to log in for the first time. You will be asked to reset the password.

      Warning

      Do not use RDP to log in for the first time as you have to reset your password. Always use SSH to log in to the VM for the first time. This can be done either via the VDI or the EIDF-Gateway Jump Host as described here.

      "},{"location":"services/virtualmachines/quickstart/#further-information","title":"Further information","text":"

      Managing VMs: Project management guide to creating, configuring and removing VMs and managing user accounts in the portal.

      Virtual Desktop Interface: Working with the VDI interface.

      "},{"location":"status/","title":"EIDF Service Status","text":"

      The table below represents the broad status of each EIDF service.

      Service Status EIDF Portal VM SSH Gateway VM VDI Gateway Virtual Desktops Cerebras CS-2 SuperDome Flex (SDF-CS1 / Ultra2)"},{"location":"status/#maintenance-sessions","title":"Maintenance Sessions","text":"

      There will be a service outage on the 3rd Thursday of every month from 9am to 5pm. We keep maintenance downtime to a minimum on the service but do occasionally need to perform essential work on the system. Maintenance sessions are used to ensure that:

      The service will be returned to normal operation ahead of 5pm if all the work is completed early.

      "}]} \ No newline at end of file diff --git a/services/cs2/access/index.html b/services/cs2/access/index.html index 4480e3449..d7ac9680f 100644 --- a/services/cs2/access/index.html +++ b/services/cs2/access/index.html @@ -17,7 +17,7 @@ - + diff --git a/services/cs2/index.html b/services/cs2/index.html index fd97976b8..bab4a00f4 100644 --- a/services/cs2/index.html +++ b/services/cs2/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/cs2/run/index.html b/services/cs2/run/index.html index 33368a275..9679994bd 100644 --- a/services/cs2/run/index.html +++ b/services/cs2/run/index.html @@ -17,7 +17,7 @@ - + diff --git a/services/datacatalogue/docs/index.html b/services/datacatalogue/docs/index.html index b4f86cd8f..668ecdea6 100644 --- a/services/datacatalogue/docs/index.html +++ b/services/datacatalogue/docs/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/datacatalogue/index.html b/services/datacatalogue/index.html index 5bb9e4630..0852b7419 100644 --- a/services/datacatalogue/index.html +++ b/services/datacatalogue/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/datacatalogue/metadata/index.html b/services/datacatalogue/metadata/index.html index 463a45bf5..a26a7ee03 100644 --- a/services/datacatalogue/metadata/index.html +++ b/services/datacatalogue/metadata/index.html @@ -17,7 +17,7 @@ - + diff --git a/services/datacatalogue/quickstart/index.html b/services/datacatalogue/quickstart/index.html index c25e22b15..3143c70c4 100644 --- a/services/datacatalogue/quickstart/index.html +++ b/services/datacatalogue/quickstart/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/datacatalogue/tutorial/index.html b/services/datacatalogue/tutorial/index.html index 79ae91825..7a9c3d8b8 100644 --- a/services/datacatalogue/tutorial/index.html +++ b/services/datacatalogue/tutorial/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/gpuservice/index.html b/services/gpuservice/index.html index b1d05171d..550b6e0fa 100644 --- a/services/gpuservice/index.html +++ b/services/gpuservice/index.html @@ -17,7 +17,7 @@ - + diff --git a/services/gpuservice/policies/index.html b/services/gpuservice/policies/index.html index a11f601bf..d33f0d96a 100644 --- a/services/gpuservice/policies/index.html +++ b/services/gpuservice/policies/index.html @@ -17,7 +17,7 @@ - + diff --git a/services/gpuservice/training/L1_getting_started/index.html b/services/gpuservice/training/L1_getting_started/index.html index 57b193a20..127762d97 100644 --- a/services/gpuservice/training/L1_getting_started/index.html +++ b/services/gpuservice/training/L1_getting_started/index.html @@ -17,7 +17,7 @@ - + @@ -1516,29 +1516,38 @@

      Creating your first pod

      Nvidia provide several prebuilt Docker images for performing different tasks on their GPU hardware.

      The list of Docker images is available on their website.

      This example uses their CUDA sample code simulating n-body interactions.
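
      If you want to see what the benchmark does before submitting it to the cluster, the same image can be run locally. This is an optional sketch, not part of the cluster workflow: it assumes a workstation with an NVIDIA GPU, Docker 19.03 or later and the NVIDIA Container Toolkit installed, and that the image's entrypoint is the nbody sample binary (which the args in the pod specification below imply).

      # Optional local preview of the nbody benchmark image (not required for the EIDFGPUS workflow)
      docker pull nvcr.io/nvidia/k8s/cuda-sample:nbody-cuda11.7.1
      # --gpus all exposes the local GPU(s) to the container; the trailing flags are passed to the nbody entrypoint
      docker run --rm --gpus all nvcr.io/nvidia/k8s/cuda-sample:nbody-cuda11.7.1 -benchmark -numbodies=10000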

      -

      Note how you specify the use of a GPU by setting limits: nvidia.com/gpu: 1.

      1. Open an editor of your choice and create the file test_NBody.yml
      2. -
      3. -

        Copy the following into the file:

        +
      4. Copy the following into the file:
      5. +
      +

      The pod resources are defined with the requests and limits tags.

      +

      Resources defined under the requests tag are the minimum resources required for the pod to run.

      +

      If a pod is assigned to an otherwise unused node, it may use resources beyond those requested.

      +

      This may allow the task within the pod to run faster, but it also runs the risk of unnecessarily blocking off resources for future pod requests.

      +

      The limits tag specifies the maximum resources that can be assigned to a pod.

      +

      The EIDFGPUS cluster requires all pods to have requests and limits tags for cpu and memory resources in order to be accepted.

      +

      Finally, defining GPU resources is optional, but only the limits tag is used to request a GPU, e.g. limits: nvidia.com/gpu: 1.
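
      The meaning of each field under resources can also be checked against the cluster's own API documentation; a minimal sketch using kubectl's built-in help, assuming kubectl is already configured for the EIDFGPUS cluster:

      # Show the API documentation for the resources block and its limits field
      kubectl explain pod.spec.containers.resources
      kubectl explain pod.spec.containers.resources.limits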

      apiVersion: v1
       kind: Pod
       metadata:
       generateName: first-pod-
       spec:
      -restartPolicy: OnFailure
      -containers:
      -- name: cudasample
      -    image: nvcr.io/nvidia/k8s/cuda-sample:nbody-cuda11.7.1
      -    args: ["-benchmark", "-numbodies=512000", "-fp64", "-fullscreen"]
      -    resources:
      + restartPolicy: OnFailure
      + containers:
      + - name: cudasample
      +   image: nvcr.io/nvidia/k8s/cuda-sample:nbody-cuda11.7.1
      +   args: ["-benchmark", "-numbodies=512000", "-fp64", "-fullscreen"]
      +   resources:
      +    requests:
      +     cpu: 2
      +     memory: "1Gi"
           limits:
      -    nvidia.com/gpu: 1
      +     cpu: 4
      +     memory: "4Gi"
      +     nvidia.com/gpu: 1
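
      Before moving on to the remaining steps, the manifest can be checked client-side, and once the pod exists the scheduler's record of its requests and limits can be inspected. A minimal sketch, assuming kubectl 1.18 or later and that first-pod-abc12 stands in for whatever name Kubernetes generates from the first-pod- prefix:

      # Validate the manifest without creating anything on the cluster
      kubectl create -f test_NBody.yml --dry-run=client
      # After creation, list pods to find the generated name (first-pod-<random suffix>)
      kubectl get pods
      # Confirm the requests and limits recorded for the container (hypothetical generated name)
      kubectl get pod first-pod-abc12 -o jsonpath='{.spec.containers[0].resources}'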
       
      - -
    9. -

      Save the file and exit the editor

      -
    10. +
        +
      1. Save the file and exit the editor
      2. Run `kubectl create -f test_NBody.yml`
      3. This will output something like:

        @@ -1625,7 +1634,12 @@

        Example yaml file

        image: nvcr.io/nvidia/k8s/cuda-sample:nbody-cuda11.7.1 args: ["-benchmark", "-numbodies=512000", "-fp64", "-fullscreen"] resources: + requests: + cpu: 2 + memory: "1Gi" limits: + cpu: 4 + memory: "4Gi" nvidia.com/gpu: 1 nodeSelector: nvidia.com/gpu.product: NVIDIA-A100-SXM4-40GB-MIG-1g.5gb diff --git a/services/gpuservice/training/L2_requesting_persistent_volumes/index.html b/services/gpuservice/training/L2_requesting_persistent_volumes/index.html index 6f2f6ebd4..dc0d9aa42 100644 --- a/services/gpuservice/training/L2_requesting_persistent_volumes/index.html +++ b/services/gpuservice/training/L2_requesting_persistent_volumes/index.html @@ -17,7 +17,7 @@ - + @@ -1556,6 +1556,13 @@

        Example p - name: trial image: busybox command: ["sleep", "infinity"] + resources: + requests: + cpu: 1 + memory: "1Gi" + limits: + cpu: 1 + memory: "1Gi" volumeMounts: - mountPath: /mnt/ceph_rbd name: volume diff --git a/services/gpuservice/training/L3_running_a_pytorch_task/index.html b/services/gpuservice/training/L3_running_a_pytorch_task/index.html index c226dd004..8a2d04c78 100644 --- a/services/gpuservice/training/L3_running_a_pytorch_task/index.html +++ b/services/gpuservice/training/L3_running_a_pytorch_task/index.html @@ -17,7 +17,7 @@ - + @@ -1625,6 +1625,10 @@

        Example lightweight pod specifica resources: requests: cpu: 1 + memory: "1Gi" + limits: + cpu: 1 + memory: "1Gi" volumeMounts: - mountPath: /mnt/ceph_rbd name: volume @@ -1655,7 +1659,12 @@

        Example PyTorch Pod Specificatio - mountPath: /mnt/ceph_rbd name: volume resources: + requests: + cpu: 2 + memory: "1Gi" limits: + cpu: 4 + memory: "4Gi" nvidia.com/gpu: 1 nodeSelector: nvidia.com/gpu.product: NVIDIA-A100-SXM4-40GB-MIG-1g.5gb diff --git a/services/gpuservice/training/L4_template_workflow/index.html b/services/gpuservice/training/L4_template_workflow/index.html index 5b882bf36..b4254dcc1 100644 --- a/services/gpuservice/training/L4_template_workflow/index.html +++ b/services/gpuservice/training/L4_template_workflow/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/index.html b/services/index.html index 0d59369a5..8dc17010f 100644 --- a/services/index.html +++ b/services/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/jhub/docs/index.html b/services/jhub/docs/index.html index 78861675e..9a604712b 100644 --- a/services/jhub/docs/index.html +++ b/services/jhub/docs/index.html @@ -17,7 +17,7 @@ - + diff --git a/services/jhub/index.html b/services/jhub/index.html index 0f595a017..9e75d53f6 100644 --- a/services/jhub/index.html +++ b/services/jhub/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/jhub/quickstart/index.html b/services/jhub/quickstart/index.html index 095f41ca4..029bb2ec6 100644 --- a/services/jhub/quickstart/index.html +++ b/services/jhub/quickstart/index.html @@ -17,7 +17,7 @@ - + diff --git a/services/jhub/tutorial/index.html b/services/jhub/tutorial/index.html index f7874271d..06bea945f 100644 --- a/services/jhub/tutorial/index.html +++ b/services/jhub/tutorial/index.html @@ -17,7 +17,7 @@ - + diff --git a/services/mft/index.html b/services/mft/index.html index 706ce32ec..85c48cc7d 100644 --- a/services/mft/index.html +++ b/services/mft/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/mft/quickstart/index.html b/services/mft/quickstart/index.html index c8a9b8aaa..c4cfe93c4 100644 --- a/services/mft/quickstart/index.html +++ b/services/mft/quickstart/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/mft/sftp/index.html b/services/mft/sftp/index.html index 4c54d70fb..23472f6a7 100644 --- a/services/mft/sftp/index.html +++ b/services/mft/sftp/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/mft/using-the-mft/index.html b/services/mft/using-the-mft/index.html index 4fa474f5c..d947311d2 100644 --- a/services/mft/using-the-mft/index.html +++ b/services/mft/using-the-mft/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/rstudioserver/docs/index.html b/services/rstudioserver/docs/index.html index d031b1147..8e8e52634 100644 --- a/services/rstudioserver/docs/index.html +++ b/services/rstudioserver/docs/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/rstudioserver/index.html b/services/rstudioserver/index.html index 1a1731552..4a6d7853f 100644 --- a/services/rstudioserver/index.html +++ b/services/rstudioserver/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/rstudioserver/quickstart/index.html b/services/rstudioserver/quickstart/index.html index ac4641661..443916614 100644 --- a/services/rstudioserver/quickstart/index.html +++ b/services/rstudioserver/quickstart/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/rstudioserver/tutorial/index.html b/services/rstudioserver/tutorial/index.html index 7ab872eb5..e650d1c83 100644 --- a/services/rstudioserver/tutorial/index.html +++ b/services/rstudioserver/tutorial/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/ultra2/access/index.html b/services/ultra2/access/index.html index 03e39672c..45a1914fc 100644 --- a/services/ultra2/access/index.html +++ 
b/services/ultra2/access/index.html @@ -17,7 +17,7 @@ - + diff --git a/services/ultra2/index.html b/services/ultra2/index.html index 87ad01084..942dbace3 100644 --- a/services/ultra2/index.html +++ b/services/ultra2/index.html @@ -13,7 +13,7 @@ - + diff --git a/services/ultra2/run/index.html b/services/ultra2/run/index.html index 5c364e059..bded11bf6 100644 --- a/services/ultra2/run/index.html +++ b/services/ultra2/run/index.html @@ -17,7 +17,7 @@ - + diff --git a/services/virtualmachines/docs/index.html b/services/virtualmachines/docs/index.html index ee549abba..30e9eccf5 100644 --- a/services/virtualmachines/docs/index.html +++ b/services/virtualmachines/docs/index.html @@ -17,7 +17,7 @@ - + diff --git a/services/virtualmachines/flavours/index.html b/services/virtualmachines/flavours/index.html index ec57bc445..429590200 100644 --- a/services/virtualmachines/flavours/index.html +++ b/services/virtualmachines/flavours/index.html @@ -17,7 +17,7 @@ - + @@ -1374,8 +1374,8 @@

        Flavours

        Flavour Name -vCPUs -DRAM in GB +vCPUs +DRAM in GB Pinned Cores GPU @@ -1383,99 +1383,99 @@

        Flavours

        general.v2.tiny -1 -2 +1 +2 No No general.v2.small -2 -4 +2 +4 No No general.v2.medium -4 -8 +4 +8 No No general.v2.large -8 -16 +8 +16 No No general.v2.xlarge -16 -32 +16 +32 No No capability.v2.8cpu -8 -112 +8 +112 Yes No capability.v2.16cpu -16 -224 +16 +224 Yes No capability.v2.32cpu -32 -448 +32 +448 Yes No capability.v2.48cpu -48 -672 +48 +672 Yes No capability.v2.64cpu -64 -896 +64 +896 Yes No gpu.v1.8cpu -8 -128 +8 +128 Yes Yes gpu.v1.16cpu -16 -256 +16 +256 Yes Yes gpu.v1.32cpu -32 -512 +32 +512 Yes Yes gpu.v1.48cpu -48 -768 +48 +768 Yes Yes diff --git a/services/virtualmachines/policies/index.html b/services/virtualmachines/policies/index.html index c3cbed79a..b2a6c5e2b 100644 --- a/services/virtualmachines/policies/index.html +++ b/services/virtualmachines/policies/index.html @@ -17,7 +17,7 @@ - + diff --git a/services/virtualmachines/quickstart/index.html b/services/virtualmachines/quickstart/index.html index defee4e6d..293032d81 100644 --- a/services/virtualmachines/quickstart/index.html +++ b/services/virtualmachines/quickstart/index.html @@ -17,7 +17,7 @@ - + diff --git a/sitemap.xml b/sitemap.xml index 03e40c8bc..96bc00bb0 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,302 +2,302 @@ https://epcced.github.io/eidf-docs/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/access/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/access/project/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/access/ssh/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/access/virtualmachines-vdi/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/bespoke/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/bespoke/eddash/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/bespoke/eddash/jhub-git/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/bespoke/eddash/safe-registration/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/bespoke/eddash/workshops/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/faq/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/known-issues/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/overview/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/overview/acknowledgements/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/overview/contacts/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/safe-haven-services/network-access-controls/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/safe-haven-services/overview/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/safe-haven-services/safe-haven-access/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/safe-haven-services/using-the-hpc-cluster/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/safe-haven-services/virtual-desktop-connections/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/safe-haven-services/superdome-flex-tutorial/L1_Accessing_the_SDF_Inside_the_EPCC_TRE/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/safe-haven-services/superdome-flex-tutorial/L2_running_R_Python_analysis_scripts/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/safe-haven-services/superdome-flex-tutorial/L3_submitting_scripts_to_slurm/ - 2023-07-26 + 2023-07-31 daily 
https://epcced.github.io/eidf-docs/safe-haven-services/superdome-flex-tutorial/L4_parallelised_python_analysis/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/safe-haven-services/superdome-flex-tutorial/L5_parallelised_r_analysis/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/cs2/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/cs2/access/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/cs2/run/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/datacatalogue/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/datacatalogue/docs/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/datacatalogue/metadata/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/datacatalogue/quickstart/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/datacatalogue/tutorial/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/gpuservice/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/gpuservice/policies/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/gpuservice/training/L1_getting_started/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/gpuservice/training/L2_requesting_persistent_volumes/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/gpuservice/training/L3_running_a_pytorch_task/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/gpuservice/training/L4_template_workflow/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/jhub/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/jhub/docs/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/jhub/quickstart/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/jhub/tutorial/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/mft/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/mft/quickstart/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/mft/sftp/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/mft/using-the-mft/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/rstudioserver/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/rstudioserver/docs/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/rstudioserver/quickstart/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/rstudioserver/tutorial/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/ultra2/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/ultra2/access/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/ultra2/run/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/virtualmachines/docs/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/virtualmachines/flavours/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/virtualmachines/policies/ - 2023-07-26 + 2023-07-31 daily https://epcced.github.io/eidf-docs/services/virtualmachines/quickstart/ - 2023-07-26 + 2023-07-31 daily 
https://epcced.github.io/eidf-docs/status/ - 2023-07-26 + 2023-07-31 daily \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index 1bf12a7e84edfb917c35204b548ac679b094969a..19f5b8610ed9cb39d1427a0a975f0fe3f693c1da 100644 GIT binary patch delta 765 zcmVABzYGeWu5e2OfWP+7D5xscWT4d#JkY*>dem;^NrGcZT-ApG{c@ zsC(JN#1RRRIL7*X?aSTy!tCm)w}TZ#G~Qhf&qw3o0G#sLxb|}R^T%!VVR$t^o26(G zXplQQyc~8!;Zt3&*Xt1)f?ft3`PkN|x_p84wy_?MJ`U$+gM5?$Ud}eh%zA${FQZ@{ z27*$cN3EeQJ&cU6!RV&aUSU0}OUK>cNu=KE`D8qqR^#{8^gMUjw(WFNopdcU5tef; zjeVZgJ1s9j^()aovdMf0Ss;e!A3+fZRK$)0=E_7$()Lny#sO9cq&jlijnnGn3S5PSJICVM0Tp*py+r1!=!o9()4{^X z-vZxhV~`TRr66J6?Nrp|8$ngdCM1aLRBUt7N&_y)hw>!2ob~6>&ToH3Fa?O(_fRz! zo+?V@qmj1$b}l||Y%DLB$O|UEf4LQ39U2Xr%bUCFYw_d$=5z#4L`=>Vx%cAknL6); zbkaVfL4hioKqs`iQ-SHBH$+A6R-GU((5Bd*j?4=YWR%u|H3(XS{bfBJo$vN_MkW$} z>tX?^aKZN!n_Yh|4_Ga8@}57|w+T2RAAPm=FWfzlEG-<_m&vFFl8nTW{x!_7 zU~fa(p0nc(-Wp}N#KRqMiqe$Kx$GKR*d~?5+(9$ek~ajfOn@6Xp}e@uyH!VG zK)KmNVaA36AKE`9UN(jik&-sPTpxQi`fX%BPI;f3+tavD&&y3Go0FvpD^^ZLtSvH| vI@ZDesu;PDL-NMTl~0i=J+D3gAw3rdPLk4pnBvVB0?mH`l}D`d?JEEP^EiKz delta 764 zcmV9V{WD@$O=HIvNiL;FQ(zg(jDmR> z2ugt-wT8O%FfzUdqnk>5h4r*99k+ick$SIZlkw!N8h@xJ?{k+;+g3N#N!LOXVKLLv z*ym}z)$#&VzY_f;o6L8R1!9Q)5fpJiMeI0WE={B)Z7)@49ALH7=Af}30E3?c3r5gd z9=H|kp+4pgRqn@$8D#@igw}r`MM>HDbAn&_ILFS1!}h+7TpTpt-1(KO3}S-Ao=Bhb zlcj+b4@jxX#9GiT^x%le)GF^tsspE8JFQMG!Btqeb1beMP;m#21}X7d3KHhsPDNe55mcpYLW0On#U>}MG~k?kC{KdRS$_`g{8oPiQ-G*_4^?B~ zsiH(a8fokAr{c@n#`1!RykO$T*BkN8q0z9uyuQ7<68Cr4$0K+mVsfs?y%V?3)OjbQ zllB=63RKYqI-=D*6PO-)Lsax`)CuweZHoQz$UGN8MrkcrgP=v&UDo5#`EFNdWFqmm zE*6jqCwxFT0#!rcSO(!!B_k&K!n$w(aOU&9Ow z_BN!=IXm9qtx<+cJlq1OC{4+n%dVk?O;TCR9W-Mtc|!n;1h|$H%8R?aTXiIs-lm@Q zDxJM$wiF3x1&ILgqtRxVnNYz-s;-RLz(qKf&Cw**K<8q6JwQqsnk>qD=e6hGr|06pNmBX`Q@s8{pxH0oG&eWxD*ym05`nz{ diff --git a/status/index.html b/status/index.html index d9a579945..6df58133c 100644 --- a/status/index.html +++ b/status/index.html @@ -15,7 +15,7 @@ - + @@ -1413,34 +1413,34 @@

        EIDF Service Status

        - - + + - - + + - - + + - - + + - - + + - - + + - - + +
        ServiceStatusServiceStatus
        EIDF PortalCustom badgeEIDF PortalCustom badge
        VM SSH GatewayCustom badgeVM SSH GatewayCustom badge
        VM VDI GatewayCustom badgeVM VDI GatewayCustom badge
        Virtual DesktopsCustom badgeVirtual DesktopsCustom badge
        Cerebras CS-2Custom badgeCerebras CS-2Custom badge
        SuperDome Flex (SDF-CS1 / Ultra2)Custom badgeSuperDome Flex (SDF-CS1 / Ultra2)Custom badge