diff --git a/docusaurus.config.js b/docusaurus.config.js index eee8b9bdc..81c46fcd2 100644 --- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -1,8 +1,20 @@ import { themes } from "prism-react-renderer"; const path = require("path"); +// @ts-check +// `@type` JSDoc annotations allow editor autocompletion and type checking +// (when paired with `@ts-check`). +// There are various equivalent ways to declare your Docusaurus config. +// See: https://docusaurus.io/docs/api/docusaurus-config + +import {themes as prismThemes} from 'prism-react-renderer'; +import math from 'remark-math'; +import katex from 'rehype-katex'; +import remarkYamlToTable from 'remark-yaml-to-table'; +import remarkCodeImport from 'remark-code-import'; +import tabBlocks from 'docusaurus-remark-plugin-tab-blocks'; import "dotenv/config"; -import platform_enterprise_latest_version from "./platform-enterprise_latest_version.js"; +// This runs in Node.js - Don't use client-side code here (browser APIs, JSX...) export default async function createConfigAsync() { const changelog = { @@ -26,26 +38,14 @@ export default async function createConfigAsync() { const docs_platform_enterprise = [ "@docusaurus/plugin-content-docs", { - id: "platform-enterprise", - routeBasePath: "/platform-enterprise", - includeCurrentVersion: false, - remarkPlugins: [ - (await import("remark-code-import")).default, - (await require("remark-math")).default, - (await import("docusaurus-remark-plugin-tab-blocks")).default, - (await require("remark-yaml-to-table")).default, - ], - rehypePlugins: [(await require("rehype-katex")).default], - editUrl: "https://github.com/seqeralabs/docs/tree/master/", - sidebarPath: false, - versions: { - // Replace /platform-enterprise with /platform-enterprise/24.2, when no version is specified in the URL. 
- // (Applies to latest version only) - [platform_enterprise_latest_version]: { - label: platform_enterprise_latest_version, - path: platform_enterprise_latest_version, - }, - }, + id: 'platform-enterprise', + path: 'platform-enterprise', + routeBasePath: '/platform-enterprise', + sidebarPath: './platform-enterprise/sidebar.json', + includeCurrentVersion: false, + remarkPlugins: [ math, remarkYamlToTable], //remarkCodeImport tabBlocks + rehypePlugins: [katex], + editUrl: 'https://github.com/seqeralabs/docs/tree/master/', }, ]; @@ -80,75 +80,55 @@ export default async function createConfigAsync() { const docs_platform_cloud = [ "@docusaurus/plugin-content-docs", { - id: "platform-cloud", - routeBasePath: "/platform-cloud", - path: "platform-cloud/docs", - remarkPlugins: [ - (await import("remark-code-import")).default, - (await require("remark-math")).default, - (await import("docusaurus-remark-plugin-tab-blocks")).default, - (await require("remark-yaml-to-table")).default, - ], - rehypePlugins: [(await require("rehype-katex")).default], - editUrl: "https://github.com/seqeralabs/docs/tree/master/", - sidebarPath: "./platform-cloud/cloud-sidebar.json", + id: 'platform-cloud', + path: "platform-cloud/docs", + routeBasePath: '/platform-cloud', + sidebarPath: './platform-cloud/cloud-sidebar.json', + editUrl: 'https://github.com/seqeralabs/docs/tree/master/', + remarkPlugins: [math, remarkYamlToTable, remarkCodeImport , tabBlocks], + rehypePlugins: [katex], }, ]; const docs_multiqc = [ "@docusaurus/plugin-content-docs", { - id: "multiqc", - routeBasePath: "/multiqc", - path: "multiqc_docs/multiqc_repo/docs/markdown", - remarkPlugins: [ - (await import("remark-code-import")).default, - (await require("remark-math")).default, - (await import("docusaurus-remark-plugin-tab-blocks")).default, - (await require("remark-yaml-to-table")).default, - ], - rehypePlugins: [(await require("rehype-katex")).default], - editUrl: ({ docPath }) => { - return 
`https://github.com/MultiQC/MultiQC/blob/main/docs/markdown/${docPath.replace("multiqc_docs/multiqc_repo/docs", "")}`; - }, - sidebarPath: "./multiqc_docs/sidebar.js", + id: 'multiqc', + path: "multiqc_docs/multiqc_repo/docs/markdown", + routeBasePath: '/multiqc', + sidebarPath: './multiqc_docs/sidebar.js', + remarkPlugins: [math, remarkYamlToTable, remarkCodeImport , tabBlocks], + rehypePlugins: [katex], + editUrl: ({ docPath }) => { + return `https://github.com/MultiQC/MultiQC/blob/main/docs/markdown/${docPath.replace('multiqc', '')}` + }, }, ]; const docs_fusion = [ "@docusaurus/plugin-content-docs", { - id: "fusion", - routeBasePath: "/fusion", - path: "fusion_docs", - remarkPlugins: [ - (await import("remark-code-import")).default, - (await require("remark-math")).default, - (await import("docusaurus-remark-plugin-tab-blocks")).default, - (await require("remark-yaml-to-table")).default, - ], - rehypePlugins: [(await require("rehype-katex")).default], - editUrl: "https://github.com/seqeralabs/docs/tree/master/", - sidebarPath: "./fusion_docs/sidebar.json", + id: 'fusion', + path: 'fusion_docs', + routeBasePath: '/fusion', + sidebarPath: './fusion_docs/sidebar.json', + remarkPlugins: [math, remarkYamlToTable, remarkCodeImport , tabBlocks], + rehypePlugins: [katex], + editUrl: 'https://github.com/seqeralabs/docs/tree/master/', }, ]; const docs_wave = [ "@docusaurus/plugin-content-docs", { - id: "wave", - routeBasePath: "/wave", - path: "wave_docs/wave_repo/docs", - remarkPlugins: [ - (await import("remark-code-import")).default, - (await require("remark-math")).default, - (await import("docusaurus-remark-plugin-tab-blocks")).default, - (await require("remark-yaml-to-table")).default, - ], - rehypePlugins: [(await require("rehype-katex")).default], - editUrl: ({ docPath }) => { - return `https://github.com/seqeralabs/wave/blob/master/docs/${docPath.replace("wave_docs/wave_repo/docs", "")}`; - }, - sidebarPath: "./wave_docs/sidebar.json", + id: "wave", + 
routeBasePath: "/wave", + path: "wave_docs/wave_repo/docs", + sidebarPath: './wave_docs/sidebar.json', + remarkPlugins: [math, remarkYamlToTable, remarkCodeImport , tabBlocks], + rehypePlugins: [katex], + editUrl: ({ docPath }) => { + return `https://github.com/seqeralabs/wave/blob/master/docs/${docPath.replace('wave', '')}` + }, }, ]; @@ -402,8 +382,8 @@ export default async function createConfigAsync() { copyright: `© ${new Date().getFullYear()} Seqera`, }, prism: { - theme: themes.oneLight, - darkTheme: themes.oneDark, + theme: prismThemes.oneLight, + darkTheme: prismThemes.oneDark, additionalLanguages: [ "bash", "docker", diff --git a/platform-enterprise/_images/nf_home_page.png b/platform-enterprise/_images/nf_home_page.png new file mode 100644 index 000000000..fa562e50d Binary files /dev/null and b/platform-enterprise/_images/nf_home_page.png differ diff --git a/platform-enterprise/_images/overview_image.png b/platform-enterprise/_images/overview_image.png new file mode 100644 index 000000000..7f7ef12b0 Binary files /dev/null and b/platform-enterprise/_images/overview_image.png differ diff --git a/platform-enterprise/_images/staging_options.png b/platform-enterprise/_images/staging_options.png new file mode 100644 index 000000000..850bede43 Binary files /dev/null and b/platform-enterprise/_images/staging_options.png differ diff --git a/platform-enterprise/_images/tw_agent.png b/platform-enterprise/_images/tw_agent.png new file mode 100644 index 000000000..2f39652c2 Binary files /dev/null and b/platform-enterprise/_images/tw_agent.png differ diff --git a/platform-enterprise/_images/tw_agent_running.png b/platform-enterprise/_images/tw_agent_running.png new file mode 100644 index 000000000..2bc2bd92b Binary files /dev/null and b/platform-enterprise/_images/tw_agent_running.png differ diff --git a/platform-enterprise/_images/tw_cli.png b/platform-enterprise/_images/tw_cli.png new file mode 100644 index 000000000..d93acb717 Binary files /dev/null and 
b/platform-enterprise/_images/tw_cli.png differ diff --git a/platform-enterprise/_templates/aws-batch/forge-policy.json b/platform-enterprise/_templates/aws-batch/forge-policy.json new file mode 100644 index 000000000..cad01805e --- /dev/null +++ b/platform-enterprise/_templates/aws-batch/forge-policy.json @@ -0,0 +1,62 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "TowerForge0", + "Effect": "Allow", + "Action": [ + "ssm:GetParameters", + "iam:CreateInstanceProfile", + "iam:DeleteInstanceProfile", + "iam:GetRole", + "iam:RemoveRoleFromInstanceProfile", + "iam:CreateRole", + "iam:DeleteRole", + "iam:AttachRolePolicy", + "iam:PutRolePolicy", + "iam:AddRoleToInstanceProfile", + "iam:PassRole", + "iam:DetachRolePolicy", + "iam:ListAttachedRolePolicies", + "iam:DeleteRolePolicy", + "iam:ListRolePolicies", + "iam:TagRole", + "iam:TagInstanceProfile", + "batch:CreateComputeEnvironment", + "batch:DescribeComputeEnvironments", + "batch:CreateJobQueue", + "batch:DescribeJobQueues", + "batch:UpdateComputeEnvironment", + "batch:DeleteComputeEnvironment", + "batch:UpdateJobQueue", + "batch:DeleteJobQueue", + "batch:TagResource", + "fsx:DeleteFileSystem", + "fsx:DescribeFileSystems", + "fsx:CreateFileSystem", + "fsx:TagResource", + "ec2:DescribeSecurityGroups", + "ec2:DescribeAccountAttributes", + "ec2:DescribeSubnets", + "ec2:DescribeLaunchTemplates", + "ec2:DescribeLaunchTemplateVersions", + "ec2:CreateLaunchTemplate", + "ec2:DeleteLaunchTemplate", + "ec2:DescribeKeyPairs", + "ec2:DescribeVpcs", + "ec2:DescribeInstanceTypeOfferings", + "ec2:GetEbsEncryptionByDefault", + "elasticfilesystem:DescribeMountTargets", + "elasticfilesystem:CreateMountTarget", + "elasticfilesystem:CreateFileSystem", + "elasticfilesystem:DescribeFileSystems", + "elasticfilesystem:DeleteMountTarget", + "elasticfilesystem:DeleteFileSystem", + "elasticfilesystem:UpdateFileSystem", + "elasticfilesystem:PutLifecycleConfiguration", + "elasticfilesystem:TagResource" + ], + "Resource": "*" + } 
+ ] +} diff --git a/platform-enterprise/_templates/aws-batch/launch-policy.json b/platform-enterprise/_templates/aws-batch/launch-policy.json new file mode 100644 index 000000000..f129defa5 --- /dev/null +++ b/platform-enterprise/_templates/aws-batch/launch-policy.json @@ -0,0 +1,36 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "TowerLaunch0", + "Effect": "Allow", + "Action": [ + "batch:DescribeJobQueues", + "batch:CancelJob", + "batch:SubmitJob", + "batch:ListJobs", + "batch:TagResource", + "batch:DescribeComputeEnvironments", + "batch:TerminateJob", + "batch:DescribeJobs", + "batch:RegisterJobDefinition", + "batch:DescribeJobDefinitions", + "ecs:DescribeTasks", + "ec2:DescribeInstances", + "ec2:DescribeInstanceTypes", + "ec2:DescribeInstanceAttribute", + "ecs:DescribeContainerInstances", + "ec2:DescribeInstanceStatus", + "logs:Describe*", + "logs:Get*", + "logs:List*", + "logs:StartQuery", + "logs:StopQuery", + "logs:TestMetricFilter", + "logs:FilterLogEvents", + "secretsmanager:ListSecrets" + ], + "Resource": "*" + } + ] +} \ No newline at end of file diff --git a/platform-enterprise/_templates/aws-batch/s3-bucket-write-policy.json b/platform-enterprise/_templates/aws-batch/s3-bucket-write-policy.json new file mode 100644 index 000000000..684f37e12 --- /dev/null +++ b/platform-enterprise/_templates/aws-batch/s3-bucket-write-policy.json @@ -0,0 +1,26 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:ListBucket" + ], + "Resource": [ + "arn:aws:s3:::YOUR-BUCKET-NAME" + ] + }, + { + "Action": [ + "s3:GetObject", + "s3:PutObject", + "s3:PutObjectTagging", + "s3:DeleteObject" + ], + "Resource": [ + "arn:aws:s3:::YOUR-BUCKET-NAME/*" + ], + "Effect": "Allow" + } + ] +} \ No newline at end of file diff --git a/platform-enterprise/_templates/eks/eks-iam-policy.json b/platform-enterprise/_templates/eks/eks-iam-policy.json new file mode 100644 index 000000000..951a40011 --- /dev/null +++ 
b/platform-enterprise/_templates/eks/eks-iam-policy.json @@ -0,0 +1,14 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "TowerEks0", + "Effect": "Allow", + "Action": [ + "eks:ListClusters", + "eks:DescribeCluster" + ], + "Resource": "*" + } + ] + } \ No newline at end of file diff --git a/platform-enterprise/_templates/k8s/tower-launcher.yml b/platform-enterprise/_templates/k8s/tower-launcher.yml new file mode 100644 index 000000000..2d6ac8cca --- /dev/null +++ b/platform-enterprise/_templates/k8s/tower-launcher.yml @@ -0,0 +1,60 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: tower-nf + +--- + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: tower-launcher-sa + namespace: tower-nf + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: tower-launcher-role + namespace: tower-nf +rules: + - apiGroups: [""] + resources: ["pods", "pods/status", "pods/log", "pods/exec", "persistentvolumeclaims", "configmaps"] + verbs: ["get", "list", "watch", "create", "delete"] + - apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["get", "list", "watch", "create", "delete"] + - apiGroups: ["batch"] + resources: ["jobs", "jobs/status", "jobs/log"] + verbs: ["get", "list", "watch", "create", "delete"] + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: tower-launcher-rolebind + namespace: tower-nf +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: tower-launcher-role +subjects: + - kind: ServiceAccount + name: tower-launcher-sa + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: tower-launcher-userbind + namespace: tower-nf +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: tower-launcher-role +subjects: + - kind: User + name: tower-launcher-user diff --git a/platform-enterprise/_templates/k8s/tower-scratch-local.yml b/platform-enterprise/_templates/k8s/tower-scratch-local.yml new file mode 100644 index 
000000000..f282e9e71 --- /dev/null +++ b/platform-enterprise/_templates/k8s/tower-scratch-local.yml @@ -0,0 +1,31 @@ +# PVC backed by local storage +# Only works for a single node cluster + +apiVersion: v1 +kind: PersistentVolume +metadata: + name: tower-storage + namespace: tower-nf +spec: + storageClassName: scratch + capacity: + storage: 10Gi + accessModes: + - ReadWriteMany + hostPath: + path: /tmp/tower + +--- + +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: tower-scratch + namespace: tower-nf +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 10Gi + storageClassName: scratch diff --git a/platform-enterprise/_templates/k8s/tower-scratch-nfs.yml b/platform-enterprise/_templates/k8s/tower-scratch-nfs.yml new file mode 100644 index 000000000..9e3e29a98 --- /dev/null +++ b/platform-enterprise/_templates/k8s/tower-scratch-nfs.yml @@ -0,0 +1,111 @@ +# PVC backed by NFS server +# +# For K8s environments other than GKE, you will need to modify the nfs-storage +# persistent volume to reference directly the IP address of the nfs-server: +# 1. Deploy the nfs-server resources +# 2. Get the IP address of the nfs-server with `kubectl get service nfs-server` +# 3. Replace `nfs-server.tower-nf.svc.cluster.local` with the IP address in the nfs-storage YAML +# 4. 
Deploy the nfs-storage PV and tower-scratch PVC +# +# For more information, see https://github.com/kubernetes/minikube/issues/3417#issuecomment-670005434 + +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: nfs-server + namespace: tower-nf +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: standard + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nfs-server + namespace: tower-nf +spec: + replicas: 1 + selector: + matchLabels: + role: nfs-server + template: + metadata: + labels: + role: nfs-server + spec: + containers: + - name: nfs-server + image: gcr.io/google_containers/volume-nfs:0.8 + ports: + - name: nfs + containerPort: 2049 + - name: mountd + containerPort: 20048 + - name: rpcbind + containerPort: 111 + securityContext: + privileged: true + volumeMounts: + - mountPath: /exports + name: vol-1 + volumes: + - name: vol-1 + persistentVolumeClaim: + claimName: nfs-server + +--- + +apiVersion: v1 +kind: Service +metadata: + name: nfs-server + namespace: tower-nf +spec: + ports: + - name: nfs + port: 2049 + - name: mountd + port: 20048 + - name: rpcbind + port: 111 + selector: + role: nfs-server + +--- + +apiVersion: v1 +kind: PersistentVolume +metadata: + name: nfs-storage + namespace: tower-nf +spec: + capacity: + storage: 10Gi + accessModes: + - ReadWriteMany + nfs: + # For K8s environments other than GKE, the nfs-server IP address must be used + server: nfs-server.tower-nf.svc.cluster.local + path: "/" + +--- + +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: tower-scratch + namespace: tower-nf +spec: + accessModes: + - ReadWriteMany + storageClassName: "" + resources: + requests: + storage: 10Gi + diff --git a/platform-enterprise/administration/overview.md b/platform-enterprise/administration/overview.md new file mode 100644 index 000000000..3d38fcc99 --- /dev/null +++ b/platform-enterprise/administration/overview.md @@ -0,0 +1,69 @@ +--- +title: "Admin panel" 
+description: "Overview of Seqera user and organization administration" +date: "21 Apr 2023" +tags: [administration, user, workspace, organization] +--- + +As a root user, you can access a comprehensive overview of the organizations, workspaces, users, and teams in your account from the **Admin panel**. It also includes tabs for application event audit logs, administrative statistics, and system configuration options. The root user system role should only be assigned to a system administrator as it provides high-level visibility and configuration access to your account. + +:::tip +See [Basic configuration](../../version-24.2/enterprise/configuration/overview#basic-configuration) to learn how to add root users to your Seqera Enterprise deployment with the `TOWER_ROOT_USERS` environment variable. +::: + +## Manage organizations + +:::note +From version 23.2, organization owners and root users can edit organization names on the **Edit organization** page. +::: + +The **Organizations** tab lists all the organizations in your account. + +- Use the search function to find an organization by name and perform various operations with that organization. +- Select **Add organization** to create a new organization. +- Select an organization name from the table to edit or delete it. + +See [Organizations](../orgs-and-teams/organizations) for more information. + +## Manage members + +From an organization's page, select the **Members of organization** tab to view a list of its members. Here you can list and search for all members and owners of the organization, change a member's role, remove a member from the organization, or add a new member to the organization. + +You can only add existing users to an organization. You can't remove the last owner of an organization until you promote another member to **Owner** first. + +See [Members](../orgs-and-teams/organizations#members) for more information. 
+ +## Manage workspaces + +The **Workspaces** tab lists all the workspaces in your account. + +- Use the search function to find a workspace by name to view and edit that workspace. +- Select **Add workspace** to create a new workspace. Choose a workspace name that isn't already in use. If the new workspace name already exists in the system, the creation will fail. After the workspace is created, it's listed in the **Workspaces** tab. +- Select **Edit** next to a workspace name to edit or delete the workspace. + +See [Workspaces](../orgs-and-teams/workspace-management) for more information. + +## Manage users + +The **Users** tab lists all the users in your account. + +- Select **Add user** to create a new user. If the new user email already exists in the system, the user creation will fail. After the user is created, inform them that access has been granted. +- Use the search function to find a user by name or email. +- Select a username from the list or select **Edit** to view and update the user's details. +- To disable a user's Platform login access, select **Disable user**. This action does not delete the user. +- To reinstate a disabled user's Platform login access, select **Allow login**. This option is grayed out for active users. + +See [User roles](../orgs-and-teams/roles) for more information on organization and workspace user access roles. + +## Manage teams + +The **Teams** tab lists all the teams in your account. + +- Use the organizations dropdown next to the search bar to filter teams by organization. +- Use the search function to find a team by name and perform various operations. +- Select **Add team** to create a new team. +- Select **Edit** next to a team to edit the team's details, or select **Delete** to delete it. + +From the teams list, you have an overview of the number of members and the unique ID of each team. Select **Edit** to view a team's page, or select the number next to **Members:** to go to the **Members** tab of the team page. 
From the **Members of team** tab, you can list and search for all users that are members of the selected team, change a user's role, remove a member from the team, or add a new member to the team. + +See [Teams](../orgs-and-teams/organizations#teams) for more information. \ No newline at end of file diff --git a/platform-enterprise/api/overview.mdx b/platform-enterprise/api/overview.mdx new file mode 100644 index 000000000..cd0550c2e --- /dev/null +++ b/platform-enterprise/api/overview.mdx @@ -0,0 +1,112 @@ +--- +title: "API" +description: "Using the Seqera Platform API." +date: "15 Mar 2024" +tags: [api] +--- + +import MDXComponents from "@theme-original/MDXComponents"; +import Button from "@site/src/components/Button"; + +The Seqera Platform services API is a programmatic interface for all operations available in the Platform web UI. This allows organizations to integrate Platform seamlessly into existing processes. + +### API reference + +

+ + +

+ +The API reference is an interactive list of all API endpoints and includes request and response payload examples to test requests in the browser. + +:::info +The API requires authentication with a Platform access token to perform requests. + + +::: + +The API and its interactive reference is built on the OpenAPI standard. +Download the current OpenAPI schema file [here](https://cloud.seqera.io/openapi/seqera-api-latest.yml). + +:::tip +For more information on the OpenAPI standard, see [OpenAPI](https://swagger.io/specification/). +::: + +### Encoding and formatting standards + +- The API can be accessed from `https://api.cloud.seqera.io`. +- All API endpoints use HTTPS, and all request and response payloads use [JSON](https://www.json.org/) encoding. +- All timestamps use the [ISO 8601](https://www.iso.org/iso-8601-date-and-time-format.html) date-time standard format: `YYYY-MM-DDTHH:MM:SSZ`. + +:::note +From version 23.4, the Platform API is live on `https://api.cloud.seqera.io`. The legacy API `https://api.tower.nf` remains fully operational, so existing API integrations will continue to perform as expected. Deprecation of the legacy API will be communicated well in advance to avoid any breaking changes to your integrations. +::: + +### Programmatic API + +Both the [Seqera Platform CLI](../cli/overview) and [seqerakit](https://github.com/seqeralabs/seqera-kit) are built on the Platform API and provide a programmatic automation layer for operations. + +You can also use tools such as [openapi-python-client](https://github.com/openapi-generators/openapi-python-client) to generate your own programmatic API for a particular language (such as Python) based on the OpenAPI schema. However, we do not guarantee that any OpenAPI client generators will work with the Platform API. 
+ +### Authentication + +The API requires an authentication token to be specified in each API request using the +[Bearer](https://swagger.io/docs/specification/authentication/bearer-authentication) HTTP header. + +Once created, use your access token to authenticate requests to the API via cURL, Postman, or within your code. + + + +:::note +The token is only displayed once when it is created. Store your token in a safe place. +::: + +### cURL example + +```bash +curl -H "Authorization: Bearer eyJ...YTk0" https://api.cloud.seqera.io/workflow +``` + +:::info +Your access token must be included in every API call. +::: + +### Parameters + +Some API `GET` methods will accept standard `query` parameters, which are defined in the documentation. Optional parameters such as page size, number (when available), and file name can be submitted as query parameters. `POST`, `PUT`, and `DELETE` requests require additional parameters to be provided in your request body. + +Several head parameters are also accepted, such as `Authorization` for bearer access tokens or `Accept-Version` to indicate the desired API version to use (default version 1): + +```bash +curl -H "Authorization: Bearer QH..E5M=" + -H "Accept-Version:1" + -X POST https://api.cloud.seqera.io/domain/{item_id}?queryString={value} + -d { params: { "key":"value" } } + +``` + +### Client errors + +Two typical standard errors (i.e., response statuses other than `200` or `204`) are returned by the API. + +#### Bad Request + +The request payload is not properly defined or the query parameters are invalid. + +```json +{ + "message": "Unexpected error while processing - Error ID: 54apnFENQxbvCr23JaIjLb" +} +``` + +#### Forbidden + +Your access token is invalid or expired. This response may also imply that the endpoint you are trying to access is not available. Check your request syntax for typos, and confirm that your access token is valid. 
+ +```bash +Status: 403 Forbidden +``` + +### Rate limiting + +For all API requests, there is a limit of 20 calls per second, per access token (72000 calls per hour). diff --git a/platform-enterprise/cli/_images/tw-info.jpg b/platform-enterprise/cli/_images/tw-info.jpg new file mode 100644 index 000000000..c3fc7a53a Binary files /dev/null and b/platform-enterprise/cli/_images/tw-info.jpg differ diff --git a/platform-enterprise/cli/_images/tw-screenshot.png b/platform-enterprise/cli/_images/tw-screenshot.png new file mode 100644 index 000000000..0cce6f21e Binary files /dev/null and b/platform-enterprise/cli/_images/tw-screenshot.png differ diff --git a/platform-enterprise/cli/commands.md b/platform-enterprise/cli/commands.md new file mode 100644 index 000000000..956ccfacf --- /dev/null +++ b/platform-enterprise/cli/commands.md @@ -0,0 +1,1200 @@ +--- +title: "Commands" +description: "Seqera Platform CLI commands." +date: "16 Jul 2024" +tags: [cli, commands] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +:::note +The CLI performs operations in the user workspace context by default. Use the `TOWER_WORKSPACE_ID` environment variable or the `--workspace` parameter to specify an organization workspace ID. +::: + +Use the `-h` or `--help` parameter to list the available commands and their associated options. + +[![`tw --help`](./_images/tw-info.jpg)](https://github.com/seqeralabs/tower-cli) + +For help with a specific subcommand, run the command with `-h` or `--help` appended. For example, `tw credentials add google -h`. + +:::tip +Use `tw --output=json ` to dump and store Seqera Platform entities in JSON format. + +Use `tw --output=json | jq -r '.[].'` to pipe the command to use jq to retrieve specific values in the JSON output. For example, `tw --output=json workspaces list | jq -r '.workspaces[].orgId'` returns the organization ID for each workspace listed. 
+::: + +## Credentials + +To launch pipelines in a Platform workspace, you need [credentials][credentials] for: + +1. Compute environments +2. Pipeline repository Git providers +3. (Optional) [Tower agent][tower-agent] — used with HPC clusters +4. (Optional) Container registries, such as docker.io + +### Add credentials + +Run `tw credentials add -h` to view a list of providers. + +Run `tw credentials add -h` to view the required fields for your provider. + +:::note +You can add multiple credentials from the same provider in the same workspace. +::: + +#### Compute environment credentials + +Platform requires credentials to access your cloud compute environments. See the [compute environment page][compute-envs] for your cloud provider for more information. + +```bash +tw credentials add aws --name=my_aws_creds --access-key= --secret-key= + + New AWS credentials 'my_aws_creds (1sxCxvxfx8xnxdxGxQxqxH)' added at user workspace +``` + +#### Git credentials + +Platform requires access credentials to interact with pipeline Git repositories. See [Git integration][git-integration] for more information. + +```bash +tw credentials add github -n=my_GH_creds -u= -p= + + New GITHUB credentials 'my_GH_creds (xxxxx3prfGlpxxxvR2xxxxo7ow)' added at user workspace +``` + +#### Container registry credentials + +Configure credentials for the Nextflow Wave container service to authenticate to private and public container registries. See the **Container registry credentials** section under [Credentials][credentials] for registry-specific instructions. + +:::note +Container registry credentials are only used by the Wave container service. See [Wave containers][wave-docs] for more information. 
+::: + +### List credentials + +```bash +tw credentials list + + Credentials at user workspace: + + ID | Provider | Name | Last activity + ------------------------+-----------+------------------------------------+------------------------------- + 1x1HxFxzxNxptxlx4xO7Gx | aws | my_aws_creds_1 | Wed, 6 Apr 2022 08:40:49 GMT + 1sxCxvxfx8xnxdxGxQxqxH | aws | my_aws_creds_2 | Wed, 9 Apr 2022 08:40:49 GMT + 2x7xNsf2xkxxUIxXKxsTCx | ssh | my_ssh_key | Thu, 8 Jul 2021 07:09:46 GMT + 4xxxIeUx7xex1xqx1xxesk | github | my_github_cred | Wed, 22 Jun 2022 09:18:05 GMT +``` + +### Delete credentials + +```bash +tw credentials delete --name=my_aws_creds + + Credentials '1sxCxvxfx8xnxdxGxQxqxH' deleted at user workspace +``` + +## Compute environments + +Compute environments define the execution platform where a pipeline runs. A compute environment is composed of the credentials, configuration, and storage options related to a particular computing platform. See [Compute environments][compute-envs] for more information on supported providers. + +Run `tw compute-envs -h` to view the list of supported compute environment operations. + +### Add a compute environment + +Run `tw compute-envs add -h` to view the list of supported providers. + +Run `tw compute-envs add -h` to view the required and optional fields for your provider. + +You must add the credentials for your provider before creating your compute environment. 
+ +```bash +tw compute-envs add aws-batch forge --name=my_aws_ce \ +--credentials= --region=eu-west-1 --max-cpus=256 \ +--work-dir=s3:// --wait=AVAILABLE + + New AWS-BATCH compute environment 'my_aws_ce' added at user workspace +``` + +This command will: + +- Use **Batch Forge** to automatically manage the AWS Batch resource lifecycle (`forge`) +- Use the credentials previously added to the workspace (`--credentials`) +- Create the required AWS Batch resources in the AWS Ireland (`eu-west-1`) region +- Provision a maximum of 256 CPUs in the compute environment (`--max-cpus`) +- Use an existing S3 bucket to store the Nextflow work directory (`--work-dir`) +- Wait until the compute environment has been successfully created and is ready to use (`--wait`) + +See the [compute environment][compute-envs] page for your provider for detailed information on Batch Forge and manual compute environment creation. + +### Delete a compute environment + +```bash +tw compute-envs delete --name=my_aws_ce + + Compute environment '1sxCxvxfx8xnxdxGxQxqxH' deleted at user workspace +``` + +### Default compute environment + +Select a **primary** compute environment to be used by default in a workspace. You can override the workspace primary compute environment by explicitly specifying an alternative compute environment when you create or launch a pipeline. + +```bash +tw compute-envs primary set --name=my_aws_ce + + Primary compute environment for workspace 'user' was set to 'my_aws_ce (1sxCxvxfx8xnxdxGxQxqxH)' +``` + +### Import and export a compute environment + +Export the configuration details of a compute environment in JSON format for scripting and reproducibility purposes. + +```bash +tw compute-envs export --name=my_aws_ce my_aws_ce_v1.json + + Compute environment exported into 'my_aws_ce_v1.json' +``` + +Similarly, a compute environment can be imported to a workspace from a previously exported JSON file. 
+ +```bash +tw compute-envs import --name=my_aws_ce_v1 ./my_aws_ce_v1.json + + New AWS-BATCH compute environment 'my_aws_ce_v1' added at user workspace +``` + +## Datasets + +Run `tw datasets -h` to view the list of supported operations. + +[Datasets](../data/datasets) are CSV (comma-separated values) and TSV (tab-separated values) files stored in a workspace, used as inputs during pipeline execution. The most commonly used datasets for Nextflow pipelines are samplesheets, where each row consists of a sample, the location of files for that sample (such as FASTQ files), and other sample details. + +### Add a dataset + +Run `tw datasets add -h` to view the required and optional fields for adding a dataset. + +Add a preconfigured dataset file to a workspace (include the `--header` flag if the first row of your samplesheet file is a header): + +```bash +tw datasets add --name=samplesheet1 --header samplesheet_test.csv + +Dataset 'samplesheet1' added at user workspace with id '60gGrD4I2Gk0TUpEGOj5Td' +``` + +:::note +The maximum supported dataset file size is 10 MB. 
+::: + +### Delete a dataset + +To delete a workspace dataset, specify either the dataset name (`-n` flag) or ID (`-i` flag): + +```bash +tw datasets delete -i 6tYMjGqCUJy6dEXNK9y8kh + +Dataset '6tYMjGqCUJy6dEXNK9y8kh' deleted at 97652229034604 workspace +``` + +### Download a dataset + +View a stored dataset's contents: + +```bash +tw datasets download -n samplesheet1 + +sample,fastq_1,fastq_2,strandedness +WT_REP1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_2.fastq.gz,auto +WT_REP1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz,auto +WT_REP2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz,reverse +RAP1_UNINDUCED_REP1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz,,reverse +RAP1_UNINDUCED_REP2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz,,reverse +RAP1_UNINDUCED_REP2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz,,reverse +RAP1_IAA_30M_REP1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz,reverse +``` + +### List datasets + +Run `tw datasets list -h` to view the optional fields for listing and filtering datasets. 
+ +```bash +tw datasets list -f data + +Datasets at 97652229034604 workspace: + + ID | Name | Created + ------------------------+----------+------------------------------- + 6vBGj6aWWpBuLpGKjJDpZy | dataset2 | Tue, 27 Aug 2024 14:49:32 GMT +``` + +### View a dataset + +Run `tw datasets view -h` to view the required and optional fields for viewing a stored dataset's details. + +```bash +tw datasets view -n samplesheet1 + +Dataset at 97652229034604 workspace: + + -------------+------------------------------- + ID | 60gGrD4I2Gk0TUpEGOj5Td + Name | samplesheet1 + Description | + Media Type | text/csv + Created | Mon, 19 Aug 2024 07:59:16 GMT + Updated | Mon, 19 Aug 2024 07:59:17 GMT +``` + +### Update a dataset + +Run `tw datasets update -h` to view the required and optional fields for updating a dataset. + +```bash +tw datasets update -n dataset1 --new-name=dataset2 -f samplesheet_test.csv + +Dataset 'dataset1' updated at 97652229034604 workspace with id '6vBGj6aWWpBuLpGKjJDpZy' +``` + +### Obtain a dataset URL + +Run `tw datasets url -h` to view the required and optional fields for obtaining dataset URLs. + +```bash +tw datasets url -n dataset2 + +Dataset URL + +----------- + +https://api.cloud.seqera.io/workspaces/97652229034xxx/datasets/6vBGj6aWWpBuLpGKjJDxxx/v/2/n/samplesheet_test.csv +``` + +## Studios + +Run `tw studios -h` to view the list of supported operations. + +Manage [Studio sessions](../studios/overview) hosted in Seqera Platform. Studio sessions allow interactive analysis using Jupyter, RStudio, VS Code, and Xpra. Additional custom analysis environments can be defined as needed. + +:::note +Most Studio operations require workspace `MAINTAIN` permissions. +::: + +### List Studio sessions + +Run `tw studios list -h` to view the required and optional fields for listing Studio sessions. + +If a workspace is not defined, the `TOWER_WORKSPACE_ID` workspace is used by default. 
+ +```bash +tw studios list -w community/showcase + +Studios at [community / showcase] workspace: + + ID | Name | Description | User | Status + ----------+---------------------------------+----------------------------------------------------+------------+---------- + 13083356 | studio-jupyter-demo | Demo Studio session for Jupyter | user1 | RUNNING + 419e602a | studio-rstudio-demo | Demo Studio session for RStudio v4.4.1 | user2 | RUNNING + 722cb780 | studio-vscode-demo | Demo Studio session for VSCode v1.83.0 | user3 | RUNNING + 950e56f5 | studio-multiqc-demo | Demo MultiQC Studio session | user1 | RUNNING +``` + +### View a Studio session + +Run `tw studios view -h` to view the required and optional fields for viewing session details. + +```bash +tw studios view -i 23ce7967 -w community/showcase + + Studio at workspace '[community / showcase]' + +---------------------+------------------------------------------------------------ +SessionID | 23ce7967 +Name | experiment-analysis-session +Status | STARTING +Status Last Update | Fri, 31 Jan 2025 19:35:07 GMT +Studio URL | https://a23ce7967.connect.cloud.seqera.io +Description | +Created on | Fri, 31 Jan 2025 18:12:27 GMT +Created by | rob-newman | rob.newman@seqera.io +Template | public.cr.seqera.io/platform/data-studio-jupyter:4.1.5-0.7 +Mounted Data | +Compute environment | aws-datastudios-sandbox-ireland-16cpus +Region | eu-west-1 +GPU allocated | 0 +CPU allocated | 2 +Memory allocated | 8192 +Build reports | NA +``` + +### List available templates for Studio sessions + +Run `tw studios templates -h` to view the required and optional fields for session templates. + +Templates are the container image templates used as the foundation for Studio sessions. See [Container image templates](../studios/overview#container-image-templates) for more information. 
+
+```bash
+tw studios templates -w community/showcase
+
+  Available templates for Studios:
+
+   Templates
+  ---------------------------------------------------------------
+   public.cr.seqera.io/platform/data-studio-jupyter:4.2.5-0.7
+   public.cr.seqera.io/platform/data-studio-rstudio:4.4.1-u1-0.7
+   public.cr.seqera.io/platform/data-studio-vscode:1.93.1-0.7
+   public.cr.seqera.io/platform/data-studio-xpra:6.2.0-r2-1-0.7
+```
+
+### Add a Studio session
+
+Run `tw studios add -h` to view the required and optional fields for adding sessions.
+
+Add a new Studio session in a workspace.
+
+```bash
+tw studios add -n new-analysis -w community/showcase \
+--description="New Python analysis for RNA experiment ABC" \
+--template="public.cr.seqera.io/platform/data-studio-jupyter:4.1.5-0.7" \
+--compute-env=48bB2PDk83AxskE40lealy \
+--cpu=2 \
+--memory=8192
+
+  Studio 2aa60bb7 CREATED at [community / showcase] workspace.
+```
+
+### Start a Studio session
+
+Run `tw studios start -h` to view the required and optional fields for starting sessions.
+
+Start (or restart) an existing Studio session in a workspace.
+
+```bash
+tw studios start -i 2aa60bb7 -w community/showcase
+
+  Studio 2aa60bb7 START successfully submitted at [community / showcase] workspace.
+
+  https://cloud.seqera.io/orgs/community/workspaces/showcase/studios/2aa60bb7/connect
+```
+
+### Stop a Studio session
+
+Run `tw studios stop -h` to view the required and optional fields for stopping sessions.
+
+Stop an existing Studio session in a workspace.
+
+```bash
+tw studios stop -i 13083356 -w community/showcase
+
+  Studio 13083356 STOP successfully submitted at [community / showcase] workspace.
+```
+
+### List checkpoints of a Studio session
+
+Run `tw studios checkpoints -h` to view the required and optional fields for viewing checkpoints for a session.
+
+List all checkpoints for an existing Studio session in a workspace. 
See [Session checkpoints](../studios/overview#studio-session-checkpoints) for more information.
+
+```bash
+tw studios checkpoints -i 19a3abbd -w community/showcase
+
+Checkpoints at studio 19a3abbd at [community / showcase] workspace:
+
+  ID | Name | Author | Date Created | Date Saved
+ ------+----------------------+------------+-------------------------------+-------------------------------
+  2010 | my_custom_named_ckpt | rob-newman | Fri, 31 Jan 2025 20:22:15 GMT | Fri, 31 Jan 2025 20:33:00 GMT
+  2011 | foo_1738355617 | rob-newman | Fri, 31 Jan 2025 20:33:37 GMT | Fri, 31 Jan 2025 20:35:22 GMT
+```
+
+### Start a new Studio session from an existing session checkpoint
+
+Run `tw studios start-as-new -h` to view the required and optional fields for adding and starting a new session from an existing session checkpoint.
+
+Add a new session from an existing parent Studio session and checkpoint. Useful for experimentation without impacting the parent Studio session state.
+
+```bash
+tw studios start-as-new -pid=657ddbca \
+-n=analysis-env-from-parent \
+-w community/showcase \
+--description="New sandbox for temporary analysis" \
+--cpu=2 \
+--memory=8192 \
+-a
+
+  Studio 19a3abbd CREATED at [community / showcase] workspace and auto-started.
+```
+
+### Delete a Studio session
+
+Run `tw studios delete -h` to view the required and optional fields for deleting sessions.
+
+Delete an existing Studio session from a workspace.
+
+```bash
+tw studios delete -i 2aa60bb7
+
+Studio 2aa60bb7 deleted at [community / showcase] workspace.
+```
+
+## Pipelines
+
+Run `tw pipelines -h` to view the list of supported operations.
+
+Pipelines define pre-configured workflows in a workspace. A pipeline consists of a workflow repository, launch parameters, and a compute environment.
+
+### Add a pipeline
+
+Run `tw pipelines add -h` to view the required and optional fields for adding your pipeline. 
+ +Add a pre-configured pipeline to the Launchpad: + +```bash +tw pipelines add --name=my_rnaseq_nf_pipeline \ +--params-file=my_rnaseq_nf_pipeline_params.yaml \ +--config= \ +https://github.com/nextflow-io/rnaseq-nf + +New pipeline 'my_rnaseq_nf_pipeline' added at user workspace +``` + +The optional `--params-file` flag is used to pass a set of default parameters that will be associated with the pipeline in the Launchpad. + +The optional `--config` flag is used to pass a custom Nextflow configuration file — configuration values passed here override the same values in the default pipeline repository `nextflow.conf` file. When this flag is set, all configuration values specified in Platform pipeline or compute environment **Nextflow config** fields are ignored. + +:::tip +The `params-file` or `--config` file must be a YAML or JSON file using [Nextflow configuration][nextflow-config] syntax. +::: + +### Import and export a pipeline + +Export the configuration details of a pipeline in JSON format for scripting and reproducibility purposes. + +```bash +tw pipelines export --name=my_rnaseq_nf_pipeline my_rnaseq_nf_pipeline_v1.json + + Pipeline exported into 'my_rnaseq_nf_pipeline_v1.json' +``` + +Similarly, a pipeline can be imported to a workspace from a previously exported JSON file. + +```bash +tw pipelines import --name=my_rnaseq_nf_pipeline_v1 ./my_rnaseq_nf_pipeline_v1.json + + New pipeline 'my_rnaseq_nf_pipeline_v1' added at user workspace +``` + +### Update a pipeline + +The default launch parameters can be changed with the `update` command: + +```bash +tw pipelines update --name=my_rnaseq_nf_pipeline \ +--params-file=my_rnaseq_nf_pipeline_params_2.yaml +``` + +## Launch pipelines + +Run `tw launch -h` to view supported launch options. + +### Launch a preconfigured pipeline + +If no custom parameters are passed via the CLI during launch, the defaults set for the pipeline in the Launchpad will be used. 
+ +:::note +tw CLI users are bound to the same user permissions that apply in the Platform UI. Launch users can launch pre-configured pipelines in the workspaces they have access to, but they cannot add or run new pipelines. +::: + +```bash +tw launch my_rnaseq_nf_pipeline \ +--config= \ + + + Workflow 1XCXxX0vCX8xhx submitted at user workspace. + + https://cloud.seqera.io/user/user1/watch/1XCXxX0vCX8xhx +``` + +The optional `--config` flag is used to pass a custom Nextflow configuration file — configuration values passed here override the same values in the default pipeline repository `nextflow.conf` file. When this flag is set, all configuration values specified in Platform pipeline or compute environment **Nextflow config** fields are ignored. + +When using `--wait`, `tw` can exit with one of two exit codes: + +- `0`: When the run reaches the desired state. +- `1`: When the run reaches a state that makes it impossible to reach the desired state. + +:::tip +Use `--wait=SUCCEEDED` if you want the command to wait until the pipeline execution is complete. +::: + +### Launch a pipeline with custom parameters + +To specify custom parameters during pipeline launch, specify a custom `--params-file`: + +```bash +tw launch my_rnaseq_nf_pipeline --params-file=my_rnaseq_nf_pipeline_params_2.yaml + + Workflow 2XDXxX0vCX8xhx submitted at user workspace. + + https://cloud.seqera.io/user/user1/watch/2XDXxX0vCX8xhx +``` + +See [Nextflow configuration][nextflow-config] for more information. + +### Launch an unsaved pipeline + +The CLI can directly launch pipelines that have not been added to the Launchpad in a Platform workspace by using the full pipeline repository URL: + +```bash +tw launch https://github.com/nf-core/rnaseq \ +--params-file=./custom_rnaseq_params.yaml \ +--config= \ +--compute-env=my_aws_ce --revision 3.8.1 \ +--profile=test,docker + + Workflow 2XDXxX0vCX8xhx submitted at user workspace. 
+ + https://cloud.seqera.io/user/user1/watch/2XDXxX0vCX8xhx +``` + +- Pipeline parameters are defined within the `custom_rnaseq_params.yaml` file. +- The optional `--config` flag is used to pass a custom Nextflow configuration file — configuration values passed here override the same values in the default pipeline repository `nextflow.conf` file. When this flag is set, all configuration values specified in Platform pipeline or compute environment **Nextflow config** fields are ignored. +- Other parameters such as `--profile` and `--revision` can also be specified. +- A non-primary compute environment can be used to launch the pipeline. Omit `--compute-env` to launch with the workspace default compute environment. + +:::note +CLI users are bound to the same user permissions that apply in the Platform UI. Launch users can launch pre-configured pipelines in the workspaces they have access to, but they cannot add or run new pipelines. +::: + +## Runs + +Run `tw runs -h` to view supported runs operations. + +Runs display all the current and previous pipeline runs in the specified workspace. Each new or resumed run is given a random name such as _grave_williams_ by default, which can be overridden with a custom value at launch. See [Run details](../monitoring/run-details) for more information. As a run executes, it can transition through the following states: + +- `submitted`: Pending execution +- `running`: Running +- `succeeded`: Completed successfully +- `failed`: Successfully executed, where at least one task failed with a terminate [error strategy](https://www.nextflow.io/docs/latest/process.html#errorstrategy) +- `cancelled`: Stopped manually during execution +- `unknown`: Indeterminate status + +### View pipeline's runs + +Run `tw runs view -h` to view all the required and optional fields for viewing a pipeline's runs. 
+ +```bash +tw runs view -i 2vFUbBx63cfsBY -w seqeralabs/showcase + + Run at [seqeralabs / showcase] workspace: + + + General + ---------------------+------------------------------------------------- + ID | 2vFUbBx63cfsBY + Operation ID | b5d55384-734e-4af0-8e47-0d3abec71264 + Run name | adoring_brown + Status | SUCCEEDED + Starting date | Fri, 31 May 2024 10:38:30 GMT + Commit ID | b89fac32650aacc86fcda9ee77e00612a1d77066 + Session ID | 9365c6f4-6d79-4ca9-b6e1-2425f4d957fe + Username | user1 + Workdir | s3://seqeralabs-showcase/scratch/2vFUbBx63cfsBY + Container | No container was reported + Executors | awsbatch + Compute Environment | seqera_aws_ireland_fusionv2_nvme + Nextflow Version | 23.10.1 + Labels | star_salmon,yeast +``` + +### List runs + +Run `tw runs list -h` to view all the required and optional fields for listing runs in a workspace. + +```bash +tw runs list + + Pipeline runs at [seqeralabs / testing] workspace: + + ID | Status | Project Name | Run Name | Username | Submit Date + ----------------+-----------+----------------------------+---------------------------------+-----------------------+------------------------------- + 49Gb5XVMud2e7H | FAILED | seqeralabs/nf-aggregate | distraught_archimedes | user1 | Fri, 31 May 2024 16:22:10 GMT + 4anNFvTUwRFDp | SUCCEEDED | nextflow-io/rnaseq-nf | nasty_kilby | user1 | Fri, 31 May 2024 15:23:12 GMT + 3wo3Kfni6Kl3hO | SUCCEEDED | nf-core/proteinfold | reverent_linnaeus | user2 | Fri, 31 May 2024 15:22:38 GMT + + + + 4fIRrFgZV3eDb1 | FAILED | nextflow-io/hello | gigantic_lichterman | user1 | Mon, 29 Apr 2024 08:44:47 GMT + cHEdKBXmdoQQM | FAILED | mathysgrapotte/stimulus | mighty_poitras | user3 | Mon, 29 Apr 2024 08:08:52 GMT +``` + +Use the optional `--filter` flag to filter the list of runs returned by one or more `keyword:value` entries: + +- `status` +- `label` +- `workflowId` +- `runName` +- `username` +- `projectName` +- `after` +- `before` +- `sessionId` +- `is:starred` + +If no `keyword` is defined, 
the filtering is applied to the `runName`, `projectName` (the pipeline name), and `username`. + +:::note +The `after` and `before` flags require an [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) timestamp with UTC timezone (`YYYY-MM-DDThh:mm:ss.sssZ`). +::: + +```bash +tw runs list --filter hello_slurm_20240530 + + Pipeline runs at [seqeralabs / showcase] workspace: + + ID | Status | Project Name | Run Name | Username | Submit Date + ---------------+-----------+-------------------+--------------------------------------+------------+------------------------------- + pZeJBOLtIvP7R | SUCCEEDED | nextflow-io/hello | hello_slurm_20240530_e75584566f774e7 | user1 | Thu, 30 May 2024 09:12:51 GMT +``` + +Multiple filter criteria can be defined: + +```bash +tw runs list --filter="after:2024-05-29T00:00:00.000Z before:2024-05-30T00:00:00.000Z username:user1" + + Pipeline runs at [seqeralabs / testing] workspace: + + ID | Status | Project Name | Run Name | Username | Submit Date + ----------------+-----------+-----------------------+--------------------+-------------+------------------------------- + xJvK95W6YUmEz | SUCCEEDED | nextflow-io/rnaseq-nf | ondemand2 | user1 | Wed, 29 May 2024 20:35:28 GMT + 1c1ckn9a3j0xF0 | SUCCEEDED | nextflow-io/rnaseq-nf | fargate | user1 | Wed, 29 May 2024 20:28:02 GMT + 3sYX1acJ01T7rL | SUCCEEDED | nextflow-io/rnaseq-nf | min1vpcu-spot | user1 | Wed, 29 May 2024 20:27:47 GMT + 4ZYJGWJCttXqXq | SUCCEEDED | nextflow-io/rnaseq-nf | min1cpu-ondemand | user1 | Wed, 29 May 2024 20:25:21 GMT + 4LCxsffTqf3ysT | SUCCEEDED | nextflow-io/rnaseq-nf | lonely_northcutt | user1 | Wed, 29 May 2024 20:09:51 GMT + 4Y8EcyopNiYBlJ | SUCCEEDED | nextflow-io/rnaseq-nf | fargate | user1 | Wed, 29 May 2024 18:53:47 GMT + dyKevNwxK50XX | SUCCEEDED | mark814/nr-test | cheeky_cuvier | user1 | Wed, 29 May 2024 12:21:10 GMT + eS6sVB5A387aR | SUCCEEDED | mark814/nr-test | evil_murdock | user1 | Wed, 29 May 2024 12:11:08 GMT +``` + +A leading and trailing `*` wildcard 
character is supported: + +```bash +tw runs list --filter="*man/rnaseq-*" + + Pipeline runs at [seqeralabs / testing] workspace: + + ID | Status | Project Name | Run Name | Username | Submit Date + ----------------+-----------+---------------------+---------------------+----------------+------------------------------- + 5z4AMshti4g0GK | SUCCEEDED | robnewman/rnaseq-nf | admiring_darwin | user1 | Tue, 16 Jan 2024 19:56:29 GMT + 62LqiS4O4FatSy | SUCCEEDED | robnewman/rnaseq-nf | cheeky_yonath | user1 | Wed, 3 Jan 2024 12:36:09 GMT + 3k2nu8ZmcBFSGv | SUCCEEDED | robnewman/rnaseq-nf | compassionate_jones | user3 | Tue, 2 Jan 2024 16:22:26 GMT + 3zG2ggf5JsniNW | SUCCEEDED | robnewman/rnaseq-nf | fervent_payne | user1 | Wed, 20 Dec 2023 23:55:17 GMT + 1SNIcSXRuJMSNZ | SUCCEEDED | robnewman/rnaseq-nf | curious_babbage | user3 | Thu, 28 Sep 2023 17:48:04 GMT + 5lI2fZUZfiokBI | SUCCEEDED | robnewman/rnaseq-nf | boring_heisenberg | user2 | Thu, 28 Sep 2023 12:29:27 GMT + 5I4lsRXIHVEjNB | SUCCEEDED | robnewman/rnaseq-nf | ecstatic_ptolemy | user2 | Wed, 27 Sep 2023 22:06:19 GMT +``` + +### Relaunch run + +Run `tw runs relaunch -h` to view all the required and optional fields for relaunching a run in a workspace. + +### Cancel a run + +Run `tw runs cancel -h` to view all the required and optional fields for canceling a run in a workspace. + +### Manage labels for runs + +Run `tw runs labels -h` to view all the required and optional fields for managing labels for runs in a workspace. + +In the example below, we add the labels `test` and `rnaseq-demo` to the run with ID `5z4AMshti4g0GK`: + +```bash +tw runs labels -i 5z4AMshti4g0GK test,rnaseq-demo + +'set' labels on 'run' with id '5z4AMshti4g0GK' at 34830707738561 workspace +``` + +### Delete a run + +Run `tw runs delete -h` to view all the required and optional fields for deleting a run in a workspace. 
+ +### Dump all logs and details of a run + +Run `tw runs dump -h` to view all the required and optional fields for dumping all logs and details of a run in a workspace. The supported formats are `.tar.xz` and `.tar.gz`. In the example below, we dump all the logs and details for the run with ID `5z4AMshti4g0GK` to the output file `file.tar.gz`. + +```bash +tw runs dump -i 5z4AMshti4g0GK -o file.tar.gz +- Tower info +- Workflow details +- Task details + + Pipeline run '5z4AMshti4g0GK' at [seqeralabs / testing] workspace details dump at 'file.tar.gz' +``` + +## Labels + +Run `tw labels -h` to view supported label operations. + +Manage labels and resource labels. + +### Add a label + +Run `tw labels add -h` to view the required and optional fields for adding a label. + +:::note +[Resource labels](../resource-labels/overview) consist of a `name=value` pair. +[Labels](../labels/overview) require only a name. +::: + +```bash +tw labels add -n Label1 -w DocTestOrg2/Testing -v Value1 + +Label 'Label1=Value1' added at 'DocTestOrg2/Testing' workspace with id '268741348267491' +``` + +### List labels + +Run `tw labels list -h` to view the optional fields for filtering labels. + +```bash +tw labels list + +Labels at 97652229034604 workspace: + + ID | Name | Value | Type + -----------------+------------------------+-----------+---------- + 116734717739444 | manual-fusion-amd64 | | Normal + 120599302764779 | test-with-prefix | | Normal + 128477232893714 | manual-fusion-arm64 | | Normal + 214201679620273 | test-config-link | | Normal + 244634136444435 | manual-nonfusion-amd64 | | Normal + 9184612610501 | Resource1 | Value1 | Resource +``` + +### Update a label + +Run `tw labels update -h` to view the required and optional fields for updating labels. + +### Delete a label + +Run `tw labels delete -h` to view the required and optional fields for deleting labels. 
+ +```bash +tw labels delete -i 203879852150462 + +Label '203879852150462' deleted at '97652229034604' workspace +``` + +## Data links + +Run `tw data-links -h` to view supported data link operations. + +Data links allow you to work with public and private cloud storage buckets in [Data Explorer](../data/data-explorer) in the specified workspace. AWS S3, Azure Blob Storage, and Google Cloud Storage are supported. The full list of operations are: + +- `list`: List data links in a workspace +- `add`: Add a custom data link to a workspace +- `update`: Update a custom data link in a workspace +- `delete`: Delete a custom data link from a workspace +- `browse`: Browse the contents of a data link in a workspace + +### List data links + +Run `tw data-links list -h` to view all the optional fields for listing data links in a workspace. + +If a workspace is not defined, the `TOWER_WORKSPACE_ID` workspace is used by default. Data links can be one of two types: + +- `v1-cloud-`: Cloud data links auto-discovered using credentials attached to the workspace. +- `v1-user-`: Custom data links created by users. 
+ +```bash +tw data-links list -w seqeralabs/showcase + +Data links at [seqeralabs / showcase] workspace: + +ID | Provider | Name | Resource ref | Region +-------------------------------------------+----------+--------------------------------+-----------------------------------------------------------------+----------- +v1-cloud-833bb845bd9ec1970c4a7b0bb7b8c4ad | aws | e2e-data-explorer-tests-aws | s3://e2e-data-explorer-tests-aws | eu-west-2 +v1-cloud-60700a33ec3fae68d424cf948fa8d10c | aws | nf-tower-bucket | s3://nf-tower-bucket | eu-west-1 +v1-user-09705781697816b62f9454bc4b9434b4 | aws | vscode-analysis-demo | s3://seqera-development-permanent-bucket/studios-demo/vscode/ | eu-west-2 +v1-user-0dede00fabbc4b9e2610261822a2d6ae | aws | seqeralabs-showcase | s3://seqeralabs-showcase | eu-west-1 +v1-user-171aa8801cabe4af71500335f193d649 | aws | projectA-rnaseq-analysis | s3://seqeralabs-showcase/demo/nf-core-rnaseq/ | eu-west-1 + + + +v1-user-bb4fa9625a44721510c47ac1cb97905b | aws | genome-in-a-bottle | s3://giab | us-east-1 +v1-user-e7bf26921ba74032bd6ae1870df381fc | aws | NCBI_Sequence_Read_Archive_SRA | s3://sra-pub-src-1/ | us-east-1 + +Showing from 0 to 99 from a total of 16 entries. +``` + +### Add a custom data link + +Run `tw data-links add -h` to view all the required and optional fields for adding a custom data link to a workspace. + +Users with the workspace `MAINTAIN` role and above can add custom data links. The data link `name`, `uri`, and `provider` (`aws`, `azure`, or `google`) fields are required. If adding a custom data link for a private bucket, the credentials identifier field is also required. Adding a custom data link for a public bucket doesn't require credentials. 
+ +```bash +tw data-links add -w seqeralabs/showcase -n FOO -u az://seqeralabs.azure-benchmarking \ +-p azure -c seqera_azure_credentials + +Data link created: + +ID | Provider | Name | Resource ref | Region +------------------------------------------+----------+------+------------------------------------+-------- +v1-user-152116183ee325463901430bb9efb8c9 | azure | FOO | az://seqeralabs.azure-benchmarking | +``` + +### Update a custom data link + +Run `tw data-links update -h` to view all the required and optional fields for updating a custom data link in a workspace. Users with the `MAINTAIN` role and above for a workspace can update custom data links. + +```bash +tw data-links update -w seqeralabs/showcase -i v1-user-152116183ee325463901430bb9efb8c9 -n BAR + +Data link updated: + +ID | Provider | Name | Resource ref | Region +------------------------------------------+----------+------+------------------------------------+-------- +v1-user-152116183ee325463901430bb9efb8c9 | azure | BAR | az://seqeralabs.azure-benchmarking | +``` + +### Delete a custom data link + +Run `tw data-links delete -h` to view all the required and optional fields for deleting a custom data link from a workspace. + +Users with the `MAINTAIN` role and above for a workspace can delete custom data links. + +```bash +tw data-links delete -w seqeralabs/showcase -i v1-user-152116183ee325463901430bb9efb8c9 + +Data link 'v1-user-152116183ee325463901430bb9efb8c9' deleted at '138659136604200' workspace. +``` + +### Browse data link contents + +Run `tw data-links browse -h` to view all the required and optional fields for browsing a data link in a workspace. + +Define the data link ID using the required `-i` or `--id` argument, which can be found by first using the list operation for a workspace. 
In the example below, a name is defined to only retrieve data links with names that start with the given word: + +```bash +tw data-links list -w seqeralabs/showcase -n 1000genomes + +Data links at [seqeralabs / showcase] workspace: + +ID | Provider | Name | Resource ref | Region +------------------------------------------+----------+-------------+------------------+----------- +v1-user-6d8f44c239e2a098b3e02e918612452a | aws | 1000genomes | s3://1000genomes | us-east-1 + + Showing from 0 to 99 from a total of 1 entries. + +tw data-links browse -w seqeralabs/showcase -i v1-user-6d8f44c239e2a098b3e02e918612452a + + Content of 's3://1000genomes' and path 'null': + +Type | Name | Size +--------+--------------------------------------------+---------- +FILE | 20131219.populations.tsv | 1663 +FILE | 20131219.superpopulations.tsv | 97 +FILE | CHANGELOG | 257098 +FILE | README.alignment_data | 15977 +FILE | README.analysis_history | 5289 +FILE | README.complete_genomics_data | 5967 +FILE | README.crams | 563 +FILE | README.ebi_aspera_info | 935 +FILE | README.ftp_structure | 8408 +FILE | README.pilot_data | 2082 +FILE | README.populations | 1938 +FILE | README.sequence_data | 7857 +FILE | README_missing_files_20150612 | 672 +FILE | README_phase3_alignments_sequence_20150526 | 136 +FILE | README_phase3_data_move_20150612 | 273 +FILE | alignment.index | 3579471 +FILE | analysis.sequence.index | 54743580 +FILE | exome.alignment.index | 3549051 +FILE | sequence.index | 67069489 +FOLDER | 1000G_2504_high_coverage/ | 0 +FOLDER | alignment_indices/ | 0 +FOLDER | changelog_details/ | 0 +FOLDER | complete_genomics_indices/ | 0 +FOLDER | data/ | 0 +FOLDER | hgsv_sv_discovery/ | 0 +FOLDER | phase1/ | 0 +FOLDER | phase3/ | 0 +FOLDER | pilot_data/ | 0 +FOLDER | release/ | 0 +FOLDER | sequence_indices/ | 0 +FOLDER | technical/ | 0 +``` + +## Organizations + +Run `tw organizations -h` to view supported workspace operations. 
+
+Organizations are the top-level structure and contain workspaces, members, and teams. You can also add external collaborators to an organization. See [Organization management](../orgs-and-teams/organizations) for more information.
+
+### Add an organization
+
+Run `tw organizations add -h` to view the required and optional fields for adding your organization.
+
+```bash
+tw organizations add -n TestOrg2 -f 2nd\ Test\ Organization\ LLC -l RSA
+
+Organization 'TestOrg2' with ID '204336622618177' was added
+```
+
+## Members
+
+Run `tw members -h` to view supported member operations.
+
+Manage organization members. Organization membership management requires organization `OWNER` permissions.
+
+### List members
+
+Run `tw members list -h` to view all the optional fields for listing organization members.
+
+```bash
+tw members list -o TestOrg2
+
+Members for TestOrg2 organization:
+
+  ID | Username | Email | Role
+ -----------------+----------------------+---------------------------------+--------
+  200954501314303 | user1 | user1@domain.com | MEMBER
+  277776534946151 | user2 | user2@domain.com | MEMBER
+  243277166855716 | user3 | user3@domain.com | OWNER
+
+```
+
+### Add a member
+
+Run `tw members add -h` to view all the required and optional fields for adding organization members.
+
+```bash
+tw members add -u user1@domain.com -o DocTestOrg2
+
+Member 'user1' with ID '134534064600266' was added in organization 'TestOrg2'
+```
+
+### Delete a member
+
+Run `tw members delete -h` to view all the required and optional fields for deleting organization members.
+
+```bash
+tw members delete -u user1 -o TestOrg2
+
+Member 'user1' deleted from organization 'TestOrg2'
+```
+
+### Update member role
+
+Run `tw members update -h` to view all the required and optional fields for updating organization members. 
+ +```bash +tw members update -u user1 -r OWNER -o TestOrg2 + +Member 'user1' updated to role 'owner' in organization 'TestOrg2' +``` + +### Leave an organization + +Run `tw members leave -o ` to be removed from the given organization's members. + + +## Workspaces + +Run `tw workspaces -h` to view supported workspace operations. + +Workspaces provide the context in which a user launches workflow executions, defines the available resources, and manages who can access those resources. Workspaces contain pipelines, runs, actions, datasets, compute environments, credentials, and secrets. Access permissions are controlled with participants, collaborators, and teams. + +See [User workspaces][user-workspaces] for more information. + +### Add a workspace + +:::note +Workspace management operations require organization `OWNER` permissions. +::: + +Run `tw workspaces add -h` to view the required and optional fields for adding your workspace. + +In the example below, we create a shared workspace to be used for sharing pipelines with other private workspaces. See [Shared workspaces][shared-workspaces] for more information. + +```bash +tw workspaces add --name=shared-workspace --full-name=shared-workspace-for-all --org=my-tower-org --visibility=SHARED + + A 'SHARED' workspace 'shared-workspace' added for 'my-tower-org' organization +``` + +:::note +By default, a workspace is set to private when created. +::: + +### List workspaces + +List all the workspaces in which you are a participant: + +```bash +tw workspaces list + + Workspaces for default user: + + Workspace ID | Workspace Name | Organization Name | Organization ID + -----------------+------------------+-------------------+----------------- + 26002603030407 | shared-workspace | my-tower-org | 04303000612070 +``` + +## Participants + +Run `tw participants -h` to view supported participant operations. + +Manage workspace participants. + +:::note +The operations listed below require workspace `OWNER` or `ADMIN` permissions. 
+::: + +### List participants + +```bash +tw participants list + + Participants for 'my-tower-org/shared-workspace' workspace: + + ID | Participant Type | Name | Workspace Role + ----------------+------------------+-----------------------------+---------------- + 45678460861822 | MEMBER | user (user@mydomain.com) | owner +``` + +### Add participants + +Run `tw participants add -h` to view the required and optional fields for adding a participant. + +To add a new _collaborator_ to the workspace, use the `add` subcommand. The default role assigned to a _collaborator_ is `Launch`. + +See [Participant roles][participant-roles] for more information. + +```bash +tw participants add --name=collaborator@mydomain.com --type=MEMBER + + User 'collaborator' was added as participant to 'shared-workspace' workspace with role 'launch' +``` + +### Update participant roles + +To update the role of a _Collaborator_ to `ADMIN` or `MAINTAIN`, use the `update` subcommand: + +```bash +tw participants update --name=collaborator@mydomain.com --type=COLLABORATOR --role=MAINTAIN + + Participant 'collaborator@mydomain.com' has now role 'maintain' for workspace 'shared-workspace' +``` + +## Teams + +Run `tw teams -h` to view supported team operations. + +Manage organization teams. + +:::note +Team management operations require organization `OWNER` permissions. +::: + +### List teams + +Run `tw teams list -h` to view the required and optional fields for listing teams. + +```bash +tw teams list -o TestOrg2 + +Teams for TestOrg2 organization: + + Team ID | Team Name | Members Count Name + ----------------+-----------+-------------------- + 84866234211969 | Testing | 1 +``` + +### Add a team + +Run `tw teams add -h` to view the required and optional fields for creating a team. 
+ +```bash +tw teams add -n team1 -o TestOrg2 -d testing + +A 'team1' team added for 'TestOrg2' organization +``` + +### Delete a team + +```bash +tw teams delete -i 169283393825479 -o TestOrg2 + +Team '169283393825479' deleted for TestOrg2 organization +``` + +### Manage team members + +Run `tw teams members -h` to view the fields and additional commands for managing team members. + +To add a new team member, include an existing username or new user email: + +```bash +tw teams members -t Testing -o TestOrg2 add -m user1@domain.com + +Member 'user1' added to team 'Testing' with id '243206491381406' +``` + +To delete a team member, include the member's username: + +```bash +tw teams members -t Testing -o TestOrg2 delete -m user1 + +Team member 'user1' deleted at 'Testing' team +``` + +## Collaborators + +Run `tw collaborators -h` view all the required and optional fields for managing organization collaborators. + +Manage organization collaborators. + +### List collaborators + +```bash +tw collaborators list -o seqeralabs + +Collaborators for 88848180287xxx organization: + + ID | Username | Email + -----------------+----------------------+-------------------- + 13136942731xxx | external_user1 | user1@domain.com + 127726720173xxx | external_user2 | user2@domain.com + 59151157784xxx | external_user3 | user3@domain.com + 132868466675xxx | external_user4 | user4@domain.com + 178756942629xxx | external_user5 | user5@domain.com +``` + +## Actions + +Run `tw actions -h` to view supported pipeline action operations. + +[Actions](../pipeline-actions/overview) enable event-based pipeline execution, such as triggering a pipeline launch with a GitHub webhook whenever the pipeline repository is updated. + +### Add a pipeline action + +Run `tw actions add -h` to view the required and optional fields for adding an action. + +## Secrets + +Run `tw secrets -h` to view supported workspace secret operations. 
+ +[Secrets](../secrets/overview) are used to store the keys and tokens used by workflow tasks to interact with external systems, such as a password to connect to an external database or an API token. + +### Add a workspace secret + +Run `tw secrets add -h` to view the required and optional fields for adding a secret. + + +[compute-envs]: ../compute-envs/overview +[credentials]: ../credentials/overview +[git-integration]: ../git/overview +[nextflow-config]: https://www.nextflow.io/docs/latest/config.html#config-syntax +[participant-roles]: ../orgs-and-teams/roles +[shared-workspaces]: ../orgs-and-teams/workspace-management +[tower-agent]: ../supported_software/agent/overview +[user-workspaces]: ../orgs-and-teams/workspace-management +[wave-docs]: https://docs.seqera.io/wave diff --git a/platform-enterprise/cli/installation.md b/platform-enterprise/cli/installation.md new file mode 100644 index 000000000..00bf35030 --- /dev/null +++ b/platform-enterprise/cli/installation.md @@ -0,0 +1,169 @@ +--- +title: "Installation" +description: "Seqera Platform CLI installation and configuration instructions." +date: "16 Jul 2024" +tags: [cli, platform, installation, configuration] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +1. Download the latest [version][releases] for your OS from the CLI GitHub repository. +1. Rename the file and and make it executable: + + ```bash + mv tw-* tw + chmod +x ./tw + ``` + +1. Move the file to a directory accessible to your `$PATH` variable: + + ```bash + sudo mv tw /usr/local/bin/ + ``` + +### Configuration + +The CLI requires an access token to interact with your Seqera Platform instance. Select **User tokens** from the user menu in the [Platform UI](https://cloud.seqera.io), then select **Add token** to create a new token. + +Copy the access token value and use it with the CLI in one of two ways: + +- **Environment variable**: + + 1. 
Export the token as a shell variable directly into your terminal: + + ```bash + export TOWER_ACCESS_TOKEN= + ``` + + 2. Add the `export` command to your `.bashrc`, `.zshrc`, or `.bash_profile` file for it to be permanently added to your environment. + +- **tw command flag**: + + Provide the access token directly in your `tw` command with `--access-token`: + + ```bash + tw --access-token= + ``` + +If required, configure the following optional environment variables using the same methods above: + +- `TOWER_WORKSPACE_ID`: Workspace ID. Default: Your user workspace. +- `TOWER_API_ENDPOINT`: Seqera API URL. Default: `api.cloud.seqera.io`. + +:::tip +Find your `TOWER_WORKSPACE_ID` from the **Workspaces** tab on your organization page. Alternatively, list all the workspaces your token can access with `tw workspaces list` and copy the workspace ID from the command output. +::: + +### Health check + +Confirm the installation, configuration, and connection: + +```bash +tw info + + Details + -------------------------+---------------------- + Tower API endpoint | + Tower API version | 1.25.0 + Tower version | 24.2.0_cycle22 + CLI version | 0.9.4 (f3e846e) + CLI minimum API version | 1.15 + Authenticated user | + + System health status + ---------------------------------------+------------------ + Remote API server connection check | OK + Tower API version check | OK + Authentication API credential's token | OK +``` + +### Commands + +See [Commands](./commands) for detailed instructions to use the CLI. 
+ +### Autocompletion + +Activate autocompletion in your current session with this command: + +```bash +source <(tw generate-completion) +``` + +### Custom SSL certificate authority store + +If you are using a Private CA SSL certificate not recognized by the default Java certificate authorities, use a [custom](https://www.baeldung.com/jvm-certificate-store-errors) `cacerts` store: + +```bash +tw -Djavax.net.ssl.trustStore=/absolute/path/to/cacerts -Djavax.net.ssl.trustStorePassword= info +``` + +Replace `` with your keystore password. If you did not set a password when creating the keystore, include the default keystore password `changeit` in the command above. + +You can also rename the binary to `tw-binary` and create a `tw` script to automatically include the custom `cacerts` store in every session: + +```bash +#!/usr/bin/env bash +tw-binary -Djavax.net.ssl.trustStore=/absolute/path/to/cacerts -Djavax.net.ssl.trustStorePassword= $@ +``` + +### Build binary development versions + +tw CLI is a platform binary executable created by a native compilation from Java GraalVM. To compile and build a development version of the binary: + +1. If necessary, install [SDKMan!](https://sdkman.io/) +1. From the root of the tower-cli project, install GraalVM: + + ```bash + sdk env install + ``` + + This ensures that SDKMan uses the tower-cli project-specific `.sdkmanrc` configuration. + +1. Install `native-image`: + + ```bash + gu install native-image + ``` + +1. Export your Github credentials. Github requires authentication for public packages (the token only requires the `read:packages` scope): + + ```bash + export GITHUB_USERNAME=... + export GITHUB_TOKEN=... + ``` + +1. 
Create the native client: + + ```bash + ./gradlew nativeCompile + ``` + + This will install a locally compiled version of `tw` in the nativeCompile directory: + + ```bash + Produced artifacts: + /build/native/nativeCompile/tw (executable) + ======================================================================================================================== + Finished generating 'tw' in 1m 6s. + [native-image-plugin] Native Image written to: /build/native/nativeCompile + + BUILD SUCCESSFUL in 1m 8s + 6 actionable tasks: 2 executed, 4 up-to-date + ``` + +1. Run `tw`: + + ```bash + ./build/native/nativeCompile/tw + ``` + +### Non-binary development versions + +Run a non-binary development version by executing the [`./tw`](https://github.com/seqeralabs/tower-cli/blob/master/tw) script in the root of the CLI repository. + +### License + +[Mozilla Public License v2.0](https://github.com/seqeralabs/tower-cli/blob/master/LICENSE.txt) + +[releases]: https://github.com/seqeralabs/tower-cli/releases \ No newline at end of file diff --git a/platform-enterprise/cli/overview.md b/platform-enterprise/cli/overview.md new file mode 100644 index 000000000..910ac388b --- /dev/null +++ b/platform-enterprise/cli/overview.md @@ -0,0 +1,29 @@ +--- +title: "Overview" +description: "Seqera Platform CLI overview." +date: "24 Apr 2023" +tags: [cli, tower] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +Seqera Platform CLI brings concepts like pipelines and compute environments to the terminal. + +The CLI interacts with Platform to provide an interface to launch pipelines, manage cloud resources, and administer your analysis. + +![tw](./_images/tw-screenshot.png) + +## Key features + +- **A Nextflow-like experience**: tw CLI provides a developer-friendly environment. Pipelines can be launched with the CLI similarly to Nextflow but with the Platform benefits of monitoring, logging, resource provisioning, dataset management, and collaborative sharing. 
+ +- **Infrastructure as Code**: All Platform resources, including pipelines and compute environments, can be described in a declarative manner. This enables a complete definition of an analysis environment that can be versioned and treated as code. It greatly simplifies configuration sharing and routine administration. + +- **Built on OpenAPI**: tw CLI interacts with Platform via the [Seqera Platform API](https://cloud.seqera.io/openapi/index.html) which uses the OpenAPI 3.0 specification. The CLI provides full control of the Platform application, allowing users to get maximum insights into pipeline submissions and execution environments. + +## Availability + +The CLI can be installed on macOS, Windows, and Linux. It is compatible with [Seqera Platform Cloud](https://cloud.seqera.io/) and Enterprise versions 21.08 and later. + +See [Installation](./installation) to get started. diff --git a/platform-enterprise/compute-envs/aws-batch.md b/platform-enterprise/compute-envs/aws-batch.md new file mode 100644 index 000000000..23ad862c3 --- /dev/null +++ b/platform-enterprise/compute-envs/aws-batch.md @@ -0,0 +1,380 @@ +--- +title: "AWS Batch" +description: "Instructions to set up AWS Batch in Seqera Platform" +date: "21 Apr 2023" +tags: [aws, batch, compute environment] +--- + +:::tip +This guide assumes you have an existing [Amazon Web Service (AWS)](https://aws.amazon.com/) account. + +The AWS Batch service quota for job queues is 50 per account. For more information on AWS Batch service quotas, see [AWS Batch service quotas](https://docs.aws.amazon.com/batch/latest/userguide/service_limits.html). +::: + +There are two ways to create a Seqera Platform compute environment for AWS Batch: + +- [**Batch Forge**](#batch-forge): This option automatically creates the AWS Batch resources in your AWS account. This eliminates the need to set up your AWS Batch infrastructure manually. +- [**Manual**](#manual): This option allows Seqera to use existing AWS Batch resources. 
+ +## Batch Forge + +Batch Forge automates the configuration of an [AWS Batch](https://aws.amazon.com/batch/) compute environment and the queues required for deploying Nextflow pipelines. + +:::caution +Batch Forge automatically creates resources that you may be charged for in your AWS account. See [Cloud costs](../monitoring/cloud-costs) for guidelines to manage cloud resources effectively and prevent unexpected costs. +::: + +### IAM + +Batch Forge requires an Identity and Access Management (IAM) user with the permissions listed in [this policy file](https://github.com/seqeralabs/nf-tower-aws/blob/master/forge/forge-policy.json). These authorizations are more permissive than those required to only [launch](https://github.com/seqeralabs/nf-tower-aws/blob/master/launch/launch-policy.json) a pipeline, since Seqera needs to manage AWS resources on your behalf. Note that launch permissions also require the S3 storage write permissions in [this policy file](https://github.com/seqeralabs/nf-tower-aws/blob/master/launch/s3-bucket-write.json). + +We recommend that you create separate IAM policies for Batch Forge and launch permissions using the policy files above. These policies can then be assigned to the Seqera IAM user. + +**Create Seqera IAM policies** + +1. Open the [AWS IAM console](https://console.aws.amazon.com/iam). +1. From the left navigation menu, select **Policies** under **Access management**. +1. Select **Create policy**. +1. On the **Create policy** page, select the **JSON** tab. + +1. Copy the contents of your policy JSON file ([Forge](https://github.com/seqeralabs/nf-tower-aws/blob/master/forge/forge-policy.json) or [Launch](https://github.com/seqeralabs/nf-tower-aws/blob/master/launch/launch-policy.json), depending on the policy being created) and replace the default text in the policy editor area under the JSON tab. + +1. 
To create a Launch user, you must also create the [S3 bucket write policy](https://github.com/seqeralabs/nf-tower-aws/blob/master/launch/s3-bucket-write.json) separately to attach to your Launch user. + +1. To use Data Explorer and Studios, you must create the [data policy](https://github.com/seqeralabs/nf-tower-aws/blob/master/data/data-explorer-policy.json) separately to attach to your Platform users. + +1. Select **Next: Tags**. +1. Select **Next: Review**. +1. Enter a name and description for the policy on the **Review policy** page, then select **Create policy**. +1. Repeat these steps for both the `forge-policy.json` and `launch-policy.json` files. For a Launch user, also create the `s3-bucket-write-policy.json` listed in step 5 above. + +**Create an IAM user** + +1. From the [AWS IAM console](https://console.aws.amazon.com/iam), select **Users** in the left navigation menu, then select **Add User** at the top right of the page. +1. Enter a name for your user (e.g., _seqera_) and select the **Programmatic access** type. +1. Select **Next: Permissions**. +1. Select **Next: Tags > Next: Review > Create User**. + :::note + For the time being, you can ignore the "user has no permissions" warning. Permissions will be applied using the **IAM Policy**. + ::: +1. Save the **Access key ID** and **Secret access key** in a secure location as you will use these when creating credentials in Seqera. +1. After you have saved the keys, select **Close**. +1. Back in the users table, select the newly created user, then select **Add permissions** under the **Permissions** tab. +1. Select **Attach existing policies**, then search for and select each of the policies created above. +1. Select **Next: Review > Add permissions**. + +### S3 Bucket + +S3 (Simple Storage Service) is a type of **object storage**. To access files and store the results for your pipelines, create an **S3 bucket** that your Seqera IAM user can access. + +**Create an S3 bucket** + +1. 
Navigate to the [S3 service](https://console.aws.amazon.com/s3/home). +1. Select **Create New Bucket**. +1. Enter a unique name for your bucket and select a region. + :::note + To maximize data transfer resilience and minimize cost, storage should be in the same region as compute. + ::: +1. Select the default options in **Configure options**. +1. Select the default options in **Set permissions**. +1. Review and select **Create bucket**. + +:::note +S3 is used by Nextflow for the storage of intermediate files. In production pipelines, this can amount to a lot of data. To reduce costs, consider using a retention policy when creating a bucket, such as automatically deleting intermediate files after 30 days. See [here](https://aws.amazon.com/premiumsupport/knowledge-center/s3-empty-bucket-lifecycle-rule/) for more information. +::: + +### Batch Forge compute environment + +Batch Forge automates the configuration of an [AWS Batch](https://aws.amazon.com/batch/) compute environment and the queues required to deploy Nextflow pipelines. After your IAM user and S3 bucket have been set up, create a new **AWS Batch** compute environment in Seqera. + +:::caution +Batch Forge automatically creates resources that you may be charged for in your AWS account. See [Cloud costs](../monitoring/cloud-costs) for guidelines to manage cloud resources effectively and prevent unexpected costs. +::: + +**Create a Batch Forge AWS Batch compute environment** + +1. In a workspace, select **Compute environments > New environment**. +1. Enter a descriptive name for this environment, e.g., _AWS Batch Spot (eu-west-1)_. +1. Select **AWS Batch** as the target platform. +1. From the **Credentials** drop-down, select existing AWS credentials, or select **+** to add new credentials. If you're using existing credentials, skip to step 8. + :::note + You can create multiple credentials in your Seqera environment. See [Credentials](../credentials/overview). + ::: +1. Enter a name, e.g., _AWS Credentials_. 
+1. Add the **Access key** and **Secret key**. These are the keys you saved previously when you created the Seqera IAM user. +1. (Optional) Under **Assume role**, specify the IAM role to be assumed by the Seqera IAM user to access the compute environment's AWS resources. + :::note + When using AWS keys without an assumed role, the associated AWS user account must have [Launch](https://github.com/seqeralabs/nf-tower-aws/tree/master/launch) and [Forge](https://github.com/seqeralabs/nf-tower-aws/tree/master/forge) permissions. When an assumed role is provided, the keys are only used to retrieve temporary credentials impersonating the role specified. In this case, [Launch](https://github.com/seqeralabs/nf-tower-aws/tree/master/launch) and [Forge](https://github.com/seqeralabs/nf-tower-aws/tree/master/forge) permissions must be granted to the role instead of the user account. + ::: +1. Select a **Region**, e.g., _eu-west-1 - Europe (Ireland)_. +1. Enter your S3 bucket path in the **Pipeline work directory** field, e.g., `s3://seqera-bucket`. This bucket must be in the same region chosen in the previous step. + :::note + When you specify an S3 bucket as your work directory, this bucket is used for the Nextflow [cloud cache](https://www.nextflow.io/docs/latest/cache-and-resume.html#cache-stores) by default. Seqera adds a `cloudcache` block to the Nextflow configuration file for all runs executed with this compute environment. This block includes the path to a `cloudcache` folder in your work directory, e.g., `s3://seqera-bucket/cloudcache/.cache`. You can specify an alternative cache location with the **Nextflow config file** field on the pipeline [launch](../launch/launchpad#launch-form) form. + ::: + :::warning + Using an EFS file system as your work directory is currently incompatible with [Studios](../studios/index), and will result in errors with checkpoints and mounted data. + ::: +1. 
Select **Enable Wave containers** to facilitate access to private container repositories and provision containers in your pipelines using the Wave containers service. See [Wave containers](https://www.nextflow.io/docs/latest/wave.html) for more information. +1. Select **Enable Fusion v2** to allow access to your S3-hosted data via the [Fusion v2](https://docs.seqera.io/fusion) virtual distributed file system. This speeds up most data operations. The Fusion v2 file system requires Wave containers to be enabled. See [Fusion file system](../supported_software/fusion/overview) for configuration details. + +
+ Use Fusion v2 file system + + :::note + The compute recommendations below are based on internal benchmarking performed by Seqera. Benchmark runs of [nf-core/rnaseq](https://github.com/nf-core/rnaseq) used profile `test_full`, consisting of an input dataset with 16 FASTQ files and a total size of approximately 123.5 GB. + ::: + + We recommend using Fusion with AWS NVMe instances (fast instance storage) as this delivers the fastest performance when compared to environments using only AWS EBS (Elastic Block Store). + + 1. Use Seqera Platform version 23.1 or later. + 1. Use an S3 bucket as the pipeline work directory. + 1. Enable **Wave containers**, **Fusion v2**, and **fast instance storage**. + 1. Select the **Batch Forge** config mode. + 1. Fast instance storage requires an EC2 instance type that uses NVMe disks. Specify NVMe-based instance types in **Instance types** under **Advanced options**. If left unspecified, Platform selects instances from AWS NVMe-based instance type families. See [Instance store temporary block storage for EC2 instances](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html) for more information. + + :::note + When enabling fast instance storage, do not select the `optimal` instance type families (c4, m4, r4) for your compute environment as these are not NVMe-based instances. Specify AWS NVMe-based instance types, or leave the **Instance types** field empty for Platform to select NVMe instances for you. + ::: + + :::tip + We recommend selecting 8xlarge or above for large and long-lived production pipelines: + - A local temp storage disk of at least 200 GB and a random read speed of 1000 MBps or more. To work with files larger than 100 GB, increase temp storage accordingly (400 GB or more). + - Dedicated networking ensures a guaranteed network speed service level compared with "burstable" instances. 
See [Instance network bandwidth](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-network-bandwidth.html) for more information. + ::: + + When using Fusion v2 without fast instance storage, the following EBS settings are applied to optimize file system performance: + + - EBS boot disk size is increased to 100 GB + - EBS boot disk type GP3 is selected + - EBS boot disk throughput is increased to 325 MB/s + + Extensive benchmarking of Fusion v2 has demonstrated that the increased cost associated with these settings are generally outweighed by the costs saved due to decreased run time. + +
+ +1. Set the **Config mode** to **Batch Forge**. +1. Select a **Provisioning model**. In most cases, this will be **Spot**. You can specify an allocation strategy and instance types under [**Advanced options**](#advanced-options). If advanced options are omitted, Seqera Platform 23.2 and later versions default to `BEST_FIT_PROGRESSIVE` for On-Demand and `SPOT_CAPACITY_OPTIMIZED` for Spot compute environments. + :::note + You can create a compute environment that launches either Spot or On-Demand instances. Spot instances can cost as little as 20% of On-Demand instances, and with Nextflow's ability to automatically relaunch failed tasks, Spot is almost always the recommended provisioning model. Note, however, that when choosing Spot instances, Seqera will also create a dedicated queue for running the main Nextflow job using a single On-Demand instance to prevent any execution interruptions. + + From Nextflow version 24.10, the default Spot reclamation retry setting changed to `0` on AWS and Google. By default, no internal retries are attempted on these platforms. Spot reclamations now lead to an immediate failure, exposed to Nextflow in the same way as other generic failures (returning for example, `exit code 1` on AWS). Nextflow will treat these failures like any other job failure unless you actively configure a retry strategy. For more information, see [Spot instance failures and retries](https://docs.seqera.io/platform/24.2/troubleshooting_and_faqs/nextflow#spot-instance-failures-and-retries-in-nextflow). + ::: +1. Enter the **Max CPUs**, e.g., `64`. This is the maximum number of combined CPUs (the sum of all instances' CPUs) AWS Batch will provision at any time. +1. Select **EBS Auto scale (deprecated)** to allow the EC2 virtual machines to dynamically expand the amount of available disk space during task execution. This feature is deprecated, and is not compatible with Fusion v2. 
+ :::note + When you run large AWS Batch clusters (hundreds of compute nodes or more), EC2 API rate limits may cause the deletion of unattached EBS volumes to fail. You should delete volumes that remain active after Nextflow jobs have completed to avoid additional costs. Monitor your AWS account for any orphaned EBS volumes via the EC2 console, or with a Lambda function. See [here](https://aws.amazon.com/blogs/mt/controlling-your-aws-costs-by-deleting-unused-amazon-ebs-volumes/) for more information. + ::: +1. With the optional **Enable Fusion mounts (deprecated)** feature enabled, S3 buckets specified in **Pipeline work directory** and **Allowed S3 Buckets** are mounted as file system volumes in the EC2 instances carrying out the Batch job execution. These buckets can then be accessed at `/fusion/s3/`. For example, if the bucket name is `s3://imputation-gp2`, your pipeline will access it using the file system path `/fusion/s3/imputation-gp2`. **Note:** This feature has been deprecated. Consider using Fusion v2 (see above) for enhanced performance and stability. + :::note + You do not need to modify your pipeline or files to take advantage of this feature. Nextflow will automatically recognize and replace any reference to files prefixed with `s3://` with the corresponding Fusion mount paths. + ::: +1. Select **Enable Fargate for head job** to run the Nextflow head job with the [AWS Fargate](https://aws.amazon.com/fargate/) container service and speed up pipeline launch. Fargate is a serverless compute engine that enables users to run containers without the need to provision servers or clusters in advance. AWS takes a few minutes to spin up an EC2 instance, whereas jobs can be launched with Fargate in under a minute (depending on container size). We recommend Fargate for most pipeline deployments, but EC2 is more suitable for environments that use GPU instances, custom AMIs, or that require more than 16 vCPUs. 
If you specify a custom AMI ID in the [Advanced options](#advanced-options) below, this will not be applied to the Fargate-enabled head job. See [here](https://docs.aws.amazon.com/batch/latest/userguide/fargate.html#when-to-use-fargate) for more information on Fargate's limitations. + :::note + Fargate requires the Fusion v2 file system and a **Spot** provisioning model. Fargate is not compatible with EFS and FSx file systems. + ::: +1. Select **Enable GPUs** if you intend to run GPU-dependent workflows in the compute environment. See [GPU usage](./overview#aws-batch) for more information. + :::note + Seqera only supports NVIDIA GPUs. Select instances with NVIDIA GPUs for your GPU-dependent processes. + ::: +1. Select **Use Graviton CPU architecture** to execute on Graviton-based EC2 instances (i.e., ARM64 CPU architecture). When enabled, `m6g`, `r6g`, and `c6g` instance types are used by default for compute jobs, but 3rd-generation Graviton [instances](https://www.amazonaws.cn/en/ec2/graviton/) are also supported. You can specify your own **Instance types** under [**Advanced options**](#advanced-options). + :::note + Graviton requires Fargate, Wave containers, and Fusion v2 file system to be enabled. This feature is not compatible with GPU-based architecture. + ::: +1. Enter any additional **Allowed S3 buckets** that your workflows require to read input data or write output data. The **Pipeline work directory** bucket above is added by default to the list of **Allowed S3 buckets**. +1. To use **EFS**, you can either select **Use existing EFS file system** and specify an existing EFS instance, or select **Create new EFS file system** to create one. To use the EFS file system as your work directory, specify `/work` in the **Pipeline work directory** field (step 8 of this guide). + - To use an existing EFS file system, enter the **EFS file system id** and **EFS mount path**. This is the path where the EFS volume is accessible to the compute environment. 
For simplicity, we recommend that you use `/mnt/efs` as the EFS mount path. + - To create a new EFS file system, enter the **EFS mount path**. We advise that you specify `/mnt/efs` as the EFS mount path. + - EFS file systems created by Batch Forge are automatically tagged in AWS with `Name=TowerForge-`, with `` being the compute environment ID. Any manually-added resource label with the key `Name` (capital N) will override the automatically-assigned `TowerForge-` label. + :::warning + EFS file systems are compatible with [Studios](../studios/index), **except** when using the EFS file system as your **work directory**. + ::: +1. To use **FSx for Lustre**, you can either select **Use existing FSx file system** and specify an existing FSx instance, or select **Create new FSx file system** to create one. To use the FSx file system as your work directory, specify `/work` in the **Pipeline work directory** field (step 8 of this guide). + - To use an existing FSx file system, enter the **FSx DNS name** and **FSx mount path**. The FSx mount path is the path where the FSx volume is accessible to the compute environment. For simplicity, we recommend that you use `/mnt/fsx` as the FSx mount path. + - To create a new FSx file system, enter the **FSx size** (in GB) and the **FSx mount path**. We advise that you specify `/mnt/fsx` as the FSx mount path. + - FSx file systems created by Batch Forge are automatically tagged in AWS with `Name=TowerForge-`, with `` being the compute environment ID. Any manually-added resource label with the key `Name` (capital N) will override the automatically-assigned `TowerForge-` label. +1. Select **Dispose resources** to automatically delete these AWS resources if you delete the compute environment in Seqera Platform. +1. Apply [**Resource labels**](../resource-labels/overview) to the cloud resources consumed by this compute environment. Workspace default resource labels are prefilled. +1. 
Expand **Staging options** to include: + - Optional [pre- or post-run Bash scripts](../launch/advanced#pre-and-post-run-scripts) that execute before or after the Nextflow pipeline execution in your environment. + - Global Nextflow configuration settings for all pipeline runs launched with this compute environment. Values defined here are pre-filled in the **Nextflow config file** field in the pipeline launch form. These values can be overridden during pipeline launch. + :::info + Configuration settings in this field override the same values in the pipeline repository `nextflow.config` file. See [Nextflow config file](../launch/advanced#nextflow-config-file) for more information on configuration priority. + ::: +1. Specify custom **Environment variables** for the **Head job** and/or **Compute jobs**. +1. Configure any advanced options described in the next section, as needed. +1. Select **Create** to finalize the compute environment setup. It will take a few seconds for all the AWS resources to be created before you are ready to launch pipelines. + +:::info +See [Launch pipelines](../launch/launchpad) to start executing workflows in your AWS Batch compute environment. +::: + +### Advanced options + +Seqera Platform compute environments for AWS Batch include advanced options to configure instance types, resource allocation, custom networking, and CloudWatch and ECS agent integration. + +**Batch Forge AWS Batch advanced options** + +Specify the **Allocation strategy** and indicate any preferred **Instance types**. AWS applies quotas for the number of running and requested [Spot](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-spot-limits.html) and [On-Demand](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-on-demand-instances.html#ec2-on-demand-instances-limits) instances per account. AWS will allocate instances from up to 20 instance types, based on those requested for the compute environment. 
AWS excludes the largest instances when you request more than 20 instance types. + + :::note + If these advanced options are omitted, allocation strategy defaults are `BEST_FIT_PROGRESSIVE` for On-Demand and `SPOT_CAPACITY_OPTIMIZED` for Spot compute environments. + ::: + :::caution + tw CLI v0.8 and earlier does not support the `SPOT_PRICE_CAPACITY_OPTIMIZED` allocation strategy in AWS Batch. You cannot currently use CLI to create or otherwise interact with AWS Batch Spot compute environments that use this allocation strategy. + ::: + +- Configure a custom networking setup using the **VPC ID**, **Subnets**, and **Security groups** fields. +- You can specify a custom **AMI ID**. + + :::note + From version 24.2, Seqera supports Amazon Linux 2023 ECS-optimized AMIs, in addition to previously supported Amazon Linux-2 AMIs. AWS-recommended Amazon Linux 2023 AMI names start with `al2023-`. To learn more about approved versions of the Amazon ECS-optimized AMIs or creating a custom AMI, see [this AWS guide](https://docs.aws.amazon.com/batch/latest/userguide/compute_resource_AMIs.html#batch-ami-spec). + + If a custom AMI is specified and the **Enable GPU** option is also selected, the custom AMI will be used instead of the AWS-recommended GPU-optimized AMI. + ::: + +- If you need to debug the EC2 instance provisioned by AWS Batch, specify a **Key pair** to log in to the instance via SSH. +- You can set **Min CPUs** to be greater than `0`, in which case some EC2 instances will remain active. An advantage of this is that pipeline executions will initialize faster. + + :::note + Setting Min CPUs to a value greater than 0 will keep the required compute instances active, even when your pipelines are not running. This will result in additional AWS charges. + ::: + +- Use **Head Job CPUs** and **Head Job Memory** to specify the hardware resources allocated for the Nextflow head job. The default head job memory allocation is 4096 MiB. 
+ + :::warning + Setting Head Job values will also limit the size of any Studio session that can be created in the compute environment. + ::: + +- Use **Head Job role** and **Compute Job role** to grant fine-grained IAM permissions to the **Head Job** and **Compute Jobs**. +- Add an execution role ARN to the **Batch execution role** field to grant permissions to make API calls on your behalf to the ECS container used by Batch. This is required if the pipeline launched with this compute environment needs access to the secrets stored in this workspace. This field can be ignored if you are not using secrets. +- Specify an EBS block size (in GB) in the **EBS auto-expandable block size** field to control the initial size of the EBS auto-expandable volume. New blocks of this size are added when the volume begins to run out of free space. This feature is deprecated, and is not compatible with Fusion v2. +- Enter the **Boot disk size** (in GB) to specify the size of the boot disk in the VMs created by this compute environment. +- If you're using **Spot** instances, you can also specify the **Cost percentage**, which is the maximum allowed price of a **Spot** instance as a percentage of the **On-Demand** price for that instance type. Spot instances will not be launched until the current Spot price is below the specified cost percentage. +- Use **AWS CLI tool path** to specify the location of the `aws` CLI. +- Specify a **CloudWatch Log group** for the `awslogs` driver to stream the logs entry to an existing Log group in Cloudwatch. +- Specify a custom **ECS agent configuration** for the ECS agent parameters used by AWS Batch. This is appended to the `/etc/ecs/ecs.config` file in each cluster node. + + :::note + Altering this file may result in a malfunctioning Batch Forge compute environment. See [Amazon ECS container agent configuration](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-agent-config.html) to learn more about the available parameters. 
+ ::: + +## Manual + +This section is for users with a pre-configured AWS environment. You will need a [Batch queue, a Batch compute environment, an IAM user, and an S3 bucket](../enterprise/advanced-topics/manual-aws-batch-setup.mdx) already set up. + +To enable Seqera in your existing AWS configuration, you need an IAM user with the following permissions: + +- `AmazonS3ReadOnlyAccess` +- `AmazonEC2ContainerRegistryReadOnly` +- `CloudWatchLogsReadOnlyAccess` +- A [custom policy](https://github.com/seqeralabs/nf-tower-aws/blob/master/launch/launch-policy.json) to grant the ability to submit and control Batch jobs +- Write access to any S3 bucket used by pipelines with [this policy template](https://github.com/seqeralabs/nf-tower-aws/blob/master/launch/s3-bucket-write.json) + +### S3 bucket access + +Seqera can use S3 to store the intermediate files and output data generated by pipeline executions. Create a policy for your Seqera IAM user that grants access to specific buckets. + +**Assign an S3 access policy to Seqera IAM users** + +1. Go to the IAM User table in the [IAM service](https://console.aws.amazon.com/iam/home). +1. Select the IAM user. +1. Select **Add inline policy**. +1. Copy the contents of [this policy](https://github.com/seqeralabs/nf-tower-aws/blob/master/launch/s3-bucket-write.json) into the **JSON** tab. Replace `YOUR-BUCKET-NAME` (lines 10 and 21) with your bucket name. +1. Name your policy and select **Create policy**. + +### Seqera manual compute environment + +With your AWS environment and resources set up and your user permissions configured, create an AWS Batch compute environment in Seqera manually. + +:::caution +Your Seqera compute environment uses resources that you may be charged for in your AWS account. See [Cloud costs](../monitoring/cloud-costs) for guidelines to manage cloud resources effectively and prevent unexpected costs. +::: + +**Create a manual Seqera compute environment** + +1. 
In a workspace, select **Compute environments > New environment**. +1. Enter a descriptive name for this environment, e.g., _AWS Batch Manual (eu-west-1)_. +1. Select **AWS Batch** as the target platform. +1. Select **+** to add new credentials. +1. Enter a name for the credentials, e.g., _AWS Credentials_. +1. Enter the **Access key** and **Secret key** for your IAM user. + :::note + You can create multiple credentials in your Seqera environment. See [Credentials](../credentials/overview). + ::: +1. Select a **Region**, e.g., _eu-west-1 - Europe (Ireland)_. +1. Enter an S3 bucket path for the **Pipeline work directory**, e.g., `s3://seqera-bucket`. This bucket must be in the same region chosen in the previous step. + :::note + When you specify an S3 bucket as your work directory, this bucket is used for the Nextflow [cloud cache](https://www.nextflow.io/docs/latest/cache-and-resume.html#cache-stores) by default. Seqera adds a `cloudcache` block to the Nextflow configuration file for all runs executed with this compute environment. This block includes the path to a `cloudcache` folder in your work directory, e.g., `s3://seqera-bucket/cloudcache/.cache`. You can specify an alternative cache location with the **Nextflow config file** field on the pipeline [launch](../launch/launchpad#launch-form) form. + ::: +1. Select **Enable Wave containers** to facilitate access to private container repositories and provision containers in your pipelines using the Wave containers service. See [Wave containers](https://www.nextflow.io/docs/latest/wave.html) for more information. +1. Select **Enable Fusion v2** to allow access to your S3-hosted data via the [Fusion v2](https://docs.seqera.io/fusion) virtual distributed file system. This speeds up most data operations. The Fusion v2 file system requires Wave containers to be enabled. See [Fusion file system](../supported_software/fusion/overview) for configuration details. + +
+ Use Fusion v2 file system + + :::note + The compute recommendations below are based on internal benchmarking performed by Seqera. Benchmark runs of [nf-core/rnaseq](https://github.com/nf-core/rnaseq) used profile `test_full`, consisting of an input dataset with 16 FASTQ files and a total size of approximately 123.5 GB. + ::: + + We recommend using Fusion with AWS NVMe instances (fast instance storage) as this delivers the fastest performance when compared to environments using only AWS EBS (Elastic Block Store). + + 1. Use Seqera Platform version 23.1 or later. + 1. Use an S3 bucket as the pipeline work directory. + 1. Enable **Wave containers**, **Fusion v2**, and **fast instance storage**. + 1. Fast instance storage requires an EC2 instance type that uses NVMe disks. Specify NVMe-based instance types in **Instance types** under **Advanced options**. If left unspecified, Platform selects instances from AWS NVMe-based instance type families. See [Instance store temporary block storage for EC2 instances](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html) for more information. + + :::note + When enabling fast instance storage, do not select the `optimal` instance type families (c4, m4, r4) for your compute environment as these are not NVMe-based instances. Specify AWS NVMe-based instance types, or leave the **Instance types** field empty for Platform to select NVMe instances for you. + ::: + + :::tip + We recommend selecting 8xlarge or above for large and long-lived production pipelines: + - A local temp storage disk of at least 200 GB and a random read speed of 1000 MBps or more. To work with files larger than 100 GB, increase temp storage accordingly (400 GB or more). + - Dedicated networking ensures a guaranteed network speed service level compared with "burstable" instances. See [Instance network bandwidth](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-network-bandwidth.html) for more information. 
+ ::: + + When using Fusion v2 without fast instance storage, the following EBS settings are applied to optimize file system performance: + + - EBS boot disk size is increased to 100 GB + - EBS boot disk type GP3 is selected + - EBS boot disk throughput is increased to 325 MB/s + + Extensive benchmarking of Fusion v2 has demonstrated that the increased cost associated with these settings is generally outweighed by the costs saved due to decreased run time. + +
+ +1. Set the **Config mode** to **Manual**. +1. Enter the **Head queue**, which is the name of the AWS Batch queue that the Nextflow main job will run. +1. Enter the **Compute queue**, which is the name of the AWS Batch queue where tasks will be submitted. +1. Apply [**Resource labels**](../resource-labels/overview) to the cloud resources consumed by this compute environment. Workspace default resource labels are prefilled. +1. Expand **Staging options** to include: + - Optional [pre- or post-run Bash scripts](../launch/advanced#pre-and-post-run-scripts) that execute before or after the Nextflow pipeline execution in your environment. + - Global Nextflow configuration settings for all pipeline runs launched with this compute environment. Values defined here are pre-filled in the **Nextflow config file** field in the pipeline launch form. These values can be overridden during pipeline launch. + :::info + Configuration settings in this field override the same values in the pipeline repository `nextflow.config` file. See [Nextflow config file](../launch/advanced#nextflow-config-file) for more information on configuration priority. + ::: +1. Specify custom **Environment variables** for the **Head job** and/or **Compute jobs**. +1. Configure any advanced options described in the next section, as needed. +1. Select **Create** to finalize the compute environment setup. + +:::info +See [Launch pipelines](../launch/launchpad) to start executing workflows in your AWS Batch compute environment. +::: + +### Advanced options + +Seqera Platform compute environments for AWS Batch include advanced options to configure resource allocation, execution roles, custom AWS CLI tool paths, and CloudWatch integration. + +**Seqera AWS Batch advanced options** + +- Use **Head Job CPUs** and **Head Job Memory** to specify the hardware resources allocated for the Nextflow head job. The default head job memory allocation is 4096 MiB. 
+ +- Use **Head Job role** and **Compute Job role** to grant fine-grained IAM permissions to the Head Job and Compute Jobs. +- Add an execution role ARN to the **Batch execution role** field to grant permissions to make API calls on your behalf to the ECS container used by Batch. This is required if the pipeline launched with this compute environment needs access to the secrets stored in this workspace. This field can be ignored if you are not using secrets. +- Use **AWS CLI tool path** to specify the location of the `aws` CLI. +- Specify a **CloudWatch Log group** for the `awslogs` driver to stream log entries to an existing Log group in CloudWatch. + +:::caution +Seqera Platform is designed to terminate compute resources when a Nextflow pipeline completes or is canceled. However, due to external factors — including user-defined workflow logic, transient cloud faults, or abnormal pipeline exits — residual resources may persist. While Platform provides visibility to detect and resolve these states, customers are responsible for final resource cleanup and ensuring compute environments operate according to Platform expectations. + +From Nextflow v24.10+, compute jobs are identifiable by Seqera workflow ID. If you search your AWS console/CLI/API for jobs prefixed by a given workflow ID, you can check the status and perform additional cleanup in edge case scenarios. +::: diff --git a/platform-enterprise/compute-envs/azure-batch.md b/platform-enterprise/compute-envs/azure-batch.md new file mode 100644 index 000000000..2ff6e9605 --- /dev/null +++ b/platform-enterprise/compute-envs/azure-batch.md @@ -0,0 +1,470 @@ +--- +title: "Azure Batch" +description: "Instructions to set up Azure Batch in Seqera Platform" +date: "04 Jan 2024" +tags: [azure, batch, compute environment] +--- + +:::note +This guide assumes you already have an Azure account with a valid Azure Subscription. +For details, visit [Azure Free Account][az-create-account]. 
+Ensure you have sufficient permissions to create resource groups, an Azure Storage account, and an Azure Batch account. +::: + +## Azure concepts + +#### Regions + +Azure regions are specific geographic locations around the world where Microsoft has established data centers to host its cloud services. Each Azure region is a collection of data centers that provide users with high availability, fault tolerance, and low latency for cloud services. Each region offers a wide range of Azure services that can be chosen to optimize performance, ensure data residency compliance, and meet regulatory requirements. Azure regions also enable redundancy and disaster recovery options by allowing resources to be replicated across different regions, enhancing the resilience of applications and data. + +#### Resource groups + +An Azure resource group is a logical container that holds related Azure resources such as virtual machines, storage accounts, databases, and more. A resource group serves as a management boundary to organize, deploy, monitor, and manage the resources within it as a single entity. Resources in a resource group share the same lifecycle, meaning they can be deployed, updated, and deleted together. This also enables easier access control, monitoring, and cost management, making resource groups a foundational element in organizing and managing cloud infrastructure in Azure. + +#### Accounts + +Azure uses accounts for each service. For example, an [Azure Storage account][az-learn-storage] will house a collection of blob containers, file shares, queues, and tables. An Azure subscription can have multiple Azure Storage and Azure Batch accounts - however, a Platform compute environment can only use one of each. Multiple Platform compute environments can be created to use separate credentials, Azure Storage accounts, and Azure Batch accounts. 
+ +#### Service principals + +An Azure service principal is an identity created specifically for applications, hosted services, or automated tools to access Azure resources. It acts like a user identity with a defined set of permissions, enabling resources authenticated through the service principal to perform actions within the Azure account. Seqera can utilize an Azure service principal to authenticate and access Azure Batch for job execution and Azure Storage for data management. + +## Create Azure resources + +### Resource group + +Create a resource group to link your Azure Batch and Azure Storage account: + +:::note +A resource group can be created while creating an Azure Storage account or Azure Batch account. +::: + +1. Log in to your Azure account, go to the [Create Resource group][az-create-rg] page, and select **Create new resource group**. +1. Enter a name for the resource group, such as _seqeracompute_. +1. Choose the preferred region. +1. Select **Review and Create** to proceed. +1. Select **Create**. + +### Storage account + +After creating a resource group, set up an [Azure Storage account][az-learn-storage]: + +1. Log in to your Azure account, go to the [Create storage account][az-create-storage] page, and select **Create a storage account**. + :::note + If you haven't created a resource group, you can do so now. + ::: +1. Enter a name for the storage account, such as _seqeracomputestorage_. +1. Choose the preferred region. This must be the same region as the Batch account. +1. Platform supports all performance or redundancy settings — select the most appropriate settings for your use case. +1. Select **Next: Advanced**. +1. Enable _storage account key access_. +1. Select **Next: Networking**. + - Enable public access from all networks. You can enable public access from selected virtual networks and IP addresses, but you will be unable to use Forge to create compute resources. Disabling public access is not supported. +1. Select **Data protection**. 
+ - Configure appropriate settings. All settings are supported by the platform. +1. Select **Encryption**. + - Only Microsoft-managed keys (MMK) are supported. +1. In **tags**, add any required tags for the storage account. +1. Select **Review and Create**. +1. Select **Create** to create the Azure Storage account. + - You will need at least one Blob Storage container to act as a working directory for Nextflow. +1. Go to your new storage account and select **+ Container** to create a new Blob Storage container. A new container dialogue will open. Enter a suitable name, such as _seqeracomputestorage-container_. +1. Go to the **Access Keys** section of your new storage account (_seqeracomputestorage_ in this example). +1. Store the access keys for your Azure Storage account, to be used when you create a Seqera compute environment. + +:::caution +Blob container storage credentials are associated with the Batch pool configuration. Avoid changing these credentials in your Seqera instance after you have created the compute environment. +::: + +### Batch account + +After you have created a resource group and Storage account, create a [Batch account][az-learn-batch]: + +1. Log in to your Azure account and select **Create a batch account** on [this page][az-create-batch]. +1. Select the existing resource group or create a new one. +1. Enter a name for the Batch account, such as _seqeracomputebatch_. +1. Choose the preferred region. This must be the same region as the Storage account. +1. Select **Advanced**. +1. For **Pool allocation mode**, select **Batch service**. +1. For **Authentication mode**, select _Shared Key_. +1. Select **Networking**. Ensure networking access is sufficient for Platform and any additional required resources. +1. Add any **Tags** to the Batch account, if needed. +1. Select **Review and Create**. +1. Select **Create**. +1. Go to your new Batch account, then select **Access Keys**. +1. 
Store the access keys for your Azure Batch account, to be used when you create a Seqera compute environment. + :::caution + A newly-created Azure Batch account may not be entitled to create virtual machines without making a service request to Azure. + See [Azure Batch service quotas and limits][az-batch-quotas] for more information. + ::: +1. Select the **+ Quotas** tab of the Azure Batch account to check and increase existing quotas if necessary. +1. Select **+ Request quota increase** and add the quantity of resources you require. Here is a brief guideline: + - **Active jobs and schedules**: Each Nextflow process will require an active Azure Batch job per pipeline while running, so increase this number to a high level. See [here][az-learn-jobs] to learn more about jobs in Azure Batch. + - **Pools**: Each platform compute environment requires one Azure Batch pool. Each pool is composed of multiple machines of one virtual machine size. + :::note + To use separate pools for head and compute nodes, see [this FAQ entry](../troubleshooting_and_faqs/azure_troubleshooting). + ::: + - **Batch accounts per region per subscription**: Set this to the number of Azure Batch accounts per region per subscription. Only one is required. + - **Spot/low-priority vCPUs**: Platform does not support spot or low-priority machines when using Forge, so when using Forge this number can be zero. When manually setting up a pool, select an appropriate number of concurrent vCPUs here. + - **Total Dedicated vCPUs per VM series**: See the Azure documentation for [virtual machine sizes][az-vm-sizes] to help determine the machine size you need. We recommend the latest version of the ED series available in your region as a cost-effective and appropriately-sized machine for running Nextflow. However, you will need to select alternative machine series that have additional requirements, such as those with additional GPUs or faster storage. Increase the quota by the number of required concurrent CPUs. 
In Azure, machines are charged per CPU minute so there is no additional cost for requesting a higher quota. + +### Credentials + +There are two types of Azure credentials available: access keys and Entra service principals. + +Access keys are simple to use but have several limitations: +- Access keys are long-lived. +- Access keys provide full access to the Azure Storage and Azure Batch accounts. +- Azure allows only two access keys per account, making them a single point of failure. + +Entra service principals are accounts which can be granted access to Azure Batch and Azure Storage resources: +- Service principals enable role-based access control with more precise permissions. +- Service principals map to a many-to-many relationship with Azure Batch and Azure Storage accounts. +- Some Azure Batch features are only available when using a service principal. + +:::note +The two Azure credential types use different authentication methods. You can add more than one credential to a workspace, but Platform compute environments use only one credential at any given time. While separate credentials can be used by separate compute environments concurrently, they are not cross-compatible — access granted by one credential will not be shared with the other. +::: + +#### Access keys + +:::info +Batch Forge compute environments must use access keys for authentication. Service principals are only supported in manual compute environments. +::: + +To create an access key: + +1. Navigate to the Azure Portal and sign in. +1. Locate the Azure Batch account and select **Keys** under **Account management**. The Primary and Secondary keys are listed here. Copy one of the keys and save it in a secure location for later use. +1. Locate the Azure Storage account and, under the **Security and Networking** section, select **Access keys**. Key1 and Key2 options are listed here. Copy one of them and save it in a secure location for later use. +1. 
In your Platform workspace **Credentials** tab, select the **Add credentials** button and complete the following fields: + - Enter a **Name** for the credentials + - **Provider**: Azure + - Select the **Shared key** tab + - Add the **Batch account** and **Blob Storage account** names and access keys to the relevant fields. +1. Delete the copied keys from their temporary location after they have been added to a credential in Platform. + +#### Entra service principal and managed identity + +To use Entra for authentication, you must create a service principal and managed identity. Seqera uses the service principal to authenticate to Azure Batch and Azure Storage. It submits a Nextflow task as the head process to run Nextflow, which authenticates to Azure Batch and Storage using the managed identity attached to the node pool. + +Therefore, you must create both an Entra service principal and a managed identity. You add the service principal to your Seqera credentials and attach the managed identity to your Azure Batch node pool which will run Nextflow. + +:::info +Batch Forge compute environments must use access keys for authentication. Service principals are only supported in manual compute environments. + +The use of Entra service principals in manual compute environments requires the use of a [managed identity](#managed-identity). +::: + +##### Service principal + +See [Create a service principal][az-create-sp] for more details. + +To create an Entra service principal: + +1. In the Azure Portal, navigate to **Microsoft Entra ID**. Under **App registrations**, select **New registration**. +1. Provide a name for the application. The application will automatically have a service principal associated with it. +1. Assign roles to the service principal: + 1. Go to the Azure Storage account. Under **Access Control (IAM)**, select **Add role assignment**. + 1. Select the **Storage Blob Data Reader** and **Storage Blob Data Contributor** roles. + 1. 
Select **Members**, then **Select Members**. Search for your newly created service principal and assign the role. + 1. Repeat the same process for the Azure Batch account, using the **Azure Batch Data Contributor** role. +1. Platform will need credentials to authenticate as the service principal: + 1. Navigate back to the app registration. On the **Overview** page, save the **Application (client) ID** and **Directory (tenant) ID** values for use in Platform. + 1. Select **Certificates & secrets**, then **New client secret**. A new secret is created containing a value and secret ID. Save both values securely for use in Platform. +1. In your Platform workspace **Credentials** tab, select the **Add credentials** button and complete the following fields: + - Enter a **Name** for the credentials + - **Provider**: Azure + - Select the **Entra** tab + - Complete the remaining fields: **Batch account name**, **Blob Storage account name**, **Tenant ID** (Directory (tenant) ID in Azure), **Client ID** (Application (client) ID in Azure), **Client secret** (Client secret value in Azure). +1. Delete the ID and secret values from their temporary location after they have been added to a credential in Platform. + +##### Managed identity + +:::info +To use managed identities, Seqera requires Nextflow version 24.06.0-edge or later. +::: + +Nextflow can authenticate to Azure services using a managed identity. This method offers enhanced security compared to access keys, but must run on Azure infrastructure. + +When you use a manually-configured compute environment with a managed identity attached to the Azure Batch Pool, Nextflow can use this managed identity for authentication. However, Seqera still needs to use access keys or an Entra service principal to submit the initial task to Azure Batch to run Nextflow, which will then proceed with the managed identity for subsequent authentication. + +1. In Azure, create a user-assigned managed identity. 
See [Manage user-assigned managed identities](https://learn.microsoft.com/en-us/entra/identity/managed-identities-azure-resources/how-manage-user-assigned-managed-identities) for detailed steps. Take note of the client ID of the managed identity when you create it. +1. The user-assigned managed identity must have the necessary access roles for Nextflow. See [Required role assignments](https://www.nextflow.io/docs/latest/azure.html#required-role-assignments) for more information. +1. Associate the user-assigned managed identity with the Azure Batch Pool. See [Set up managed identity in your Batch pool](https://learn.microsoft.com/en-us/troubleshoot/azure/hpc/batch/use-managed-identities-azure-batch-account-pool#set-up-managed-identity-in-your-batch-pool) for more information. +1. When you set up the Seqera compute environment, select the Azure Batch pool by name and enter the managed identity client ID in the specified field as instructed above. + +When you submit a pipeline to this compute environment, Nextflow will authenticate using the managed identity associated with the Azure Batch node it runs on, rather than relying on access keys. + +## Add Seqera compute environment + +There are two ways to create an Azure Batch compute environment in Seqera Platform: + +- [**Batch Forge**](#batch-forge): Automatically creates Azure Batch resources. +- [**Manual**](#manual): For using existing Azure Batch resources. + +### VM size considerations + +Azure Batch requires you to select an appropriate VM size for your compute environment. There are a number of considerations when selecting VM sizes — See [Sizes for virtual machines in Azure][az-vm-sizes] for more information. + +1. **Family**: The first letter of the VM size name indicates the machine family. For example, `Standard_E16d_v5` is a member of the E family. + - *A*: Economical machines, low power machines. + - *B*: Burstable machines which use credits for cost allocation. 
+ - *D*: General purpose machines suitable for most applications. + - *DC*: D machines with additional confidential compute capabilities. + - *E*: The same as D but with more memory. These are generally the best machines for bioinformatics workloads. + - *EC*: The same as E but with additional confidential compute capabilities. + - *F*: Compute optimized machines which come with a faster CPU compared to D-series machines. + - *M*: Memory optimized machines which come with extremely large and fast memory layers, typically more than is needed for bioinformatics workloads. + - *L*: Storage optimized machines which come with large locally attached NVMe storage drives. Note that these need to be configured before you can use them with Azure Batch. + - *N*: Accelerated computing machines which come with FPGAs, GPUs, or custom ASICs. + - *H*: High performance machines which come with the fastest processors and memory. + +In general, we recommend using the E family of machines for bioinformatics workloads since these are cost-effective, widely available, and sufficiently fast. + +1. **vCPUs**: The machine's number of vCPUs. This is the main factor in determining the speed of the machine. +1. **features**: Additional machine features. For example, some machines come with a local SSD. + - d: A local storage disk. Azure Batch can use this disk automatically instead of the operating system disk. + - s: The VM supports a [premium storage account][az-premium-storage]. + - a: AMD CPUs instead of Intel. + - p: ARM-based CPUs, such as Azure Cobalt. + - l: Reduced memory with a large cost reduction. + +1. **Version**: The version of the VM size. This is the generation of the machine. Typically, more recent is better but availability can vary between regions. + +In the Azure Portal on the page for your Azure Batch account, request an appropriate quota for your desired VM size. See [Azure Batch service quotas and limits][az-batch-quotas] for more information. 
+ +### Batch Forge + +:::caution +Batch Forge automatically creates resources that you may be charged for in your Azure account. See [Cloud costs](../monitoring/cloud-costs) for guidelines to manage cloud resources effectively and prevent unexpected costs. +::: + +Create a Batch Forge Azure Batch compute environment: + +1. In a workspace, select **Compute Environments > New Environment**. +1. Enter a descriptive name, such as _Azure Batch (east-us)_. +1. Select **Azure Batch** as the target platform. +1. Choose existing Azure credentials or add a new credential. + :::info + Batch Forge compute environments must use access keys for authentication. Entra service principals are only supported in manual compute environments. + ::: +1. Add the **Batch account** and **Blob Storage** account names and access keys. +1. Select a **Region**, such as _eastus_. +1. In the **Pipeline work directory** field, enter the Azure blob container created previously. For example, `az://seqeracomputestorage-container/work`. + :::note + When you specify a Blob Storage bucket as your work directory, this bucket is used for the Nextflow [cloud cache](https://www.nextflow.io/docs/latest/cache-and-resume.html#cache-stores) by default. You can specify an alternative cache location with the **Nextflow config file** field on the pipeline [launch](../launch/launchpad#launch-form) form. + ::: +1. Select **Enable Wave containers** to facilitate access to private container repositories and provision containers in your pipelines using the Wave containers service. See [Wave containers][wave-docs] for more information. +1. Select **Enable Fusion v2** to allow access to your Azure Blob Storage data via the [Fusion v2][fusion-docs] virtual distributed file system. This speeds up most data operations. The Fusion v2 file system requires Wave containers to be enabled. See [Fusion file system](../supported_software/fusion/overview) for configuration details. + +
+ Use Fusion v2 + + :::note + The compute recommendations below are based on internal benchmarking performed by Seqera. Benchmark runs of [nf-core/rnaseq](https://github.com/nf-core/rnaseq) used profile `test_full`, consisting of an input dataset with 16 FASTQ files and a total size of approximately 123.5 GB. + ::: + + Azure virtual machines include fast SSDs and require no additional storage configuration for Fusion. For optimal performance, use VMs with sufficient local storage to support Fusion's streaming data throughput. + + 1. Use Seqera Platform version 23.1 or later. + 1. Use an Azure Blob storage container as the pipeline work directory. + 1. Enable **Wave containers** and **Fusion v2**. + 1. Select the **Batch Forge** config mode. + 1. Specify suitable VM sizes under **VMs type**. A `Standard_E16d_v5` VM or larger is recommended for production use. + + :::tip + We recommend selecting machine types with a local temp storage disk of at least 200 GB and a random read speed of 1000 MBps or more for large and long-lived production pipelines. To work with files larger than 100 GB, increase temp storage accordingly (400 GB or more). + + The suffix `d` after the core number (e.g., `Standard_E16*d*_v5`) denotes a VM with a local temp disk. Select instances with Standard SSDs — Fusion does not support Azure network-attached storage (Premium SSDv2, Ultra Disk, etc.). Larger local storage increases Fusion's throughput and reduces the chance of overloading the machine. See [Sizes for virtual machines in Azure](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes/overview) for more information. + ::: + +
+ +1. Set the **Config mode** to **Batch Forge**. +1. Enter the default **VMs type**, depending on your quota limits set previously. The default is _Standard_D4_v3_. +1. Enter the **VMs count**. If autoscaling is enabled (default), this is the maximum number of VMs you wish the pool to scale up to. If autoscaling is disabled, this is the fixed number of virtual machines in the pool. +1. Enable **Autoscale** to scale up and down automatically, based on the number of pipeline tasks. The number of VMs will vary from **0** to **VMs count**. +1. Enable **Dispose resources** for Seqera to automatically delete the Batch pool if the compute environment is deleted on the platform. +1. Select or create [**Container registry credentials**](../credentials/azure_registry_credentials) to authenticate a registry (used by the [Wave containers](https://www.nextflow.io/docs/latest/wave.html) service). It is recommended to use an [Azure Container registry](https://azure.microsoft.com/en-gb/products/container-registry) within the same region for maximum performance. +1. Apply [**Resource labels**](../resource-labels/overview). This will populate the **Metadata** fields of the Azure Batch pool. +1. Expand **Staging options** to include: + - Optional [pre- or post-run Bash scripts](../launch/advanced#pre-and-post-run-scripts) that execute before or after the Nextflow pipeline execution in your environment. + - Global Nextflow configuration settings for all pipeline runs launched with this compute environment. Values defined here are pre-filled in the **Nextflow config file** field in the pipeline launch form. These values can be overridden during pipeline launch. + :::info + Configuration settings in this field override the same values in the pipeline repository `nextflow.config` file. See [Nextflow config file](../launch/advanced#nextflow-config-file) for more information on configuration priority. + ::: +1. 
Specify custom **Environment variables** for the **Head job** and/or **Compute jobs**.
+1. Configure any advanced options you need:
+    - Use **Jobs cleanup policy** to control how Nextflow process jobs are deleted on completion. Active jobs consume the quota of the Azure Batch account. By default, jobs are terminated by Nextflow and removed from the quota when all tasks successfully complete. If set to _Always_, all jobs are deleted by Nextflow after pipeline completion. If set to _Never_, jobs are never deleted. If set to _On success_, successful tasks are removed but failed tasks will be left for debugging purposes.
+    - Use **Token duration** to control the duration of the SAS token generated by Nextflow. This must be as long as the longest period of time the pipeline will run.
+1. Select **Add** to finalize the compute environment setup. It will take a few seconds for all the resources to be created before the compute environment is ready to launch pipelines.
+
+:::info
+See [Launch pipelines](../launch/launchpad) to start executing workflows in your Azure Batch compute environment.
+:::
+
+### Manual
+
+You can configure Seqera Platform to use a pre-existing Azure Batch pool. This allows the use of more advanced Azure Batch features, such as custom VM images and private networking. See [Azure Batch security best practices][az-batch-best-practices] for more information.
+
+:::caution
+Your Seqera compute environment uses resources that you may be charged for in your Azure account. See [Cloud costs](../monitoring/cloud-costs) for guidelines to manage cloud resources effectively and prevent unexpected costs.
+:::
+
+#### Create a Nextflow-compatible Azure Batch pool
+
+If not described below, use the default settings:
+
+1. **Account**: You must have an existing Azure Batch account. Ideally, you should already have tested whether you can run an Azure Batch task within this account. Any type of account is compatible.
+1. 
**Quota**: You must check you have sufficient quota for the number of pools, jobs, and vCPUs per series. See [Azure Batch service quotas and limits][az-batch-quotas] for more information. +1. On the Azure Batch page of the Azure Portal, select **Pools** and then **+ Add**. +1. **Name**: Enter a **Pool ID** and **Display Name**. This ID will be used by Seqera and Nextflow. +1. **Identity**: Select **User assigned** to use a managed identity for the pool. Select **Add** for the user-assigned managed identity and select the managed identity with the correct permissions to the Azure Storage and Batch accounts. +1. **Operating System**: You can use any Linux-based image here, but it is recommended to use it with a Microsoft Azure Batch-provided image. Note that there are two generations of Azure Virtual Machine images, and certain VM series are only available in one generation. See [Azure Virtual Machine series][az-vm-gen] for more information. For default settings, select the following: + - **Publisher**: `microsoft-dsvm` + - **Offer**: `ubuntu-hpc` + - **Sku**: `2204` + - **Security type**: `standard` +1. **OS disk storage account type**: Certain VM series only support a specific Storage account type. See [Azure managed disk types][az-disk-type] and [Azure Virtual Machine series][az-vm-gen] for more information. In general, a VM series with the suffix *s* supports a *Premium LRS* Storage account type. For example, a `standard_e16ds_v5` supports `Premium_LRS` but a `standard_e16d_v5` does not. Premium LRS offers the best performance. +1. **OS disk size**: The size of the OS disk in GB. This must be sufficient to hold every Docker container the VM will run, plus any logging or further files. If you are not using a machine with attached storage, you must increase this disk size to accommodate task files (see VM type below). If you are using a machine with attached storage, this setting can be left at the OS default size. +1. 
**Container configuration**: Container configuration must be turned on. Do this by switching it from **None** to **Custom**. The type is **Docker compatible** which should be the only available option. This will enable the VM to use Docker images and is sufficient. However, you can add further options: + - Under **Container image names** you can add containers for the VM to grab at startup time. Add a list of fully qualified Docker URIs, such as `quay.io/seqeralabs/nf-launcher:j17-23.04.2`. - Under **Container registries**, you can add any container registries that require additional authentication. Select **Container registries**, then **Add**. Here, you can add a registry username, password, and registry server. If you attached the managed identity earlier, select this as an authentication method so you don't have to enter a username and password. +1. **VM size**: This is the size of the VM. See [Sizes for virtual machines in Azure][az-vm-sizes] for more information. +1. **Scale**: Azure Node pools can be fixed in size or autoscale based on a formula. Autoscaling is recommended to enable scaling your resources down to zero when not in use. Select **Auto scale** and change the **AutoScale evaluation interval** to 5 minutes - this is the minimum period between evaluations of the autoscale formula. For **Formula**, you can use any valid formula — See [Create a formula to automatically scale compute nodes in a Batch pool][az-batch-autoscale] for more information. This is the default autoscaling formula, with a maximum of 8 VMs: + + ``` + // Get pool lifetime since creation. + lifespan = time() - time("2024-10-30T00:00:00.880011Z"); + interval = TimeInterval_Minute * 5; + + // Compute the target nodes based on pending tasks. + // $PendingTasks == The sum of $ActiveTasks and $RunningTasks + $samples = $PendingTasks.GetSamplePercent(interval); + $tasks = $samples < 70 ? 
max(0, $PendingTasks.GetSample(1)) : max( $PendingTasks.GetSample(1), avg($PendingTasks.GetSample(interval))); + $targetVMs = $tasks > 0 ? $tasks : max(0, $TargetDedicatedNodes/2); + targetPoolSize = max(0, min($targetVMs, 8)); + + // For first interval, deploy 1 node, for other intervals scale up/down as per tasks. + $TargetLowPriorityNodes = lifespan < interval ? 1 : targetPoolSize; + $NodeDeallocationOption = taskcompletion; + ``` + +1. **Start task**: This is the task that will run on each VM when it joins the pool. This can be used to install additional software on the VM. When using Batch Forge, this is used to install `azcopy` for staging files onto and off of the node. Select **Enabled** and add the following command line to install `azcopy`: + + ```shell + bash -c "chmod +x azcopy && mkdir $AZ_BATCH_NODE_SHARED_DIR/bin/ && cp azcopy $AZ_BATCH_NODE_SHARED_DIR/bin/" + ``` + + Select **Resource files** then select **Http url**. For the **URL**, add `https://nf-xpack.seqera.io/azcopy/linux_amd64_10.8.0/azcopy` and for **File path** enter `azcopy`. Every other setting can be left default. + + :::note + When not using Fusion, every node **must** have `azcopy` installed. + ::: + +1. **Task Slots**: Set task slots to the machine's number of vCPUs. For example, select `4` for a `Standard_D4_v3` VM size. +1. **Task scheduling policy**: This can be set to `Pack` or `Spread`. `Pack` will attempt to schedule tasks from the same job on the same VM, while `Spread` will attempt to distribute tasks evenly across VMs. +1. **Virtual Network**: If you are using a virtual network, you can select it here. Be sure to select the correct virtual network and subnet. The VMs require: + - Access to container registries (such as quay.io and docker.io) to pull containers. + - Access to Azure Storage to copy data using `azcopy`. + - Access to any remote files required by the pipeline, such as AWS S3 storage. 
+ - Communication with the head node that runs Nextflow and Seqera to relay logs and information. + Note that overly-restrictive networking may prevent pipelines from running successfully. +1. **Mount configuration**: Nextflow *only* supports Azure File Shares. Select `Azure Files Share`, then add: + - **Source**: URL in format `https://${accountName}.file.core.windows.net/${fileShareName}` + - **Relative mount path**: Path where the file share will be mounted on the VM + - **Storage account name** and **Storage account key** (managed identity is not supported) + +Leave the node pool to start and create a single Azure VM. Monitor the VM to ensure it starts correctly. If any errors occur, check and correct them - you may need to create a new Azure node pool if issues persist. + +The following settings can be modified after creating a pool: + +- Autoscale formula +- Start task +- Application packages +- Node communication +- Metadata + +#### Create a manual Seqera Azure Batch compute environment + +1. In a workspace, select **Compute Environments**, then **Add compute environment**. +1. Enter a descriptive name for this environment, such as _Azure Batch (east-us)_. +1. For **Provider**, select **Azure Batch**. +1. Select your existing Azure credentials (access keys or Entra service principal) or select **+** to add new credentials. + :::note + To authenticate using an Entra service principal, you must include a user-assigned managed identity. See [Managed identity](#managed-identity) below. + ::: +1. Select a **Region**, such as _eastus (East US)_. +1. In the **Pipeline work directory** field, add the Azure blob container created previously. For example, `az://seqeracomputestorage-container/work`. + :::note + When you specify a Blob Storage bucket as your work directory, this bucket is used for the Nextflow [cloud cache](https://www.nextflow.io/docs/latest/cache-and-resume.html#cache-stores) by default. 
You can specify an alternative cache location with the **Nextflow config file** field on the pipeline [launch](../launch/launchpad#launch-form) form. + ::: +1. Select **Enable Wave containers** to facilitate access to private container repositories and provision containers in your pipelines using the Wave containers service. See [Wave containers][wave-docs] for more information. +1. Select **Enable Fusion v2** to allow access to your Azure Blob Storage data via the [Fusion v2][fusion-docs] virtual distributed file system. This speeds up most data operations. The Fusion v2 file system requires Wave containers to be enabled. See [Fusion file system](../supported_software/fusion/overview) for configuration details. + +
+ Use Fusion v2 + + :::note + The compute recommendations below are based on internal benchmarking performed by Seqera. Benchmark runs of [nf-core/rnaseq](https://github.com/nf-core/rnaseq) used profile `test_full`, consisting of an input dataset with 16 FASTQ files and a total size of approximately 123.5 GB. + ::: + + Azure virtual machines include fast SSDs and require no additional storage configuration for Fusion. For optimal performance, use VMs with sufficient local storage to support Fusion's streaming data throughput. + + 1. Use Seqera Platform version 23.1 or later. + 1. Use an Azure Blob storage container as the pipeline work directory. + 1. Enable **Wave containers** and **Fusion v2**. + 1. Specify suitable VM sizes under **VMs type**. A `Standard_E16d_v5` VM or larger is recommended for production use. + + :::tip + We recommend selecting machine types with a local temp storage disk of at least 200 GB and a random read speed of 1000 MBps or more for large and long-lived production pipelines. To work with files larger than 100 GB, increase temp storage accordingly (400 GB or more). + + The suffix `d` after the core number (e.g., `Standard_E16*d*_v5`) denotes a VM with a local temp disk. Select instances with Standard SSDs — Fusion does not support Azure network-attached storage (Premium SSDv2, Ultra Disk, etc.). Larger local storage increases Fusion's throughput and reduces the chance of overloading the machine. See [Sizes for virtual machines in Azure](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes/overview) for more information. + ::: + +
+
+1. Set the **Config mode** to **Manual**.
+1. Enter the **Compute Pool name**. This is the name of the Azure Batch pool you created previously in the Azure Batch account.
+    :::note
+    The default Azure Batch implementation uses a single pool for head and compute nodes. To use separate pools for head and compute nodes (for example, to use low-priority VMs for compute jobs), see [this FAQ entry](../troubleshooting_and_faqs/azure_troubleshooting).
+    :::
+1. Enter a user-assigned **Managed identity client ID**, if one is attached to your Azure Batch pool. See [Managed Identity](#managed-identity) below.
+1. Apply [**Resource labels**](../resource-labels/overview). This will populate the **Metadata** fields of the Azure Batch pool.
+1. Expand **Staging options** to include:
+    - Optional [pre- or post-run Bash scripts](../launch/advanced#pre-and-post-run-scripts) that execute before or after the Nextflow pipeline execution in your environment.
+    - Global Nextflow configuration settings for all pipeline runs launched with this compute environment. Values defined here are pre-filled in the **Nextflow config file** field in the pipeline launch form. These values can be overridden during pipeline launch.
+    :::info
+    Configuration settings in this field override the same values in the pipeline repository `nextflow.config` file. See [Nextflow config file](../launch/advanced#nextflow-config-file) for more information on configuration priority.
+    :::
+1. Define custom **Environment Variables** for the **Head Job** and/or **Compute Jobs**.
+1. Configure any necessary advanced options:
+    - Use **Jobs cleanup policy** to control how Nextflow process jobs are deleted on completion. Active jobs consume the quota of the Azure Batch account. By default, jobs are terminated by Nextflow and removed from the quota when all tasks successfully complete. If set to _Always_, all jobs are deleted by Nextflow after pipeline completion. If set to _Never_, jobs are never deleted. 
If set to _On success_, successful tasks are removed but failed tasks will be left for debugging purposes. + - Use **Token duration** to control the duration of the SAS token generated by Nextflow. This must be as long as the longest period of time the pipeline will run. +1. Select **Add** to complete the compute environment setup. The creation of resources will take a few seconds, after which you can launch pipelines. + +:::info +See [Launch pipelines](../launch/launchpad) to start executing workflows in your Azure Batch compute environment. +::: + +[az-data-residency]: https://azure.microsoft.com/en-gb/explore/global-infrastructure/data-residency/#select-geography +[az-batch-quotas]: https://docs.microsoft.com/en-us/azure/batch/batch-quota-limit#view-batch-quotas +[az-batch-best-practices]: https://learn.microsoft.com/en-us/azure/batch/security-best-practices +[az-vm-sizes]: https://learn.microsoft.com/en-us/azure/virtual-machines/sizes +[az-create-account]: https://azure.microsoft.com/en-us/free/ +[az-learn-rg]: https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/manage-resource-groups-portal#create-resource-groups +[az-create-batch]: https://portal.azure.com/#create/Microsoft.BatchAccount +[az-learn-storage]: https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview +[az-learn-batch]: https://learn.microsoft.com/en-us/training/modules/create-batch-account-using-azure-portal/ +[az-learn-jobs]: https://learn.microsoft.com/en-us/azure/batch/jobs-and-tasks +[az-create-rg]: https://portal.azure.com/#create/Microsoft.ResourceGroup +[az-create-storage]: https://portal.azure.com/#create/Microsoft.StorageAccount-ARM +[az-premium-storage]: https://learn.microsoft.com/en-us/azure/virtual-machines/premium-storage-performance +[az-vm-gen]: https://learn.microsoft.com/en-us/azure/virtual-machines/generation-2 +[az-disk-type]: https://learn.microsoft.com/en-us/azure/virtual-machines/disks-types +[az-batch-autoscale]: 
https://learn.microsoft.com/en-us/azure/batch/batch-automatic-scaling +[az-file-shares]: https://www.nextflow.io/docs/latest/azure.html#azure-file-shares +[az-vm-sizes]: https://learn.microsoft.com/en-us/azure/virtual-machines/sizes/overview + +[wave-docs]: https://docs.seqera.io/wave +[fusion-docs]: https://docs.seqera.io/fusion diff --git a/platform-enterprise/compute-envs/eks.md b/platform-enterprise/compute-envs/eks.md new file mode 100644 index 000000000..95ec57bcf --- /dev/null +++ b/platform-enterprise/compute-envs/eks.md @@ -0,0 +1,194 @@ +--- +title: "Amazon EKS" +description: "Instructions to set up Amazon EKS in Seqera Platform" +date: "21 Apr 2023" +tags: [eks, amazon, compute environment] +--- + +[Amazon EKS](https://aws.amazon.com/eks/) is a managed Kubernetes cluster that enables the execution of containerized workloads in the AWS cloud at scale. + +Seqera Platform offers native support for Amazon EKS clusters to streamline the deployment of Nextflow pipelines. + +## Requirements + +You must have an EKS cluster up and running. Follow the [cluster preparation](../compute-envs/k8s#cluster-preparation) instructions to create the resources required by your Seqera instance. In addition to the generic Kubernetes instructions, you must make a number of EKS-specific modifications. + +### Service account role + +Assign a service account role to the AWS IAM user used by Seqera to access the EKS cluster. + +**Assign a service account role to your Seqera IAM user** + +1. Modify the EKS auth configuration: + + ```bash + kubectl edit configmap -n kube-system aws-auth + ``` + +1. In the editor that opens, add this entry: + + ```yaml + mapUsers: | + - userarn: + username: tower-launcher-user + groups: + - tower-launcher-role + ``` + +1. 
Retrieve your user ARN from the [AWS IAM console](https://console.aws.amazon.com/iam), or with the AWS CLI: + + ```bash + aws sts get-caller-identity + ``` + + :::note + The same user must be used when specifying the AWS credentials in the Seqera compute environment configuration. + ::: + +1. The AWS user must have [this](../_templates/eks/eks-iam-policy.json) IAM policy applied. + +See the [AWS documentation](https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html) for more details. + +## Seqera compute environment + +:::caution +Your Seqera compute environment uses resources that you may be charged for in your AWS account. See [Cloud costs](../monitoring/cloud-costs) for guidelines to manage cloud resources effectively and prevent unexpected costs. +::: + +After you have prepared your Kubernetes cluster and assigned a service account role to your Seqera IAM user, create a Seqera EKS compute environment: + +1. In a workspace, select **Compute environments > New environment**. +1. Enter a descriptive name for this environment, e.g., _Amazon EKS (eu-west-1)_. +1. From the **Provider** drop-down menu, select **Amazon EKS**. +1. Under **Storage**, select either **Fusion storage** (recommended) or **Legacy storage**. The [Fusion v2](https://docs.seqera.io/fusion) virtual distributed file system allows access to your AWS S3-hosted data (`s3://` URLs). This eliminates the need to configure a shared file system in your Kubernetes cluster. See [Fusion v2](#fusion-v2) below. +1. From the **Credentials** drop-down menu, select existing AWS credentials, or select **+** to add new credentials. If you choose to use existing credentials, skip to step 9. + :::note + The user must have the IAM permissions required to describe and list EKS clusters, per service account role requirements in the previous section. + ::: +1. Enter a name, e.g., _EKS Credentials_. +1. Add the IAM user **Access key** and **Secret key**. 
This is the IAM user with the service account role detailed in the previous section. +1. (Optional) Under **Assume role**, specify the IAM role to be assumed by the Seqera IAM user to access the compute environment AWS resources. + :::note + When using AWS keys without an assumed role, the associated AWS user account must have Seqera [Launch](https://github.com/seqeralabs/nf-tower-aws/tree/master/launch) and [Forge](https://github.com/seqeralabs/nf-tower-aws/tree/master/forge) permissions. When an assumed role is provided, the keys are only used to retrieve temporary credentials impersonating the role specified. In this case, Seqera [Launch](https://github.com/seqeralabs/nf-tower-aws/tree/master/launch) and [Forge](https://github.com/seqeralabs/nf-tower-aws/tree/master/forge) permissions must be granted to the role instead of the user account. + ::: +1. Select a **Region**, e.g., _eu-west-1 - Europe (Ireland)_. +1. Select a **Cluster name** from the list of available EKS clusters in the selected region. +1. Specify the **Namespace** created in the [cluster preparation](../compute-envs/k8s#cluster-preparation) instructions, which is _tower-nf_ by default. +1. Specify the **Head service account** created in the [cluster preparation](../compute-envs/k8s#cluster-preparation) instructions, which is _tower-launcher-sa_ by default. + :::note + If you enable Fusion v2 (**Fusion storage** in step 4 above), the head service account must have access to the S3 storage bucket specified as your work directory. + ::: +1. Specify the **Storage claim** created in the [cluster preparation](../compute-envs/k8s#cluster-preparation) instructions, which serves as a scratch filesystem for Nextflow pipelines. The storage claim is called _tower-scratch_ in the provided examples. + :::note + The **Storage claim** isn't needed when Fusion v2 is enabled. + ::: +1. Apply [**Resource labels**](../resource-labels/overview) to the cloud resources consumed by this compute environment. 
Workspace default resource labels are prefilled. +1. Expand **Staging options** to include: + - Optional [pre- or post-run Bash scripts](../launch/advanced#pre-and-post-run-scripts) that execute before or after the Nextflow pipeline execution in your environment. + - Global Nextflow configuration settings for all pipeline runs launched with this compute environment. Values defined here are pre-filled in the **Nextflow config file** field in the pipeline launch form. These values can be overridden during pipeline launch. + :::info + Configuration settings in this field override the same values in the pipeline repository `nextflow.config` file. See [Nextflow config file](../launch/advanced#nextflow-config-file) for more information on configuration priority. + ::: +1. Specify custom **Environment variables** for the **Head job** and/or **Compute jobs**. +1. Configure any advanced options described in the next section, as needed. +1. Select **Create** to finalize the compute environment setup. + +### Advanced options + +Seqera Platform compute environments for EKS include advanced options for storage and work directory paths, resource allocation, and pod customization. + +**Seqera EKS advanced options** + +- The **Storage mount path** is the file system path where the Storage claim is mounted (default: `/scratch`). +- The **Work directory** is the file system path used as a working directory by Nextflow pipelines. This must be the storage mount path (default) or a subdirectory of it. +- The **Compute service account** is the service account used by Nextflow to submit tasks (default: the `default` account in the given namespace). +- The **Pod cleanup policy** determines when to delete terminated pods. +- Use **Custom head pod specs** to provide custom options for the Nextflow workflow pod (`nodeSelector`, `affinity`, etc). 
For example: + +```yaml +spec: + nodeSelector: + disktype: ssd +``` + +- Use **Custom service pod specs** to provide custom options for the compute environment pod. See above for an example. +- Use **Head Job CPUs** and **Head Job memory** to specify the hardware resources allocated for the Nextflow workflow pod. + +:::info +See [Launch pipelines](../launch/launchpad) to start executing workflows in your EKS compute environment. +::: + +### Fusion v2 + +To use [Fusion v2](https://docs.seqera.io/fusion) in your Seqera EKS compute environment: +1. Use Seqera Platform version 23.1 or later. +1. Use an S3 bucket as the pipeline work directory. +1. Both the head service and compute service accounts must have access to the S3 bucket specified as the work directory. + +
+
+    Configure IAM to use Fusion v2
+
+    1. Allow the IAM role access to your S3 bucket:
+
+        ```json
+        {
+            "Version": "2012-10-17",
+            "Statement": [
+                {
+                    "Effect": "Allow",
+                    "Action": ["s3:ListBucket"],
+                    "Resource": ["arn:aws:s3:::<BUCKET_NAME>"]
+                },
+                {
+                    "Action": [
+                        "s3:GetObject",
+                        "s3:PutObject",
+                        "s3:PutObjectTagging",
+                        "s3:DeleteObject"
+                    ],
+                    "Resource": ["arn:aws:s3:::<BUCKET_NAME>/*"],
+                    "Effect": "Allow"
+                }
+            ]
+        }
+        ```
+
+        Replace `<BUCKET_NAME>` with a bucket name of your choice.
+
+    1. The IAM role must have a trust relationship with your Kubernetes service account:
+
+        ```json
+        {
+            "Version": "2012-10-17",
+            "Statement": [
+                {
+                    "Effect": "Allow",
+                    "Principal": {
+                        "Federated": "arn:aws:iam::<ACCOUNT_ID>:oidc-provider/oidc.eks.<REGION>.amazonaws.com/id/<OIDC_ID>"
+                    },
+                    "Action": "sts:AssumeRoleWithWebIdentity",
+                    "Condition": {
+                        "StringEquals": {
+                            "oidc.eks.eu-west-2.amazonaws.com/id/<OIDC_ID>:aud": "sts.amazonaws.com",
+                            "oidc.eks.eu-west-2.amazonaws.com/id/<OIDC_ID>:sub": "system:serviceaccount:<NAMESPACE>:<SERVICE_ACCOUNT>"
+                        }
+                    }
+                }
+            ]
+        }
+        ```
+
+        Replace `<ACCOUNT_ID>`, `<REGION>`, `<OIDC_ID>`, `<SERVICE_ACCOUNT>` with your corresponding values.
+
+    1. Annotate the Kubernetes service account with the IAM role:
+
+        ```shell
+        kubectl annotate serviceaccount --namespace <NAMESPACE> <SERVICE_ACCOUNT> eks.amazonaws.com/role-arn=arn:aws:iam::<ACCOUNT_ID>:role/<IAM_ROLE>
+        ```
+
+        Replace `<NAMESPACE>`, `<SERVICE_ACCOUNT>`, and `<IAM_ROLE>` with your corresponding values.
+
+        See the [AWS documentation](https://docs.aws.amazon.com/eks/latest/userguide/associate-service-account-role.html) for further details.
+
+
+ diff --git a/platform-enterprise/compute-envs/gke.md b/platform-enterprise/compute-envs/gke.md new file mode 100644 index 000000000..a34fe8bf3 --- /dev/null +++ b/platform-enterprise/compute-envs/gke.md @@ -0,0 +1,142 @@ +--- +title: "Google Kubernetes Engine" +description: "Instructions to set up Google Kubernetes Engine in Seqera Platform" +date: "21 Apr 2023" +tags: [gke, google, compute environment] +--- + +[Google Kubernetes Engine (GKE)](https://cloud.google.com/kubernetes-engine) is a managed Kubernetes cluster that allows the execution of containerized workloads in Google Cloud at scale. + +Seqera Platform offers native support for GKE clusters to streamline the deployment of Nextflow pipelines. + +## Requirements + +See [here](../compute-envs/google-cloud-batch#configure-google-cloud) for instructions to set up your Google Cloud account and other services (such as Cloud storage). + +You must have a GKE cluster up and running. Follow the [cluster preparation](../compute-envs/k8s#cluster-preparation) instructions to create the resources required by Seqera. In addition to the generic Kubernetes instructions, you must make a number of modifications specific to GKE. + +### Service account role + +You must grant cluster access to the service account used by the Seqera compute environment. To do this, update the [service account _RoleBinding_](https://cloud.google.com/kubernetes-engine/docs/how-to/role-based-access-control#rolebinding): + +```yaml +cat << EOF | kubectl apply -f - +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: tower-launcher-userbind +subjects: + - kind: User + name: + apiGroup: rbac.authorization.k8s.io +roleRef: + kind: Role + name: tower-launcher-role + apiGroup: rbac.authorization.k8s.io +--- +EOF +``` + +Replace `` with the corresponding service account, e.g., `test-account@test-project-123456.google.com.iam.gserviceaccount.com`. 
+ +See [Role-based access control](https://cloud.google.com/kubernetes-engine/docs/how-to/role-based-access-control) for more information. + +## Seqera compute environment + +:::caution +Your Seqera compute environment uses resources that you may be charged for in your Google Cloud account. See [Cloud costs](../monitoring/cloud-costs) for guidelines to manage cloud resources effectively and prevent unexpected costs. +::: + +After you've prepared your Kubernetes cluster and granted cluster access to your service account, create a Seqera GKE compute environment: + +1. In a Seqera workspace, select **Compute environments > New environment**. +1. Enter a descriptive name for this environment, e.g., _Google Kubernetes Engine (europe-west1)_. +1. From the **Provider** drop-down, select **Google Kubernetes Engine**. +1. Under **Storage**, select either **Fusion storage** (recommended) or **Legacy storage**. The [Fusion v2](https://docs.seqera.io/fusion) virtual distributed file system allows access to your Google Cloud-hosted data (`gs://` URLs). This eliminates the need to configure a shared file system in your Kubernetes cluster. See [Fusion v2](#fusion-v2) below. +1. From the **Credentials** drop-down menu, select existing GKE credentials, or select **+** to add new credentials. If you choose to use existing credentials, skip to step 8. +1. Enter a name for the credentials, e.g., _GKE Credentials_. +1. Enter the **Service account key** for your Google service account. + :::tip + You can create multiple credentials in your Seqera environment. See [Credentials](../credentials/overview). + ::: +1. Select the **Location** of your GKE cluster. + :::caution + GKE clusters can be either regional or zonal. For example, `us-west1` identifies the United States West-Coast _region_, which has three _zones_: `us-west1-a`, `us-west1-b`, and `us-west1-c`. + + Seqera Platform's auto-completion only shows regions. You should manually edit this field if you're using a zonal GKE cluster. 
+ ::: +1. Select or enter the **Cluster name** of your GKE cluster. +1. Specify the **Namespace** created in the [cluster preparation](../compute-envs/k8s#cluster-preparation) instructions. This is _tower-nf_ by default. +1. Specify the **Head service account** created in the [cluster preparation](../compute-envs/k8s#cluster-preparation) instructions. This is _tower-launcher-sa_ by default. + :::note + If you enable Fusion v2 (**Fusion storage** in step 4 above), the head service account must have access to the Google Cloud storage bucket specified as your work directory. + ::: +1. Specify the **Storage claim** created in the [cluster preparation](../compute-envs/k8s#cluster-preparation) instructions. This serves as a scratch filesystem for Nextflow pipelines. The storage claim is called _tower-scratch_ in the provided examples. + :::note + The **Storage claim** isn't needed when Fusion v2 is enabled. + ::: +1. Apply [**Resource labels**](../resource-labels/overview) to the cloud resources consumed by this compute environment. Workspace default resource labels are prefilled. +1. Expand **Staging options** to include: + - Optional [pre- or post-run Bash scripts](../launch/advanced#pre-and-post-run-scripts) that execute before or after the Nextflow pipeline execution in your environment. + - Global Nextflow configuration settings for all pipeline runs launched with this compute environment. Values defined here are pre-filled in the **Nextflow config file** field in the pipeline launch form. These values can be overridden during pipeline launch. + :::info + Configuration settings in this field override the same values in the pipeline repository `nextflow.config` file. See [Nextflow config file](../launch/advanced#nextflow-config-file) for more information on configuration priority. + ::: +1. Specify custom **Environment variables** for the **Head job** and/or **Compute jobs**. +1. Configure any advanced options described in the next section, as needed. +1. 
Select **Create** to finalize the compute environment setup.
+
+### Advanced options
+
+Seqera Platform compute environments for GKE include advanced options for storage and work directory paths, resource allocation, and pod customization.
+
+- The **Storage mount path** is the file system path where the Storage claim is mounted (default: `/scratch`).
+- The **Work directory** is the file system path used as a working directory by Nextflow pipelines. It must be the storage mount path (default) or a subdirectory of it.
+- The **Compute service account** is the service account used by Nextflow to submit tasks (default: the `default` account in the given namespace).
+- The **Pod cleanup policy** determines when to delete terminated pods.
+- Use **Custom head pod specs** to provide custom options for the Nextflow workflow pod (`nodeSelector`, `affinity`, etc). For example:
+
+```yaml
+spec:
+  nodeSelector:
+    disktype: ssd
+```
+
+- Use **Custom service pod specs** to provide custom options for the compute environment pod. See above for an example.
+- Use **Head Job CPUs** and **Head Job Memory** to specify the hardware resources allocated for the Nextflow workflow pod.
+
+:::info
+See [Launch pipelines](../launch/launchpad) to start executing workflows in your GKE compute environment.
+:::
+
+### Fusion v2
+
+To use [Fusion v2](https://docs.seqera.io/fusion) in your Seqera GKE compute environment:
+1. Use Seqera Platform version 23.1 or later.
+1. Use a Google Cloud Storage bucket as the pipeline work directory.
+1. Both the head service and compute service accounts must have access to the Google Cloud storage bucket specified as the work directory.
+
+Configure IAM to use Fusion v2
+
+1. Ensure the **Workload Identity** feature is enabled for the cluster:
+   - **Enable Workload Identity** in the cluster **Security** settings.
+   - **Enable GKE Metadata Server** in the node group **Security** settings.
+1. Allow the IAM service account access to your Google storage bucket:
+   ```shell
+   gcloud storage buckets add-iam-policy-binding gs://<BUCKET-NAME> --role roles/storage.objectAdmin --member serviceAccount:<IAM-SERVICE-ACCOUNT>@<PROJECT-ID>.iam.gserviceaccount.com
+   ```
+   The role must have at least `storage.objects.create`, `storage.objects.get`, and `storage.objects.list` permissions.
+1. Allow the Kubernetes service account to impersonate the IAM service account:
+   ```shell
+   gcloud iam service-accounts add-iam-policy-binding <IAM-SERVICE-ACCOUNT>@<PROJECT-ID>.iam.gserviceaccount.com --role roles/iam.workloadIdentityUser --member "serviceAccount:<PROJECT-ID>.svc.id.goog[<NAMESPACE>/<KSA-NAME>]"
+   ```
+1. Annotate the Kubernetes service account with the email address of the IAM service account:
+   ```shell
+   kubectl annotate serviceaccount <KSA-NAME> --namespace <NAMESPACE> iam.gke.io/gcp-service-account=<IAM-SERVICE-ACCOUNT>@<PROJECT-ID>.iam.gserviceaccount.com
+   ```
+
+See the [GKE documentation](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity#authenticating_to) for further details.
+
\ No newline at end of file diff --git a/platform-enterprise/compute-envs/google-cloud-batch.md b/platform-enterprise/compute-envs/google-cloud-batch.md new file mode 100644 index 000000000..d277a835e --- /dev/null +++ b/platform-enterprise/compute-envs/google-cloud-batch.md @@ -0,0 +1,252 @@ +--- +title: "Google Cloud Batch" +description: "Instructions to set up Google Cloud Batch in Seqera Platform" +date: "21 Apr 2023" +tags: [google, batch, gcp, compute environment] +--- + +:::note +This guide assumes you have an existing Google Cloud account. Sign up for a free account [here](https://cloud.google.com/). Seqera Platform provides integration to Google Cloud via the [Batch API](https://cloud.google.com/batch/docs/reference/rest). +::: + +The guide is split into two parts: + +1. How to configure your Google Cloud account to use the Batch API. +2. How to create a Google Cloud Batch compute environment in Seqera. + +## Configure Google Cloud + +### Create a project + +Go to the [Google Project Selector page](https://console.cloud.google.com/projectselector2) and select an existing project, or select **Create project**. + +Enter a name for your new project, e.g., _tower-nf_. + +If you are part of an organization, the location will default to your organization. + +### Enable billing + +See [here](https://cloud.google.com/billing/docs/how-to/modify-project) to enable billing in your Google Cloud account. + +### Enable APIs + +See [here](https://console.cloud.google.com/flows/enableapi?apiid=batch.googleapis.com%2Ccompute.googleapis.com%2Cstorage-api.googleapis.com) to enable the following APIs for your project: + +- Batch API +- Compute Engine API +- Cloud Storage API + +Select your project from the dropdown menu and select **Enable**. 
+ +Alternatively, you can enable each API manually by selecting your project in the navigation bar and visiting each API page: + +- [Batch API](https://console.cloud.google.com/marketplace/product/google/batch.googleapis.com) + +- [Compute Engine API](https://console.cloud.google.com/marketplace/product/google/compute.googleapis.com) + +- [Cloud Storage API](https://console.cloud.google.com/marketplace/product/google/storage-api.googleapis.com) + +### IAM + +Seqera requires a service account with appropriate permissions to interact with your Google Cloud resources. As an IAM user, you must have access to the service account that submits Batch jobs. + +:::caution +By default, Google Cloud Batch uses the default Compute Engine service account to submit jobs. This service account is granted the Editor (`roles/Editor`) role. While this service account has the necessary permissions needed by Seqera, this role is not recommended for production environments. Control job access using a custom service account with only the permissions necessary for Seqera to execute Batch jobs instead. +::: + +#### Service account permissions + +[Create a custom service account][create-sa] with at least the following permissions: + +- Batch Agent Reporter (`roles/batch.agentReporter`) on the project +- Batch Job Editor (`roles/batch.jobsEditor`) on the project +- Logs Writer (`roles/logging.logWriter`) on the project (to let jobs generate logs in Cloud Logging) +- Service Account User (`roles/iam.serviceAccountUser`) + +If your Google Cloud project does not require access restrictions on any of its Cloud Storage buckets, you can grant project Storage Admin (`roles/storage.admin`) permissions to your service account to simplify setup. To grant access only to specific buckets, add the service account as a principal on each bucket individually. See [Cloud Storage bucket](#cloud-storage-bucket) below. 
+ +#### User permissions + +Ask your Google Cloud administrator to grant you the following IAM user permissions to interact with your custom service account: + +- Batch Job Editor (`roles/batch.jobsEditor`) on the project +- Service Account User (`roles/iam.serviceAccountUser`) on the job's service account (default: Compute Engine service account) +- View Service Accounts (`roles/iam.serviceAccountViewer`) on the project + +To configure a credential in Seqera, you must first create a [service account JSON key file][get-json]: + +1. In the Google Cloud navigation menu, select **IAM & Admin > Service Accounts**. +2. Select the email address of the service account. + + :::note + The Compute Engine default service account is not recommended for production environments due to its powerful permissions. To use a service account other than the Compute Engine default, specify the service account email address under **Advanced options** on the Seqera compute environment creation form. + ::: + +3. Select **Keys > Add key > Create new key**. +4. Select **JSON** as the key type. +5. Select **Create**. + +A JSON file is downloaded to your computer. This file contains the credential needed to configure the compute environment in Seqera. + +You can manage your key from the **Service Accounts** page. + +### Cloud Storage bucket + +Google Cloud Storage is a type of **object storage**. To access files and store the results for your pipelines, create a **Cloud bucket** that your Seqera service account can access. + +#### Create a Cloud Storage bucket + +1. In the hamburger menu (**≡**), select **Cloud Storage**. +2. From the **Buckets** tab, select **Create**. +3. Enter a name for your bucket. You will reference this name when you create the compute environment in Seqera. +4. Select **Region** for the **Location type** and select the **Location** for your bucket. You'll reference this location when you create the compute environment in Seqera. 
+ + :::note + The Batch API is available in a limited number of [locations][batch-locations]. These locations are only used to store metadata about the pipeline operations. The storage bucket and compute resources can be in any region. + ::: + +5. Select **Standard** for the default storage class. +6. To restrict public access to your bucket data, select the **Enforce public access prevention on this bucket** checkbox. +7. Under **Access control**, select **Uniform**. +8. Select any additional object data protection tools, per your organization's data protection requirements. +9. Select **Create**. + +#### Assign bucket permissions + +1. After the bucket is created, you are redirected to the **Bucket details** page. +2. Select **Permissions**, then **Grant access** under **View by principals**. +3. Copy the email address of your service account into **New principals**. +4. Select the **Storage Admin** role, then select **Save**. + +:::tip +You've created a project, enabled the necessary Google APIs, created a bucket, and created a service account JSON key file with the required credentials. You now have what you need to set up a new compute environment in Seqera. +::: + +### Seqera compute environment + +:::caution +Your Seqera compute environment uses resources that you may be charged for in your Google Cloud account. See [Cloud costs](../monitoring/cloud-costs) for guidelines to manage cloud resources effectively and prevent unexpected costs. +::: + +After your Google Cloud resources have been created, create a new Seqera compute environment: + +1. In a workspace, select **Compute Environments > New Environment**. +2. Enter a descriptive name for this environment, e.g., _Google Cloud Batch (europe-north1)_. +3. Select **Google Cloud Batch** as the target platform. + +#### Credentials + +1. From the **Credentials** drop-down, select existing Google credentials or select **+** to add new credentials. 
If you choose to use existing credentials, skip to the next section. +2. Enter a name for the credentials, e.g., _Google Cloud Credentials_. +3. Paste the contents of the JSON file created previously in the **Service account key** field. + +#### Location and work directory + +Select the **Location** where you will execute your pipelines. See [Location][location] to learn more. + +In the **Pipeline work directory** field, enter your storage bucket URL, e.g., `gs://my-bucket`. This bucket must be accessible in the location selected in the previous step. + +:::note +When you specify a Cloud Storage bucket as your work directory, this bucket is used for the Nextflow [cloud cache](https://www.nextflow.io/docs/latest/cache-and-resume.html#cache-stores) by default. You can specify an alternative cache location with the **Nextflow config file** field on the pipeline [launch](../launch/launchpad#launch-form) form. +::: + +#### Seqera features + +Select **Enable Wave containers** to facilitate access to private container repositories and provision containers in your pipelines using the Wave containers service. See [Wave containers][wave-docs] for more information. + +Select **Enable Fusion v2** to allow access to your Google Cloud Storage data via the [Fusion v2][fusion-docs] virtual distributed file system. This speeds up most data operations. The Fusion v2 file system requires Wave containers to be enabled. See [Fusion file system][platform-fusion-docs] for configuration details. + +
+ Use Fusion v2 + + :::note + The compute recommendations below are based on internal benchmarking performed by Seqera. Benchmark runs of [nf-core/rnaseq](https://github.com/nf-core/rnaseq) used profile `test_full`, consisting of an input dataset with 16 FASTQ files and a total size of approximately 123.5 GB. + ::: + + 1. Use Seqera Platform version 23.1 or later. + 1. Use a Google Cloud Storage bucket as the pipeline work directory. + 1. Enable **Wave containers** and **Fusion v2**. + 1. Specify suitable virtual machine types and local storage settings, or accept the default machine settings listed below. An `n2-highmem-16-lssd` VM or larger is recommended for production use. + + :::note + To specify virtual machine settings in Platform during compute environment creation, use the **Global Nextflow config** field to apply custom Nextflow process directives to all pipeline runs launched with this compute environment. + + To specify virtual machine settings per pipeline run in Platform, or as a persistent configuration in your Nextflow pipeline repository, use Nextflow process directives. See [Google Cloud Batch process definition](https://www.nextflow.io/docs/latest/google.html#process-definition) for more information. + ::: + + When Fusion v2 is enabled, the following virtual machine settings are applied: + - A 375 GB local NVMe SSD is selected for all compute jobs. + - If you do not specify a machine type, a VM from families that support local SSDs is selected. + - Any machine types you specify in the Nextflow config must support local SSDs. + - Local SSDs are only offered in multiples of 375 GB. You can increment the number of SSDs used per process with the `disk` directive to request multiples of 375 GB. To work with files larger than 100 GB, use at least two SSDs (750 GB or more). + - Fusion v2 can also use persistent disks for caching. Override the disk requested by Fusion using the `disk` directive and the `type: pd-standard`. 
+ - The `machineType` directive can be used to specify a VM instance type, family, or custom machine type in a comma-separated list of patterns. For example, `c2-*`, `n1-standard-1`, `custom-2-4`, `n*`, `m?-standard-*`. + +
+ +:::note +Wave containers and Fusion v2 are recommended features for added capability and improved performance, but neither are required to execute workflows in your compute environment. +::: + +#### GCP resources + +Enable **Spot** to use Spot instances, which have significantly reduced cost compared to On-Demand instances. + +:::note +From Nextflow version 24.10, the default Spot reclamation retry setting changed to `0` on AWS and Google. By default, no internal retries are attempted on these platforms. Spot reclamations now lead to an immediate failure, exposed to Nextflow in the same way as other generic failures (returning for example, `exit code 1` on AWS). Nextflow will treat these failures like any other job failure unless you actively configure a retry strategy. For more information, see [Spot instance failures and retries](https://docs.seqera.io/platform/24.2/troubleshooting_and_faqs/nextflow#spot-instance-failures-and-retries-in-nextflow). +::: + +Apply [**Resource labels**][resource-labels] to the cloud resources consumed by this compute environment. Workspace default resource labels are prefilled. + +#### Scripting and environment variables + +- Expand **Staging options** to include: + - Optional [pre- or post-run Bash scripts](../launch/advanced#pre-and-post-run-scripts) that execute before or after the Nextflow pipeline execution in your environment. + - Global Nextflow configuration settings for all pipeline runs launched with this compute environment. Values defined here are pre-filled in the **Nextflow config file** field in the pipeline launch form. These values can be overridden during pipeline launch. + :::info + Configuration settings in this field override the same values in the pipeline repository `nextflow.config` file. See [Nextflow config file](../launch/advanced#nextflow-config-file) for more information on configuration priority. + ::: + + +- Specify custom **Environment variables** for the head and compute jobs. 
+
+#### Advanced options
+
+:::note
+If you use VM instance templates for the head or compute jobs (see step 6 below), resource allocation and networking values specified in the templates override any conflicting values you specify while creating your Seqera compute environment.
+:::
+
+1. Enable **Use Private Address** to ensure that your Google Cloud VMs aren't accessible to the public internet.
+1. Use **Boot disk size** to control the persistent disk size that each task and the head job are provided.
+1. Use **Head Job CPUs** and **Head Job Memory** to specify the CPUs and memory allocated for the head job.
+1. Use **Service Account email** to specify a service account email address other than the Compute Engine default to execute workflows with this compute environment (recommended for production environments).
+1. Use **VPC** and **Subnet** to specify the name of a VPC network and subnet to be used by this compute environment. If your organization's VPC architecture relies on network tags, you can apply network tags to VM instance templates used for the Nextflow head and compute jobs (see below).
+   :::note
+   You must specify both a **VPC** and **Subnet** for your compute environment to use either.
+   :::
+1. Use **Head job instance template** and **Compute jobs instance template** to specify the name or fully-qualified reference of a VM instance template, without the `template://` prefix, to use for the head and compute jobs. [VM instance templates][gcp-vm-instance-template] allow you to define the resources allocated to Batch jobs. Configuration values defined in a VM instance template override any conflicting values you specify while creating your Seqera compute environment.
+
+   You can use network tags in VM instance templates to enable cross-network and cross-project distribution of compute resources. This is useful if your head and compute instances must reside in different GCP projects or across isolated networking infrastructures. 
Note that the use of network tags does not affect the resource labels applied to your compute environment. + + :::caution + Seqera does not validate the VM instance template you specify in these fields. Generally, use templates that define only the machine type, network, disk, and configuration values that will not change across multiple VM instances and Seqera compute environments. See [Create instance templates](https://cloud.google.com/compute/docs/instance-templates/create-instance-templates) for instructions to create your instance templates. + + To prevent errors during workflow execution, ensure that the instance templates you use are suitably configured for your needs with an appropriate machine type. You can define multiple instance templates with varying machine type sizes in your Nextflow configuration using the `machineType` [process directive](https://www.nextflow.io/docs/latest/google.html#process-definition) (e.g., `process.machineType = 'template://my-template-name'`). You can use [process selectors](https://www.nextflow.io/docs/latest/config.html#config-process-selectors) to assign separate templates to each of your processes. + ::: + +Select **Create** to finalize the compute environment setup. + +:::info +See [Launch pipelines](../launch/launchpad) to start executing workflows in your Google Cloud Batch compute environment. 
+::: + +[batch-locations]: https://cloud.google.com/batch/docs/locations +[create-sa]: https://cloud.google.com/iam/docs/service-accounts-create#creating +[get-json]: https://cloud.google.com/iam/docs/keys-list-get#get-key +[location]: https://cloud.google.com/compute/docs/regions-zones#available +[wave-docs]: https://www.nextflow.io/docs/latest/wave.html +[fusion-docs]: https://docs.seqera.io/fusion +[platform-fusion-docs]: ../supported_software/fusion/overview +[pre-post-run-scripts]: ../launch/advanced#pre-and-post-run-scripts +[resource-labels]: ../resource-labels/overview +[gcp-vm-instance-template]: https://cloud.google.com/compute/docs/instance-templates diff --git a/platform-enterprise/compute-envs/google-cloud-lifesciences.md b/platform-enterprise/compute-envs/google-cloud-lifesciences.md new file mode 100644 index 000000000..11e2a2e68 --- /dev/null +++ b/platform-enterprise/compute-envs/google-cloud-lifesciences.md @@ -0,0 +1,143 @@ +--- +title: "Google Life Sciences (deprecated)" +description: "Instructions to set up Google Life Sciences in Seqera Platform" +date: "21 Apr 2023" +tags: [google, gcp, life sciences, compute environment] +--- + +:::info +Google Cloud Life Sciences is [deprecated](https://cloud.google.com/life-sciences/docs/getting-support) and will no longer be available on Google Cloud after July 8, 2025. +::: + +This guide assumes you have an existing Google Cloud account. Sign up for a free account [here](https://cloud.google.com/). Seqera Platform provides integration to Google Cloud via the [Cloud Life Sciences API](https://cloud.google.com/life-sciences/docs/reference/rest). + +This guide is split into two parts: + +1. How to configure your Google Cloud account to use the Cloud Life Sciences API. +2. How to create a Google Life Sciences compute environment in Seqera. 
+ +## Configure Google Cloud + +### Create a project + +Navigate to the [Google Project Selector page](https://console.cloud.google.com/projectselector2) and either select an existing project or select **Create project**. + +Enter a name for your new project, e.g., _tower-nf_. + +If you are part of an organization, the location will default to your organization. + +### Enable billing + +See [here](https://cloud.google.com/billing/docs/how-to/modify-project) to enable billing in your Google Cloud account. + +### Enable APIs + +See [here](https://console.cloud.google.com/flows/enableapi?apiid=lifesciences.googleapis.com%2Ccompute.googleapis.com%2Cstorage-api.googleapis.com) to enable the following APIs for your project: + +- Cloud Life Sciences API +- Compute Engine API +- Cloud Storage API + +Select your project from the dropdown menu and select **Enable**. + +Alternatively, select your project in the navigation bar and enable each API manually from these pages: + +- [Cloud Life Sciences API](https://console.cloud.google.com/marketplace/product/google/lifesciences.googleapis.com) + +- [Compute Engine API](https://console.cloud.google.com/marketplace/product/google/compute.googleapis.com) + +- [Cloud Storage API](https://console.cloud.google.com/marketplace/product/google/storage-api.googleapis.com) + +### IAM + +Seqera requires a service account with appropriate permissions to interact with your Google Cloud resources. + +**Create a service account** + +1. In the navigation menu, select **IAM & Admin > Service Accounts**. +2. Select the email address of the **Compute Engine default service account**. +3. Select **Keys > Add key > Create new key**. +4. Select **JSON** as the key type. +5. Select **Create**. + +A JSON file will be downloaded to your computer. This file contains the credentials needed to configure the compute environment in Seqera. + +You can manage your key from the **Service Accounts** page. 
+ +### Cloud Storage bucket + +Google Cloud Storage is a type of **object storage**. To access files and store the results for your pipelines, create a **Cloud bucket** that your Seqera service account can access. + +**Create a Cloud Storage bucket** + +1. In the hamburger menu (**≡**), select **Cloud Storage > Create bucket**. +2. Enter a name for your bucket. You will reference this name when creating the compute environment in Seqera. + +:::caution +Do not use underscores (`_`) in your bucket name. Use hyphens (`-`) instead. +::: + +3. Select **Region** for the **Location type** and select the **Location** for your bucket. You will reference this location when creating the compute environment in Seqera. +4. Select **Standard** for the default storage class. +5. Select **Uniform** for the **Access control**. + +:::note +The Cloud Life Sciences API is available in a limited number of [locations](https://cloud.google.com/life-sciences/docs/concepts/locations). These locations are only used to store metadata about the pipeline operations. The storage bucket and compute resources can be in any region. +::: + +6. Select **Create**. +7. Once the bucket is created, you will be redirected to the **Bucket details** page. +8. Select **Permissions**, then **+ Add**. +9. Copy the email address of the Compute Engine default service account into **New principals**. +10. Select the following roles: + + - Storage Admin + - Storage Legacy Bucket Owner + - Storage Legacy Object Owner + - Storage Object Creator + +## Seqera compute environment + +:::caution +Your Seqera compute environment uses resources that you may be charged for in your Google Cloud account. See [Cloud costs](../monitoring/cloud-costs) for guidelines to manage cloud resources effectively and prevent unexpected costs. +::: + +After your Google Cloud resources have been created, create a new Seqera compute environment. + +**Create a Seqera Google Cloud Life Sciences compute environment** + +1. 
In a workspace, select **Compute Environments > New Environment**.
+2. Enter a descriptive name for this environment, e.g., _Google Life Sciences (europe-west2)_.
+3. Select **Google Life Sciences** as the target platform.
+4. From the **Credentials** drop-down, select existing Google Cloud credentials, or add new credentials by selecting the **+** button. If you choose to use existing credentials, skip to step 7.
+
+:::tip
+You can create multiple credentials in your Seqera workspace. See [Credentials](../credentials/overview).
+:::
+
+5. Enter a name for the credentials, e.g., _Google Cloud Credentials_.
+6. Enter the **Service account key** created previously.
+7. Select the [**Region** and **Zones**](https://cloud.google.com/compute/docs/regions-zones#available) where you wish to execute pipelines. Leave the **Location** empty for the Cloud Life Sciences API to use the closest available location.
+8. In the **Pipeline work directory** field, enter your storage bucket URL, e.g., `gs://my-bucket`. This bucket must be accessible in the region selected in the previous step.
+
+   :::note
+   When you specify a Cloud Storage bucket as your work directory, this bucket is used for the Nextflow [cloud cache](https://www.nextflow.io/docs/latest/cache-and-resume.html#cache-stores) by default. You can specify an alternative cache location with the **Nextflow config file** field on the pipeline [launch](../launch/launchpad#launch-form) form.
+   :::
+
+9. You can enable **Preemptible** to use preemptible instances, which have significantly reduced cost compared to on-demand instances.
+10. You can use a **Filestore file system** to automatically mount a Google Filestore volume in your pipelines.
+11. Apply [**Resource labels**](../resource-labels/overview) to the cloud resources consumed by this compute environment. Workspace default resource labels are prefilled.
+12. 
Expand **Staging options** to include: + - Optional [pre- or post-run Bash scripts](../launch/advanced#pre-and-post-run-scripts) that execute before or after the Nextflow pipeline execution in your environment. + - Global Nextflow configuration settings for all pipeline runs launched with this compute environment. Values defined here are pre-filled in the **Nextflow config file** field in the pipeline launch form. These values can be overridden during pipeline launch. + :::info + Configuration settings in this field override the same values in the pipeline repository `nextflow.config` file. See [Nextflow config file](../launch/advanced#nextflow-config-file) for more information on configuration priority. + ::: +13. Use the **Environment variables** option to specify custom environment variables for the Head job and/or Compute jobs. +14. Configure any advanced options you need: + - Enable **Use Private Address** to ensure that your Google Cloud VMs aren't accessible to the public internet. + - Use **Boot disk size** to control the boot disk size of VMs. + - Use **Head Job CPUs** and **Head Job Memory** to specify the CPUs and memory allocated for head jobs. +15. Select **Create** to finalize the compute environment setup. + +See [Launch pipelines](../launch/launchpad) to start executing workflows in your Google Cloud Life Sciences compute environment. 
diff --git a/platform-enterprise/compute-envs/hpc.md b/platform-enterprise/compute-envs/hpc.md new file mode 100644 index 000000000..3c18037a1 --- /dev/null +++ b/platform-enterprise/compute-envs/hpc.md @@ -0,0 +1,94 @@ +--- +title: "HPC compute environments" +description: "Instructions to set up HPC compute environments in Seqera Platform" +date: "11 May 2023" +tags: [slurm, lsf, pbs, grid, altair, ibm, moab, slurm, compute environment] +--- + +Seqera Platform streamlines the deployment of Nextflow pipelines into both cloud-based and on-prem HPC clusters and supports compute environment creation for the following management and scheduling solutions: + +- [Altair PBS Pro](https://www.altair.com/pbs-professional/) +- [Grid Engine](https://www.altair.com/grid-engine/) +- [IBM Spectrum LSF](https://www.ibm.com/products/hpc-workload-management/details) (Load Sharing Facility) +- [Moab](http://docs.adaptivecomputing.com/suite/8-0/basic/help.htm#topics/moabWorkloadManager/topics/intro/productOverview.htm) +- [Slurm](https://slurm.schedmd.com/overview.html) + +## Requirements + +To launch pipelines into an **HPC** cluster from Seqera, the following requirements must be satisfied: + +- The cluster should allow outbound connections to the Seqera web service. +- The cluster queue used to run the Nextflow head job must be able to submit cluster jobs. +- The Nextflow runtime version **21.02.0-edge** (or later) must be installed on the cluster. + +## Credentials + +Seqera requires SSH access to your HPC cluster to run pipelines. Use [managed identities](../credentials/managed_identities) to enable granular access control and preserve individual cluster user identities. + +You can also use workspace [SSH credentials](../credentials/ssh_credentials) for cluster login, but this provides service account access to your HPC to all Platform users. 
This means that all users will be granted the same file system access, and all activity is logged under the same user account on your HPC cluster. + +For HPC clusters that do not allow direct access through an SSH client, a secure connection can be authenticated with [Tower Agent](../supported_software/agent/overview). + +## Work and launch directories + +For instances where the work directory or launch directory must be set dynamically at runtime, you can use variable expansion. This works in conjunction with Tower Agent. The path that results from variable expansion must exist before workflow execution as the agent does not create directories. + +For example, if the HPC cluster file system has a `/workspace` directory with subdirectories for each user that can run jobs, the value for the work directory can be the following: `/workspace/$TW_AGENT_USER`. For a user `user1`, the work directory resolves to the `/workspace/user1` directory. + +The following variables are supported: + +- `TW_AGENT_WORKDIR`: Resolves to the work directory for Tower Agent. By default, this directory resolves to the `${HOME}/work` path, where `HOME` is the home directory of the user that the agent runs as. The work directory can be overridden by specifying the `--work-dir` argument when configuring Tower Agent. For more information, see the [Tower Agent][agent] documentation. +- `TW_AGENT_USER`: Resolves to the username that the agent is running as. By default, this is the Unix username that the agent runs as. On systems where the agent cannot determine which user it runs as, it falls back to the value of the `USER` environment variable. + +## HPC compute environment + +To create a new **HPC** compute environment: + +1. In a Seqera workspace, select **Compute environments > New environment**. +1. Enter a descriptive name for this environment. Use only alphanumeric characters, dashes, and underscores. +1. Select your HPC environment from the **Platform** dropdown menu. +1. 
Select your existing managed identity, SSH, or Tower Agent credentials, or select **+** and **SSH** or **Tower Agent** to add new credentials. +1. Enter the absolute path of the **Work directory** to be used on the cluster. You can use the `TW_AGENT_WORKDIR` and `TW_AGENT_USER` variables in the file system path. + + :::caution + All managed identity users must be a part of the same Linux user group. The group must have access to the HPC compute environment work directory. Set group permissions for the work directory as follows (replace `sharedgroupname` and `<work-directory>` with your group name and work directory): + + ```bash + chgrp -R sharedgroupname <work-directory> + chmod -R g+wxs <work-directory> + setfacl -Rdm g::rwX <work-directory> + ``` + + These commands change the group ownership of all files and directories in the work directory to `sharedgroupname`, ensure new files inherit the directory's group, and apply default ACL entries to allow the group read, write, and execute permissions for new files and directories. This setup facilitates shared access and consistent permissions management in the directory. + ::: + +1. Enter the absolute path of the **Launch directory** to be used on the cluster. If omitted, it will be the same as the work directory. +1. Enter the **Login hostname**. This is usually the hostname or public IP address of the cluster's login node. +1. Enter the **Head queue name**. This is the [default](https://www.nextflow.io/docs/latest/process.html#queue) cluster queue to which the Nextflow job will be submitted. +1. Enter the **Compute queue name**. This is the [default](https://www.nextflow.io/docs/latest/process.html#queue) cluster queue to which the Nextflow job will submit tasks. +1. Expand **Staging options** to include: + - Optional [pre- or post-run Bash scripts](../launch/advanced#pre-and-post-run-scripts) that execute before or after the Nextflow pipeline execution in your environment. + - Global Nextflow configuration settings for all pipeline runs launched with this compute environment. 
Values defined here are pre-filled in the **Nextflow config file** field in the pipeline launch form. These values can be overridden during pipeline launch. + :::info + Configuration settings in this field override the same values in the pipeline repository `nextflow.config` file. See [Nextflow config file](../launch/advanced#nextflow-config-file) for more information on configuration priority. + ::: +1. Specify custom **Environment variables** for the head job and/or compute jobs. +1. Configure any advanced options needed: + - Use the **Nextflow queue size** to limit the number of jobs that Nextflow can submit to the scheduler at the same time. + - Use the **Head job submit options** to add platform-specific submit options for the head job. You can optionally apply these options to compute jobs as well: + + :::note + Once set during compute environment creation, these options can't be overridden at pipeline launch time. + ::: + + :::note + In IBM LSF compute environments, use **Unit for memory limits**, **Per job memory limits**, and **Per task reserve** to control how memory is requested for Nextflow jobs. + ::: + +1. Select **Create** to finalize the creation of the compute environment. + +See [Launch pipelines](../launch/launchpad) to start executing workflows in your HPC compute environment. + + + +[agent]: ../supported_software/agent/overview diff --git a/platform-enterprise/compute-envs/k8s.md b/platform-enterprise/compute-envs/k8s.md new file mode 100644 index 000000000..5cb45db6b --- /dev/null +++ b/platform-enterprise/compute-envs/k8s.md @@ -0,0 +1,149 @@ +--- +title: "Kubernetes" +description: "Instructions to set up Kubernetes in Seqera Platform" +date: "21 Apr 2023" +tags: [k8s, kubernetes, compute environment] +--- + +[Kubernetes](https://kubernetes.io/) is the leading technology for the deployment and orchestration of containerized workloads in cloud-native environments. 
+ +Seqera Platform streamlines the deployment of Nextflow pipelines into Kubernetes, both for cloud-based and on-prem clusters. + +The following instructions create a Seqera compute environment for a **generic Kubernetes** distribution. See [Amazon EKS](./eks) or [Google Kubernetes Engine (GKE)](./gke) for EKS and GKE compute environment instructions. + +## Cluster preparation + +To prepare your Kubernetes cluster for the deployment of Nextflow pipelines using Seqera, this guide assumes that you've already created the cluster and that you have administrative privileges. + +This guide applies a Kubernetes manifest that creates a service account named `tower-launcher-sa` and the associated role bindings, all contained in the `tower-nf` namespace. Seqera uses the service account to launch Nextflow pipelines. Use this service account name when setting up the compute environment for this Kubernetes cluster in Seqera. + +**Prepare your Kubernetes cluster for Seqera Platform** + +1. Verify the connection to your Kubernetes cluster: + + ```bash + kubectl cluster-info + ``` + +1. Create a file named `tower-launcher.yml` with the following YAML: + + ```yaml file=../_templates/k8s/tower-launcher.yml showLineNumbers + ``` + +1. Apply the manifest: + + ```bash + kubectl apply -f tower-launcher.yml + ``` + +1. Create a persistent API token for the `tower-launcher-sa` service account: + + ```bash + kubectl apply -f - < + ``` + +## Seqera compute environment + +After you've prepared your Kubernetes cluster for Seqera integration, create a compute environment: + +**Create a Seqera Kubernetes compute environment** + +1. In a workspace, select **Compute environments > New environment**. +1. Enter a descriptive name for this environment, e.g., _K8s cluster_. +1. Select **Kubernetes** as the target platform. +1. From the **Credentials** drop-down, select existing Kubernetes credentials, or select **+** to add new credentials. If you choose to use existing credentials, skip to step 7. 
+ + :::tip + You can create multiple credentials in your Seqera workspace. See [Credentials](../credentials/overview). + ::: + +1. Enter a name, such as _K8s Credentials_. +1. Select either the **Service Account Token** or **X509 Client Certs** tab: + + - To authenticate using a Kubernetes service account, enter your **Service account token**. Obtain the token with the following command: + + ```bash + kubectl describe secret | grep -E '^token' | cut -f2 -d':' | tr -d '\t ' + ``` + + Replace `` with the name of the service account token created in the [cluster preparation](#cluster-preparation) instructions (default: `tower-launcher-token`). + + - To authenticate using an X509 client certificate, paste the contents of your certificate and key file (including the `-----BEGIN...-----` and `-----END...-----` lines) in the **Client certificate** and **Client Key** fields respectively. See the [Kubernetes documentation](https://kubernetes.io/docs/tasks/administer-cluster/certificates/) for instructions to generate your client certificate and key. + +1. Enter the **Control plane URL**, obtained with this command: + + ```bash + kubectl cluster-info + ``` + + It can also be found in your `~/.kube/config` file under the `server` field corresponding to your cluster. + +1. Specify the **SSL certificate** to authenticate your connection. + + Find the certificate data in your `~/.kube/config` file. It is the `certificate-authority-data` field corresponding to your cluster. + +1. Specify the **Namespace** created in the [cluster preparation](#cluster-preparation) instructions, which is _tower-nf_ by default. +1. Specify the **Head service account** created in the [cluster preparation](#cluster-preparation) instructions, which is _tower-launcher-sa_ by default. +1. Specify the **Storage claim** created in the [cluster preparation](#cluster-preparation) instructions, which serves as a scratch filesystem for Nextflow pipelines. 
The storage claim is called _tower-scratch_ in each of the provided examples. +1. Apply [**Resource labels**](../resource-labels/overview) to the cloud resources consumed by this compute environment. Workspace default resource labels are prefilled. +1. Expand **Staging options** to include: + - Optional [pre- or post-run Bash scripts](../launch/advanced#pre-and-post-run-scripts) that execute before or after the Nextflow pipeline execution in your environment. + - Global Nextflow configuration settings for all pipeline runs launched with this compute environment. Values defined here are pre-filled in the **Nextflow config file** field in the pipeline launch form. These values can be overridden during pipeline launch. + :::info + Configuration settings in this field override the same values in the pipeline repository `nextflow.config` file. See [Nextflow config file](../launch/advanced#nextflow-config-file) for more information on configuration priority. + ::: +1. You can use the **Environment variables** option to specify custom environment variables for the Head job and/or Compute jobs. +1. Configure any advanced options described below, as needed. +1. Select **Create** to finalize the compute environment setup. + +See [Launch pipelines](../launch/launchpad) to start executing workflows in your Kubernetes compute environment. + +### Advanced options + +Seqera Platform compute environments for Kubernetes include advanced options for storage and work directory paths, resource allocation, and pod customization. + +**Seqera Kubernetes advanced options** + +- The **Storage mount path** is the file system path where the Storage claim is mounted (default: `/scratch`). +- The **Work directory** is the file system path used as a working directory by Nextflow pipelines. It must be the storage mount path (default) or a subdirectory of it. 
+- The **Compute service account** is the service account used by Nextflow to submit tasks (default: the `default` account in the given namespace). +- The **Pod cleanup policy** determines when to delete terminated pods. +- Use **Custom head pod specs** to provide custom options for the Nextflow workflow pod (`nodeSelector`, `affinity`, etc). For example: + +```yaml +spec: + nodeSelector: + disktype: ssd +``` + +- Use **Custom service pod specs** to provide custom options for the compute environment pod. See above for an example. +- Use **Head Job CPUs** and **Head Job memory** to specify the hardware resources allocated to the Nextflow workflow pod. + diff --git a/platform-enterprise/compute-envs/overview.md b/platform-enterprise/compute-envs/overview.md new file mode 100644 index 000000000..3e12d1833 --- /dev/null +++ b/platform-enterprise/compute-envs/overview.md @@ -0,0 +1,59 @@ +--- +title: "Compute environment overview" +description: "Overview of compute environments in Seqera Platform" +date: "21 Apr 2023" +tags: [compute environment] +--- + +Seqera Platform **compute environments** define the execution platform where a pipeline will run. Compute environments enable users to launch pipelines on a growing number of **cloud** and **on-premises** platforms. + +Each compute environment must be configured to enable Seqera to submit tasks. See the individual compute environment pages below for platform-specific configuration steps. 
+ +## Platforms + +- [AWS Batch](./aws-batch) +- [Azure Batch](./azure-batch) +- [Google Cloud Batch](./google-cloud-batch) +- [Google Life Sciences](./google-cloud-lifesciences) +- [Grid Engine](./hpc) +- [Altair PBS Pro](./hpc) +- [IBM LSF](./hpc) +- [Moab](./hpc) +- [Slurm](./hpc) +- [Kubernetes](./k8s) +- [Amazon EKS](./eks) +- [Google Kubernetes Engine](./gke) + +## Select default compute environment + +If you have more than one compute environment, you can select a workspace primary compute environment to be used as the default when launching pipelines in that workspace. In a workspace, select **Compute Environments**. Then select **Make primary** from the options menu next to the compute environment you wish to use as default. + +## Rename compute environment + +You can edit the names of compute environments in private and organization workspaces. Select **Rename** from the options menu next to the compute environment you wish to edit. + +Select **Update** on the edit page to save your changes after you have updated the compute environment name. + +## GPU usage + +The process for provisioning GPU instances in your compute environment differs for each cloud provider. + +### AWS Batch + +The AWS Batch compute environment creation form in Seqera includes an **Enable GPUs** option. This enables you to run GPU-dependent workflows in the compute environment. + +Some important considerations: + +- Seqera only supports NVIDIA GPUs. Select instances with NVIDIA GPUs for your GPU-dependent processes. +- The **Enable GPUs** setting causes Batch Forge to specify the most current [AWS-recommended GPU-optimized ECS AMI](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-optimized_AMI.html) as the EC2 fleet AMI when creating the compute environment. This setting can be overridden by **AMI ID** in the advanced options. +- The **Enable GPUs** setting alone does not deploy GPU instances in your compute environment. 
You must still specify GPU-enabled instance types in the **Advanced options > Instance types** field. +- Your Nextflow script must include [accelerator directives](https://www.nextflow.io/docs/latest/process.html?highlight=accelerator#accelerator) to use the provisioned GPUs. +- The NVIDIA Container Runtime uses [environment variables](https://github.com/NVIDIA/nvidia-container-runtime#environment-variables-oci-spec) in container images to specify a GPU accelerated container. These variables should be included in the [`containerOptions`](https://www.nextflow.io/docs/latest/process.html#process-containeroptions) directive for each GPU-dependent process in your Nextflow script. The `containerOptions` directive can be set inline in your process definition or via configuration. For example, to add the directive to a process named `UseGPU` via configuration: + +```groovy +process { + withName: UseGPU { + containerOptions '-e NVIDIA_DRIVER_CAPABILITIES=compute,utility -e NVIDIA_VISIBLE_DEVICES=all' + } +} +``` diff --git a/platform-enterprise/credentials/agent_credentials.md b/platform-enterprise/credentials/agent_credentials.md new file mode 100644 index 000000000..5e770e267 --- /dev/null +++ b/platform-enterprise/credentials/agent_credentials.md @@ -0,0 +1,27 @@ +--- +title: "Tower Agent credentials" +description: "Instructions to create Tower Agent credentials in Seqera Platform." +date: "21 Apr 2023" +tags: [agent, credentials] +--- + +[Tower Agent](../supported_software/agent/overview) enables Seqera Platform to launch pipelines on HPC clusters that do not allow direct access through an SSH client. Tower Agent authenticates a secure connection with Seqera using a Tower Agent credential. + +## Tower Agent sharing + +You can share a single Tower Agent instance with all members of a workspace. Create a Tower Agent credential, with **Shared agent** enabled, in the relevant workspace. 
All workspace members can then use this credential (Connection ID + Seqera access token) to use the same Tower Agent instance. + +## Create a Tower Agent credential + +1. Add your credentials to your organization or personal workspace: + - From an organization workspace: Go to **Credentials > Add Credentials**. + - From your personal workspace: From the user menu, go to **Your credentials > Add credentials**. + +2. Complete the following fields: + - **Name**: Specify a unique name for the credentials using alphanumeric characters, dashes, or underscores. For example, `my-agent-creds`. + - **Provider**: Select **Tower Agent**. + - **Agent connection ID**: The connection ID used to run your Tower Agent instance. Must match the connection ID used when running the Agent (see **Usage** below). + - **Shared agent**: Enables Tower Agent sharing for all workspace members. + - **Usage**: Populates a code snippet for Tower Agent download with your connection ID. Replace `` with your [Seqera access token](../api/overview#authentication). + +3. After you've completed all the form fields, select **Add**. The new credential is now listed under the **Credentials** tab. diff --git a/platform-enterprise/credentials/aws_registry_credentials.md b/platform-enterprise/credentials/aws_registry_credentials.md new file mode 100644 index 000000000..337f109ec --- /dev/null +++ b/platform-enterprise/credentials/aws_registry_credentials.md @@ -0,0 +1,50 @@ +--- +title: "AWS ECR credentials" +description: "Instructions to create AWS ECR credentials in Seqera Platform." +date: "21 Apr 2023" +tags: [aws, ecr, credentials] +--- + +From version 22.3, Seqera Platform supports the configuration of credentials for the Nextflow Wave container service to authenticate to private and public container registries. For more information on Wave containers, see [the Nextflow documentation](https://www.nextflow.io/docs/latest/wave.html). 
+ +:::note +Container registry credentials are only used by the Wave container service. Add `wave { enabled=true }` to the **Nextflow config** field on the launch page, or to your `nextflow.config` file, for your pipeline execution to use Wave containers. +::: + +## AWS ECR access + +Wave requires programmatic access to your private Elastic Container Registry (ECR) via [long-term access keys](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#create-long-term-access-keys). Create a user with registry read permissions (e.g., a subset of the AWS-managed `AmazonEC2ContainerRegistryReadOnly` policy) for this purpose. + +**Create an IAM user with AWS ECR access** + +1. Open the [IAM console](https://console.aws.amazon.com/iam/). +2. Select **Users** from the navigation pane. +3. Select the name of the user whose keys you want to manage, then select the **Security credentials** tab. We recommend creating an IAM user specifically for Wave authentication instead of using existing credentials with broader permissions. +4. In the **Access keys** section, select **Create access key**. Each IAM user can have only two access keys at a time, so if the Create option is deactivated, delete an existing access key first. +5. On the **Access key best practices & alternatives** page, select **Other** and then **Next**. +6. On the **Retrieve access key** page, you can either **Show** the user's secret access key details, or store them by selecting **Download .csv file**. +7. The newly created access key pair is active by default and can be stored as a container registry credential in Seqera. + +:::note +Your credential must be stored in Seqera as a **container registry** credential, even if the same access keys already exist as a workspace credential. +::: + +## Add credentials to Seqera + +1. Add your credentials to your organization or personal workspace: + - From an organization workspace: Go to **Credentials > Add Credentials**. 
+ - From your personal workspace: From the user menu, go to **Your credentials > Add credentials**. + +2. Complete the following fields: + + - **Name**: Specify a unique name for the credentials using alphanumeric characters, dashes, or underscores. For example, `my-registry-creds`. + - **Provider**: Select **Container registry**. + - **User name**: Specify your IAM user access key ID. For example, `AKIAIOSFODNN7EXAMPLE`. + - **Password**: Specify your IAM user secret access key. + - **Registry server**: Specify the container registry server name. For example, `<account-id>.dkr.ecr.<region>.amazonaws.com`. + +3. After you've completed all the form fields, select **Add**. The new credential is now listed under the **Credentials** tab. + +:::note +To use the Amazon ECR Public Registry, create a container registry credential with **public.ecr.aws** as the registry server. You can reuse your existing IAM credentials, but ensure that the IAM user has the `AmazonElasticContainerRegistryPublicReadOnly` policy attached. +::: \ No newline at end of file diff --git a/platform-enterprise/credentials/azure_registry_credentials.md b/platform-enterprise/credentials/azure_registry_credentials.md new file mode 100644 index 000000000..47e0184b7 --- /dev/null +++ b/platform-enterprise/credentials/azure_registry_credentials.md @@ -0,0 +1,49 @@ +--- +title: "Azure container registry credentials" +description: "Instructions to create Azure container registry credentials in Seqera Platform." +date: "21 Apr 2023" +tags: [azure, registry, credentials] +--- + +From version 22.3, Seqera Platform supports the configuration of credentials for the Nextflow Wave container service to authenticate to private and public container registries. For more information on Wave containers, see the [Nextflow documentation](https://www.nextflow.io/docs/latest/wave.html). + +:::note +Container registry credentials are only used by the Wave container service. 
Add `wave { enabled=true }` to the **Nextflow config** field on the launch page, or to your `nextflow.config` file, for your pipeline execution to use Wave containers. +::: + +## Azure container registry access + +Azure container registry makes use of Azure RBAC (Role-Based Access Control) to grant users access. For more information, see [Azure container registry roles and permissions](https://learn.microsoft.com/en-us/azure/container-registry/container-registry-roles). + +You must use Azure credentials with long-term registry read (**content/read**) access to authenticate Seqera to your registry. We recommend a [token with repository-scoped permissions](https://learn.microsoft.com/en-us/azure/container-registry/container-registry-repository-scoped-permissions) that's used only by Seqera. + +**Create an access token with Azure container registry access** + +1. In the Azure portal, navigate to your container registry. +2. Under **Repository permissions**, select **Tokens > +Add**. +3. Enter a token name. +4. Under **Scope map**, select **Create new**. +5. In the **Create scope map** section, enter a name and description for the new scope map. +6. Select your **Repository** from the drop-down menu. +7. Select **content/read** from the **Permissions** drop-down menu, then select **Add** to create the scope map. +8. In the **Create token** section, ensure the **Status** is **Enabled** (default), then select **Create**. +9. Return to **Repository permissions > Tokens** for your registry, then select the token you just created. +10. On the token details page, select **password1** or **password2**. +11. In the password details section, uncheck the **Set expiration date?** checkbox, then select **Generate**. +12. Copy and save the generated password (this is only displayed once). + +## Add credentials to Seqera + +1. Add your credentials to your organization or personal workspace: + - From an organization workspace: Go to **Credentials > Add Credentials**. 
+ - From your personal workspace: From the user menu, go to **Your credentials > Add credentials**. + +2. Complete the following fields: + + - **Name**: Specify a unique name for the credentials using alphanumeric characters, dashes, or underscores. For example, `my-registry-creds`. + - **Provider**: Select **Container registry**. + - **User name**: Specify your registry token name. For example, `my-registry-token`. + - **Password**: Your registry token password. For example, `my-registry-token`. + - **Registry server**: Specify the container registry server name. You can obtain this from the Azure portal: **Settings > Access keys > Login server**. For example, `myregistry.azurecr.io`. + +3. After you've completed all the form fields, select **Add**. The new credential is now listed under the **Credentials** tab. diff --git a/platform-enterprise/credentials/docker_hub_registry_credentials.md b/platform-enterprise/credentials/docker_hub_registry_credentials.md new file mode 100644 index 000000000..1c1e376d6 --- /dev/null +++ b/platform-enterprise/credentials/docker_hub_registry_credentials.md @@ -0,0 +1,40 @@ +--- +title: "Docker Hub credentials" +description: "Instructions to create Docker Hub credentials in Seqera Platform." +date: "21 Apr 2023" +tags: [docker, registry, credentials] +--- + +From version 22.3, Seqera Platform supports the configuration of credentials for the Nextflow Wave container service to authenticate to private and public container registries. For more information on Wave containers, see the [Nextflow documentation](https://www.nextflow.io/docs/latest/wave.html). + +:::note +Container registry credentials are only used by the Wave container service. Add `wave { enabled=true }` to the **Nextflow config** field on the launch page, or to your `nextflow.config` file, for your pipeline execution to use Wave containers. 
+::: + +## Docker Hub registry access + +You must use Docker Hub credentials with **Read-only** access to authenticate Seqera to your registry. Docker Hub uses personal access tokens (PATs) for authentication. We don't currently support Docker Hub authentication with 2FA (two-factor authentication). + +**Create a Docker Hub PAT** + +1. Log in to [Docker Hub](https://hub.docker.com/). +2. Select your username in the top right corner and select **Account Settings**. +3. Select **Security > New Access Token**. +4. Enter a token description and select **Read-only** from the Access permissions drop-down menu, then select **Generate**. +5. Copy and save the generated access token (this is only displayed once). + +## Add credentials to Seqera + +1. Add your credentials to your organization or personal workspace: + - From an organization workspace: Go to **Credentials > Add Credentials**. + - From your personal workspace: From the user menu, go to **Your credentials > Add credentials**. + +2. Complete the following fields: + + - **Name**: Specify a unique name for the credentials using alphanumeric characters, dashes, or underscores. For example, `my-registry-creds`. + - **Provider**: Select **Container registry**. + - **User name**: Specify your Docker username. For example, `user1`. + - **Password**: Specify your personal access token (PAT). For example, `1fcd02dc-...215bc3f3`. + - **Registry server**: Specify the container registry hostname, excluding the protocol. For example, `docker.io`. + +3. After you've completed all the form fields, select **Add**. The new credential is now listed under the **Credentials** tab. 
diff --git a/platform-enterprise/credentials/gitea_registry_credentials.md b/platform-enterprise/credentials/gitea_registry_credentials.md new file mode 100644 index 000000000..68942d36e --- /dev/null +++ b/platform-enterprise/credentials/gitea_registry_credentials.md @@ -0,0 +1,37 @@ +--- +title: "Gitea container registry credentials" +description: "Instructions to create Gitea container registry credentials in Seqera Platform." +date: "15 Dec 2023" +tags: [gitea, registry, credentials] +--- + +From version 22.3, Seqera Platform supports the configuration of credentials for the Nextflow Wave container service to authenticate to private and public container registries. For more information on Wave containers, see the [Nextflow documentation](https://www.nextflow.io/docs/latest/wave.html). + +Gitea container registries support [authentication][gitea-auth] using a personal access token. Use your personal access token as your password when you create your Gitea container registry credentials in Seqera. + +:::note +Container registry credentials are only used by the Wave container service. Add `wave { enabled=true }` to the **Nextflow config** field on the launch page, or to your `nextflow.config` file, for your pipeline execution to use Wave containers. +::: + +## Create a personal access token (PAT) + +You must create a PAT to access your Gitea container registry from Wave. For more information, see [Create a personal access token][gitea-create]. + +## Add credentials to Seqera + +1. Add your credentials to your organization or personal workspace: + - From an organization workspace: Go to **Credentials > Add Credentials**. + - From your personal workspace: From the user menu, go to **Your credentials > Add credentials**. + +2. Complete the following fields: + + - **Name**: Specify a unique name for the credentials using alphanumeric characters, dashes, or underscores. For example, `my-registry-creds`. + - **Provider**: Select **Container registry**. 
+ - **User name**: Specify your Gitea username. For example, `gitlab_user1`. + - **Password**: Specify your Gitea personal access token (PAT). For example, `1fcd02dc-...215bc3f3`. + - **Registry server**: Specify your Gitea container registry URL. For example, `gitea.example.com`. + +3. After you've completed all the form fields, select **Add**. The new credential is now listed under the **Credentials** tab. + +[gitea-auth]: https://docs.gitea.com/usage/packages/container#login-to-the-container-registry +[gitea-create]: https://docs.gitea.com/development/api-usage#authentication diff --git a/platform-enterprise/credentials/github_registry_credentials.md b/platform-enterprise/credentials/github_registry_credentials.md new file mode 100644 index 000000000..e69de52aa --- /dev/null +++ b/platform-enterprise/credentials/github_registry_credentials.md @@ -0,0 +1,37 @@ +--- +title: "GitHub container registry credentials" +description: "Instructions to create GitHub container registry credentials in Seqera Platform." +date: "15 Dec 2023" +tags: [github, registry, credentials] +--- + +From version 22.3, Seqera Platform supports the configuration of credentials for the Nextflow Wave container service to authenticate to private and public container registries. For more information on Wave containers, see the [Nextflow documentation](https://www.nextflow.io/docs/latest/wave.html). + +GitHub Packages only supports [authentication][github-pat] using a personal access token (classic). Use your personal access token as your password when you create your GitHub container registry credentials in Seqera. + +:::note +Container registry credentials are only used by the Wave container service. Add `wave { enabled=true }` to the **Nextflow config** field on the launch page, or to your `nextflow.config` file, for your pipeline execution to use Wave containers. +::: + +## Create a personal access token (PAT) + +You must create a PAT to access your GitHub container registry from Wave. 
For more information, see [Create a personal access token][github-create]. + +## Add credentials to Seqera + +1. Add your credentials to your organization or personal workspace: + - From an organization workspace: Go to **Credentials > Add Credentials**. + - From your personal workspace: From the user menu, go to **Your credentials > Add credentials**. + +2. Complete the following fields: + + - **Name**: Specify a unique name for the credentials using alphanumeric characters, dashes, or underscores. For example, `my-registry-creds`. + - **Provider**: Select **Container registry**. + - **User name**: Specify your GitHub username. For example, `github_user1`. + - **Password**: Specify your personal access token (PAT) classic. For example, `1fcd02dc-...215bc3f3`. + - **Registry server**: Specify your GitHub container registry URL. For example, `ghcr.io`. + +3. After you've completed all the form fields, select **Add**. The new credential is now listed under the **Credentials** tab. + +[github-pat]: https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry#authenticating-with-a-personal-access-token-classic +[github-create]: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-personal-access-token-classic diff --git a/platform-enterprise/credentials/gitlab_registry_credentials.md b/platform-enterprise/credentials/gitlab_registry_credentials.md new file mode 100644 index 000000000..8a969ffbe --- /dev/null +++ b/platform-enterprise/credentials/gitlab_registry_credentials.md @@ -0,0 +1,40 @@ +--- +title: "GitLab container registry credentials" +description: "Instructions to create GitLab container registry credentials in Seqera Platform." 
+date: "15 Dec 2023" +tags: [gitlab, registry, credentials] +--- + +From version 22.3, Seqera Platform supports the configuration of credentials for the Nextflow Wave container service to authenticate to private and public container registries. For more information on Wave containers, see the [Nextflow documentation](https://www.nextflow.io/docs/latest/wave.html). + +If your organization enabled two-factor authentication (2FA) for your GitLab organization or project, you must use your [personal access token][gitlab-pat] as your password when you create your [GitLab container registry credentials][gitlab-cr]. + +:::note +Container registry credentials are only used by the Wave container service. Add `wave { enabled=true }` to the **Nextflow config** field on the launch page, or to your `nextflow.config` file, for your pipeline execution to use Wave containers. +::: + +## Create a personal access token (PAT) + +If your organization enabled 2FA for your organization or project, you must create a PAT to access your GitLab container registry from Wave. For more information, see [Create a personal access token][gitlab-create]. If your organization created a [project access token][gitlab-project] or a [group access token][gitlab-group], ask your GitLab administrator for access. + +## Add credentials to Seqera + +1. Add your credentials to your organization or personal workspace: + - From an organization workspace: Go to **Credentials > Add Credentials**. + - From your personal workspace: From the user menu, go to **Your credentials > Add credentials**. + +2. Complete the following fields: + + - **Name**: Specify a unique name for the credentials using alphanumeric characters, dashes, or underscores. For example, `my-registry-creds`. + - **Provider**: Select **Container registry**. + - **User name**: Specify your GitLab username. 
+ - **Password**: Specify your personal access token (PAT), group access token, or project access token if 2FA is enabled by your GitLab organization. Otherwise specify your GitLab password. + - **Registry server**: Specify your GitLab container registry URL. For example, `gitlab.example.com`. + +3. After you've completed all the form fields, select **Add**. The new credential is now listed under the **Credentials** tab. + +[gitlab-cr]: https://docs.gitlab.com/ee/user/packages/container_registry/authenticate_with_container_registry.html +[gitlab-pat]: https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html +[gitlab-create]: https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html#create-a-personal-access-token +[gitlab-project]: https://docs.gitlab.com/ee/user/project/settings/project_access_tokens.html +[gitlab-group]: https://docs.gitlab.com/ee/user/group/settings/group_access_tokens.html diff --git a/platform-enterprise/credentials/google_registry_credentials.md b/platform-enterprise/credentials/google_registry_credentials.md new file mode 100644 index 000000000..931f276bf --- /dev/null +++ b/platform-enterprise/credentials/google_registry_credentials.md @@ -0,0 +1,84 @@ +--- +title: "Google registry credentials" +description: "Instructions to create Google Cloud registry credentials in Seqera Platform." +date: "21 Apr 2023" +tags: [google, container, registry, artifact, credentials] +--- + +From version 22.3, Seqera Platform supports the configuration of credentials for the Nextflow Wave container service to authenticate to private and public container registries. For more information on Wave containers, see the [Nextflow documentation](https://www.nextflow.io/docs/latest/wave.html). + +:::note +Container registry credentials are only used by the Wave container service. Add `wave { enabled=true }` to the **Nextflow config** field on the launch page, or to your `nextflow.config` file, for your pipeline execution to use Wave containers. 
+::: + +## Google Cloud registry access + +:::note +Although Google Cloud Container Registry is still available and supported as a [Google Enterprise API](https://cloud.google.com/blog/topics/inside-google-cloud/new-api-stability-tenets-govern-google-enterprise-apis), new features will only be available in Artifact Registry. Container Registry will only receive critical security fixes. Google recommends using Artifact Registry for all new registries moving forward. +::: + +Google Cloud Artifact Registry and Container Registry are fully integrated with Google Cloud services and support various authentication methods. Seqera requires programmatic access to your private registry using [long-lived service account keys](https://cloud.google.com/artifact-registry/docs/docker/authentication#json-key) in JSON format. + +Create dedicated service account keys that are only used to interact with your repositories. Seqera requires the [Artifact Registry Reader](https://cloud.google.com/artifact-registry/docs/access-control#permissions) or [Storage Object Viewer](https://cloud.google.com/container-registry/docs/access-control#permissions) role. + +## Create a Google service account with registry access + +**Google Cloud Artifact Registry** + +Administrators can create a service account from the Google Cloud console: + +1. Go to the [Create service account](https://console.cloud.google.com/projectselector/iam-admin/serviceaccounts/create?walkthrough_id=iam--create-service-account) page. +2. Select a Cloud project. +3. Enter a service account name and (optional) description. +4. Select **Create and continue**. +5. From the **Role** drop-down menu under step 2, select **Artifact Registry > Artifact Registry Reader**, then select **Continue**. +6. (Optional) Grant other users and admins access to this service account. +7. Select **Done**. +8. 
From the project service accounts page, select the three dots menu icon under **Actions** for the service account you just created, then select **Manage keys**. +9. On the **Keys** page, select **Add key**. +10. On the **Create private key** popup, select **JSON** and then **Create**. This triggers a download of a JSON file containing the service account private key and service account details. +11. Base-64 encode the contents of the JSON key file: + +```bash + #Linux + base64 KEY-FILE-NAME > NEW-KEY-FILE-NAME + + #macOS + base64 -i KEY-FILE-NAME -o NEW-KEY-FILE-NAME + + #Windows + Base64.exe -e KEY-FILE-NAME > NEW-KEY-FILE-NAME +``` + +**Google Cloud Container Registry** + +Administrators can create a service account from the Google Cloud console: + +1. Navigate to the [Create service account](https://console.cloud.google.com/projectselector/iam-admin/serviceaccounts/create?walkthrough_id=iam--create-service-account) page. +2. Select a Cloud project. +3. Enter a service account name and an optional description. +4. Select **Create and continue**. +5. From the **Role** drop-down menu under step 2, search for and select **Storage Object Viewer**, then select **Continue**. +6. (Optional) Grant other users and admins access to this service account under step 3. +7. Select **Done**. +8. From the project service accounts page, select the three dots menu icon under **Actions** for the service account you just created, then select **Manage keys**. +9. On the **Keys** page, select **Add key**. +10. On the **Create private key** popup, select **JSON** and then **Create**. This triggers a download of a JSON file containing the service account private key and service account details. + +## Add credentials to Seqera + +1. Add your credentials to your organization or personal workspace: + - From an organization workspace: Go to **Credentials > Add Credentials**. + - From your personal workspace: From the user menu, go to **Your credentials > Add credentials**. + +2. 
Complete the following fields: + + - **Name**: Specify a unique name for the credentials using alphanumeric characters, dashes, or underscores. For example, `my-registry-creds`. + - **Provider**: Select **Container registry**. + - **User name**: Specify the service account key type: + - Container registry: `_json_key` + - Artifact Registry: `_json_key_base64` + - **Password**: Specify the JSON key file content. This content is base64-encoded for Artifact Registry. You must remove any line breaks or trailing spaces. For example, `wewogICJ02...9tIgp9Cg==`. + - **Registry server**: Specify the container registry hostname, excluding the protocol. For example, `-docker.pkg.dev`. + +3. After you've completed all the form fields, select **Add**. The new credential is now listed under the **Credentials** tab. diff --git a/platform-enterprise/credentials/managed_identities.md b/platform-enterprise/credentials/managed_identities.md new file mode 100644 index 000000000..95d727443 --- /dev/null +++ b/platform-enterprise/credentials/managed_identities.md @@ -0,0 +1,63 @@ +--- +title: "Managed identities" +description: "Configure managed identities in Seqera Platform." +date: "12 Jun 2024" +tags: [organizations, administration, managed identities, clusters, ssh, credentials] +--- + +Managed identities offer significant advantages for high performance computing (HPC) environments by enabling granular access control for individual workspace users. Unlike traditional SSH credentials that grant all workspace users access to HPC clusters using the same set of credentials, managed identities ensure each user’s activity is logged under their own credentials. This preservation of user identity is crucial as it naturally inherits the HPC system's fair usage queue policies, mitigates the noisy neighbor problem, and reduces the long wait times associated with First-In-First-Out (FIFO) queues common with shared SSH credentials. 
+ +Traditional SSH credentials, while simplifying access control to computing resources, result in all user activities on the HPC cluster being logged under the same user credentials. This means all Seqera workspace users have the same access permissions on your HPC cluster, leading to indistinguishable user activities. + +Managed identities resolve these limitations by allowing administrators to configure a managed identity at the organizational level for access to supported HPC compute environments. This managed identity is selected for authentication similarly to traditional credentials, but contains multiple user credentials each tied to a unique Seqera user. This setup preserves the identity of the user launching workflows on the compute environment and improves traceability and adherence to data access policies. + +Moreover, with managed identities, users only have the access permissions that their system administrators have granted, minimizing the risk of unauthorized read/write operations in restricted folders. In contrast, shared SSH credentials provide all workspace users with the same access level on the HPC side, which is often more extensive than what an individual user typically needs. By grouping individual user SSH credentials into a single element, managed identities allow administrators to streamline user login and compute environment access while maintaining visibility into data access and compute resource usage for each user. + + +## Create a managed identity + +Organization owners can create managed identities at the organization level. A managed identity with user credentials can be used as a credential in HPC clusters for the same provider. + +1. From your organization page, select the **Managed identities** tab, then **Add managed identity**. +1. Enter the details of your cluster: + - A unique **Cluster name** of your choice using alphanumeric, dash, and underscore characters. + - Select a cluster **Provider** from the dropdown. 
+ - The fully qualified cluster **Hostname** to be used to connect to the cluster via SSH. This is usually the cluster login node. + - The SSH **Port** number for the login connection. The default is port 22. +1. Select **Add cluster**. The new cluster is now listed under your organization's managed identities. + +Select **Edit** next to a managed identity in the list to edit its details and add user credentials. + +:::note +If the managed identity is already in use on a compute environment, editing its details may lead to errors when using the compute environment. +::: + +## Add user credentials + +Organization owners can grant individual users access to managed identities by adding each user's credentials to the managed identity. You must add user credentials to a managed identity before it can be used in a compute environment. + +Organization members can add, edit, and delete their own user credentials in a managed identity. + +:::caution +All managed identity users must be a part of the same Linux user group. The group must have access to the HPC compute environment work directory. + +Set group permissions for the work directory as follows (replace `sharedgroupname` and `` with your group name and work directory): + +```bash +chgrp -R sharedgroupname +chmod -R g+wxs +setfacl -Rdm g::rwX +``` + +These commands change the group ownership of all files and directories in the work directory to `sharedgroupname`, ensure new files inherit the directory's group, and apply default ACL entries to allow the group read, write, and execute permissions for new files and directories. This setup facilitates shared access and consistent permissions management in the directory. +::: + +1. From the **Managed identities** tab, select **Edit** next to the cluster in question, then select the **Users** tab. +1. The members of the organization are prepopulated in the **Users** list. Users without credentials are listed with a **Missing** credentials status. 
Add a user's credentials by selecting **Add credentials** from the user action menu, or the **Add credentials** button. +1. Enter the credential details in the **Add credentials** window: + - The member's **Linux username** used to access the cluster. + - Paste the contents of the **SSH private key** file for the user's SSH key pair, including the `-----BEGIN OPENSSH PRIVATE KEY-----` and `-----END OPENSSH PRIVATE KEY-----` lines. Ensure no additional lines or spaces are included. + - The SSH private key **Passphrase**, if the key has a passphrase. Otherwise, leave this blank. +1. Select **Add credentials**. The Linux username for the user is now populated in the list, and the **Credentials** status is changed to **Added**. + +Edit existing user credentials by selecting **Edit credentials** from the **Actions** menu next to a user name in the list. \ No newline at end of file diff --git a/platform-enterprise/credentials/overview.md b/platform-enterprise/credentials/overview.md new file mode 100644 index 000000000..6c45a55ae --- /dev/null +++ b/platform-enterprise/credentials/overview.md @@ -0,0 +1,16 @@ +--- +title: "Credentials Overview" +description: "Overview of credentials in Seqera Platform." +date: "21 Apr 2023" +tags: [credentials] +--- + +Configure **workspace credentials** in Seqera Platform to store the access keys and tokens for your [compute environments](../compute-envs/overview) and [Git hosting services](../git/overview). + +From version 22.3, you can configure **container registry credentials** to be used by the [Wave container service](https://nextflow.io/docs/latest/wave.html) to authenticate to private and public container registries like Docker Hub, Google Artifact Registry, Quay, etc. + +See the **Container registry credentials** section for registry-specific instructions. + +:::note +All credentials are (AES-256) encrypted before secure storage and not exposed in an unencrypted way by any Seqera API. 
+::: diff --git a/platform-enterprise/credentials/quay_registry_credentials.md b/platform-enterprise/credentials/quay_registry_credentials.md new file mode 100644 index 000000000..9a160b73d --- /dev/null +++ b/platform-enterprise/credentials/quay_registry_credentials.md @@ -0,0 +1,39 @@ +--- +title: "Quay container registry credentials" +description: "Instructions to create Quay container registry credentials in Seqera Platform." +date: "21 Apr 2023" +tags: [quay, credentials] +--- + +From version 22.3, Seqera Platform supports the configuration of credentials for the Nextflow Wave container service to authenticate to private and public container registries. For more information on Wave containers, see the [Nextflow documentation](https://www.nextflow.io/docs/latest/wave.html). + +:::note +Container registry credentials are only used by the Wave container service. Add `wave { enabled=true }` to the **Nextflow config** field on the launch page, or to your `nextflow.config` file, for your pipeline execution to use Wave containers. +::: + +For Quay repositories, we recommend using [robot accounts](https://docs.quay.io/glossary/robot-accounts.html) with **Read** access permissions for authentication. + +**Create a Quay robot account** + +1. Sign in to [quay.io](https://quay.io/). +2. From the user or organization view, select the **Robot Accounts** tab. +3. Select **Create Robot Account**. +4. Enter a robot account name. The username for robot accounts have the format `namespace+accountname`, where `namespace` is the user or organization name and `accountname` is your chosen robot account name. +5. Grant the robot account repository **Read** permissions from **Settings > User and Robot Permissions** in the repository view. +6. Select the robot account in your admin panel to retrieve the token value. + +## Add credentials to Seqera + +1. 
Add your credentials to your organization or personal workspace: + - From an organization workspace: Go to **Credentials > Add Credentials**. + - From your personal workspace: From the user menu, go to **Your credentials > Add credentials**. + +2. Complete the following fields: + + - **Name**: Specify a unique name for the credentials using alphanumeric characters, dashes, or underscores. For example, `my-registry-creds`. + - **Provider**: Select **Container registry**. + - **User name**: Specify your robot account username. For example, `namespace+accountname`. + - **Password**: Specify your robot account access token. For example, `PasswordFromQuayAdminPanel`. + - **Registry server**: Specify your container registry hostname. For example, `quay.io`. + +3. After you've completed all the form fields, select **Add**. The new credential is now listed under the **Credentials** tab. diff --git a/platform-enterprise/credentials/ssh_credentials.md b/platform-enterprise/credentials/ssh_credentials.md new file mode 100644 index 000000000..1f715146d --- /dev/null +++ b/platform-enterprise/credentials/ssh_credentials.md @@ -0,0 +1,44 @@ +--- +title: "SSH credentials" +description: "Instructions to create SSH credentials in Seqera Platform." +date: "21 Apr 2023" +tags: [ssh, credentials] +--- + +SSH public key authentication relies on asymmetric cryptography to generate a public and private key pair. The public key remains on the target (remote) machine, while the private key (and passphrase) is stored in Seqera Platform as a credential. The key pair is used to authenticate a connection with your SSH-enabled environment. + +To preserve individual user identities by using multiple user SSH credentials to access your HPC compute environments, see [Managed identities](./managed_identities). + +:::note +All credentials are (AES-256) encrypted before secure storage and not exposed in an unencrypted way by any Seqera API. 
+::: + +## Create an SSH key pair + +To use SSH public key authentication: + +- The remote system must have a version of SSH installed. This guide assumes the remote system uses OpenSSH. If you're using a different version of SSH, the key generation steps may differ. +- The SSH public key must be present on the remote system (usually in `~/.ssh/authorized_keys`). + +To generate an SSH key pair: + +1. From the target machine, open a terminal window and run `ssh-keygen`. +2. Follow the prompts to: + - Specify a file path and name (or keep the default). + - Specify a passphrase (recommended). +3. Navigate to the target folder (default `/home/user/.ssh/id_rsa`) and open the private key file with a plain text editor. +4. Copy the private key file contents before navigating to Seqera. + +## Create an SSH credential in Seqera + +1. Add your credentials to your organization or personal workspace: + - From an organization workspace: Go to **Credentials > Add Credentials**. + - From your personal workspace: From the user menu, go to **Your credentials > Add credentials**. + +2. Complete the following fields: + - **Name**: A unique name for the credentials using alphanumeric characters, dashes, or underscores. For example, `my-ssh-creds`. + - **Provider**: Select **SSH**. + - **SSH private key**: Paste the SSH private key file contents. Include the `-----BEGIN OPENSSH PRIVATE KEY-----` and `-----END OPENSSH PRIVATE KEY-----` lines. + - **Passphrase**: The SSH private key passphrase (recommended). If your key pair was created without a passphrase, leave this blank. + +3. After you've completed all the form fields, select **Add**. The new credential is now listed under the **Credentials** tab. 
diff --git a/platform-enterprise/data-privacy/overview.md b/platform-enterprise/data-privacy/overview.md new file mode 100644 index 000000000..d65290336 --- /dev/null +++ b/platform-enterprise/data-privacy/overview.md @@ -0,0 +1,154 @@ +--- +title: "Data privacy" +description: "Overview of the data collected by Seqera Platform" +date: "21 Apr 2023" +tags: [data, privacy] +--- + +### Your data + +Your data stays strictly within **your** infrastructure. + +When you launch a workflow through Seqera Platform, you need to connect your infrastructure (HPC/VMs/K8s) by creating the appropriate credentials and compute environment in a workspace. The application then uses this configuration to trigger a Nextflow workflow within your infrastructure similar to what is done via the Nextflow CLI. The application doesn't manipulate any data itself and no data is transferred to the infrastructure where it's running. + +It may be possible to access some data within your storage from the application interface - for example, viewing logs and reports generated in a pipeline run. However, this data is never stored within the platform's infrastructure. + +### User deletion + +When a Platform user account is deleted, the following happens: + +- The user account email is changed to `none@your-domain`. Any runs and run metadata associated with the user account will then display that email address. +- The username is changed to `username-`. +- All of the user's organization, workspace, and team memberships are deleted. +- All of the user's access tokens are deleted from their personal workspace. + +For Enterprise installations, in addition to the above: + +- All the user's credentials are deleted from their personal workspace. +- All the user's compute environments are deleted from their personal workspace. +- All actions created by the user are deleted from their personal workspace. 
+ +### Metadata stored by Seqera Platform + +Workflow execution metadata is sent by the Nextflow runtime to the application when: + +- When you launch a workflow with the application. + - When you use the `-with-tower` option at the command line. +- When `tower.enabled` is specified in the Nextflow config. + +The following sections describe the data structure and metadata fields collected by Seqera Platform. + +#### Workflow metadata + +The following metadata fields are collected and stored by the application backend during a workflow execution: + +| Name | Description | +| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | +| `command_line` | The command line used to launch the workflow execution | +| `commit_id` | The workflow project commit ID at the time of the execution | +| `complete` | The workflow execution completion timestamp | +| `config_files` | The Nextflow config file paths(s) involved in the workflow execution | +| `config_text` | The Nextflow config content used for the workflow execution. 
Note: secrets, such as, AWS keys are stripped and _not_ included in this field | +| `container` | The container image name(s) used for the pipeline execution | +| `container_engine` | The container engine name used for the pipeline execution | +| `duration` | The workflow execution overall duration (wall time) | +| `error_message` | The error message reported in the case of Nextflow execution failure | +| `error_report` | The extended error message reported in case of workflow execution error | +| `exit_status` | The workflow execution (POSIX) exit code | +| `home_dir` | The launching user home directory path | +| `launch_dir` | The workflow launching directory path | +| `manifest_author` | The workflow project author as defined in the Nextflow config manifest file | +| `manifest_default_branch` | The workflow project default Git branch as defined in the Nextflow config manifest file | +| `manifest_description` | The workflow project description as defined in the Nextflow config manifest file | +| `manifest_gitmodules` | The workflow project Git submodule flag in the Nextflow config manifest file | +| `manifest_home_page` | The workflow project Git home page as defined in the Nextflow config manifest file | +| `manifest_main_script` | The workflow project main script file name as defined in the Nextflow config manifest file | +| `manifest_name` | The workflow project name as defined in the Nextflow config manifest file | +| `manifest_nextflow_version` | The workflow project required Nextflow version defined in the Nextflow config manifest file | +| `manifest_version` | The workflow project version string as defined in the Nextflow config manifest file | +| `nextflow_build` | The build number of the Nextflow runtime used to launch the workflow execution | +| `nextflow_timestamp` | The build timestamp of the Nextflow runtime used to launch the workflow execution | +| `nextflow_version` | The version string of the Nextflow runtime used to launch the workflow execution 
| +| `params` | The workflow params used to launch the pipeline execution | +| `profile` | The workflow config profile string used for the pipeline execution | +| `project_dir` | The directory path where the workflow scripts are stored | +| `project_name` | The workflow project name | +| `repository` | The workflow project repository | +| `resume` | The flag set when a resume execution was submitted | +| `revision` | The workflow project revision number | +| `run_name` | The workflow run name as given by the Nextflow runtime | +| `script_file` | The workflow script file path | +| `script_id` | The workflow script checksum number | +| `script_name` | The workflow script filename | +| `session_id` | The workflow execution unique UUID as assigned by the Nextflow runtime | +| `start` | The workflow execution start timestamp | +| `stats_cached_count` | The number of cached tasks upon completion | +| `stats_cached_duration` | The aggregate time of cached tasks upon completion | +| `stats_cached_pct` | The percentage of cached tasks upon completion | +| `stats_compute_time_fmt` | The overall compute time as a formatted string | +| `stats_failed_count` | The number of failed tasks upon completion | +| `stats_failed_count_fmt` | The number of failed tasks upon completion as a formatted string | +| `stats_failed_duration` | The aggregate time of failed tasks upon completion | +| `stats_failed_pct` | The percentage of failed tasks upon completion | +| `stats_ignored_count` | The number of ignored tasks upon completion | +| `stats_ignored_count_fmt` | The number of ignored tasks upon completion as a formatted string | +| `stats_ignored_pct` | The percentage of ignored tasks upon completion | +| `stats_succeed_count` | The number of succeeded tasks upon completion | +| `stats_succeed_count_fmt` | The number of succeeded tasks upon completion as a formatted string | +| `stats_succeed_duration` | The aggregate time of succeeded tasks upon completion | +| `stats_succeed_pct` | The 
percentage of succeeded tasks upon completion | +| `status` | The workflow execution status | +| `submit` | The workflow execution submission timestamp | +| `success` | The flag reporting whether the execution completed successfully | +| `user_name` | The POSIX user name that launched the workflow execution | +| `work_dir` | The workflow execution scratch directory path | + +#### Task metadata + +| Name | Description | +| -------------- | ---------------------------------------------------------------------------------------------- | +| `attempt` | Number of Nextflow execution attempts of the task | +| `cloud_zone` | Cloud zone where the task execution was allocated | +| `complete` | Task execution completion timestamp | +| `container` | Container image name used to execute the task | +| `cost` | Estimated task compute cost | +| `cpus` | Number of CPUs requested | +| `disk` | Amount of disk storage requested | +| `duration` | Amount of time for the task completion | +| `env` | Task execution environment variables | +| `error_action` | Action applied on task failure | +| `executor` | Executor requested for the task execution | +| `exit_status` | Task POSIX exit code on completion | +| `hash` | Task unique hash code | +| `inv_ctxt` | Number of involuntary context switches | +| `machine_type` | Cloud virtual machine type | +| `memory` | Amount of memory requested | +| `module` | Environment module requested | +| `name` | Task unique name | +| `native_id` | Task unique ID as assigned by the underlying execution platform | +| `pcpu` | Percentage of CPU used to compute the task | +| `peak_rss` | Peak of real memory during the task execution | +| `peak_vmem` | Peak of virtual memory during the task execution | +| `pmem` | Percentage of memory used to compute the task | +| `price_model` | The cloud price model applied for the task | +| `process` | The Nextflow process name | +| `queue` | The compute queue name requested | +| `rchar` | Number of bytes the process read, 
using any read-like system call from files, pipes, tty, etc. | +| `read_bytes` | Number of bytes the process directly read from disk | +| `realtime` | The time required to compute the task | +| `rss` | Real memory (resident set) size of the process | +| `scratch` | Flag reporting the task was executed in a local scratch path | +| `script` | The task command script | +| `start` | Task execution start timestamp | +| `status` | The task execution status | +| `submit` | Task submission timestamp | +| `syscr` | Number of read-like system call invocations that the process performed | +| `syscw` | Number of write-like system call invocations that the process performed | +| `tag` | Nextflow tag associated with the task execution | +| `task_id` | Nextflow task ID | +| `time` | Task execution timeout requested | +| `vmem` | Virtual memory size used by the task execution | +| `vol_ctxt` | Number of voluntary context switches | +| `wchar` | Number of bytes the process wrote, using any write-like system call | +| `workdir` | Task execution work directory | +| `write_bytes` | Number of bytes the process wrote to disk | diff --git a/platform-enterprise/data/_images/data_explorer.png b/platform-enterprise/data/_images/data_explorer.png new file mode 100644 index 000000000..ae4c7d393 Binary files /dev/null and b/platform-enterprise/data/_images/data_explorer.png differ diff --git a/platform-enterprise/data/data-explorer.md b/platform-enterprise/data/data-explorer.md new file mode 100644 index 000000000..ca9db87bc --- /dev/null +++ b/platform-enterprise/data/data-explorer.md @@ -0,0 +1,234 @@ +--- +title: "Data Explorer" +description: "Using Seqera Data Explorer." +date: "08 May 2025" +tags: [data, explorer] +--- + +With Data Explorer, you can browse and interact with remote data repositories from organization workspaces in Seqera Platform. It supports AWS S3, Azure Blob Storage, and Google Cloud Storage repositories. 
+ +:::note +To enable Data Explorer globally for all workspaces and users in your **Seqera Enterprise** account, set `TOWER_DATA_EXPLORER_ENABLED=true` in your environment variables (default: `false`). + +To disable Data Explorer automatic cloud bucket retrieval per workspace in your **Seqera Enterprise** account, specify comma-separated workspace IDs with the `TOWER_DATA_EXPLORER_CLOUD_DISABLED_WORKSPACES` environment variable. +::: + +Access the **Data Explorer** tab from any enabled workspace to view and manage all available cloud storage buckets. It is also integrated with the pipeline launch form and run detail pages, which allows you to select input data files and output directories or quickly view the output files of a run. + +If you use Seqera Cloud and want to disable Data Explorer, [contact](https://seqera.io/contact-us/) your Seqera account executive. + +## Participant roles + +The role assigned to a workspace user affects what functionality is available in Data Explorer. The specific functionality for each role is described in the following list of roles: + +- **View**: Can only view contents of cloud storage buckets. Cannot download, upload, or preview. Cannot hide or add buckets. +- **Launch**: Can only view contents of cloud storage buckets. Cannot download, upload, or preview. Cannot hide or add buckets. +- **Connect**: Can only view contents of cloud storage buckets. Cannot download, upload, or preview. Cannot hide or add buckets. +- **Maintain**: Can view, download, upload, and preview contents of cloud storage buckets. Can hide and add buckets. +- **Admin**: Can view, download, upload, and preview contents of cloud storage buckets. Can hide and add buckets. +- **Owner**: Can view, download, upload, and preview contents of cloud storage buckets. Can hide and add buckets. + +For more information on roles in Seqera Platform, see [Participant roles][roles]. 
+ +## Add cloud storage buckets + +Data Explorer lists public and private cloud storage buckets. Buckets accessible to your workspace cloud credentials are retrieved automatically; workspace maintainers can also configure buckets manually. + +- **Retrieve cloud buckets with workspace credentials** + + Private cloud storage buckets accessible to the credentials defined in your workspace are listed in Data Explorer automatically. The cloud storage permissions required for your [AWS](../compute-envs/aws-batch#iam), [Google Cloud](../compute-envs/google-cloud-batch#iam), and [Azure Batch](../compute-envs/azure-batch#create-a-storage-account) compute environment credentials allow full Data Explorer functionality. + +- **Configure individual buckets manually** + + Select **Add cloud bucket** from the Data Explorer tab to add individual buckets (or directory paths within buckets). Specify the **Provider**, **Bucket path**, **Name**, **Credentials**, and **Description**, then select **Add**. For public cloud buckets, select **Public** from the **Credentials** drop-down menu. + +## Remove cloud storage buckets + +A workspace maintainer can remove a manually created cloud storage bucket. + +From the **Data Explorer** tab, find the cloud bucket that you want to delete. Select the options menu for the bucket, and select **Remove**. When prompted, select **Remove** from the confirmation modal that appears. + +If you remove a data link associated with a cloud storage bucket, the data link is automatically removed from the relevant Studio configuration. + +## Browse cloud storage buckets + +![](./_images/data_explorer.png) + +- **View bucket details** + + To view bucket details such as the cloud provider, bucket address, and credentials, select the information icon next to a bucket in the Data Explorer list. + +- **Search and filter buckets** + + Search for buckets by name and region (e.g., `region:eu-west-2`) in the search field, and filter by provider. 
+ +- **Hide buckets from list view** + + Using checkboxes, choose one or more buckets, then select the **Hide** icon in the Data Explorer toolbar. To hide buckets individually, select **Hide** from the three dots options menu of a bucket in the list. + + The Data Explorer list filter defaults to **Only visible**. Select **Only hidden** or **All** from the filtering menu to view hidden buckets in the list. You can unhide a bucket by selecting **Show** from the three dots options menu in the list view. + +- **View bucket contents** + + Select a bucket name from the Data Explorer list to view the contents of that bucket. From the **View cloud bucket** page, you can browse directories and search for objects by name in a particular directory. The size and path of an object is displayed in columns to the right of the object name. To view bucket details such as the cloud provider, bucket address, and credentials, select the information icon. + +- **Preview and download files** + + From the **View cloud bucket** page, you can preview and download files. Select the download icon in the **Actions** column to download a file directly from the list view. Select a file to open a preview window that includes a **Download** button. + + File preview is supported for these object types: + + - Nextflow output files ( `.command.*`, `.fusion.*` and `.exitcode` ) + - Text + - CSV and TSV + - PDF + - HTML + - Images (JPG, PNG, SVG, etc.) + + :::note + The file size limit for preview is 10 MB. 10-25 MB files can still be downloaded directly. + + Seqera Enterprise users can increase the default 25 MB file size download limit with `tower.content.max-file-size` in the `tower.yml` [configuration](../enterprise/configuration/overview#data-features) file. Note that increasing this value may degrade Platform performance. + ::: + +- **Copy bucket/object paths** + + Select the **Path** of an object on the **View cloud bucket** page to copy its absolute path to the clipboard. 
Use these object paths to specify input data locations during [pipeline launch](../launch/launchpad), or add them to a [dataset](../data/datasets) for pipeline input. + +## Isolate view, read, and write permissions to specific cloud storage bucket paths + +To isolate pipeline or Studios view, read, and write permissions to a specific **Bucket path**, workspace maintainers can optionally create **custom data-links** by manually configuring an individual bucket plus path to a specific folder/directory. This is supported to any level of the cloud storage bucket path hierarchy, provided it is a folder (also known as a **prefix**). You can optionally choose to **Hide** or **Show** either the root bucket and/or any related custom data-links on demand in Data Explorer using the **Show/Hide** toggle and the **Show cloud buckets** filter options: + +- Only visible (default) +- Only hidden +- All + +:::note +This customized Data Explorer view will be displayed by default to all workspace users, until the filter is updated or removed by a workspace maintainer. +::: + +## Upload files to private buckets + +Data Explorer supports file uploads to your private cloud storage buckets. From the **View cloud bucket** page, select **Upload file** to select the files you wish to upload. + +:::note +You must configure cross-origin resource sharing (CORS) for your cloud storage provider to allow file uploads from Seqera. CORS configuration differs for each provider. +::: + +## Download multiple files + +You can download up to a maximum of 1,000 files using the browser interface, or an unlimited number of files with the auto-generated download script that uses your cloud provider's CLI and credentials. + +:::note +If you use a non-Chromium based browser, such as Safari or Firefox, file paths are concatenated with an underscore (`_`) character and the cloud bucket directory structure is not reproduced locally. 
For example, the file `s3://example-us-east-1/path/to/files/my-file-1.txt` is saved as `path_to_files_my-file-1.txt`. +::: + +Open the cloud bucket and navigate to the folder that you'd like to download files and folders from. By default, you can download the contents of the current directory by choosing **Download current directory**. Alternatively, use checkboxes to select specific files and folders, and select the **Download** button. You can **Download files** via the browser or **Download using code**. + +The code snippet provided is specific to the cloud provider you've configured. You may be prompted to authenticate during the download process. Refer to your cloud provider's documentation for troubleshooting credential-related issues: + +- [GCP](https://cloud.google.com/sdk/gcloud/reference/storage) +- [AWS](https://docs.aws.amazon.com/cli/latest/reference/s3/) +- [Azure](https://learn.microsoft.com/en-us/cli/azure/storage?view=azure-cli-latest) + +## CORS configurations + +Each public cloud provider has a specific way to allow Cross-Origin Resource Sharing (CORS) for both uploads and multi-file downloads. + +### Amazon S3 CORS configuration + +Apply a [CORS configuration](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ManageCorsUsing.html) to enable file uploads and folder downloads from the Seqera Platform to and from specific S3 buckets. The CORS configuration is a JSON file that defines the origins, headers, and methods allowed for resource sharing requests to a bucket. 
Follow [these AWS instructions](https://docs.aws.amazon.com/AmazonS3/latest/userguide/enabling-cors-examples.html) to apply the CORS configuration below to each bucket you wish to enable file uploads and folder downloads for: + +**Seqera Cloud S3 CORS configuration** + +```json +[ + { + "AllowedHeaders": ["*"], + "AllowedMethods": ["PUT", "POST", "DELETE", "GET"], + "AllowedOrigins": ["https://cloud.seqera.io"], + "ExposeHeaders": ["ETag"] + } +] +``` + +**Seqera Enterprise S3 CORS configuration** + +Replace `<your-enterprise-server>` with your Seqera Enterprise server URL: + +```json +[ + { + "AllowedHeaders": ["*"], + "AllowedMethods": ["PUT", "POST", "DELETE", "GET"], + "AllowedOrigins": ["https://<your-enterprise-server>"], + "ExposeHeaders": ["ETag"] + } +] +``` + +### Azure Blob Storage CORS configuration + +:::note +CORS configuration in Azure Blob Storage is set at the account level. This means that CORS rules for your account apply to every blob in the account. +::: + +Apply a [CORS configuration](https://learn.microsoft.com/en-us/rest/api/storageservices/cross-origin-resource-sharing--cors--support-for-the-azure-storage-services#enabling-cors-for-azure-storage) to enable file uploads and folder downloads from the Seqera Platform to and from your Azure Blob Storage account. + +**Seqera Cloud Azure CORS configuration** + +1. From the [Azure portal](https://portal.azure.com), go to the **Storage account** you wish to configure. +2. Under **Settings** in the left navigation menu, select **Resource sharing (CORS)**. +3. Add a new entry under **Blob service**: + + - **Allowed origins**: `https://cloud.seqera.io` + - **Allowed methods**: `GET,POST,PUT,DELETE,HEAD` + - **Allowed headers**: `x-ms-blob-type,content-type` + - **Exposed headers**: `x-ms-blob-type` + +4. Select **Save** to apply the CORS configuration. + +**Seqera Enterprise Azure CORS configuration** + +1. From the [Azure portal](https://portal.azure.com), go to the Storage account you wish to configure. +2. 
Under **Settings** in the left navigation menu, select **Resource sharing (CORS)**. +3. Add a new entry under **Blob service**: + + - **Allowed origins**: `https://` + - **Allowed methods**: `GET,POST,PUT,DELETE,HEAD` + - **Allowed headers**: `x-ms-blob-type,content-type` + - **Exposed headers**: `x-ms-blob-type` + +4. Select **Save** to apply the CORS configuration. + +### Google Cloud Storage CORS configuration + +Apply a [CORS configuration](https://cloud.google.com/storage/docs/cross-origin#cors-components) to enable file uploads from Seqera to specific GCS buckets. The CORS configuration is a JSON file that defines the origins, headers, and methods allowed for resource sharing requests to a bucket. Follow [these Google instructions](https://cloud.google.com/storage/docs/using-cors#command-line) to apply the CORS configuration below to each bucket you wish to enable file uploads for. + +:::note +Google Cloud Storage only supports CORS configuration via gcloud CLI. +::: + +**Seqera Cloud GCS CORS configuration** + +```json +{ + "origin": ["https://cloud.seqera.io"], + "method": ["GET", "POST", "PUT", "DELETE", "HEAD"], + "responseHeader": ["Content-Type", "Content-Range"], + "maxAgeSeconds": 3600 +} +``` + +**Seqera Enterprise GCS CORS configuration** + +```json +{ + "origin": ["https://"], + "method": ["GET", "POST", "PUT", "DELETE", "HEAD"], + "responseHeader": ["Content-Type", "Content-Range"], + "maxAgeSeconds": 3600 +} +``` + +[roles]: ../orgs-and-teams/roles diff --git a/platform-enterprise/data/datasets.md b/platform-enterprise/data/datasets.md new file mode 100644 index 000000000..0354f82ea --- /dev/null +++ b/platform-enterprise/data/datasets.md @@ -0,0 +1,82 @@ +--- +title: "Datasets" +description: "Using datasets in Seqera Platform." +date: "21 Apr 2023" +tags: [datasets] +--- + +:::note +This feature is only available in organization workspaces. 
+::: + +Datasets in Seqera are CSV (comma-separated values) and TSV (tab-separated values) files stored in a workspace. They are used as inputs to pipelines to simplify data management, minimize user data-input errors, and facilitate reproducible workflows. + +The most commonly used datasets for Nextflow pipelines are samplesheets, where each row consists of a sample, the location of files for that sample (such as FASTQ files), and other sample details. For example, [nf-core/rnaseq](https://github.com/nf-core/rnaseq) works with input datasets (samplesheets) containing sample names, FASTQ file locations, and indications of strandedness. The Seqera Community Showcase sample dataset for _nf-core/rnaseq_ looks like this: + +**Example rnaseq dataset** + +|sample |fastq_1 |fastq_2 |strandedness| +|-------------------|------------------------------------|---------------------------------------------|------------| +|WT_REP1 |s3://nf-core-awsmegatests/rnaseq/...|s3://nf-core-awsmegatests/rnaseq/... |reverse | +|WT_REP1 |s3://nf-core-awsmegatests/rnaseq/...|s3://nf-core-awsmegatests/rnaseq/... |reverse | +|WT_REP2 |s3://nf-core-awsmegatests/rnaseq/...|s3://nf-core-awsmegatests/rnaseq/... |reverse | +|RAP1_UNINDUCED_REP1|s3://nf-core-awsmegatests/rnaseq/...| |reverse | +|RAP1_UNINDUCED_REP2|s3://nf-core-awsmegatests/rnaseq/...| |reverse | +|RAP1_UNINDUCED_REP2|s3://nf-core-awsmegatests/rnaseq/...| |reverse | +|RAP1_IAA_30M_REP1 |s3://nf-core-awsmegatests/rnaseq/...|s3://nf-core-awsmegatests/rnaseq/... |reverse | + +:::note +Use [Data Explorer](../data/data-explorer) to browse for cloud storage objects directly and copy the object paths to be used in your datasets. +::: + +The combination of datasets, [secrets](../secrets/overview), and [actions](../pipeline-actions/overview) in the application allows you to automate workflows to curate your data and maintain and launch pipelines based on specific events. 
See [here](https://seqera.io/blog/workflow-automation/) for an example of pipeline workflow automation using Seqera. + +- Datasets reduce errors that occur due to manual data entry when you launch pipelines. +- Datasets can be generated automatically in response to events (such as S3 storage new file notifications). +- Datasets can streamline differential data analysis when using the same pipeline to launch a run for each dataset as it becomes available. + +For your pipeline to use your dataset as input during runtime, information about the dataset and file format must be included in the relevant parameters of your [pipeline schema](../pipeline-schema/overview). The pipeline schema specifies the accepted dataset file type in the `mimetype` attribute (either `text/csv` or `text/tsv`). + +## Dataset validation and file content requirements + +Seqera doesn't validate your dataset file contents. While datasets can contain static file links, you're responsible for maintaining the access to that data. + +Datasets can point to files stored in various locations, such as Amazon S3 or GitHub. To stage the file paths defined in the dataset, Nextflow requires access to the infrastructure where the files reside, whether on cloud or HPC systems. Add the access keys for data sources that require authentication to your [secrets](../secrets/overview). + +### Create a dataset + +All Seqera users with any role have access to the datasets feature in organization workspaces. To create a new dataset: + +1. Open the **Datasets** tab in your organization workspace. +2. Select **New dataset**. +3. Complete the **Name** and **Description** fields using information relevant to your dataset. +4. Add the dataset file to your workspace with drag-and-drop or the system file explorer dialog. +5. For dataset files that use the first row for column names, customize the dataset view with the **First row as header** option. + +:::note +The size of the dataset file must not exceed 10 MB. 
+::: + +## Dataset versions + +Seqera can accommodate multiple versions of a dataset. To add a new version for an existing dataset, follow these steps: + +1. Select **Edit** next to the dataset you wish to update. +2. Select **Add a new version**. +3. Upload the newer version of the dataset and select **Update**. + +:::caution +All subsequent versions of a dataset must be the same format (CSV or TSV) as the initial version. +::: + +### Use a dataset + +To use a dataset with the saved pipelines in your workspace: + +1. Open any pipeline that contains a pipeline schema from the [Launchpad](../launch/launchpad). +2. Select the input field for the pipeline, removing any default values. +3. Pick the dataset to use as input to your pipeline. + +:::note +The input field drop-down menu will only display datasets that match the file type specified in the `nextflow_schema.json` of the chosen pipeline. If the schema specifies `"mimetype": "text/csv"`, no TSV datasets will be available for use with that pipeline, and vice-versa. +::: diff --git a/platform-enterprise/enterprise/_images/README.md b/platform-enterprise/enterprise/_images/README.md new file mode 100644 index 000000000..704efad27 --- /dev/null +++ b/platform-enterprise/enterprise/_images/README.md @@ -0,0 +1,6 @@ +# Source locations + +The images under this folder are both generated from source files in the Seqera company Google Drive. If you're a Seqera employee, search for the following in Google Drive for the respective source files: + +- "Seqera reference architecture" for the source file of 'seqera_reference_architecture.png'. +- "References, Architectures & Diagrams" for the source file of 'seqera_reference_architecture_aws.png'. 
diff --git a/platform-enterprise/enterprise/_images/seqera_reference_architecture.png b/platform-enterprise/enterprise/_images/seqera_reference_architecture.png new file mode 100644 index 000000000..4639e1e76 Binary files /dev/null and b/platform-enterprise/enterprise/_images/seqera_reference_architecture.png differ diff --git a/platform-enterprise/enterprise/_images/seqera_reference_architecture_aws.png b/platform-enterprise/enterprise/_images/seqera_reference_architecture_aws.png new file mode 100644 index 000000000..6339f0116 Binary files /dev/null and b/platform-enterprise/enterprise/_images/seqera_reference_architecture_aws.png differ diff --git a/platform-enterprise/enterprise/_templates/Seqera-Role-Policy.json b/platform-enterprise/enterprise/_templates/Seqera-Role-Policy.json new file mode 100644 index 000000000..4d23e2deb --- /dev/null +++ b/platform-enterprise/enterprise/_templates/Seqera-Role-Policy.json @@ -0,0 +1,109 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "TowerForgePermissions", + "Effect": "Allow", + "Action": [ + "ssm:GetParameters", + "iam:CreateInstanceProfile", + "iam:DeleteInstanceProfile", + "iam:GetRole", + "iam:RemoveRoleFromInstanceProfile", + "iam:CreateRole", + "iam:DeleteRole", + "iam:AttachRolePolicy", + "iam:PutRolePolicy", + "iam:AddRoleToInstanceProfile", + "iam:PassRole", + "iam:DetachRolePolicy", + "iam:ListAttachedRolePolicies", + "iam:DeleteRolePolicy", + "iam:ListRolePolicies", + "batch:CreateComputeEnvironment", + "batch:DescribeComputeEnvironments", + "batch:CreateJobQueue", + "batch:DescribeJobQueues", + "batch:UpdateComputeEnvironment", + "batch:DeleteComputeEnvironment", + "batch:UpdateJobQueue", + "batch:DeleteJobQueue", + "fsx:DeleteFileSystem", + "fsx:DescribeFileSystems", + "fsx:CreateFileSystem", + "ec2:DescribeSecurityGroups", + "ec2:DescribeAccountAttributes", + "ec2:DescribeSubnets", + "ec2:DescribeLaunchTemplates", + "ec2:DescribeLaunchTemplateVersions", + "ec2:CreateLaunchTemplate", + 
"ec2:DeleteLaunchTemplate", + "ec2:DescribeKeyPairs", + "ec2:DescribeVpcs", + "ec2:DescribeInstanceTypeOfferings", + "elasticfilesystem:DescribeMountTargets", + "elasticfilesystem:CreateMountTarget", + "elasticfilesystem:CreateFileSystem", + "elasticfilesystem:DescribeFileSystems", + "elasticfilesystem:DeleteMountTarget", + "elasticfilesystem:DeleteFileSystem", + "elasticfilesystem:UpdateFileSystem", + "elasticfilesystem:PutLifecycleConfiguration" + ], + "Resource": "*" + }, + { + "Sid": "TowerLaunchPermissions", + "Effect": "Allow", + "Action": [ + "batch:DescribeJobQueues", + "batch:CancelJob", + "batch:SubmitJob", + "batch:ListJobs", + "batch:DescribeComputeEnvironments", + "batch:TerminateJob", + "batch:DescribeJobs", + "batch:RegisterJobDefinition", + "batch:DescribeJobDefinitions", + "ecs:DescribeTasks", + "ec2:DescribeInstances", + "ec2:DescribeInstanceTypes", + "ec2:DescribeInstanceAttribute", + "ecs:DescribeContainerInstances", + "ec2:DescribeInstanceStatus", + "ec2:DescribeImages", + "logs:Describe*", + "logs:Get*", + "logs:List*", + "logs:StartQuery", + "logs:StopQuery", + "logs:TestMetricFilter", + "logs:FilterLogEvents" + ], + "Resource": "*" + }, + { + "Sid": "BucketPolicy01", + "Effect": "Allow", + "Action": [ + "s3:ListAllMyBuckets", + "s3:ListBucket", + "s3:GetBucketLocation" + ], + "Resource": [ + "*" + ] + }, + { + "Sid": "BucketPolicy02", + "Effect": "Allow", + "Action": [ + "s3:*Object*" + ], + "Resource": [ + "arn:aws:s3:::YOUR-BUCKET-01/*", + "arn:aws:s3:::YOUR-BUCKET-02/*" + ] + } + ] +} \ No newline at end of file diff --git a/platform-enterprise/enterprise/_templates/Seqera-Role-Trust-Policy.json b/platform-enterprise/enterprise/_templates/Seqera-Role-Trust-Policy.json new file mode 100644 index 000000000..5c36818e3 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/Seqera-Role-Trust-Policy.json @@ -0,0 +1,34 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Service": 
"ec2.amazonaws.com" + }, + "Action": "sts:AssumeRole" + }, + { + "Effect": "Allow", + "Principal": { + "Service": "ecs-tasks.amazonaws.com" + }, + "Action": "sts:AssumeRole" + }, + { + "Effect": "Allow", + "Principal": { + "Service": "eks.amazonaws.com" + }, + "Action": "sts:AssumeRole" + }, + { + "Sid": "AllowEc2AssumeRole", + "Effect": "Allow", + "Principal": { + "AWS": "arn:aws:iam::YOUR-AWS-ACCOUNT:USER-OR-ROLE/USER-OR-ROLE-ID" + }, + "Action": "sts:AssumeRole" + } + ] +} \ No newline at end of file diff --git a/platform-enterprise/enterprise/_templates/cloudformation/aws-ecs-cloudformation.json b/platform-enterprise/enterprise/_templates/cloudformation/aws-ecs-cloudformation.json new file mode 100644 index 000000000..650af62cd --- /dev/null +++ b/platform-enterprise/enterprise/_templates/cloudformation/aws-ecs-cloudformation.json @@ -0,0 +1,353 @@ +{ + "AWSTemplateFormatVersion" : "2010-09-09", + "Description" : "Nextflow Tower ECS template", + "Parameters": { + "ClusterName": { + "Type": "String", + "Description": "ECS cluster name" + }, + "TowerSmtpHost": { + "Type": "String", + "Description": "SMTP server hostname" + }, + "TowerSmtpPort": { + "Type": "String", + "Description": "SMTP server port" + }, + "TowerSmtpUser": { + "Type": "String", + "Description": "SMTP server username", + "NoEcho": "true" + }, + "TowerSmtpPassword": { + "Type": "String", + "Description": "SMTP server password", + "NoEcho": "true" + }, + "TowerContactEmail": { + "Type": "String", + "Description": "Email for login emails" + }, + "TowerServerUrl": { + "Type": "String", + "Description": "IP address of container instance" + }, + "TowerJwtSecret": { + "Type": "String", + "Description": ">256 bit random string", + "NoEcho": "true" + }, + "TowerCryptoSecretkey": { + "Type": "String", + "Description": ">256 bit random string", + "NoEcho": "true" + }, + "TowerLicense": { + "Type": "String", + "Description": "The Tower License", + "NoEcho": "true" + }, + "TowerDbUrl": { + "Type": "String", 
+ "Description": "MySQL DB connection URL" + }, + "TowerDbUser": { + "Type": "String", + "Description": "MySQL DB username" + }, + "TowerDbPassword": { + "Type": "String", + "Description": "MySQL DB password", + "NoEcho": "true" + } + }, + "Resources" : { + "TowerTask": { + "Type": "AWS::ECS::TaskDefinition", + "Properties": { + "NetworkMode": "bridge", + "ContainerDefinitions": [ + { + "Name": "redis", + "Image": "cr.seqera.io/public/redis:7.0.10", + "Memory": 2000, + "Cpu": 0, + "PortMappings": [{ + "ContainerPort": 6379, + "HostPort": 6379 + }], + "Command": ["--appendonly yes"] + }, + { + "Name": "cron", + "Image": "cr.seqera.io/private/nf-tower-enterprise/backend:v25.1.0", + "Memory": 2000, + "Cpu": 0, + "Links": [ + "redis" + ], + "DependsOn": [ + { + "ContainerName": "redis", + "Condition": "START" + } + ], + "WorkingDirectory": "/work", + "EntryPoint": [ "/bin/sh" ], + "Command": [ "-c", "/migrate-db.sh; /tower.sh" ], + "Environment": [ + { + "Name": "TOWER_CONTACT_EMAIL", + "Value": { + "Ref": "TowerContactEmail" + } + }, + { + "Name": "TOWER_SMTP_HOST", + "Value": { + "Ref": "TowerSmtpHost" + } + }, + { + "Name": "TOWER_SMTP_PORT", + "Value": { + "Ref": "TowerSmtpPort" + } + }, + { + "Name": "TOWER_SMTP_USER", + "Value": { + "Ref": "TowerSmtpUser" + } + }, + { + "Name": "TOWER_SMTP_PASSWORD", + "Value": { + "Ref": "TowerSmtpPassword" + } + }, + { + "Name": "TOWER_DB_URL", + "Value": { + "Ref": "TowerDbUrl" + } + }, + { + "Name": "TOWER_DB_DRIVER", + "Value": "org.mariadb.jdbc.Driver" + }, + { + "Name": "TOWER_DB_DIALECT", + "Value": "io.seqera.util.MySQL55DialectCollateBin" + }, + { + "Name": "TOWER_DB_USER", + "Value": { + "Ref": "TowerDbUser" + } + }, + { + "Name": "TOWER_DB_PASSWORD", + "Value": { + "Ref": "TowerDbPassword" + } + }, + { + "Name": "TOWER_SERVER_URL", + "Value": { + "Ref": "TowerServerUrl" + } + }, + { + "Name": "MICRONAUT_ENVIRONMENTS", + "Value": "prod,redis,cron" + }, + { + "Name": "TOWER_ENABLE_PLATFORMS", + "Value": 
"awsbatch-platform,azbatch-platform,gls-platform,slurm-platform" + }, + { + "Name": "TOWER_JWT_SECRET", + "Value": { + "Ref": "TowerJwtSecret" + } + }, + { + "Name": "TOWER_CRYPTO_SECRETKEY", + "Value": { + "Ref": "TowerCryptoSecretkey" + } + }, + { + "Name": "TOWER_LICENSE", + "Value": { + "Ref": "TowerLicense" + } + }, + { + "Name": "FLYWAY_LOCATIONS", + "Value": "classpath:db-schema/mysql" + } + ] + }, + { + "Name": "frontend", + "Image": "cr.seqera.io/private/nf-tower-enterprise/frontend:v25.1.0", + "Memory": 2000, + "Cpu": 0, + "Essential": false, + "PortMappings": [{ + "ContainerPort": 80, + "HostPort": 80 + }], + "Links": [ + "backend" + ], + "DependsOn": [ + { + "ContainerName": "backend", + "Condition": "START" + } + ] + }, + { + "Name": "backend", + "Hostname": "backend", + "Memory": 2000, + "Cpu": 0, + "Image": "cr.seqera.io/private/nf-tower-enterprise/backend:v25.1.0", + "PortMappings": [{ + "ContainerPort": 8080, + "HostPort": 8080 + }], + "Essential": false, + "Links": [ + "redis", + "cron" + ], + "WorkingDirectory": "/work", + "DependsOn": [ + { + "ContainerName": "cron", + "Condition": "START" + }, + { + "ContainerName": "redis", + "Condition": "START" + } + ], + "Environment": [ + { + "Name": "TOWER_CONTACT_EMAIL", + "Value": { + "Ref": "TowerContactEmail" + } + }, + { + "Name": "TOWER_SMTP_HOST", + "Value": { + "Ref": "TowerSmtpHost" + } + }, + { + "Name": "TOWER_SMTP_PORT", + "Value": { + "Ref": "TowerSmtpPort" + } + }, + { + "Name": "TOWER_SMTP_USER", + "Value": { + "Ref": "TowerSmtpUser" + } + }, + { + "Name": "TOWER_SMTP_PASSWORD", + "Value": { + "Ref": "TowerSmtpPassword" + } + }, + { + "Name": "TOWER_DB_URL", + "Value": { + "Ref": "TowerDbUrl" + } + }, + { + "Name": "TOWER_DB_DRIVER", + "Value": "org.mariadb.jdbc.Driver" + }, + { + "Name": "TOWER_DB_DIALECT", + "Value": "io.seqera.util.MySQL55DialectCollateBin" + }, + { + "Name": "TOWER_DB_USER", + "Value": { + "Ref": "TowerDbUser" + } + }, + { + "Name": "TOWER_DB_PASSWORD", + "Value": { + 
"Ref": "TowerDbPassword" + } + }, + { + "Name": "TOWER_SERVER_URL", + "Value": { + "Ref": "TowerServerUrl" + } + }, + { + "Name": "MICRONAUT_ENVIRONMENTS", + "Value": "prod,redis,ha" + }, + { + "Name": "TOWER_ENABLE_PLATFORMS", + "Value": "awsbatch-platform,azbatch-platform,gls-platform,slurm-platform" + }, + { + "Name": "TOWER_JWT_SECRET", + "Value": { + "Ref": "TowerJwtSecret" + } + }, + { + "Name": "TOWER_CRYPTO_SECRETKEY", + "Value": { + "Ref": "TowerCryptoSecretkey" + } + }, + { + "Name": "TOWER_LICENSE", + "Value": { + "Ref": "TowerLicense" + } + }, + { + "Name": "FLYWAY_LOCATIONS", + "Value": "classpath:db-schema/mysql" + } + ], + "EntryPoint": [ "/bin/sh" ], + "Command": [ "-c", "/tower.sh" ] + }] + } + }, + "TowerService": { + "Type" : "AWS::ECS::Service", + "Properties" : { + "Cluster": { + "Ref": "ClusterName" + }, + "DesiredCount" : 1, + "ServiceName" : "TowerService", + "TaskDefinition": { + "Ref": "TowerTask" + }, + "LaunchType": "EC2" + } + } + } +} diff --git a/platform-enterprise/enterprise/_templates/cloudformation/params.json.template b/platform-enterprise/enterprise/_templates/cloudformation/params.json.template new file mode 100644 index 000000000..1e69ec983 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/cloudformation/params.json.template @@ -0,0 +1,54 @@ +[ + { + "ParameterKey": "ClusterName", + "ParameterValue": "" + }, + { + "ParameterKey": "TowerSmtpUser", + "ParameterValue": "" + }, + { + "ParameterKey": "TowerSmtpPassword", + "ParameterValue": "" + }, + { + "ParameterKey": "TowerContactEmail", + "ParameterValue": "" + }, + { + "ParameterKey": "TowerServerUrl", + "ParameterValue": "" + }, + { + "ParameterKey": "TowerJwtSecret", + "ParameterValue": "" + }, + { + "ParameterKey": "TowerCryptoSecretkey", + "ParameterValue": "" + }, + { + "ParameterKey": "TowerLicense", + "ParameterValue": "" + }, + { + "ParameterKey": "TowerDbUrl", + "ParameterValue": "tower" + }, + { + "ParameterKey": "TowerDbUser", + "ParameterValue": "tower" 
+ }, + { + "ParameterKey": "TowerDbPassword", + "ParameterValue": "tower" + }, + { + "ParameterKey": "TowerSmtpHost", + "ParameterValue": "email-smtp.eu-west-1.amazonaws.com" + }, + { + "ParameterKey": "TowerSmtpPort", + "ParameterValue": "587" + } +] diff --git a/platform-enterprise/enterprise/_templates/docker/data-studios-rsa.pem b/platform-enterprise/enterprise/_templates/docker/data-studios-rsa.pem new file mode 100644 index 000000000..64cd67dd3 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/docker/data-studios-rsa.pem @@ -0,0 +1,6 @@ +-----BEGIN PRIVATE KEY----- + +-----END PRIVATE KEY----- +-----BEGIN PUBLIC KEY----- + +-----END PUBLIC KEY----- diff --git a/platform-enterprise/enterprise/_templates/docker/data-studios.env b/platform-enterprise/enterprise/_templates/docker/data-studios.env new file mode 100644 index 000000000..50aa4d745 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/docker/data-studios.env @@ -0,0 +1,7 @@ +# Data studios settings +PLATFORM_URL= +CONNECT_HTTP_PORT=9090 +CONNECT_TUNNEL_URL=connect-server:7070 +CONNECT_REDIS_ADDRESS=redis:6379 +CONNECT_PROXY_URL= +CONNECT_OIDC_CLIENT_REGISTRATION_TOKEN= diff --git a/platform-enterprise/enterprise/_templates/docker/docker-compose.yml b/platform-enterprise/enterprise/_templates/docker/docker-compose.yml new file mode 100644 index 000000000..defc67c8f --- /dev/null +++ b/platform-enterprise/enterprise/_templates/docker/docker-compose.yml @@ -0,0 +1,159 @@ +version: "3" +services: + db: + image: mysql:8.0 + platform: linux/amd64 + networks: + - backend + expose: + - 3306 + environment: + MYSQL_ALLOW_EMPTY_PASSWORD: "yes" + MYSQL_USER: tower + MYSQL_PASSWORD: tower + MYSQL_DATABASE: tower + healthcheck: + test: ["CMD", "mysqladmin" , "ping", "-h", "localhost"] + timeout: 20s + retries: 10 + restart: always + volumes: + # To use the pipeline resource optimization service, uncomment the lines below to mount the initialization script + # - type: bind + # source: ./init.sql 
+ # target: /docker-entrypoint-initdb.d/init.sql + + # Store the MySQL data in a directory on the host + - $HOME/.tower/db/mysql:/var/lib/mysql + + redis: + image: cr.seqera.io/public/redis:7.0.10 + platform: linux/amd64 + networks: + - backend + expose: + - 6379 + command: --appendonly yes + restart: always + # Store the Redis data in a directory on the host + volumes: + - $HOME/.tower/db/redis:/data + + migrate: + image: cr.seqera.io/private/nf-tower-enterprise/migrate-db:v25.1.1 + platform: linux/amd64 + command: -c "/migrate-db.sh" + networks: + - backend + volumes: + - $PWD/tower.yml:/tower.yml + env_file: + # Seqera environment variables — see https://docs.seqera.io/platform-enterprise/latest/enterprise/configuration/overview for details + - tower.env + restart: "no" + + depends_on: + db: + condition: service_healthy + + cron: + image: cr.seqera.io/private/nf-tower-enterprise/backend:v25.1.1 + platform: linux/amd64 + command: -c '/tower.sh' + networks: + - backend + volumes: + # A Studios RSA key is required for the Studios functionality. Uncomment the line below to mount the key. + #- $PWD/data-studios-rsa.pem:/data-studios-rsa.pem + - $PWD/tower.yml:/tower.yml + env_file: + - tower.env + environment: + # Micronaut environments are required. Do not edit these values + - MICRONAUT_ENVIRONMENTS=prod,redis,cron + restart: always + depends_on: + migrate: + condition: service_completed_successfully + + + backend: + image: cr.seqera.io/private/nf-tower-enterprise/backend:v25.1.1 + platform: linux/amd64 + command: -c '/wait-for-it.sh db:3306 -t 60; /tower.sh' + networks: + - frontend + - backend + expose: + - 8080 + volumes: + - $PWD/tower.yml:/tower.yml + # Data studios RSA key is required for the data studios functionality. Uncomment the line below to mount the key. 
+ #- $PWD/data-studios-rsa.pem:/data-studios-rsa.pem + env_file: + # Seqera environment variables — see https://docs.seqera.io/platform-enterprise/latest/enterprise/configuration/overview for details + - tower.env + environment: + # Micronaut environments are required. Do not edit these values + - MICRONAUT_ENVIRONMENTS=prod,redis,ha + restart: always + depends_on: + - db + - redis + - cron + + frontend: + image: cr.seqera.io/private/nf-tower-enterprise/frontend:v25.1.1 + platform: linux/amd64 + networks: + - frontend + ports: + - 8000:80 + restart: always + depends_on: + - backend + +# Uncomment the following section to enable Data Studios functionality. See [Data Studios configuration](../../data-studios.mdx) for more information. +# connect-proxy: +# image: cr.seqera.io/private/nf-tower-enterprise/data-studio/connect-proxy:0.8.0 +# platform: linux/amd64 +# env_file: +# - data-studios.env +# networks: +# - frontend +# - backend +# ports: +# - 9090:9090 +# restart: always +# depends_on: +# - redis +# +# connect-server: +# image: cr.seqera.io/private/nf-tower-enterprise/data-studio/connect-server:0.8.0 +# platform: linux/amd64 +# env_file: +# - data-studios.env +# networks: +# - backend +# ports: +# - 7070:7070 +# restart: always + + # Uncomment the following section to enable the pipeline resource optimization service. 
Add TOWER_ENABLE_GROUNDSWELL + # or GROUNDSWELL_SERVER_URL to tower.env environment variables + # groundswell: + # image: cr.seqera.io/private/nf-tower-enterprise/groundswell:0.3.3 + # command: bash -c 'bin/wait-for-it.sh db:3306 -t 60; bin/migrate-db.sh; bin/serve.sh' + # networks: + # - backend + # ports: + # - 8090:8090 + # env_file: + # - groundswell.env + # restart: always + # depends_on: + # - db + +networks: + frontend: {} + backend: {} diff --git a/platform-enterprise/enterprise/_templates/docker/groundswell.env b/platform-enterprise/enterprise/_templates/docker/groundswell.env new file mode 100644 index 000000000..0a19aa329 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/docker/groundswell.env @@ -0,0 +1,19 @@ +# Tower settings +TOWER_DB_URL=mysql://db:3306/tower +TOWER_DB_USER=tower +TOWER_DB_PASSWORD=tower + +# Server settings +SWELL_SERVER_HOST=0.0.0.0 +SWELL_SERVER_PORT=8090 + +# API settings +SWELL_API_TRAIN_TIMEOUT=60 +SWELL_API_TRAIN_BATCH_SIZE=1000 +SWELL_API_PREDICT_FRACTIONAL_CPUS=false + +# Database settings +SWELL_DB_URL=mysql://db:3306/swell +SWELL_DB_USER=swell +SWELL_DB_PASSWORD=swell +SWELL_DB_DIALECT=mysql diff --git a/platform-enterprise/enterprise/_templates/docker/init.sql b/platform-enterprise/enterprise/_templates/docker/init.sql new file mode 100644 index 000000000..0f7e35ea9 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/docker/init.sql @@ -0,0 +1,11 @@ +-- Tower database +CREATE DATABASE IF NOT EXISTS `tower`; +CREATE USER 'tower'@'%' IDENTIFIED BY 'tower'; +GRANT ALL PRIVILEGES ON *.* TO 'tower'@'%'; + +-- Groundswell database +CREATE DATABASE IF NOT EXISTS `swell`; +CREATE USER 'swell'@'%' IDENTIFIED BY 'swell'; +GRANT ALL PRIVILEGES ON *.* TO 'swell'@'%'; + +FLUSH PRIVILEGES; diff --git a/platform-enterprise/enterprise/_templates/docker/tower.env b/platform-enterprise/enterprise/_templates/docker/tower.env new file mode 100644 index 000000000..b3e6b2893 --- /dev/null +++ 
b/platform-enterprise/enterprise/_templates/docker/tower.env @@ -0,0 +1,79 @@ +TOWER_SERVER_URL= +TOWER_CONTACT_EMAIL=admin@your-org.com +TOWER_JWT_SECRET= +TOWER_CRYPTO_SECRETKEY= +TOWER_LICENSE= + +# Compute environment settings +TOWER_ENABLE_PLATFORMS=awsbatch-platform,azbatch-platform,gls-platform,googlebatch-platform,k8s-platform,uge-platform,slurm-platform + +# DB settings +TOWER_DB_URL=jdbc:mysql://db:3306/tower?permitMysqlScheme=true&sessionVariables=sql_mode='STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION' +TOWER_DB_DRIVER=org.mariadb.jdbc.Driver +TOWER_DB_DIALECT=io.seqera.util.MySQL55DialectCollateBin +TOWER_DB_USER=tower +TOWER_DB_PASSWORD=tower +FLYWAY_LOCATIONS=classpath:db-schema/mysql + +# SMTP settings +TOWER_SMTP_HOST=mail +TOWER_SMTP_PORT=587 +TOWER_SMTP_USER=foo +TOWER_SMTP_PASSWORD=foo + +# Pipeline resource optimization service. Set TOWER_ENABLE_GROUNDSWELL true to use default service URL (http://groundswell:8090) +# Use GROUNDSWELL_SERVER_URL to specify a custom service URL (enables the service by default) +TOWER_ENABLE_GROUNDSWELL=false +GROUNDSWELL_SERVER_URL="http://groundswell:8090" + +# Data studios settings +#TOWER_DATA_STUDIO_CONNECT_URL= +#TOWER_OIDC_REGISTRATION_INITIAL_ACCESS_TOKEN= +#TOWER_OIDC_PEM_PATH= + +# Enable Data Explorer to enable mounting data to your data studios +#TOWER_DATA_EXPLORER_ENABLED=true +#TOWER_DATA_EXPLORER_CLOUD_DISABLED_WORKSPACES= + +# Data Studios templates +TOWER_DATA_STUDIO_TEMPLATES_JUPYTERGA_STATUS=recommended +TOWER_DATA_STUDIO_TEMPLATES_JUPYTERGA_REPOSITORY='public.cr.seqera.io/platform/data-studio-jupyter:4.2.5-0.8' +TOWER_DATA_STUDIO_TEMPLATES_JUPYTERGA_ICON=jupyter +TOWER_DATA_STUDIO_TEMPLATES_JUPYTERGA_TOOL=jupyter +TOWER_DATA_STUDIO_TEMPLATES_JUPYTERGA_PORT='8080' + +TOWER_DATA_STUDIO_TEMPLATES_JUPYTEROLD_STATUS=deprecated +TOWER_DATA_STUDIO_TEMPLATES_JUPYTEROLD_REPOSITORY='public.cr.seqera.io/platform/data-studio-jupyter:4.1.5-0.7.1' +TOWER_DATA_STUDIO_TEMPLATES_JUPYTEROLD_ICON=jupyter 
+TOWER_DATA_STUDIO_TEMPLATES_JUPYTEROLD_TOOL=jupyter +TOWER_DATA_STUDIO_TEMPLATES_JUPYTEROLD_PORT='8080' + +TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOGA_STATUS=recommended +TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOGA_REPOSITORY='public.cr.seqera.io/platform/data-studio-rstudio:4.4.1-u2-0.8' +TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOGA_ICON=rstudio +TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOGA_TOOL=rstudio +TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOGA_PORT='8787' + +TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOOLD_STATUS=deprecated +TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOOLD_REPOSITORY='public.cr.seqera.io/platform/data-studio-rstudio:4.4.1-0.7.1' +TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOOLD_ICON=rstudio +TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOOLD_TOOL=rstudio +TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOOLD_PORT='8787' + +TOWER_DATA_STUDIO_TEMPLATES_VSCODEGA_STATUS=recommended +TOWER_DATA_STUDIO_TEMPLATES_VSCODEGA_REPOSITORY='public.cr.seqera.io/platform/data-studio-vscode:1.93.1-0.8' +TOWER_DATA_STUDIO_TEMPLATES_VSCODEGA_ICON=vscode +TOWER_DATA_STUDIO_TEMPLATES_VSCODEGA_TOOL=vscode +TOWER_DATA_STUDIO_TEMPLATES_VSCODEGA_PORT='3000' + +TOWER_DATA_STUDIO_TEMPLATES_VSCODEOLD_STATUS=deprecated +TOWER_DATA_STUDIO_TEMPLATES_VSCODEOLD_REPOSITORY='public.cr.seqera.io/platform/data-studio-vscode:1.83.0-0.7.1' +TOWER_DATA_STUDIO_TEMPLATES_VSCODEOLD_TOOL=vscode +TOWER_DATA_STUDIO_TEMPLATES_VSCODEOLD_ICON=vscode +TOWER_DATA_STUDIO_TEMPLATES_VSCODEOLD_PORT='3000' + +TOWER_DATA_STUDIO_TEMPLATES_XPRAGA_STATUS=recommended +TOWER_DATA_STUDIO_TEMPLATES_XPRAGA_REPOSITORY='public.cr.seqera.io/platform/data-studio-xpra:6.2.0-r2-1-0.8' +TOWER_DATA_STUDIO_TEMPLATES_XPRAGA_ICON=xpra +TOWER_DATA_STUDIO_TEMPLATES_XPRAGA_TOOL=xpra +TOWER_DATA_STUDIO_TEMPLATES_XPRAGA_PORT='14500' \ No newline at end of file diff --git a/platform-enterprise/enterprise/_templates/docker/tower.yml b/platform-enterprise/enterprise/_templates/docker/tower.yml new file mode 100644 index 000000000..20d9b1e96 --- /dev/null +++ 
b/platform-enterprise/enterprise/_templates/docker/tower.yml @@ -0,0 +1,56 @@ +# Replace these settings with a SMTP server provided by your cloud vendor +# The mail scope is used for providing config to the underlying Micronaut framework +mail: + from: "${TOWER_CONTACT_EMAIL}" + smtp: + host: ${TOWER_SMTP_HOST} + port: ${TOWER_SMTP_PORT} + user: ${TOWER_SMTP_USER} + password: ${TOWER_SMTP_PASSWORD} + # `starttls` should be enabled with a production SMTP host + auth: true + starttls: + enable: true + required: true + ssl: + protocols: "TLSv1.2" + +# Duration of Platform sign-in email link validity +auth: + mail: + duration: 30m + +# Set a custom application name for the Micronaut environment to deploy multiple instances from the same Enterprise account +# Required for AWS Parameter Store configuration. For more information, see https://docs.seqera.io/platform/24.1/enterprise/configuration/aws_parameter_store +micronaut: + application: + name: tower-app + +# The tower scope is used for providing config for your Seqera Enterprise installation +tower: + trustedEmails: + - '*@org.xyz' + - 'named_user@org.xyz' + + # Seqera instance-wide configuration for authentication. For further information, see https://docs.seqera.io/platform/24.1/enterprise/configuration/authentication/ + auth: + google: + allow-list: + - "*@org.xyz" + oidc: + allow-list: + - "*@org.xyz" + + # Seqera instance-wide configuration for SCM providers. 
For further information, see https://docs.seqera.io/platform/24.1/enterprise/configuration/overview + scm: + providers: + github: + user: + password: + gitlab: + user: + password: + token: + bitbucket: + user: + password: \ No newline at end of file diff --git a/platform-enterprise/enterprise/_templates/k8s/configmap.yml b/platform-enterprise/enterprise/_templates/k8s/configmap.yml new file mode 100644 index 000000000..ed771dd14 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/k8s/configmap.yml @@ -0,0 +1,105 @@ +kind: ConfigMap +apiVersion: v1 +metadata: + name: tower-backend-cfg + labels: + app: backend-cfg +data: + TOWER_SERVER_URL: "https://" + TOWER_CONTACT_EMAIL: "support@tower.nf" + TOWER_JWT_SECRET: "ReplaceThisWithALongSecretString" + TOWER_DB_URL: "jdbc:mysql:///tower" + TOWER_DB_DRIVER: "org.mariadb.jdbc.Driver" + TOWER_DB_USER: "tower" + TOWER_DB_PASSWORD: "" + TOWER_DB_DIALECT: "io.seqera.util.MySQL55DialectCollateBin" + TOWER_DB_MIN_POOL_SIZE: "2" + TOWER_DB_MAX_POOL_SIZE: "10" + TOWER_DB_MAX_LIFETIME: "180000" + TOWER_SMTP_HOST: "" + TOWER_SMTP_USER: "" + TOWER_SMTP_PASSWORD: "" + TOWER_CRYPTO_SECRETKEY: "" + TOWER_LICENSE: "" + TOWER_ENABLE_PLATFORMS: "awsbatch-platform,gls-platform,googlebatch-platform,azbatch-platform,uge-platform,slurm-platform" + FLYWAY_LOCATIONS: "classpath:db-schema/mysql" + TOWER_REDIS_URL: "redis://:6379" # For a managed Redis service, specify the IP address or DNS name of the service endpoint. If you use the containerized Redis service, specify "redis" instead of an IP address. + + # Pipeline resource optimization service. 
Set TOWER_ENABLE_GROUNDSWELL true to use default service URL (http://groundswell:8090) + # Use GROUNDSWELL_SERVER_URL to specify a custom service URL (enables the service by default) + TOWER_ENABLE_GROUNDSWELL: "false" + GROUNDSWELL_SERVER_URL: http://groundswell:8090 + + # Data Studios configuration variables + TOWER_OIDC_REGISTRATION_INITIAL_ACCESS_TOKEN: '' # This must match the value of CONNECT_OIDC_CLIENT_REGISTRATION_TOKEN in your proxy.yml file. + TOWER_DATA_STUDIO_CONNECT_URL: 'https://connect.' # + TOWER_DATA_STUDIO_TEMPLATES_JUPYTERGA_STATUS: 'recommended' + TOWER_DATA_STUDIO_TEMPLATES_JUPYTERGA_REPOSITORY: 'public.cr.seqera.io/platform/data-studio-jupyter:4.2.5-0.8' + TOWER_DATA_STUDIO_TEMPLATES_JUPYTERGA_ICON: 'jupyter' + TOWER_DATA_STUDIO_TEMPLATES_JUPYTERGA_TOOL: 'jupyter' + TOWER_DATA_STUDIO_TEMPLATES_JUPYTERGA_PORT: '8080' + + TOWER_DATA_STUDIO_TEMPLATES_JUPYTEROLD_STATUS: 'deprecated' + TOWER_DATA_STUDIO_TEMPLATES_JUPYTEROLD_REPOSITORY: 'public.cr.seqera.io/platform/data-studio-jupyter:4.1.5-0.7.1' + TOWER_DATA_STUDIO_TEMPLATES_JUPYTEROLD_ICON: 'jupyter' + TOWER_DATA_STUDIO_TEMPLATES_JUPYTEROLD_TOOL: 'jupyter' + TOWER_DATA_STUDIO_TEMPLATES_JUPYTEROLD_PORT: '8080' + + TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOGA_STATUS: 'recommended' + TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOGA_REPOSITORY: 'public.cr.seqera.io/platform/data-studio-rstudio:4.4.1-u2-0.8' + TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOGA_ICON: 'rstudio' + TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOGA_TOOL: 'rstudio' + TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOGA_PORT: '8787' + + TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOOLD_STATUS: 'deprecated' + TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOOLD_REPOSITORY: 'public.cr.seqera.io/platform/data-studio-rstudio:4.4.1-0.7.1' + TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOOLD_ICON: 'rstudio' + TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOOLD_TOOL: 'rstudio' + TOWER_DATA_STUDIO_TEMPLATES_RSTUDIOOLD_PORT: '8787' + + TOWER_DATA_STUDIO_TEMPLATES_VSCODEGA_STATUS: 'recommended' + 
TOWER_DATA_STUDIO_TEMPLATES_VSCODEGA_REPOSITORY: 'public.cr.seqera.io/platform/data-studio-vscode:1.93.1-0.8' + TOWER_DATA_STUDIO_TEMPLATES_VSCODEGA_ICON: 'vscode' + TOWER_DATA_STUDIO_TEMPLATES_VSCODEGA_TOOL: 'vscode' + TOWER_DATA_STUDIO_TEMPLATES_VSCODEGA_PORT: '3000' + + TOWER_DATA_STUDIO_TEMPLATES_VSCODEOLD_STATUS: 'deprecated' + TOWER_DATA_STUDIO_TEMPLATES_VSCODEOLD_REPOSITORY: 'public.cr.seqera.io/platform/data-studio-vscode:1.83.0-0.7.1' + TOWER_DATA_STUDIO_TEMPLATES_VSCODEOLD_TOOL: 'vscode' + TOWER_DATA_STUDIO_TEMPLATES_VSCODEOLD_ICON: 'vscode' + TOWER_DATA_STUDIO_TEMPLATES_VSCODEOLD_PORT: '3000' + + TOWER_DATA_STUDIO_TEMPLATES_XPRAGA_STATUS: 'recommended' + TOWER_DATA_STUDIO_TEMPLATES_XPRAGA_REPOSITORY: 'public.cr.seqera.io/platform/data-studio-xpra:6.2.0-r2-1-0.8' + TOWER_DATA_STUDIO_TEMPLATES_XPRAGA_ICON: 'xpra' + TOWER_DATA_STUDIO_TEMPLATES_XPRAGA_TOOL: 'xpra' + TOWER_DATA_STUDIO_TEMPLATES_XPRAGA_PORT: '14500' + +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: tower-yml + labels: + app: backend-cfg +data: + tower.yml: | + mail: + smtp: + auth: true + # FIXME `starttls` should be enabled with a production SMTP host + starttls: + enable: true + required: true + ssl: + protocols: "TLSv1.2" + + auth: + mail: + duration: 30m + + # Set a custom application name for the Micronaut environment to deploy multiple instances from the same Enterprise account + # Required for AWS Parameter Store configuration. 
For more information, see https://docs.seqera.io/platform-enterprise/latest/enterprise/configuration/aws_parameter_store + micronaut: + application: + name: tower-app diff --git a/platform-enterprise/enterprise/_templates/k8s/data_studios/proxy.yml b/platform-enterprise/enterprise/_templates/k8s/data_studios/proxy.yml new file mode 100644 index 000000000..2c7cda6ee --- /dev/null +++ b/platform-enterprise/enterprise/_templates/k8s/data_studios/proxy.yml @@ -0,0 +1,73 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: connect-proxy + labels: + app.kubernetes.io/component: tower-connect-proxy +spec: + selector: + matchLabels: + app.kubernetes.io/component: tower-connect-proxy + template: + metadata: + labels: + app.kubernetes.io/component: tower-connect-proxy + spec: + imagePullSecrets: + - name: "cr.seqera.io" + nodeSelector: + kubernetes.io/arch: amd64 + containers: + - name: proxy + image: cr.seqera.io/private/nf-tower-enterprise/data-studio/connect-proxy:0.8.0 + env: + - name: CONNECT_HTTP_PORT + value: "8081" + - name: CONNECT_REDIS_ADDRESS + value: :6379 + - name: CONNECT_REDIS_PREFIX + value: "connect:session" + - name: CONNECT_REDIS_TLS_ENABLE + value: "false" + - name: CONNECT_REDIS_DB + value: "1" + - name: CONNECT_PROXY_URL + value: + - name: CONNECT_TUNNEL_URL + value: connect-server:7070 + - name: PLATFORM_URL + value: + - name: CONNECT_OIDC_CLIENT_REGISTRATION_TOKEN + value: + - name: LOCAL_CACHE_TTL + value: 2m + ports: + - containerPort: 8081 + name: proxy + securityContext: + capabilities: + add: + - NET_BIND_SERVICE + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + securityContext: + fsGroup: 65532 +--- +apiVersion: v1 +kind: Service +metadata: + name: connect-proxy + labels: + app.kubernetes.io/component: tower-connect-proxy +spec: + type: NodePort + ports: + - port: 80 + name: connect-proxy + targetPort: proxy + selector: + app.kubernetes.io/component: tower-connect-proxy diff 
--git a/platform-enterprise/enterprise/_templates/k8s/data_studios/server.yml b/platform-enterprise/enterprise/_templates/k8s/data_studios/server.yml new file mode 100644 index 000000000..1be64a58c --- /dev/null +++ b/platform-enterprise/enterprise/_templates/k8s/data_studios/server.yml @@ -0,0 +1,76 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: connect-server + labels: + app.kubernetes.io/component: connect-server +spec: + replicas: 2 + selector: + matchLabels: + app.kubernetes.io/component: connect-server + serviceName: connect-server + template: + metadata: + labels: + app.kubernetes.io/component: connect-server + spec: + imagePullSecrets: + - name: "cr.seqera.io" + nodeSelector: + kubernetes.io/arch: amd64 + containers: + - name: server + image: cr.seqera.io/private/nf-tower-enterprise/data-studio/connect-server:0.8.0 + ports: + - containerPort: 7070 + name: server + - containerPort: 7777 + name: entrypoint + env: + - name: CONNECT_HOST_DOMAIN + value: connect-server + - name: CONNECT_TUNNEL_PORT + value: "7070" + - name: CONNECT_LISTENER_PORT + value: "7777" + - name: CONNECT_REDIS_ADDRESS + value: + - name: CONNECT_REDIS_PREFIX + value: connect:session + - name: CONNECT_REDIS_TLS_ENABLE + value: "false" + - name: CONNECT_REDIS_DB + value: "1" + - name: CONNECT_SERVER_LOG_LEVEL + value: info + securityContext: + capabilities: + add: + - NET_BIND_SERVICE + drop: + - ALL + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + securityContext: + fsGroup: 65532 +--- +apiVersion: v1 +kind: Service +metadata: + name: connect-server + labels: + app.kubernetes.io/component: connect-server +spec: + type: ClusterIP + clusterIP: None + ports: + - port: 8080 + targetPort: server + name: server + - port: 7777 + targetPort: entrypoint + name: entrypoint + selector: + app.kubernetes.io/component: connect-server \ No newline at end of file diff --git a/platform-enterprise/enterprise/_templates/k8s/dbconsole.yml 
b/platform-enterprise/enterprise/_templates/k8s/dbconsole.yml new file mode 100644 index 000000000..8828e7f20 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/k8s/dbconsole.yml @@ -0,0 +1,34 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dbconsole + labels: + app: dbconsole +spec: + selector: + matchLabels: + app: dbconsole + template: + metadata: + labels: + app: dbconsole + spec: + containers: + - image: adminer:4.7.7 + name: dbconsole + ports: + - containerPort: 8080 + restartPolicy: Always +--- +apiVersion: v1 +kind: Service +metadata: + name: dbconsole +spec: + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + type: NodePort + selector: + app: dbconsole diff --git a/platform-enterprise/enterprise/_templates/k8s/groundswell.yml b/platform-enterprise/enterprise/_templates/k8s/groundswell.yml new file mode 100644 index 000000000..866ae52dd --- /dev/null +++ b/platform-enterprise/enterprise/_templates/k8s/groundswell.yml @@ -0,0 +1,137 @@ +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: tower-groundswell-cfg +data: + # Server settings + SWELL_SERVER_HOST: "0.0.0.0" + SWELL_SERVER_PORT: "8090" + + # API settings + SWELL_API_TRAIN_TIMEOUT: "60" + SWELL_API_TRAIN_BATCH_SIZE: "1000" + SWELL_API_PREDICT_FRACTIONAL_CPUS: "false" + + # Database settings, different from the tower DB credentials. + # If using Amazon RDS or similar managed database services, `SWELL_DB_URL` will have the form + # SWELL_DB_URL=mysql://db1.abcdefghijkl.us-east-1.rds.amazonaws.com:3306/swell or similar + SWELL_DB_URL: mysql:///swell + SWELL_DB_USER: swell + SWELL_DB_PASSWORD: + SWELL_DB_DIALECT: mysql +--- +apiVersion: v1 +kind: Service +metadata: + name: groundswell + labels: + app: groundswell +spec: + selector: + app: groundswell + ports: + - name: http + port: 8090 + # targetPort must match with the SWELL_SERVER_PORT in the ConfigMap above. 
+ targetPort: 8090 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: groundswell + labels: + app: groundswell +spec: + selector: + matchLabels: + app: groundswell + strategy: + type: Recreate + + template: + metadata: + labels: + app: groundswell + spec: + imagePullSecrets: + - name: "cr.seqera.io" + securityContext: + fsGroup: 101 + + initContainers: + - name: wait-for-tower-db + image: "mysql:8.0" + command: + - 'bash' + - '-c' + - | + echo "$(date): starting check for db $TOWER_DB_URL" + # Strip initial Java connection string and options after '?' from URI + until mysqlsh --uri "$(echo $TOWER_DB_URL |cut -d'?' -f1 |sed -e 's@jdbc:\(.*\)@\1@')" -u"$TOWER_DB_USER" -p"$TOWER_DB_PASSWORD" --sql -e "SELECT VERSION()"; do + echo "$(date): see you in $SLEEP_PERIOD_SECONDS seconds" + sleep $SLEEP_PERIOD_SECONDS + done + echo "$(date): db server ready" + env: + - name: SLEEP_PERIOD_SECONDS + value: "5" + envFrom: + - configMapRef: + name: tower-backend-cfg + + - name: wait-for-swell-db + image: "mysql:8.0" + command: + - 'bash' + - '-c' + - | + echo "$(date): starting check for db $SWELL_DB_URL" + # Strip initial Java connection string and options after '?' 
from URI + until mysqlsh --uri "$SWELL_DB_URL" -u"$SWELL_DB_USER" -p"$SWELL_DB_PASSWORD" --sql -e "SELECT VERSION()"; do + echo "$(date): see you in $SLEEP_PERIOD_SECONDS seconds" + sleep $SLEEP_PERIOD_SECONDS + done + echo "$(date): db server ready" + env: + - name: SLEEP_PERIOD_SECONDS + value: "5" + envFrom: + - configMapRef: + name: tower-groundswell-cfg + + - name: migrate-db + image: "cr.seqera.io/private/nf-tower-enterprise/groundswell:0.4.0" + command: ['/opt/groundswell/bin/migrate-db.sh'] + envFrom: + - configMapRef: + name: tower-groundswell-cfg + - configMapRef: + name: tower-backend-cfg + + containers: + - name: groundswell + image: "cr.seqera.io/private/nf-tower-enterprise/groundswell:0.4.0" + env: + - name: MPLCONFIGDIR + value: "/tmp" + envFrom: + - configMapRef: + name: tower-backend-cfg + - configMapRef: + name: tower-groundswell-cfg + volumeMounts: + - name: tmp-volume + mountPath: /tmp/ + securityContext: + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 101 + + volumes: + - name: tmp-volume + emptyDir: + sizeLimit: "1Gi" diff --git a/platform-enterprise/enterprise/_templates/k8s/ingress.aks.yml b/platform-enterprise/enterprise/_templates/k8s/ingress.aks.yml new file mode 100644 index 000000000..5b98f38d5 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/k8s/ingress.aks.yml @@ -0,0 +1,18 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: front-ingress + annotations: + kubernetes.io/ingress.class: azure/application-gateway +spec: + rules: + - host: YOUR-TOWER-HOST-NAME + http: + paths: + - path: /* + pathType: ImplementationSpecific + backend: + service: + name: frontend + port: + number: 80 diff --git a/platform-enterprise/enterprise/_templates/k8s/ingress.eks.yml b/platform-enterprise/enterprise/_templates/k8s/ingress.eks.yml new file mode 100644 index 000000000..de94ec5d9 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/k8s/ingress.eks.yml @@ -0,0 +1,48 
@@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: front-ingress + annotations: + kubernetes.io/ingress.class: alb + alb.ingress.kubernetes.io/scheme: internet-facing + alb.ingress.kubernetes.io/certificate-arn: + alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS":443}]' + alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}' + alb.ingress.kubernetes.io/ssl-policy: ELBSecurityPolicy-TLS-1-2-Ext-2018-06 + alb.ingress.kubernetes.io/load-balancer-attributes: > + idle_timeout.timeout_seconds=301, + routing.http2.enabled=false, + access_logs.s3.enabled=true, + access_logs.s3.bucket=YOUR-LOGS-S3-BUCKET, + access_logs.s3.prefix=YOUR-LOGS-PREFIX +spec: + rules: + - host: + http: + paths: + - path: /* + pathType: ImplementationSpecific + backend: + service: + name: ssl-redirect + port: + name: use-annotation + - path: /* + pathType: ImplementationSpecific + backend: + service: + name: frontend + port: + number: 80 + +# Uncomment the host section below to enable Data Studios. Replace with the base domain of your Platform installation. See [Data Studios configuration](https://docs.seqera.io/platform-enterprise/latest/enterprise/data-studios) for more information. +# - host: "*." 
+# http: +# paths: +# - backend: +# service: +# name: connect-proxy +# port: +# number: 80 +# pathType: ImplementationSpecific +# path: /* \ No newline at end of file diff --git a/platform-enterprise/enterprise/_templates/k8s/ingress.gke.yml b/platform-enterprise/enterprise/_templates/k8s/ingress.gke.yml new file mode 100644 index 000000000..1c8eea626 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/k8s/ingress.gke.yml @@ -0,0 +1,18 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: front-ingress + annotations: + spec.ingressClassName: "gce" +spec: + rules: + - host: YOUR-TOWER-HOST-NAME + http: + paths: + - path: /* + pathType: ImplementationSpecific + backend: + service: + name: frontend + port: + number: 80 diff --git a/platform-enterprise/enterprise/_templates/k8s/pod-scaling.yml b/platform-enterprise/enterprise/_templates/k8s/pod-scaling.yml new file mode 100644 index 000000000..be6fe3f31 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/k8s/pod-scaling.yml @@ -0,0 +1,42 @@ +apiVersion: metrics.aws/v1alpha1 +kind: ExternalMetric +metadata: + name: elb-requestscount-1min + labels: + app: skale +spec: + name: elb-requestscount-1min + resource: + resource: "deployment" + queries: + - id: elb_requestscount_1min + metricStat: + metric: + namespace: "AWS/ApplicationELB" + metricName: "RequestCount" + dimensions: + - name: LoadBalancer + value: "app/789e9218-towerdev-fronting-019e/f00ff4ad912d92e3 " + period: 60 + stat: Sum + unit: Count + returnData: true +--- +kind: HorizontalPodAutoscaler +apiVersion: autoscaling/v2beta1 +metadata: + name: skale + labels: + app: skale +spec: + scaleTargetRef: + apiVersion: apps/v1beta1 + kind: Deployment + name: backend + minReplicas: 1 + maxReplicas: 8 + metrics: + - type: External + external: + metricName: elb-requestscount-1min + targetAverageValue: 150 diff --git a/platform-enterprise/enterprise/_templates/k8s/redis.aks.yml 
b/platform-enterprise/enterprise/_templates/k8s/redis.aks.yml new file mode 100644 index 000000000..fb8ed0835 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/k8s/redis.aks.yml @@ -0,0 +1,76 @@ +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: standard + labels: + app: redis + annotations: + storageclass.beta.kubernetes.io/is-default-class: "true" +provisioner: kubernetes.io/disk.csi.azure.com +parameters: + kind: Managed + storageaccounttype: Premium_LRS +allowVolumeExpansion: true +reclaimPolicy: Retain +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: redis-data + labels: + app: redis +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: standard +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: redis + labels: + app: redis +spec: + selector: + matchLabels: + app: redis + serviceName: redis + template: + metadata: + labels: + app: redis + spec: + containers: + - image: cr.seqera.io/public/redis:7.0.10 + name: redis + args: + - --appendonly yes + ports: + - containerPort: 6379 + volumeMounts: + - mountPath: "/data" + name: "vol-data" + volumes: + - name: vol-data + persistentVolumeClaim: + claimName: redis-data + - name: host-sys + hostPath: + path: /sys + restartPolicy: Always +--- +apiVersion: v1 +kind: Service +metadata: + name: redis + labels: + app: redis +spec: + ports: + - port: 6379 + targetPort: 6379 + selector: + app: redis diff --git a/platform-enterprise/enterprise/_templates/k8s/redis.eks.yml b/platform-enterprise/enterprise/_templates/k8s/redis.eks.yml new file mode 100644 index 000000000..1923aa771 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/k8s/redis.eks.yml @@ -0,0 +1,76 @@ +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: standard + labels: + app: redis + annotations: + storageclass.beta.kubernetes.io/is-default-class: "true" +provisioner: kubernetes.io/aws-ebs +parameters: + type: gp2 + fsType: 
ext4 +allowVolumeExpansion: true +reclaimPolicy: Retain +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: redis-data + labels: + app: redis +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: standard +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: redis + labels: + app: redis +spec: + selector: + matchLabels: + app: redis + serviceName: redis + template: + metadata: + labels: + app: redis + spec: + containers: + - image: cr.seqera.io/public/redis:7.0.10 + name: redis + args: + - --appendonly yes + ports: + - containerPort: 6379 + volumeMounts: + - mountPath: "/data" + name: "vol-data" + volumes: + - name: vol-data + persistentVolumeClaim: + claimName: redis-data + - name: host-sys + hostPath: + path: /sys + restartPolicy: Always +--- +apiVersion: v1 +kind: Service +metadata: + name: redis + labels: + app: redis +spec: + ports: + - port: 6379 + targetPort: 6379 + selector: + app: redis diff --git a/platform-enterprise/enterprise/_templates/k8s/redis.gke.yml b/platform-enterprise/enterprise/_templates/k8s/redis.gke.yml new file mode 100644 index 000000000..34fc4380f --- /dev/null +++ b/platform-enterprise/enterprise/_templates/k8s/redis.gke.yml @@ -0,0 +1,61 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: redis-data + labels: + app: redis +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: standard +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: redis + labels: + app: redis +spec: + selector: + matchLabels: + app: redis + serviceName: redis + template: + metadata: + labels: + app: redis + spec: + containers: + - image: cr.seqera.io/public/redis:7.0.10 + name: redis + args: + - --appendonly yes + ports: + - containerPort: 6379 + volumeMounts: + - mountPath: "/data" + name: "vol-data" + volumes: + - name: vol-data + persistentVolumeClaim: + claimName: redis-data + - name: host-sys + hostPath: + path: 
/sys + restartPolicy: Always +--- +apiVersion: v1 +kind: Service +metadata: + name: redis + labels: + app: redis +spec: + ports: + - port: 6379 + targetPort: 6379 + selector: + app: redis diff --git a/platform-enterprise/enterprise/_templates/k8s/tower-cron.yml b/platform-enterprise/enterprise/_templates/k8s/tower-cron.yml new file mode 100644 index 000000000..bb09593f8 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/k8s/tower-cron.yml @@ -0,0 +1,60 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cron + labels: + app: cron +spec: + selector: + matchLabels: + app: cron + template: + metadata: + labels: + app: cron + spec: + imagePullSecrets: + - name: "cr.seqera.io" + volumes: + - name: config-volume + configMap: + name: tower-yml + initContainers: + - name: migrate-db + image: cr.seqera.io/private/nf-tower-enterprise/migrate-db:v25.1.1 + command: ["sh", "-c", "/migrate-db.sh"] + envFrom: + - configMapRef: + name: tower-backend-cfg + volumeMounts: + - name: config-volume + mountPath: /tower.yml + subPath: tower.yml + containers: + - name: backend + image: cr.seqera.io/private/nf-tower-enterprise/backend:v25.1.1 + envFrom: + - configMapRef: + name: tower-backend-cfg + volumeMounts: + - name: config-volume + mountPath: /tower.yml + subPath: tower.yml + env: + - name: MICRONAUT_ENVIRONMENTS + value: "prod,redis,cron" + ports: + - containerPort: 8080 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 3 + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 3 + failureThreshold: 10 diff --git a/platform-enterprise/enterprise/_templates/k8s/tower-svc.yml b/platform-enterprise/enterprise/_templates/k8s/tower-svc.yml new file mode 100644 index 000000000..48f9ff111 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/k8s/tower-svc.yml @@ -0,0 +1,134 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: backend + labels: + app: backend 
+spec: + selector: + matchLabels: + app: backend + strategy: + rollingUpdate: + maxUnavailable: 0 + maxSurge: 1 + template: + metadata: + labels: + app: backend + spec: + imagePullSecrets: + - name: "cr.seqera.io" + volumes: + - name: config-volume + configMap: + name: tower-yml + # Volume for Studios + #- name: cert-volume + # secret: + # secretName: platform-oidc-certs + containers: + - name: backend + image: cr.seqera.io/private/nf-tower-enterprise/backend:v25.1.1 + envFrom: + - configMapRef: + name: tower-backend-cfg + env: + - name: MICRONAUT_ENVIRONMENTS + value: "prod,redis,ha" + # TLS certificate for Studios + #- name: TOWER_OIDC_PEM_PATH + # value: '/data/certs/oidc.pem' + ports: + - containerPort: 8080 + volumeMounts: + - name: config-volume + mountPath: /tower.yml + subPath: tower.yml + # Volume mount for Studios + #- name: cert-volume + # mountPath: /data/certs + resources: + requests: + cpu: "1" + memory: "1200Mi" + limits: + memory: "4200Mi" + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 3 + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 3 + failureThreshold: 10 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: frontend + labels: + app: frontend +spec: + replicas: 1 + selector: + matchLabels: + app: frontend + template: + metadata: + labels: + app: frontend + spec: + imagePullSecrets: + - name: "cr.seqera.io" + containers: + - name: frontend + image: cr.seqera.io/private/nf-tower-enterprise/frontend:v25.1.1 + ports: + - containerPort: 80 + restartPolicy: Always +--- +# Services +apiVersion: v1 +kind: Service +metadata: + name: backend + labels: + app: backend +spec: + ports: + - name: http + port: 8080 + targetPort: 8080 + selector: + app: backend +--- +apiVersion: v1 +kind: Service +metadata: + name: backend-api +spec: + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + type: NodePort + selector: + app: backend +--- 
+apiVersion: v1 +kind: Service +metadata: + name: frontend +spec: + type: LoadBalancer + ports: + - port: 80 + selector: + app: "frontend" +--- diff --git a/platform-enterprise/enterprise/_templates/nginx/cert_on_frontend.md b/platform-enterprise/enterprise/_templates/nginx/cert_on_frontend.md new file mode 100644 index 000000000..d2c37fb85 --- /dev/null +++ b/platform-enterprise/enterprise/_templates/nginx/cert_on_frontend.md @@ -0,0 +1,81 @@ +title: "cert_on_frontend" +This example assumes deployment on an Amazon Linux 2 AMI. + +1. Install NGINX and other required packages: + + ```yml + sudo amazon-linux-extras install nginx1.12 + sudo wget -r --no-parent -A 'epel-release-*.rpm' https://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/e/ + sudo rpm -Uvh dl.fedoraproject.org/pub/epel/7/x86_64/Packages/e/epel-release-*.rpm + sudo yum-config-manager --enable epel* + sudo yum repolist all + sudo amazon-linux-extras install epel -y + ``` + +2. Generate a [private certificate and key](https://www.digitalocean.com/community/tutorials/openssl-essentials-working-with-ssl-certificates-private-keys-and-csrs). + +3. Create a `ssl.conf` file. + + ```ini + + server { + server_name your.server.name; # replace with your server name + root /usr/share/nginx/html; + + location / { + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_set_header Authorization $http_authorization; + proxy_pass_header Authorization; + + proxy_pass http://frontend/; + proxy_read_timeout 90; + proxy_redirect http://frontend/ https://your.redirect.url/; + } + + error_page 404 /404.html; + location = /40x.html { + } + + error_page 500 502 503 504 /50x.html; + location = /50x.html { + } + listen [::]:443 ssl ipv6only=on; + listen 443 ssl; + + ssl_certificate /etc/ssl/testcrt.crt; + ssl_certificate_key /etc/ssl/testkey.key; + } + + ``` + +4. 
Make a local copy of the `frontend` container's `/etc/nginx/nginx.conf` file. + +5. Add the following to the `server` block of your local `nginx.conf` file: + + ```ini + include /etc/nginx/ssl.conf; + ``` + +6. Modify the `frontend` container definition in your `docker-compose.yml` file: + + ```yml + frontend: + image: cr.seqera.io/frontend:${TAG} + networks: + - frontend + ports: + - 8000:80 + - 443:443 + volumes: + - $PWD/nginx.conf:/etc/nginx/nginx.conf + - $PWD/ssl.conf:/etc/nginx/ssl.conf + - $PWD/cert/testcrt.crt:/etc/ssl/testcrt.crt + - $PWD/cert/testkey.key:/etc/ssl/testkey.key + restart: always + depends_on: + - backend + ``` diff --git a/platform-enterprise/enterprise/advanced-topics/cloudformation.md b/platform-enterprise/enterprise/advanced-topics/cloudformation.md new file mode 100644 index 000000000..42c1d679e --- /dev/null +++ b/platform-enterprise/enterprise/advanced-topics/cloudformation.md @@ -0,0 +1,70 @@ +--- +title: "CloudFormation deployment (deprecated)" +description: Deploy Tower to ECS using CloudFormation +date: "12 Apr 2023" +tags: [ecs, cloudformation, deployment] +--- + +!!! danger "Deprecated" +This deployment option is deprecated, and will be removed in the future. We strongly recommend against using this option unless you're sufficiently experienced with CloudFormation to customize this template for your own infrastructure. + +Tower can be deployed via AWS CloudFormation, using the included configuration. + +This guide assumes that all [prerequisites](../prerequisites/aws) have been met. + +## Set up an ECS cluster + +1. Navigate to the ECS console in AWS. + +2. Select **Create cluster**. + +3. Select **Amazon ECS > Clusters > EC2 Linux + Networking**. 
+ +### ECS Cluster Configuration + +- Name: _nf-tower_ + +**Instance Configuration** + +- Provisioning Model: On-Demand +- EC2 instance type: c4.2xlarge +- Number of instances: 1 +- EC2 AMI ID: Amazon Linux 2 +- Root EBS Volume Size (GiB): none +- Key pair: none + +### Networking Configuration + +- Create a new VPC + +**Container instance IAM role** + +- Create a new role (if the `ecsInstance` role doesn't exist) + +**Instance `ServerURL`** + +- Record the public IP of the instance in the ECS cluster e.g., `3.122.246.202` + +## Deploy Tower + +1. Download [aws-ecs-cloudformation.json](../_templates/cloudformation/aws-ecs-cloudformation.json) and [params.json.template](../_templates/cloudformation/params.json.template). + +2. Rename `params.json.template` to `params.json` and configure for your environment. + + For more information on configuration, visit the [Configuration](../configuration/overview) section. + +3. Deploy the Tower stack to your ECS cluster: + + ```bash + aws cloudformation create-stack \ + --stack-name Tower \ + --template-body file://aws-ecs-cloudformation.json \ + --parameters file://params.json + ``` + +You can delete the stack at any time, to uninstall Tower or update any parameters: + +```bash +aws cloudformation delete-stack \ + --stack-name Tower +``` diff --git a/platform-enterprise/enterprise/advanced-topics/custom-launch-container.md b/platform-enterprise/enterprise/advanced-topics/custom-launch-container.md new file mode 100644 index 000000000..525b3ca0c --- /dev/null +++ b/platform-enterprise/enterprise/advanced-topics/custom-launch-container.md @@ -0,0 +1,44 @@ +--- +title: "Custom AWS Batch launch container" +description: Use a custom AWS Batch launch container +date: "12 Apr 2023" +tags: [aws, batch, launch, container] +--- + +You can customize your Seqera instance's Nextflow launch container, e.g., to include private CA certificates or compliance software in your Nextflow environment. 
+ +:::caution +Seqera recommends using the default Nextflow launch container wherever possible. Custom launch containers can complicate your Seqera configuration and upgrade process. +::: + +Specify the path to your custom launch container image with an environment variable: + +```env +TOWER_LAUNCH_CONTAINER=quay.io/seqeralabs/nf-launcher:j17-23.04.3 +``` + +**Use an AWS Batch job definition as a Seqera custom launch container** + +Seqera Platform automatically registers an AWS Batch [job definition](https://docs.aws.amazon.com/batch/latest/userguide/job_definitions.html) to launch pipelines with the required Nextflow runtime. + +If you need to manage this manually, create a job definition in your AWS Batch environment with the following settings: + +- `name`: any of your choice +- `image`: a custom image based on the Seqera [nf-launcher image](https://quay.io/repository/seqeralabs/nf-launcher) +- `vcpus`: at least `1` +- `memory`: at least `1000` +- `command`: `true` + +After the job definition is registered, update your Seqera Enterprise configuration with the following (replace `` with the name of the job definition): + +:::caution +The custom launch container is set at the root level, so all executions in your Seqera instance will use this container. If you set an AWS Batch job definition as your custom launch container, launching workflow executions in other cloud provider compute environments will fail. +::: + +```env +TOWER_LAUNCH_CONTAINER=job-definition:// +``` + +:::note +The repository where your launch container resides must be accessible to the Batch cluster's [ECS Agent](https://aws.amazon.com/blogs/compute/how-to-authenticate-private-container-registries-using-aws-batch/). 
+::: diff --git a/platform-enterprise/enterprise/advanced-topics/db-docker-to-RDS.md b/platform-enterprise/enterprise/advanced-topics/db-docker-to-RDS.md new file mode 100644 index 000000000..9e3e89ae8 --- /dev/null +++ b/platform-enterprise/enterprise/advanced-topics/db-docker-to-RDS.md @@ -0,0 +1,134 @@ +--- +title: "Migrate Docker DB to Amazon RDS" +description: Migrate your Docker database to Amazon RDS +date: "12 Apr 2023" +tags: [rds, docker, database] +--- + +While [Docker Compose](../docker-compose) is a fast and convenient way to deploy your Seqera instance, production deployments require a robust database solution to minimize the risk of data loss. + +## Points to consider before database migration + +- **Target database:** You have options when selecting your new MySQL-compliant database. While the process is mostly the same, some of the commands will differ (example: [MariaDB on RDS](../configuration/overview#seqera-and-redis-databases)). + +- **Database size:** The data in your database must be exported from the MySQL container and imported to the new instance. Depending on the amount of data in your database and the remaining EC2 EBS capacity, you can save your data directly to the instance, or use a service with more capacity (such as AWS S3). + +- **Testing:** What level of testing is required to confirm data has been properly migrated? + +- **Maintenance window:** It is easier to initiate a migration once all transactions to the database cease than to do so while jobs are still running. Perform the migration at a time when an outage may occur, and notify your users in advance. + +- **MySQL container volume retention:** Seqera recommends retaining your original volume until you are 100% satisfied that the migration occurred without error. So long as the volume is kept, you can fall back to the MySQL container to ensure retention of the material generated thus far. + +## Prerequisites + +Before starting your migration: + +1. 
Create an RDS MySQL-compliant instance and populate it with a [Seqera user and database](../configuration/overview#seqera-and-redis-databases). + +2. Ensure both your database and EC2 instances' Security Group(s) have been configured to allow MySQL traffic (default: Port 3306). + +## Migration steps + +:::note +These migration instructions assume: + +- You have sufficient EC2 instance space to store your data. +- You have implemented a full maintenance outage. + +::: + +1. Connect to the EC2 instance with SSH and navigate to your Seqera instance's `docker compose` folder. + +2. Shut down the application: + + ```bash + docker compose down + ``` + +3. Mount a new folder into the MySQL container for migration activities: + + 1. Create a new folder to hold migration artefacts: + + ```bash + mkdir ~/tower_migration + ``` + + 2. Back up the database: + + ```bash + sudo tar -zcvf ~/tower_migration/tower_backup.tar.gz ~/.tower/db/mysql + ``` + +4. Add a new volume entry for the db container to the Seqera `docker-compose.yml`: + + ``` + $HOME/tower_migration:/tower_migration + ``` + +5. Restart the application to confirm your changes were successful: + + ```bash + docker compose up + ``` + +6. Stop all the non-MySQL containers: + + ```bash + docker container stop + ``` + +7. Exec onto the MySQL container: + + ```bash + docker exec -it /bin/bash + ``` + + 1. Dump your data to disk: + + ```bash + mysqldump -u tower -p --databases tower --no-tablespaces --set-gtid-purged=OFF > /tower_migration/tower_dumpfile.sql + ``` + + 2. Exit the container. + +8. Stop the MySQL container: + + ```bash + docker container stop + ``` + +9. Import the dump file into your new RDS instance: + + ```bash + mysql --host --port 3306 -u tower -p tower < ~/tower_migration/tower_dumpfile.sql + ``` + + :::note + If you encounter an `Access denied; you need (at least one of) the SUPER or SET_USER_ID privilege(s) for this operation` error, do the following: + + 1. 
Create a backup of your MySQL dump file (`tower_dumpfile.sql`). + 2. Run the following command on the dump file: + + ```bash + sed 's/\sDEFINER=`[^`]*`@`[^`]*`//g' -i tower_dumpfile.sql + ``` + + ::: + +10. Log in to the RDS instance and ensure that the Seqera database is populated with tables prefixed by `tw_`. + +11. Modify the `tower.env` config file in the Seqera Docker folder: + + 1. Comment out the existing `TOWER_DB-*` variables. + 2. Add new entries [relevant to your database](../configuration/overview#seqera-and-redis-databases). + 3. Save and exit. + +12. Restart the application: + + ```bash + docker compose up + ``` + +13. Confirm that Seqera Enterprise starts and that your data is available when you log in. + +The migration is complete and testing can begin. diff --git a/platform-enterprise/enterprise/advanced-topics/firewall-configuration.md b/platform-enterprise/enterprise/advanced-topics/firewall-configuration.md new file mode 100644 index 000000000..72ede1473 --- /dev/null +++ b/platform-enterprise/enterprise/advanced-topics/firewall-configuration.md @@ -0,0 +1,70 @@ +--- +title: "Firewall configuration" +description: Configure your firewall to allow Seqera Cloud access +date: "12 Apr 2023" +tags: [firewall, configuration] +--- + +Seqera Platform Cloud ([cloud.seqera.io](https://cloud.seqera.io)) may need to connect to resources within your network, e.g., your storage server. To do so, your firewall must be configured to allow certain IPs to reach your resources. + +A dynamic list of IPs is kept up-to-date at https://meta.seqera.io. 
+ +This endpoint returns a JSON object that can be parsed to dynamically adapt your firewall, e.g., in Python with the `requests` package: + +```python +$ python3 +>>> import requests +>>> requests.get("https://meta.seqera.io").json() +{ + "cloud.seqera.io": [ + "18.135.7.45/32", + "18.169.21.18/32", + "18.171.4.252/32" + ], + "licenses.seqera.io": [ + "35.176.121.51/32", + "35.178.254.247/32" + ] +} +``` + +### DNS allowlist + +In order for you to access resources such as Fusion tarballs, `nf-xpack` files, Wave cloud containers and other services provided by Seqera, you'll need to add `*.seqera.io.cdn.cloudflare.net` to the allowlist in your network firewall. If DNS wildcards aren't supported by your firewall, you can use the following: + +- `cloud.seqera.io` +- `api.cloud.seqera.io` +- `user-data.cloud.seqera.io` +- `tower.nf` +- `api.tower.nf` +- `connect.cloud.seqera.io` and its subdomains `*.connect.cloud.seqera.io` +- `hub.seqera.io` +- `intern.seqera.io` +- `wave.seqera.io` +- `community.wave.seqera.io` +- `cerbero.seqera.io` +- `public.cr.seqera.io` +- `auth.cr.seqera.io` +- `cr.seqera.io` +- `licenses.seqera.io` +- `api.multiqc.info` +- `fusionfs.seqera.io` +- `nf-xpack.seqera.io` +- `community-cr-prod.seqera.io` +- `fusionfs.seqera.io` +- `nf-xpack.seqera.io` +- `public-cr-prod.seqera.io` +- `wave-cache-prod-cloudflare.seqera.io` +- `fusionfs.seqera.io.cdn.cloudflare.net` +- `nf-xpack.seqera.io.cdn.cloudflare.net` +- `community-cr-prod.seqera.io.cdn.cloudflare.net` +- `fusionfs.seqera.io.cdn.cloudflare.net` +- `nf-xpack.seqera.io.cdn.cloudflare.net` +- `public-cr-prod.seqera.io.cdn.cloudflare.net` +- `wave-cache-prod-cloudflare.seqera.io.cdn.cloudflare.net` + +If you chose to filter by specific DNS records, please note that new services may be added in the future. + +:::note +If your allowlist is based on IP addresses, allow all of the following IP addresses: https://www.cloudflare.com/ips/. 
+::: diff --git a/platform-enterprise/enterprise/advanced-topics/manual-aws-batch-setup.mdx b/platform-enterprise/enterprise/advanced-topics/manual-aws-batch-setup.mdx new file mode 100644 index 000000000..8085db50b --- /dev/null +++ b/platform-enterprise/enterprise/advanced-topics/manual-aws-batch-setup.mdx @@ -0,0 +1,359 @@ +--- +title: "Manual AWS Batch configuration" +description: "Manual AWS Batch configuration" +date: "12 Apr 2023" +tags: [aws, batch, configuration] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +This page describes how to set up AWS roles and Batch queues manually for the deployment of Nextflow workloads with Seqera Platform. + +:::tip +Manual AWS Batch configuration is only necessary if you don't use Batch Forge. + +Batch Forge _automatically creates_ the AWS Batch queues required for your workflow executions. +::: + +Complete the following procedures to configure AWS Batch manually: + +1. Create a user policy. +2. Create the instance role policy. +3. Create the AWS Batch service role. +4. Create an EC2 Instance role. +5. Create an EC2 SpotFleet role. +6. Create a launch template. +7. Create the AWS Batch compute environments. +8. Create the AWS Batch queue. + +### Create a user policy + +Create the policy for the user launching Nextflow jobs: + +1. In the [IAM Console](https://console.aws.amazon.com/iam/home), select **Create policy** from the Policies page. +1. Create a new policy with the following content: + + ```json + { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "Stmt1530313170000", + "Effect": "Allow", + "Action": [ + "batch:CancelJob", + "batch:RegisterJobDefinition", + "batch:DescribeComputeEnvironments", + "batch:DescribeJobDefinitions", + "batch:DescribeJobQueues", + "batch:DescribeJobs", + "batch:ListJobs", + "batch:SubmitJob", + "batch:TerminateJob" + ], + "Resource": ["*"] + } + ] + } + ``` + +1. Save with it the name `seqera-user`. 
+ +### Create the instance role policy + +Create the policy with a role that allows Seqera to submit Batch jobs on your EC2 instances: + +1. In the [IAM Console](https://console.aws.amazon.com/iam/home), select **Create policy** from the Policies page. +1. Create a new policy with the following content: + + ```json + { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "VisualEditor0", + "Effect": "Allow", + "Action": [ + "batch:DescribeJobQueues", + "batch:CancelJob", + "batch:SubmitJob", + "batch:ListJobs", + "batch:DescribeComputeEnvironments", + "batch:TerminateJob", + "batch:DescribeJobs", + "batch:RegisterJobDefinition", + "batch:DescribeJobDefinitions", + "batch:TagResource", + "ecs:DescribeTasks", + "ec2:DescribeInstances", + "ec2:DescribeInstanceTypes", + "ec2:DescribeInstanceAttribute", + "ecs:DescribeContainerInstances", + "ec2:DescribeInstanceStatus", + "logs:Describe*", + "logs:Get*", + "logs:List*", + "logs:Create*", + "logs:Put*", + "logs:StartQuery", + "logs:StopQuery", + "logs:TestMetricFilter", + "logs:FilterLogEvents" + ], + "Resource": "*" + } + ] + } + ``` + +1. Save it with the name `seqera-batchjob`. + +### Create the Batch Service role + +Create a service role used by AWS Batch to launch EC2 instances on your behalf: + +1. In the [IAM Console](https://console.aws.amazon.com/iam/home), select **Create role** from the Roles page. +1. Select **AWS service** as the trusted entity type, and **Batch** as the service. +1. On the next page, the `AWSBatchServiceRole` is already attached. No further permissions are needed for this role. +1. Enter `seqera-servicerole` as the role name and add an optional description and tags if needed, then select **Create**. + +### Create an EC2 instance role + +Create a role that controls which AWS resources the EC2 instances launched by AWS Batch can access: + +1. In the [IAM Console](https://console.aws.amazon.com/iam/home), select **Create role** from the Roles page. +1. 
Select AWS service as the trusted entity type, EC2 as the service, and _EC2 - Allows EC2 instances to call AWS services on your behalf_ as the use case. +1. Select **Next: Permissions**. Search for the following policies to attach to the role: + - `AmazonEC2ContainerServiceforEC2Role` + - `AmazonS3FullAccess` (you may want to use a custom policy to allow access only on specific S3 buckets) + - `seqera-batchjob` (the instance role policy created above) +1. Enter `seqera-instancerole` as the role name and add an optional description and tags if needed, then select **Create**. + +### Create an EC2 SpotFleet role + +The EC2 SpotFleet role allows you to use Spot instances when you run jobs in AWS Batch. Create a role for the creation and launch of Spot fleets — Spot instances with similar compute capabilities (i.e., vCPUs and RAM): + +1. In the [IAM Console](https://console.aws.amazon.com/iam/home), select **Create role** from the Roles page. +1. Select AWS service as the trusted entity type, EC2 as the service, and _EC2 - Spot Fleet Tagging_ as the use case. +1. On the next page, the `AmazonEC2SpotFleetTaggingRole` is already attached. No further permissions are needed for this role. +1. Enter `seqera-fleetrole` as the role name and add an optional description and tags if needed, then select **Create**. + +### Create a launch template + +Create a launch template to configure the EC2 instances deployed by Batch jobs: + + + + + 1. In the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home), select **Create launch template** from the Launch templates page. + 1. 
Scroll down to **Advanced details** and paste the following in the **User data** field: + + ```bash + MIME-Version: 1.0 + Content-Type: multipart/mixed; boundary="//" + + --// + Content-Type: text/cloud-config; charset="us-ascii" + + #cloud-config + write_files: + - path: /root/custom-ce.sh + permissions: 0744 + owner: root + content: | + #!/usr/bin/env bash + yum install -q -y jq sed wget unzip nvme-cli lvm2 + wget -q https://amazoncloudwatch-agent.s3.amazonaws.com/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm + rpm -U ./amazon-cloudwatch-agent.rpm + rm -f ./amazon-cloudwatch-agent.rpm + curl -s https://nf-xpack.seqera.io/amazon-cloudwatch-agent/custom-v0.1.json \ + # | sed 's/custom-id//g' \ + > /opt/aws/amazon-cloudwatch-agent/bin/config.json + /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl \ + -a fetch-config \ + -m ec2 \ + -s \ + -c file:/opt/aws/amazon-cloudwatch-agent/bin/config.json + mkdir -p /scratch/fusion + NVME_DISKS=($(nvme list | grep 'Amazon EC2 NVMe Instance Storage' | awk '{ print $1 }')) + NUM_DISKS=${#NVME_DISKS[@]} + if (( NUM_DISKS > 0 )); then + if (( NUM_DISKS == 1 )); then + mkfs -t xfs ${NVME_DISKS[0]} + mount ${NVME_DISKS[0]} /scratch/fusion + else + pvcreate ${NVME_DISKS[@]} + vgcreate scratch_fusion ${NVME_DISKS[@]} + lvcreate -l 100%FREE -n volume scratch_fusion + mkfs -t xfs /dev/mapper/scratch_fusion-volume + mount /dev/mapper/scratch_fusion-volume /scratch/fusion + fi + fi + chmod a+w /scratch/fusion + mkdir -p /etc/ecs + echo ECS_IMAGE_PULL_BEHAVIOR=once >> /etc/ecs/ecs.config + echo ECS_ENABLE_AWSLOGS_EXECUTIONROLE_OVERRIDE=true >> /etc/ecs/ecs.config + systemctl stop docker + ## install AWS CLI + mkdir -p /home/ec2-user + curl -s https://nf-xpack.seqera.io/miniconda-awscli/miniconda-awscli.tar.gz \ + | tar xz -C /home/ec2-user + export PATH=$PATH:/home/ec2-user/miniconda/bin + ln -s /home/ec2-user/miniconda/bin/aws /usr/bin/aws + systemctl start docker + systemctl enable --now --no-block ecs + echo 
"1258291200" > /proc/sys/vm/dirty_bytes + echo "629145600" > /proc/sys/vm/dirty_background_bytes + + runcmd: + - bash /root/custom-ce.sh + + --//-- + ``` + + 1. To prepend a custom identifier to the CloudWatch log streams for AWS resources created by your manual compute environment, uncomment the `| sed 's/custom-id//g' \` line and replace `` with your custom ID. If ommitted, this defaults to `custom-id`. + 1. Save the template with the name `seqera-launchtemplate`. + + + + + 1. In the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home), select **Create launch template** from the Launch templates page. + 1. Scroll down to **Advanced details** and paste the following in the **User data** field: + + ```bash + MIME-Version: 1.0 + Content-Type: multipart/mixed; boundary="//" + + --// + Content-Type: text/cloud-config; charset="us-ascii" + + #cloud-config + write_files: + - path: /root/custom-ce.sh + permissions: 0744 + owner: root + content: | + #!/usr/bin/env bash + yum install -q -y jq sed wget unzip nvme-cli lvm2 + wget -q https://amazoncloudwatch-agent.s3.amazonaws.com/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm + rpm -U ./amazon-cloudwatch-agent.rpm + rm -f ./amazon-cloudwatch-agent.rpm + curl -s https://nf-xpack.seqera.io/amazon-cloudwatch-agent/custom-v0.1.json \ + # | sed 's/custom-id//g' \ + > /opt/aws/amazon-cloudwatch-agent/bin/config.json + /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl \ + -a fetch-config \ + -m ec2 \ + -s \ + -c file:/opt/aws/amazon-cloudwatch-agent/bin/config.json + mkdir -p /etc/ecs + echo ECS_IMAGE_PULL_BEHAVIOR=once >> /etc/ecs/ecs.config + echo ECS_ENABLE_AWSLOGS_EXECUTIONROLE_OVERRIDE=true >> /etc/ecs/ecs.config + systemctl stop docker + ## install AWS CLI v2 + mkdir -p /home/ec2-user + curl -s https://nf-xpack.seqera.io/miniconda-awscli/miniconda-awscli.tar.gz \ + | tar xz -C /home/ec2-user + export PATH=$PATH:/home/ec2-user/miniconda/bin + ln -s /home/ec2-user/miniconda/bin/aws /usr/bin/aws + 
systemctl start docker + systemctl enable --now --no-block ecs + echo "1258291200" > /proc/sys/vm/dirty_bytes + echo "629145600" > /proc/sys/vm/dirty_background_bytes + + runcmd: + - bash /root/custom-ce.sh + + --//-- + ``` + + 1. To prepend a custom identifier to the CloudWatch log streams for AWS resources created by your manual compute environment, uncomment the `| sed 's/custom-id//g' \` line and replace `` with your custom ID. If ommitted, this defaults to `custom-id`. + 1. Save the template with the name `seqera-launchtemplate`. + + + + +### Create the Batch compute environments + +:::caution +AWS Graviton instances (ARM64 CPU architecture) are not supported in manual compute environments. To use Graviton instances, create your AWS Batch compute environment with [Batch Forge](../../compute-envs/aws-batch#batch-forge-compute-environment). +::: + +Nextflow makes use of two job queues during workflow execution: + +- A head queue to run the Nextflow application +- A compute queue where Nextflow will submit job executions + +While the compute queue can use a compute environment with Spot instances, the head queue requires an on-demand compute environment. If you intend to use an on-demand compute environment for compute jobs, the same job queue can be used for both head and compute. + +:::note +Spot instances can significantly reduce your AWS compute costs, provided your workflow compute tasks can run on ephemeral instances. +::: + +Create a compute environment for each queue in the AWS Batch console: + + + + +The head queue requires an on-demand compute environment. Do not select **Use Spot instances** during compute environment creation. + +1. In the [Batch Console](https://eu-west-1.console.aws.amazon.com/batch/home), select **Create** on the Compute environments page. +1. Select **Amazon EC2** as the compute environment configuration. 
+ :::note + Seqera AWS Batch compute environments created with [Batch Forge](../../compute-envs/aws-batch#batch-forge-compute-environment) support using Fargate for the head job, but manual compute environments must use EC2. + ::: +1. Enter a name of your choice, and apply the `seqera-servicerole` and `seqera-instancerole`. +1. Enter vCPU limits and instance types, if needed. + :::note + To use the same queue for both head and compute tasks, you must assign sufficient resources to your compute environment. + ::: +1. Expand **Additional configuration** and select the `seqera-launchtemplate` from the Launch template dropdown. +1. Configure VPCs, subnets, and security groups on the next page as needed. +1. Review your configuration and select **Create compute environment**. + + + + +Create this compute environment to use Spot instances for your workflow compute tasks. This compute environment cannot be assigned to the Nextflow head job queue. + +1. In the [Batch Console](https://eu-west-1.console.aws.amazon.com/batch/home), select **Create** on the Compute environments page. +1. Select **Amazon EC2** as the compute environment configuration. +1. Enter a name of your choice, and apply the `seqera-servicerole` and `seqera-instancerole`. +1. Select **Enable using Spot instances** to use Spot instances and save computing costs. +1. Select the `seqera-fleetrole` and enter vCPU limits and instance types, if needed. +1. Expand **Additional configuration** and select the `seqera-launchtemplate` from the Launch template dropdown. +1. Configure VPCs, subnets, and security groups on the next page as needed. +1. Review your configuration and select **Create compute environment**. + + + + +### Create the Batch queue + +Create a Batch queue to be associated with each compute environment. + +:::note +You only need to create one queue if you intend to use on-demand instances for your workflow compute tasks. 
Compute environments with Spot instances require separate queues for the head and compute tasks. +::: + + + + +1. Go to the [Batch Console](https://eu-west-1.console.aws.amazon.com/batch/home). +2. Create a new queue. +3. Associate the queue with the head queue compute environment created in the previous section. +4. Save it with a name of your choice. + + + + +1. Go to the [Batch Console](https://eu-west-1.console.aws.amazon.com/batch/home). +2. Create a new queue. +3. Associate the queue with the compute queue environment created in the previous section. +4. Save it with a name of your choice. + + + + +Use the AWS resources created on this page to create your [manual AWS Batch compute environment](../../compute-envs/aws-batch#manual). diff --git a/platform-enterprise/enterprise/advanced-topics/manual-azure-batch-setup.md b/platform-enterprise/enterprise/advanced-topics/manual-azure-batch-setup.md new file mode 100644 index 000000000..ca7467ea0 --- /dev/null +++ b/platform-enterprise/enterprise/advanced-topics/manual-azure-batch-setup.md @@ -0,0 +1,486 @@ +--- +title: Azure Batch walkthrough +headline: "Azure Batch walkthrough" +description: "A tutorial for using advanced features of Azure Batch with Seqera Platform" +--- + +This guide details how to set up more complex Azure Batch compute environments with Seqera Platform. It begins with the simplest possible setup before adding complexity, therefore it is designed to be performed stepwise. + +The first step indicates how to configure a simple Azure Batch compute environment on Azure and Seqera Platform, however beyond that is not required for most users and is only recommended for those who need to customize their compute environments. + +## Prerequisites + +- An Azure account with sufficient permissions to create resources. +- [Azure CLI][install-azure-cli] +- [Seqera Platform CLI][install-seqera-cli] + +### Set up Azure Batch + +In the Azure Portal: + +1. 
Create an Azure Storage account with the default settings. +1. In the Azure Storage account, add a single blob container called `work`. This is the [Nextflow working directory][nextflow-working-directory]. +1. Create a new Azure Batch account. Use Batch Managed for now, with the default settings. Use the same region as your Storage account and attach the Storage account to the Batch account when prompted. +1. On the Azure Batch page, select **Quotas**. +1. Select **Request Quota Increase**. +1. For **Quota Type**, select **Batch**, then select **Next**. +1. Select **Enter Details**, then choose the **Location** as the region of your Batch account. +1. Select **EDv5 Series**. +1. Select **Spot/low-priority vCPUS (all Series)**. +1. Select **Active jobs and job schedules per Batch account**. +1. Select **Pools per Batch account**. + +Increase each value to a minimum of the following: + +- **EDv5 Series**: 192 +- **Active jobs and job schedules per Batch account**: 100 +- **Pools per Batch account**: 50 +- **Spot/low-priority vCPUS (all Series)**: 192 + +### Set up Seqera Cloud + +In Seqera Cloud: + +- Create a new account. +- [Create a new organization and workspace][create-org-workspace]. +- Add a GitHub credential to the workspace to prevent API rate-limiting issues with GitHub. + +## Compute environment and pipeline configuration + +### Option 1. Azure Batch with Seqera Batch Forge + +**Behavior**: + +- Seqera Platform will submit a Nextflow job and task to this pool. +- The Nextflow job will execute and submit each task to the same node pool on Azure Batch. +- The node pool will autoscale up and down based on the number of waiting tasks. + +**Advantages**: + +- Simple to set up. +- Low cost. +- Autoscales for number of waiting tasks. + +**Disadvantages**: + +- The Nextflow job will submit each task to the same node pool on Azure Batch, which can cause bottlenecks. 
+- Because the processes require larger resources than the head node, you often have oversized machines running Nextflow or undersized machines running processes. +- Dedicated nodes only. + +The first configuration is a simple Azure Batch compute environment created with Batch Forge. This environment uses the same Batch pool for both the Nextflow head job and task nodes. + +First, add the Azure Batch account credentials to Seqera Platform: + +1. In the Azure portal, go to the Batch account you created and note the Batch account name and region. +1. Go to the **Keys** tab to find the primary access keys for the Batch account and Storage account. +1. In your Seqera Platform workspace, go to the **Credentials** tab and select **Add credentials**. +1. Enter a credential name such as `azure-keys` and select Azure from the **Provider** dropdown. +1. Enter the Batch account name and key, and Storage account name and key. +1. Select **Create** to save the credentials. + +Seqera now has the credentials needed to access your Azure Batch and Storage accounts and make the necessary changes. + +Next, create a compute environment with Batch Forge: + +1. Go to the **Compute Environments** tab and select **Add Compute Environment**. +1. Enter a name such as `1-azure-batch-forge`. +1. Select Azure Batch from the **Provider** dropdown. +1. Select your `azure-keys` credentials. +1. Select the **Region** of your Batch account. +1. Select the `az://work` container in your Storage account. +1. For **VMs type**, select `standard_e2ds_v5`. +1. For **VMs count**, select 4. +1. Enable **Autoscale** and **Dispose resources**. +1. All other options can be left default. Select **Create** to save the compute environment. + +Add the `nextflow-hello` pipeline to your workspace: + +[Add a pipeline][add-pipeline] from your workspace Launchpad with the following settings: + +- Select your Azure Batch compute environment from the dropdown. 
+- For **Pipeline to launch**, enter `https://github.com/nextflow-io/hello`. +- For **Work directory**, enter a subdirectory in the `az://work` container in your Storage account. + +Select **Launch** next to the pipeline name in your workspace Launchpad to complete the launch form and launch the workflow. + +### Option 2. Use a separate node and head pool on Seqera Platform + +**Behavior**: + +- Seqera Platform will submit a Nextflow job and task to the first pool, which uses dedicated VMs. +- The Nextflow job will execute and submit each task to the second pool, which uses low-priority VMs. +- Both pools will autoscale up and down based on the number of waiting tasks. + +**Advantages**: + +- The processes are not bottlenecked by the head node. +- You can set the worker nodes to use a different VM size than the head node. +- Cheaper nodes for work than for running Nextflow. + +**Disadvantages**: + +- More complex to set up. +- Still fairly inflexible. +- You have to wait a long time for nodes to autoscale up and down in response to the work. + +This configuration separates head and task nodes into different Batch pools. + +To create a separate node pool to run all the processes: + +1. Create another compute environment in Seqera Platform, exactly as before: + - **Name**: `2-azure-batch-low-priority` or similar + - **Platform**: Azure Batch + - **Credentials**: `azure-keys` + - **Region**: As before + - **Pipeline work directory**: As before + - **VMs type**: `standard_e2ds_v5` + - **VMs count**: `4` +1. Note the compute environment ID, which is the first item on the compute environment page. +1. In the Azure Portal, go to the Batch account you created earlier. +1. Go to the **Pools** tab and find the pool called `tower-pool-${id}`, where `${id}` is the ID you made a note of earlier. +1. Select **Scale**. +1. An Autoscale formula is displayed. On the second-to-last line, there will be a line that starts with `$TargetDedicatedNodes`. 
Change this string to `$TargetLowPriorityNodes`. +1. Select **Evaluate**, then **Save**. + +You have created a new node pool that uses low-priority VMs, which are cheaper than dedicated VMs. You can now run Nextflow on the first pool, but execute all the processes on the second pool. + +1. On the pipeline launch page, duplicate the existing pipeline, but do not save it yet. +1. Under advanced options, add the following configuration block to the `nextflow.config` text input: + + ```nextflow + process.queue = 'tower-pool-${id}' + ``` + + :::info + Remember to replace `${id}` with the ID of the compute environment you created earlier! + ::: + +1. Save the pipeline as `hello-world-low-priority`. + +Select **Launch** next to the `hello-world-low-priority` pipeline in your workspace Launchpad to complete the launch form and launch the workflow. + +### Option 3. Configure the head pool with a hot node + +**Behavior**: + +- A "hot" head node is left running. +- The head node will run Nextflow as soon as the work is created. +- The worker node pool will autoscale up and down based on the number of waiting tasks. + +**Advantages**: + +- The latency of the pipeline is reduced. + +**Disadvantages**: + +- The always-on head node incurs additional cost. + +This configuration separates the head and task pools as before and leaves a single head node up and running to make the response time faster. + +To create the compute environment with a persistent head node: + +1. Get the ID of the first node pool (`1-azure-batch-forge`). +1. In the Azure Portal, go to the Batch account you created earlier. +1. Go to the **Pools** tab and find the pool called `tower-pool-${id}`, where `${id}` is the ID you made a note of earlier. +1. Select **Scale**. +1. In the line `targetPoolSize = max(0, min($targetVMs, 4));`, change the `0` to `1`. +1. Select **Evaluate**, then **Save**. + +The node pool will increase to a minimum of 1 node. 
Now, when you make adjustments to the pipeline, the head node will not be scaled down. + +Select **Launch** next to the `hello-world-low-priority` pipeline in your workspace Launchpad to complete the launch form and launch the workflow. With this run, it should respond much faster. The _latency_ of the pipeline has improved, although the overall run time will be similar. This effect is more substantial on larger production pipelines. + +:::tip +If you do not wish to continue paying for the head node, scale the node pool back down by replacing the original autoscale formula (`targetPoolSize = max(0, min($targetVMs, 4))`). +You can also delete the compute environment in Platform, which will delete the head node. +::: + +### Option 4. Use the Nextflow autopool feature + +**Behavior**: + +- Seqera will submit a Nextflow job and task to the first pool, which uses dedicated VMs. +- The Nextflow job will create pools in the Azure Batch account based on the pipeline's requirements. +- The pools are called `nf-pool-${id}`, where `${id}` is a unique identifier for the pool. +- The pools are created with the VM size specified in the Nextflow config. +- The pools are created with the autoscale settings specified in the Nextflow config. + +:::info +Nextflow will create a range of pools based on resource sizes and try to reuse them for similar tasks. This means that if you run a process with different CPU, memory, or machineType, it will create a new pool for that process. +::: + +**Advantages**: + +- Nextflow handles the creation and management of pools. +- You can create flexible pools with the correct VM size and autoscale settings. +- The pools are highly configurable via Nextflow configuration. + +**Disadvantages**: + +- You may be overly specific and end up with a lot of pools, which can exhaust your quota for the maximum number of pools. +- This configuration does not use low-priority nodes. 
+ +With the autopool feature, Nextflow automatically creates and manages Azure Batch pools based on your pipeline's requirements. + +To configure your pipeline to use Nextflow autopool: + +1. Duplicate the `hello-world-low-priority` pipeline to a new pipeline called `hello-world-autopool`. +1. Update your Nextflow config to use autopool mode: + +```groovy +process.queue = "auto" +process.machineType = "Standard_E*d_v5" +azure { + batch { + autoPoolMode = true + allowPoolCreation = true + pools { + auto { + autoscale = true + vmCount = 1 + maxVmCount = 4 + } + } + } +} +``` + +3. Save the pipeline. + +Select **Launch** next to the `hello-world-autopool` pipeline in your workspace Launchpad to complete the launch form and launch the workflow. + +### Option 5. Use Nextflow autopool feature with low-priority nodes + +**Behavior**: + +- Seqera submits a Nextflow job and task to the first pool, which uses dedicated VMs. +- The Nextflow job creates pools in the Azure Batch account based on the pipeline's requirements. +- The pools are named `nf-pool-${id}`, where `${id}` is a unique identifier for the pool. +- The pools are created with the VM size specified in the Nextflow config. +- The pools are created with the autoscale settings specified in the Nextflow config. +- These pools use low-priority nodes. It achieves this by modifying the autoscale formula. + +**Advantages**: + +- Nextflow handles the creation and management of pools. +- You can create flexible pools with the correct VM size and autoscale settings. +- This configuration uses low-priority nodes. + +**Disadvantages**: + +- Spot and low-priority nodes can be preempted, which can cause the pipeline to fail. + +To configure your pipeline to use Nextflow autopool with low-priority nodes: + +1. Duplicate the `hello-world-autopool` pipeline to a new pipeline called `hello-world-autopool-low-priority`. +1. 
Update your Nextflow config to use low-priority nodes: + +```groovy +process.queue = "auto" +process.machineType = "Standard_E*d_v5" +azure { + batch { + autoPoolMode = true + allowPoolCreation = true + pools { + auto { + autoscale = true + vmCount = 1 + maxVmCount = 4 + } + } + } +} +``` + +3. Save the pipeline. + +Select **Launch** next to the `hello-world-autopool-low-priority` pipeline in your workspace Launchpad to complete the launch form and launch the workflow. + +### Option 6. Use Entra authentication + +**Behavior**: + +- Seqera authenticates to Azure Batch and Azure Storage using a service principal. +- It submits a job and task to the Azure Batch service using the service principal. +- The task runs Nextflow, which authenticates to Azure Batch and Azure Storage using the managed identity. +- All processes run on the head node as in the first example. + +**Advantages**: + +- No keys or short access tokens are exchanged, increasing security. +- A service principal can have very granular permissions, so you can grant it only the permissions it needs. +- Managed identities can be scoped to a specific resource, so the Nextflow head job has very restricted permissions. +- Different managed IDs can have different permissions, so different compute environments can have different scoped permissions. + +**Disadvantages**: + +- The setup is quite complicated with room for error. +- Errors can be harder to troubleshoot. + +Seqera can utilize an Azure Entra service principal to authenticate and access Azure Batch for job execution and Azure Storage for data management, and Nextflow can authenticate to Azure services using a managed identity. This method offers enhanced security compared to access keys, but must run on Azure infrastructure. + +See [Microsoft Entra](https://www.nextflow.io/docs/latest/azure.html#microsoft-entra) in the Nextflow documentation for more information. + +#### Create a service principal for Seqera to use for authentication + +1. 
[Create an Azure service principal](https://learn.microsoft.com/en-us/entra/identity-platform/howto-create-service-principal-portal). +1. [Assign roles to the service principal](https://learn.microsoft.com/en-us/azure/role-based-access-control/role-assignments-portal?tabs=current). +1. [Get the Service Principal ID, Tenant ID, and Client Secret](https://learn.microsoft.com/en-us/entra/identity-platform/howto-create-service-principal-portal#option-3-create-a-new-client-secret). +1. [Add to Seqera credentials](https://docs.seqera.io/platform/24.2/compute-envs/azure-batch#entra-service-principal). + +In Seqera: + +1. Add new credentials with the name `entra-keys` and select the Azure **Provider**. +1. Add the Service Principal ID, Tenant ID and Client Secret. +1. Select **Create** to save the credentials. + +#### Create a managed identity for Nextflow to use for authentication + +Back in the Azure Portal: + +1. [Create a managed identity](https://learn.microsoft.com/en-us/entra/identity/managed-identities-azure-resources/how-manage-user-assigned-managed-identities?pivots=identity-mi-methods-azp) +1. [Assign the relevant roles to the managed identity](https://learn.microsoft.com/en-us/azure/role-based-access-control/role-assignments-portal?tabs=current). See [Required role assignments](https://www.nextflow.io/docs/latest/azure.html#required-role-assignments) for Nextflow requirements. +1. Note the managed identity client ID for later. +1. In the Azure Portal, go to the Batch account you created earlier. +1. Go to the **Pools** tab and find the pool called `tower-pool-${id}`, where `${id}` is the ID of the head node pool created earlier. +1. Select **Identity**. +1. Select **Add User Assigned Identity**. +1. Select the managed identity created earlier. +1. Select **Add**. + +Processes running on this pool can now use the managed identity to authenticate to Azure Batch and Storage. + +In Seqera: + +1. 
Add a new compute environment with the name `entra-mi` and select the Azure Batch **Provider** type. +1. For **Location**, select the same region as your Batch account. +1. For **Config mode**, select Manual. +1. For **Compute pool**, select the pool you added the managed identity to earlier (`tower-pool-${id}`). +1. For **Managed Identity Client ID**, enter the client ID of the managed identity created earlier. + +Duplicate the `hello-world-autopool-low-priority` pipeline and save it as `hello-world-entra-mi`. + +Select **Launch** next to the `hello-world-entra-mi` pipeline in your workspace Launchpad to complete the launch form and launch the workflow. + +The pipeline will run as before, but using the managed identity to authenticate to Azure Batch and Storage. No keys or storage required. + +:::note +You can also use User Subscription mode instead of Batch Managed here, but this is beyond the scope of this tutorial. +::: + +### Option 7. Use a node pool attached to a VNet + +**Behavior**: + +- Each node is attached to the VNet and uses the security and networking rules of that virtual network subnetwork. +- All other behaviour is as normal. + +**Advantages**: + +- Security can be increased by restricting the virtual network subnet. +- Exchange of data can be faster and cheaper than other services. + +**Disadvantages**: + +- It requires fairly complicated setup. +- If security is too restrictive, it can fail silently and be unable to report the error state. + +It is common to attach Azure Batch pools to a virtual network. This is useful to connect to other resources in the same VNet or place things behind enhanced security. Seqera Platform does not support this feature directly, so you must manually create an Azure Batch pool. + +See [Create a Nextflow-compatible Azure Batch pool](../../compute-envs/azure-batch#create-a-nextflow-compatible-azure-batch-pool) to create an Azure Batch pool manually that is compatible with Seqera and Nextflow. 
Use the following settings: + +- Name & ID: `3-azure-batch-vnet` +- Add the managed identity created earlier as a user-assigned managed identity. +- VMs type: `standard_e2ds_v5` +- Use the autoscale formula described in the documentation, with a minimum size of 0 and a maximum size of 4. +- For Virtual network, create a new virtual network with the default subnet. You can add this to a new resource group here. + +In practice, you are more likely to connect an Azure Batch Node pool to an existing virtual network that is connected to other resources, such as Seqera Platform or the Azure Storage Account. In this instance, connecting it to a VNet with public internet access will route the network traffic via the virtual network while still allowing you to perform every action. + +Back in Seqera Platform, add a new Azure Batch compute environment: + +1. Add a new compute environment with the name `3-azure-batch-vnet` and select the Azure Batch **Provider** type. +1. For **Location**, select the same region as your Batch account. +1. For **Credentials**, select the service principal credentials. +1. For **Config mode**, select Manual. +1. For **Compute pool**, select the Compute pool name `3-azure-batch-vnet`. +1. For **Managed Identity Client ID**, enter the client ID of the managed identity created earlier. + +Duplicate the **original** `hello-world` pipeline and save it as `hello-world-vnet`. + +Select **Launch** next to the `hello-world-vnet` pipeline in your workspace Launchpad to complete the launch form and launch the workflow. + +The pipeline runs as before, but it will run on the node pool attached to the VNet. It will resemble a normal Azure Batch pipeline run. + +Using this technique allows you to run pipelines on Azure Batch with more restrictive networking and security requirements. + +### Option 8. 
Use a node pool attached to a VNet with worker nodes attached to the same VNet + +**Behavior**: + +- We use a separate head node pool to run Nextflow, along with automatically created Nextflow autoscale pools to run processes. +- Each worker node is attached to the VNet and uses the security and networking rules of that virtual network subnetwork. + +**Advantages**: + +- Security can be increased by restricting the virtual network subnet. +- Exchange of data can be faster and cheaper than other services. +- Additionally, you get the advantages of using worker nodes with autopools. + +**Disadvantages**: + +- The setup is very complicated now and errors are likely to occur. +- Errors can be hard to troubleshoot. + +Finally, you can combine some of the previous approaches. Nextflow can create and modify Azure Batch pools based on the pipeline requirements. You can also attach Azure Batch pools to a VNet. Next, attach the worker nodes to the same VNet. + +To achieve this, the following requirements must be met: + +- The pipeline must be launched on the node pool attached to the VNet. +- The managed identity must be used to authenticate to Azure Batch and Storage. +- The managed identity must have permissions to create resources attached to the VNet. +- Nextflow creates node pools attached to the VNet. + +Do the following: + +1. Duplicate the `hello-world-entra-mi` pipeline, but modify the compute environment to `3-azure-batch-vnet` and change the pipeline name to `hello-world-vnet`. +1. Check the virtual network string under the pool details in the Azure Portal, under the **Network Configuration** section. The value should be a Subnet ID, such as `/subscriptions/${subscriptionId}/resourceGroups/${resourceGroup}/providers/Microsoft.Network/virtualNetworks/${vnetName}/subnets/${subnetName}`. Save this value. +1. 
Change the Nextflow configuration under the **Advanced** tab to include a virtual network with the autopools: + +```nextflow +process.queue = "auto" +process.machineType = "Standard_E*d_v5" +azure { + batch { + autoPoolMode = true + allowPoolCreation = true + pools { + auto { + autoscale = true + vmCount = 1 + maxVmCount = 4 + virtualNetwork = '/subscriptions/${subscriptionId}/resourceGroups/${resourceGroup}/providers/Microsoft.Network/virtualNetworks/${vnetName}/subnets/${subnetName}' + } + } + } +} +``` + +Select **Launch** next to the `hello-world-vnet` pipeline in your workspace Launchpad to complete the launch form and launch the workflow. + +The pipeline runs as before, but using the managed identity to authenticate to Azure Batch and Storage. It also creates worker pools attached to the VNet. + +### Clear up resources + +Once you have completed setup and workflow execution, you can delete the pipelines and compute environments from Seqera. + +In Azure, you can delete the Batch account, which will delete all pools, jobs, and tasks. You can then delete the Storage account. + +If you wish to keep the Azure resources, you can remove each pool within a Batch account and mark any active jobs as terminated to free up any quotas on your Azure Batch account. 
+ +[install-azure-cli]: https://learn.microsoft.com/en-us/cli/azure/install-azure-cli +[install-seqera-cli]: ../../cli/installation +[nextflow-working-directory]: https://www.nextflow.io/docs/latest/cache-and-resume.html#work-directory +[create-org-workspace]: ../../getting-started/workspace-setup +[add-pipeline]: ../../getting-started/quickstart-demo/add-pipelines#add-from-the-launchpad \ No newline at end of file diff --git a/platform-enterprise/enterprise/advanced-topics/seqera-container-images.md b/platform-enterprise/enterprise/advanced-topics/seqera-container-images.md new file mode 100644 index 000000000..af8a462c9 --- /dev/null +++ b/platform-enterprise/enterprise/advanced-topics/seqera-container-images.md @@ -0,0 +1,50 @@ +--- +title: "Legacy Seqera container image registries" +description: Legacy Seqera container deployments +date: "12 Apr 2023" +tags: [on-prem, prerequisites, configuration] +--- + +:::caution +The `cr.seqera.io` container registry is the default Seqera Enterprise container image registry from version 22.4. Using the AWS ECR Seqera container registry in existing installations is still supported but will be deprecated on June 1, 2025. +::: + +Seqera publishes legacy Seqera Enterprise containers to a private Elastic Container Registry (ECR) on AWS. Retrieve them with the following steps: + +1. **Provide Seqera with your AWS Account ID.** + + Supply this value to the Seqera representative managing your onboarding and wait for confirmation that it has been added to the ECR repository policy as an approved Principal. + +2. **Retrieve a local copy of the container.** + + With the `docker compose` deployment method, you must retrieve container copies for local use: + + 1. Install [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) on the target machine. + + 2. 
Configure the AWS CLI with an IAM User with at least these privileges: + + ```bash + ecr:BatchGetImage + ecr:GetAuthorizationToken + ecr:GetDownloadUrlForLayer + ``` + 3. Authenticate Docker against the Seqera ECR: + + ```bash + # AWS CLI v2 + aws ecr get-login-password --region eu-west-1 | \ + docker login --username AWS --password-stdin 195996028523.dkr.ecr.eu-west-1.amazonaws.com + + # AWS CLI v1 + $(aws ecr get-login --registry-ids 195996028523 --region eu-west-1 --no-include-email) + ``` + + 4. Pull the containers to your machine: + + ```bash + export REPOSITORY_URL="195996028523.dkr.ecr.eu-west-1.amazonaws.com/nf-tower-enterprise" + export TAG="v22.3.1" + + docker pull ${REPOSITORY_URL}/backend:${TAG} + docker pull ${REPOSITORY_URL}/frontend:${TAG} + ``` \ No newline at end of file diff --git a/platform-enterprise/enterprise/advanced-topics/use-iam-role.md b/platform-enterprise/enterprise/advanced-topics/use-iam-role.md new file mode 100644 index 000000000..580957482 --- /dev/null +++ b/platform-enterprise/enterprise/advanced-topics/use-iam-role.md @@ -0,0 +1,91 @@ +--- +title: "Use AWS IAM roles" +description: Use AWS IAM Roles instead of user credentials +date: "12 Apr 2023" +tags: [aws, iam, role] +--- + +AWS-based customers can configure Seqera Platform to authenticate to AWS services like Batch with an IAM Role instead of IAM user credentials. + +:::note +The provided policies were designed with certain assumptions: + +1. **IAM Policy**: Seqera must have full access to identified S3 buckets. +2. **Trust Policy**: The Role should be assumable by EC2 or EKS (depending on your Seqera deployment), and only specifically-named IAM actors. + +You may wish to limit S3 access to specific prefixes, and/or Role assumption to more specific platforms. +::: + +## Configure the Seqera IAM policy + +1. Download the [custom IAM Policy](https://github.com/seqeralabs/nf-tower-aws/blob/master/forge/forge-policy.json). +1. 
Download the [S3 bucket write policy](https://github.com/seqeralabs/nf-tower-aws/blob/master/launch/s3-bucket-write.json). +1. Modify the S3 bucket write policy by adding `"arn:aws:s3:::YOUR-BUCKET-NAME"` for each bucket that will be used as a pipeline work directory. +1. Revise the scope of access to a specific prefix in the S3 buckets, if needed (modify `"arn:aws:s3:::YOUR-BUCKET-NAME/*"`). + +:::note +AWS credentials in Seqera are used to access S3 cloud buckets in [Data Explorer](../../data/data-explorer). If the scope of access for the Seqera IAM policy excludes buckets or directories you need to access in Data Explorer, create additional Seqera credentials with S3 access specific to your Data Explorer needs. +::: + +## Modify the Seqera IAM role trust policy (optional) + +You can optionally create a Seqera role trust policy to allow EC2 instances or EKS clusters (depending on your Seqera deployment) to assume the Seqera IAM role. + +1. Download the [Seqera IAM role trust policy](https://github.com/seqeralabs/nf-tower-aws/blob/master/launch/seqera-role-trust-policy.json). +1. Replace `YOUR-AWS-ACCOUNT` with your AWS Account ID. +1. Replace `USER-OR-ROLE/USER-OR-ROLE-ID` with the users and or roles that must be able to assume the Seqera IAM role. + +## Create the IAM artifacts + +[Create the IAM artifacts](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-service.html) in your AWS account. + +1. Navigate to the folder containing your configured IAM documents: + + ```bash + cd + ``` + +2. Create the **Role**: + + ```bash + aws iam create-role --role-name Seqera-Role --assume-role-policy-document file://Seqera-Role-Trust-Policy.json + ``` + +3. Create an **inline policy** for the Role: + + ```bash + aws iam put-role-policy --role-name Seqera-Role --policy-name Seqera-Role-Policy --policy-document file://Seqera-Role-Policy.json + ``` + +4. 
Create an **instance profile**: + + ```bash + aws iam create-instance-profile --instance-profile-name Seqera-Instance + ``` + +5. **Bind** the Role to the instance profile: + + ```bash + aws iam add-role-to-instance-profile --instance-profile-name Seqera-Instance --role-name Seqera-Role + ``` + +## Configure Seqera + +With the IAM artifacts created, update your application configuration: + +1. Add the following entry to your `tower.env` + + ```env + TOWER_ALLOW_INSTANCE_CREDENTIALS=true + ``` + +2. Restart the Seqera application. + +3. Verify that the change took effect by querying the Seqera instance `service-info` endpoint: + + ```bash + curl -X GET "https://YOUR-TOWER-DOMAIN/api/service-info" -H "Accept: application/json" | jq ".serviceInfo.allowInstanceCredentials" + ``` + +4. Log in to Seqera and create a new AWS credential. You are now prompted for an AWS `arn` instead of access keys. + diff --git a/platform-enterprise/enterprise/configuration/authentication.md b/platform-enterprise/enterprise/configuration/authentication.md new file mode 100644 index 000000000..0c8bd6de9 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/authentication.md @@ -0,0 +1,188 @@ +--- +title: "Authentication" +description: Seqera configuration options for authentication +date: "21 Apr 2023" +tags: [authentication, configuration] +--- + +Seqera Platform supports email and various OAuth providers for login authentication. + +:::note +Platform login sessions remain active as long as the application browser window remains open and active. When the browser window is terminated, automatic logout occurs within 6 hours by default. +::: + +## OpenID Connect configuration + +Seqera Enterprise can be configured to integrate with several authentication providers to enable single sign-on (SSO) login. + +Configure OIDC authentication with the following environment variables: + +- `TOWER_OIDC_CLIENT`: The client ID provided by your authentication service. 
+- `TOWER_OIDC_SECRET`: The client secret provided by your authentication service. +- `TOWER_OIDC_ISSUER`: The authentication service URL to which Seqera connects to authenticate the sign-in request. + +Some providers require the full authentication service URL while others require only the SSO root domain (without the trailing sub-directories). + +In your OpenID provider settings, specify the following URL as a callback address or authorized redirect: + +``` +https://<your-domain>/oauth/callback/oidc +``` + +## Identity providers + +The following identity providers are currently supported: + +- GitHub +- Google +- Keycloak +- Entra +- Okta + +### GitHub identity provider + +To use GitHub as SSO provider for Seqera: + +1. Register your Seqera instance as a [GitHub OAuth App](https://docs.github.com/en/apps/oauth-apps/building-oauth-apps/creating-an-oauth-app) + in your organization settings page. + +2. When creating the OAuth App, specify the following path as callback URL: `https://<your-domain>/oauth/callback/github` (replace `<your-domain>` with the domain name of your deployment). + +3. Include the following variables in the backend environment configuration: + + - `TOWER_GITHUB_CLIENT`: The client ID provided by GitHub when registering the new OAuth App. + - `TOWER_GITHUB_SECRET`: The client secret provided by GitHub when registering the new OAuth App. + +### Google Cloud identity provider + +To use Google as the identity provider for Seqera: + +1. Visit the [Google Cloud console](https://console.developers.google.com) and create a new project. +2. From the sidebar, select the **Credentials** tab. +3. Select **Create credentials > OAuth client ID**. +4. On the next page, select **Web Application** type. +5. Enter the redirect URL: `https://<your-domain>/oauth/callback/google`. Replace `<your-domain>` with the domain name of your deployment. +6. Confirm the operation. You'll then receive a `client ID` and `secret ID`. +7. 
Include the `client ID` and `secret ID` in the following variables in the Seqera backend environment configuration: + +- `TOWER_GOOGLE_CLIENT`: The client ID provided by Google above. +- `TOWER_GOOGLE_SECRET`: The client secret provided by Google above. + +### Keycloak identity provider + +To use [Keycloak](https://www.keycloak.org/) as the identity provider for Seqera, configure a new client in your Keycloak service with these steps: + +1. In the **Realm settings**, ensure the **Endpoints** field includes _OpenID Endpoint Configuration_. +2. Open the **Client** page and select **Create** to set up a new client for Seqera. +3. In the **Settings** tab, include the following fields: + - **Client Id**: An ID of your choice, e.g., `seqera` + - **Enabled**: `ON` + - **Client Protocol**: `openid-connect` + - **Access Type**: `confidential` + - **Standard Flow Enabled**: `ON` + - **Implicit Flow Enabled**: `OFF` + - **Direct Access Grants Enabled**: `ON` + - **Valid Redirect URIs**: `https://<your-domain>/oauth/callback/oidc`, e.g., `http://localhost:8000/oauth/callback/oidc` +4. Select **Save**. +5. In the **Credentials** tab, note the **Secret** field. +6. In the **Keys** tab, set the field **Use JWKS URL** to `OFF`. +7. Complete the setup in Seqera by adding the following environment variables to your configuration: + + - `TOWER_OIDC_CLIENT`: The client ID assigned in step 3 above. + - `TOWER_OIDC_SECRET`: The contents of the **Secret** field noted in step 5 above. + - `TOWER_OIDC_ISSUER`: The Keycloak issuer URL. Locate this on the **Realm Settings** page, from _OpenID Configuration_ in the **Endpoints** field. From the JSON payload displayed, copy the value associated with `issuer`, e.g., `http://localhost:9000/auth/realms/master`. + +### Entra ID OIDC provider + +To use [Entra ID OIDC](https://learn.microsoft.com/en-us/entra/identity-platform/v2-protocols-oidc) as the identity provider for Seqera, configure a new client in your Entra ID service: + +1. 
Log in to the [Azure portal](https://portal.azure.com/). +2. Go to the **Entra ID** service. +3. Select **Manage Tenants**. +4. Create a new **Tenant**, e.g., `SeqeraOrg`. +5. Select the new tenant. +6. Go to **App Registrations**. +7. Select **New Registration** and specify the following: + + 1. A name for the application. + 2. The scope of user verification (e.g., single tenant, multi-tenant, personal MSFT accounts, etc). + +:::note +The Entra ID app must have user consent settings configured to **Allow user consent for apps** to ensure that admin approval is not required for each application login. See [User consent settings](https://learn.microsoft.com/en-us/azure/active-directory/manage-apps/configure-user-consent?pivots=portal#configure-user-consent-settings). +::: + +8. Specify the **Redirect** (callback) URI. + +:::note +This must be an `https://` URI, per Microsoft's requirements. +::: + +9. Open the newly-created app: + + 1. Note the `Application (client) ID` on the **Essentials** table. + 2. Generate **Client credentials** on the **Essentials** table. + 3. Select **Endpoints** and note the OpenID Connect metadata document URI. + +10. Add users to your tenant as required. +11. Complete the setup in Seqera by adding the following environment variables to your configuration: + + ```bash + TOWER_OIDC_CLIENT= + TOWER_OIDC_SECRET= + TOWER_OIDC_ISSUER= (e.g. https://login.microsoftonline.com/000000-0000-0000-00-0000000000000/v2.0) + ``` +12. Add `auth-oidc` to the `MICRONAUT_ENVIRONMENTS` environment variable for both the `cron` and `backend` services. + +### Okta identity provider + +To use [Okta](https://www.okta.com/) as the identity provider for Seqera: + +1. Sign in to your Okta organization with your administrator account. +2. From the **Admin Console** side navigation, select **Applications > Applications**. +3. Select **Add Application**. +4. Select **Create New App**. +5. Select the **OpenID Connect** sign-on method. +6. Select **Create**. +7. 
Enter a name for your new app integration, e.g., `Seqera`. +8. In **Configure OpenID Connect**, add the following redirect URIs: + + - **Sign-in redirect URIs** : `https:///oauth/callback/oidc` + - **Sign-out redirect URIs** : `https:///logout` + +9. Select **Save**. +10. Okta automatically redirects to your new application settings. Complete the setup in Seqera by adding the following environment variables to your configuration: + + - `TOWER_OIDC_CLIENT`: Copy the **Client ID** value from **General > Client Credentials** for the corresponding app client configuration. + - `TOWER_OIDC_SECRET`: Copy the **Client secret** value from **General > Client Credentials** for the corresponding app client configuration. + - `TOWER_OIDC_ISSUER`: Copy the Okta issuer URL from **Sign On > OpenID Connect ID Token** for the corresponding app client configuration. + +:::note +Connection strings can differ based on the issuer type. This should be verified via the Okta console. +::: + +## Configure user access allow list + +When using a public authentication provider such as Google or GitHub, you may need to restrict the access to specific user email addresses or domains. + +Replace `` in the snippets below with `github`, `google`, or `oidc`. `oidc` is used to specify any other authentication service based on OpenID Connect, such as Okta, Entra ID, Keycloak, etc. Include each provider separately if you specify more than one. + +The allow list entries are case-insensitive. 
+ +**Environment variables** + +```env +TOWER_AUTH_<PROVIDER>_ALLOW_LIST=*@foo.com,user1@bar.com +``` + +**tower.yml** + +```yaml +tower: +  auth: +    <provider>: +      allow-list: +        - "*@foo.com" +        - "me@bar.com" +``` diff --git a/platform-enterprise/enterprise/configuration/aws_parameter_store.md b/platform-enterprise/enterprise/configuration/aws_parameter_store.md new file mode 100644 index 000000000..facd1746d --- /dev/null +++ b/platform-enterprise/enterprise/configuration/aws_parameter_store.md @@ -0,0 +1,68 @@ +--- +title: "AWS Parameter Store" +description: Configure values for Seqera configuration with AWS Parameter Store +date: "21 Apr 2023" +tags: [configuration, aws, parameter, securestring] +--- + +From version 23.1, Seqera Platform Enterprise can fetch configuration values from the AWS Parameter Store. + +:::caution +`TOWER_DB_USER`, `TOWER_DB_PASSWORD`, and `TOWER_DB_URL` values must be specified using **environment variables** during initial Seqera Enterprise deployment in a new environment. A new installation will fail if DB values are only defined in `tower.yml` or the AWS Parameter Store. + +After the database has been created, these values can be added to AWS Parameter Store entries and removed from your environment variables. +::: + +## Configuration values not supported in AWS Parameter Store + +Due to the order of operations when deploying Seqera Enterprise, some configuration values can only be retrieved from **environment variables** (`tower.env`). The following configuration values are not supported by AWS Parameter Store and must be set as environment variables: + +| Environment Variable | Description | Value | +| ------------------------- | --------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | +| `TOWER_DB_USER` | The user account to access your database. 
If you are using an external database, you must create this user manually. **For installation in a new environment, this value must be set as an environment variable.** | Default: `tower` | +| `TOWER_DB_PASSWORD` | The user password to access your database. If you are using an external database, you must create this password manually. **For installation in a new environment, this value must be set as an environment variable.** | Default: `tower` | +| `TOWER_DB_URL` | The URL to access your database. **For installation in a new environment, this value must be set as an environment variable.** | Example: `jdbc:mysql://db:3306/tower` | +| `TOWER_APP_NAME` | Application name. To run multiple instances of the same Seqera account, each instance must have a unique name, e.g., `tower-dev` and `tower-prod`. **Can also be set in `tower.yml` with `tower.appName`.** | Default: `tower` | +| `TOWER_ENABLE_AWS_SES` | Set `true` to enable AWS Simple Email Service for sending Seqera emails instead of SMTP. | Default: `false` | +| `TOWER_ENABLE_PLATFORMS` | A comma-separated list of execution backends to enable. **At least one is required.** | `altair-platform,awsbatch-platform,azbatch-platform,eks-platform,googlebatch-platform,gke-platform,k8s-platform,local-platform,lsf-platform,moab-platform,slurm-platform` | +| `TOWER_ENABLE_UNSAFE_MODE` | Set to `true` to allow HTTP connections to Seqera. HTTP must not be used in production deployments. HTTPS is used by default from version 22.1.x. | Default: `false` | + +## Configure Seqera to use AWS Parameter Store values + +To enable Seqera use AWS Parameter Store values: + +1. Grant [AWS Parameter Store permissions](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-paramstore-access.html) to your Seqera host instance. +2. Add `TOWER_ENABLE_AWS_SSM=true` in the `tower.env` configuration file. +3. Create individual parameters in the AWS Parameter Store (see below). +4. 
Start your Seqera instance and confirm the following entries appear in the **backend** container log: + +```bash +[main] - INFO i.m.context.DefaultBeanContext - Reading bootstrap environment configuration +[main] - INFO i.m.d.c.c.DistributedPropertySourceLocator - Resolved 2 configuration sources from client: compositeConfigurationClient(AWS Parameter Store) +``` + +## Create configuration values in AWS Parameter Store + +Store Seqera configuration values as individual parameters in the AWS Parameter Store. + +:::caution +The default application name is `tower-app`. To deploy multiple instances from the same Seqera Enterprise account, set a custom application name for each instance with the `micronaut.application.name` value in your `tower.yml` configuration file. +::: + +We recommend storing sensitive values, such as database passwords, as SecureString-type parameters. These parameters require additional IAM KMS key permissions to be decrypted. + +Seqera does not support StringList parameters. Configuration parameters with multiple values can be created as comma-separated lists of String type. + +To create Seqera configuration parameters in AWS Parameter Store, do the following: + +1. Navigate to the **Parameter Store** from the **AWS Systems Manager Service** console. +2. From the **My parameters** tab, select **Create parameter** and populate as follows: + +| Field | Description | +| ----- | ----------- | +| **Name** | Use the format `/config//`. `` follows the `tower.yml` nesting hierarchy. See the [configuration overview](./overview) for specific paths.
**Example: `/config/tower-app/mail.smtp.password : `** | +| **Description** | (Optional) Description for the parameter. | +| **Tier** | Select **Standard**. | +| **Type** | Use **SecureString** for sensitive values like passwords and tokens. Use **String** for everything else. | +| **Data type** | Select **text**. | +| **Value** | Enter a plain text value (this is the configuration value used in Seqera). | diff --git a/platform-enterprise/enterprise/configuration/configtables/compute_env.yml b/platform-enterprise/enterprise/configuration/configtables/compute_env.yml new file mode 100644 index 000000000..bb3096970 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/compute_env.yml @@ -0,0 +1,16 @@ +--- +- + Environment variable: '`TOWER_ENABLE_PLATFORMS`' + Description: > + Comma-separated list of the execution backends to enable. **At least one is required**. + Value: '`altair-platform,awsbatch-platform,azbatch-platform,eks-platform,googlebatch-platform,gke-platform,k8s-platform,local-platform,lsf-platform,moab-platform,slurm-platform`' +- + Environment variable: '`MICRONAUT_ENVIRONMENTS`' + Description: > + Configuration values to control the behavior of the Seqera `cron` and `backend` containers. **Do not edit these values** + Value: 'Backend configuration: `prod`, `redis`, `ha` Cron configuration: `prod`, `redis`, `cron`' +- + Environment variable: '`TOWER_FORGE_PREFIX`' + Description: > + Override the default `TowerForge` prefix, appended to AWS resources created by Batch Forge, with a custom value. 
+ Value: 'Default: `TowerForge`' diff --git a/platform-enterprise/enterprise/configuration/configtables/crypto_aws.yml b/platform-enterprise/enterprise/configuration/configtables/crypto_aws.yml new file mode 100644 index 000000000..5644e1a74 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/crypto_aws.yml @@ -0,0 +1,16 @@ +--- +- + AWS Parameter Store: '`{prefix}/tower/crypto/secretKey`' + Description: > + The secret key used to encrypt user credentials (**required**). + Value: 'Random string of alphanumeric characters.' +- + AWS Parameter Store: '`{prefix}/micronaut/security/token/jwt/signatures/secret/generator/secret`' + Description: > + The secret used to generate the login JWT token (**required**). + Value: 'Random string of 35 characters or more.' +- + AWS Parameter Store: '`{prefix}/micronaut/security/token/jwt/generator/refresh-token/secret`' + Description: > + The secret used to generate the login refresh token (**required**). + Value: 'Random string of 35 characters or more.' \ No newline at end of file diff --git a/platform-enterprise/enterprise/configuration/configtables/crypto_env.yml b/platform-enterprise/enterprise/configuration/configtables/crypto_env.yml new file mode 100644 index 000000000..77719f90f --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/crypto_env.yml @@ -0,0 +1,11 @@ +--- +- + Environment variable: '`TOWER_CRYPTO_SECRETKEY`' + Description: > + The secret key used to encrypt user credentials (**required**). + Value: 'Random string of alphanumeric characters.' +- + Environment variable: '`TOWER_JWT_SECRET`' + Description: > + The secret used to generate the login JWT token (**required**). + Value: 'Random string of 35 characters or more.' 
diff --git a/platform-enterprise/enterprise/configuration/configtables/crypto_yml.yml b/platform-enterprise/enterprise/configuration/configtables/crypto_yml.yml new file mode 100644 index 000000000..79063fd8f --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/crypto_yml.yml @@ -0,0 +1,16 @@ +--- +- + tower.yml: '`tower.crypto.secretKey`' + Description: > + The secret key used to encrypt user credentials (**required**). + Value: 'Random string of alphanumeric characters.' +- + tower.yml: '`micronaut.security.token.jwt.signatures.secret.generator.secret`' + Description: > + The secret used to generate the login JWT token (**required**). + Value: 'Random string of 35 characters or more.' +- + tower.yml: '`micronaut.security.token.jwt.generator.refresh-token.secret`' + Description: > + The secret used to generate the login refresh token (**required**). + Value: 'Random string of 35 characters or more.' \ No newline at end of file diff --git a/platform-enterprise/enterprise/configuration/configtables/data_features_env.yml b/platform-enterprise/enterprise/configuration/configtables/data_features_env.yml new file mode 100644 index 000000000..bb754b882 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/data_features_env.yml @@ -0,0 +1,56 @@ +--- +- + Environment variable: '`TOWER_DATA_EXPLORER_ENABLED`' + Description: > + Enable [Data Explorer](https://docs.seqera.io/platform-cloud/data/data-explorer) in all workspaces. To mount data inside a Studio, you must enable Data Explorer. + Value: 'Default: `false`' +- + Environment variable: '`TOWER_DATA_EXPLORER_CLOUD_DISABLED_WORKSPACES`' + Description: > + Disable Data Explorer automatic cloud bucket retrieval per workspace. + Value: 'Example: `,`' +- + Environment variable: '`TOWER_DATA_EXPLORER_CREDENTIALS_TTL`' + Description: > + Data Explorer interval to fetch credentials from cloud providers for new buckets. 
+ Value: 'Default: `20m`' +- + Environment variable: '`TOWER_DATA_EXPLORER_LINK_STORE_TTL`' + Description: > + Data Explorer cloud bucket cache duration. + Value: 'Default: `30m`' +- + Environment variable: '`TOWER_DATA_EXPLORER_LINK_STORE_BACKOFF`' + Description: > + The amount of time that elapses after an error, before a retry attempt is made. + Value: 'Default: `10m`' +- + Environment variable: '`TOWER_DATA_EXPLORER_MAX_RETRIES`' + Description: > + The number of retries Data Explorer will attempt to fetch cloud buckets in the event of temporary errors. + Value: 'Default: `3`' +- + Environment variable: '`TOWER_DATA_EXPLORER_LINK_STORE_RETRY_AFTER`' + Description: > + The period of time that retry attempts will be made even when max retries has been exceeded. + Value: 'Default: `1d`' +- + Environment variable: '`TOWER_CONTENT_MAX_FILE_SIZE`' + Description: > + Data Explorer download file size limit. **Increasing this value may degrade performance.** + Value: 'Default: `25MB`' +- + Environment variable: '`TOWER_DATA_STUDIO_CONNECT_URL`' + Description: > + The URL of the Studios connect proxy. The connect proxy is used internally by Seqera Platform. See [Studios deployment](https://docs.seqera.io/platform/24.2/enterprise/studios). + Value: 'Example: `https://connect.example.com/`' +- + Environment variable: '`TOWER_OIDC_REGISTRATION_INITIAL_ACCESS_TOKEN`' + Description: > + An access token used to register new clients in Seqera Platform. Any alphanumeric value is allowed. See [Studios deployment](https://docs.seqera.io/platform/24.2/enterprise/studios). + Value: 'd5XDoRzHpWo1c............mDnfBpB' +- + Environment variable: '`TOWER_OIDC_PEM_PATH`' + Description: > + The file path to a PEM certificate used to sign the OIDC tokens for the OpenID connect provider. See [Studios deployment](https://docs.seqera.io/platform/24.2/enterprise/studios). 
+ Value: 'Example: `/data-studios-rsa.pem`' diff --git a/platform-enterprise/enterprise/configuration/configtables/data_features_yml.yml b/platform-enterprise/enterprise/configuration/configtables/data_features_yml.yml new file mode 100644 index 000000000..b2f0cb28c --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/data_features_yml.yml @@ -0,0 +1,11 @@ +--- +- + tower.yml: '`tower.content.max-file-size`' + Description: > + You can set the downloadable file size limit for Data Explorer and workflow reports. Increasing this value may degrade Platform performance. The supported suffixes are `GB`, `MB`, and `KB`. + Value: 'Default: `25MB`' +- + tower.yml: '`tower.data.data-link-items.tree-list.max-allowed`' + Description: > + You can set the maximum number of items listed in Data Explorer. Increasing this value may degrade Platform performance. + Value: 'Default: `1000`' diff --git a/platform-enterprise/enterprise/configuration/configtables/db_aws.yml b/platform-enterprise/enterprise/configuration/configtables/db_aws.yml new file mode 100644 index 000000000..b17c3b336 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/db_aws.yml @@ -0,0 +1,43 @@ +--- +- + AWS Parameter Store: '`{prefix}/datasources/default/username`' + Description: > + The user account to access your database. + If you are using an external database, you must create this user manually. **For installation in a new environment, this value must be set as an environment variable.** + Value: 'Default: `tower`' +- + AWS Parameter Store: '`{prefix}/datasources/default/password`' + Description: > + The user password to access your database. + If you are using an external database, you must create this password manually. **For installation in a new environment, this value must be set as an environment variable.** + Value: 'Default: `tower`' +- + AWS Parameter Store: '`{prefix}/datasources/default/url`' + Description: > + The URL to access your database. 
**For installation in a new environment, this value must be set as an environment variable.** + Value: '`jdbc:mysql://db:3306/tower`' +- + AWS Parameter Store: '`{prefix}/datasources/default/minPoolSize`' + Description: > + Minimum database connection pool size. + Value: 'Default: `5`' +- + AWS Parameter Store: '`{prefix}/datasources/default/maxPoolSize`' + Description: > + Maximum database connection pool size. + Value: 'Default: `10`' +- + AWS Parameter Store: '`{prefix}/datasources/default/maxLifetime`' + Description: > + Maximum lifespan of database connections, in milliseconds. + Value: 'Default: `1800000`' +- + AWS Parameter Store: '`{prefix}/redisson/uri`' + Description: > + The URL to access your Seqera Redis instance. + Value: 'Example: `redis://redis:6379`' +- + AWS Parameter Store: '`{prefix}/redisson/password`' + Description: > + The password of your Seqera Redis instance. + Value: diff --git a/platform-enterprise/enterprise/configuration/configtables/db_env.yml b/platform-enterprise/enterprise/configuration/configtables/db_env.yml new file mode 100644 index 000000000..396420883 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/db_env.yml @@ -0,0 +1,43 @@ +--- +- + Environment variable: '`TOWER_DB_USER`' + Description: > + The user account to access your database. + If you are using an external database, you must create this user manually. + Value: 'Default: `tower`' +- + Environment variable: '`TOWER_DB_PASSWORD`' + Description: > + The user password to access your database. + If you are using an external database, you must create this password manually. + Value: 'Default: `tower`' +- + Environment variable: '`TOWER_DB_URL`' + Description: > + The URL to access your database. + Value: 'Example: `jdbc:mysql://db:3306/tower`' +- + Environment variable: '`TOWER_DB_MIN_POOL_SIZE`' + Description: > + Minimum database connection pool size. 
+ Value: 'Default: `5`' +- + Environment variable: '`TOWER_DB_MAX_POOL_SIZE`' + Description: > + Maximum database connection pool size. + Value: 'Default: `10`' +- + Environment variable: '`TOWER_DB_MAX_LIFETIME`' + Description: > + Maximum lifespan of database connections, in milliseconds. + Value: 'Default: `1800000`' +- + Environment variable: '`TOWER_REDIS_URL`' + Description: > + The URL to access your Seqera Redis instance. + Value: 'Example: `redis://redis:6379`' +- + Environment variable: '`TOWER_REDIS_PASSWORD`' + Description: > + The password of your Seqera Redis instance. + Value: diff --git a/platform-enterprise/enterprise/configuration/configtables/db_yml.yml b/platform-enterprise/enterprise/configuration/configtables/db_yml.yml new file mode 100644 index 000000000..c2c0fa546 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/db_yml.yml @@ -0,0 +1,43 @@ +--- +- + tower.yml: '`datasources.default.username`' + Description: > + The user account to access your database. + If you are using an external database, you must create this user manually. **For installation in a new environment, this value must be set as an environment variable.** + Value: 'Default: `tower`' +- + tower.yml: '`datasources.default.password`' + Description: > + The user password to access your database. + If you are using an external database, you must create this password manually. **For installation in a new environment, this value must be set as an environment variable.** + Value: 'Default: `tower`' +- + tower.yml: '`datasources.default.url`' + Description: > + The URL to access your database. **For installation in a new environment, this value must be set as an environment variable.** + Value: 'Example: `jdbc:mysql://db:3306/tower`' +- + tower.yml: '`datasources.default.minPoolSize`' + Description: > + Minimum database connection pool size. 
+ Value: 'Default: `5`' +- + tower.yml: '`datasources.default.maxPoolSize`' + Description: > + Maximum database connection pool size. + Value: 'Default: `10`' +- + tower.yml: '`datasources.default.maxLifetime`' + Description: > + Maximum lifespan of database connections, in milliseconds. + Value: 'Default: `1800000`' +- + tower.yml: '`redisson.uri`' + Description: > + The URL to access your Seqera Redis instance. + Value: 'Example: `redis://redis:6379`' +- + tower.yml: '`redisson.password`' + Description: > + The password of your Seqera Redis instance. + Value: diff --git a/platform-enterprise/enterprise/configuration/configtables/features_env.yml b/platform-enterprise/enterprise/configuration/configtables/features_env.yml new file mode 100644 index 000000000..213a26779 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/features_env.yml @@ -0,0 +1,31 @@ +--- +- + Environment variable: '`TOWER_ENABLE_WAVE`' + Description: > + Enable Seqera integration with [Wave containers](https://wave.seqera.io). + Value: 'Default: `false`' +- + Environment variable: '`WAVE_SERVER_URL`' + Description: > + Define the Wave containers service endpoint URL. + Value: 'Example: `https://wave.seqera.io`' +- + Environment variable: '`TOWER_ENABLE_AWS_SSM`' + Description: > + Enable Seqera configuration value retrieval from [AWS Parameter Store](https://docs.seqera.io/platform-enterprise/latest/enterprise/configuration/aws_parameter_store). + Value: 'Default: `false`' +- + Environment variable: '`TOWER_ENABLE_AWS_SES`' + Description: > + Use AWS Simple Email Service (SES) to send Seqera emails instead of SMTP. + Value: 'Default: `false`' +- + Environment variable: '`TOWER_ALLOW_NEXTFLOW_LOGS`' + Description: > + Allow log and report files from Nextflow CLI runs (`-with-tower`) to be accessible in the Seqera UI. Run output files must be accessible to your Seqera workspace primary compute environment. 
+ Value: 'Default: `false`' +- + Environment variable: '`TOWER_STEPPED_LAUNCH_FORM_ALLOWED_WORKSPACES`' + Description: > + Disable the [stepped launch form](https://docs.seqera.io/platform-cloud/launch/launchpad#launch-form) in the workspaces specified. Omit or set empty (`TOWER_STEPPED_LAUNCH_FORM_ALLOWED_WORKSPACES=`) to enable the new launch form in all workspaces, or provide a comma-separated list of workspace IDs to enable the form per workspace. + Value: 'Default: Enabled for all workspaces' diff --git a/platform-enterprise/enterprise/configuration/configtables/generic_config_aws.yml b/platform-enterprise/enterprise/configuration/configtables/generic_config_aws.yml new file mode 100644 index 000000000..b95143ac2 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/generic_config_aws.yml @@ -0,0 +1,41 @@ +--- +- + AWS Parameter Store: '`{prefix}/tower/serverUrl`' + Description: > + Your Seqera instance hostname, IP address, DNS name, or full [reverse proxy path](https://docs.seqera.io/platform-enterprise/latest/enterprise/configuration/reverse_proxy) where the application is exposed. The `https://` protocol is required for instances that use an SSL certificate. As of version 22.1, HTTPS is used by default. To use HTTP, set `TOWER_ENABLE_UNSAFE_MODE=true`. + Value: "Default: `http://localhost:8000`" +- + AWS Parameter Store: '`{prefix}/tower/license`' + Description: > + Your Seqera Enterprise license key (**required**). [Contact us](mailto:sales@seqera.io) to obtain your license key. The key is base64-encoded by Seqera — paste this value exactly as received. + Value: '`DT8G5F3...BBV90OW`' +- + AWS Parameter Store: '`{prefix}/tower/landingUrl`' + Description: > + Custom landing page for the application (requires version 21.10.1 or later). This value doesn't change the `TOWER_SERVER_URL` used for inbound Seqera connections. 
+ Value: '`https://your.custom.landing.example.net`' +- + AWS Parameter Store: '`{prefix}/micronaut/server/port`' + Description: > + Define the HTTP port used by the Seqera cron service (requires version 21.06.1 or later). + Value: '`8080`' +- + AWS Parameter Store: '`{prefix}/tower/admin/root-users`' + Description: > + Grant users access to the application admin panel. + Value: '`user1@your-company.com,user2@your-company.com`' +- + AWS Parameter Store: '`{prefix}/tower/contactEmail`' + Description: > + Your Seqera system administrator contact email. + Value: '`tower@your-company.com`' +- + AWS Parameter Store: '`{prefix}/tower/auth/disable-email`' + Description: > + Set to `true` to disable the email login. Ensure that you've configured an alternative authentication provider first. + Value: 'Default: `false`' +- + AWS Parameter Store: '`{prefix}/tower/admin/user-workspace-enabled`' + Description: > + Enable or disable user private workspaces (requires version 22.1.0 or later). + Value: 'Default: `true`' diff --git a/platform-enterprise/enterprise/configuration/configtables/generic_config_env.yml b/platform-enterprise/enterprise/configuration/configtables/generic_config_env.yml new file mode 100644 index 000000000..6ef7ad53d --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/generic_config_env.yml @@ -0,0 +1,51 @@ +--- +- + Environment variable: '`TOWER_SERVER_URL`' + Description: > + Your Seqera instance hostname, IP address, DNS name, or full [reverse proxy path](https://docs.seqera.io/platform-enterprise/latest/enterprise/configuration/reverse_proxy) where the application is exposed. The `https://` protocol is required for instances that use an SSL certificate. As of version 22.1, HTTPS is used by default. To use HTTP, set `TOWER_ENABLE_UNSAFE_MODE=true`. + Value: "Default: `http://localhost:8000`" +- + Environment variable: '`TOWER_LICENSE`' + Description: > + Your Seqera Enterprise license key (**required**). 
[Contact us](mailto:sales@seqera.io) to obtain your license key. The key is base64-encoded by Seqera — paste this value exactly as received. + Value: '`DT8G5F3...BBV90OW`' +- + Environment variable: '`TOWER_APP_NAME`' + Description: > + Application name. To run multiple instances of the same Seqera account, each instance must have a unique name, e.g., `tower-dev` and `tower-prod`. + Value: 'Default: `tower`' +- + Environment variable: '`TOWER_CONFIG_FILE`' + Description: > + Custom path for the `tower.yml` file. + Value: '`path/to/tower/config`' +- + Environment variable: '`TOWER_LANDING_URL`' + Description: > + Custom landing page for the application (requires version 21.10.1 or later). This value doesn't change the `TOWER_SERVER_URL` used for inbound Seqera connections. + Value: '`https://your.custom.landing.example.net`' +- + Environment variable: '`TOWER_CRON_SERVER_PORT`' + Description: > + Define the HTTP port used by the Seqera cron service (requires version 21.06.1 or later). + Value: '`8080`' +- + Environment variable: '`TOWER_ROOT_USERS`' + Description: > + Grant users access to the application admin panel. + Value: '`user1@your-company.com,user2@your-company.com`' +- + Environment variable: '`TOWER_CONTACT_EMAIL`' + Description: > + Your Seqera system administrator contact email. + Value: '`seqera@your-company.com`' +- + Environment variable: '`TOWER_AUTH_DISABLE_EMAIL`' + Description: > + Set to `true` to disable the email login. Ensure that you've configured an alternative authentication provider first. + Value: 'Default: `false`' +- + Environment variable: '`TOWER_USER_WORKSPACE_ENABLED`' + Description: > + Enable or disable user private workspaces (requires version 22.1.0 or later). 
+ Value: 'Default: `true`' diff --git a/platform-enterprise/enterprise/configuration/configtables/generic_config_yml.yml b/platform-enterprise/enterprise/configuration/configtables/generic_config_yml.yml new file mode 100644 index 000000000..ee3ec5b1b --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/generic_config_yml.yml @@ -0,0 +1,46 @@ +--- +- + tower.yml: '`tower.serverUrl`' + Description: > + Your Seqera instance hostname, IP address, DNS name, or full [reverse proxy path](https://docs.seqera.io/platform-enterprise/latest/enterprise/configuration/reverse_proxy) where the application is exposed. The `https://` protocol is required for instances that use an SSL certificate. As of version 22.1, HTTPS is used by default. To use HTTP, set `TOWER_ENABLE_UNSAFE_MODE=true`. + Value: "Default: `http://localhost:8000`" +- + tower.yml: '`tower.license`' + Description: > + Your Seqera Enterprise license key (**required**). [Contact us](mailto:sales@seqera.io) to obtain your license key. The key is base64-encoded by Seqera — paste this value exactly as received. + Value: '`DT8G5F3...BBV90OW`' +- + tower.yml: '`tower.appName`' + Description: > + Application name. To run multiple instances of the same Seqera account, each instance must have a unique name, e.g., `tower-dev` and `tower-prod`. + Value: 'Default: `tower`' +- + tower.yml: '`tower.landingUrl`' + Description: > + Custom landing page for the application (requires version 21.10.1 or later). This value doesn't change the `TOWER_SERVER_URL` used for inbound Seqera connections. + Value: '`https://your.custom.landing.example.net`' +- + tower.yml: '`micronaut.server.port`' + Description: > + Define the HTTP port used by the Seqera cron service (requires version 21.06.1 or later). + Value: '`8080`' +- + tower.yml: '`tower.admin.root-users`' + Description: > + Grant users access to the application admin panel. 
+ Value: '`user1@your-company.com,user2@your-company.com`' +- + tower.yml: '`tower.contactEmail`' + Description: > + Your Seqera system administrator contact email. + Value: '`tower@your-company.com`' +- + tower.yml: '`tower.auth.disable-email`' + Description: > + Set to `true` to disable the email login. Ensure that you've configured an alternative authentication provider first. + Value: 'Default: `false`' +- + tower.yml: '`tower.admin.user-workspace-enabled`' + Description: > + Enable or disable user private workspaces (requires version 22.1.0 or later). + Value: 'Default: `true`' diff --git a/platform-enterprise/enterprise/configuration/configtables/git_aws.yml b/platform-enterprise/enterprise/configuration/configtables/git_aws.yml new file mode 100644 index 000000000..0d0135fc6 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/git_aws.yml @@ -0,0 +1,41 @@ +--- +- + AWS Parameter Store: '`{prefix}/tower/scm/provider/github/user`' + Description: 'Your GitHub username.' +- + AWS Parameter Store: '`{prefix}/tower/scm/provider/github/password`' + Description: 'Your GitHub (classic or fine-grained) access token.' +- + AWS Parameter Store: '`{prefix}/tower/scm/provider/gitlab/user`' + Description: 'Your GitLab username.' +- + AWS Parameter Store: '`{prefix}/tower/scm/provider/gitlab/password`' + Description: 'Your GitLab (Personal, Group, or Project) access token.' +- + AWS Parameter Store: '`{prefix}/tower/scm/provider/gitlab/token`' + Description: 'Your GitLab (Personal, Group, or Project) access token.' +- + AWS Parameter Store: '`{prefix}/tower/scm/provider/bitbucket/user`' + Description: 'Your BitBucket username.' +- + AWS Parameter Store: '`{prefix}/tower/scm/provider/bitbucket/password`' + Description: 'Your BitBucket App password.' +- + AWS Parameter Store: '`{prefix}/tower/scm/provider/gitea/user`' + Description: 'Your Gitea username.' 
+- + AWS Parameter Store: '`{prefix}/tower/scm/provider/gitea/password`' + Description: 'Your Gitea token.' +- + AWS Parameter Store: '`{prefix}/tower/scm/provider/azurerepos/user`' + Description: 'Your Azure DevOps repository username.' +- + AWS Parameter Store: '`{prefix}/tower/scm/provider/azurerepos/token`' + Description: 'Your Azure DevOps repository personal access token.' +#To be confirmed +#- +# AWS Parameter Store: '`/tower/scm/provider/`' +# Description: 'AWS CodeCommit user access key.' +#- +# AWS Parameter Store: '`/tower/scm/provider/`' +# Description: 'AWS CodeCommit user secret key.' \ No newline at end of file diff --git a/platform-enterprise/enterprise/configuration/configtables/git_env.yml b/platform-enterprise/enterprise/configuration/configtables/git_env.yml new file mode 100644 index 000000000..ebcbb71b7 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/git_env.yml @@ -0,0 +1,41 @@ +--- +- + Environment variable: '`TOWER_SCM_PROVIDERS_GITHUB_USER`' + Description: 'Your GitHub username.' +- + Environment variable: '`TOWER_SCM_PROVIDERS_GITHUB_PASSWORD`' + Description: 'Your GitHub (classic or fine-grained) access token.' +- + Environment variable: '`TOWER_SCM_PROVIDERS_GITLAB_USER`' + Description: 'Your GitLab username.' +- + Environment variable: '`TOWER_SCM_PROVIDERS_GITLAB_PASSWORD`' + Description: 'Your GitLab (Personal, Group, or Project) access token.' +- + Environment variable: '`TOWER_SCM_PROVIDERS_GITLAB_TOKEN`' + Description: 'Your GitLab (Personal, Group, or Project) access token.' +- + Environment variable: '`TOWER_SCM_PROVIDERS_BITBUCKET_USER`' + Description: 'Your BitBucket username.' +- + Environment variable: '`TOWER_SCM_PROVIDERS_BITBUCKET_PASSWORD`' + Description: 'Your BitBucket App password.' +- + Environment variable: '`TOWER_SCM_PROVIDERS_GITEA_USER`' + Description: 'Your Gitea username.' +- + Environment variable: '`TOWER_SCM_PROVIDERS_GITEA_PASSWORD`' + Description: 'Your Gitea token.' 
+- + Environment variable: '`TOWER_SCM_PROVIDERS_AZUREREPOS_USER`' + Description: 'Your Azure DevOps repository username.' +- + Environment variable: '`TOWER_SCM_PROVIDERS_AZUREREPOS_TOKEN`' + Description: 'Your Azure DevOps repository personal access token.' +#To be confirmed +#- +# Environment variable: '`TOWER_SCM_PROVIDERS_CODECOMMIT_ACCESS_KEY`' +# Description: 'AWS CodeCommit user access key.' +#- +# Environment variable: '`TOWER_SCM_PROVIDERS_CODECOMMIT_SECRET_KEY`' +# Description: 'AWS CodeCommit user secret key.' \ No newline at end of file diff --git a/platform-enterprise/enterprise/configuration/configtables/git_yml.yml b/platform-enterprise/enterprise/configuration/configtables/git_yml.yml new file mode 100644 index 000000000..c19359ca3 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/git_yml.yml @@ -0,0 +1,41 @@ +--- +- + tower.yml: '`tower.scm.provider.github.user`' + Description: 'Your GitHub username.' +- + tower.yml: '`tower.scm.provider.github.password`' + Description: 'Your GitHub (classic or fine-grained) access token.' +- + tower.yml: '`tower.scm.provider.gitlab.user`' + Description: 'Your GitLab username.' +- + tower.yml: '`tower.scm.provider.gitlab.password`' + Description: 'Your GitLab (Personal, Group, or Project) access token.' +- + tower.yml: '`tower.scm.provider.gitlab.token`' + Description: 'Your GitLab (Personal, Group, or Project) access token.' +- + tower.yml: '`tower.scm.provider.bitbucket.user`' + Description: 'Your BitBucket username.' +- + tower.yml: '`tower.scm.provider.bitbucket.password`' + Description: 'Your BitBucket App password.' +- + tower.yml: '`tower.scm.provider.gitea.user`' + Description: 'Your Gitea username.' +- + tower.yml: '`tower.scm.provider.gitea.password`' + Description: 'Your Gitea token.' +- + tower.yml: '`tower.scm.provider.azurerepos.user`' + Description: 'Your Azure DevOps repository username.' 
+- + tower.yml: '`tower.scm.provider.azurerepos.token`' + Description: 'Your Azure DevOps repository personal access token.' +#To be confirmed +#- +# tower.yml: '`tower.scm.provider.`' +# Description: 'AWS CodeCommit user access key.' +#- +# tower.yml: '`tower.scm.provider.`' +# Description: 'AWS CodeCommit user secret key.' \ No newline at end of file diff --git a/platform-enterprise/enterprise/configuration/configtables/leftover.yml b/platform-enterprise/enterprise/configuration/configtables/leftover.yml new file mode 100644 index 000000000..81daecb41 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/leftover.yml @@ -0,0 +1,30 @@ +- + Environment variable: '`TOWER_CONTENT_URL`' + Description: > + URL to securely download user-generated content (must be a subdomain of `TOWER_SERVER_URL`). + Value: +- + Environment variable: '`TOWER_DEFAULT_WORKSPACE_ID`' + Description: > + ID of the default workspace where users will land after logging in. Requires users to be members of the given workspace. + Value: '' +- + AWS Parameter Store: '`{prefix}/tower/admin/default-workspace-id`' + Description: > + ID of the default workspace where users will land after logging in. Requires users to be members of the given workspace. + Value: '' +- + AWS Parameter Store: '`{prefix}/tower/contentUrl`' + Description: > + URL to securely download user-generated content (must be a subdomain of `/tower/serverUrl`). + Value: +- + tower.yml: '`tower.admin.default-workspace-id`' + Description: > + ID of the default workspace where users will land after logging in. Requires users to be members of the given workspace. + Value: '' +- + tower.yml: '`tower.contentUrl`' + Description: > + URL to securely download user-generated content (must be a subdomain of `tower.serverUrl`).
+ Value: diff --git a/platform-enterprise/enterprise/configuration/configtables/mail_server_aws.yml b/platform-enterprise/enterprise/configuration/configtables/mail_server_aws.yml new file mode 100644 index 000000000..2fe779ab8 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/mail_server_aws.yml @@ -0,0 +1,43 @@ +--- +# https://guides.micronaut.io/latest/micronaut-aws-parameter-store-gradle-java.html +# https://javaee.github.io/javamail/docs/api/com/sun/mail/smtp/package-summary.html +- + AWS Parameter Store: '`{prefix}/mail/smtp/user`' + Description: > + Your email service user. + Value: 'Example: `user`' +- + AWS Parameter Store: '`{prefix}/mail/smtp/password`' + Description: > + Your email service password. + Value: +- + AWS Parameter Store: '`{prefix}/mail/smtp/host`' + Description: > + Your email service host name, excluding protocol. + Value: 'Example: `email-smtp.eu-west-1.amazonaws.com`' +- + AWS Parameter Store: '`{prefix}/mail/smtp/port`' + Description: > + Your email service port. Most cloud services block port 25 by default. + Value: 'Default: `587`' +- + AWS Parameter Store: '`{prefix}/mail/from`' + Description: > + The email address used to send Seqera emails. + Value: 'Example: `seqera@your-company.com`' +- + AWS Parameter Store: '`{prefix}/mail/smtp/auth`' + Description: > + Use SMTP authentication when calling your email service endpoint. + Value: 'Default: `true`' +- + AWS Parameter Store: '`{prefix}/mail/smtp/starttls/enable`' + Description: > + Switch the connection to a TLS-protected connection before issuing login commands. Must be `true` for production SMTP hosts. + Value: '**Recommended**: `true`' +- + AWS Parameter Store: '`{prefix}/mail/smtp/starttls/required`' + Description: > + Require the use of the STARTTLS command. Must be `true` for production SMTP hosts. 
+ Value: '**Recommended**: `true`' \ No newline at end of file diff --git a/platform-enterprise/enterprise/configuration/configtables/mail_server_env.yml b/platform-enterprise/enterprise/configuration/configtables/mail_server_env.yml new file mode 100644 index 000000000..3738f0b39 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/mail_server_env.yml @@ -0,0 +1,48 @@ +--- +# https://guides.micronaut.io/latest/micronaut-aws-parameter-store-gradle-java.html +# https://javaee.github.io/javamail/docs/api/com/sun/mail/smtp/package-summary.html +- + Environment variable: '`TOWER_SMTP_USER`' + Description: > + Your email service user. + Value: 'Example: `user`' +- + Environment variable: '`TOWER_SMTP_PASSWORD`' + Description: > + Your email service password. + Value: +- + Environment variable: '`TOWER_SMTP_HOST`' + Description: > + Your email service host name, excluding protocol. + Value: 'Example: `email-smtp.eu-west-1.amazonaws.com`' +- + Environment variable: '`TOWER_SMTP_PORT`' + Description: > + Your email service port. Most cloud services block port 25 by default. + Value: 'Default: `587`' +- + Environment variable: '`TOWER_CONTACT_EMAIL`' + Description: > + The email address used to send Seqera emails. + Value: 'Example: `seqera@your-company.com`' +- + Environment variable: '`TOWER_SMTP_AUTH`' + Description: > + Use SMTP authentication when calling your email service endpoint. + Value: 'Default: `true`' +- + Environment variable: '`TOWER_SMTP_STARTTLS_ENABLED`' + Description: > + Switch the connection to a TLS-protected connection before issuing login commands. Must be `true` for production SMTP hosts. + Value: '**Recommended**: `true`' +- + Environment variable: '`TOWER_SMTP_STARTTLS_REQUIRED`' + Description: > + Require the use of the STARTTLS command. Must be `true` for production SMTP hosts. 
+ Value: '**Recommended**: `true`' +- + Environment variable: '`TOWER_ENABLE_AWS_SES`' + Description: > + Use AWS SES (Simple Email Service) to send Seqera emails instead of SMTP. + Value: 'Default: `false`' \ No newline at end of file diff --git a/platform-enterprise/enterprise/configuration/configtables/mail_server_proxy_aws.yml b/platform-enterprise/enterprise/configuration/configtables/mail_server_proxy_aws.yml new file mode 100644 index 000000000..2586cf942 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/mail_server_proxy_aws.yml @@ -0,0 +1,23 @@ +--- +# https://guides.micronaut.io/latest/micronaut-aws-parameter-store-gradle-java.html +# https://javaee.github.io/javamail/docs/api/com/sun/mail/smtp/package-summary.html +- + AWS Parameter Store: '`{prefix}/mail/smtp/proxy/host`' + Description: > + Specify the host name of an HTTP web proxy server that will be used for connections to the mail server. + Value: 'Example: `http://my.proxy.int`' +- + AWS Parameter Store: '`{prefix}/mail/smtp/proxy/port`' + Description: > + Specify the port number for the HTTP web proxy server. Defaults to 80 if not set. + Value: 'Example: `80`' +- + AWS Parameter Store: '`{prefix}/mail/smtp/proxy/user`' + Description: > + Specify the username to authenticate with the HTTP web proxy server. No authentication is performed if not set. + Value: 'Example: `proxy_user_1`' +- + AWS Parameter Store: '`{prefix}/mail/smtp/proxy/password`' + Description: > + Specify the password to authenticate with the HTTP web proxy server. No authentication is performed if not set.
+ Value: \ No newline at end of file diff --git a/platform-enterprise/enterprise/configuration/configtables/mail_server_proxy_yml.yml b/platform-enterprise/enterprise/configuration/configtables/mail_server_proxy_yml.yml new file mode 100644 index 000000000..0e4b5676d --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/mail_server_proxy_yml.yml @@ -0,0 +1,23 @@ +--- +# https://guides.micronaut.io/latest/micronaut-aws-parameter-store-gradle-java.html +# https://javaee.github.io/javamail/docs/api/com/sun/mail/smtp/package-summary.html +- + tower.yml: '`mail.smtp.proxy.host`' + Description: > + Specify the host name of an HTTP web proxy server that will be used for connections to the mail server. + Value: 'Example: `http://my.proxy.int`' +- + tower.yml: '`mail.smtp.proxy.port`' + Description: > + Specify the port number for the HTTP web proxy server. Defaults to 80 if not set. + Value: 'Example: `80`' +- + tower.yml: '`mail.smtp.proxy.user`' + Description: > + Specify the username to authenticate with the HTTP web proxy server. No authentication is performed if not set. + Value: 'Example: `proxy_user_1`' +- + tower.yml: '`mail.smtp.proxy.password`' + Description: > + Specify the password to authenticate with the HTTP web proxy server. No authentication is performed if not set. + Value: \ No newline at end of file diff --git a/platform-enterprise/enterprise/configuration/configtables/mail_server_yml.yml b/platform-enterprise/enterprise/configuration/configtables/mail_server_yml.yml new file mode 100644 index 000000000..355038640 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/mail_server_yml.yml @@ -0,0 +1,43 @@ +--- +# https://guides.micronaut.io/latest/micronaut-aws-parameter-store-gradle-java.html +# https://javaee.github.io/javamail/docs/api/com/sun/mail/smtp/package-summary.html +- + tower.yml: '`mail.smtp.user`' + Description: > + Your email service user. 
+ Value: 'Example: `user`' +- + tower.yml: '`mail.smtp.password`' + Description: > + Your email service password. + Value: +- + tower.yml: '`mail.smtp.host`' + Description: > + Your email service host name, excluding protocol. + Value: 'Example: `email-smtp.eu-west-1.amazonaws.com`' +- + tower.yml: '`mail.smtp.port`' + Description: > + Your email service port. Most cloud services block port 25 by default. + Value: 'Default: `587`' +- + tower.yml: '`mail.from`' + Description: > + The email address used to send Seqera emails. + Value: 'Example: `Seqera@your-company.com`' +- + tower.yml: '`mail.smtp.auth`' + Description: > + Use SMTP authentication when calling your email service endpoint. + Value: 'Default: `true`' +- + tower.yml: '`mail.smtp.starttls.enable`' + Description: > + Switch the connection to a TLS-protected connection before issuing login commands. Must be `true` for production SMTP hosts. + Value: '**Recommended**: `true`' +- + tower.yml: '`mail.smtp.starttls.required`' + Description: > + Require the use of the STARTTLS command. Must be `true` for production SMTP hosts. + Value: '**Recommended**: `true`' \ No newline at end of file diff --git a/platform-enterprise/enterprise/configuration/configtables/req_env_vars.yml b/platform-enterprise/enterprise/configuration/configtables/req_env_vars.yml new file mode 100644 index 000000000..a70589f6c --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/req_env_vars.yml @@ -0,0 +1,38 @@ +--- +- + Environment variable: '`TOWER_DB_USER`' + Description: > + The user account to access your database. + If you are using an external database, you must create this user manually. **For installation in a new environment, this value must be set as an environment variable.** + Value: 'Default: `tower`' +- + Environment variable: '`TOWER_DB_PASSWORD`' + Description: > + The user password to access your database. + If you are using an external database, you must create this password manually. 
**For installation in a new environment, this value must be set as an environment variable.** + Value: 'Default: `tower`' +- + Environment variable: '`TOWER_DB_URL`' + Description: > + The URL to access your database. **For installation in a new environment, this value must be set as an environment variable.** See the [24.1 release notes](https://docs.seqera.io/platform/24.1/enterprise/release_notes/enterprise_latest#breaking-changes) for information about the DB URL format. + Value: 'Example: `jdbc:mysql://db:3306/tower?permitMysqlScheme=true`' +- + Environment variable: '`TOWER_APP_NAME`' + Description: > + Application name. To run multiple instances of the same Seqera account, each instance must have a unique name, e.g., `tower-dev` and `tower-prod`. **Can also be set in** `tower.yml` **with** `tower.appName`**.** + Value: 'Default: `tower`' +- + Environment variable: '`TOWER_ENABLE_AWS_SES`' + Description: > + Set `true` to enable AWS Simple Email Service for sending Seqera emails instead of SMTP. + Value: 'Default: `false`' +- + Environment variable: '`TOWER_ENABLE_PLATFORMS`' + Description: > + A comma-separated list of execution backends to enable. **At least one is required.** + Value: '`altair-platform,awsbatch-platform,azbatch-platform,eks-platform,googlebatch-platform,gke-platform,k8s-platform,local-platform,lsf-platform,moab-platform,slurm-platform`' +- + Environment variable: '`TOWER_ENABLE_UNSAFE_MODE`' + Description: > + Set to `true` to allow HTTP connections to Seqera. HTTP must not be used in production deployments. HTTPS is used by default from version 22.1.x. 
+ Value: 'Default: `false`' diff --git a/platform-enterprise/enterprise/configuration/configtables/tower_logging.yml b/platform-enterprise/enterprise/configuration/configtables/tower_logging.yml new file mode 100644 index 000000000..7022b6a38 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/configtables/tower_logging.yml @@ -0,0 +1,61 @@ +--- +- + Environment variable: '`TOWER_CRON_AUDIT_LOG_CLEAN_UP_TIME_OFFSET`' + Description: > + Application event audit log retention period. Logged events older than this period are deleted. Value includes units (`30d`, `24h`, `60m`, etc.). + Value: 'Default: `365d`' +- + Environment variable: '`TOWER_LOG_APPENDER`' + Description: > + The output format of Platform logs. + Value: 'Options: `STDOUT`, `JSON`' +- + Environment variable: '`TOWER_LOG_LEVEL`' + Description: > + Platform backend logging detail level. + Value: 'Options: `TRACE`, `DEBUG`, `INFO`, `WARN`, `ERROR`' +- + Environment variable: '`TOWER_SECURITY_LOGLEVEL`' + Description: > + Platform authentication logging detail level. + Value: 'Options: `TRACE`, `DEBUG`, `INFO`, `WARN`, `ERROR`' +- + Environment variable: '`TOWER_LOG_DIR`' + Description: > + Base directory to store Platform logs. + Value: +- + Environment variable: '`TOWER_LOG_PATTERN`' + Description: > + The logging format emitted to STDOUT. See [here](https://logback.qos.ch/manual/layouts.html#conversionWord) for a reference of the full logback pattern syntax. + Value: 'Default: `%d{MMM-dd HH:mm:ss.SSS} [%t] %X{ip:--} %-5level %logger{36} - %msg%n`' +- + Environment variable: '`TOWER_LOG_MAX_HISTORY`' + Description: > + The maximum number of backend log files retained by the system. + Value: +- + Environment variable: '`TOWER_LOG_MAX_SIZE`' + Description: > + The maximum file size of the Platform backend log file. When this limit is reached, a new log file is created.
+ Value: +- + Environment variable: '`LOGGER_LEVELS_IO_SEQERA_TOWER_AGENT`' + Description: > + Tower Agent logging detail level. + Value: 'Options: `TRACE`, `DEBUG`, `INFO`, `WARN`, `ERROR`' +- + Environment variable: '`TOWER_AGENT_HEARTBEAT`' + Description: > + Tower Agent polling interval. + Value: 'Example: `10s`' +- + Environment variable: '`TOWER_SSH_LOGLEVEL`' + Description: > + Event logging detail level for the SSH connection library used by Seqera. + Value: 'Options: `TRACE`, `DEBUG`, `INFO`, `WARN`, `ERROR`' +- + Environment variable: '`TOWER_ALLOW_NEXTFLOW_LOGS`' + Description: > + Set `true` to allow Seqera to retrieve logs and reports for runs launched with Nextflow CLI. + Value: 'Default: `false`' diff --git a/platform-enterprise/enterprise/configuration/networking.md b/platform-enterprise/enterprise/configuration/networking.md new file mode 100644 index 000000000..c62f1098b --- /dev/null +++ b/platform-enterprise/enterprise/configuration/networking.md @@ -0,0 +1,42 @@ +--- +title: "Networking" +description: Seqera configuration options for networking +date: "21 Apr 2023" +tags: [networking, configuration] +--- + +## HTTP proxy environment variables + +:::caution +Proxies that require passwords aren't supported. +::: + +If your Seqera Platform Enterprise instance must access the internet via a proxy server, configure the following case-insensitive environment variables: + +- `http_proxy`: The proxy server for HTTP connections. +- `https_proxy`: The proxy server for HTTPS connections. +- `no_proxy`: One or more host names that bypass the proxy server. + +In the following example, `alice.example.com:8080` is configured as a proxy for all HTTP and HTTPS traffic, except for traffic to the `internal.example.com` and `internal2.example.com` hosts. 
+ +```env +export http_proxy='alice.example.com:8080' +export https_proxy='alice.example.com:8080' +export no_proxy=internal.example.com,internal2.example.com +``` + +## Isolated environments + +If you're deploying Seqera in an environment that has no external internet access, ensure that no pipeline assets or parameters in your configuration contain external links, as this will lead to connection failures. + +## Mail proxy server + +Mail proxy server configuration details must be set either in `tower.yml` or AWS Parameter Store. + +**tower.yml** + +::table{file=configtables/mail_server_proxy_yml.yml} + +**AWS Parameter Store** + +::table{file=configtables/mail_server_proxy_aws.yml} diff --git a/platform-enterprise/enterprise/configuration/overview.md b/platform-enterprise/enterprise/configuration/overview.md new file mode 100644 index 000000000..6e98b518b --- /dev/null +++ b/platform-enterprise/enterprise/configuration/overview.md @@ -0,0 +1,561 @@ +--- +title: "Configuration overview" +description: Overview of Seqera configuration options +date: "21 Apr 2023" +tags: [configuration] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +:::note +Nextflow Tower Enterprise is now Seqera Platform Enterprise. Existing configuration parameters, configuration files, and API endpoints that include _Tower_ currently remain unchanged. +::: + +Set Seqera configuration values using environment variables, a `tower.yml` configuration file, or individual values stored in AWS Parameter Store. Sensitive values such as database passwords should be stored securely (e.g., as SecureString type parameters in AWS Parameter Store). + + + + + Declare environment variables in a [tower.env](../_templates/docker/tower.env) file. For example: + + ```bash + TOWER_CONTACT_EMAIL=hello@foo.com + TOWER_SMTP_HOST=your.smtphost.com + ``` + + See the `Environment variables` option in each section below. 
+ + + + + Declare YAML configuration values in a [tower.yml](../_templates/docker/tower.yml) file. For example: + + ```yml + mail: + from: "hello@foo.com" + smtp: + host: "your.smtphost.com" + ``` + + See the `tower.yml` option in each section below. YAML configuration keys on this page are listed in "dot" notation, i.e., the SMTP host value in the snippet above is represented as `mail.smtp.host` in the tables that follow. + + Don't declare duplicate keys in your `tower.yml` configuration file. Platform will only enforce the last instance of configuration keys that are defined more than once, for example: + + ```yaml + # This block will not be enforced due to the duplicate `tower` key below + tower: + trustedEmails: + - user@example.com + + # This block will be enforced because it's defined last + tower: + auth: + oidc: + - "*@foo.com" + ``` + + + + +AWS Parameter Store configuration is only supported for AWS deployments. + +Create parameters in the AWS Parameter Store individually, using the format +`/config// : `. For example: + +```bash +/config/tower-app/mail.smtp.user : +/config/tower-app/mail.smtp.password : +``` + +:::caution +The default application name is `tower-app`. To deploy multiple instances from the same Seqera Enterprise account, set a custom application name for each instance with the `micronaut.application.name` value in your `tower.yml` configuration file. +::: + +Sensitive values (such as database passwords) should be SecureString type parameters. See [AWS Parameter Store](./aws_parameter_store) for detailed instructions. + + + + +## Configuration values not supported in tower.yml or AWS Parameter Store + +Due to the order of operations when deploying Seqera Enterprise, some configuration values can only be retrieved from **environment variables** (`tower.env`). 
The following configuration values are not supported for `tower.yml` or AWS Parameter Store configuration and must be set as environment variables: + + + + +::table{file=configtables/req_env_vars.yml} + + + + +## Basic configuration + +Basic configuration options such as the Seqera instance server URL, application name, and license key. + + + + +::table{file=configtables/generic_config_env.yml} + + + + +YAML configuration keys in this table are listed in "dot" notation, i.e., a nested value: + +```yaml +... +mail: + smtp: + host: "your.smtphost.com" +... +``` + +is represented as `mail.smtp.host`. + +::table{file=configtables/generic_config_yml.yml} + + + + +AWS Parameter Store configuration is only supported for AWS deployments. + +Replace `{prefix}` in each configuration path with `/config/`, where `application_name` is `tower` or your custom application name. See [AWS Parameter Store](./aws_parameter_store). + +::table{file=configtables/generic_config_aws.yml} + + + + +## Seqera and Redis databases + +Configuration values that control Seqera's interaction with databases and Redis instances. `TOWER_DB_USER`, `TOWER_DB_PASSWORD`, and `TOWER_DB_URL` must be specified using environment variables during initial Seqera Enterprise deployment in a new environment. A new installation will fail if DB values are only defined in `tower.yml` or the AWS Parameter Store. Once the database has been created, these values can be added to `tower.yml` or [AWS Parameter Store](./aws_parameter_store) entries and removed from your environment variables. + +:::note +From Seqera Enterprise version 24.2: + +- Redis version 6.2 or greater is required. +- Redis version 7 is officially supported. + +Follow your cloud provider specifications to upgrade your instance. +::: + +If you use a database **other than** the provided `db` container, you must create a user and database schema manually. 
+ + + + +```SQL +CREATE DATABASE tower; +ALTER DATABASE tower CHARACTER SET utf8 COLLATE utf8_bin; + +CREATE USER 'tower' IDENTIFIED BY '<password>'; +GRANT ALL PRIVILEGES ON tower.* TO tower@'%' ; +``` + + + + +```SQL +GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, REFERENCES, INDEX, ALTER, CREATE TEMPORARY TABLES, LOCK TABLES, EXECUTE, CREATE VIEW, SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, EVENT, TRIGGER on tower.* TO tower@'%'; +``` + + + + +### Managed Redis services + +Seqera supports managed Redis services such as [Amazon ElastiCache](https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/WhatIs.html), [Azure Cache for Redis](https://learn.microsoft.com/en-gb/azure/azure-cache-for-redis/cache-overview), or [Google Memorystore](https://cloud.google.com/memorystore/docs/redis). + +When using a managed Redis service, you must specify the service IP address or DNS name for the `TOWER_REDIS_URL` as described in the following sections. + + + + +- Use a single-node cluster, as multi-node clusters are not supported +- Use an instance with at least 6GB capacity ([cache.m4.large](https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/CacheNodes.SupportedTypes.html) or greater) +- Specify your private ElastiCache instance in the Seqera environment variables: + +```bash +TOWER_REDIS_URL=redis://<your-elasticache-ip-or-dns>:6379 +``` + + + + +- Use a single-node cluster, as multi-node clusters are not supported +- Use an instance with at least 6GB capacity ([C3](https://azure.microsoft.com/en-gb/pricing/details/cache/) or greater) +- Specify your private Azure Cache for Redis instance in the Seqera environment variables: + +```bash +TOWER_REDIS_URL=redis://<your-azure-cache-ip-or-dns>:6379 +``` + + + + +- Use a single-node cluster, as multi-node clusters are not supported +- Use an instance with at least 6GB capacity ([M2](https://cloud.google.com/memorystore/docs/redis/pricing#instance_pricing) or greater) +- Specify your private Memorystore instance in the Seqera environment variables: + +```bash 
+TOWER_REDIS_URL=redis://<your-memorystore-ip-or-dns>:6379 +``` + + + + +If you run the Redis service as a container in your Docker or Kubernetes installation, specify the service name as part of the `TOWER_REDIS_URL`: + + ```bash + TOWER_REDIS_URL=redis://redis:6379 + ``` + + + + +### Database and Redis manual configuration + +If the DB username and password variables are left empty when using [Docker Compose](../docker-compose), default `tower` database values are applied automatically. With [Kubernetes](../kubernetes) and custom DB deployments, `tower` values are not pre-filled. + +:::note +We recommend using managed cloud database services for production deployments. +::: + + + + +::table{file=configtables/db_env.yml} + + + + +`TOWER_DB_USER`, `TOWER_DB_PASSWORD`, and `TOWER_DB_URL` must be specified using **environment variables** during initial Seqera Enterprise deployment in a new environment. + +YAML configuration keys in this table are listed in "dot" notation, i.e., a nested value: + +```yaml +... +mail: + smtp: + host: "your.smtphost.com" +... +``` + +is represented as `mail.smtp.host`. + +::table{file=configtables/db_yml.yml} + + + + +AWS Parameter Store configuration is only supported for AWS deployments. + +`TOWER_DB_USER`, `TOWER_DB_PASSWORD`, and `TOWER_DB_URL` must be specified using **environment variables** during initial Seqera Enterprise deployment in a new environment. + +Replace `{prefix}` in each configuration path with `/config/{application_name}`, where `application_name` is `tower` or your custom application name. See [AWS Parameter Store](./aws_parameter_store). + +::table{file=configtables/db_aws.yml} + + + + +## Opt-in Seqera features + +Configuration values that enable opt-in Seqera features per instance or workspace. + +### Core features + + + + +::table{file=configtables/features_env.yml} + + + + +### Data features + +Configuration values used by Seqera for Data Explorer. 
+ + + + +::table{file=configtables/data_features_env.yml} + + + + +::table{file=configtables/data_features_yml.yml} + + + + +## Cryptographic options + +Configuration values used by Seqera to encrypt your data. + +:::caution +Do not modify your crypto secret key between starts. Changing this value will prevent the decryption of existing data. +::: + + + + +::table{file=configtables/crypto_env.yml} + + + + +YAML configuration keys in this table are listed in "dot" notation, i.e., a nested value: + +```yaml +... +mail: + smtp: + host: "your.smtphost.com" +... +``` + +is represented as `mail.smtp.host`. + +::table{file=configtables/crypto_yml.yml} + + + + +AWS Parameter Store configuration is only supported for AWS deployments. + +Replace `{prefix}` in each configuration path with `/config/`, where `application_name` is `tower` or your custom application name. See [AWS Parameter Store](./aws_parameter_store). + +::table{file=configtables/crypto_aws.yml} + + + + +## Compute environments + +Configuration values to enable computing platforms and customize Batch Forge resource naming. + + + + +::table{file=configtables/compute_env.yml} + + + + +## Git integration + +Seqera Platform has built-in support for public and private Git repositories. 
Create [Git provider credentials](../../git/overview) to allow Seqera to interact with the following services: + +- [GitHub](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) +- [BitBucket](https://confluence.atlassian.com/bitbucketserver/personal-access-tokens-939515499.html) +- [GitLab](https://gitlab.com/profile/personal_access_tokens) +- [Gitea](https://docs.gitea.io/en-us/development/api-usage/#generating-and-listing-api-tokens) +- [Azure Repos](https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate) + +:::caution +Credentials configured in your SCM providers list override Git credentials in your (organization or personal) workspace. +::: + +Public Git repositories can be accessed without authentication, but are often subject to [throttling](https://docs.github.com/en/rest/overview/resources-in-the-rest-api?apiVersion=2022-11-28#rate-limits-for-requests-from-personal-accounts). We recommend always adding Git credentials to your Seqera workspace, regardless of the repository type you use. + + + + +Credentials and other secrets must not be hard-coded in environment variables in production environments. Credentials added using the application UI are SHA256-encrypted before secure storage and not exposed by any Seqera API. + +::table{file=configtables/git_env.yml} + + + + +Credentials and other secrets must not be stored in plain text in production environments. Credentials added using the application UI are SHA256-encrypted before secure storage and not exposed by any Seqera API. + +YAML configuration keys in this table are listed in "dot" notation, i.e., a nested value: + +```yaml +... +mail: + smtp: + host: "your.smtphost.com" +... +``` + +is represented as `mail.smtp.host`. + +::table{file=configtables/git_yml.yml} + + + + +AWS Parameter Store configuration is only supported for AWS deployments. 
+ +Replace `{prefix}` in each configuration path with `/config/`, where `application_name` is `tower` or your custom application name. See [AWS Parameter Store](./aws_parameter_store). + +::table{file=configtables/git_aws.yml} + + + + +### Local repositories + +Seqera Enterprise can connect to workflows stored in local Git repositories. To do so, volume mount your local repository folder in your Seqera backend container. Then, update your `tower.yml`: + +```yml +tower: + pipeline: + allow-local-repos: + - /path/to/repo +``` + +## Mail server + +Configure values for SMTP email service integration. Production SMTP hosts must use a TLS-protected connection. See [SSL/TLS](../configuration/ssl_tls). + +AWS deployments also support [Amazon Simple Email Service (SES)](https://aws.amazon.com/ses/). + +### SMTP service integration + +To use an SMTP gateway for mail service, set SMTP user and password values to `null`. + +:::caution +Your organization's email security policy may prevent the `TOWER_CONTACT_EMAIL` address from receiving Seqera emails. If this occurs after successful SMTP configuration, you may need to configure `spf`, `dkim`, and `dmarc` records for your domain. Contact your IT support staff for further assistance. +::: + + + + +::table{file=configtables/mail_server_env.yml} + + + + +YAML configuration keys in this table are listed in "dot" notation, i.e., a nested value: + +```yaml +... +mail: + smtp: + host: "your.smtphost.com" +... +``` + +is represented as `mail.smtp.host`. + +::table{file=configtables/mail_server_yml.yml} + + + + +AWS Parameter Store configuration is only supported for AWS deployments. + +Replace `{prefix}` in each configuration path with `/config/`, where `application_name` is `tower` or your custom application name. See [AWS Parameter Store](./aws_parameter_store). 
+ +::table{file=configtables/mail_server_aws.yml} + + + + +### AWS SES integration + +In AWS deployments, you can use AWS Simple Email Service (SES) instead of traditional SMTP for sending Seqera platform emails. + +:::note +Simple Email Service (SES) is only supported in Seqera deployments on AWS. +::: + +To configure AWS SES as your Seqera email service: + +1. Set `TOWER_ENABLE_AWS_SES=true` in your environment variables. +2. Specify the email address used to send Seqera emails with one of the following: + - the `TOWER_CONTACT_EMAIL` environment variable + - a `mail.from` entry in `tower.yml` + - a `/config//mail/from` AWS Parameter Store entry +3. The [AWS SES service](https://docs.aws.amazon.com/ses/index.html) must run in the same region as your Seqera instance. +4. The [Seqera IAM role](../../compute-envs/aws-batch#iam) must include the `ses:SendRawEmail` permission. + +## Nextflow launch container + +:::caution +Do not replace the [Seqera-provided default image](../../functionality_matrix/overview) unless absolutely necessary. +::: + + + + +| Environment Variable | Description | Value | +| ------------------------- | --------------------------------------------------------------------------------------------------------------- | ---------------------------------- | +| `TOWER_LAUNCH_CONTAINER` | The container image to run the Nextflow execution. This setting overrides the launch container selection for all organizations and workspaces in your account. | Example: `quay.io/seqeralabs/nf-launcher:j17-23.04.3` | + + + + +## Seqera API + +Enable the API endpoints to host the Seqera Enterprise OpenAPI specification and use the [tw CLI](https://github.com/seqeralabs/tower-cli). Set custom API rate limits and timeouts. + +:::note +To configure API rate limit environment variables, you must add `ratelim` to the `MICRONAUT_ENVIRONMENTS`. Without `ratelim` being set, the rate limit configuration variables below are ignored. 
+::: + + + + +| Environment variable | Description | Value | +| ---------------------- | ----------------------------------------------------------------------------- | --------------- | +| `TOWER_ENABLE_OPENAPI` | Enable the OpenAPI documentation endpoint, e.g., [cloud.seqera.io/openapi/index.html](https://cloud.seqera.io/openapi/index.html). | Default: `true` | +| `TOWER_RATELIMIT_PERIOD` | Specify the maximum number of HTTP requests that can be made during the `TOWER_RATELIMIT_REFRESH` period. | Default: `20` | +| `TOWER_RATELIMIT_REFRESH` | API rate limit refresh period. | Default: `1s` | +| `TOWER_RATELIMIT_TIMEOUT` | The waiting period before rejecting requests over the `TOWER_RATELIMIT_PERIOD` limit during the refresh period. | Default: `500ms` | + + + + +## Custom navigation menu + +Modify your Seqera instance's navigation menu options. + + + + +```yaml +tower: + navbar: + menus: + - label: "My Community" + url: "https://host.com/foo" + - label: "My Pipelines" + url: "https://other.com/bar" +``` + + + + +## Logging + +Logging-related configuration values to aid troubleshooting. See [Audit logs](../../monitoring/audit-logs) for more information on application event logging. + + + + +::table{file=configtables/tower_logging.yml} + + + + +Set the logging detail level for various Seqera services. Logs for particular services may be requested by support to assist with troubleshooting an issue. Set the logging configuration parameter in your Seqera YAML configuration before attempting to reproduce your issue. The example below sets the detail level for application and database logging: + +`logger` is a root-level object in the `tower.yml` configuration file, i.e., it is not nested under `tower`. 
+ +```yaml +logger: + levels: + org.hibernate.SQL: DEBUG + org.hibernate.type: TRACE + io.seqera.tower: TRACE +``` + + + diff --git a/platform-enterprise/enterprise/configuration/pipeline_optimization.md b/platform-enterprise/enterprise/configuration/pipeline_optimization.md new file mode 100644 index 000000000..43d00ba69 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/pipeline_optimization.md @@ -0,0 +1,112 @@ +--- +title: "Pipeline resource optimization" +description: "Configure pipeline resource optimization in your Seqera Enterprise deployment." +date: "12 Feb 2024" +tags: [compute, resource, optimization, configuration] +--- + +[Pipeline resource optimization](../../pipeline-optimization/overview) takes the resource usage information from previous workflow runs to optimize subsequent runs. + +The pipeline resource optimization service requires a separate database schema to store its internal data, but also requires access to the Seqera schema. The Seqera and optimization service schemas can coexist on the same database instance. + +## Docker Compose deployment + +Docker Compose makes use of a separate container to set up the pipeline resource optimization service during initialization. Configuration steps differ for new and existing deployments. + +### New installation + +To use the pipeline resource optimization service in a new Docker Compose installation of Seqera Enterprise, use the following steps: + +1. To run the service from a custom URL, declare the URL with the `GROUNDSWELL_SERVER_URL` environment variable in `tower.env`. A non-zero value for this environment variable activates the optimization service automatically, so `TOWER_ENABLE_GROUNDSWELL` does not need to be set when you declare a custom URL. + +2. Set the `TOWER_ENABLE_GROUNDSWELL` environment variable in `tower.env` to `true`. This enables the service at the default service URL `http://groundswell:8090`. + +3. 
In your [docker-compose.yml](../_templates/docker/docker-compose.yml) file, uncomment the `groundswell` section at the bottom. + + - To create a schema for the optimization service on the same local MySQL container, uncomment the `init.sql` script in the `volumes` section. + +4. Download the [init.sql](../_templates/docker/init.sql) file. Store this file in the mount path of your `docker-compose.yml` file, else update the `source: ./init.sql` line in your `docker-compose.yml` with the file path. + +5. When the pipeline resource optimization service is active, pipelines that can be optimized display a lightbulb icon in your Launchpad. Any pipeline with at least one successful run can be optimized. + +### Existing installation + +To use the pipeline resource optimization service in an existing Docker Compose installation of Seqera Enterprise, use the following steps: + +1. To run the service from a custom URL, declare the URL with the `GROUNDSWELL_SERVER_URL` environment variable. A non-zero value for this environment variable activates the optimization service automatically, so `TOWER_ENABLE_GROUNDSWELL` does not need to be set when you declare a custom URL. + +2. Set the `TOWER_ENABLE_GROUNDSWELL` environment variable to `true`. This enables the service at the default service URL `http://groundswell:8090`. + +3. In your [docker-compose.yml](../_templates/docker/docker-compose.yml) file, uncomment the `groundswell` section at the bottom. If you use a `docker-compose.yml` file older than version 23.3, download a newer version of the file to extract the `groundswell` section. + +4. Log in to your database server and run the following commands: + + ```sql + CREATE DATABASE IF NOT EXISTS `swell`; + CREATE USER 'swell'@'%' IDENTIFIED BY 'swell'; + GRANT ALL PRIVILEGES ON *.* TO 'swell'@'%'; + FLUSH PRIVILEGES; + ``` + +5. 
If you use Amazon RDS or other managed database services, run the following commands in your database instance: + + ```sql + CREATE DATABASE IF NOT EXISTS `swell`; + CREATE USER 'swell'@'%' IDENTIFIED BY 'swell'; + GRANT ALL PRIVILEGES ON `%`.* TO 'swell'@'%'; + FLUSH PRIVILEGES; + ``` + +6. Download the [groundswell.env](../_templates/docker/groundswell.env) file. Store this file in the mount path of your `docker-compose.yml` file. Update the `TOWER_DB_URL` and `SWELL_DB_URL` values: + + ```env + # Uncomment for container DB instances + # TOWER_DB_URL=mysql://db:3306/tower + # SWELL_DB_URL=mysql://db:3306/swell + + # Uncomment for managed DB instances (Example URL shows an Amazon RDS instance URL) + # TOWER_DB_URL=mysql://db1.abcdefghijkl.us-east-1.rds.amazonaws.com:3306/tower + # SWELL_DB_URL=mysql://db1.abcdefghijkl.us-east-1.rds.amazonaws.com:3306/swell + ``` + +7. When the pipeline resource optimization service is active, pipelines that can be optimized display a lightbulb icon in your Launchpad. Any pipeline with at least one successful run can be optimized. + +## Kubernetes deployment + +Kubernetes deployments use an [initContainer](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) that runs during pod initialization to set up the pipeline resource optimization service. To use the service in new or existing Kubernetes installations of Seqera Enterprise, do the following: + +1. Download the [groundswell manifest](../_templates/k8s/groundswell.yml): + + ```yaml file=../_templates/k8s/groundswell.yml + ``` + +1. To run the service from a custom URL, declare the URL with the `GROUNDSWELL_SERVER_URL` environment variable in the `configmap.yml` file that you downloaded for your [Platform installation][platform-k8s]. A non-zero value for this environment variable activates the optimization service automatically, so `TOWER_ENABLE_GROUNDSWELL` does not need to be set when you declare a custom URL. + +1. 
Define a set of credentials for the optimization database. This can be the same database used for Seqera, but in a different schema. + +1. Log in to your database server and run the following commands: + + - If you use Amazon RDS or other managed database services, run the following commands in your database instance: + + ```sql + CREATE DATABASE IF NOT EXISTS `swell`; + CREATE USER 'swell'@'%' IDENTIFIED BY 'swell'; + GRANT ALL PRIVILEGES ON `%`.* TO 'swell'@'%'; + FLUSH PRIVILEGES; + ``` + + - If you do not use a managed database service, run the following commands in your database instance: + + ```sql + CREATE DATABASE IF NOT EXISTS `swell`; + CREATE USER 'swell'@'%' IDENTIFIED BY 'swell'; + GRANT ALL PRIVILEGES ON *.* TO 'swell'@'%'; + FLUSH PRIVILEGES; + ``` + +The initContainers process will wait until both the Seqera and pipeline resource optimization service databases are ready before starting the migration in the Seqera database and finally starting the optimization container. + +When the pipeline resource optimization service is active, pipelines that can be optimized display a lightbulb icon in your Launchpad. Any pipeline with at least one successful run can be optimized. + + +[platform-k8s]: ../kubernetes diff --git a/platform-enterprise/enterprise/configuration/reverse_proxy.md b/platform-enterprise/enterprise/configuration/reverse_proxy.md new file mode 100644 index 000000000..94c60a11f --- /dev/null +++ b/platform-enterprise/enterprise/configuration/reverse_proxy.md @@ -0,0 +1,46 @@ +--- +title: "Reverse proxy" +description: Configuration options for reverse proxy connection +date: "5 Oct 2023" +tags: [reverse-proxy, configuration] +--- + +:::caution +As of February 2024, this configuration guide is not currently recommended for production use, as the instructions are actively under development and will likely change. +::: + +To expose your Seqera instance behind a reverse proxy, complete the following steps: + +1. 
Use the [Seqera frontend unprivileged](../kubernetes#seqera-frontend-unprivileged) image. +2. Add `TOWER_BASE_PATH` to the environment variables of the frontend container: + - `TOWER_BASE_PATH: "/myseqera/"` exposes your instance at `https://example.com/myseqera/` (this must match your proxy configuration) +3. In the backend/cron environment variables or in the Seqera config file, edit the following environment variables: + - Set `TOWER_SERVER_URL` to the complete URL where you want to expose your instance, e.g., `TOWER_SERVER_URL: "https://example.com/myseqera"` (without the trailing slash) + - Disable/unset `TOWER_LANDING_URL` +4. Configure your reverse proxy to redirect all Seqera-related links to your Seqera frontend container: + +- If your frontend container listens on `http://tower-frontend:8080` and you're using Apache HTTP as your reverse proxy, add the following lines at the end of your configuration file (replace `/myseqera/` with the URL you defined in `TOWER_BASE_PATH`): + + ``` + LoadModule proxy_module modules/mod_proxy.so + LoadModule proxy_http_module modules/mod_proxy_http.so + LoadModule rewrite_module modules/mod_rewrite.so + + RewriteEngine on + RewriteRule "^/myseqera/(.*)$" http://tower-frontend:8080/$1 [P] + ProxyPassReverse "/myseqera/" http://tower-frontend:8080/ + RewriteRule "^/api/(.*)$" http://tower-frontend:8080/api/$1 [P] + ProxyPassReverse "/api/" http://tower-frontend:8080/api/ + RewriteRule "^/auth/(.*)$" http://tower-frontend:8080/auth/$1 [P] + ProxyPassReverse "/auth/" http://tower-frontend:8080/auth/ + RewriteRule "^/oauth/(.*)$" http://tower-frontend:8080/oauth/$1 [P] + ProxyPassReverse "/oauth/" http://tower-frontend:8080/oauth/ + RewriteRule "^/openapi/(.*)$" http://tower-frontend:8080/openapi/$1 [P] + ProxyPassReverse "/openapi/" http://tower-frontend:8080/openapi/ + RewriteRule "^/content/(.*)$" http://tower-frontend:8080/content/$1 [P] + ProxyPassReverse "/content/" http://tower-frontend:8080/content/ + ``` + +- A similar 
configuration should be applied for NGINX or other reverse proxies. Redirect visits to `/api/`, `/oauth/`, `/openapi/`, and `/content/`. + +After you configure the reverse proxy, the Seqera frontend URL (default `http://tower-frontend:8080`) should return a blank page. This behavior is expected, because Seqera is now configured to work only from behind the reverse proxy. diff --git a/platform-enterprise/enterprise/configuration/ssl_tls.md b/platform-enterprise/enterprise/configuration/ssl_tls.md new file mode 100644 index 000000000..944231891 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/ssl_tls.md @@ -0,0 +1,233 @@ +--- +title: "SSL/TLS" +description: Configure your Seqera instance to use SSL/TLS certificates for HTTPS +date: "21 Apr 2023" +tags: [ssl, tls, https, configuration] +--- + +HTTP must not be used in production environments. An SSL certificate is required for your Seqera instance to handle HTTPS traffic. Private certificates are supported, but require additional configuration during Seqera Enterprise installation and Nextflow execution. + +## AWS deployments: Manage SSL certificates with Amazon Certificate Manager (ACM) + +Use [Amazon Certificate Manager](https://aws.amazon.com/certificate-manager/) (ACM) to apply SSL certificates to your AWS deployment: + +- If you have an existing SSL certificate, see [Importing certificates into AWS Certificate Manager](https://docs.aws.amazon.com/acm/latest/userguide/import-certificate.html). + +- If you don't have an existing SSL certificate, see [Issuing and managing certificates](https://docs.aws.amazon.com/acm/latest/userguide/gs.html). + +## Configure Seqera to trust your private certificate + +If you secure related infrastructure (such as private Git repositories) with certificates issued by a private Certificate Authority, these certificates must be loaded into the Seqera Enterprise containers. You can achieve this in several ways. + +**Configure private certificate trust** + +1. 
This guide assumes you're using the original containers supplied by Seqera. +2. Replace `TARGET_HOSTNAME`, `TARGET_ALIAS`, and `PRIVATE_CERT.pem` with your unique values. +3. Previous instructions advised using `openssl`. The native `keytool` utility is preferred as it simplifies steps and better accommodates private CA certificates. + +**Use Docker volume** + +1. Retrieve the private certificate on your Seqera container host: + +``` +keytool -printcert -rfc -sslserver TARGET_HOSTNAME:443 > /PRIVATE_CERT.pem +``` + +2. Modify the `backend` and `cron` container configuration blocks in `docker-compose.yml`: + +```yaml +CONTAINER_NAME: + # -- Other keys here like `image` and `networks`-- + + # Add a new mount for the downloaded certificate + volumes: + - type: bind + source: /PRIVATE_CERT.pem + target: /etc/pki/ca-trust/source/anchors/PRIVATE_CERT.pem + + # Add a new keytool import line PRIOR to 'update-ca-trust' for the certificate + command: > + sh -c "keytool -import -trustcacerts -storepass changeit -noprompt -alias TARGET_ALIAS -file /etc/pki/ca-trust/source/anchors/PRIVATE_CERT.pem && + update-ca-trust && + /wait-for-it.sh db:3306 -t 60 && + /tower.sh" +``` + +**Use K8s ConfigMap** + +1. Retrieve the private certificate on a machine with CLI access to your Kubernetes cluster: + +```bash +keytool -printcert -rfc -sslserver TARGET_HOSTNAME:443 > /PRIVATE_CERT.pem +``` + +2. Load the certificate as a `ConfigMap` in the same namespace where your Seqera instance will run: + +```bash +kubectl create configmap private-cert-pemstore --from-file=/PRIVATE_CERT.pem +``` + +3. 
Modify both the `backend` and `cron` Deployment objects: + +- Define a new volume based on the certificate `ConfigMap`: + + ```yaml + spec: + template: + spec: + volumes: + - name: private-cert-pemstore + configMap: + name: private-cert-pemstore + ``` + +- Add a volumeMount entry into the container definition: + + ```yaml + spec: + template: + spec: + containers: + - name: CONTAINER_NAME + volumeMounts: + - name: private-cert-pemstore + mountPath: /etc/pki/ca-trust/source/anchors/PRIVATE_CERT.pem + subPath: PRIVATE_CERT.pem + ``` + +- Modify the container start command to load the certificate prior to running your Seqera instance: + + ```yaml + spec: + template: + spec: + containers: + - name: CONTAINER_NAME + command: ["/bin/sh"] + args: + - -c + - | + keytool -import -trustcacerts -cacerts -storepass changeit -noprompt -alias TARGET_ALIAS -file /PRIVATE_CERT.pem; + ./tower.sh + ``` + +**Download on Pod start** + +1. Modify both the `backend` and `cron` Deployment objects to retrieve and load the certificate prior to running your Seqera instance: + +```yaml +spec: + template: + spec: + containers: + - name: CONTAINER_NAME + command: ["/bin/sh"] + args: + - -c + - | + keytool -printcert -rfc -sslserver TARGET_HOST:443 > /PRIVATE_CERT.pem; + keytool -import -trustcacerts -cacerts -storepass changeit -noprompt -alias TARGET_ALIAS -file /PRIVATE_CERT.pem; + ./tower.sh +``` + +## Configure the Nextflow launcher image to trust your private certificate + +If you secure infrastructure such as private Git repositories or your Seqera Enterprise instance with certificates issued by a private Certificate Authority, these certificates must also be loaded into the Nextflow launcher container. + +**Import private certificates via pre-run script** + +1. This configuration assumes you're using the default `nf-launcher` image supplied by Seqera. +2. Replace `TARGET_HOSTNAME`, `TARGET_ALIAS`, and `PRIVATE_CERT.pem` with your unique values. +3. 
Previous instructions advised using `openssl`. The native `keytool` utility is preferred as it simplifies steps and better accommodates private CA certificates. + +Add the following to your compute environment [pre-run script](../../launch/advanced#pre-and-post-run-scripts): + +```bash +keytool -printcert -rfc -sslserver TARGET_HOSTNAME:443 > /PRIVATE_CERT.pem +keytool -import -trustcacerts -cacerts -storepass changeit -noprompt -alias TARGET_ALIAS -file /PRIVATE_CERT.pem + +cp /PRIVATE_CERT.pem /etc/pki/ca-trust/source/anchors/PRIVATE_CERT.pem +update-ca-trust +``` + +## Configure Seqera to present a SSL/TLS certificate + +You can secure your Seqera instance with a TLS certificate in several ways. + +**Load balancer (recommended)** + +Place a load balancer, configured to present a certificate and act as a TLS termination point, in front of your Seqera instance. + +This solution is likely already implemented for cloud-based Kubernetes implementations and can be easily implemented for Docker Compose-based stacks. See [this example](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/create-application-load-balancer.html). + +**Reverse proxy container** + +This solution works well for Docker Compose-based stacks to avoid the additional cost and maintenance of a load balancer. See [this example](https://doc.traefik.io/traefik/v1.7/configuration/acme/). + +**Modify `frontend` container** + +Due to complications that can be encountered during upgrades, this approach is not recommended. + +
+ Show me anyway + + This example assumes deployment on an Amazon Linux 2 AMI. + + 1. Install NGINX and other required packages: + + ```bash + sudo amazon-linux-extras install nginx1.12 + sudo wget -r --no-parent -A 'epel-release-*.rpm' https://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/e/ + sudo rpm -Uvh dl.fedoraproject.org/pub/epel/7/x86_64/Packages/e/epel-release-*.rpm + sudo yum-config-manager --enable epel* + sudo yum repolist all + sudo amazon-linux-extras install epel -y + ``` + + 2. Generate a [private certificate and key](https://www.digitalocean.com/community/tutorials/openssl-essentials-working-with-ssl-certificates-private-keys-and-csrs). + + 3. Make a local copy of the `/etc/nginx/templates/tower.conf.template` file from the `frontend` container, or create a ConfigMap to store it if you're using Kubernetes. + + 4. Replace the `listen` directives in the `server` block with the following: + + ```nginx + listen ${NGINX_LISTEN_PORT} ssl default_server; + listen [::]:${NGINX_LISTEN_PORT_IPV6} ssl default_server; + + ssl_certificate /etc/ssl/testcrt.crt; + ssl_certificate_key /etc/ssl/testkey.key; + ``` + + 5. Modify the `frontend` container definition in your `docker-compose.yml` file or Kubernetes manifest: + + ```yml + frontend: + image: cr.seqera.io/frontend:${TAG} + networks: + - frontend + environment: + NGINX_LISTEN_PORT: 8081 + NGINX_LISTEN_PORT_IPV6: 8443 + ports: + - 8000:8081 + - 443:8443 + volumes: + - $PWD/tower.conf.template:/etc/nginx/templates/tower.conf.template + - $PWD/cert/testcrt.crt:/etc/ssl/testcrt.crt + - $PWD/cert/testkey.key:/etc/ssl/testkey.key + restart: always + depends_on: + - backend + ``` + +
+ +## TLS version support + +Seqera Enterprise versions 22.3.2 and earlier rely on Java 11 (Amazon Corretto). You may encounter issues when integrating with third-party services that enforce TLS v1.2 (such as Azure Active Directory OIDC). + +TLS v1.2 can be explicitly enabled by default using JDK environment variables: + +```bash +_JAVA_OPTIONS="-Dmail.smtp.ssl.protocols=TLSv1.2" +``` diff --git a/platform-enterprise/enterprise/configuration/wave.md b/platform-enterprise/enterprise/configuration/wave.md new file mode 100644 index 000000000..50e810a27 --- /dev/null +++ b/platform-enterprise/enterprise/configuration/wave.md @@ -0,0 +1,38 @@ +--- +title: "Wave containers" +description: "Configuring the Wave container service" +date: "12 Apr 2023" +tags: [wave, containers, configuration] +--- + +From version 22.4, Seqera Platform Enterprise supports the Seqera Wave containers service for on-prem installations. + +To learn more about Wave, see [Wave containers](https://wave.seqera.io). To learn more about Wave and Nextflow integration, see the [Nextflow documentation](https://www.nextflow.io/docs/latest/wave.html). + +## Pair your Seqera instance with Wave + +To pair Seqera Enterprise with Wave, you need the following: + +- Credentials to authenticate to your (private or public) container registry configured in the Seqera UI. See the [container registry credentials](../../credentials/overview) instructions for your provider. + +- Your container registry must allow ingress from the Wave service (`https://wave.seqera.io`). + +- The Wave service (`https://wave.seqera.io`) must be accessible from the network where your Seqera instance is installed (i.e., the domain should be whitelisted in protected Seqera installations). + +- The `TOWER_ENABLE_WAVE=true` and `WAVE_SERVER_URL="https://wave.seqera.io"` environment variables must be added to your Seqera configuration environment. 
+ +:::note +Wave does not currently support container repositories that have private CA SSL certificates applied. +::: + +You can test connectivity with the Wave service by accessing https://wave.seqera.io/service-info, either from the browser or with cURL: + +```bash +$ curl https://wave.seqera.io/service-info +``` + +When these conditions are met, the Wave feature is available on the Seqera compute environment creation page (currently only available for AWS compute environments). + +After Wave is enabled, you can use private container repositories and the Fusion file system in your Nextflow pipelines. + +Wave can also be enabled in the Nextflow pipeline config file. See the [Nextflow documentation](https://www.nextflow.io/docs/latest/wave.html) for more information. diff --git a/platform-enterprise/enterprise/docker-compose.md b/platform-enterprise/enterprise/docker-compose.md new file mode 100644 index 000000000..9134a9d43 --- /dev/null +++ b/platform-enterprise/enterprise/docker-compose.md @@ -0,0 +1,52 @@ +--- +title: "Docker Compose" +description: Deploy Seqera Platform Enterprise with Docker Compose +date: "12 Feb 2024" +tags: [docker, compose, deployment] +--- + +This guide assumes that all prerequisites have been met. Visit the corresponding **Prerequisites** page for your infrastructure provider. + +Seqera recommends configuring your database or Redis details in either `tower.yml` or `docker-compose.yml`, but not both. + +:::note +The DB or Redis volume is persistent after a Docker restart by default. Use the `volumes` key in the `db` or `redis` section of your `docker-compose.yml` file to specify a local path to the DB or Redis instance. For your database or Redis volume to be ephemeral, remove the `volumes` key altogether. +::: + +## Deploy Seqera Enterprise + +1. Download and configure [tower.env](_templates/docker/tower.env). See [Configuration](../configuration/overview#basic-configuration) for detailed instructions. + +2. 
Download and configure [tower.yml](_templates/docker/tower.yml). See [Configuration](../configuration/overview#basic-configuration) for detailed instructions. + +3. Download and configure the [docker-compose.yml](_templates/docker/docker-compose.yml) file: + + - The `db` container should be used only for local testing. If you have configured this service elsewhere, you can remove this container. + + - To configure the Seqera pipeline resource optimization service (`groundswell`), see [Pipeline resource optimization](./configuration/pipeline_optimization). + + - To deploy with Studios, see [Studios deployment](./studios). + +4. Deploy the application and wait for it to initialize (this process takes a few minutes): + + ```bash + docker compose up + ``` + +5. [Test](./testing) the application by running an nf-core pipeline with a test profile. + +6. After you've confirmed that Seqera Enterprise is correctly configured and you can launch workflows, run `docker compose up -d` to deploy the application as a background process. You can then disconnect from the VM instance. + +:::note +For more information on configuration, see [Configuration options](./configuration/overview). +::: + +## Optional features + +### Studios + +[Studios](../studios/overview) is an interactive analysis environment available in organizational workspaces. To enable Studios, see [Studios deployment](./studios). + +:::note +Studios is available from Seqera Platform v24.1. If you experience any problems during the deployment process please contact your account executive. Studios in Enterprise is not installed by default. 
+::: diff --git a/platform-enterprise/enterprise/general_troubleshooting.md b/platform-enterprise/enterprise/general_troubleshooting.md new file mode 100644 index 000000000..b4c9d8e53 --- /dev/null +++ b/platform-enterprise/enterprise/general_troubleshooting.md @@ -0,0 +1,255 @@ +--- +title: "Troubleshooting" +description: Platform Enterprise troubleshooting guidance +date: "21 Apr 2023" +tags: [troubleshooting] +--- + +## Networking + +**503 errors during pipeline execution** + +Error 503 suggests that one or more of the services being contacted by Seqera Enterprise as part of workflow execution are unavailable. Ensure all required services are running and available. [Database](./configuration/overview#seqera-and-redis-databases) connectivity is often the culprit for `503` errors. + +**_SocketTimeoutException: connect timed out_ errors with self-hosted Git servers** + +You may encounter connection timeout issues while trying to launch workflows from a self-hosted Git server (BitBucket, GitLab, etc.). If you configured the correct Git credentials in Seqera Enterprise, this error signals that the `backend/cron` container cannot connect to the Git remote host. This can be caused by a missing or incorrect proxy configuration. + +
+ Error log + + ```bash + + ERROR i.s.t.c.GlobalErrorController - Unexpected error while processing - Error ID: 6h3HBUkaPe03vgzoDPc5HO + java.net.SocketTimeoutException: connect timed out + at java.base/java.net.PlainSocketImpl.socketConnect(Native Method) + at java.base/java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:399) + at java.base/java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:242) + at java.base/java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:224) + at java.base/java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) + at java.base/java.net.Socket.connect(Socket.java:609) + at java.base/sun.security.ssl.SSLSocketImpl.connect(SSLSocketImpl.java:289) + at java.base/sun.net.NetworkClient.doConnect(NetworkClient.java:177) + at java.base/sun.net.www.http.HttpClient.openServer(HttpClient.java:474) + at java.base/sun.net.www.http.HttpClient.openServer(HttpClient.java:569) + at java.base/sun.net.www.protocol.https.HttpsClient.(HttpsClient.java:265) + at java.base/sun.net.www.protocol.https.HttpsClient.New(HttpsClient.java:372) + at java.base/sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.getNewHttpClient(AbstractDelegateHttpsURLConnection.java:203) + at java.base/sun.net.www.protocol.http.HttpURLConnection.plainConnect0(HttpURLConnection.java:1187) + at java.base/sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:1081) + at java.base/sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(AbstractDelegateHttpsURLConnection.java:189) + at java.base/sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1592) + at java.base/sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1520) + at java.base/java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:527) + at 
java.base/sun.net.www.protocol.https.HttpsURLConnectionImpl.getResponseCode(HttpsURLConnectionImpl.java:334) + at nextflow.scm.RepositoryProvider.checkResponse(RepositoryProvider.groovy:167) + at nextflow.scm.RepositoryProvider.invoke(RepositoryProvider.groovy:136) + at nextflow.scm.RepositoryProvider.memoizedMethodPriv$invokeAndParseResponseString(RepositoryProvider.groovy:218) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) + at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) + at java.base/java.lang.reflect.Method.invoke(Method.java:566) + at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:107) + at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:323) + at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1259) + at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1026) + at org.codehaus.groovy.runtime.InvokerHelper.invokePogoMethod(InvokerHelper.java:1029) + at org.codehaus.groovy.runtime.InvokerHelper.invokeMethod(InvokerHelper.java:1012) + at org.codehaus.groovy.runtime.InvokerHelper.invokeMethodSafe(InvokerHelper.java:101) + at nextflow.scm.RepositoryProvider$_closure2.doCall(RepositoryProvider.groovy) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) + at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) + at java.base/java.lang.reflect.Method.invoke(Method.java:566) + at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:107) + at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:323) + at org.codehaus.groovy.runtime.metaclass.ClosureMetaClass.invokeMethod(ClosureMetaClass.java:263) + at 
groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1026) + at groovy.lang.Closure.call(Closure.java:412) + at org.codehaus.groovy.runtime.memoize.Memoize$MemoizeFunction.lambda$call$0(Memoize.java:137) + at org.codehaus.groovy.runtime.memoize.ConcurrentCommonCache.getAndPut(ConcurrentCommonCache.java:137) + at org.codehaus.groovy.runtime.memoize.ConcurrentCommonCache.getAndPut(ConcurrentCommonCache.java:113) + at org.codehaus.groovy.runtime.memoize.Memoize$MemoizeFunction.call(Memoize.java:136) + at groovy.lang.Closure.call(Closure.java:428) + at nextflow.scm.RepositoryProvider.invokeAndParseResponse(RepositoryProvider.groovy) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) + at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) + at java.base/java.lang.reflect.Method.invoke(Method.java:566) + at org.codehaus.groovy.runtime.callsite.PlainObjectMetaMethodSite.doInvoke(PlainObjectMetaMethodSite.java:43) + at org.codehaus.groovy.runtime.callsite.PogoMetaMethodSite$PogoCachedMethodSiteNoUnwrapNoCoerce.invoke(PogoMetaMethodSite.java:193) + at org.codehaus.groovy.runtime.callsite.PogoMetaMethodSite.callCurrent(PogoMetaMethodSite.java:61) + at org.codehaus.groovy.runtime.callsite.AbstractCallSite.callCurrent(AbstractCallSite.java:185) + at nextflow.scm.BitbucketRepositoryProvider.getCloneUrl(BitbucketRepositoryProvider.groovy:114) + at nextflow.scm.AssetManager.memoizedMethodPriv$getGitRepositoryUrl(AssetManager.groovy:394) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) + at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) + at java.base/java.lang.reflect.Method.invoke(Method.java:566) 
+ at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:107) + at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:323) + at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1259) + at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1026) + at org.codehaus.groovy.runtime.InvokerHelper.invokePogoMethod(InvokerHelper.java:1029) + at org.codehaus.groovy.runtime.InvokerHelper.invokeMethod(InvokerHelper.java:1012) + at org.codehaus.groovy.runtime.InvokerHelper.invokeMethodSafe(InvokerHelper.java:101) + at nextflow.scm.AssetManager$_closure1.doCall(AssetManager.groovy) + at nextflow.scm.AssetManager$_closure1.doCall(AssetManager.groovy) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) + at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) + at java.base/java.lang.reflect.Method.invoke(Method.java:566) + at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:107) + at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:323) + at org.codehaus.groovy.runtime.metaclass.ClosureMetaClass.invokeMethod(ClosureMetaClass.java:263) + at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1026) + at groovy.lang.Closure.call(Closure.java:412) + at org.codehaus.groovy.runtime.memoize.Memoize$MemoizeFunction.lambda$call$0(Memoize.java:137) + at org.codehaus.groovy.runtime.memoize.ConcurrentCommonCache.getAndPut(ConcurrentCommonCache.java:137) + at org.codehaus.groovy.runtime.memoize.ConcurrentCommonCache.getAndPut(ConcurrentCommonCache.java:113) + at org.codehaus.groovy.runtime.memoize.Memoize$MemoizeFunction.call(Memoize.java:136) + at groovy.lang.Closure.call(Closure.java:406) + at nextflow.scm.AssetManager.getGitRepositoryUrl(AssetManager.groovy) + + ``` + +
+ +Update the HTTP proxy configuration in the `backend` and `cron` environment with your proxy details: + + ```bash + export http_proxy="http://PROXY_SERVER:PORT" + export https_proxy="https://PROXY_SERVER:PORT" + ``` + +## Database + +**Login failures: _java.sql.SQLException_ in the backend logs** + +After Seqera login authentication, an _Unexpected error while processing_ error is presented, with _java.sql.SQLException_ errors related to server time zone in the backend log: + +
+ Error log + + ``` + io.micronaut.transaction.exceptions.CannotCreateTransactionException: Could not open Hibernate Session for transaction + … + Caused by: org.hibernate.exception.GenericJDBCException: Unable to acquire JDBC Connection + … + java.sql.SQLException: The server time zone value 'CEST' is unrecognized or represents more than one time zone. You must configure either the server or JDBC driver (via the 'serverTimezone' configuration property) to use a more specific time zone value if you want to utilize time zone support. + … + ``` + +
+ +This error means that Seqera is unable to connect to the database and the `JDBC` client must specify the time zone value via `serverTimezone`. + +To resolve this issue for `Europe/Amsterdam` time zone, append `serverTimezone` to the value of [`TOWER_DB_URL`](./configuration/overview#seqera-and-redis-databases): + +```bash +export TOWER_DB_URL="jdbc:mysql://:3306/tower?serverTimezone=Europe/Amsterdam" +``` + +**_java.io.IOException: Unsupported protocol version 252_ error while completed or terminated runs display as in progress on Seqera** + +When a service is restarted or otherwise interrupted, this may result in invalid entries which corrupt the cache of your installation's Redis instance. Manually delete the key with the invalid entry to restore expected behavior (replace `container-name` with your container name in the commands below): + +```bash +## Check if the key exists +docker exec -ti [container-name] redis-cli keys \* | grep workflow + +## Show the hash contents of the key +docker exec -ti [container-name] redis-cli hgetall "workflow/modified" + +## Delete the key +docker exec -ti [container-name] redis-cli del "workflow/modified" +``` + +## Authentication + +**Login failures with OpenID Connect (OIDC): 500 error code in the frontend logs** + +When using OpenID Connect, the callback request may contain large HTTP headers that exceed buffer size, causing login failures. + +```bash + + *8317 upstream sent too big header while reading response header from upstream, client: 10.170.157.186, server: localhost, request: "GET /oauth/callback + +``` + +Rebuild the frontend container and add the following proxy directives to the `/etc/nginx/nginx.conf` file: + +```conf +proxy_buffer_size 128k; +proxy_buffers 4 256k; +proxy_busy_buffers_size 256k; +``` + +**OIDC callback failure** + +Callbacks could fail for many reasons. To investigate the problem: + +- Set the authentication logging level environment variable to `TOWER_SECURITY_LOGLEVEL=DEBUG`. 
+- Ensure your `TOWER_OIDC_CLIENT`, `TOWER_OIDC_SECRET`, and `TOWER_OIDC_ISSUER` environment variables all match the values specified in your OIDC provider application. +- Ensure your network infrastructure allows the necessary egress and ingress traffic. + +**OIDC `redirect_url` set to HTTP instead of HTTPS** + +This can occur for several reasons. Verify the following: + +- Your `TOWER_SERVER_URL` environment variable uses the `https://` prefix. +- Your `tower.yml` has `micronaut.ssl.enabled` set to `true`. +- Any Load Balancer instance that sends traffic to Seqera Enterprise is configured to use HTTPS as its backend protocol rather than HTTP/TCP. + +**On-prem HPC compute environments: _Exhausted available authentication methods_ error** + +This error points to an issue with the SSH credentials used to authenticate Seqera to your HPC cluster (LSF, Slurm, etc.), such as an invalid SSH key or inappropriate permissions on the user directory. Check the following: + +- Ensure the SSH key is still valid. If not, create new SSH keys and [re-create the compute environment](../compute-envs/hpc) in Seqera with the updated credentials. + +- Check the backend logs for a stack trace similar to the following: + +
+ Error log + + ``` + [io-executor-thread-2] 10.42.0.1 ERROR i.s.t.c.GlobalErrorController - Unexpected error while processing - Error ID: 5d7rDpS8pByF8YqfUVPvB4 + net.schmizz.sshj.userauth.UserAuthException: Exhausted available authentication methods + at net.schmizz.sshj.SSHClient.auth(SSHClient.java:227) + at net.schmizz.sshj.SSHClient.authPublickey(SSHClient.java:342) + at net.schmizz.sshj.SSHClient.authPublickey(SSHClient.java:360) + at io.seqera.tower.service.platform.ssh.SSHClientFactory.createClient(SSHClientFactory.groovy:110) + .. + .. + Caused by: net.schmizz.sshj.userauth.UserAuthException: Problem getting public key from PKCS5KeyFile{resource=[PrivateKeyStringResource]} + at net.schmizz.sshj.userauth.method.KeyedAuthMethod.putPubKey(KeyedAuthMethod.java:47) + at net.schmizz.sshj.userauth.method.AuthPublickey.buildReq(AuthPublickey.java:62) + at net.schmizz.sshj.userauth.method.AuthPublickey.buildReq(AuthPublickey.java:81) + at net.schmizz.sshj.userauth.method.AbstractAuthMethod.request(AbstractAuthMethod.java:68) + at net.schmizz.sshj.userauth.UserAuthImpl.authenticate(UserAuthImpl.java:73) + at net.schmizz.sshj.SSHClient.auth(SSHClient.java:221) + ... 91 common frames omitted + Caused by: net.schmizz.sshj.userauth.keyprovider.PKCS5KeyFile$FormatException: Length mismatch: 1152 != 1191 + at net.schmizz.sshj.userauth.keyprovider.PKCS5KeyFile$ASN1Data.(PKCS5KeyFile.java:248) + ``` + +
+ +- Enable SSH library log tracing with the following environment variable in your `tower.env` file for verbose debug logging of the SSH connection: + + ```env + TOWER_SSH_LOGLEVEL=TRACE + ``` + +- Check the permissions of the `/home` directory of the user tied to the cluster's SSH credentials. `/home/[user]` should be `chmod 755`, whereas `/home/[user]/.ssh` requires `chmod 700`: + + ```bash + $ pwd ; ls -ld . + /home/user + drwxr-xr-x 41 user user 20480 + + $ pwd; ls -ld . + /home/user/.ssh + drwx------ 2 user user 4096 + + ``` diff --git a/platform-enterprise/enterprise/kubernetes.md b/platform-enterprise/enterprise/kubernetes.md new file mode 100644 index 000000000..7d10996c1 --- /dev/null +++ b/platform-enterprise/enterprise/kubernetes.md @@ -0,0 +1,300 @@ +--- +title: "Kubernetes" +description: Deploy Seqera Platform Enterprise with Kubernetes +date: "21 Apr 2023" +tags: [kubernetes, deployment] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +This guide assumes that all prerequisites have been met. Visit the corresponding **Prerequisites** page for your infrastructure provider. + +Complete the following procedures to install Seqera Platform Enterprise on a Kubernetes cluster: + +### Create a namespace + +Create a namespace to isolate Kubernetes resources used by Seqera Platform from the other resources on your cluster. + +:::note +This installation guide assumes the use of `seqera-platform` as the installation namespace. Consider using a different one that better fits your cluster naming convention. 
+::: + +Create a namespace for the Seqera resources: + + ```bash + kubectl create namespace seqera-platform + ``` + +Switch to the namespace: + + ```bash + kubectl config set-context --current --namespace=seqera-platform + ``` + +### Configure container registry credentials + +Seqera Enterprise is distributed as a collection of Docker containers available through the Seqera container registry [`cr.seqera.io`](https://cr.seqera.io). Contact [support](https://support.seqera.io) to get your container access credentials. After you've received your credentials, grant your cluster access to the registry: + +1. Retrieve the `name` and `secret` values from the JSON file that you received from Seqera support. + +1. Create a [secret][kubectl-secret]: + + ```bash + kubectl create secret docker-registry cr.seqera.io \ + --docker-server=cr.seqera.io \ + --docker-username='' \ + --docker-password='' + ``` + + The credential `name` contains a dollar `$` character. Wrap the name in single quotes to prevent the Linux shell from interpreting this value as an environment variable. + +1. Configure the Seqera cron service and the application frontend and backend to use the secret created in the previous step (see [tower-cron.yml](./_templates/k8s/tower-cron.yml) and [tower-svc.yml](./_templates/k8s/tower-svc.yml)): + + ```yaml + imagePullSecrets: + - name: "cr.seqera.io" + ``` + + This parameter is already included in the templates linked above. If you use a name other than `cr.seqera.io` for the secret, update this value accordingly in the configuration files. + +### Seqera ConfigMap + +Download and configure a [ConfigMap](_templates/k8s/configmap.yml). See [Configuration](./configuration/overview) for more information. + +Deploy the ConfigMap to your cluster after it is configured: + + ```bash + kubectl apply -f configmap.yml + ``` + +:::note +The `configmap.yml` manifest includes both the `tower.env` and `tower.yml` files. 
These files are made available to the other containers through volume mounts. +::: + +### Redis + +Seqera Enterprise requires a Redis database for caching purposes. Configure Redis manually by deploying a manifest to your cluster, or configure a managed Redis service. + +#### Deploy a Redis manifest to your cluster + +1. Download the appropriate manifest for your infrastructure: + + - [Amazon EKS](_templates/k8s/redis.eks.yml) + - [Azure AKS](_templates/k8s/redis.aks.yml) + - [Google Kubernetes Engine](_templates/k8s/redis.gke.yml) + +1. Deploy to your cluster: + + ```bash + kubectl apply -f redis.*.yml + ``` + +1. To run the Redis service as a container as part of your Docker or Kubernetes installation, specify the service name as part of the `TOWER_REDIS_URL`: + + ```bash + TOWER_REDIS_URL=redis://redis:6379 + ``` + +#### Managed Redis services + +Seqera supports managed Redis services such as [Amazon ElastiCache][aws-elasticache], [Azure Cache for Redis][azure-cache], or [Google Memorystore][memorystore]. 
+ + + + +- Use a single-node cluster, as multi-node clusters are not supported +- Use an instance with at least 6GB capacity ([cache.m4.large][aws-cache-instances] or greater) +- Specify your private ElastiCache instance in the Seqera [environment variables](./configuration/overview#database-and-redis-manual-configuration): + + ```bash + TOWER_REDIS_URL=redis://:6379 + ``` + + + + +- Use a single-node cluster, as multi-node clusters are not supported +- Use an instance with at least 6GB capacity ([C3][azure-cache-instances] or greater) +- Specify your private Azure Cache for Redis instance in the Seqera [environment variables](./configuration/overview#database-and-redis-manual-configuration): + + ```bash + TOWER_REDIS_URL=redis://:6379 + ``` + + + + +- Use a single-node cluster, as multi-node clusters are not supported +- Use an instance with at least 6GB capacity ([M2][google-cache-instances] or greater) +- Specify your private Memorystore instance in the Seqera [environment variables](./configuration/overview#database-and-redis-manual-configuration): + + ```bash + TOWER_REDIS_URL=redis://:6379 + ``` + + + + +### Seqera cron service + +Download the [cron service manifest](_templates/k8s/tower-cron.yml) file. + +To deploy the manifest to your cluster, run the following: + + ```bash + kubectl apply -f tower-cron.yml + ``` + +:::caution +This container creates the required database schema the first time it instantiates. This process can take a few minutes to complete and must finish before you instantiate the Seqera backend. Ensure this container is in the `READY` state before proceeding to the next step. +::: + +### Seqera frontend and backend + +Download the [manifest](_templates/k8s/tower-svc.yml). + +To deploy the manifest to your cluster, run the following: + + ```bash + kubectl apply -f tower-svc.yml + ``` + +#### Seqera frontend unprivileged + +An unprivileged version of the Seqera frontend image is also available. 
This image listens on an unprivileged port and therefore doesn't need to be run as the root user. + +Replace the tag of the frontend image `cr.seqera.io/private/nf-tower-enterprise/frontend:v25.x.x` with `cr.seqera.io/private/nf-tower-enterprise/frontend:v25.x.x-unprivileged`. In the `frontend` service below, specify the `targetPort` to match the environment variable `NGINX_LISTEN_PORT` (see below): + +```yaml +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: frontend + labels: + app: frontend +spec: + ... + containers: + - name: frontend + image: cr.seqera.io/private/nf-tower-enterprise/frontend:v25.1.1-unprivileged + env: + - name: NGINX_LISTEN_PORT # If not defined, defaults to 8000. + value: "8000" +--- +apiVersion: v1 +kind: Service +metadata: + name: frontend +spec: + ports: + - port: 80 + targetPort: 8000 +``` + +The unprivileged Seqera image will soon deprecate the current image that requires root. The unprivileged image can be easily customized using environment variables: + +- `NGINX_LISTEN_PORT` (default `8000`): The port the NGINX process will listen on + inside the container. The `targetPort` on the `frontend` service must match the value + defined in the environment variable. +- `NGINX_LISTEN_PORT_IPV6` (default `8000`): The NGINX listening port to open on the IPv6 address. +- `NGINX_UPSTREAM_HOST` (default `backend`): The hostname of the backend service to which the NGINX process will route requests. +- `NGINX_UPSTREAM_PORT` (default `8080`): The port where the backend service is exposed. +- +If further customization of the config file is needed, mount a config map/secret over the templated NGINX configuration file at `/etc/nginx/templates/tower.conf.template`. See [SSL/TLS](./configuration/ssl_tls#configure-seqera-to-present-a-ssltls-certificate) for an example. + +### Seqera ingress + +An ingress is used to make Seqera Enterprise publicly accessible, load-balance traffic, terminate TLS, and offer name-based virtual hosting. 
The included ingress manifest will create an external IP address and forward HTTP traffic to the Seqera frontend. + +Download and configure the appropriate manifest for your infrastructure: + + - [Amazon EKS](_templates/k8s/ingress.eks.yml) + - [Azure AKS](_templates/k8s/ingress.aks.yml) + - [Google Kubernetes Engine](_templates/k8s/ingress.gke.yml) + +To deploy the manifest to your cluster, run the following: + + ```bash + kubectl apply -f ingress.*.yml + ``` + +See [Kubernetes ingress][k8s-ingress] for more information. If you don't need to make Seqera externally accessible, use a service resource to expose a [node port][k8s-node-port] or a [load balancer][k8s-load-balancer] service to make it accessible within your intranet. + +See the cloud provider documentation for configuring an ingress service on each cloud provider: + +- [Amazon][aws-configure-ingress] +- [Azure][azure-configure-ingress] +- [Google Cloud][google-configure-ingress] + +### Check status + +Check that all services are up and running: + +```bash +kubectl get pods +``` + +### Test the application + +See [Test deployment](./testing). + +## Optional features + +### Pipeline optimization + +Seqera Platform offers a service that optimizes pipeline resource requests. Install the resource optimization service in your Kubernetes cluster with [this manifest](_templates/k8s/groundswell.yml). + +Define a set of credentials for the resource optimization database in the `tower-groundswell-cfg` ConfigMap. This can be the same database used for Seqera, but in a different schema. + +The initContainers will wait until both the Seqera and pipeline optimization service databases are ready before starting the migration in the Seqera database and finally starting the resource optimization container. + +### Studios + +[Studios](../studios/index) is an interactive analysis environment available in organizational workspaces. To enable Studios, see [Studios deployment](./studios). 
+ +:::note +Studios is available from Seqera Platform v24.1. If you experience any problems during the deployment process [contact Seqera support](https://support.seqera.io). Studios in Enterprise is not installed by default. +::: + +### Database console + +Use the [dbconsole.yml](_templates/k8s/dbconsole.yml) manifest to deploy a simple web frontend to the Seqera database. Though not required, this can be useful for administrative purposes. + +1. Deploy the database console: + + ```bash + kubectl apply -f dbconsole.yml + ``` + +1. Enable a port-forward for the database console to your local machine: + + ```bash + kubectl port-forward deployment/dbconsole 8080:8080 + ``` + +1. Access the database console in a web browser at `http://localhost:8080`. + +### High availability + +To configure Seqera Enterprise for high availability, note that: + +- The `backend` service can be run in multiple replicas +- The `frontend` service is replicable, however in most scenarios it is not necessary +- The `cron` service may only have a single instance +- The `groundswell` service may only have a single instance + +[aws-cache-instances]: https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/CacheNodes.SupportedTypes.html +[aws-configure-ingress]: https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.2/guide/ingress/annotations/ +[aws-elasticache]: https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/WhatIs.html +[azure-cache]: https://learn.microsoft.com/en-gb/azure/azure-cache-for-redis/cache-overview +[azure-cache-instances]: https://azure.microsoft.com/en-gb/pricing/details/cache/ +[azure-configure-ingress]: https://docs.microsoft.com/en-us/azure/application-gateway/ingress-controller-annotations +[google-cache-instances]: https://cloud.google.com/memorystore/docs/redis/pricing#instance_pricing +[google-configure-ingress]: https://cloud.google.com/kubernetes-engine/docs/concepts/ingress +[k8s-ingress]: 
https://kubernetes.io/docs/concepts/services-networking/ingress/ +[k8s-load-balancer]: https://kubernetes.io/docs/concepts/services-networking/service/#loadbalancer +[k8s-node-port]: https://kubernetes.io/docs/concepts/services-networking/service/#nodeport +[kubectl-secret]: https://kubernetes.io/docs/tasks/configmap-secret/managing-secret-using-kubectl/ +[memorystore]: https://cloud.google.com/memorystore/docs/redis diff --git a/platform-enterprise/enterprise/overview.md b/platform-enterprise/enterprise/overview.md new file mode 100644 index 000000000..33f153946 --- /dev/null +++ b/platform-enterprise/enterprise/overview.md @@ -0,0 +1,84 @@ +--- +title: "Enterprise installation" +description: Platform Enterprise installation overview +date: "9 April 2025" +tags: [installation, deployment] +--- + +:::tip +Seqera Enterprise requires a license. If you have not already purchased a license, [contact us](https://seqera.io/contact-us/) for more information. +::: + +Seqera Platform Enterprise is a web application with a microservice-oriented architecture that is designed to maximize portability, scalability, and security. It's composed of several modules that are configured and deployed according to your organizational requirements. Seqera provides these modules as Docker container images that are securely hosted on a private container registry. + +## Architecture + +![Platform architecture diagram](./_images/seqera_reference_architecture.png) + +### Platform backend + +The Seqera backend is a JVM-based web application based on the [Micronaut](https://micronaut.io/) framework, which provides a modern and secure backbone for the application. The backend implements the main application logic, which is exposed via a REST API and defined with an OpenAPI schema. The backend uses JPA, Hibernate, and JDBC API industry standards to interact with the underlying relational database. 
+ +The backend can be run standalone or as multiple replicas for scalability when deployed in high-availability mode. It should run on port `8080`. + +### Platform cron + +Cron is an auxiliary backend service that executes regularly-occurring activities, such as sending email notifications and cleaning up stale data. The cron service also performs database migrations at startup. + +### Platform frontend + +The Seqera frontend is an NGINX web server that serves the [Angular](https://angular.io/) application and reverse-proxies HTTP traffic to the backend. The frontend should run on port `80` within the container and should be the only service that accepts incoming HTTP traffic. The frontend can also be exposed via HTTPS or a load balancer. + +### Redis database + +Seqera Enterprise requires a Redis database for caching purposes. + +### SQL database + +Seqera requires a SQL database to persist user activities and state. The application has been tested against MySQL 8.0. [Contact Seqera support](https://support.seqera.io) if you need to use a different JDBC-compliant SQL database. + +### SMTP service + +Seqera requires an SMTP relay to send email messages and user notifications. + +### Authentication service (optional) + +Seqera supports enterprise authentication mechanisms such as OAuth and OpenID. Third-party identity providers and custom single sign-on flows can be developed according to specific customer requirements. + +## Deployment options + +Seqera can be deployed to a single node, either with [Docker Compose](./docker-compose) or natively, or to a [Kubernetes](./kubernetes) cluster. This documentation includes instructions for both options across multiple platforms, including Amazon AWS, Microsoft Azure, Google Cloud, and on-prem infrastructure. + +### Single-node + +The minimal Seqera Enterprise deployment requires only the frontend, backend, and database services. These services can be deployed as Docker containers or as native services. 
+ +### Kubernetes + +Kubernetes is emerging as the technology of choice for deploying applications that require high-availability, scalability, and security. Seqera Enterprise includes configuration manifests for Kubernetes deployment. + +![](./_images/seqera_reference_architecture_aws.png) +_Reference architecture diagram of Seqera Platform Enterprise on AWS using Elastic Kubernetes Service (EKS)_ + +## Application container images + +Seqera Enterprise is distributed as a collection of Docker containers available through the Seqera container registry [`cr.seqera.io`](https://cr.seqera.io). Contact [support](https://support.seqera.io) to get your container access credentials. When you've received your credentials, retrieve the application container images with these steps: + +1. Retrieve the `name` and `secret` values from the JSON file you received from Seqera support. +2. Authenticate to the registry by using the `name` and `secret` values copied in the previous step: + + ```bash + docker login -u '' -p '' cr.seqera.io + ``` + +3. Pull the application container images: + + ```bash + docker pull cr.seqera.io/private/nf-tower-enterprise/backend:v25.1.1 + + docker pull cr.seqera.io/private/nf-tower-enterprise/frontend:v25.1.1 + ``` + +## Support + +For further information, [contact Seqera support](https://support.seqera.io). diff --git a/platform-enterprise/enterprise/prerequisites/aws.md b/platform-enterprise/enterprise/prerequisites/aws.md new file mode 100644 index 000000000..9f8715734 --- /dev/null +++ b/platform-enterprise/enterprise/prerequisites/aws.md @@ -0,0 +1,190 @@ +--- +title: "AWS" +description: Prerequisites for AWS deployments +date: "12 Apr 2023" +tags: [aws, prerequisites, configuration, ec2, ses, rds] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +This page describes the infrastructure and other prerequisites for deploying Seqera Platform Enterprise on Amazon Web Services (AWS). 
+ +Run the Seqera container with [Docker](../docker-compose) on an AWS EC2 instance, or with [Kubernetes](../kubernetes) on an Amazon EKS cluster. You must satisfy the requirements for your installation target: + +- **SMTP server**: If you don't have an email server, use [Amazon Simple Email Service](https://aws.amazon.com/ses/). + + :::note + Amazon [blocks EC2 traffic over port 25 by default](https://aws.amazon.com/premiumsupport/knowledge-center/ec2-port-25-throttle/). Your integration must use a port that can successfully reach your SMTP server. + ::: + +- **MySQL database**: An external database, such as one provided by [Amazon Relational Database Service](https://aws.amazon.com/rds/), is highly recommended for production deployments. + +- **(Optional) SSL certificate**: HTTP must not be used in production environments. An SSL certificate is required for your Seqera instance to handle HTTPS traffic. See [SSL/TLS configuration](../configuration/ssl_tls#aws-deployments-manage-ssl-certificates-with-amazon-certificate-manager-acm) for more information. + + :::note + HTTP-only implementations **must** set the `TOWER_ENABLE_UNSAFE_MODE=true` environment variable in the Seqera hosting infrastructure to enable user login. HTTP must not be used in production environments. + ::: + +- **(Optional) AWS Parameter Store**: Store sensitive Seqera configuration values as SecureString [AWS Parameter Store](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-parameter-store.html) parameters. See [AWS Parameter Store configuration](../configuration/aws_parameter_store) for instructions. This is recommended for production environments. + +- **(Optional) DNS**: DNS is required to support human-readable domain names and load-balanced traffic. If you don't have access to a pre-existing DNS service, use [Amazon Route 53](https://docs.aws.amazon.com/Route53/latest/DeveloperGuide/Welcome.html). 
+ +### Prerequisites for Docker + +An [EC2](https://aws.amazon.com/ec2/) instance is required. See [Amazon EC2](#amazon-ec2) for instructions to provision an EC2 instance for this purpose. + +### Prerequisites for EKS + +If you're installing Seqera Enterprise with Kubernetes, an [Elastic Kubernetes Service (EKS)](https://docs.aws.amazon.com/eks/latest/userguide/getting-started.html) cluster is required. See the [EKS documentation](https://docs.aws.amazon.com/eks/latest/userguide/create-cluster.html) to provision your own cluster. + +
+ EKS cluster requirements + + - Kubernetes 1.19 or later + + - **Subnet requirements** + + - At least 2 subnets across two different Availability Zones + - Subnets must be tagged for [AWS Load Balancer Controller auto-discovery](https://docs.aws.amazon.com/eks/latest/userguide/network_reqs.html) + - Public subnets must be configured to [auto-assign IPs on launch](https://aws.amazon.com/blogs/containers/upcoming-changes-to-ip-assignment-for-eks-managed-node-groups/) + - Public and private subnets must allow egress traffic to the public internet + + - **RBAC requirements** + + - The cluster must be created by a non-root user + - `aws-auth` must be updated to [allow access to additional IAM users/roles](https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html) (if needed) + + - **Addons** + + - Install the [cert-manager](https://cert-manager.io/docs/) + - Install the [AWS Load Balancer Controller](https://docs.aws.amazon.com/eks/latest/userguide/aws-load-balancer-controller.html) + + - **Ingress** + + - ALB provisioning via the [AWS Load Balancer Controller](https://docs.aws.amazon.com/eks/latest/userguide/aws-load-balancer-controller.html) + - ALB integration with the [Amazon Certificate Manager](https://aws.amazon.com/certificate-manager/) + + Additionally, the ingress assumes the presence of SSL certificates, DNS resolution, and ALB logging. If you've chosen not to use some or all of these features, you'll need to modify the manifest accordingly before applying it to the cluster. + +
+ +## AWS setup + +Set up commonly-used AWS services for Seqera deployment. + +### Fetch Seqera config values from AWS Parameter Store + +From version 23.1, you can retrieve Seqera Enterprise configuration values remotely from the AWS Parameter Store. See [AWS Parameter Store configuration](../configuration/aws_parameter_store) for instructions. + +### Amazon SES + +Seqera Enterprise supports AWS Simple Email Service (SES) as an alternative to traditional SMTP servers for sending application emails. + +:::caution +If you use AWS SES in sandbox mode, both the _sender_ and the _receiver_ email addresses must be verified via AWS SES. Sandbox is not recommended for production use. See the [AWS docs](https://docs.aws.amazon.com/ses/latest/dg/request-production-access.html) for instructions to move out of the sandbox. +::: + +- See [Obtaining SES SMTP credentials using the SES console](https://docs.aws.amazon.com/ses/latest/dg/smtp-credentials.html#smtp-credentials-console) for instructions to set up SES to send emails from your preferred address. + +- To prevent emails from SES being flagged as spam, see these AWS instructions for setting up an email authentication method: + + - [DKIM for a domain](https://docs.aws.amazon.com/ses/latest/DeveloperGuide/send-email-authentication-dkim-easy-setup-domain.html) + + - [SPF authentication](https://docs.aws.amazon.com/ses/latest/DeveloperGuide/send-email-authentication-spf.html) + +### Amazon RDS + +External databases for Seqera Enterprise deployments require: + +- A **MySQL8 Community** DB instance +- At least 2 vCPUs, 8 GB memory, and 30 GB SSD storage +- Manual MySQL user and database schema creation. See [Database configuration](../configuration/overview#seqera-and-redis-databases) for more details. + +:::caution +Recommended instance class and storage requirements depend on the number of parallel pipelines you expect to run. 
+::: + + + + +See [Creating an Amazon RDS DB instance](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_CreateDBInstance.html) to guide you through the external database setup for your production deployment. + + + + +To create a DB instance with the AWS CLI, call the [create-db-instance](https://docs.aws.amazon.com/cli/latest/reference/rds/create-db-instance.html) command, replacing `INSTANCE_NAME`, `SECURITY_GROUP`, `DB_USER`, and `DB_PASSWORD` with your unique values: + +```bash +aws rds create-db-instance \ + --engine mysql \ + --db-instance-identifier INSTANCE_NAME \ + --allocated-storage 30 \ + --db-instance-class db.m5d.large \ + --vpc-security-group-ids SECURITY_GROUP \ + --db-subnet-group SUBNET_GROUP \ + --master-username DB_USER \ + --master-user-password DB_PASSWORD \ +``` + + + + +After your database is created: + +- Update the inbound rules for the underlying EC2 instance to allow MySQL connections. +- Update your Seqera [configuration](../configuration/overview#seqera-and-redis-databases) with the database hostname, username, and password. + +### Amazon EC2 + +See [Getting started with Amazon EC2](https://aws.amazon.com/ec2/getting-started/) for instructions to create your EC2 instance. + +Create an instance with these attributes: + +- **Amazon Machine Image (AMI)**: Amazon Linux 2023 Optimized +- **Instance type**: c5a.xlarge or c5.large with 4 CPUs and 8 GB RAM +- **Root storage**: 30 GB +- **Tags**: It is helpful to use a descriptive `Name` value for your instance, such as `seqera-app-server`. +- **Security Group name**: Seqera deployment manifests provided in this installation guide use `tower-sg` by default. If you choose to use a custom name, this must be updated consistently across your deployment files. +- **Keypair**: It is security best practice to use a **new** keypair for your production deployment instance. + +After your instance is launched: + +1. Use the key pair to connect to the server with SSH and its public IP address. 
Terminal-based SSH is easier to use than browser-based SSH for copying and pasting text. + +1. [Install Docker](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/install-docker.html#install-docker-instructions). + +1. [Install Docker Compose](https://docs.docker.com/compose/install/linux/#install-the-plugin-manually). + +1. Confirm that Docker Compose is installed: + + ```bash + docker compose version + ``` + +### Seqera container images + +Seqera Platform Enterprise is distributed as a collection of Docker containers available through the Seqera +container registry ([cr.seqera.io](https://cr.seqera.io)). Contact [support](https://support.seqera.io) to get your container access credentials. Once you've received your credentials, retrieve the Seqera container images on your EC2 instance: + +1. Retrieve the **username** and **password** you received from Seqera support. + +1. Authenticate to the registry: + + ```bash + docker login -u 'username' -p 'password' cr.seqera.io + ``` + +1. Pull the Seqera container images: + + ```bash + docker pull cr.seqera.io/private/nf-tower-enterprise/backend:v25.1.1 + + docker pull cr.seqera.io/private/nf-tower-enterprise/frontend:v25.1.1 + ``` + +## Next steps + +See [Configuration](../configuration/overview). + +[create-db-instance-cli]: https://docs.aws.amazon.com/cli/latest/reference/rds/create-db-instance.html \ No newline at end of file diff --git a/platform-enterprise/enterprise/prerequisites/azure.md b/platform-enterprise/enterprise/prerequisites/azure.md new file mode 100644 index 000000000..c2547afb0 --- /dev/null +++ b/platform-enterprise/enterprise/prerequisites/azure.md @@ -0,0 +1,299 @@ +--- +title: "Azure" +description: Prerequisites for Azure deployments +date: "12 Apr 2023" +tags: [azure, prerequisites, configuration] +--- + +This page describes the infrastructure and other prerequisites for deploying Seqera Platform Enterprise on Microsoft Azure. 
+ +Run the Seqera container with [Docker](../docker-compose) on an Azure VM instance or with [Kubernetes](../kubernetes) on an Azure AKS cluster. You must satisfy the requirements for your installation target: + +- A resource group and a storage account are required to use Azure. See [Azure setup](#azure-setup) below to provision these resources. +- **SMTP server**: If you don't have an email server, see [Azure's recommended method of sending email][azure-sendmail]. Microsoft recommends [Microsoft 365][msft-365] or the third party service [SendGrid][sendgrid]. +- **MySQL database**: An external database such as [Azure Database for MySQL][azure-db-create-portal] is highly recommended for production deployments. +- **SSL certificate**: An SSL certificate is required for your Seqera instance to handle HTTPS traffic. + + :::caution + HTTP-only implementations **must** set the `TOWER_ENABLE_UNSAFE_MODE=true` environment variable in the Seqera hosting infrastructure to enable user login. HTTP must not be used in production environments. + ::: + +- **DNS**: (Optional) DNS is required to support human-readable domain names and load-balanced traffic. See [Azure DNS][azure-dns] to learn about domain aquisition and record management. + +These decisions must be made before you continue as they impact how Seqera configuration files are updated. + +### Prerequisites for Docker + +A Linux VM instance is required to deploy Seqera Enterprise via Docker Compose. See the [detailed instructions](#azure-setup) to provision a VM instance for this purpose. + +### Prerequisites for AKS + +An [Azure Kubernetes Service (AKS)][aks-walkthrough] cluster is required to deploy Seqera Enterprise via Kubernetes. + +## Azure setup + +Set up commonly used Azure services for Seqera deployment. + +### Azure resource group + +Create a resource group: +- [Via the Azure portal][azure-rg-portal] +- [Via the Azure CLI][azure-rg-cli] + +
+ Create a resource group via Azure portal + + 1. Sign in to the [Azure portal](https://portal.azure.com). + 1. Select **Resource groups**. + 1. Select **Add**. + 1. Enter the following values: + - **Subscription**: Select your Azure subscription. + - **Resource group**: Enter a new resource group name (such as `towerrg`). + - **Region**: Select the region where your assets will exist (such as `East US`). + 1. Select **Review and Create**. + 1. Select **Create**. + +
+
+ Create a resource group via Azure CLI + + Run the `az group create` command: + + ```bash + az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION + ``` + +
+ +### Azure storage account + +Create a storage account: +- [Via the Azure portal][azure-storage-portal] +- [Via the Azure CLI][azure-storage-cli] + +
+ Create a storage account via Azure portal + + 1. Sign in to the [Azure portal](https://portal.azure.com). + 1. Select **Storage accounts**. + 1. Select **Create**. + 1. Enter the following values: + - **Subscription**: Select your Azure subscription. + - **Resource group**: Enter your resource group name. + - **Storage account name**: Enter a new storage account name (such as `towerstorage`). + - **Region**: Select the region where your Resource Group exists (such as `East US`). + - **Performance**: Select `Standard`. + - **Redundancy**: Select `Geo-redundant storage (GRS)`. + 1. Select **Review + create**. The default values are used in the other tabs. See [Create a storage account][azure-storage-portal] for further details on each setting. + 1. Select **Create**. + +
+
+ Create a storage account via Azure CLI + + Run the `az storage account create` command: + + ```bash + az storage account create -n towerstorage -g towerrg -l eastus --sku Standard_GRS + ``` + +
+ +### Azure MySQL DB instance + +External databases for Seqera Enterprise deployments require: +- A **MySQL8 Community** DB instance. +- At least **2 vCPUs**, **8 GB memory**, and **30 GB** SSD storage. +- Manual MySQL user and database schema creation. See [Database configuration](../configuration/overview#seqera-and-redis-databases) for more details. + +:::caution +Recommended instance performance and storage requirements depend on the number of parallel pipelines you expect to run. +::: + +Create an Azure MySQL DB instance: +- [Via Azure portal][azure-db-create-portal] +- [Via Azure CLI][azure-db-create-cli] + +
+ Create a MySQL DB instance via Azure portal + + 1. In the Azure portal, search for and select **Azure Database for MySQL servers**. + 1. Select **Create**. + 1. On the **Select Azure Database for MySQL deployment option** pane, select **Flexible server** as the deployment option. + 1. On the **Basics** tab, enter or select the following: + - Your **Subscription** name + - Your **Resource group** name + - A **Server name** such as `towerdbserver` + - Your **Region** + - The **Workload type**, based on your required `max_connections` + - **High availability** — high availability is recommended for production deployments + - **Standby availability zone** — standby server zone location + - **MySQL version** — 8.0 + - An **Admin username** to access the server + - A **Password** to access the server + - Your **Compute + storage** requirements, considering the minimum performance requirements outlined above + 1. Configure networking options. + 1. Select **Review + create**, then **Create**. + 1. Disable invisible primary keys, which can interfere with upgrades to newer releases of Seqera Platform Enterprise. Azure Database for MySQL creates invisible primary keys automatically by default. For more information, see [Steps to disable a GIPK][azure-gipk]. + +
+
+ Create a MySQL DB instance via Azure CLI + + 1. Run `az mysql flexible-server create` to create your server: + + ```bash + az mysql flexible-server create --location eastus --resource-group towerrg --name towerdbserver --admin-user username --admin-password password --sku-name Standard_B2ms --tier Burstable --public-access 0.0.0.0 --storage-size 30 --version 8.0 --high-availability ZoneRedundant --zone 1 --standby-zone 3 --storage-auto-grow Enabled --iops 500 + ``` + + The `sku-name`, `tier`, `storage-size`, and `iops` values depend on your performance requirements. + + 1. Run `az mysql flexible-server db create` to create a database on your server: + + ```bash + az mysql flexible-server db create --resource-group towerrg + --server-name towerdbserver + --database-name towerdb + ``` + 1. Disable invisible primary keys, which can interfere with upgrades to newer releases of Seqera Platform Enterprise. Azure Database for MySQL creates invisible primary keys automatically by default. For more information, see [Steps to disable a GIPK][azure-gipk]. + +
+ +After your database is created, update your Seqera [configuration](../configuration/overview#seqera-and-redis-databases) with the database hostname, Admin username, and password. + +:::note +When creating a MySQL user, use the `USER@HOSTNAME` format for the `TOWER_DB_USER` environment variable. For Azure managed MySQL, it's [recommended][azure-db-config] to pass an explicit `serverTimezone` to the `TOWER_DB_URL` environment variable, which (depending on your configuration) may be `UTC`. The DB connection string should be similar to `jdbc:mysql://towerdbserver.mysql.database.azure.com/towerdb?serverTimezone=UTC`. +::: + +### Azure Linux VM + +Create a VM instance with these attributes: + +- Use **default values** unless otherwise specified. +- At least **2 CPUS** and **8GB RAM**. +- **Ubuntu Server 22.04 LTS - Gen2** image. +- **Accessible by SSH**. + +Create an Azure Linux VM: +- [Via the Azure portal][azure-linux-vm-portal] +- [Via the Azure CLI][azure-linux-vm-cli] + +
+ Create a VM via Azure portal + + 1. Under **Basics**, select your **Subscription** and **Resource group**. + 1. Under **Instance details**: + - Enter a **VM name** + - Select the same **Region** as your resource group. + - Select the **Ubuntu Server 24.04 LTS - Gen2** image. + - Do not set the VM as an **Azure Spot instance**. + - Select the **Size** — B2ps v2 or higher is recommended. + 1. Under **Administrator account**: + - Select **SSH public key** + - Enter a **username** + - Select **Generate new key pair** + - Enter a **Key pair name** + 1. Under **Inbound port rules**: + - Select **Allow selected ports** + - Select **SSH (22)**, **HTTP (8000)**, **HTTP (80)**, and **HTTPS (443)** (required for SSL termination in production environments) from the dropdown + 1. Select **Review + create** at the bottom of the page. + 1. Review your VM details, then select **Create**. + 1. When the **Generate new key pair** window opens, select **Download private key and create resource**. Your key file will be download as `myKey.pem`. Note the path to which it was downloaded. + 1. On the page for your new VM, copy the **Public IP address**. + + To make the VM's IP address static: + + 1. Enter **Public IP addresses** in the search. + 1. Under **Services**, select **Public IP addresses**. + 1. On the **Public IP addresses** page, select the entry containing your VM name. A page opens with that IP's details. + 1. Select **Configuration** from the left-hand navigation panel. + 1. Confirm that your IP address assignment is **Static**. + 1. Do not add a custom DNS name label to the VM. + + To allow ingress on port 8000: + + 1. Enter **Virtual Machines** in the search bar. + 1. Under **Services**, select **Virtual machines**. + 1. On the **Virtual machines** page, select your VM name to navigate to the VM details. + 1. Select **Networking** from the left-hand navigation panel. + 1. **Add inbound port rule** for port 8000. 
+ + To allow ingress on port 443 (required for SSL/TLS termination in production environments): + + 1. Enter **Virtual Machines** in the search bar. + 1. Under **Services**, select **Virtual machines**. + 1. On the **Virtual machines** page, select your VM name to navigate to the VM details. + 1. Select **Networking** from the left-hand navigation panel. + 1. **Add inbound port rule** for port 443. + + Connect to the VM via SSH: + + 1. On a macOS or Linux machine, open a terminal and set read-only permission on the `myKey.pem` file with `chmod 400 ~/Downloads/myKey.pem`. + 1. Install Docker: + 1. [Install Docker using the apt repository][docker]. + 1. Confirm that Docker Compose is installed: + + ```bash + docker compose version + Docker Compose version v2.24.1 + ``` + +
+
+ Create a VM via Azure CLI + + Run `az vm create`: + + ```bash + az vm create \ + --resource-group towerrg \ + --name towervm \ + --image Canonical:0001-com-ubuntu-minimal-jammy:minimal-22_04-lts-gen2:latest \ + --admin-username username \ + --assign-identity \ + --generate-ssh-keys \ + --public-ip-sku Standard + ``` + +
+ +## Seqera container images + +Seqera Platform Enterprise is distributed as a collection of Docker containers available through the Seqera +container registry ([cr.seqera.io](https://cr.seqera.io)). Contact [support](https://support.seqera.io) to get your container access credentials. After you've received your credentials, retrieve the Seqera container images on your Azure VM: + +1. Retrieve the **username** and **password** you received from Seqera support. +1. Run the following Docker command to authenticate to the registry (using the `username` and `password` values copied in step 1): + + ```bash + docker login -u '/\/' -p '/\PASSWORD\>/' cr.seqera.io + ``` + +1. Pull the Seqera container images with the following commands: + + ```bash + docker pull cr.seqera.io/private/nf-tower-enterprise/backend:v25.1.1 + + docker pull cr.seqera.io/private/nf-tower-enterprise/frontend:v25.1.1 + ``` + +## Next steps + +See [Configuration](../configuration/overview). + +[docker]: https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository +[aks-walkthrough]: https://docs.microsoft.com/en-us/azure/aks/kubernetes-walkthrough-portal +[azure-db-create-cli]: https://learn.microsoft.com/en-us/azure/mysql/flexible-server/quickstart-create-server-cli +[azure-db-create-portal]: https://learn.microsoft.com/en-us/azure/mysql/flexible-server/quickstart-create-server-portal +[azure-db-config]: https://docs.microsoft.com/en-us/azure/mysql/connect-java#prepare-a-configuration-file-to-connect-to-azure-database-for-mysql +[azure-gipk]: https://learn.microsoft.com/en-us/azure/mysql/flexible-server/concepts-limitations#steps-to-disable-a-gipk +[azure-dns]: https://docs.microsoft.com/en-us/azure/dns/dns-overview +[azure-linux-vm-cli]: https://learn.microsoft.com/en-us/azure/virtual-machines/linux/quick-create-cli#create-the-virtual-machine +[azure-linux-vm-portal]: https://learn.microsoft.com/en-us/azure/virtual-machines/linux/quick-create-portal?tabs=ubuntu +[azure-rg-cli]: 
https://learn.microsoft.com/en-us/azure/virtual-machines/linux/quick-create-cli#create-a-resource-group +[azure-rg-portal]: https://docs.microsoft.com/en-us/azure/azure-resource-manager/management/manage-resource-groups-portal +[azure-sendmail]: https://docs.microsoft.com/en-us/azure/virtual-network/troubleshoot-outbound-smtp-connectivity#recommended-method-of-sending-email +[azure-storage-portal]: https://learn.microsoft.com/en-ca/azure/storage/common/storage-account-create?tabs=azure-portal#create-a-storage-account-1 +[azure-storage-cli]: https://learn.microsoft.com/en-us/cli/azure/storage/account?view=azure-cli-latest#az-storage-account-create +[msft-365]: https://docs.microsoft.com/en-us/exchange/mail-flow-best-practices/how-to-set-up-a-multifunction-device-or-application-to-send-email-using-microsoft-365-or-office-365 +[sendgrid]: https://docs.sendgrid.com/for-developers/partners/microsoft-azure-2021 \ No newline at end of file diff --git a/platform-enterprise/enterprise/prerequisites/gcp.md b/platform-enterprise/enterprise/prerequisites/gcp.md new file mode 100644 index 000000000..d7f3408c3 --- /dev/null +++ b/platform-enterprise/enterprise/prerequisites/gcp.md @@ -0,0 +1,172 @@ +--- +title: "Google Cloud" +description: Prerequisites for GCP deployments +date: "12 Apr 2023" +tags: [gcp, prerequisites, configuration] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +This page describes the infrastructure and other prerequisites for deploying Seqera Platform Enterprise on Google Cloud Platform (GCP). + +Run the Seqera container with [Docker](../docker-compose) on a GCP VM instance or with [Kubernetes](../kubernetes) on a Google Kubernetes Engine cluster. You must satisfy the requirements for your installation target: + +- **SMTP server**: If you don't have an email server, Google Cloud provides several ways to send emails, such as [SendGrid][sendgrid], [Mailgun][mailgun], and [Mailjet][mailjet]. 
Work with your IT team to select the best solution for your organization. +- **MySQL database**: An external database such as [Google CloudSQL][gcloudsql] is highly recommended for production environments. +- **SSL certificate**: An SSL certificate is required for your Seqera instance to handle HTTPS traffic. + + :::caution + HTTP-only implementations **must** set the `TOWER_ENABLE_UNSAFE_MODE=true` environment variable in the Seqera hosting infrastructure to enable user login. HTTP must not be used in production environments. + ::: + +- **Public IP address**: (Optional) A public IP address can be reserved for the Seqera ingress to keep the IP address constant across restarts. + +
+ Reserve a public IP address + + 1. Go to **VPC network > External IP addresses** and select **Reserve Static Address**. + 2. Assign a name (such as `tower-ip`). This name will be used later to configure the ingress. + 3. Select the region where your GKE cluster is deployed. + 4. Select **Reserve**. + +
+ +### Prerequisites for Docker + +A [Google Compute Engine (GCE)][gce] instance is required to deploy Seqera Enterprise via Docker Compose. See [Google Compute Engine](#google-compute-engine) to provision a VM instance for this purpose. + +### Prerequisites for GKE + +A [Google Kubernetes Engine (GKE)][gke] cluster is required to deploy Seqera Enterprise via Kubernetes. See the [GKE documentation][gke-docs] to provision your own cluster. + +:::note +Seqera doesn't currently support GKE Autopilot due to a privilege issue with the Redis deployment. However, you can achieve most of the same behavior with a Standard cluster by enabling autoscaling and node auto-provisioning. +::: + +## GCP setup + +This section provides step-by-step instructions for some commonly used GCP services for Seqera deployment. See the [GCP documentation][gcp-docs] for up-to-date instructions and contact [GCP support][gcp-support] if you have any issues with provisioning GCP resources. + +### Google CloudSQL + +Create a Google CloudSQL instance with the following attributes: +- MySQL 8.0 +- At least **2 vCPUs**, **8 GB** memory, and **30 GB SSD** storage +- Private IP + +:::caution +The recommended machine type and storage requirements depend on the number of parallel pipelines you expect to run. +::: + + + + +1. See [Create a MySQL instance][gcloudsql-create] for Cloud console instructions. +1. After the instance has been created, select the instance, then **Databases**. Create a new database named `tower`. +1. Note the Private IP address of the instance as it must be supplied to the `TOWER_DB_URL` environment variable. + + + + +See [Create a MySQL instance][gcloudsql-create] for gcloud CLI instructions. + +1. Create your MySQL instance with the following command: + ```bash + gcloud sql instances create INSTANCE_NAME \ + --database-version=MYSQL_8_0 \ + --cpu=2 \ + --memory=8GB \ + --storage-size=30GB \ + --region=us-central1 + ``` +1. 
Note the private IP address as it must be supplied to the `TOWER_DB_URL` environment variable during Seqera configuration. +1. Set the password for the root MySQL user: + ```bash + gcloud sql users set-password root \ + --host=% \ + --instance INSTANCE_NAME \ + --password PASSWORD + ``` +1. Create a database named `tower` on the instance: + ```bash + gcloud sql databases create tower \ + --instance=INSTANCE_NAME \ + ``` + + + + +### Google Compute Engine + +Create a VM instance with these attributes: +- At least **2 vCPUs** and **8 GB** memory +- HTTP traffic enabled. By default, the frontend is exposed to port 8000, so you must add a firewall rule to the underlying VPC network to allow traffic on port 8000 after VM creation. +- SSH enabled (to allow connection to the VM). If you experience issues with SSH, or would like to set up IAP SSH, see [TCP forward to IAP][tcp-iap]. + + + + +See [Create a VM instance from a public image][gcp-vm-public] for Cloud console instructions. + + + + +1. Run `gcloud compute images list` to view a list of available public OS images. Note the name of the image you wish to use and the name of the project that contains it. +1. Create your VM with the following command (you can use either `--image` or `--image-family`, only one is required): + ```gcloud + gcloud compute instances create VM_NAME \ + [--image=IMAGE | --image-family=IMAGE_FAMILY] \ + --image-project=IMAGE_PROJECT + --machine-type=MACHINE_TYPE + ``` + Replace `VM_NAME`, `IMAGE`, `IMAGE_FAMILY`, `IMAGE_PROJECT`, and `MACHINE_TYPE` with your VM details. +1. Run `gcloud compute instances describe VM_NAME` to verify that Compute Engine created the VM. + + + + +After you have created your VM instance: + +1. Connect to the machine using SSH. +1. [Install Docker][install-docker]. +1. 
Confirm that Docker Compose is installed:
+    ```bash
+    docker compose version
+    Docker Compose version v2.24.1
+    ```
+
+## Seqera container images
+
+Seqera Platform Enterprise is distributed as a collection of Docker containers available through the Seqera
+container registry [`cr.seqera.io`](https://cr.seqera.io). Contact [support](https://support.seqera.io) to get your container access credentials. After you receive your credentials, retrieve the Seqera container images on your VM instance:
+
+1. Retrieve the **username** and **password** you received from Seqera support.
+1. Authenticate to the registry:
+    ```bash
+    docker login -u '<USERNAME>' -p '<PASSWORD>' cr.seqera.io
+    ```
+1. Pull the Seqera container images:
+    ```bash
+    docker pull cr.seqera.io/private/nf-tower-enterprise/backend:v25.1.1
+
+    docker pull cr.seqera.io/private/nf-tower-enterprise/frontend:v25.1.1
+    ```
+
+## Next steps
+
+See [Configuration](../configuration/overview).
+
+[gce]: https://cloud.google.com/compute
+[gcloudsql]: https://cloud.google.com/sql/docs/mysql/quickstart
+[gcloudsql-create]: https://cloud.google.com/sql/docs/mysql/create-instance#create-2nd-gen
+[gcp-docs]: https://cloud.google.com/docs
+[gcp-support]: https://cloud.google.com/support-hub
+[gcp-vm-public]: https://cloud.google.com/compute/docs/instances/create-start-instance#publicimage
+[gke]: https://cloud.google.com/kubernetes-engine
+[gke-docs]: https://cloud.google.com/kubernetes-engine/docs
+[install-docker]: https://docs.docker.com/engine/install/debian/
+[mailgun]: https://cloud.google.com/compute/docs/tutorials/sending-mail/using-mailgun
+[mailjet]: https://cloud.google.com/compute/docs/tutorials/sending-mail/using-mailjet
+[sendgrid]: https://cloud.google.com/compute/docs/tutorials/sending-mail/using-sendgrid
+[tcp-iap]: https://cloud.google.com/iap/docs/using-tcp-forwarding
\ No newline at end of file
diff --git a/platform-enterprise/enterprise/prerequisites/on-prem.md 
b/platform-enterprise/enterprise/prerequisites/on-prem.md new file mode 100644 index 000000000..f17ee04f9 --- /dev/null +++ b/platform-enterprise/enterprise/prerequisites/on-prem.md @@ -0,0 +1,57 @@ +--- +title: "On-prem" +description: Prerequisites for on-premises deployments +date: "12 Apr 2023" +tags: [on-prem, prerequisites, configuration] +--- + +This page details the prerequisites for deploying Seqera Platform Enterprise to your on-premises infrastructure. + +See [Docker Compose](../docker-compose) for instructions to deploy via Docker Compose. + +See [Kubernetes](../kubernetes) for instructions to deploy via Kubernetes. + +## Prerequisites + +You must satisfy the requirements for your installation: + +- **SMTP server**: An SMTP server is required to send emails from your Seqera instance. + + If you don't have your own mail server, you can use an external service from a cloud provider. Visit the provider's corresponding **Prerequisites** page for more information and consult your IT team to select the most suitable solution for your organization. + +- **MySQL database**: A database external to your Docker Compose or Kubernetes environment is highly recommended for production deployments. + + If you don't have your own database service, use an external service from a cloud provider. Visit the provider's corresponding **Prerequisites** page for more information and consult your IT team to select the most suitable solution for your organization. + + To use an external database, you must create a MySQL user and database manually. See [Configuration](../configuration/overview#seqera-and-redis-databases) for more details. + +- **(Optional) SSL certificate**: An SSL certificate is required for your Seqera instance to handle HTTPS traffic. + +:::caution +HTTP-only implementations **must** set the `TOWER_ENABLE_UNSAFE_MODE=true` environment variable in the Seqera hosting infrastructure to enable user login. HTTP must not be used in production environments. 
+:::
+
+## Seqera container images
+
+Seqera Platform Enterprise is distributed as a collection of Docker containers available through the Seqera
+container registry [`cr.seqera.io`](https://cr.seqera.io). Contact [support](https://support.seqera.io) to get your container access credentials. Once you've received your credentials, retrieve the Seqera container images:
+
+1. Retrieve the **username** and **password** you received from Seqera support.
+
+2. Authenticate to the registry:
+
+    ```bash
+    docker login -u '<USERNAME>' -p '<PASSWORD>' cr.seqera.io
+    ```
+
+3. Pull the Seqera container images:
+
+    ```bash
+    docker pull cr.seqera.io/private/nf-tower-enterprise/backend:v25.1.1
+
+    docker pull cr.seqera.io/private/nf-tower-enterprise/frontend:v25.1.1
+    ```
+
+## Next steps
+
+See [Configuration](../configuration/overview).
\ No newline at end of file
diff --git a/platform-enterprise/enterprise/studios.md b/platform-enterprise/enterprise/studios.md
new file mode 100644
index 000000000..b5f351f6a
--- /dev/null
+++ b/platform-enterprise/enterprise/studios.md
@@ -0,0 +1,267 @@
+---
+title: "Studios deployment"
+description: Deploy Seqera Platform with Studios
+date: "17 Mar 2025"
+tags: [docker, compose, kubernetes, studios, deployment]
+---
+
+Enable Studios as part of your Seqera Platform Enterprise instance. You must have Data Explorer enabled to use Studios. Only the AWS public cloud is currently supported.
+
+:::caution
+You must upgrade your Seqera Enterprise installation to version 25.1 before you enable and configure Studios.
+:::
+
+## DNS configuration
+
+Each Studio is reachable at a unique URL that includes a randomly generated subdomain name. For example: `https://abcd.example.com/`, where `example.com` is your Seqera base domain name.
+
+Provide a wildcard TLS certificate to allow for uniquely generated subdomains. 
A wildcard certificate common name includes `*.` in the domain name, such as `*.example.com`, thereby securing any subdomain name at this level. + +Studios uses the following set of domains and subdomains: + +- The domain that you set for `TOWER_SERVER_URL`, such as `example.com`. +- A wildcard subdomain that you must configure specifically for Studios. This wildcard subdomain is the parent for each unique session URL, such as `abcd.example.com`. +- The connection proxy, defined by `CONNECT_PROXY_URL`. This URL is a first-level subdomain of your `TOWER_SERVER_URL`. For example, `https://connect.example.com`. + +## Studios workspace availability + +You can configure which organizational workspaces have access to Studios. This configuration is set in the `tower.yml` file. The `tower.data-studio.allowed-workspaces` field supports the following options: + +- `allowed-workspaces: []`: Disables Studios. This is the default if the `allowed-workspaces` field is not specified. +- `allowed-workspaces: [ , ]`: Enables Studios for the comma-separated list of organizational workspace IDs. +- `allowed-workspaces: null`: Enables Studios for all organizational workspaces. + +## Available Studio environment images + +Each of the provided environments includes a particular version of the underlying software package and the version of Seqera Connect, an integrated web- and file-server. + +To quickly identify which version of the software an image includes, the version string for each container is in the form of `-`. For example, if the version string for the R-IDE is `2025.04.1-0.8`, version `2025.04.01` is the R-IDE version and `0.8` is the Connect version of this Seqera-built container image. Learn more about Studios [environment versioning](../studios/overview#container-image-templates). 
+
+The latest environment versions are listed below:
+
+- JupyterLab: `public.cr.seqera.io/platform/data-studio-jupyter:4.2.5-0.8`
+- R-IDE: `public.cr.seqera.io/platform/data-studio-ride:2025.04.1-0.8`
+- Visual Studio Code: `public.cr.seqera.io/platform/data-studio-vscode:1.93.1-0.8`
+- Xpra: `public.cr.seqera.io/platform/data-studio-xpra:6.2.0-r2-1-0.8`
+
+When adding a new Studio, the latest environment versions are tagged `recommended`, and earlier compatible versions are tagged `deprecated`.
+
+Security scans and container inspection reports (including container specifications, configuration, and manifest) are available on-demand at public.cr.seqera.io/platform for each environment image by selecting the `Scan` and `Inspect` icons respectively.
+
+## Docker Compose
+
+This guide assumes that all services will be run in the same container as the rest of your Seqera Platform services.
+
+If you were using Studios prior to GA (v25.1), review the `tower.env` file and make sure you are using the latest version, which includes a new variable `TOWER_DATA_STUDIO_TEMPLATES__TOOL`. This variable needs to be added to the default/Seqera-provided Studio templates:
+
+`TOWER_DATA_STUDIO_TEMPLATES__TOOL: ''`
+
+The `TEMPLATE_KEY` can be any string, but the `TOOL_NAME` has to be the template name (`jupyter`/`vscode`/`rstudio`/`xpra`).
+
+You can also check the current template configuration using `https://towerurl/api/studios/templates?workspaceId=`. The response should include the `TOOL` configuration and template name (`jupyter`/`vscode`/`rstudio`/`xpra`) - not `custom`.
+
+### Prerequisites
+
+- Allow inbound traffic to port 9090 on the EC2 instance
+- Allow traffic on port 9090 through the AWS LB (Load Balancer)
+- An AWS Route53 wildcard DNS record, such as `*.`
+
+### Procedure
+
+1. Download the Studios [environment configuration file](./_templates/docker/data-studios.env).
+
+1. Create an initial OIDC registration token, which can be any secure random string. 
For example, using openssl: + + ``` + oidc_registration_token=$(openssl rand -base64 32 | tr -d /=+ | cut -c -32) + ``` + +1. Generate an RSA public/private key pair. A key size of at least 2048 bits is recommended. For example, use `openssl` to generate the key pair: + + ```shell + openssl genrsa -out private.pem 2048 + openssl rsa -pubout -in private.pem -out public.pem + ``` + +1. Download the [data-studios-rsa.pem](./_templates/docker/data-studios-rsa.pem) file and replace its contents with the content of your private and public key files, in the same order (private key on top, public key directly beneath it). Save the file as `data-studios-rsa.pem`, in the same directory as your `docker-compose.yml` file. + +1. Open the `docker-compose.yml` and uncomment the volume mount for the PEM key file for the `backend` and `cron` services in the `volumes` list. Your PEM file must be named `data-studios-rsa.pem`. + + ```yaml + volumes: + - $PWD/tower.yml:/tower.yml + # An RSA key is required for Studios functionality. Uncomment the line below to mount the key. + #- $PWD/data-studios-rsa.pem:/data-studios-rsa.pem + ``` + +1. Open `data-studios.env` in an editor, and make the following changes: + + 1. Uncomment the `connect-proxy` and `connect-server` services. + 1. Set the following environment variables: + - `PLATFORM_URL`: The same value assigned to `TOWER_SERVER_URL`. For example, `https://example.com`. + - `CONNECT_PROXY_URL`: A URL for the connect proxy subdomain. We recommend you set a first-level subdomain of your `PLATFORM_URL` for your connect proxy. For example, `https://connect.example.com`. + - `CONNECT_OIDC_CLIENT_REGISTRATION_TOKEN`: The same value set in the `oidc_registration_token` environment variable. + +1. Open `tower.env` in an editor and set the following variables: + + - `TOWER_DATA_EXPLORER_ENABLED`: Set `true` to enable Data Explorer. You must enable Data Explorer to mount data inside a Studio. 
+
+    - `TOWER_DATA_STUDIO_CONNECT_URL`: The URL of the Studios connect proxy, such as `https://connect.example.com/`.
+    - `TOWER_OIDC_REGISTRATION_INITIAL_ACCESS_TOKEN`: The same value set in the `oidc_registration_token` environment variable.
+    - `TOWER_OIDC_PEM_PATH`: The file path to a PEM certificate used for signing the OIDC tokens for the OpenID connect provider, mounted as a volume inside the container.
+
+1. Edit the `tower.yml` file and include the following snippet to enable Studios in all organization workspaces:
+
+    ```yaml
+    tower:
+      data-studio:
+        allowed-workspaces: null
+    ```
+
+1. Start your Platform instance: `docker compose up -d`.
+
+1. Confirm that the Platform containers are running:
+
+    ```
+    docker ps
+    ```
+
+1. To confirm that Studios is available, log in to your Platform instance and navigate to an organizational workspace that has Studios enabled. The **Studios** tab is included with the available tabs.
+
+## Kubernetes
+
+This procedure describes how to configure Studios for Seqera Enterprise deployments in Kubernetes.
+
+If you were using Studios prior to GA (v25.1), review the `configmap.yaml` file and make sure you are using the latest version, which includes a new variable `TOWER_DATA_STUDIO_TEMPLATES__TOOL` that needs to be added to the default/Seqera-provided Studio templates:
+
+`TOWER_DATA_STUDIO_TEMPLATES__TOOL: ''`
+
+The `TEMPLATE_KEY` can be any string, but the `TOOL_NAME` has to be the template name (`jupyter`/`vscode`/`rstudio`/`xpra`).
+
+You can also check the current template configuration using `https://towerurl/api/studios/templates?workspaceId=`. The response should include the `TOOL` configuration and template name (`jupyter`/`vscode`/`rstudio`/`xpra`) - not `custom`.
+
+### Procedure
+
+1. Download the Kubernetes manifests for the Studios service:
+
+    - [Proxy](./_templates/k8s/data_studios/proxy.yml)
+    - [Server](./_templates/k8s/data_studios/server.yml)
+
+1. 
Change your Kubernetes context to the namespace where your Platform instance runs: + + ``` + kubectl config set-context --current --namespace= + ``` + +1. Edit the `server.yml` file and set the `CONNECT_REDIS_ADDRESS` environment variable to the hostname or IP address of the Redis server configured for Platform. + +1. Create an initial OIDC registration token, which can be any secure random string. For example, using openssl: + + ``` + oidc_registration_token=$(openssl rand -base64 32 | tr -d /=+ | cut -c -32) + ``` + +1. Edit the `proxy.yml` file and set the following variables: + + - `CONNECT_REDIS_ADDRESS`: The hostname or IP address of the Redis server configured for Platform. + - `CONNECT_PROXY_URL`: A URL for the connect proxy subdomain. We recommend you set a first-level subdomain of your Platform installation domain (`PLATFORM_URL` below) for your connect proxy, to be able to use the same wildcard TLS certificate for all session URLs and avoid additional domain nesting. For example, `https://connect.example.com`. + - `PLATFORM_URL`: The base URL for your Platform installation, such as `https://example.com/`. + - `CONNECT_OIDC_CLIENT_REGISTRATION_TOKEN`: The same value as the `oidc_registration_token` value created previously. + +1. Edit your Platform installation's `ingress.eks.yml` file: + + - Uncomment the `host` section at the bottom of the file. + - Replace `` with the base domain of your Platform installation. For example, `example.com`. + + :::note + This assumes that you have an existing Platform installation Ingress already configured with the following fields: + + - `alb.ingress.kubernetes.io/certificate-arn`: The ARN of a wildcard TLS certificate that secures your Platform URL and connect proxy URL. For example, if `TOWER_SERVER_URL=https://example.com` and `CONNECT_PROXY_URL=https://connect.example.com`, the certificate must secure both `example.com` and `*.example.com`. 
+ - `alb.ingress.kubernetes.io/load-balancer-attributes`: The attributes of the ALB Load Balancer used in your Platform installation. + ::: + +1. Generate an RSA public/private key pair. A key size of at least 2048 bits is recommended. In the following example, the `openssl` command is used to generate the key pair: + + ```shell + openssl genrsa -out private.pem 2048 + openssl rsa -pubout -in private.pem -out public.pem + ``` + +1. Download the [data-studios-rsa.pem](./_templates/docker/data-studios-rsa.pem) file and replace its contents with the content of your private and public key files created in the previous step, in the same order (private key on top, public key directly beneath it). + +1. Apply a base64 encoding to the PEM file that you created in the previous step: + + ``` + base64_pem=$(cat data-studios-rsa.pem | base64) + ``` + +1. Create a secret file named `secret.yml` and set the `oidc.pem` key by pasting the contents of the base64-encoded public/private key pair: + + ```yaml + apiVersion: v1 + kind: Secret + metadata: + name: platform-oidc-certs + namespace: platform-stage + data: + oidc.pem: + ``` + +1. Create the secret: + + ``` + kubectl apply -f secret.yml + ``` + +1. Edit the `tower-svc.yml` file and uncomment the `volumes.cert-volume`, `volumeMounts.cert-volume`, and `env.TOWER_OIDC_PEM_PATH` fields so that the public/private key pair is available to Platform. + +1. Edit the ConfigMap named `platform-backend-cfg` in the `configmap.yml` for Platform by editing the following environment variables: + + - `TOWER_DATA_STUDIO_CONNECT_URL`: The URL of the Studios connect proxy, such as `https://connect.example.com/`. + - `TOWER_OIDC_REGISTRATION_INITIAL_ACCESS_TOKEN`: The same value as the `oidc_registration_token` value created previously. + +1. Edit the ConfigMap named `tower-yml` in the `configmap.yml` and include the following snippet: + + ```yaml + data: + tower.yml: |- + tower: + data-studio: + allowed-workspaces: null + ``` + +1. 
Apply the updated configuration:
+
+    ```
+    kubectl apply -f configmap.yml
+    ```
+
+1. Apply the configuration change to Platform:
+
+    ```
+    kubectl apply -f tower-svc.yml
+    ```
+
+1. Restart the cron service of your Platform deployment to load the updated configuration. For example:
+
+    ```
+    kubectl delete -f tower-cron.yml
+    kubectl apply -f tower-cron.yml
+    ```
+
+
+1. Restart the backend service of your Platform deployment to load the updated configuration. For example:
+
+    ```
+    kubectl scale --replicas=0 deployment/backend
+    kubectl scale --replicas=1 deployment/backend
+    ```
+
+1. Apply the Studios manifests:
+
+    ```
+    kubectl apply -f ingress.eks.yml -f proxy.yml -f server.yml
+    ```
+
+    It can take several minutes for Kubernetes to apply your changes, during which new pods are rolled out.
+
+1. To confirm that Studios is available, log in to your Platform instance and navigate to an organizational workspace that has Studios enabled. The **Studios** tab is included with the available tabs.
diff --git a/platform-enterprise/enterprise/testing.md b/platform-enterprise/enterprise/testing.md
new file mode 100644
index 000000000..324c7be43
--- /dev/null
+++ b/platform-enterprise/enterprise/testing.md
@@ -0,0 +1,36 @@
+---
+title: "Test deployment"
+description: Test your Seqera Platform Enterprise deployment after installation
+date: "12 Feb 2024"
+tags: [testing, deployment]
+---
+
+After your [Docker Compose](./docker-compose) or [Kubernetes](./kubernetes) installation is complete, follow these steps to test whether the application is running as expected:
+
+1. Log in to the application.
+
+2. Create an [organization](../orgs-and-teams/organizations).
+
+3. Create a [workspace](../orgs-and-teams/workspace-management) within the organization.
+
+4. Create a new [compute environment](../compute-envs/overview).
+
+5. Add your [GitHub credentials](../git/overview).
+
+6. Select **Quick Launch** from the **Launchpad** tab in your workspace.
+
+7. 
Enter the repository URL for the `nf-core/rnaseq` pipeline (`https://github.com/nf-core/rnaseq`). + +8. In the **Config profiles** drop-down menu, select the `test` profile. + +9. In **Pipeline parameters**, change the output directory to a location based on your compute environment: + + ```yaml + # Uncomment to save to an S3 bucket + # outdir: s3:///results + + # Uncomment to save to a scratch directory (Kubernetes) + # outdir: /scratch/results + ``` + +10. Select **Launch**. You'll be redirected to the **Runs** tab for the workflow. After a few minutes, progress logs will be listed in that workflow's **Execution log** tab. diff --git a/platform-enterprise/enterprise/upgrade.md b/platform-enterprise/enterprise/upgrade.md new file mode 100644 index 000000000..3f0fcff60 --- /dev/null +++ b/platform-enterprise/enterprise/upgrade.md @@ -0,0 +1,42 @@ +--- +title: "Upgrade deployment" +description: "Platform Enterprise update guidance" +date: "21 Aug 2024" +tags: [enterprise, update, install] +--- + +Follow these steps to upgrade your database instance and Platform Enterprise installation: + +:::caution +The database volume is persistent on the local machine by default if you use the `volumes` key in the `db` or `redis` section of your `docker-compose.yml` file to specify a local path to the DB or Redis instance. If your database is not persistent, you must back up your database before performing any application or database upgrades. +::: + +1. Make a backup of the Seqera database. If you use the pipeline optimization service and your `groundswell` database resides in a database instance separate from your Seqera database, make a backup of your `groundswell` database as well. +1. 
Download the latest versions of your deployment templates and update your Seqera container versions: + - [docker-compose.yml](./_templates/docker/docker-compose.yml) for Docker Compose deployments + - [tower-cron.yml](./_templates/k8s/tower-cron.yml) and [tower-svc.yml](./_templates/k8s/tower-svc.yml) for Kubernetes deployments +1. Restart the application. +1. If you're using a containerized database as part of your implementation: + 1. Stop the application. + 1. Upgrade the MySQL image. + 1. Restart the application. +1. If you're using Amazon RDS or other managed database services: + 1. Stop the application. + 1. Upgrade your database instance. + 1. Restart the application. +1. If you're using the pipeline optimization service (`groundswell` database) in a database separate from your Seqera database, update the MySQL image for your `groundswell` database instance while the application is down (during step 4 or 5 above). If you're using the same database instance for both, the `groundswell` update will happen automatically during the Seqera database update. + +### Custom deployments + +- Run the `/migrate-db.sh` script provided in the `migrate-db` container. This will migrate the database schema. +- Deploy Seqera following your usual procedures. + +### Nextflow launcher image + +If you must host your nf-launcher container image on a private image registry, copy the [nf-launcher image](https://quay.io/seqeralabs/nf-launcher:j17-24.04.4) to your private registry. Then update your `tower.env` with the launch container environment variable: + + `TOWER_LAUNCH_CONTAINER=` + +:::caution +If you're using AWS Batch, you will need to [configure a custom job definition](./advanced-topics/custom-launch-container) and populate the `TOWER_LAUNCH_CONTAINER` with the job definition name instead. 
+::: diff --git a/platform-enterprise/functionality_matrix/overview.md b/platform-enterprise/functionality_matrix/overview.md new file mode 100644 index 000000000..919380ec4 --- /dev/null +++ b/platform-enterprise/functionality_matrix/overview.md @@ -0,0 +1,34 @@ +--- +title: "Version compatibility" +description: "Platform / nf-launcher / Nextflow / Fusion version compatibility" +date: "20 Jun 2024" +tags: [compatibility, nextflow, nf-launcher] +--- + +The two most recent major Seqera Platform versions (24.1.x, 23.4.x, etc) are supported at any given time. + +Each version makes use of `nf-launcher` to determine the Nextflow version used as its baseline. You can override this version during pipeline launch, but note that Seqera may not work reliably with Nextflow versions other than the baseline version. To use a Nextflow version other than the baseline in your pipeline run, use a [pre-run script](../launch/advanced#pre-and-post-run-scripts) during launch. + +If no Nextflow version is specified in your configuration, Seqera defaults to the baseline version outlined below: + +| Platform version | nf-launcher version | Nextflow version | Fusion version | +| ---------------- | ------------------- | ---------------- | -------------- | +| 24.2.4 | j17-24.10.4 | 24.10.4 | 2.4 | +| 24.2.3 | j17-24.10.4 | 24.10.4 | 2.4 | +| 24.2.2 | j17-24.10.0 | 24.10.0 | 2.4 | +| 24.2.0 | j17-24.10.0 | 24.10.0 | 2.4 | +| 24.1.5 | j17-24.04.4 | 24.04.4 | 2.3 | +| 24.1.4 | j17-24.04.4 | 24.04.4 | 2.3 | +| 24.1.3 | j17-24.04.4 | 24.04.4 | 2.3 | +| 24.1.1 | j17-23.10.1-up1 | 23.10.1 | 2.2 | +| 23.4.4 | j17-23.10.1 | 23.10.1 | 2.2 | +| 23.4.3 | j17-23.10.1 | 23.10.1 | 2.2 | +| 23.4.2 | j17-23.10.1 | 23.10.1 | 2.2 | +| 23.4.1 | j17-23.10.1 | 23.10.1 | 2.2 | +| 23.4.0 | j17-23.04.3 | 23.04.3 | 2.1 | +| 23.3.0 | j17-23.04.3 | 23.04.3 | 2.1 | +| 23.3.0 | j17-23.04.3 | 23.04.3 | 2.1 | +| 23.2.0 | j17.23.04.2-up3 | 23.04.2 | 2.1 | +| 23.1.3 | j17-23.04.1 | 23.04.1 | 2.1 | + +nf-launcher versions 
prefixed with j17 refer to Java version 17; j11 refers to Java 11. diff --git a/platform-enterprise/getting-started/_images/add-s-pf.gif b/platform-enterprise/getting-started/_images/add-s-pf.gif new file mode 100644 index 000000000..0ea2a737d Binary files /dev/null and b/platform-enterprise/getting-started/_images/add-s-pf.gif differ diff --git a/platform-enterprise/getting-started/_images/cpu-table-2.png b/platform-enterprise/getting-started/_images/cpu-table-2.png new file mode 100644 index 000000000..919aec152 Binary files /dev/null and b/platform-enterprise/getting-started/_images/cpu-table-2.png differ diff --git a/platform-enterprise/getting-started/_images/create-ce.gif b/platform-enterprise/getting-started/_images/create-ce.gif new file mode 100644 index 000000000..fa2d48f04 Binary files /dev/null and b/platform-enterprise/getting-started/_images/create-ce.gif differ diff --git a/platform-enterprise/getting-started/_images/create-ds.gif b/platform-enterprise/getting-started/_images/create-ds.gif new file mode 100644 index 000000000..bce8331d9 Binary files /dev/null and b/platform-enterprise/getting-started/_images/create-ds.gif differ diff --git a/platform-enterprise/getting-started/_images/cs-launch-form-1.gif b/platform-enterprise/getting-started/_images/cs-launch-form-1.gif new file mode 100644 index 000000000..8dbe3bfd4 Binary files /dev/null and b/platform-enterprise/getting-started/_images/cs-launch-form-1.gif differ diff --git a/platform-enterprise/getting-started/_images/cs-launch-input.gif b/platform-enterprise/getting-started/_images/cs-launch-input.gif new file mode 100644 index 000000000..329ba865f Binary files /dev/null and b/platform-enterprise/getting-started/_images/cs-launch-input.gif differ diff --git a/platform-enterprise/getting-started/_images/data-explorer-add-proteinfold.gif b/platform-enterprise/getting-started/_images/data-explorer-add-proteinfold.gif new file mode 100644 index 000000000..ec6f1b87e Binary files /dev/null and 
b/platform-enterprise/getting-started/_images/data-explorer-add-proteinfold.gif differ diff --git a/platform-enterprise/getting-started/_images/guide-vs-code-studio-nf-env-1080p-cropped.gif b/platform-enterprise/getting-started/_images/guide-vs-code-studio-nf-env-1080p-cropped.gif new file mode 100644 index 000000000..7a0ec1fee Binary files /dev/null and b/platform-enterprise/getting-started/_images/guide-vs-code-studio-nf-env-1080p-cropped.gif differ diff --git a/platform-enterprise/getting-started/_images/launch-form-1.gif b/platform-enterprise/getting-started/_images/launch-form-1.gif new file mode 100644 index 000000000..f863ccaee Binary files /dev/null and b/platform-enterprise/getting-started/_images/launch-form-1.gif differ diff --git a/platform-enterprise/getting-started/_images/launch-form-2.gif b/platform-enterprise/getting-started/_images/launch-form-2.gif new file mode 100644 index 000000000..435236bd9 Binary files /dev/null and b/platform-enterprise/getting-started/_images/launch-form-2.gif differ diff --git a/platform-enterprise/getting-started/_images/launch-form-3.gif b/platform-enterprise/getting-started/_images/launch-form-3.gif new file mode 100644 index 000000000..f59bd2c69 Binary files /dev/null and b/platform-enterprise/getting-started/_images/launch-form-3.gif differ diff --git a/platform-enterprise/getting-started/_images/nf-core-proteinfold_metro_map_1.1.0.png b/platform-enterprise/getting-started/_images/nf-core-proteinfold_metro_map_1.1.0.png new file mode 100644 index 000000000..503473566 Binary files /dev/null and b/platform-enterprise/getting-started/_images/nf-core-proteinfold_metro_map_1.1.0.png differ diff --git a/platform-enterprise/getting-started/_images/nf-core-rnaseq_metro_map_grey_static.svg b/platform-enterprise/getting-started/_images/nf-core-rnaseq_metro_map_grey_static.svg new file mode 100644 index 000000000..a0e7a4ccc --- /dev/null +++ b/platform-enterprise/getting-started/_images/nf-core-rnaseq_metro_map_grey_static.svg 
@@ -0,0 +1,239 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/platform-enterprise/getting-started/_images/pf-ce.gif b/platform-enterprise/getting-started/_images/pf-ce.gif new file mode 100644 index 000000000..09aef094a Binary files /dev/null and b/platform-enterprise/getting-started/_images/pf-ce.gif differ diff --git a/platform-enterprise/getting-started/_images/pf-run-details-general.gif b/platform-enterprise/getting-started/_images/pf-run-details-general.gif new file mode 100644 index 000000000..6c25cc352 Binary files /dev/null and b/platform-enterprise/getting-started/_images/pf-run-details-general.gif differ diff --git a/platform-enterprise/getting-started/_images/pf-run-details.gif b/platform-enterprise/getting-started/_images/pf-run-details.gif new file mode 100644 index 000000000..5c3e54e0b Binary files /dev/null and b/platform-enterprise/getting-started/_images/pf-run-details.gif differ diff --git a/platform-enterprise/getting-started/_images/pf-task-details.gif b/platform-enterprise/getting-started/_images/pf-task-details.gif new file mode 100644 index 000000000..1b8b50689 Binary files /dev/null and b/platform-enterprise/getting-started/_images/pf-task-details.gif differ diff --git a/platform-enterprise/getting-started/_images/pipelines-add-pf.gif b/platform-enterprise/getting-started/_images/pipelines-add-pf.gif new file mode 100644 index 000000000..c3684121d Binary files /dev/null and b/platform-enterprise/getting-started/_images/pipelines-add-pf.gif differ diff --git 
a/platform-enterprise/getting-started/_images/pipelines-add.gif b/platform-enterprise/getting-started/_images/pipelines-add.gif new file mode 100644 index 000000000..e37292541 Binary files /dev/null and b/platform-enterprise/getting-started/_images/pipelines-add.gif differ diff --git a/platform-enterprise/getting-started/_images/process-runtime-2.png b/platform-enterprise/getting-started/_images/process-runtime-2.png new file mode 100644 index 000000000..1139ad7aa Binary files /dev/null and b/platform-enterprise/getting-started/_images/process-runtime-2.png differ diff --git a/platform-enterprise/getting-started/_images/protein-structure-visualization.gif b/platform-enterprise/getting-started/_images/protein-structure-visualization.gif new file mode 100644 index 000000000..7db545914 Binary files /dev/null and b/platform-enterprise/getting-started/_images/protein-structure-visualization.gif differ diff --git a/platform-enterprise/getting-started/_images/protein-vis-short-gif-1080p-cropped.gif b/platform-enterprise/getting-started/_images/protein-vis-short-gif-1080p-cropped.gif new file mode 100644 index 000000000..0b2f62eff Binary files /dev/null and b/platform-enterprise/getting-started/_images/protein-vis-short-gif-1080p-cropped.gif differ diff --git a/platform-enterprise/getting-started/_images/proteinfold-dataset.gif b/platform-enterprise/getting-started/_images/proteinfold-dataset.gif new file mode 100644 index 000000000..2920efabe Binary files /dev/null and b/platform-enterprise/getting-started/_images/proteinfold-dataset.gif differ diff --git a/platform-enterprise/getting-started/_images/proteinfold-lf1.gif b/platform-enterprise/getting-started/_images/proteinfold-lf1.gif new file mode 100644 index 000000000..53056d630 Binary files /dev/null and b/platform-enterprise/getting-started/_images/proteinfold-lf1.gif differ diff --git a/platform-enterprise/getting-started/_images/proteinfold-lf2.gif b/platform-enterprise/getting-started/_images/proteinfold-lf2.gif 
new file mode 100644 index 000000000..905c25ac8 Binary files /dev/null and b/platform-enterprise/getting-started/_images/proteinfold-lf2.gif differ diff --git a/platform-enterprise/getting-started/_images/proteinfold-mode.gif b/platform-enterprise/getting-started/_images/proteinfold-mode.gif new file mode 100644 index 000000000..ad2a86ace Binary files /dev/null and b/platform-enterprise/getting-started/_images/proteinfold-mode.gif differ diff --git a/platform-enterprise/getting-started/_images/rstudio.gif b/platform-enterprise/getting-started/_images/rstudio.gif new file mode 100644 index 000000000..31a66d4b8 Binary files /dev/null and b/platform-enterprise/getting-started/_images/rstudio.gif differ diff --git a/platform-enterprise/getting-started/_images/starting_tower_enterprise.png b/platform-enterprise/getting-started/_images/starting_tower_enterprise.png new file mode 100644 index 000000000..817f93a37 Binary files /dev/null and b/platform-enterprise/getting-started/_images/starting_tower_enterprise.png differ diff --git a/platform-enterprise/getting-started/_images/starting_tower_nf.png b/platform-enterprise/getting-started/_images/starting_tower_nf.png new file mode 100644 index 000000000..33216eaa9 Binary files /dev/null and b/platform-enterprise/getting-started/_images/starting_tower_nf.png differ diff --git a/platform-enterprise/getting-started/_images/starting_tower_opensource.png b/platform-enterprise/getting-started/_images/starting_tower_opensource.png new file mode 100644 index 000000000..44f93fd00 Binary files /dev/null and b/platform-enterprise/getting-started/_images/starting_tower_opensource.png differ diff --git a/platform-enterprise/getting-started/_images/usage_create_token.png b/platform-enterprise/getting-started/_images/usage_create_token.png new file mode 100644 index 000000000..65da08d5a Binary files /dev/null and b/platform-enterprise/getting-started/_images/usage_create_token.png differ diff --git 
a/platform-enterprise/getting-started/_images/usage_name_token.png b/platform-enterprise/getting-started/_images/usage_name_token.png new file mode 100644 index 000000000..ec8ee1b4e Binary files /dev/null and b/platform-enterprise/getting-started/_images/usage_name_token.png differ diff --git a/platform-enterprise/getting-started/_images/usage_token.png b/platform-enterprise/getting-started/_images/usage_token.png new file mode 100644 index 000000000..0cd21e9f0 Binary files /dev/null and b/platform-enterprise/getting-started/_images/usage_token.png differ diff --git a/platform-enterprise/getting-started/_images/xpra-data-studios-IGV-load-bam.png b/platform-enterprise/getting-started/_images/xpra-data-studios-IGV-load-bam.png new file mode 100644 index 000000000..0fcb4a25c Binary files /dev/null and b/platform-enterprise/getting-started/_images/xpra-data-studios-IGV-load-bam.png differ diff --git a/platform-enterprise/getting-started/_images/xpra-data-studios-IGV-view-bam.png b/platform-enterprise/getting-started/_images/xpra-data-studios-IGV-view-bam.png new file mode 100644 index 000000000..0dde536f8 Binary files /dev/null and b/platform-enterprise/getting-started/_images/xpra-data-studios-IGV-view-bam.png differ diff --git a/platform-enterprise/getting-started/deployment-options.md b/platform-enterprise/getting-started/deployment-options.md new file mode 100644 index 000000000..794048709 --- /dev/null +++ b/platform-enterprise/getting-started/deployment-options.md @@ -0,0 +1,89 @@ +--- +title: "Deploy Platform" +description: "An overview of deployment versions and ways to run Seqera Platform." +date: "21 Apr 2023" +tags: [deployment] +--- + +[Seqera Platform Enterprise](../../version-25.1/enterprise/overview) is installed in an organization's own cloud or on-premises infrastructure. 
It includes: + +- Monitoring, logging, and observability +- Pipeline execution Launchpad +- Cloud resource provisioning +- Pipeline actions and event-based execution +- LDAP and OpenID authentication +- Enterprise role-based access control (RBAC) +- Full-featured API +- Dedicated support for Nextflow and Seqera Platform + +To install Platform in your organization's infrastructure, [contact us](https://cloud.seqera.io/demo/) for a demo to discuss your requirements. + +## How to use Platform + +You can access your Seqera instance through the UI, the [API](../api/overview), the [CLI](../cli/overview), or in Nextflow directly using the `-with-tower` option. + +### Platform web-based UI + +1. Create an account and log in to Seqera Cloud at [cloud.seqera.io](https://cloud.seqera.io). + + :::note + Platform login sessions remain active as long as the application browser window remains open and active. When the browser window is terminated, automatic logout occurs within 6 hours by default. + ::: + +2. Create and configure a new [compute environment](../compute-envs/overview). +3. Start [launching pipelines](../launch/launchpad). + +### Seqera API + +See [API](../api/overview). + +### Seqera CLI + +See [CLI](../cli/overview). + +### Nextflow `-with-tower` + +If you have an existing environment where you run Nextflow directly, you can still leverage Seqera Platform capabilities by executing your Nextflow run with a `with-tower` flag: + +1. Create an account and log in to Seqera at [cloud.seqera.io](https://cloud.seqera.io). +2. From your personal workspace: Go to the user menu and select **Settings > Your tokens**. +3. Select **Add token**. +4. Enter a unique name for your token, then select **Add**. +5. Copy and store your token securely. + + :::caution + The access token is displayed only once. Save the token value before closing the **Personal Access Token** window. + ::: + +6. 
Open a terminal window and create environment variables to store the Seqera access token and Nextflow version. Replace `<your-token>` with your newly-created token. + + ```bash + export TOWER_ACCESS_TOKEN=<your-token> + export NXF_VER=23.10.1 + ``` + + :::note + Bearer token support requires Nextflow version 20.10.0 or later. Set with the `NXF_VER` environment variable. + ::: + +7. To submit a pipeline to a [workspace](../orgs-and-teams/workspace-management) using Nextflow, add the workspace ID to your environment: + + ```bash + export TOWER_WORKSPACE_ID=000000000000000 + ``` + + To find your workspace ID, select your organization in Seqera and navigate to the **Workspaces** tab. + +8. Run your Nextflow pipeline with the `-with-tower` flag: + + ```bash + nextflow run main.nf -with-tower + ``` + + Replace `main.nf` with the filename of your Nextflow script. + +You can now monitor your workflow runs in the Seqera interface. To configure and execute Nextflow pipelines in cloud environments, see [compute environments](../compute-envs/overview). + +:::tip +See the [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html?highlight=tower#scope-tower) for further run configuration options using Nextflow configuration files. +::: diff --git a/platform-enterprise/getting-started/overview.md b/platform-enterprise/getting-started/overview.md new file mode 100644 index 000000000..f82c408a5 --- /dev/null +++ b/platform-enterprise/getting-started/overview.md @@ -0,0 +1,48 @@ +--- +title: "Run a pipeline" +description: "An overview of Seqera Platform deployment versions and ways to run Seqera." +date: "15 September 2023" +tags: [overview] +--- + +On this page, learn how to run a pipeline with sample data and get started running your own pipelines. + +:::tip +[**Sign up**](https://tower.nf "Seqera Platform") to try Seqera for free, or request a [**demo**](https://cloud.tower.nf/demo/ "Seqera Platform Demo") for deployments in your own on-premises or cloud environment.
+::: + +The Community Showcase [Launchpad](../launch/launchpad) is an example workspace provided by Seqera. The showcase is pre-configured with compute environments, credentials, and pipelines to start running Nextflow workflows immediately. A pipeline consists of a pre-configured workflow repository, compute environment (with 100 free CPU hours), and launch parameters. From version 23.1.3, the Community Showcase comes pre-loaded with two AWS Batch compute environments to run showcase pipelines. + +### Components + +- [Datasets](../data/datasets) are collections of versioned, structured data (usually in the form of a samplesheet) in CSV or TSV format. A dataset is used as the input for a pipeline run. Sample datasets are used in pipelines with the same name, e.g., the _nf-core-rnaseq-test_ dataset is used as input when you run the _nf-core-rnaseq_ pipeline. +- [Compute environments](../compute-envs/overview) are the platforms where workflows are executed. A compute environment consists of access credentials, configuration settings, and storage options for the environment. +- [Credentials](../credentials/overview) are the authentication keys Seqera uses to access compute environments, private code repositories, and external services. Credentials are SHA-256 encrypted before secure storage. The Community Showcase includes all the credentials you need to run pipelines in the included AWS Batch compute environments. +- [Secrets](../secrets/overview) are retrieved and used during pipeline execution. In your private or organization workspace, you can store the access keys, licenses, or passwords required for your pipeline execution to interact with third-party services. The secrets included in the Community Showcase contain license keys to run _nf-dragen_ and _nf-sentieon_ pipelines in the showcase compute environments. + +## Run a pipeline with sample data + +1. From the [Launchpad](../launch/launchpad), select a pipeline to view the pipeline detail page. 
_nf-core-rnaseq_ is a good first pipeline example. +2. Optional: Select the URL under **Workflow repository** to view the pipeline code repository in another tab. +3. Select **Launch** from the pipeline detail page. +4. On the **Launch pipeline** page, enter a unique **Workflow run name** or use the pre-filled random name. +5. Optional: Enter labels to be assigned to the run in the **Labels** field. +6. Under **Input/output options**, select the dataset named after your chosen pipeline from the drop-down menu under **input**. +7. Under **outdir**, specify an output directory where run results will be saved. This must be an absolute path to storage on cloud infrastructure and defaults to `./results`. +8. Under **email**, enter an email address where you wish to receive the run completion summary. +9. Under **multiqc_title**, enter a title for the MultiQC report. This is used as both the report page header and filename. + +The remaining launch form fields will vary depending on the pipeline you have selected. Parameters required for the pipeline to run are pre-filled by default, and empty fields are optional. + +Once you've filled the necessary launch form details, select **Launch**. Your new run will be displayed at the top of the list in the **Runs** tab with a **submitted** status. Select the run name to navigate to the run detail page and view the configuration, parameters, status of individual tasks, and run report. + +## Run your own pipelines + +To run pipelines on your own infrastructure, you first need to create your own organization. + +* [Organizations](../orgs-and-teams/organizations) are the top-level structure in Seqera. They contain the building blocks of your organizational infrastructure. +* [Workspaces](../orgs-and-teams/workspace-management) are where resources are managed. All team members can access the organization workspace. 
In addition to this, each user has a unique personal workspace to manage resources such as pipelines, compute environments, and credentials. +* [Teams](../orgs-and-teams/organizations) are collections of members. +* [Members](../administration/overview#manage-members) belong to an organization and can have different levels of access across workspaces. + +You can create multiple workspaces within an organization context and associate each of these workspaces with dedicated teams of users, while providing fine-grained access control for each of the teams. See [Administration](../orgs-and-teams/organizations). diff --git a/platform-enterprise/getting-started/proteinfold.md b/platform-enterprise/getting-started/proteinfold.md new file mode 100644 index 000000000..784dd10f6 --- /dev/null +++ b/platform-enterprise/getting-started/proteinfold.md @@ -0,0 +1,493 @@ +--- +title: "Protein structure prediction" +description: "An introduction to running nf-core/proteinfold in Seqera Platform" +date: "21 Jul 2024" +tags: [platform, seqera pipelines, studios, proteinfold, alphafold, colabfold, compute environment, aws] +toc_max_heading_level: 2 +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +This guide details how to perform best-practice analysis for protein 3D structure prediction on an AWS Batch compute environment in Platform. It includes: + +- Creating AWS Batch compute environments to run your pipeline and downstream analysis +- Adding the *nf-core/proteinfold* pipeline to your workspace +- Importing your pipeline input data +- Launching the pipeline and monitoring execution from your workspace +- Setting up a custom analysis environment with Studios + +:::info[**Prerequisites**] +You will need the following to get started: + +- [Admin](../orgs-and-teams/roles) permissions in an existing organization workspace. See [Set up your workspace](./workspace-setup) to create an organization and workspace from scratch. 
+- An existing AWS cloud account with access to the AWS Batch service. +- Existing access credentials with permissions to create and manage resources in your AWS account. See [IAM](../compute-envs/aws-batch#iam) for guidance to set up IAM permissions for Platform. +::: + +## Compute environment + +The compute and storage requirements for protein structure prediction depend on the number and length of protein sequences being analyzed and the size of the database used for prediction by the deep learning models, such as AlphaFold2 and ColabFold. Input sequences typically range from a few kilobytes for single proteins to several megabytes for large datasets, and reference databases can be extremely large, from 100 GB to several TB. Protein folding pipelines generate intermediate files during execution, such as for alignments and feature representations, the sizes of which vary based on the number of sequences and the complexity of the analysis. + +Given the data sizes and computational intensity, production pipelines perform best with NVIDIA A10 or larger GPUs and low-latency, high-throughput cloud storage file handling. + +### GPUs + +The *nf-core/proteinfold* pipeline performs protein folding prediction using one of three deep learning models: AlphaFold2, ColabFold, or ESMFold. The computationally intensive tasks for protein structure prediction perform better on GPUs due to their ability to handle large matrix operations efficiently and perform parallel computations. GPUs can dramatically reduce the time required for protein structure predictions, making it feasible to analyze larger datasets or perform more complex simulations. + +Platform supports the allocation of both CPUs and GPUs in the same compute environment. For example, specify `m6id`, `c6id`, `r6id`, `g5`, `p3` instance families in the **Instance types** field when creating your AWS Batch compute environment. See [Create compute environment](#create-compute-environment) below. 
+ +When you launch *nf-core/proteinfold* in Platform, enable **use_gpu** to instruct Nextflow to run GPU-compatible pipeline processes on GPU instances. See [Launch pipeline](#launch-pipeline) below. + +### Fusion file system + +The [Fusion](../supported_software/fusion/overview) file system enables seamless read and write operations to cloud object stores, leading to simpler pipeline logic and faster, more efficient execution. While Fusion is not required to run nf-core/proteinfold, it significantly enhances I/O-intensive tasks and eliminates the need for intermediate data copies, which is particularly beneficial when working with the large databases used by deep learning models for prediction. + +Fusion works best with AWS NVMe instances (fast instance storage) as this delivers the fastest performance when compared to environments using only AWS EBS (Elastic Block Store). Batch Forge selects instances automatically based on your compute environment configuration, but you can optionally specify instance types. To enable fast instance storage, you must select EC2 instances with NVMe SSD storage (`g4dn`, `g5`, or `P3` families or greater). + +:::note +Fusion requires a license for use in Seqera Platform compute environments or directly in Nextflow. See [Fusion licensing](https://docs.seqera.io/fusion/licensing) for more information. +::: + +### Create compute environment + +:::info +The same compute environment can be used for pipeline execution and running your Studios notebook environment, but Studios does not support AWS Fargate. To use this compute environment for both *nf-core/proteinfold* execution and your Studio, leave **Enable Fargate for head job** disabled and include a CPU-based EC2 instance family (`c6id`, `r6id`, etc.) in your **Instance types**. + +Alternatively, create a second basic AWS Batch compute environment and a Studio with at least 2 CPUs and 8192 MB of RAM. 
+::: + +From the **Compute Environments** tab in your organization workspace, select **Add compute environment** and complete the following fields: + +| **Field** | **Description** | +|---------------------------------------|------------------------------------------------------------| +| **Name** | A unique name for the compute environment. | +| **Platform** | AWS Batch | +| **Credentials** | Select existing credentials, or **+** to create new credentials.| +| **Access Key** | AWS access key ID. | +| **Secret Key** | AWS secret access key. | +| **Region** | The target execution region. | +| **Pipeline work directory** | An S3 bucket path in the same execution region. | +| **Enable Wave Containers** | Use the Wave containers service to provision containers. | +| **Enable Fusion v2** | Access your S3-hosted data via the Fusion v2 file system. | +| **Enable fast instance storage** | Use NVMe instance storage to speed up I/O and disk access. Requires Fusion v2.| +| **Config Mode** | Batch Forge | +| **Provisioning Model** | Choose between Spot and On-demand instances. | +| **Max CPUs** | Sensible values for production use range between 2000 and 5000.| +| **Enable Fargate for head job** | Run the Nextflow head job using the Fargate container service to speed up pipeline launch. Requires Fusion v2. Do not enable for Studios compute environments. | +| **Use Amazon-recommended GPU-optimized ECS AMI** | When enabled, Batch Forge specifies the most current AWS-recommended GPU-optimized ECS AMI as the EC2 fleet AMI when creating the compute environment. | +| **Allowed S3 buckets** | Additional S3 buckets or paths to be granted read-write permission for this compute environment. For the purposes of this guide, add `s3://proteinfold-dataset` to grant compute environment access to the DB and params used for prediction by AlphaFold2 and ColabFold. | +| **Instance types** | Specify the instance types to be used for computation. 
You must include GPU-enabled instance types (`g4dn`, `g5`) when the Amazon-recommended GPU-optimized ECS AMI is in use. Include CPU-based instance families for Studios compute environments. | +| **Resource labels** | `name=value` pairs to tag the AWS resources created by this compute environment.| + +![Create AWS Batch compute environment](./_images/pf-ce.gif) + +## Add pipeline to Platform + +:::info +The [*nf-core/proteinfold*](https://github.com/nf-core/proteinfold) pipeline is a bioinformatics best-practice analysis pipeline for Protein 3D structure prediction. + +![nf-core/proteinfold subway map](./_images/nf-core-proteinfold_metro_map_1.1.0.png) +::: + +[Seqera Pipelines](https://seqera.io/pipelines) is a curated collection of quality open source pipelines that can be imported directly to your workspace Launchpad in Platform. Each pipeline includes a curated test dataset to use in a test run to confirm compute environment compatibility in just a few steps. + +To use Seqera Pipelines to import the *nf-core/proteinfold* pipeline to your workspace: + +![Seqera Pipelines add to Launchpad](./_images/pipelines-add-pf.gif) + +1. Search for *nf-core/proteinfold* and select **Launch** next to the pipeline name in the list. In the **Add pipeline** tab, select **Cloud** or **Enterprise** depending on your Platform account type, then provide the information needed for Seqera Pipelines to access your Platform instance: + - **Seqera Cloud**: Paste your Platform **Access token** and select **Next**. + - **Seqera Enterprise**: Specify the **Seqera Platform URL** (hostname) and **Base API URL** for your Enterprise instance, then paste your Platform **Access token** and select **Next**. + :::tip + If you do not have a Platform access token, select **Get your access token from Seqera Platform** to open the Access tokens page in a new browser tab. + ::: +1. Select your Platform **Organization**, **Workspace**, and **Compute environment** for the imported pipeline. +1. 
(Optional) Customize the **Pipeline Name** and **Pipeline Description**. +1. Select **Add Pipeline**. + +:::info +To add a custom pipeline not listed in Seqera Pipelines to your Platform workspace, see [Add pipelines](./quickstart-demo/add-pipelines#) for manual Launchpad instructions. +::: + +## Pipeline input data + +The [*nf-core/proteinfold*](https://github.com/nf-core/proteinfold) pipeline works with input datasets (samplesheets) containing sequence names and FASTA file locations (paths to FASTA files in cloud or local storage). The pipeline includes an example samplesheet that looks like this: + +
+ **nf-core/proteinfold example samplesheet** + + | sequence | fasta | + | -------- | ----- | + | T1024 | https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta | + | T1026 | https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta | + +
+ +In Platform, samplesheets and other data can be made easily accessible in one of two ways: +- Use **Data Explorer** to browse and interact with remote data from AWS S3, Azure Blob Storage, and Google Cloud Storage repositories, directly in your organization workspace. +- Use **Datasets** to upload structured data to your workspace in CSV (Comma-Separated Values) or TSV (Tab-Separated Values) format. + +
+ **Add a cloud bucket via Data Explorer** + + Private cloud storage buckets accessible with the credentials in your workspace are added to Data Explorer automatically by default. However, you can also add custom directory paths within buckets to your workspace to simplify direct access. + + For example, to add the proteinfold open database to your workspace: + + ![Add public bucket](./_images/data-explorer-add-proteinfold.gif) + + 1. From the **Data Explorer** tab, select **Add cloud bucket**. + 1. Specify the bucket details: + - The cloud **Provider**: AWS + - An existing cloud **Bucket path**: `s3://proteinfold-dataset` + - A unique **Name** for the bucket: "proteinfold-dataset" + - The **Credentials** used to access the bucket: select **Public**. + - An optional bucket **Description**. + 1. Select **Add**. + + You can now select data directly from this bucket as input when launching your pipeline, without the need to interact with cloud consoles or CLI tools. + +
+ +
+ **Add a dataset** + + From the **Datasets** tab, select **Add Dataset**. + + ![Add a dataset](./_images/proteinfold-dataset.gif) + + Specify the following dataset details: + + - A **Name** for the dataset, such as `proteinfold_samplesheet`. + - A **Description** for the dataset. + - Select the **First row as header** option to prevent Platform from parsing the header row of the samplesheet as sample data. + - Select **Upload file** and browse to your CSV or TSV samplesheet file in local storage, or simply drag and drop it into the box. + + The dataset is now listed in your organization workspace datasets and can be selected as input when launching your pipeline. + + :::info + Platform does not store the data used for analysis in pipelines. The dataset must specify the locations of data stored on your own infrastructure. + ::: + +
+ +## Launch pipeline + +:::note +This guide is based on [version 1.1.1](https://nf-co.re/proteinfold/1.1.1) of the *nf-core/proteinfold* pipeline. Launch form parameters and tools may differ in other versions. +::: + +With your compute environment created, *nf-core/proteinfold* added to your workspace Launchpad, and your samplesheet accessible in Platform, you are ready to launch your pipeline. Navigate to the Launchpad and select **Launch** next to *nf-core-proteinfold* to open the launch form. + +The launch form consists of **General config**, **Run parameters**, and **Advanced options** sections to specify your run parameters before execution, and an execution summary. Use section headings or select the **Previous** and **Next** buttons at the bottom of the page to navigate between sections. + +### General config + +- **Pipeline to launch**: The pipeline Git repository name or URL: `https://github.com/nf-core/proteinfold`. For saved pipelines, this is prefilled and cannot be edited. +- **Revision number**: A valid repository commit ID, tag, or branch name: `1.1.1`. For saved pipelines, this is prefilled and cannot be edited. +- **Config profiles**: One or more [configuration profile](https://www.nextflow.io/docs/latest/config.html#config-profiles) names to use for the execution. Config profiles must be defined in the `nextflow.config` file in the pipeline repository. Benchmarking runs for this guide used nf-core profiles with included test datasets — `test_full_alphafold2_multimer` for Alphafold2 and `test_full_alphafold2_multimer` for Colabfold. +- **Workflow run name**: An identifier for the run, pre-filled with a random name. This can be customized. +- **Labels**: Assign new or existing [labels](../labels/overview) to the run. +- **Compute environment**: Your AWS Batch compute environment. +- **Work directory**: The cloud storage path where pipeline scratch data is stored. 
Platform will create a scratch sub-folder if only a cloud bucket location is specified. + :::note + The credentials associated with the compute environment must have access to the work directory. + ::: + +![General config tab](./_images/proteinfold-lf1.gif) + +### Run parameters + +There are three ways to enter **Run parameters** prior to launch: + +- The **Input form view** displays form fields to enter text or select attributes from lists, and browse input and output locations with [Data Explorer](../data/data-explorer). +- The **Config view** displays raw configuration text that you can edit directly. Select JSON or YAML format from the **View as** list. +- **Upload params file** allows you to upload a JSON or YAML file with run parameters. + +Platform uses the `nextflow_schema.json` file in the root of the pipeline repository to dynamically create a form with the necessary pipeline parameters. + +![Run parameters](./_images/proteinfold-lf2.gif) + +Specify your pipeline input and output and modify other pipeline parameters as needed. + +
+ **input** + + Use **Browse** to select your pipeline input data: + + - In the **Data Explorer** tab, select the existing cloud bucket that contains your samplesheet, browse or search for the samplesheet file, and select the chain icon to copy the file path before closing the data selection window and pasting the file path in the input field. + - In the **Datasets** tab, search for and select your existing dataset. + +
+
+ **outdir** + + Use the `outdir` parameter to specify where the pipeline outputs are published. `outdir` must be unique for each pipeline run. Otherwise, your results will be overwritten. + + **Browse** and copy cloud storage directory paths using Data Explorer, or enter a path manually. + +
+ +- The **mode** menu allows you to select the deep learning model used for structure prediction (`alphafold2`, `colabfold`, or `esmfold`). +- Enable **use_gpu** to run GPU-compatible tasks on GPUs. This requires **Use Amazon-recommended GPU-optimized ECS AMI** to be enabled and GPU-enabled instances to be specified under **Instance types** in your compute environment. + +![Mode options](./_images/proteinfold-mode.gif) + +:::info +For the purposes of this guide, run the pipeline in both `alphafold2` and `colabfold` modes. Specify unique directory paths for the `outdir` parameter (such as "Alphafold2" and "ColabFold") to ensure output data is kept separate and not overwritten. Predicted protein structures for each model will be visualized side-by-side in the [Interactive analysis](#interactive-analysis-with-studios) section. +::: + +### Advanced settings + +- Use [resource labels](../resource-labels/overview) to tag the computing resources created during the workflow execution. While resource labels for the run are inherited from the compute environment and pipeline, workspace admins can override them from the launch form. Applied resource label names must be unique. +- [Pipeline secrets](../secrets/overview) store keys and tokens used by workflow tasks to interact with external systems. Enter the names of any stored user or workspace secrets required for the workflow execution. +- See [Advanced options](../launch/advanced) for more details. + +After you have filled the necessary launch details, select **Launch**. The **Runs** tab shows your new run in a **submitted** status at the top of the list. Select the run name to navigate to the [**View Workflow Run**](../monitoring/overview) page and view the configuration, parameters, status of individual tasks, and run report. + +
+ **Run monitoring** + + Select your new run from the **Runs** tab list to view the run details. + + #### Run details page + + As the pipeline runs, run details will populate with the following tabs: + + - **Command-line**: The Nextflow command invocation used to run the pipeline. This includes details about the pipeline version (`-r` flag) and profile, if specified (`-profile` flag). + - **Parameters**: The exact set of parameters used in the execution. This is helpful for reproducing the results of a previous run. + - **Resolved Nextflow configuration**: The full Nextflow configuration settings used for the run. This includes parameters, but also settings specific to task execution (such as memory, CPUs, and output directory). + - **Execution Log**: A summarized Nextflow log providing information about the pipeline and the status of the run. + - **Datasets**: Link to datasets, if any were used in the run. + - **Reports**: View pipeline outputs directly in the Platform. + + ![View the nf-core/proteinfold run](./_images/pf-run-details.gif) + + #### View reports + + Most Nextflow pipelines generate reports or output files which are useful to inspect at the end of the pipeline execution. Reports can contain quality control (QC) metrics that are important to assess the integrity of the results. + + The paths to report files point to a location in cloud storage (in the `outdir` directory specified during launch), but you can view the contents directly and download each file without navigating to the cloud or a remote filesystem. + + :::info + See [Reports](../reports/overview) for more information. + ::: + + #### View general information + + The run details page includes general information about who executed the run, when it was executed, the Git commit ID and/or tag used, and additional details about the compute environment and Nextflow version used. 
+ + ![General run information](./_images/pf-run-details-general.gif) + + #### View details for a task + + Scroll down the page to view: + + - The progress of individual pipeline **Processes** + - **Aggregated stats** for the run (total walltime, CPU hours) + - **Workflow metrics** (CPU efficiency, memory efficiency) + - A **Task details** table for every task in the workflow + + The task details table provides further information on every step in the pipeline, including task statuses and metrics. + + #### Task details + + Select a task in the task table to open the **Task details** dialog. The dialog has three tabs: + + - The **About** tab contains extensive task execution details. + - The **Execution log** tab provides a real-time log of the selected task's execution. Task execution and other logs (such as stdout and stderr) are available for download from here, if still available in your compute environment. + - The **Data Explorer** tab allows you to view the task working directory directly in Platform. + + ![Task details window](./_images/pf-task-details.gif) + + Nextflow hash-addresses each task of the pipeline and creates unique directories based on these hashes. Data Explorer allows you to view the log files and output files generated for each task in its working directory, directly within Platform. You can view, download, and retrieve the link for these intermediate files in cloud storage from the **Data Explorer** tab to simplify troubleshooting. + +
+ +## Interactive analysis with Studios + +[Studios](../studios/index) streamlines the process of creating interactive analysis environments for Platform users. With built-in templates for platforms like Jupyter Notebook, RStudio, and VS Code, creating a Studio is as simple as adding and sharing pipelines or datasets. The Studio URL can also be shared with any user with the [Connect role](../orgs-and-teams/roles) for real-time access and collaboration. + +For the purposes of this guide, a Jupyter notebook environment will be used for interactive visualization of the predicted protein structures, optionally comparing AlphaFold2 and Colabfold structures for the same sequence data. + +### Create a Jupyter notebook Studio + +From the **Studios** tab, select **Add a Studio** and complete the following: +- In the **Compute & Data** tab: + - Select your AWS Batch compute environment. + :::info + The same compute environment can be used for pipeline execution and running your Studios notebook environment, but Studios does not support AWS Fargate and sessions must run on CPUs. To use one compute environment for both *nf-core/proteinfold* execution and your Studio, leave **Enable Fargate for head job** disabled and include at least one CPU-based EC2 instance family (`c6id`, `r6id`, etc.) in your **Instance types**. + + Alternatively, create a second basic AWS Batch compute environment with at least 2 CPUs and 8192 MB of RAM for your Studio. + ::: + - Optional: Enter CPU and memory allocations. The default values are 2 CPUs and 8192 MB memory (RAM). + :::note + Studios compete for computing resources when sharing compute environments. Ensure your compute environment has sufficient resources to run both your pipelines and Studio sessions. + ::: + - Mount data using Data Explorer: Mount the S3 bucket or directory path that contains the pipeline work directory of your Proteinfold run. 
+- In the **General config** tab: + - Select the latest **Jupyter** container image template from the list. + - Optional: Enter a unique name and description for the data studio. + - Check **Install Conda packages** and paste the following Conda environment YAML snippet: + + ```yaml + channels: + - bioconda + - conda-forge + dependencies: + - python=3.10 + - conda-forge::biopython=1.84 + - conda-forge::nglview=3.1.2 + - conda-forge::ipywidgets=8.1.5 + ``` + +- Confirm the Studio details in the **Summary** tab +- Select **Add** and choose whether to add and start the Studio immediately. +- When the Studio is created and in a running state, **Connect** to it. + +![Add Studio](./_images/add-s-pf.gif) + +### Visualize protein structures + +The Jupyter environment can be configured with the packages and scripts you need for interactive analysis. For the purposes of this guide, run the following scripts in individual code cells to install the necessary packages and perform visualization: + +1. Import libraries and check versions: + + ```python + import sys + import jupyter_core + import nglview + import ipywidgets + import Bio + + print(f"Python version: {sys.version}") + print(f"Jupyter version: {jupyter_core.__version__}") + print(f"nglview version: {nglview.__version__}") + print(f"ipywidgets version: {ipywidgets.__version__}") + print(f"Biopython version: {Bio.__version__}") + print(f"Operating system: {sys.platform}") + print("All required libraries imported successfully.") + ``` + +1. 
Define visualization functions: + + ```python + import os + import ipywidgets as widgets + from IPython.display import display, HTML + + def visualize_protein(pdb_file, width='400px', height='400px'): + view = nglview.show_structure_file(pdb_file) + view.add_representation('cartoon', selection='protein', color='residueindex') + view.add_representation('ball+stick', selection='hetero') + view._remote_call('setSize', target='Widget', args=[width, height]) + + # Set initial view + view._remote_call('autoView') + view._remote_call('centerView') + + # Adjust zoom level (you may need to adjust this value) + view._remote_call('zoom', target='stage', args=[0.8]) + + return view + + def compare_proteins(pdb_files): + views = [] + for method, file_path in pdb_files.items(): + if os.path.exists(file_path): + view = visualize_protein(file_path) + label = widgets.Label(method) + views.append(widgets.VBox([label, view])) + return widgets.HBox(views, layout=widgets.Layout(width='100%')) + + print("Visualization functions defined successfully.") + ``` + +1. Set up file paths and create file dictionary: + + ```python + # Replace with the actual paths to your AlphaFold2 and ColabFold PDB files + alphafold_pdb = "data/path/to/your/alphafold/output.pdb" + colabfold_pdb = "data/path/to/your/colabfold/output.pdb" + + # Create a dictionary of files + pdb_files = { + "AlphaFold": alphafold_pdb, + "ColabFold": colabfold_pdb + } + + print("File paths set up successfully.") + ``` + +1. Display file information: + + ```python + display(HTML("
<h3>Protein Structure Prediction Output Files:</h3>
")) + for method, file_path in pdb_files.items(): + if os.path.exists(file_path): + display(HTML(f"
<b>{method}:</b> {file_path}
")) + else: + display(HTML(f"
<b>{method}:</b> File not found at {file_path}
")) + ``` + +1. Visualize structures: + + ```python + valid_pdb_files = {method: file_path for method, file_path in pdb_files.items() if os.path.exists(file_path)} + + if valid_pdb_files: + display(HTML("
<h3>Protein Structure Visualization:</h3>
")) + comparison = compare_proteins(valid_pdb_files) + display(comparison) + else: + display(HTML("
<p>No valid PDB files found. Please check the file paths and ensure that the files exist.</p>
")) + ``` + +1. Add interactive elements: + + ```python + if valid_pdb_files: + method_dropdown = widgets.Dropdown( + options=[method for method, file in valid_pdb_files.items()], + description='Select method:', + disabled=False, + ) + + info_output = widgets.Output() + + def on_change(change): + with info_output: + info_output.clear_output() + selected_method = change['new'] + selected_file = valid_pdb_files[selected_method] + print(f"Selected method: {selected_method}") + print(f"File path: {selected_file}") + print(f"File size: {os.path.getsize(selected_file) / 1024:.2f} KB") + + method_dropdown.observe(on_change, names='value') + + display(HTML("
<h3>Structure Information:</h3>
")) + display(widgets.VBox([method_dropdown, info_output])) + ``` + +1. Display usage instructions: + + ```python + display(HTML(""" +
<h3>How to use this visualization:</h3>
<ul>
  <li>The protein structures from AlphaFold and ColabFold are shown side-by-side above.</li>
  <li>You can interact with each structure independently:
    <ul>
      <li>Click and drag to rotate the structure.</li>
      <li>Scroll to zoom in and out.</li>
      <li>Right-click and drag to translate the structure.</li>
    </ul>
  </li>
  <li>Use the dropdown menu to select a specific method and view its file information.</li>
</ul>
+ """)) + ``` + +![Protein structure visualization](./_images/protein-structure-visualization.gif) + diff --git a/platform-enterprise/getting-started/quickstart-demo/add-data.md b/platform-enterprise/getting-started/quickstart-demo/add-data.md new file mode 100644 index 000000000..820f0ddc6 --- /dev/null +++ b/platform-enterprise/getting-started/quickstart-demo/add-data.md @@ -0,0 +1,112 @@ +--- +title: "Add data" +description: "An introduction to adding pipeline input data in Seqera Platform" +date: "21 Jul 2024" +tags: [platform, data, data explorer, datasets] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +Most bioinformatics pipelines require an input of some sort. This is typically a samplesheet where each row consists of a sample, the location of files for that sample (such as FASTQ files), and other sample details. Reliable shared access to pipeline input data is crucial to simplify data management, minimize user data-input errors, and facilitate reproducible workflows. + +In Platform, samplesheets and other data can be made easily accessible in one of two ways: +- Use **Data Explorer** to browse and interact with remote data from AWS S3, Azure Blob Storage, and Google Cloud Storage repositories, directly in your organization workspace. +- Use **Datasets** to upload structured data to your workspace in CSV (Comma-Separated Values) or TSV (Tab-Separated Values) format. + +## Data Explorer + +For pipeline runs in the cloud, users typically need access to buckets or blob storage to upload files (such as samplesheets and reference data) for analysis and to view pipeline results. Managing credentials and permissions for multiple users and training users to navigate cloud consoles and CLIs can be complicated. Data Explorer provides the simplified alternative of viewing your data directly in Platform. 
+ +### Add a cloud bucket + +Private cloud storage buckets accessible by the [credentials](../../credentials/overview) in your workspace are added to Data Explorer automatically by default. However, you can also add custom directory paths within buckets to your workspace to simplify direct access. + +To add individual buckets (or directory paths within buckets): + +1. From the **Data Explorer** tab, select **Add cloud bucket**. +1. Specify the bucket details: + - The cloud **Provider**. + - An existing cloud **Bucket path**. + - A unique **Name** for the bucket. + - The **Credentials** used to access the bucket. For public cloud buckets, select **Public** from the dropdown menu. + - An optional bucket **Description**. +1. Select **Add**. + + ![Add public bucket](assets/data-explorer-add-bucket.gif) + +You can now use this data in your analysis without the need to interact with cloud consoles or CLI tools. + +#### Public data sources + +Select **Public** from the credentials dropdown menu to add public cloud storage buckets from resources such as: + +- [The Cancer Genome Atlas (TCGA)](https://registry.opendata.aws/tcga/) +- [1000 Genomes Project](https://registry.opendata.aws/1000-genomes/) +- [NCBI SRA](https://registry.opendata.aws/ncbi-sra/) +- [Genome in a Bottle Consortium](https://docs.opendata.aws/giab/readme.html) +- [MSSNG Database](https://cloud.google.com/life-sciences/docs/resources/public-datasets/mssng) +- [Genome Aggregation Database (gnomAD)](https://cloud.google.com/life-sciences/docs/resources/public-datasets/gnomad) + +### View pipeline outputs + +In Data Explorer, you can: + + - **View bucket details**: + Select the information icon next to a bucket in the list to view the cloud provider, bucket address, and credentials. + + ![Bucket details](assets/data-explorer-view-details.gif) + + - **View bucket contents**: + Select a bucket name from the list to view the bucket contents. 
The file type, size, and path of objects are displayed in columns next to the object name. For example, view the outputs of an *nf-core/rnaseq* run: + + ![Data Explorer bucket](assets/sp-cloud-data-explorer.gif) + + - **Preview files**: + Select a file to open a preview window that includes a **Download** button. For example, view the resultant gene counts of the salmon quantification step of an *nf-core/rnaseq* run: + + ![Preview pipeline results](assets/data-explorer-preview-files.gif) + +## Datasets + +Datasets in Platform are CSV (comma-separated values) and TSV (tab-separated values) files stored in a workspace. You can select stored datasets as input data when launching a pipeline. + +
+ **Example: nf-core/rnaseq test samplesheet** + + The [nf-core/rnaseq](https://github.com/nf-core/rnaseq) pipeline works with input datasets (samplesheets) containing sample names, FASTQ file locations, and indications of strandedness. The Seqera Community Showcase sample dataset for nf-core/rnaseq specifies the paths to 7 small sub-sampled FASTQ files from a yeast RNAseq dataset: + + **Example nf-core/rnaseq dataset** + + | sample | fastq_1 | fastq_2 | strandedness | + | ------------------- | ------------------------------------ | ------------------------------------ | ------------ | + | WT_REP1 | s3://nf-core-awsmegatests/rnaseq/... | s3://nf-core-awsmegatests/rnaseq/... | reverse | + | WT_REP1 | s3://nf-core-awsmegatests/rnaseq/... | s3://nf-core-awsmegatests/rnaseq/... | reverse | + | WT_REP2 | s3://nf-core-awsmegatests/rnaseq/... | s3://nf-core-awsmegatests/rnaseq/... | reverse | + | RAP1_UNINDUCED_REP1 | s3://nf-core-awsmegatests/rnaseq/... | | reverse | + | RAP1_UNINDUCED_REP2 | s3://nf-core-awsmegatests/rnaseq/... | | reverse | + | RAP1_UNINDUCED_REP2 | s3://nf-core-awsmegatests/rnaseq/... | | reverse | + | RAP1_IAA_30M_REP1 | s3://nf-core-awsmegatests/rnaseq/... | s3://nf-core-awsmegatests/rnaseq/... | reverse | + +
+ +Download the nf-core/rnaseq [samplesheet_test.csv](samplesheet_test.csv). + +### Add a dataset + +From the **Datasets** tab, select **Add Dataset**. + +![Add a dataset](assets/sp-cloud-add-a-dataset.gif) + +Specify the following dataset details: + +- A **Name** for the dataset, such as `nf-core-rnaseq-test-dataset`. +- A **Description** for the dataset. +- Select the **First row as header** option to prevent Platform from parsing the header row of the samplesheet as sample data. +- Select **Upload file** and browse to your CSV or TSV file in local storage, or simply drag and drop it into the box. + +Notice the location of the files in the *nf-core/rnaseq* example dataset point to a path on S3. This could also be a path to a shared filesystem, if using an HPC compute environment. Nextflow will use these paths to stage the files into the task working directory. + +:::info +Platform does not store the data used for analysis in pipelines. The datasets must provide the locations of data that is stored on your own infrastructure. +::: \ No newline at end of file diff --git a/platform-enterprise/getting-started/quickstart-demo/add-pipelines.md b/platform-enterprise/getting-started/quickstart-demo/add-pipelines.md new file mode 100644 index 000000000..a9914436c --- /dev/null +++ b/platform-enterprise/getting-started/quickstart-demo/add-pipelines.md @@ -0,0 +1,86 @@ +--- +title: "Add pipelines" +description: "An introduction to adding pipelines to Seqera Platform workspaces" +date: "12 Jul 2024" +tags: [platform, launch, pipelines, launchpad] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +The Launchpad lists the preconfigured Nextflow pipelines that can be executed on the [compute environments](../../compute-envs/overview) in your workspace. + +Platform offers two methods to import pipelines to your workspace Launchpad — directly from Seqera Pipelines or manually via **Add pipeline** in Platform. 
+ +### Import from Seqera Pipelines + +[Seqera Pipelines](https://seqera.io/pipelines) is a curated collection of quality open-source pipelines that can be imported directly to your workspace Launchpad in Platform. Each pipeline includes a dataset to use in a test run to confirm compute environment compatibility in just a few steps. + +![Seqera Pipelines add to Launchpad](assets/seqera-pipelines-add-pipeline.gif) + +To import a pipeline: +1. Select **Launch** next to the pipeline name in the list. In the **Add pipeline** tab, select **Cloud** or **Enterprise** depending on your Platform account type, then provide the information needed for Seqera Pipelines to access your Platform instance: + - **Seqera Cloud**: Paste your Platform **Access token** and select **Next**. + - **Seqera Enterprise**: Specify the **Seqera Platform URL** (hostname) and **Base API URL** for your Enterprise instance, then paste your Platform **Access token** and select **Next**. + :::note + If you do not have a Platform access token, select **Get your access token from Seqera Platform** to open the Access tokens page in a new browser window. + ::: +1. Select the Platform **Organization**, **Workspace**, and **Compute environment** for the imported pipeline. +1. (Optional) Customize the **Pipeline Name** and **Pipeline Description**. + :::note + Pipeline names must be unique per workspace. + ::: +1. Select **Add Pipeline**. 
+ +:::tip +To launch pipelines directly with CLI tools, select the **Launch Pipeline** tab to grab commands for [Seqera Platform CLI](./automation#platform-cli), Nextflow, and [nf-core/tools](https://nf-co.re/docs/nf-core-tools): + +![Launch Seqera Pipeline](assets/seqera-pipelines-launch-cli.png) +::: + +### Add from the Launchpad + +![Add nf-core/rnaseq pipeline](assets/sp-cloud-add-rnaseq.gif) + +From your workspace Launchpad, select **Add Pipeline** and specify the following pipeline details: + +- (*Optional*) **Image**: Select the **Edit** icon on the pipeline image to open the **Edit image** window. From here, select **Upload file** to browse for an image file, or drag and drop the image file directly. Images must be in JPG or PNG format, with a maximum file size of 200 KB. + :::note + You can upload custom icons when adding or updating a pipeline. If no user-uploaded icon is defined, Platform will retrieve and attach a pipeline icon in the following order of precedence: + 1. A valid icon `key:value` pair defined in the `manifest` object of the `nextflow.config` file. + 2. The GitHub organization avatar (if the repository is hosted on GitHub). + 3. If none of the above are defined, Platform auto-generates and attaches a pipeline icon. + ::: +- **Name**: A custom name of your choice. Pipeline names must be unique per workspace. +- Optional: **Description**: A summary of the pipeline or any information that may be useful to workspace participants when selecting a pipeline to launch. +- Optional: **Labels**: Categorize the pipeline according to arbitrary criteria (such research group or reference genome version) that may help workspace participants to select the appropriate pipeline for their analysis. +- **Compute environment**: Select an existing workspace [compute environment](../../compute-envs/overview). +- **Pipeline to launch**: The URL of any public or private Git repository that contains Nextflow source code. 
+- **Revision number**: Platform will search all of the available tags and branches in the provided pipeline repository and render a dropdown to select the appropriate version. + :::tip + Selecting a specific pipeline version is important for reproducibility as this ensures that each run with the same input data will generate the same results. + ::: +- (*Optional*) **Config profiles**: Select a predefined profile for the Nextflow pipeline. + :::info + nf-core pipelines include a `test` profile that is associated with a minimal test dataset. This profile runs the pipeline with heavily sub-sampled input data for the purposes of [CI/CD](https://resources.github.com/devops/ci-cd/) and to quickly confirm that the pipeline runs on your infrastructure. + ::: +- (*Optional*) **Pipeline parameters**: Set any custom pipeline parameters that will be prepopulated when users launch the pipeline from the Launchpad. For example, set the path to local reference genomes so users don't have to worry about locating these files when launching the pipeline. + ![Add pipeline parameters](assets/sp-cloud-pipeline-params.gif) +- (*Optional*) **Pre-run script**: Define Bash code that executes before the pipeline launches in the same environment where Nextflow runs. + :::info + Pre-run scripts are useful for defining executor settings, troubleshooting, and defining a specific version of Nextflow with the `NXF_VER` environment variable. + + ![Specify NF version in pre-run script](assets/sp-cloud-pre-run-options.gif) + ::: + +:::note +Pre-filled pipeline settings (such as compute environment, config profiles, and pipeline parameters) can be overridden during pipeline launch by workspace participants with the necessary [permissions](../../orgs-and-teams/roles). +::: + +After you have populated the appropriate fields, select **Add**. Your pipeline is now available for workspace participants to launch in the preconfigured compute environment. 
+ + + + + + diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/all_runs_view.gif b/platform-enterprise/getting-started/quickstart-demo/assets/all_runs_view.gif new file mode 100644 index 000000000..5e31103db Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/all_runs_view.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/connect-to-studio.png b/platform-enterprise/getting-started/quickstart-demo/assets/connect-to-studio.png new file mode 100644 index 000000000..c273c7a4d Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/connect-to-studio.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/create-a-data-link.png b/platform-enterprise/getting-started/quickstart-demo/assets/create-a-data-link.png new file mode 100644 index 000000000..52656ac28 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/create-a-data-link.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/create-data-studio.gif b/platform-enterprise/getting-started/quickstart-demo/assets/create-data-studio.gif new file mode 100644 index 000000000..ae5bde638 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/create-data-studio.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/dashboard_view.gif b/platform-enterprise/getting-started/quickstart-demo/assets/dashboard_view.gif new file mode 100644 index 000000000..8b427d520 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/dashboard_view.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/data-explorer-add-bucket.gif b/platform-enterprise/getting-started/quickstart-demo/assets/data-explorer-add-bucket.gif new file mode 100644 index 000000000..f3c13420e Binary files /dev/null and 
b/platform-enterprise/getting-started/quickstart-demo/assets/data-explorer-add-bucket.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/data-explorer-preview-files.gif b/platform-enterprise/getting-started/quickstart-demo/assets/data-explorer-preview-files.gif new file mode 100644 index 000000000..56c254ccd Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/data-explorer-preview-files.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/data-explorer-view-details.gif b/platform-enterprise/getting-started/quickstart-demo/assets/data-explorer-view-details.gif new file mode 100644 index 000000000..6e6594eed Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/data-explorer-view-details.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/general-run-details.gif b/platform-enterprise/getting-started/quickstart-demo/assets/general-run-details.gif new file mode 100644 index 000000000..6f1c2c97c Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/general-run-details.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/generate-access-token.gif b/platform-enterprise/getting-started/quickstart-demo/assets/generate-access-token.gif new file mode 100644 index 000000000..05855ec7e Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/generate-access-token.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/go-to-workspace.gif b/platform-enterprise/getting-started/quickstart-demo/assets/go-to-workspace.gif new file mode 100644 index 000000000..58ee3ce3d Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/go-to-workspace.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/landing_page.png 
b/platform-enterprise/getting-started/quickstart-demo/assets/landing_page.png new file mode 100644 index 000000000..97de920c3 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/landing_page.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/logo.svg b/platform-enterprise/getting-started/quickstart-demo/assets/logo.svg new file mode 100644 index 000000000..ac470e9eb --- /dev/null +++ b/platform-enterprise/getting-started/quickstart-demo/assets/logo.svg @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/mount-data-into-studio.gif b/platform-enterprise/getting-started/quickstart-demo/assets/mount-data-into-studio.gif new file mode 100644 index 000000000..1fb29b477 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/mount-data-into-studio.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/optimize-configuration.gif b/platform-enterprise/getting-started/quickstart-demo/assets/optimize-configuration.gif new file mode 100644 index 000000000..211c46d65 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/optimize-configuration.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/platform-cli.png b/platform-enterprise/getting-started/quickstart-demo/assets/platform-cli.png new file mode 100644 index 000000000..d93acb717 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/platform-cli.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/reports-preview.png b/platform-enterprise/getting-started/quickstart-demo/assets/reports-preview.png new file mode 100644 index 000000000..7cd1f069b Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/reports-preview.png differ diff --git 
a/platform-enterprise/getting-started/quickstart-demo/assets/reports-tab.png b/platform-enterprise/getting-started/quickstart-demo/assets/reports-tab.png new file mode 100755 index 000000000..5d25097cc Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/reports-tab.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-rshiny-app-explore.gif b/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-rshiny-app-explore.gif new file mode 100644 index 000000000..4e2b0814a Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-rshiny-app-explore.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-rshiny-pca-plot.gif b/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-rshiny-pca-plot.gif new file mode 100644 index 000000000..f0934d557 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-rshiny-pca-plot.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-rshiny-volcano-plot.gif b/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-rshiny-volcano-plot.gif new file mode 100644 index 000000000..738db8d76 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-rshiny-volcano-plot.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-rstudio-view.gif b/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-rstudio-view.gif new file mode 100644 index 000000000..860f7785c Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-rstudio-view.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-run-rshiny-app.png 
b/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-run-rshiny-app.png new file mode 100644 index 000000000..a26c7f9f2 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-run-rshiny-app.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-studio-details.gif b/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-studio-details.gif new file mode 100644 index 000000000..1c94faf90 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/rnaseq-diffab-studio-details.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/send-studio-link.png b/platform-enterprise/getting-started/quickstart-demo/assets/send-studio-link.png new file mode 100644 index 000000000..476efb2f1 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/send-studio-link.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/seqera-biotech-stack.png b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-biotech-stack.png new file mode 100644 index 000000000..929b6c6bb Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-biotech-stack.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/seqera-containers-arch-settings.gif b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-containers-arch-settings.gif new file mode 100644 index 000000000..71f51b51d Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-containers-arch-settings.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/seqera-containers-build-details.gif b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-containers-build-details.gif new file mode 100644 index 000000000..45eedf09b Binary files /dev/null and 
b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-containers-build-details.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/seqera-containers-create.gif b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-containers-create.gif new file mode 100644 index 000000000..35ecb9779 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-containers-create.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/seqera-containers-singularity.gif b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-containers-singularity.gif new file mode 100644 index 000000000..c86a239ac Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-containers-singularity.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/seqera-one-platform.png b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-one-platform.png new file mode 100644 index 000000000..42ee49492 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-one-platform.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/seqera-pipelines-add-pipeline.gif b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-pipelines-add-pipeline.gif new file mode 100644 index 000000000..b369b3ffc Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-pipelines-add-pipeline.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/seqera-pipelines-launch-cli.png b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-pipelines-launch-cli.png new file mode 100644 index 000000000..9c5edfefe Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-pipelines-launch-cli.png differ diff --git 
a/platform-enterprise/getting-started/quickstart-demo/assets/seqera-pipelines-overview.gif b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-pipelines-overview.gif new file mode 100644 index 000000000..78fb16462 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/seqera-pipelines-overview.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-add-a-dataset.gif b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-add-a-dataset.gif new file mode 100644 index 000000000..07ab48ba3 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-add-a-dataset.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-add-rnaseq.gif b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-add-rnaseq.gif new file mode 100644 index 000000000..7a22ac8bf Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-add-rnaseq.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-cached-processes.gif b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-cached-processes.gif new file mode 100644 index 000000000..ad5640ab0 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-cached-processes.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-data-explorer.gif b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-data-explorer.gif new file mode 100644 index 000000000..3f7b4d520 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-data-explorer.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-launch-form.gif b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-launch-form.gif new file mode 100644 index 
000000000..2e64cba6e Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-launch-form.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-launch-parameters-input.gif b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-launch-parameters-input.gif new file mode 100644 index 000000000..789fcb371 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-launch-parameters-input.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-launch-parameters-outdir.gif b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-launch-parameters-outdir.gif new file mode 100644 index 000000000..26d08b1a0 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-launch-parameters-outdir.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-pipeline-params.gif b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-pipeline-params.gif new file mode 100644 index 000000000..c55c4d5a0 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-pipeline-params.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-pre-run-options.gif b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-pre-run-options.gif new file mode 100644 index 000000000..3492ce70e Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-pre-run-options.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-resume-a-run.gif b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-resume-a-run.gif new file mode 100644 index 000000000..57170cdbe Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-resume-a-run.gif differ 
diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-run-info.gif b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-run-info.gif new file mode 100644 index 000000000..eeb578a08 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-run-info.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-signin.gif b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-signin.gif new file mode 100644 index 000000000..e8d780c94 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-signin.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-task-data-explorer.gif b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-task-data-explorer.gif new file mode 100644 index 000000000..fffa631d2 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-task-data-explorer.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-view-all-runs.gif b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-view-all-runs.gif new file mode 100644 index 000000000..688993821 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/sp-cloud-view-all-runs.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/start-studio.gif b/platform-enterprise/getting-started/quickstart-demo/assets/start-studio.gif new file mode 100644 index 000000000..6587f500f Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/start-studio.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/stop-a-studio.png b/platform-enterprise/getting-started/quickstart-demo/assets/stop-a-studio.png new file mode 100644 index 000000000..e42243ec5 Binary files /dev/null and 
b/platform-enterprise/getting-started/quickstart-demo/assets/stop-a-studio.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/studio-checkpoints.png b/platform-enterprise/getting-started/quickstart-demo/assets/studio-checkpoints.png new file mode 100644 index 000000000..41c04a56d Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/studio-checkpoints.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/studio-create-jupyter.gif b/platform-enterprise/getting-started/quickstart-demo/assets/studio-create-jupyter.gif new file mode 100644 index 000000000..0cacf30a5 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/studio-create-jupyter.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/studio-jupyter-notebook-example.png b/platform-enterprise/getting-started/quickstart-demo/assets/studio-jupyter-notebook-example.png new file mode 100644 index 000000000..495d06fb9 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/studio-jupyter-notebook-example.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/studios-overview.png b/platform-enterprise/getting-started/quickstart-demo/assets/studios-overview.png new file mode 100644 index 000000000..5e080bc0e Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/studios-overview.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/task-details.gif b/platform-enterprise/getting-started/quickstart-demo/assets/task-details.gif new file mode 100644 index 000000000..ea64b3c36 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/task-details.gif differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/trimmer-settings.png 
b/platform-enterprise/getting-started/quickstart-demo/assets/trimmer-settings.png new file mode 100644 index 000000000..fa64d84ef Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/trimmer-settings.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/assets/user-settings.png b/platform-enterprise/getting-started/quickstart-demo/assets/user-settings.png new file mode 100644 index 000000000..16bad3f36 Binary files /dev/null and b/platform-enterprise/getting-started/quickstart-demo/assets/user-settings.png differ diff --git a/platform-enterprise/getting-started/quickstart-demo/automation.md b/platform-enterprise/getting-started/quickstart-demo/automation.md new file mode 100644 index 000000000..2e894069d --- /dev/null +++ b/platform-enterprise/getting-started/quickstart-demo/automation.md @@ -0,0 +1,102 @@ +--- +title: "Automation" +description: "An introduction to automation with APIs and CLI tools in Seqera Platform" +date: "21 Jul 2024" +tags: [platform, automation, api, cli, seqerakit] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +Seqera Platform provides multiple methods of programmatic interaction to automate the execution of pipelines, chain pipelines together, and integrate Platform with third-party services. + +### Platform API + +The Seqera Platform public API is the lowest-level method of programmatic interaction. All operations available in the user interface can be achieved through the API. + +The API can be used to trigger the launch of pipelines based on a file event (such as the upload of a file to a bucket) or completion of a previous run. + +The API can be accessed from `https://api.cloud.seqera.io`. + +The full list of endpoints is available in Seqera's [OpenAPI schema](https://cloud.seqera.io/openapi/index.html). The API requires an authentication token to be specified in every API request. This can be created in your user menu under **Your tokens**. 
+ +![Platform access token](./assets/generate-access-token.gif) + +The token is only displayed once. Store your token in a secure place. Use this token to authenticate requests to the API. + +
+ **Example pipeline launch API request** + ``` + curl -X POST "https://api.cloud.seqera.io/workflow/launch?workspaceId=38659136604200" \ + -H "Accept: application/json" \ + -H "Authorization: Bearer <your-access-token>" \ + -H "Content-Type: application/json" \ + -H "Accept-Version:1" \ + -d '{ + "launch": { + "computeEnvId": "hjE97A8TvD9PklUb0hwEJ", + "runName": "first-time-pipeline-api-byname", + "pipeline": "first-time-pipeline", + "workDir": "s3://nf-ireland", + "revision": "master" + } + }' + ``` + +
+ + +### Platform CLI + +For bioinformaticians and scientists more comfortable with the CLI, Platform uses a command line utility called `tw` to manage resources. + +The CLI provides an interface to launch pipelines, manage compute environments, retrieve run metadata, and monitor runs on Platform. It provides a Nextflow-like experience for bioinformaticians who prefer the CLI and allows you to store Seqera resource configuration (pipelines, compute environments, etc.) as code. The CLI is built on top of the [Seqera Platform API](#platform-api) but is simpler to use. For example, you can refer to resources by name instead of their unique identifier. + +![Seqera Platform CLI](./assets/platform-cli.png) + +See [CLI](../../cli/overview) for installation and usage details. + +:::info +**Example pipeline launch CLI command** + +```bash +tw launch hello --workspace community/showcase +``` +::: + +### seqerakit + +`seqerakit` is a Python wrapper for the Platform CLI which can be leveraged to automate the creation of all Platform entities via a YAML format configuration file. It can be used to automate the creation of entities, from organizations and workspaces to pipelines and compute environments, and the execution of workflows with one YAML file. + +The key features are: + +- **Simple configuration**: All of the command-line options available in the Platform CLI can be defined in simple YAML format. +- **Infrastructure as Code**: Enable users to manage and provision their infrastructure specifications. +- **Automation**: End-to-end creation of entities within Platform, from adding an organization to launching pipeline(s) within that organization. + +See the [seqerakit GitHub repository](https://github.com/seqeralabs/seqera-kit/) for installation and usage details. + +
+ **Example pipeline launch seqerakit configuration and command** + + Create a YAML file called `hello.yaml`: + + ```yaml + launch: + - name: "hello-world" + url: "https://github.com/nextflow-io/hello" + workspace: "seqeralabs/showcase" + ``` + + Then run seqerakit: + + ```bash + $ seqerakit hello.yaml + ``` + +
+ +## Resources +Common use cases for the automation methods above include automatically executing a pipeline as data arrives from a sequencer, or integrating Platform into a broader user-facing application. For a step-by-step guide to set up these automation methods, see [Workflow automation for Nextflow pipelines](https://seqera.io/blog/workflow-automation/). + +For examples of how to use automation methods, see [Automating pipeline execution with Nextflow and Tower](https://seqera.io/blog/automating-workflows-with-nextflow-and-tower/). \ No newline at end of file diff --git a/platform-enterprise/getting-started/quickstart-demo/launch-pipelines.md b/platform-enterprise/getting-started/quickstart-demo/launch-pipelines.md new file mode 100644 index 000000000..4ffc3cdae --- /dev/null +++ b/platform-enterprise/getting-started/quickstart-demo/launch-pipelines.md @@ -0,0 +1,96 @@ +--- +title: "Launch pipelines" +description: "An introduction to launching nf-core/rnaseq in the community/showcase workspace" +date: "8 Jul 2024" +tags: [platform, launch, pipelines, launchpad, showcase tutorial] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +:::info +This tutorial provides an introduction to launching pipelines in Seqera Platform. + +**Prerequisites:** +1. [Set up an organization and workspace](../workspace-setup). +1. Create a workspace [compute environment](../../compute-envs/overview) for your cloud or HPC compute infrastructure. +1. [Add a pipeline](./add-pipelines) to your workspace. +1. [Add your pipeline input data](./add-data). +::: + +The Launchpad in every Platform workspace allows users to easily create and share Nextflow pipelines that can be executed on any supported infrastructure, including all public clouds and most HPC schedulers. A Launchpad pipeline consists of a pre-configured workflow Git repository, [compute environment](../../compute-envs/overview), and launch parameters. 
+ +## Launch a pipeline + +:::note +This guide is based on version 3.15.1 of the [nf-core/rnaseq pipeline](https://github.com/nf-core/rnaseq). Launch form parameters and tools will differ for other pipelines. +::: + +Navigate to the Launchpad and select **Launch** next to your pipeline to open the launch form. + +The launch form consists of **General config**, **Run parameters**, and **Advanced options** sections to specify your run parameters before execution, and an execution summary. Use section headings or select the **Previous** and **Next** buttons at the bottom of the page to navigate between sections. + +
+ Nextflow parameter schema + + The launch form lets you configure the pipeline execution. The pipeline parameters in this form are rendered from a [pipeline schema](../../pipeline-schema/overview) file in the root of the pipeline Git repository. `nextflow_schema.json` is a simple JSON-based schema describing pipeline parameters for pipeline developers to easily adapt their in-house Nextflow pipelines to be executed in Platform. + + :::tip + See [Best Practices for Deploying Pipelines with the Seqera Platform](https://seqera.io/blog/best-practices-for-deploying-pipelines-with-seqera-platform/) to learn how to build the parameter schema for any Nextflow pipeline automatically with tooling maintained by the nf-core community. + ::: + +
+ +### General config + +![General config tab](../_images/launch-form-2.gif) + +- **Pipeline to launch**: The pipeline Git repository name or URL. For saved pipelines, this is prefilled and cannot be edited. +- **Revision number**: A valid repository commit ID, tag, or branch name. For saved pipelines, this is prefilled and cannot be edited. +- (*Optional*) **Config profiles**: One or more [configuration profile](https://www.nextflow.io/docs/latest/config.html#config-profiles) names to use for the execution. +- **Workflow run name**: An identifier for the run, pre-filled with a random name. This can be customized. +- (*Optional*) **Labels**: Assign new or existing [labels](../../labels/overview) to the run. +- **Compute environment**: Select an existing workspace [compute environment](../../compute-envs/overview). +- **Work directory**: The (cloud or local) file storage path where pipeline scratch data is stored. Platform will create a scratch sub-folder if only a cloud bucket location is specified. + :::note + The credentials associated with the compute environment must have access to the work directory. + ::: + +### Run parameters + +![Run parameters](../_images/launch-form-3.gif) + +There are three ways to enter **Run parameters** prior to launch: + +- The **Input form view** displays form fields to enter text or select attributes from lists, and browse input and output locations with [Data Explorer](../../data/data-explorer). +- The **Config view** displays raw configuration text that you can edit directly. Select JSON or YAML format from the **View as** list. +- **Upload params file** allows you to upload a JSON or YAML file with run parameters. 
+ +Specify your pipeline input and output and modify other pipeline parameters as needed: + +#### input + +Use **Browse** to select your pipeline input data: + +- In the **Data Explorer** tab, select the existing cloud bucket that contains your samplesheet, browse or search for the samplesheet file, and select the chain icon to copy the file path before closing the data selection window and pasting the file path in the input field. +- In the **Datasets** tab, search for and select your existing dataset. + +#### outdir + +Use the `outdir` parameter to specify where the pipeline outputs are published. `outdir` must be unique for each pipeline run. Otherwise, your results will be overwritten. + +**Browse** and copy cloud storage directory paths using Data Explorer, or enter a path manually. + +#### Pipeline-specific parameters + +Modify other parameters to customize the pipeline execution through the parameters form. For example, in nf-core/rnaseq, change the `trimmer` under **Read trimming options** to `fastp` instead of `trimgalore`. + +![Read trimming options](./assets/trimmer-settings.png) + +### Advanced settings + +- Use [resource labels](../../resource-labels/overview) to tag the computing resources created during the workflow execution. While resource labels for the run are inherited from the compute environment and pipeline, workspace admins can override them from the launch form. Applied resource label names must be unique. +- [Pipeline secrets](../../secrets/overview) store keys and tokens used by workflow tasks to interact with external systems. Enter the names of any stored user or workspace secrets required for the workflow execution. +- See [Advanced options](../../launch/advanced) for more details. + +After you have filled the necessary launch details, select **Launch**. The **Runs** tab shows your new run in a **submitted** status at the top of the list. 
Select the run name to navigate to the [**View Workflow Run**](../../monitoring/overview) page and view the configuration, parameters, status of individual tasks, and run report. \ No newline at end of file diff --git a/platform-enterprise/getting-started/quickstart-demo/logo.svg b/platform-enterprise/getting-started/quickstart-demo/logo.svg new file mode 100644 index 000000000..ac470e9eb --- /dev/null +++ b/platform-enterprise/getting-started/quickstart-demo/logo.svg @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/platform-enterprise/getting-started/quickstart-demo/monitor-runs.md b/platform-enterprise/getting-started/quickstart-demo/monitor-runs.md new file mode 100644 index 000000000..0808a6078 --- /dev/null +++ b/platform-enterprise/getting-started/quickstart-demo/monitor-runs.md @@ -0,0 +1,43 @@ +--- +title: "Monitor runs" +description: "An introduction to monitoring runs in Seqera Platform" +date: "8 Jul 2024" +tags: [platform, monitoring] +--- + +There are several ways to monitor pipeline runs in Seqera Platform: + +### Workspace view + +Access a full history of all runs in a given workspace via the **Runs** tab. + +![View runs](assets/sp-cloud-view-all-runs.gif) + +### All runs view + +Access the **All runs** page from the user menu. This page provides a comprehensive overview of the runs across the entire Platform instance. The default view includes all organizations and workspaces accessible to the user. However, you can select visible workspaces from the dropdown next to **View**, and filter for a particular set of runs using any of the following fields: + +- `status` +- `label` +- `workflowId` +- `runName` +- `username` +- `projectName` +- `after: YYYY-MM-DD` +- `before: YYYY-MM-DD` +- `sessionId` +- `is:starred` + +For example: + +``` +rnaseq username:johndoe status:succeeded after:2024-01-01 +``` + +![All Runs view](assets/all_runs_view.gif) + +### Dashboard view + +Access the **Dashboard** from the user menu. 
This page provides an overview of the total runs across the Platform instance, grouped by run status. The default view includes all organizations and workspaces accessible to the user. Select visible workspaces from the dropdown next to **View** and filter by time, including a custom date range up to 12 months. Select **Export data** to download a CSV file with the available export data. + +![Dashboard view](./assets/dashboard_view.gif) \ No newline at end of file diff --git a/platform-enterprise/getting-started/quickstart-demo/pipeline-optimization.md b/platform-enterprise/getting-started/quickstart-demo/pipeline-optimization.md new file mode 100644 index 000000000..dff1c7cb7 --- /dev/null +++ b/platform-enterprise/getting-started/quickstart-demo/pipeline-optimization.md @@ -0,0 +1,38 @@ +--- +title: "Pipeline optimization" +description: "An introduction to pipeline optimization in Seqera Platform" +date: "8 Jul 2024" +tags: [platform, runs, pipeline optimization] +--- + +Seqera Platform's task-level resource usage metrics allow you to determine the resources requested for a task and what was actually used. This information helps you fine-tune your configuration more accurately. + +However, manually adjusting resources for every task in your pipeline is impractical. Instead, you can leverage the pipeline optimization feature available on the Launchpad. + +Pipeline optimization analyzes resource usage data from previous runs to optimize the resource allocation for future runs. After a successful run, optimization becomes available, indicated by the lightbulb icon next to the pipeline turning black. + +### Optimize nf-core/rnaseq + +Navigate back to the Launchpad and select the lightbulb icon next to the *nf-core/rnaseq* pipeline to view the optimized profile. You have the flexibility to tailor the optimization's target settings and incorporate a retry strategy as needed. 
+ +### View optimized configuration + +When you select the lightbulb, you can access an optimized configuration profile in the second tab of the **Customize optimization profile** window. + +This profile consists of Nextflow configuration settings for each process and each resource directive (where applicable): **cpus**, **memory**, and **time**. The optimized setting for a given process and resource directive is based on the maximum use of that resource across all tasks in that process. + +Once optimization is selected, subsequent runs of that pipeline will inherit the optimized configuration profile, indicated by the black lightbulb icon with a checkmark. + +:::note +Optimization profiles are generated from one run at a time, defaulting to the most recent run, and _not_ an aggregation of previous runs. +::: + +![Optimized configuration](assets/optimize-configuration.gif) + +Verify the optimized configuration of a given run by inspecting the resource usage plots for that run and these fields in the run's task table: + +| Description | Key | +| ------------ | ---------------------- | +| CPU usage | `pcpu` | +| Memory usage | `peakRss` | +| Runtime | `start` and `complete` | diff --git a/platform-enterprise/getting-started/quickstart-demo/requirements.txt b/platform-enterprise/getting-started/quickstart-demo/requirements.txt new file mode 100644 index 000000000..0f35d1166 --- /dev/null +++ b/platform-enterprise/getting-started/quickstart-demo/requirements.txt @@ -0,0 +1,9 @@ +markdown~=3.5 +mkdocs~=1.5.3 +mkdocs-material~=9.5.6 +mkdocs-material-extensions>=1.0 +pygments~=2.16 +pymdown-extensions~=10.2 +jinja2>=2.11.1 +pillow +cairosvg \ No newline at end of file diff --git a/platform-enterprise/getting-started/quickstart-demo/samplesheet_test.csv b/platform-enterprise/getting-started/quickstart-demo/samplesheet_test.csv new file mode 100644 index 000000000..b183db0a4 --- /dev/null +++ b/platform-enterprise/getting-started/quickstart-demo/samplesheet_test.csv @@ -0,0 
+1,8 @@ +sample,fastq_1,fastq_2,strandedness +WT_REP1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_2.fastq.gz,auto +WT_REP1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz,auto +WT_REP2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz,reverse +RAP1_UNINDUCED_REP1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz,,reverse +RAP1_UNINDUCED_REP2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz,,reverse +RAP1_UNINDUCED_REP2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz,,reverse +RAP1_IAA_30M_REP1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz,reverse diff --git a/platform-enterprise/getting-started/quickstart-demo/studios.md b/platform-enterprise/getting-started/quickstart-demo/studios.md new file mode 100644 index 000000000..13ac04bda --- /dev/null +++ b/platform-enterprise/getting-started/quickstart-demo/studios.md @@ -0,0 +1,87 @@ +--- +title: "Studios" +description: "An introduction to Studios in Seqera Platform" +date: "8 Jul 2024" +tags: [platform, studios] +--- + +:::info +This guide provides an introduction to Studios using a demo Studio in the Community Showcase workspace. See [Studios](../../studios/overview) to learn how to create Studios in your own workspace. 
+::: + +Interactive analysis of pipeline results is often performed in platforms like Jupyter Notebook or an R-IDE. Setting up the infrastructure for these platforms, including accessing pipeline data and the necessary bioinformatics packages, can be complex and time-consuming. + +Studios streamlines the process of creating interactive analysis environments for Platform users. With built-in templates, creating a Studio is as simple as adding and sharing pipelines or datasets. Platform manages all the details, enabling you to easily select your preferred interactive tool and analyze your data. + +In the **Studios** tab, you can monitor and see the details of the Studios in the Community Showcase workspace. + +Select the options menu next to a Studio to: +- See Studio details +- Start or stop the Studio, and connect to a running Studio session +- Copy the Studio URL to share it with collaborators + +### Analyze RNAseq data in Studios + +Studios is used to perform bespoke analysis on the results of upstream workflows. For example, in the Community Showcase workspace we have run the *nf-core/rnaseq* workflow to quantify gene expression, followed by *nf-core/differentialabundance* to derive differential expression statistics. The workspace contains a Studio with these results from cloud storage mounted into the Studio to perform further analysis. One of these outputs is a web app, which can be deployed for interactive analysis. + +### Open the RNAseq analysis Studio + +Select the *rnaseq_to_differentialabundance* Studio. This Studio consists of an R-IDE that uses an existing compute environment available in the showcase workspace. The Studio also contains mounted data generated from the *nf-core/rnaseq* and subsequent *nf-core/differentialabundance* pipeline runs, directly from AWS S3. + +![RNAseq Studio details](assets/rnaseq-diffab-studio-details.gif) + +:::info +Studios allows you to specify the resources each Studio will use. 
When [creating your own Studios](../../studios/overview) with shared compute environment resources, you must allocate sufficient resources to the compute environment to prevent Studio or pipeline run interruptions. +::: + +### Connect to the Studio + +This Studio will start an R-IDE which already contains the necessary R packages for deploying a web app to interact with various visualizations of the RNAseq data. The Studio also contains an R Markdown document with the commands in place to generate the application. + +Deploy the web app in the Studio by selecting the play button on the last chunk of the R script: + +![Run RShiny app](./assets/rnaseq-diffab-run-rshiny-app.png) + +### Explore results in the web app + +The web app will deploy in a separate browser window, providing a data interface. Here you can view information about your sample data, perform QC or exploratory analysis, and view the differential expression analyses. + +#### Sample clustering with PCA plots + +In the **QC/Exploratory** tab, select the PCA (Principal Component Analysis) plot to visualize how the samples group together based on their gene expression profiles. + +In this example, we used RNA sequencing data from the publicly-available ENCODE project, which includes samples from four different cell lines: + +- **GM12878**: a lymphoblastoid cell line +- **K562**: a chronic myelogenous leukemia cell line +- **MCF-7**: a breast cancer cell line +- **H1-hESC**: a human embryonic stem cell line + +What to look for in the PCA plot: + +- **Replicate clustering**: Ideally, replicates of the same cell type should cluster closely together. For example, replicates of the MCF-7 cells group together. This indicates consistent gene expression profiles among replicates. +- **Cell type separation**: Different cell types should form distinct clusters. For instance, GM12878, K562, MCF-7, and H1-hESC cells should each form their own separate clusters, reflecting their unique gene expression patterns. 
+ +From this PCA plot, you can gain insights into the consistency and quality of your sequencing data, identify any potential issues, and understand the major sources of variation among your samples - all directly in Platform. + +#### Gene expression changes with Volcano plots + +In the **Differential** tab, select **Volcano plots** to compare genes with significant changes in expression between two samples. For example, filter for `Type: H1 vs MCF-7` to view the differences in expression between these two cell lines. + +1. **Identify upregulated and downregulated genes**: The x-axis of the volcano plot represents the log2 fold change in gene expression between the H1 and MCF-7 cell lines, while the y-axis represents the statistical significance of the changes. + + - **Upregulated genes in MCF-7**: Genes on the left side of the plot (negative fold change) are upregulated in the MCF-7 samples compared to H1. For example, the _SHH_ gene, which is known to be upregulated in cancer cell lines, prominently appears here. + +2. **Filtering for specific genes**: If you are interested in specific genes, use the filter function. For example, filter for the _SHH_ gene in the table below the plot. This allows you to quickly locate and examine this gene in more detail. + +3. **Gene expression bar plot**: After filtering for the _SHH_ gene, select it to navigate to a gene expression bar plot. This plot will show you the expression levels of _SHH_ across all samples, allowing you to see in which samples it is most highly expressed. + + - Here, _SHH_ is most highly expressed in MCF-7, which aligns with its known role in cancer cell proliferation. + +Using the volcano plot, you can effectively identify and explore the genes with the most significant changes in expression between your samples, providing a deeper understanding of the molecular differences. 
+
+![RShiny volcano plot](assets/rnaseq-diffab-rshiny-volcano-plot.gif)
+
+### Collaborate in the Studio
+
+To share the results of your RNAseq analysis or allow colleagues to perform exploratory analysis, share a link to the Studio by selecting the options menu for the Studio you want to share, then select **Copy Studio URL**. With this link, other authenticated users with the **Connect** [role](../../orgs-and-teams/roles) (or greater) can access the session directly.
diff --git a/platform-enterprise/getting-started/quickstart-demo/view-run-information.md b/platform-enterprise/getting-started/quickstart-demo/view-run-information.md
new file mode 100644
index 000000000..0cb77d7e9
--- /dev/null
+++ b/platform-enterprise/getting-started/quickstart-demo/view-run-information.md
@@ -0,0 +1,138 @@
+---
+title: "View run information"
+description: "View pipeline run details in Seqera Platform"
+date: "8 Jul 2024"
+tags: [platform, runs, pipelines, monitoring, showcase tutorial]
+---
+
+When you launch a pipeline, you are directed to the **Runs** tab which contains all executed workflows, with your submitted run at the top of the list.
+
+Each new or resumed run is given a random name, which can be customized prior to launch. Each row corresponds to a specific run. As a job executes, it can transition through the following states:
+
+- **submitted**: Pending execution
+- **running**: Running
+- **succeeded**: Completed successfully
+- **failed**: Successfully executed, where at least one task failed with a terminate error strategy
+- **cancelled**: Stopped forcibly during execution
+- **unknown**: Indeterminate status
+
+![View runs](assets/sp-cloud-view-all-runs.gif)
+
+### View run details for *nf-core/rnaseq*
+
+The pipeline launched [previously](./launch-pipelines) is listed on the **Runs** tab. Select it from the list to view the run details.
+ +#### Run details page + +As the pipeline runs, run details will populate with the following tabs: + +- **Command-line**: The Nextflow command invocation used to run the pipeline. This contains details about the pipeline version (`-r 3.14.0` flag) and profile, if specified (`-profile test` flag). +- **Parameters**: The exact set of parameters used in the execution. This is helpful for reproducing the results of a previous run. +- **Configuration**: The full Nextflow configuration settings used for the run. This includes parameters, but also settings specific to task execution (such as memory, CPUs, and output directory). +- **Datasets**: Link to datasets, if any were used in the run. +- **Execution Log**: A summarized Nextflow log providing information about the pipeline and the status of the run. +- **Reports**: View pipeline outputs directly in the Platform. + +![View the nf-core/rnaseq run](assets/sp-cloud-run-info.gif) + +### View reports + +Most Nextflow pipelines generate reports or output files which are useful to inspect at the end of the pipeline execution. Reports can contain quality control (QC) metrics that are important to assess the integrity of the results. + +![Reports tab](assets/reports-tab.png) + +For example, for the *nf-core/rnaseq* pipeline, view the [MultiQC](https://docs.seqera.io/multiqc) report generated. MultiQC is a helpful reporting tool to generate aggregate statistics and summaries from bioinformatics tools. + +![Reports MultiQC preview](assets/reports-preview.png) + +The paths to report files point to a location in cloud storage (in the `outdir` directory specified during launch), but you can view the contents directly and download each file without navigating to the cloud or a remote filesystem. 
+ +#### Specify outputs in reports + +To customize and instruct Platform where to find reports generated by the pipeline, a [tower.yml](https://github.com/nf-core/rnaseq/blob/master/tower.yml) file that contains the locations of the generated reports must be included in the pipeline repository. + +In the *nf-core/rnaseq* pipeline, the `MULTIQC` process step generates a MultiQC report file in HTML format: + +```yaml +reports: + multiqc_report.html: + display: "MultiQC HTML report" +``` + +:::info +See [Reports](../../reports/overview) to configure reports for pipeline runs in your own workspace. +::: + +### View general information + +The run details page includes general information about who executed the run and when, the Git hash and tag used, and additional details about the compute environment and Nextflow version used. + +![General run information](assets/general-run-details.gif) + +The **General** panel displays top-level information about a pipeline run: + +- Unique workflow run ID +- Workflow run name +- Timestamp of pipeline start (the time displayed is based on your local timezone defined in your device's system settings) +- Pipeline version and Git commit ID +- Nextflow session ID +- Username of the launcher +- Work directory path + +### View details for a task + +Scroll down the page to view: + +- The progress of individual pipeline **Processes** +- **Aggregated stats** for the run (total walltime, CPU hours) +- A **Task details** table for every task in the workflow +- **Workflow metrics** (CPU efficiency, memory efficiency) + +The task details table provides further information on every step in the pipeline, including task statuses and metrics. + +### Task details + +Select a task in the task table to open the **Task details** dialog. The dialog has three tabs: **About**, **Execution log**, and **Data Explorer**. + +#### About + +The **About** tab includes: + +1. **Name**: Process name and tag +2. 
**Command**: Task script, defined in the pipeline process +3. **Status**: Exit code, task status, and number of attempts +4. **Work directory**: Directory where the task was executed +5. **Environment**: Environment variables that were supplied to the task +6. **Execution time**: Metrics for task submission, start, and completion time (the time displayed is based on your local timezone defined in your device's system settings) +7. **Resources requested**: Metrics for the resources requested by the task +8. **Resources used**: Metrics for the resources used by the task + +![Task details window](assets/task-details.gif) + +#### Execution log + +The **Execution log** tab provides a real-time log of the selected task's execution. Task execution and other logs (such as stdout and stderr) are available for download from here, if still available in your compute environment. + +### Task work directory in Data Explorer + +If a task fails, a good place to begin troubleshooting is the task's work directory. + +Nextflow hash-addresses each task of the pipeline and creates unique directories based on these hashes. Instead of navigating through a bucket on the cloud console or filesystem to find the contents of this directory, use the **Data Explorer** tab in the Task window to view the work directory. + +Data Explorer allows you to view the log files and output files generated for each task in its working directory, directly within Platform. You can view, download, and retrieve the link for these intermediate files in cloud storage from the **Data Explorer** tab to simplify troubleshooting. + +![Task data explorer](assets/sp-cloud-task-data-explorer.gif) + +### Resume a pipeline + +Platform uses [Nextflow resume](../../launch/cache-resume) to resume a failed or cancelled workflow run with the same parameters, using the cached results of previously completed tasks and only executing failed and pending tasks. 
+
+![Resume a run](assets/sp-cloud-resume-a-run.gif)
+
+:::info
+To resume a run in your own workspace:
+
+- Select **Resume** from the options menu next to the run.
+- Edit the parameters before launch, if needed.
+- If you have the appropriate [permissions](../../orgs-and-teams/roles), you can also edit the compute environment.
+:::
diff --git a/platform-enterprise/getting-started/rnaseq.md b/platform-enterprise/getting-started/rnaseq.md
new file mode 100644
index 000000000..8684d96c2
--- /dev/null
+++ b/platform-enterprise/getting-started/rnaseq.md
@@ -0,0 +1,752 @@
+---
+title: "RNA-Seq"
+description: "An introduction to running nf-core/rnaseq in Seqera Platform"
+date: "21 Jul 2024"
+tags: [platform, seqera pipelines, studios, rnaseq, compute environment, aws]
+toc_max_heading_level: 2
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+This guide details how to run bulk RNA sequencing (RNA-Seq) data analysis, from quality control to differential expression analysis, on an AWS Batch compute environment in Platform. It includes:
+
+- Creating an AWS Batch compute environment to run your pipeline and analysis environment
+- Adding pipelines to your workspace
+- Importing your pipeline input data
+- Launching the pipeline and monitoring execution from your workspace
+- Setting up a custom analysis environment with Studios
+- Resource allocation guidance for RNA-Seq data
+
+:::info[**Prerequisites**]
+You will need the following to get started:
+
+- [Admin](../orgs-and-teams/roles) permissions in an existing organization workspace. See [Set up your workspace](./workspace-setup) to create an organization and workspace from scratch.
+- An existing AWS cloud account with access to the AWS Batch service.
+- Existing access credentials with permissions to create and manage resources in your AWS account. See [IAM](../compute-envs/aws-batch#iam) for guidance to set up IAM permissions for Platform.
+::: + +## Compute environment + +Compute and storage requirements for RNA-Seq analysis are dependent on the number of samples and the sequencing depth of your input data. See [RNA-Seq data and requirements](#rna-seq-data-and-requirements) for details on RNA-Seq datasets and the CPU and memory requirements for important steps of RNA-Seq pipelines. + +In this guide, you will create an AWS Batch compute environment with sufficient resources allocated to run the [nf-core/rnaseq](https://github.com/nf-core/rnaseq) pipeline with a large dataset. This compute environment will also be used to run a Studios R-IDE session for interactive analysis of the resulting pipeline data. + +:::note +The compute recommendations below are based on internal benchmarking performed by Seqera. See [RNA-Seq data and requirements](#rna-seq-data-and-requirements) for more information. +::: + +### Recommended compute environment resources + +The following compute resources are recommended for production RNA-Seq pipelines, depending on the size of your input dataset: + +| **Setting** | **Value** | +|--------------------------------|---------------------------------------| +| **Instance Types** | `m5,r5` | +| **vCPUs** | 2 - 8 | +| **Memory (GiB)** | 8 - 32 | +| **Max CPUs** | >500 | +| **Min CPUs** | 0 | + +#### Fusion file system + +The [Fusion](../supported_software/fusion/overview) file system enables seamless read and write operations to cloud object stores, leading to +simpler pipeline logic and faster, more efficient execution. While Fusion is not required to run *nf-core/rnaseq*, it is recommended for optimal performance. See [nf-core/rnaseq performance in Platform](#nf-corernaseq-performance-in-platform) at the end of this guide. + +Fusion works best with AWS NVMe instances (fast instance storage) as this delivers the fastest performance when compared to environments using only AWS EBS (Elastic Block Store). 
Batch Forge selects instances automatically based on your compute environment configuration, but you can optionally specify instance types. To enable fast instance storage (see Create compute environment below), you must select EC2 instances with NVMe SSD storage (`m5d` or `r5d` families). + +:::note +Fusion requires a license for use in Seqera Platform compute environments or directly in Nextflow. See [Fusion licensing](https://docs.seqera.io/fusion/licensing) for more information. +::: + +### Create compute environment + +![Add Platform compute environment](./_images/create-ce.gif) + +From the **Compute Environments** tab in your organization workspace, select **Add compute environment** and complete the following fields: + +| **Field** | **Description** | +|---------------------------------------|------------------------------------------------------------| +| **Name** | A unique name for the compute environment. | +| **Platform** | AWS Batch | +| **Credentials** | Select existing credentials, or **+** to create new credentials:| +| **Access Key** | AWS access key ID. | +| **Secret Key** | AWS secret access key. | +| **Region** | The target execution region. | +| **Pipeline work directory** | An S3 bucket path in the same execution region. | +| **Enable Wave Containers** | Use the Wave containers service to provision containers. | +| **Enable Fusion v2** | Access your S3-hosted data via the Fusion v2 file system. | +| **Enable fast instance storage** | Use NVMe instance storage to speed up I/O and disk access. Requires Fusion v2.| +| **Config Mode** | Batch Forge | +| **Provisioning Model** | Choose between Spot and On-demand instances. | +| **Max CPUs** | Sensible values for production use range between 2000 and 5000.| +| **Enable Fargate for head job** | Run the Nextflow head job using the Fargate container service to speed up pipeline launch. 
Requires Fusion v2.| +| **Allowed S3 buckets** | Additional S3 buckets or paths to be granted read-write permission for this compute environment. Add data paths to be mounted in your data studio here, if different from your pipeline work directory.| +| **Resource labels** | `name=value` pairs to tag the AWS resources created by this compute environment.| + + +## Add pipeline to Platform + +:::info +The [nf-core/rnaseq](https://github.com/nf-core/rnaseq) pipeline is a highly configurable and robust workflow designed to analyze RNA-Seq data. It performs quality control, alignment and quantification. + +![nf-core/rnaseq subway map](./_images/nf-core-rnaseq_metro_map_grey_static.svg) +::: + +[Seqera Pipelines](https://seqera.io/pipelines) is a curated collection of quality open-source pipelines that can be imported directly to your workspace Launchpad in Platform. Each pipeline includes a dataset to use in a test run to confirm compute environment compatibility in just a few steps. + +To use Seqera Pipelines to import the *nf-core/rnaseq* pipeline to your workspace: + +![Seqera Pipelines add to Launchpad](./_images/pipelines-add.gif) + +1. Search for *nf-core/rnaseq* and select **Launch** next to the pipeline name in the list. In the **Add pipeline** tab, select **Cloud** or **Enterprise** depending on your Platform account type, then provide the information needed for Seqera Pipelines to access your Platform instance: + - **Seqera Cloud**: Paste your Platform **Access token** and select **Next**. + - **Seqera Enterprise**: Specify the **Seqera Platform URL** (hostname) and **Base API URL** for your Enterprise instance, then paste your Platform **Access token** and select **Next**. + :::tip + If you do not have a Platform access token, select **Get your access token from Seqera Platform** to open the Access tokens page in a new browser tab. + ::: +1. Select your Platform **Organization**, **Workspace**, and **Compute environment** for the imported pipeline. +1. 
(Optional) Customize the **Pipeline Name** and **Pipeline Description**. +1. Select **Add Pipeline**. + +:::info +To add a custom pipeline not listed in Seqera Pipelines to your Platform workspace, see [Add pipelines](./quickstart-demo/add-pipelines#) for manual Launchpad instructions. +::: + +## Pipeline input data + +The [nf-core/rnaseq](https://github.com/nf-core/rnaseq) pipeline works with input datasets (samplesheets) containing sample names, FASTQ file locations (paths to FASTQ files in cloud or local storage), and strandedness. For example, the dataset used in the `test_full` profile is derived from the publicly available iGenomes collection of datasets, commonly used in bioinformatics analyses. + +This dataset represents RNA-Seq samples from various human cell lines (GM12878, K562, MCF7, and H1) with biological replicates, stored in an AWS S3 bucket (`s3://ngi-igenomes`) as part of the iGenomes resource. These RNA-Seq datasets consist of paired-end sequencing reads, which can be used to study gene expression patterns in different cell types. + +
+ **nf-core/rnaseq test_full profile dataset** + + | sample | fastq_1 | fastq_2 | strandedness | + |--------|---------|---------|--------------| + | GM12878_REP1 | s3://ngi-igenomes/test-data/rnaseq/SRX1603629_T1_1.fastq.gz | s3://ngi-igenomes/test-data/rnaseq/SRX1603629_T1_2.fastq.gz | reverse | + | GM12878_REP2 | s3://ngi-igenomes/test-data/rnaseq/SRX1603630_T1_1.fastq.gz | s3://ngi-igenomes/test-data/rnaseq/SRX1603630_T1_2.fastq.gz | reverse | + | K562_REP1 | s3://ngi-igenomes/test-data/rnaseq/SRX1603392_T1_1.fastq.gz | s3://ngi-igenomes/test-data/rnaseq/SRX1603392_T1_2.fastq.gz | reverse | + | K562_REP2 | s3://ngi-igenomes/test-data/rnaseq/SRX1603393_T1_1.fastq.gz | s3://ngi-igenomes/test-data/rnaseq/SRX1603393_T1_2.fastq.gz | reverse | + | MCF7_REP1 | s3://ngi-igenomes/test-data/rnaseq/SRX2370490_T1_1.fastq.gz | s3://ngi-igenomes/test-data/rnaseq/SRX2370490_T1_2.fastq.gz | reverse | + | MCF7_REP2 | s3://ngi-igenomes/test-data/rnaseq/SRX2370491_T1_1.fastq.gz | s3://ngi-igenomes/test-data/rnaseq/SRX2370491_T1_2.fastq.gz | reverse | + | H1_REP1 | s3://ngi-igenomes/test-data/rnaseq/SRX2370468_T1_1.fastq.gz | s3://ngi-igenomes/test-data/rnaseq/SRX2370468_T1_2.fastq.gz | reverse | + | H1_REP2 | s3://ngi-igenomes/test-data/rnaseq/SRX2370469_T1_1.fastq.gz | s3://ngi-igenomes/test-data/rnaseq/SRX2370469_T1_2.fastq.gz | reverse | + +
+ +In Platform, samplesheets and other data can be made easily accessible in one of two ways: +- Use **Data Explorer** to browse and interact with remote data from AWS S3, Azure Blob Storage, and Google Cloud Storage repositories, directly in your organization workspace. +- Use **Datasets** to upload structured data to your workspace in CSV (Comma-Separated Values) or TSV (Tab-Separated Values) format. + +
+ **Add a cloud bucket via Data Explorer** + + Private cloud storage buckets accessible with the credentials in your workspace are added to Data Explorer automatically by default. However, you can also add custom directory paths within buckets to your workspace to simplify direct access. + + To add individual buckets (or directory paths within buckets): + + ![Add public bucket](./quickstart-demo/assets/data-explorer-add-bucket.gif) + + 1. From the **Data Explorer** tab, select **Add cloud bucket**. + 1. Specify the bucket details: + - The cloud **Provider**. + - An existing cloud **Bucket path**. + - A unique **Name** for the bucket. + - The **Credentials** used to access the bucket. For public cloud buckets, select **Public**. + - An optional bucket **Description**. + 1. Select **Add**. + + You can now select data directly from this bucket as input when launching your pipeline, without the need to interact with cloud consoles or CLI tools. + +
+ +
+ **Add a dataset** + + From the **Datasets** tab, select **Add Dataset**. + + ![Add a dataset](./quickstart-demo/assets/sp-cloud-add-a-dataset.gif) + + Specify the following dataset details: + + - A **Name** for the dataset, such as `nf-core-rnaseq-dataset`. + - A **Description** for the dataset. + - Select the **First row as header** option to prevent Platform from parsing the header row of the samplesheet as sample data. + - Select **Upload file** and browse to your CSV or TSV samplesheet file in local storage, or simply drag and drop it into the box. + + The dataset is now listed in your organization workspace datasets and can be selected as input when launching your pipeline. + + :::info + Platform does not store the data used for analysis in pipelines. The dataset must specify the locations of data stored on your own infrastructure. + ::: + +
+ +## Launch pipeline + +:::note +This guide is based on version 3.15.1 of the *nf-core/rnaseq* pipeline. Launch form parameters and tools may differ in other versions. +::: + +With your compute environment created, *nf-core/rnaseq* added to your workspace Launchpad, and your samplesheet accessible in Platform, you are ready to launch your pipeline. Navigate to the Launchpad and select **Launch** next to **nf-core-rnaseq** to open the launch form. + +The launch form consists of **General config**, **Run parameters**, and **Advanced options** sections to specify your run parameters before execution, and an execution summary. Use section headings or select the **Previous** and **Next** buttons at the bottom of the page to navigate between sections. + +### General config + +![General config tab](./_images/launch-form-2.gif) + +- **Pipeline to launch**: The pipeline Git repository name or URL. For saved pipelines, this is prefilled and cannot be edited. +- **Revision number**: A valid repository commit ID, tag, or branch name. For saved pipelines, this is prefilled and cannot be edited. +- **Config profiles**: One or more [configuration profile](https://www.nextflow.io/docs/latest/config.html#config-profiles) names to use for the execution. Config profiles must be defined in the `nextflow.config` file in the pipeline repository. +- **Workflow run name**: An identifier for the run, pre-filled with a random name. This can be customized. +- **Labels**: Assign new or existing [labels](../labels/overview) to the run. +- **Compute environment**: Your AWS Batch compute environment. +- **Work directory**: The cloud storage path where pipeline scratch data is stored. Platform will create a scratch sub-folder if only a cloud bucket location is specified. + :::note + The credentials associated with the compute environment must have access to the work directory. 
+ ::: + +### Run parameters + +![Run parameters](./_images/launch-form-3.gif) + +There are three ways to enter **Run parameters** prior to launch: + +- The **Input form view** displays form fields to enter text or select attributes from lists, and browse input and output locations with [Data Explorer](../data/data-explorer). +- The **Config view** displays raw configuration text that you can edit directly. Select JSON or YAML format from the **View as** list. +- **Upload params file** allows you to upload a JSON or YAML file with run parameters. + +Platform uses the `nextflow_schema.json` file in the root of the pipeline repository to dynamically create a form with the necessary pipeline parameters. + +Specify your pipeline input and output and modify other pipeline parameters as needed. + +
+ **input** + + Use **Browse** to select your pipeline input data: + + - In the **Data Explorer** tab, select the existing cloud bucket that contains your samplesheet, browse or search for the samplesheet file, and select the chain icon to copy the file path before closing the data selection window and pasting the file path in the input field. + - In the **Datasets** tab, search for and select your existing dataset. + +
+
+ **outdir** + + Use the `outdir` parameter to specify where the pipeline outputs are published. `outdir` must be unique for each pipeline run. Otherwise, your results will be overwritten. + + **Browse** and copy cloud storage directory paths using Data Explorer, or enter a path manually. + +
+ +Modify other parameters to customize the pipeline execution through the parameters form. For example, under **Read trimming options**, change the `trimmer` and select `fastp` instead of `trimgalore`. + +![Read trimming options](./quickstart-demo/assets/trimmer-settings.png) + +### Advanced settings + +- Use [resource labels](../resource-labels/overview) to tag the computing resources created during the workflow execution. While resource labels for the run are inherited from the compute environment and pipeline, workspace admins can override them from the launch form. Applied resource label names must be unique. +- [Pipeline secrets](../secrets/overview) store keys and tokens used by workflow tasks to interact with external systems. Enter the names of any stored user or workspace secrets required for the workflow execution. +- See [Advanced options](../launch/advanced) for more details. + +After you have filled the necessary launch details, select **Launch**. The **Runs** tab shows your new run in a **submitted** status at the top of the list. Select the run name to navigate to the [**View Workflow Run**](../monitoring/overview) page and view the configuration, parameters, status of individual tasks, and run report. + +
+ **Run monitoring** + + Select your new run from the **Runs** tab list to view the run details. + + #### Run details page + + As the pipeline runs, run details will populate with the following tabs: + + - **Command-line**: The Nextflow command invocation used to run the pipeline. This includes details about the pipeline version (`-r` flag) and profile, if specified (`-profile` flag). + - **Parameters**: The exact set of parameters used in the execution. This is helpful for reproducing the results of a previous run. + - **Resolved Nextflow configuration**: The full Nextflow configuration settings used for the run. This includes parameters, but also settings specific to task execution (such as memory, CPUs, and output directory). + - **Execution Log**: A summarized Nextflow log providing information about the pipeline and the status of the run. + - **Datasets**: Link to datasets, if any were used in the run. + - **Reports**: View pipeline outputs directly in the Platform. + + ![View the nf-core/rnaseq run](./quickstart-demo/assets/sp-cloud-run-info.gif) + + #### View reports + + Most Nextflow pipelines generate reports or output files which are useful to inspect at the end of the pipeline execution. Reports can contain quality control (QC) metrics that are important to assess the integrity of the results. + + ![Reports tab](./quickstart-demo/assets/reports-tab.png) + + For example, for the *nf-core/rnaseq* pipeline, view the [MultiQC](https://docs.seqera.io/multiqc) report generated. MultiQC is a helpful reporting tool to generate aggregate statistics and summaries from bioinformatics tools. + + ![Reports MultiQC preview](./quickstart-demo/assets/reports-preview.png) + + The paths to report files point to a location in cloud storage (in the `outdir` directory specified during launch), but you can view the contents directly and download each file without navigating to the cloud or a remote filesystem. 
+ + :::info + See [Reports](../reports/overview) for more information. + ::: + + #### View general information + + The run details page includes general information about who executed the run, when it was executed, the Git commit ID and/or tag used, and additional details about the compute environment and Nextflow version used. + + ![General run information](./quickstart-demo/assets/general-run-details.gif) + + #### View details for a task + + Scroll down the page to view: + + - The progress of individual pipeline **Processes** + - **Aggregated stats** for the run (total walltime, CPU hours) + - **Workflow metrics** (CPU efficiency, memory efficiency) + - A **Task details** table for every task in the workflow + + The task details table provides further information on every step in the pipeline, including task statuses and metrics. + + #### Task details + + Select a task in the task table to open the **Task details** dialog. The dialog has three tabs: + + ![Task details window](./quickstart-demo/assets/task-details.gif) + + - The **About** tab contains extensive task execution details. + - The **Execution log** tab provides a real-time log of the selected task's execution. Task execution and other logs (such as stdout and stderr) are available for download from here, if still available in your compute environment. + - The **Data Explorer** tab allows you to view the task working directory directly in Platform. + + Nextflow hash-addresses each task of the pipeline and creates unique directories based on these hashes. Data Explorer allows you to view the log files and output files generated for each task in its working directory, directly within Platform. You can view, download, and retrieve the link for these intermediate files in cloud storage from the **Data Explorer** tab to simplify troubleshooting. + + ![Task Data Explorer](./quickstart-demo/assets/sp-cloud-task-data-explorer.gif) + +
+ +## Interactive analysis with Studios + +**Studios** streamline the process of creating interactive analysis environments for Platform users. With built-in templates for platforms like Jupyter Notebook, an R-IDE, and VSCode, creating a Studio is as simple as adding and sharing pipelines or datasets. The Studio URL can also be shared with any user with the [Connect role](../orgs-and-teams/roles) for real-time access and collaboration. + +For the purposes of this guide, an R-IDE will be used to normalize the pipeline output data, perform differential expression analysis, and visualize the data with exploratory plots. + +### Prepare your data + +#### Gene counts + +Salmon is the default tool used during the `pseudo-aligner` step of the *nf-core/rnaseq* pipeline. In the pipeline output data, the `/salmon` directory contains the tool's output, including a `salmon.merged.gene_counts_length_scaled.tsv` file. + +#### Sample info + +The analysis script provided in this section requires a sample information file to parse the counts data in the `salmon.merged.gene_counts_length_scaled.tsv` file. *nf-core/rnaseq* does not produce this sample information file automatically. See below to create a sample information file based on the genes in your `salmon.merged.gene_counts_length_scaled.tsv` file. + +
+ **Create a sample info file** + + 1. Note the names of the columns (excluding the first column, which typically contains gene IDs) in your `salmon.merged.gene_counts_length_scaled.tsv` file. These are your sample names. + 1. Identify the group or condition that each sample belongs to. This information should come from your experimental design. + 1. Create a new text file named `sampleinfo.txt`, with two columns: + - First column header: Sample + - Second column header: Group + 1. For each sample in your `salmon.merged.gene_counts_length_scaled.tsv` file: + - In the "Sample" column, write the exact sample name as it appears in the gene counts file. + - In the "Group" column, write the corresponding group name. + + For example, for the dataset used in a `test_full` run of *nf-core/rnaseq*, the `sampleinfo.txt` looks like this: + + ``` + Sample Group + GM12878_REP1 GM12878 + GM12878_REP2 GM12878 + H1_REP1 H1 + H1_REP2 H1 + K562_REP1 K562 + K562_REP2 K562 + MCF7_REP1 MCF7 + MCF7_REP2 MCF7 + ``` + + To make your `sampleinfo.txt` file accessible to the data studio, upload it to the directory that contains your pipeline output data. Select this bucket or directory when you **Mount data** during data studio setup. + +
+ +### Create an R-IDE analysis environment with Studios + +![Add data studio](./_images/create-ds.gif) + +From the **Studios** tab, select **Add a studio** and complete the following: +- Select the latest **R-IDE** container image template from the list. +- Select your AWS Batch compute environment. +:::note +Studio sessions compete for computing resources when sharing compute environments. Ensure your compute environment has sufficient resources to run both your pipelines and sessions. The default CPU and memory allocation for a Studio is 2 CPUs and 8192 MB RAM. +::: +- Mount data using Data Explorer: Mount the S3 bucket or directory path that contains the pipeline work directory of your RNA-Seq run. +- Optional: Enter CPU and memory allocations. The default values are 2 CPUs and 8192 MB memory (RAM). +- Select **Add**. +- Once the Studio has been created, select the options menu next to it and select **Start**. +- When the Studio is in a running state, **Connect** to it. + +### Perform the analysis and explore results + +The R-IDE can be configured with the packages you wish to install and the R script you wish to run. For the purposes of this guide, run the following scripts in the R-IDE console to install the necessary packages and perform the analysis: + +1. Install and load the necessary packages and libraries: + + ```r + # Install required packages + if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager") + BiocManager::install(c("limma", "edgeR", "ggplot2", "gplots")) + + # Load required libraries + library(limma) + library(edgeR) + library(ggplot2) + library(gplots) + ``` + +1. Read and convert the count data and sample information: + + :::info + Replace `` and `` with the paths to your `salmon.merged.gene_counts_length_scaled.tsv` and `sampleinfo.txt` files. 
+ ::: + + ```r + # Read in the count data + counts <- read.delim(file = "/workspace/data/", row.names = + 1) + + # Remove the gene_name column if it exists + if ("gene_name" %in% colnames(counts)) { + counts <- counts[, -which(colnames(counts) == "gene_name")] + } + + # Convert to matrix + counts <- as.matrix(counts) + + # Read in the sample information + targets <- read.table( + file = "/workspace/data/", + header = TRUE, + stringsAsFactors = FALSE, + sep = "", + check.names = FALSE + ) + + # Ensure column names are correct + colnames(targets) <- c("Sample", "Group") + ``` + +1. Create a DGEList object and filter out low-count genes: + + ```r + # Create a DGEList object + y <- DGEList(counts, group = targets$Group) + + # Calculate CPM (counts per million) values + mycpm <- cpm(y) + + # Filter low count genes + thresh <- mycpm > 0.5 + keep <- rowSums(thresh) >= 2 + y <- y[keep, , keep.lib.sizes = FALSE] + ``` + +1. Normalize the data: + + ```r + # Normalize the data + y <- calcNormFactors(y) + ``` + +1. Print a summary of the filtered data: + + ```r + # Print summary of filtered data + print(dim(y)) + print(y$samples) + ``` + +1. Create an MDS plot, displayed in the plots viewer (`a`) and saved as a PNG file (`b`): + + :::info + MDS plots are used to visualize the overall similarity between RNA-Seq samples based on their gene expression profiles, helping to identify sample clusters and potential batch effects. + ::: + + ```r + # Create MDS plot + # a. Display in RStudio + plotMDS(y, col = as.numeric(factor(targets$Group)), labels = targets$Group) + legend( + "topright", + legend = levels(factor(targets$Group)), + col = 1:nlevels(factor(targets$Group)), + pch = 20 + ) + + # b. 
Save MDS plot to file (change `png` to `pdf` to create a PDF file) + png("MDS_plot.png", width = 800, height = 600) + plotMDS(y, col = as.numeric(factor(targets$Group)), labels = targets$Group) + legend( + "topright", + legend = levels(factor(targets$Group)), + col = 1:nlevels(factor(targets$Group)), + pch = 20 + ) + dev.off() + ``` + +1. Perform differential expression analysis: + + ```r + # Design matrix + design <- model.matrix( ~ 0 + group, data = y$samples) + colnames(design) <- levels(y$samples$group) + + # Estimate dispersion + y <- estimateDisp(y, design) + + # Fit the model + fit <- glmQLFit(y, design) + + # Define contrasts + my.contrasts <- makeContrasts( + GM12878vsH1 = GM12878 - H1, + GM12878vsK562 = GM12878 - K562, + GM12878vsMCF7 = GM12878 - MCF7, + H1vsK562 = H1 - K562, + H1vsMCF7 = H1 - MCF7, + K562vsMCF7 = K562 - MCF7, + levels = design + ) + + # Perform differential expression analysis for each contrast + results <- lapply(colnames(my.contrasts), function(contrast) { + qlf <- glmQLFTest(fit, contrast = my.contrasts[, contrast]) + topTags(qlf, n = Inf) + }) + names(results) <- colnames(my.contrasts) + ``` + + :::info + This script is written for the analysis of human data, based on *nf-core/rnaseq*'s `test_full` dataset. To adapt the script for your data, modify the contrasts based on the comparisons you want to make between your sample groups: + + ```r + my.contrasts <- makeContrasts( + Sample1vsSample2 = Sample1 - Sample2, + Sample2vsSample3 = Sample2 - Sample3, + ... + levels = design + ) + ``` + ::: + +1. 
Print the number of differentially expressed genes for each comparison and save the results to CSV files: + + ```r + # Print the number of differentially expressed genes for each comparison + for (name in names(results)) { + de_genes <- sum(results[[name]]$table$FDR < 0.05) + print(paste("Number of DE genes in", name, ":", de_genes)) + } + + # Save results + for (name in names(results)) { + write.csv(results[[name]], file = paste0("DE_genes_", name, ".csv")) + } + ``` + +1. Create volcano plots for each differential expression comparison, displayed in the plots viewer and saved as PNG files: + + :::info + Volcano plots in RNA-Seq analysis display the magnitude of gene expression changes (log2 fold change) against their statistical significance. This allows for quick identification of significantly up- and down-regulated genes between two conditions. + ::: + + ```r + # Create volcano plots for differential expression comparisons + # Function to create a volcano plot + create_volcano_plot <- function(res, title) { + ggplot(res$table, aes(x = logFC, y = -log10(FDR))) + + geom_point(aes(color = FDR < 0.05 & + abs(logFC) > 1), size = 0.5) + + scale_color_manual(values = c("black", "red")) + + labs(title = title, x = "Log2 Fold Change", y = "-Log10 FDR") + + theme_minimal() + } + + # Create volcano plots for each comparison + for (name in names(results)) { + p <- create_volcano_plot(results[[name]], name) + # Display in RStudio + print(p) + # Save to file (change `.png` to `.pdf` to create PDF files) + ggsave( + paste0("volcano_plot_", name, ".png"), + p, + width = 8, + height = 6, + dpi = 300 + ) + } + ``` + +1. Create a heatmap of the top 50 differentially expressed genes: + + :::info + Heatmaps in RNA-Seq analysis provide a color-coded representation of gene expression levels across multiple samples or conditions, enabling the visualization of expression patterns and sample clustering based on similarity. 
+ ::: + + ```r + # Create a heatmap of top 50 differentially expressed genes + # Get top 50 DE genes from each comparison + top_genes <- unique(unlist(lapply(results, function(x) + rownames(x$table)[1:50]))) + + # Get log-CPM values for these genes + log_cpm <- cpm(y, log = TRUE) + top_gene_expr <- log_cpm[top_genes, ] + + # Print dimensions of top_gene_expr + print(dim(top_gene_expr)) + + # Create a color palette + my_palette <- colorRampPalette(c("blue", "white", "red"))(100) + + # Create a heatmap using heatmap.2 + # Display in RStudio + heatmap.2( + as.matrix(top_gene_expr), + scale = "row", + col = my_palette, + trace = "none", + dendrogram = "column", + margins = c(5, 10), + labRow = FALSE, + ColSideColors = rainbow(length(unique(y$samples$group)))[factor(y$samples$group)], + main = "Top DE Genes Across Samples" + ) + + # Save heatmap to file (change `png` to `pdf` to create a PDF file) + png("heatmap_top_DE_genes.png", + width = 1000, + height = 1200) + heatmap.2( + as.matrix(top_gene_expr), + scale = "row", + col = my_palette, + trace = "none", + dendrogram = "column", + margins = c(5, 10), + labRow = FALSE, + ColSideColors = rainbow(length(unique(y$samples$group)))[factor(y$samples$group)], + main = "Top DE Genes Across Samples" + ) + dev.off() + + # Print the number of top genes in the heatmap + print(paste("Number of top DE genes in heatmap:", length(top_genes))) + ``` + +![RStudio plots](./_images/rstudio.gif) + +### Collaborate in the Studio + +To share your results or allow colleagues to perform exploratory analysis, share a link to the Studio by selecting the options menu for the Studio you want to share, then select **Copy Studio URL**. With this link, other authenticated users with the **Connect** [role](../orgs-and-teams/roles) (or greater) can access the session directly. + +## RNA-Seq data and requirements + +RNA-Seq data typically consists of raw sequencing reads from high-throughput sequencing technologies. 
These reads are used to quantify gene expression levels and discover novel transcripts. A typical RNA-Seq dataset can range from a few GB to several hundred GB, depending on the number of samples and the sequencing depth. + +### *nf-core/rnaseq* performance in Platform + +The compute recommendations in this guide are based on internal benchmarking performed by Seqera. Benchmark runs of [*nf-core/rnaseq*](https://github.com/nf-core/rnaseq) used profile `test_full`, consisting of an input dataset with 16 FASTQ files (8 paired-end samples) and a total size of approximately 123.5 GB. + +This benchmark compares pipeline run metrics between single *nf-core/rnaseq* runs in an AWS Batch compute environment with Fusion file system and fast instance storage enabled (**Fusion** group) and an identical AWS Batch compute environment using S3 storage without Fusion (**AWS S3** group). + +### Pipeline steps and computing resource requirements + +The *nf-core/rnaseq* pipeline involves several key steps, each with distinct computational requirements. Resource needs in this table are based on the `test_full` runs detailed previously: + +| **Pipeline step** | **Tools** | **Resource needs** | **Description** | +|-------------------------------------|---------------------------|------------------------------|---------------------------------------------------------------------------------------------------| +| **Quality Control (QC)** | FastQC, MultiQC | Low-moderate CPU (50-200% single-core usage), low memory (1-7 GB peak) | Initial quality checks of raw reads to assess sequencing quality and identify potential issues. | +| **Read Trimming** | Trim Galore! | High CPU (up to 700% single-core usage), low memory (6 GB peak) | Removal of adapter sequences and low-quality bases to prepare reads for alignment. 
| +| **Read Alignment** | HISAT2, STAR | Moderate-high CPU (480-600% single-core usage), high memory (36 GB peak) | Alignment of trimmed reads to a reference genome, typically the most resource-intensive step. | +| **Pseudoalignment** | Salmon, Kallisto | Moderate-high CPU (420% single-core usage), moderate memory (18 GB peak) | A faster, more accurate method of gene expression quantification than alignment using read compatibility. | +| **Quantification** | featureCounts, Salmon | Moderate-high CPU (500-600% single-core usage), moderate memory (18 GB peak) | Counting the number of reads mapped to each gene or transcript to measure expression levels. | +| **Differential Expression Analysis**| DESeq2, edgeR | High CPU (650% single-core usage), low memory (up to 2 GB peak ) | Statistical analysis to identify genes with significant changes in expression between conditions. | + +#### Overall run metrics + +**Total pipeline run cost (USD)**: + +- Fusion file system with fast instance storage: $34.90 +- Plain S3 storage without Fusion: $58.40 + +**Pipeline runtime**: + +The Fusion file system used with NVMe instance storage contributed to a 34% improvement in total pipeline runtime and a 49% reduction in CPU hours. + +![Run metrics overview](./_images/cpu-table-2.png) + +#### Process run time + +The Fusion file system demonstrates significant performance improvements for most processes in the *nf-core/rnaseq* pipeline, particularly for I/O-intensive tasks: + +- The most time-consuming processes see improvements of 36.07% to 70.15%, saving hours of runtime in a full pipeline execution. +- Most processes show significant performance improvements with Fusion, with time savings ranging from 35.57% to 99.14%. +- The most substantial improvements are seen in I/O-intensive tasks like `SAMTOOLS_FLAGSTAT` (95.20% faster) and `SAMTOOLS_IDXSTATS` (99.14% faster). +- `SALMON_INDEX` shows a notable 70.15% improvement, reducing runtime from 102.18 minutes to 30.50 minutes. 
+- `STAR_ALIGN_IGENOMES`, one of the most time-consuming processes, is 53.82% faster with Fusion, saving nearly an hour of runtime. + +![Average runtime of *nf-core/rnaseq* processes for eight samples using the Fusion file system and plain S3 storage. Error bars = standard deviation of the mean.](./_images/process-runtime-2.png) + +| Process | S3 Runtime (min) | Fusion Runtime (min) | Time Saved (min) | Improvement (%) | +|---------|------------------|----------------------|------------------|-----------------| +| SAMTOOLS_IDXSTATS | 18.54 | 0.16 | 18.38 | 99.14% | +| SAMTOOLS_FLAGSTAT | 22.94 | 1.10 | 21.84 | 95.20% | +| SAMTOOLS_STATS | 22.54 | 3.18 | 19.36 | 85.89% | +| SALMON_INDEX | 102.18 | 30.50 | 71.68 | 70.15% | +| BEDTOOLS_GENOMECOV_FW | 19.53 | 7.10 | 12.43 | 63.64% | +| BEDTOOLS_GENOMECOV_REV | 18.88 | 7.35 | 11.53 | 61.07% | +| PICARD_MARKDUPLICATES | 102.15 | 41.60 | 60.55 | 59.27% | +| STRINGTIE | 17.63 | 7.60 | 10.03 | 56.89% | +| RSEQC_READDISTRIBUTION | 16.33 | 7.19 | 9.14 | 55.97% | +| STAR_ALIGN_IGENOMES | 106.42 | 49.15 | 57.27 | 53.82% | +| SALMON_QUANT | 30.83 | 15.58 | 15.25 | 49.46% | +| RSEQC_READDUPLICATION | 19.42 | 12.15 | 7.27 | 37.44% | +| QUALIMAP_RNASEQ | 141.40 | 90.40 | 51.00 | 36.07% | +| TRIMGALORE | 51.22 | 33.00 | 18.22 | 35.57% | +| DUPRADAR | 49.04 | 77.81 | -28.77 | -58.67% | + +
+ **Pipeline optimization** + + Seqera Platform's task-level resource usage metrics allow you to determine the resources requested for a task and what was actually used. This information helps you fine-tune your configuration more accurately. + + However, manually adjusting resources for every task in your pipeline is impractical. Instead, you can leverage the pipeline optimization feature on the Launchpad. + + Pipeline optimization analyzes resource usage data from previous runs to optimize the resource allocation for future runs. After a successful run, optimization becomes available, indicated by the lightbulb icon next to the pipeline turning black. + + #### Optimize nf-core/rnaseq + + Select the lightbulb icon next to *nf-core/rnaseq* in your workspace Launchpad to view the optimized profile. You have the flexibility to tailor the optimization's target settings and incorporate a retry strategy as needed. + + #### View optimized configuration + + When you select the lightbulb, you can access an optimized configuration profile in the second tab of the **Customize optimization profile** window. + + This profile consists of Nextflow configuration settings for each process and each resource directive (where applicable): **cpus**, **memory**, and **time**. The optimized setting for a given process and resource directive is based on the maximum use of that resource across all tasks in that process. + + Once optimization is selected, subsequent runs of that pipeline will inherit the optimized configuration profile, indicated by the black lightbulb icon with a checkmark. + + :::info + Optimization profiles are generated from one run at a time, defaulting to the most recent run, and _not_ an aggregation of previous runs. 
+ ::: + + ![Optimized configuration](./quickstart-demo/assets/optimize-configuration.gif) + + Verify the optimized configuration of a given run by inspecting the resource usage plots for that run and these fields in the run's task table: + + | Description | Key | + | ------------ | ---------------------- | + | CPU usage | `pcpu` | + | Memory usage | `peakRss` | + | Runtime | `start` and `complete` | + +
+
diff --git a/platform-enterprise/getting-started/studios.md b/platform-enterprise/getting-started/studios.md
new file mode 100644
index 000000000..902954331
--- /dev/null
+++ b/platform-enterprise/getting-started/studios.md
@@ -0,0 +1,601 @@
+---
+title: "Studios for interactive analysis"
+description: "Creating interactive analysis Studios for Jupyter, RStudio, VS Code, and more"
+date: "24 Feb 2025"
+tags: [platform, studios, jupyter, rstudio, xpra, vscode, conda]
+toc_max_heading_level: 3
+---
+
+[Studios](../studios/overview) allows users to host a variety of container images directly in Seqera Platform compute environments for analysis using popular environments including [Jupyter](https://jupyter.org/) (Python), an [R-IDE](https://github.com/seqeralabs/r-ide), [Visual Studio Code](https://code.visualstudio.com/) IDEs, and [Xpra](https://xpra.org/index.html) remote desktops. Each Studio session provides a dedicated interactive environment that encapsulates the live environment.
+
+This guide explores how Studios integrates with your existing workflows, bridging the gap between pipeline execution and interactive analysis. It details how to set up and use each type of Studio, demonstrating a practical use case for each.
+
+:::info[**Prerequisites**]
+You will need the following to get started:
+
+- At least the **Maintain** workspace [user role](../orgs-and-teams/roles) to create and configure Studios.
+- An [AWS Batch compute environment](../compute-envs/aws-batch#batch-forge-compute-environment) (**without Fargate**) with sufficient resources (minimum: 2 CPUs, 8192 MB RAM).
+- Valid [credentials](../credentials/overview) for your cloud storage account and compute environment.
+- [Data Explorer](../data/data-explorer) enabled in your workspace.
+:::
+
+:::note
+The scripts and instructions provided in this guide were tested on 24 February 2025. Library and package versions recommended here may become outdated and lead to unexpected results over time.
+::: + +## Jupyter: Python-based visualization of protein structure prediction data + +Jupyter notebooks enable interactive analysis using Python libraries and tools. For example, Py3DMol is a tool used for visualizing and comparing structures produced by workflows such as [*nf-core/proteinfold*](https://nf-co.re/proteinfold/1.1.1), a bioinformatics best-practice analysis pipeline for protein 3D structure prediction. This section demonstrates how to create an AWS Batch compute environment, add the nf-core AWS megatests public proteinfold data to your workspace, create a Jupyter Studio, and run the provided Python script to produce interactive composite 3D images of the [H1065 sequence](https://predictioncenter.org/casp14/multimer_results.cgi?target=H1065). + +:::note +This script and instructions can also be used to visualize the structures from *nf-core/proteinfold* runs performed with your own public or private data. +::: + +#### Create an AWS Batch compute environment + +Studios require an AWS Batch compute environment. If you do not have an existing compute environment available, [create one](../compute-envs/aws-batch#batch-forge-compute-environment) with the following attributes: + +- **Region**: To minimize costs, your compute environment should be in the same region as your data. To browse the nf-core AWS megatests public data optimally, select **eu-west-1**. +- **Provisioning model**: Use **On-demand** EC2 instances. +- Studios does not support AWS Fargate. Do not enable **Use Fargate for head job**. +- At least 2 available CPUs and 8192 MB of RAM. + +#### Add data using Data Explorer + +For the purposes of this guide, add the proteinfold results (H1065 sequence) from the nf-core AWS megatests S3 bucket to your workspace using Data Explorer: + +1. From the **Data Explorer** tab, select **Add cloud bucket**. +1. 
Specify the bucket details: + - **Provider**: AWS + - **Bucket path**: `s3://nf-core-awsmegatests/proteinfold/results-9bea0dc4ebb26358142afbcab3d7efd962d3a820` + - A unique **Name** for the bucket, such as `nf-core-awsmegatests-proteinfold-h1065` + - **Credentials**: **Public** + - An optional bucket **Description** +1. Select **Add**. + +:::info +To use your own pipeline data for interactive visualization, add the cloud bucket that contains the results of your *nf-core/proteinfold* pipeline run. See [Add a cloud bucket](./quickstart-demo/add-data#add-a-cloud-bucket) for more information. +::: + +### Create a Jupyter Studio + +From the **Studios** tab, select **Add a Studio** and complete the following: +- In the **Compute & Data** tab: + - Select your AWS Batch compute environment. + :::note + Studio sessions compete for computing resources when sharing compute environments. Ensure your shared compute environment has sufficient resources to run both your pipelines and Studio sessions. + ::: + - Optional: Enter CPU and memory allocations. The default values are 2 CPUs and 8192 MB memory (RAM). + - Mount data using Data Explorer: Mount the S3 bucket or directory path that contains the nf-core AWS megatests proteinfold data, or the pipeline work directory of your *nf-core/proteinfold* run. +- In the **General config** tab: + - Select the latest **Jupyter** container image template from the list. + - Optional: Enter a unique name and description for the Studio. + - Check **Install Conda packages** and paste the following into the YAML textfield: + ```yaml + channels: + - schrodinger + - conda-forge + - bioconda + dependencies: + - python=3.10 + - conda-forge::libgl + - pip + - pip: + - biopython==1.85 + - mdtraj==1.10.3 + - py3dmol==2.4.2 + ``` +- Select **Add** or choose to **Add and start** a Studio session immediately. +- If you chose to **Add** the Studio in the preceding step, select **Connect** in the options menu to open a Studio session in a new browser tab. 
+ +### Visualize protein structures + +The following Python script visualizes and compares protein structures produced by Alphafold 2 and ESMFold, creating a composite interactive 3D image of the two structures with contrasting colors. The script aligns mobile structures to reference structures, retrieves lists of C-alpha atoms from both structures, creates views for individual and combined structures, and creates an interactive view of the individual and combined structures using Py3DMol. + +Run the following script in your Jupyter notebook to install the necessary packages and perform visualization: + +
+Full Python script + + ```python + import py3Dmol + from IPython.display import display + from Bio import PDB + from Bio.PDB import Superimposer + import numpy as np + + # Keep file paths unchanged to visualize structures of the H1065 sequence in nf-core AWS megatests. + # Update file paths (to PDB files) to visualize structures of your own nf-core/proteinfold output data. + alphafold2_multimer_standard = "/workspace/data/nf-core-awsmegatests-proteinfold-h1065/mode_alphafold2_multimer/alphafold2/standard/H1065.alphafold.pdb" + esmfold_multimer = "/workspace/data/nf-core-awsmegatests-proteinfold-h1065/mode_esmfold_multimer/esmfold/H1065.pdb" + + def align_structures(ref_pdb_path, mobile_pdb_path): + """Align mobile structure to reference structure and return aligned coordinates""" + # Set up parser + parser = PDB.PDBParser() + + # Load structures + ref_structure = parser.get_structure("reference", ref_pdb_path) + mobile_structure = parser.get_structure("mobile", mobile_pdb_path) + + # Get lists of C-alpha atoms from both structures + ref_atoms = [] + mobile_atoms = [] + + for model in ref_structure: + for chain in model: + for residue in chain: + if 'CA' in residue: + ref_atoms.append(residue['CA']) + + for model in mobile_structure: + for chain in model: + for residue in chain: + if 'CA' in residue: + mobile_atoms.append(residue['CA']) + + # Align structures using Superimposer + super_imposer = Superimposer() + super_imposer.set_atoms(ref_atoms, mobile_atoms) + super_imposer.apply(mobile_structure.get_atoms()) + + # Save aligned structure + io = PDB.PDBIO() + io.set_structure(mobile_structure) + aligned_pdb_path = "./"+mobile_pdb_path.split("/")[-1].replace('.pdb', '_aligned.pdb') + io.save(aligned_pdb_path) + + return aligned_pdb_path + + def create_structure_view(pdb_path, color, width=400, height=400, label=None): + """Create a view for a single structure""" + view = py3Dmol.view(width=width, height=height) + + with open(pdb_path, 'r') as f: + pdb_data = 
f.read() + view.addModel(pdb_data, "pdb") + view.setStyle({'model': -1}, {'cartoon': {'color': color}}) + view.zoomTo() + + if label: + view.addLabel(label, { + 'position': {'x': 0, 'y': 0, 'z': 0}, + 'backgroundColor': color, + 'fontColor': 'white' + }) + + return view + + def visualize_structures(pdb1_path, pdb2_path): + # Align the second structure to the first + aligned_pdb2_path = align_structures(pdb1_path, pdb2_path) + + # Create three separate views + view1 = create_structure_view(pdb1_path, 'blue', label="AlphaFold2") + view2 = create_structure_view(aligned_pdb2_path, 'darkgrey', label="ESMFold") + + # Create combined view + view3 = py3Dmol.view(width=800, height=400) + + # Load and display first structure (AlphaFold2) + with open(pdb1_path, 'r') as f: + pdb1_data = f.read() + view3.addModel(pdb1_data, "pdb") + view3.setStyle({'model': -1}, {'cartoon': {'color': 'blue'}}) + + # Load and display aligned second structure (ESMFold) + with open(aligned_pdb2_path, 'r') as f: + pdb2_data = f.read() + view3.addModel(pdb2_data, "pdb") + view3.setStyle({'model': 1}, {'cartoon': {'color': 'darkgrey'}}) + + # Set up the combined view + view3.zoomTo() + + # Add labels for combined view + view3.addLabel("AlphaFold2", {'position': {'x': -20, 'y': 0, 'z': 0}, 'backgroundColor': 'blue', 'fontColor': 'white'}) + view3.addLabel("ESMFold", {'position': {'x': 20, 'y': 0, 'z': 0}, 'backgroundColor': 'darkgrey', 'fontColor': 'white'}) + + return view1, view2, view3 + + # Visualize the structures + view1, view2, view3 = visualize_structures(alphafold2_multimer_standard, esmfold_multimer) + + # Display all views + print("AlphaFold2 Structure:") + view1.show() + print("\nESMFold Structure:") + view2.show() + print("\nAligned Structures:") + view3.show() + ``` + +
+
+Python script individual steps + + 1. Import libraries: + + ```python + import py3Dmol + from IPython.display import display + from Bio import PDB + from Bio.PDB import Superimposer + import numpy as np + ``` + + 1. Set up PDB file paths: + + ```python + # Keep file paths unchanged to visualize structures of the H1065 sequence in nf-core AWS megatests. + # Update file paths (to PDB files) to visualize structures of your own nf-core/proteinfold output data. + alphafold2_multimer_standard = "/workspace/data/nf-core-awsmegatests-proteinfold-h1065/mode_alphafold2_multimer/alphafold2/standard/H1065.alphafold.pdb" + esmfold_multimer = "/workspace/data/nf-core-awsmegatests-proteinfold-h1065/mode_esmfold_multimer/esmfold/H1065.pdb" + ``` + + 1. Load structures from the PDB files and retrieve lists of C-alpha atoms from both structures: + + ```python + def align_structures(ref_pdb_path, mobile_pdb_path): + """Align mobile structure to reference structure and return aligned coordinates""" + # Set up parser + parser = PDB.PDBParser() + + # Load structures + ref_structure = parser.get_structure("reference", ref_pdb_path) + mobile_structure = parser.get_structure("mobile", mobile_pdb_path) + + # Get lists of C-alpha atoms from both structures + ref_atoms = [] + mobile_atoms = [] + + for model in ref_structure: + for chain in model: + for residue in chain: + if 'CA' in residue: + ref_atoms.append(residue['CA']) + + for model in mobile_structure: + for chain in model: + for residue in chain: + if 'CA' in residue: + mobile_atoms.append(residue['CA']) + ``` + + 1. 
Align structures using Superimposer: + + ```python + # Align structures using Superimposer + super_imposer = Superimposer() + super_imposer.set_atoms(ref_atoms, mobile_atoms) + super_imposer.apply(mobile_structure.get_atoms()) + + # Save aligned structure + io = PDB.PDBIO() + io.set_structure(mobile_structure) + aligned_pdb_path = "./"+mobile_pdb_path.split("/")[-1].replace('.pdb', '_aligned.pdb') + io.save(aligned_pdb_path) + + return aligned_pdb_path + ``` + + 1. Create a view for a single structure: + + ```python + def create_structure_view(pdb_path, color, width=400, height=400, label=None): + """Create a view for a single structure""" + view = py3Dmol.view(width=width, height=height) + + with open(pdb_path, 'r') as f: + pdb_data = f.read() + view.addModel(pdb_data, "pdb") + view.setStyle({'model': -1}, {'cartoon': {'color': color}}) + view.zoomTo() + + if label: + view.addLabel(label, { + 'position': {'x': 0, 'y': 0, 'z': 0}, + 'backgroundColor': color, + 'fontColor': 'white' + }) + + return view + ``` + + 1. 
Create individual and combined structure views: + + ```python + def visualize_structures(pdb1_path, pdb2_path): + # Align the second structure to the first + aligned_pdb2_path = align_structures(pdb1_path, pdb2_path) + + # Create three separate views + view1 = create_structure_view(pdb1_path, 'blue', label="AlphaFold2") + view2 = create_structure_view(aligned_pdb2_path, 'darkgrey', label="ESMFold") + + # Create combined view + view3 = py3Dmol.view(width=800, height=400) + + # Load and display first structure (AlphaFold2) + with open(pdb1_path, 'r') as f: + pdb1_data = f.read() + view3.addModel(pdb1_data, "pdb") + view3.setStyle({'model': -1}, {'cartoon': {'color': 'blue'}}) + + # Load and display aligned second structure (ESMFold) + with open(aligned_pdb2_path, 'r') as f: + pdb2_data = f.read() + view3.addModel(pdb2_data, "pdb") + view3.setStyle({'model': 1}, {'cartoon': {'color': 'darkgrey'}}) + + # Set up the combined view + view3.zoomTo() + + # Add labels for combined view + view3.addLabel("AlphaFold2", {'position': {'x': -20, 'y': 0, 'z': 0}, 'backgroundColor': 'blue', 'fontColor': 'white'}) + view3.addLabel("ESMFold", {'position': {'x': 20, 'y': 0, 'z': 0}, 'backgroundColor': 'darkgrey', 'fontColor': 'white'}) + + return view1, view2, view3 + ``` + + 1. Display interactive 3D structure views: + + ```python + # Visualize the structures + view1, view2, view3 = visualize_structures(alphafold2_multimer_standard, esmfold_multimer) + + # Display all views + print("AlphaFold2 Structure:") + view1.show() + print("\nESMFold Structure:") + view2.show() + print("\nAligned Structures:") + view3.show() + ``` + +
+ +![Visualize predicted protein structures in a Jupyter notebook Studio](./_images/protein-vis-short-gif-1080p-cropped.gif) + +#### Interactive collaboration + +To share a link to the running Studio session with collaborators inside your workspace, select the options menu for your Jupyter Studio session, then select **Copy Studio URL**. Using this link, other authenticated users can access the session directly to collaborate in real time. + +## R-IDE: Analyze RNASeq data and differential expression statistics + +The R-IDE enables interactive analysis using R libraries and tools. For example, Shiny for R enables you to render functions in a reactive application and build a custom user interface to explore your data. The public data used in this section consists of RNA sequencing data that was processed by the *nf-core/rnaseq* pipeline to quantify gene expression, followed by *nf-core/differentialabundance* to derive differential expression statistics. This section demonstrates how to create a Studio to perform further analysis with these results from cloud storage. One of these outputs is a web application that can be deployed for interactive analysis. + +#### Create an AWS Batch compute environment + +Studios require an AWS Batch compute environment. If you do not have an existing compute environment available, [create one](../compute-envs/aws-batch#batch-forge-compute-environment) with the following attributes: + +- **Region**: To minimize costs, your compute environment should be in the same region as your data. To browse the nf-core AWS megatests public data optimally, select **eu-west-1**. +- **Provisioning model**: Use **On-Demand** EC2 instances. +- Studios does not support AWS Fargate. Do not enable **Use Fargate for head job**. +- At least 2 available CPUs and 8192 MB of RAM. + +#### Add data using Data Explorer + +For the purposes of this guide, add the nf-core AWS megatests S3 bucket to your workspace using Data Explorer: + +1. 
From the **Data Explorer** tab, select **Add cloud bucket**. +1. Specify the bucket details: + - **Provider**: AWS + - **Bucket path**: `s3://nf-core-awsmegatests` + - A unique **Name** for the bucket, such as `nf-core-awsmegatests` + - **Credentials**: **Public** + - An optional bucket **Description** +1. Select **Add**. + +:::info +To use your own pipeline data for interactive analysis, add the cloud bucket that contains the results of your *nf-core/differentialabundance* pipeline run. See [Add a cloud bucket](./quickstart-demo/add-data#add-a-cloud-bucket) for more information. +::: + +### Create an R-IDE Studio + +From the **Studios** tab, select **Add a Studio** and complete the following: +- In the **Compute & Data** tab: + - Select your AWS Batch compute environment. + :::note + Studio sessions compete for computing resources when sharing compute environments. Ensure your compute environment has sufficient resources to run both your pipelines and Studio sessions. + ::: + - Optional: Enter CPU and memory allocations. The default values are 2 CPUs and 8192 MB memory (RAM). + - Mount data using Data Explorer: Mount the nf-core AWS megatests S3 bucket, or the directory path that contains the results of your *nf-core/differentialabundance* pipeline run. +- In the **General config** tab: + - Select the latest **R-IDE** container image template from the list. + - Optional: Enter a unique name and description for the Studio. +- Select **Add** or choose to **Add and start** a Studio session immediately. +- If you chose to **Add** the Studio in the preceding step, select **Start** in the options menu, then **Connect** to open a Studio session in a new browser tab when it is running. + +### Configure environment and explore data in the web app + +The following R script installs and configures the prerequisite packages and libraries to deploy ShinyNGS, a web application created by members of the nf-core community to explore genomic data. 
The script also downloads the RDS file from nf-core AWS megatests to use as input data for the web app's various plots, heatmaps, and tables. To use your own *nf-core/rnaseq* and *nf-core/differentialabundance* results, modify the script as instructed in step 2 below: + +
+R script individual steps + + 1. Configure the R-IDE with installed packages, including [ShinyNGS](https://github.com/pinin4fjords/shinyngs): + + ```r + if (!require("BiocManager", quietly = TRUE)) + install.packages("BiocManager") + + BiocManager::install(version = "3.20", ask = FALSE) + BiocManager::install(c("SummarizedExperiment", "GSEABase", "limma")) + + install.packages(c("devtools", "matrixStats", "rmarkdown", "markdown")) + install.packages("shiny", repos = "https://cran.rstudio.com/") + + devtools::install_version("cpp11", version = "0.2.1", repos = "http://cran.us.r-project.org") + devtools::install_github('pinin4fjords/shinyngs', upgrade_dependencies = FALSE) + ``` + + 1. Download the RDS file from nf-core AWS megatests or your own *nf-core/differentialabundance* results (see [Shiny app](https://nf-co.re/differentialabundance/1.5.0/docs/output/#shiny-app) from the nf-core documentation for file details): + + ```r + # For nf-core AWS megatests + download.file("https://nf-core-awsmegatests.s3-eu-west-1.amazonaws.com/differentialabundance/results-3dd360fed0dca1780db1bdf5dce85e5258fa2253/shinyngs_app/study/data.rds", 'data.rds') + + # For your nf-core/differentialabundance results, replace the URL with your RDS file URL + download.file("https://bucket.s3-region.amazonaws.com/differentialabundance/results/shinyngs_app/study-name/data.rds", 'data.rds') + ``` + + 1. Import libraries, read your RDS data, and launch the app: + + ```r + library(shinyngs) + library(markdown) + esel <- readRDS("data.rds") + app <- prepareApp("rnaseq", esel) + shiny::shinyApp(app$ui, app$server) + ``` + +
+ +#### Interactive collaboration + +To share a link to the running session with collaborators inside your workspace, select the options menu for your R-IDE session, then select **Copy Studio URL**. Using this link, other authenticated users can access the session directly to collaborate in real time. + +## Xpra: Visualize genetic variants with IGV + +Xpra provides remote desktop functionality that enables many interactive analysis and troubleshooting workflows. One such workflow is to perform genetic variant visualization using IGV desktop, a powerful open-source tool for the visual exploration of genomic data. This section demonstrates how to add public data from the [1000 Genomes project](https://www.coriell.org/1/NHGRI/Collections/1000-Genomes-Project-Collection/1000-Genomes-Project) to your workspace, set up an Xpra environment with IGV desktop pre-installed, and explore a variant of interest. + +#### Create an AWS Batch compute environment + +Studios require an AWS Batch compute environment. If you do not have an existing compute environment available, [create one](../compute-envs/aws-batch#batch-forge-compute-environment) with the following attributes: + +- **Region**: To minimize costs, your compute environment should be in the same region as your data. To browse the 1000 Genomes public data optimally, select **us-east-1**. +- **Provisioning model**: Use **On-demand** EC2 instances. +- Studios does not support AWS Fargate. Do not enable **Use Fargate for head job**. +- At least 2 available CPUs and 8192 MB of RAM. + +#### Add data using Data Explorer + +Add the 1000 Genomes S3 bucket to your workspace using Data Explorer: + +1. From the **Data Explorer** tab, select **Add cloud bucket**. +1. Specify the bucket details: + - **Provider**: AWS + - **Bucket path**: `s3://1000genomes` + - A unique **Name** for the bucket, such as `1000G` + - **Credentials**: **Public** + - An optional bucket **Description** +1. Select **Add**. 
+ +:::info +To use your own data for interactive analysis, see [Add a cloud bucket](./quickstart-demo/add-data#add-a-cloud-bucket) for instructions to add your own public or private cloud bucket. +::: + +### Create an Xpra Studio + +From the **Studios** tab, select **Add a Studio** and complete the following: +- In the **Compute & Data** tab: + - Select your AWS Batch compute environment. + :::note + Studio sessions compete for computing resources when sharing compute environments. Ensure your compute environment has sufficient resources to run both your pipelines and Studio sessions. + ::: + - Optional: Enter CPU and memory allocations. + - Mount the 1000 Genomes S3 bucket you added previously using Data Explorer. +- In the **General config** tab: + - Select the latest **Xpra** container image template from the list. + - Optional: Enter a unique name and description for the Studio. + - Check **Install Conda packages** and paste the following into the YAML textfield: + ```yaml + channels: + - conda-forge + - bioconda + dependencies: + - igv + - samtools + ``` +- Select **Add** or choose to **Add and start** a session immediately. +- If you chose to **Add** the Studio in the preceding step, select **Connect** in the options menu to open a session in a new browser tab. + +### View variants in IGV desktop + +1. In the Xpra terminal, run `igv` to open IGV desktop. +1. In IGV, change the genome version to hg19. +1. Select **File**, then **Load from file**, then navigate to `/workspace/data/xpra-1000Genomes/phase3/data/HG00096/high_coverage_alignment` and select the `.bai` file, as shown below: + ![Load BAM file in IGV desktop](./_images/xpra-data-studios-IGV-load-bam.png) +1. Search for PCSK9 and zoom into one of the exons of the gene. 
A coverage graph and reads should be shown, as below: + ![BAM file view](./_images/xpra-data-studios-IGV-view-bam.png) + +#### Interactive collaboration + +To share a link to the running session with collaborators inside your workspace, select the options menu for your Xpra session, then select **Copy Studio URL**. Using this link, other authenticated users can access the session directly to collaborate in real time. + +## VS Code: Create an interactive Nextflow development environment + +Using Studios and Visual Studio Code allows you to create a portable and interactive Nextflow development environment with all the tools you need to develop and run Nextflow pipelines. This section demonstrates how to set up a VS Code Studio with Conda and nf-core tools, add public data and run the *nf-core/fetchngs* pipeline with the `test` profile, and create a VS Code project to start coding your own Nextflow pipelines. The Studio includes the [Nextflow VS Code extension](https://marketplace.visualstudio.com/items?itemName=nextflow.nextflow), which makes use of the Nextflow language server to provide syntax highlighting, code navigation, code completion, and diagnostics for Nextflow scripts and configuration files. + +#### Create an AWS Batch compute environment + +Studios require an AWS Batch compute environment. If you do not have an existing compute environment available, [create one](../compute-envs/aws-batch#batch-forge-compute-environment) with the following attributes: + +- **Region**: To minimize costs, your compute environment should be in the same region as your data. To use the iGenomes public data bucket that contains the *nf-core/fetchngs* `test` profile data, select **eu-west-1**. +- **Provisioning model**: Use **On-demand** EC2 instances. +- Studios does not support AWS Fargate. Do not enable **Use Fargate for head job**. +- At least 4 available CPUs and 16384 MB of RAM. 
+ +#### Add data using Data Explorer + +The *nf-core/fetchngs* pipeline uses data from the NGI iGenomes public dataset for its `test` profile. To add this data to your workspace: + +1. From the **Data Explorer** tab, select **Add cloud bucket**. +1. Specify the bucket details: + - **Provider**: AWS + - **Bucket path**: `s3://ngi-igenomes/test-data/` + - A unique **Name** for the bucket, such as `ngi-igenomes-test-data` + - **Credentials**: **Public** + - An optional bucket **Description** +1. Select **Add**. + +### Create a VS Code Studio + +From the **Studios** tab, select **Add a Studio** and complete the following: +- In the **Compute & Data** tab: + - Select your AWS Batch compute environment. + :::note + Studio sessions compete for computing resources when sharing compute environments. Shared compute environments must have sufficient resources to run both your pipelines and Studio sessions. + ::: + - Allocate at least 4 CPUs and 16384 MB RAM. + - Mount data using Data Explorer: To run *nf-core/fetchngs* with the `test` profile, mount the NGI iGenomes S3 bucket you added previously. Mount any other data directories you need to run and code your own Nextflow pipelines. +- In the **General config** tab: + - Select the latest **VS Code** container image template from the list. + - Optional: Enter a unique name and description for the Studio. + - Check **Install Conda packages** and paste the following into the YAML textfield: + ```yaml + channels: + - conda-forge + - bioconda + - anaconda + dependencies: + - nf-core + - conda + ``` +- Select **Add** or choose to **Add and start** a Studio session immediately. +- If you chose to **Add** the Studio in the preceding step, select **Connect** in the options menu to open a Studio session in a new browser tab. +- Once inside the Studio session, run `code .` to use the clipboard. 
+ +:::tip +See [User and workspace settings](https://code.visualstudio.com/docs/editor/settings) if you wish to import existing VS Code configuration and preferences to your Studio session's VS Code environment. +::: + +### Run *nf-core/fetchngs* with Conda + +Run the following Nextflow command to run *nf-core/fetchngs* with Conda: + +```shell +nextflow run nf-core/fetchngs -profile test,conda --outdir ./nf-core-fetchngs-conda-out -resume +``` + +### Write a Nextflow pipeline with nf-core tools + +- Run `nf-core pipelines create` to create a new pipeline. Choose which parts of the nf-core template you want to use. +- Run `code [NEW_PIPELINE]` to open the new pipeline as a project in VSCode. This allows you to code your pipeline with the help of the Nextflow language server and nf-core tools. + +![VS Code Studio session](./_images/guide-vs-code-studio-nf-env-1080p-cropped.gif) + +#### Interactive collaboration + +To share a link to the running session with collaborators inside your workspace, select the options menu for your VS Code Studio session, then select **Copy Studio URL**. Using this link, other authenticated users can access the session directly to collaborate in real time. diff --git a/platform-enterprise/getting-started/workspace-setup.md b/platform-enterprise/getting-started/workspace-setup.md new file mode 100644 index 000000000..745fb7116 --- /dev/null +++ b/platform-enterprise/getting-started/workspace-setup.md @@ -0,0 +1,52 @@ +--- +title: "Set up your workspace" +description: "Instructions to create an organization workspace and add participants in Seqera Platform." +date: "15 April 2024" +tags: [platform, organizations, workspaces, users] +--- + +Workspaces in Seqera Platform contain the resources to run your analyses and manage your computing infrastructure. Workspace members are granted various access roles to interact with the pipelines, compute environments, and data in a workspace. 
While each Platform user has a personal workspace, resource sharing and access management happens in an organization workspace context. + +To create an organization workspace and begin adding participants, first create your organization: + +### Create an organization + +Organizations are the top-level structure and contain workspaces, members, and teams. You can also add external collaborators to an organization. See [Organization management](../orgs-and-teams/organizations) for more information. + +1. Expand the **Organization | Workspace** dropdown and select **Add organization**. +1. Complete the organization details fields: + - The **Name** to be associated with the organization in Platform. + - The **Full name** of the organization. + - A **Description** of the organization to provide contextual information that may be helpful to other organization members. + - The organization's **Location**. + - The organization's **Website URL**. + - Drag and drop or upload an image to be used as the organization's **Logo** in Platform. +1. Select **Add**. + +You are the first **Owner** of the organizations that you create. Add other organization owners and members as needed from the organization's **Members** tab. + +### Create a workspace + +1. From the organization's **Workspaces** tab, select **Add Workspace**. +1. Complete the workspace details fields: + - The **Name** to be displayed for the workspace in Platform. + - The **Full name** of the workspace. + - A **Description** of the workspace to provide contextual information that may be helpful to other workspace participants. + - **Visibility**: Choose whether the workspace's pipelines must be **Shared** to all organization members, or only visible to workspace participants (**Private**). +1. Select **Add**. You are redirected to your organization's **Workspaces** tab with your new workspace listed. +1. Select your new workspace, then select the **Participants** tab to **Add Participants**. +1. 
Enter the names of existing organization members or teams and select **Add**. +1. Update a participant's access **Role** from the dropdown, if needed. + +### Simplify workspace access with teams + +Teams simplify workspace role-based access control (RBAC) for groups of organization members. Per-workspace access roles assigned to teams are inherited by all team members. + +Create a new team, add team members, and add the team to workspaces from the **Teams** tab on your organization page: + +1. Select **Add Team**, enter the team's details and an optional team avatar image, then select **Add**. +1. Select **Edit** next to the team name in the list, then select the **Members of team** tab to add new members by name or email. + :::note + Team members must be existing organization members. + ::: +1. From the team edit screen's **Workspaces** tab, add workspaces by name and select an access **Role** from the dropdown next to each workspace in the list. All team members inherit the workspace access role for the team. diff --git a/platform-enterprise/git/overview.md b/platform-enterprise/git/overview.md new file mode 100644 index 000000000..bbe4a869a --- /dev/null +++ b/platform-enterprise/git/overview.md @@ -0,0 +1,200 @@ +--- +title: "Git integration" +description: "Connecting to Git repositories in Seqera Platform." +date: "24 Jun 2024" +tags: [git] +--- + +Data pipelines are composed of many assets, including pipeline scripts, configuration files, dependency descriptors (such as for Conda or Docker), documentation, etc. When you manage complex data pipelines as Git repositories, all assets can be versioned and deployed with a specific tag, release, or commit ID. Version control and containerization are crucial to enable reproducible pipeline executions, and provide the ability to continuously test and validate pipelines as the code evolves over time. + +Seqera Platform has built-in support for [Git](https://git-scm.com) and several Git-hosting platforms. 
Pipelines can be pulled remotely from both public and private Git providers, including the most popular platforms: GitHub, GitLab, and BitBucket. + +## Public repositories + +Launch a public Nextflow pipeline by entering its Git repository URL in the **Pipeline to launch** field. + +When you specify the **Revision number**, the list of available revisions is automatically pulled using the Git provider's API. By default, the default branch (usually `main` or `master`) will be used. + +:::tip +[nf-core](https://nf-co.re/pipelines) is a great resource for public Nextflow pipelines. +::: + +:::note +The GitHub API imposes [rate limits](https://docs.github.com/en/developers/apps/building-github-apps/rate-limits-for-github-apps) on API requests. You can increase your rate limit by adding [GitHub credentials](#github) to your workspace as shown below. +::: + +## Private repositories + +To access private Nextflow pipelines, add the credentials for your private Git hosting provider to Seqera. + +:::note +Credentials are encrypted with the AES-256 cipher before secure storage and are never exposed in an unencrypted way by any Seqera API. +::: + +### Multiple credential filtering + +When you have multiple stored credentials, Seqera selects the most relevant credential for your repository in the following order: + +1. Seqera evaluates all the stored credentials available to the current workspace. + +2. Credentials are filtered by Git provider (GitHub, GitLab, Bitbucket, etc.) + +3. Seqera selects the credential with a **Repository base URL** most similar to the target repository. + +4. If no **Repository base URL** values are specified in the workspace credentials, the most long-lived credential is selected. 
+ +**Credential filtering example** + +Workspace A contains four credentials: + +_Credential A_ + + Type: GitHub + + Repository base URL: + +_Credential B_ + + Type: GitHub + + Repository base URL: https://github.com/ + +_Credential C_ + + Type: GitHub + + Repository base URL: https://github.com/pipeline-repo + +_Credential D_ + + Type: GitLab + + Repository base URL: https://gitlab.com/repo-a + +If you launch a pipeline with a Nextflow workflow in the https://github.com/pipeline-repo, Seqera will use **Credential C**. + +For the application to select the most appropriate credential for your repository, we recommend that you: + +- Specify the **Repository base URL** values as completely as possible for each Git credential used in the workspace. + +- Favor the use of service account type credentials where possible (such as GitLab group access tokens). + +- Avoid storing multiple user-based tokens with similar permissions. + +### Azure DevOps repositories + +You can authenticate to Azure DevOps repositories using a [personal access token (PAT)](https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows#about-pats). + +Once you have created and copied your access token, create a new credential in Seqera using these steps: + +**Create Azure DevOps credentials** + +1. From an organization workspace: Select **Credentials > Add Credentials**. From your personal workspace: Go to the user menu and select **Your credentials > Add credentials**. + +2. Enter a **Name** for the new credentials. + +3. Select _Azure DevOps_ as the **Provider**. + +4. Enter your **Username** and **Access token**. + +5. (Recommended) Enter the **Repository base URL** for which the credentials should be applied. This option is used to apply the provided credentials to a specific repository, e.g., `https://dev.azure.com/<organization>/<project>`. 
+ +### GitHub + +Use an access token to connect Seqera to a private [GitHub](https://github.com/) repository. Personal (classic) or fine-grained access tokens can be used. + +:::note +A user's personal access token (classic) can access every repository that the user has access to. GitHub recommends using fine-grained personal access tokens (currently in beta) instead, which you can restrict to specific repositories. Fine-grained personal access tokens also enable you to specify granular permissions instead of broad scopes. +::: + +For **personal (classic)** tokens, you must grant access to the private repository by selecting the main `repo` scope when the token is created. See [Creating a personal access token (classic)](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token#creating-a-personal-access-token-classic) for instructions to create your personal access token (classic). + +For **fine-grained** tokens, the repository's organization must [opt in](https://docs.github.com/en/organizations/managing-programmatic-access-to-your-organization/setting-a-personal-access-token-policy-for-your-organization) to the use of fine-grained tokens. Tokens can be restricted by _resource owner (organization)_, _repository access_, and _permissions_. See [Creating a fine-grained personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-fine-grained-personal-access-token) for instructions to create your fine-grained access token. + +After you've created and copied your access token, create a new credential in Seqera: + +**Create GitHub credentials** + +1. From an organization workspace: Select **Credentials > Add Credentials**. From your personal workspace: Go to the user menu and select **Your credentials > Add credentials**. + +2. Enter a **Name** for the new credentials. + +3. Select _GitHub_ as the **Provider**. + +4. 
Enter your **Username** and **Access token**. + +5. (Recommended) Enter the **Repository base URL** for which the credentials should be applied. This option is used to apply the provided credentials to a specific repository, e.g., `https://github.com/seqeralabs`. + +### GitLab + +GitLab supports [Personal](https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html), [Group](https://docs.gitlab.com/ee/user/group/settings/group_access_tokens.html#group-access-tokens), and [Project](https://docs.gitlab.com/ee/user/project/settings/project_access_tokens.html) access tokens for authentication. Your access token must have the `api`, `read_api`, and `read_repository` scopes to work with Seqera. For all three token types, use the token value in both the **Password** and **Access token** fields in the Seqera credential creation form. + +After you have created and copied your access token, create a new credential in Seqera with these steps: + +**Create GitLab credentials** + +1. From an organization workspace: Select **Credentials > Add Credentials**. From your personal workspace: Go to the user menu and select **Your credentials > Add credentials**. + +2. Enter a **Name** for the new credentials. + +3. Select _GitLab_ as the **Provider**. + +4. Enter your **Username**. For Group and Project access tokens, the username can be any non-empty value. + +5. Enter your token value in both the **Password** and **Access token** fields. + +6. Enter the **Repository base URL** (recommended). This option is used to apply the credentials to a specific repository, e.g. `https://gitlab.com/seqeralabs`. + +### Gitea + +To connect to a private [Gitea](https://gitea.io/) repository, use your Gitea user credentials to create a new credential in Seqera with these steps: + +**Create Gitea credentials** + +1. From an organization workspace, go to the **Credentials** tab and select **Add Credentials**. 
From your personal workspace, select **Your credentials** from the user menu, then select **Add credentials**. + +2. Enter a **Name** for the new credentials. + +3. Select _Gitea_ as the **Provider**. + +4. Enter your **Username**. + +5. Enter your **Password**. + +6. Enter your **Repository base URL** (required). + +### Bitbucket + +To connect to a private BitBucket repository, see the [BitBucket documentation](https://support.atlassian.com/bitbucket-cloud/docs/app-passwords/) to learn how to create a BitBucket App password. Then, create a new credential in Seqera with these steps: + +**Create BitBucket credentials** + +1. From an organization workspace: Select **Credentials > Add Credentials**. From your personal workspace: Go to the user menu and select **Your credentials > Add credentials**. + +2. Enter a **Name** for the new credentials. + +3. Select _BitBucket_ as the **Provider**. + +4. Enter your **Username** and **Password**. + +5. Enter the **Repository base URL** (recommended). This option can be used to apply the credentials to a specific repository, e.g., `https://bitbucket.org/seqeralabs`. + +### AWS CodeCommit + +To connect to a private AWS CodeCommit repository, see the [AWS documentation](https://docs.aws.amazon.com/codecommit/latest/userguide/auth-and-access-control-iam-identity-based-access-control.html) to learn more about IAM permissions for CodeCommit. Then, use your IAM account access key and secret key to create a credential in Seqera with these steps: + +**Create AWS CodeCommit credentials** + +1. From an organization workspace: Select **Credentials > Add Credentials**. From your personal workspace: Go to the user menu and select **Your credentials > Add credentials**. + +2. Enter a **Name** for the new credentials. + +3. Select _CodeCommit_ as the **Provider**. + +4. Enter the **Access key** and **Secret key** of the AWS IAM account that will be used to access the target CodeCommit repository. + +5. 
Enter the **Repository base URL** for which the credentials should be applied (recommended). This option can be used to apply the credentials to a specific region, e.g., `https://git-codecommit.eu-west-1.amazonaws.com`. + +### Self-hosted Git + +Seqera Platform Enterprise supports Git server endpoints. For more information, see [Git configuration](../../version-24.2/enterprise/configuration/overview#git-integration) in the Enterprise installation guide. diff --git a/platform-enterprise/labels/overview.md b/platform-enterprise/labels/overview.md new file mode 100644 index 000000000..a6d87657b --- /dev/null +++ b/platform-enterprise/labels/overview.md @@ -0,0 +1,59 @@ +--- +title: "Labels" +description: "Instructions for using labels in Seqera Platform." +date: "21 Apr 2023" +tags: [labels] +--- + +Labels are workspace-specific free-text annotations that can be applied to pipelines, actions, or workflow runs, either during or after creation. Use labels to organize your work and filter key information. + +Labels aren't propagated to Nextflow during workflow execution. + +### Limits + +:::caution +Label names must contain a minimum of 2 and a maximum of 39 alphanumeric characters, separated by dashes or underscores, and must be unique in each workspace. +::: + +- Label names cannot begin or end with dashes `-` or underscores `_`. +- Label names cannot contain a consecutive combination of `-` or `_` characters (`--`, `__`, `-_`, etc.) +- A maximum of 25 labels can be applied to each resource. +- A maximum of 1000 labels can be used in each workspace. + +### Create and apply labels + +Labels can be created, applied, and edited by a workspace owner, admin, or maintainer. When applying a label, users can select from existing labels or add new ones on the fly. + +### Labels applied to a pipeline + +:::caution +Labels are applied to elements in a workspace-specific context. 
This means that labels applied to a shared pipeline in `workspace A` will not be shown when viewing the pipeline from `workspace B`. +::: + +The labels applied to each pipeline are displayed in both list and card views on the **Launchpad**. Select a pipeline to view all applied labels. + +Apply a label when adding a new pipeline or editing an existing pipeline. + +If a label is applied to a pipeline, all workflow runs of that pipeline will inherit the label. If the labels applied to the pipeline are changed, this change will only be applied to future runs, not past runs. + +### Labels applied to an action + +Apply a label when adding a new action or editing an existing action. Labels applied to an action are displayed in the action card on the **Actions** screen. Hover over labels with **+** to see all labels. + +If a label is applied to an action, all workflow runs triggered by this action inherit the label. If the labels applied to the action are changed, this change will only be applied to future runs, not past runs. + +### Labels applied to a workflow run + +Labels applied to a workflow run are displayed on the **Runs** list screen and on the workflow run detail screen. Hover over labels with **+** to see all labels. Apply a label to a workflow run during launch, on the workflow runs list screen, or on the run detail screen. + +### Search and filter with labels + +You can search and filter pipelines and workflow runs using one or more labels — filter and search are complementary. + +### Overview of labels in a workspace + +All labels used in a workspace can be viewed, added, edited, and deleted by a workspace owner, admin, or maintainer in the workspace **Settings** tab. If a label is edited or deleted on this screen, the change is propagated to all items where the label was used. + +:::caution +You cannot undo editing or deleting a label. 
+::: diff --git a/platform-enterprise/launch/advanced.md b/platform-enterprise/launch/advanced.md new file mode 100644 index 000000000..8038c124d --- /dev/null +++ b/platform-enterprise/launch/advanced.md @@ -0,0 +1,130 @@ +--- +title: "Advanced options" +description: "Advanced guide to launching Nextflow pipelines in Seqera Platform" +date: "21 Apr 2023" +tags: [advanced, launch] +--- + +You can modify the configuration and execution of a pipeline with advanced launch options. + +### Nextflow config file + +Add settings to the Nextflow configuration file. This must follow the same syntax as the [Nextflow configuration file](https://www.nextflow.io/docs/latest/config.html#config-syntax). + +For example, modify the **manifest** section to give the pipeline a name and description that will show up in the Seqera monitoring section: + +```ini + manifest { + name = 'My_RNASeq_Pipeline' + description = 'Generates RNASeq results using a test profile' + } +``` + +#### Nextflow configuration order of priority + +When launching pipelines in Platform, Nextflow configuration values can be supplied from the `nextflow.config` file in the pipeline repository and the **Nextflow config file** field in the pipeline launch form. If different values of the same configuration parameter are defined, Nextflow parameters defined in the launch form **Nextflow config file** field override the same parameters in your `nextflow.config` file. + +Configuration values set in the **Global Nextflow config** field during compute environment creation are pre-filled in the **Nextflow config file** field during pipeline launch. These pre-filled values from the compute environment can be overridden manually during launch. 
+ +| Priority | Nextflow configuration | +|----------|------------------------------------------------------| +| Highest | Pipeline launch form **Nextflow config file** field | +| | Compute environment **Global Nextflow config** field | +| Lowest | Pipeline repository `nextflow.config` file | + +For example, if: + +1. The `nextflow.config` file in your pipeline repository contains this manifest: + + ```ini title="Pipeline repository config file" + manifest { + name = 'A' + description = 'Pipeline description A' + } + ``` + +2. Your compute environment **Global Nextflow config** field contains this manifest: + + ```ini title="Compute environment Global Nextflow config field" + manifest { + name = 'B' + description = 'Pipeline description B' + } + ``` + +3. You specify this manifest in the **Nextflow config file** field on the pipeline launch form: + + ```ini title="Pipeline launch form Nextflow config file field" + manifest { + name = 'C' + description = 'Pipeline description C' + } + ``` + +The resolved configuration will contain the **Nextflow config file** field's manifest: + + ```ini title="Pipeline launch form Nextflow config file field" + manifest { + name = 'C' + description = 'Pipeline description C' + } + ``` + +### Seqera Cloud config file + +Configure per-pipeline Seqera reporting behavior. Settings specified here override the same settings in the `tower.yml` [configuration file](../../version-24.2/enterprise/configuration/overview) for this execution. Use the `reports` key to specify report paths, titles, and MIME types: + +```yml +reports: + reports/multiqc/index.html: + display: "MultiQC Reports" + mimeType: "text/html" +``` + +### Pre and post-run scripts + +Run custom code either before or after the execution of the Nextflow script. These fields allow you to enter shell commands. + +Pre-run scripts are executed in the nf-launcher script prior to invoking Nextflow processes. 
Pre-run scripts are useful for: +- Specifying an alternate Nextflow version to use for the run: + + ```bash + nextflow self-update + export NXF_VER=24.10.0 + ``` + :::info + `nextflow self-update` is only required when updating a pre-24.10.0 version of Nextflow to version 24.10.0 or later. + ::: +- Executor setup, such as loading a private CA certificate. +- Troubleshooting. For example, add `sleep 3600` to your pre-run script to instruct Nextflow to wait 3600 seconds (60 minutes) before process execution after the nf-launcher container is started, to create a window in which to test connectivity and other issues before your Nextflow processes execute. + +Post-run scripts are executed after all Nextflow processes have completed. Post-run scripts are useful for triggering a third party service via API request. + +### Pull latest + +Instruct Nextflow to pull the latest pipeline version from the Git repository. This is equivalent to using the `-latest` flag. + +### Stub run + +Replace Nextflow process commands with command [stubs](https://www.nextflow.io/docs/latest/process.html#stub), where defined, before execution. + +### Main script + +Nextflow will attempt to run the script named `main.nf` in the project repository by default. You can configure a custom script filename in `manifest.mainScript` or you can provide the script filename in this field. + +:::note +If you specify a custom script filename, the root of the default branch in your pipeline repository must still contain blank `main.nf` and `nextflow.config` files. See [Nextflow configuration](../troubleshooting_and_faqs/nextflow) for more information on this known Nextflow behavior. +::: + +### Workflow entry name + +Nextflow DSL2 provides the ability to launch workflows with specific names. Enter the name of the workflow to be executed in this field. + +### Schema name + +Specify the name of a pipeline schema file in the workflow repository root folder to override the default `nextflow_schema.json`. 
+ +### Head job CPUs and memory + +Specify the compute resources allocated to the Nextflow head job. These fields are only displayed for runs executing on [AWS Batch](../compute-envs/aws-batch) and [Azure Batch](../compute-envs/azure-batch) compute environments. + diff --git a/platform-enterprise/launch/cache-resume.mdx b/platform-enterprise/launch/cache-resume.mdx new file mode 100644 index 000000000..558b0d2ad --- /dev/null +++ b/platform-enterprise/launch/cache-resume.mdx @@ -0,0 +1,155 @@ +--- +title: "Nextflow cache and resume" +description: "Guide to Nextflow cache and resume in Seqera Platform" +date: "21 Apr 2023" +tags: [cache, launch, resume, relaunch] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +Nextflow maintains a [cache](https://www.nextflow.io/docs/latest/cache-and-resume.html) directory where it stores the intermediate results and metadata from workflow runs. Workflows executed in Seqera Platform use this caching mechanism to enable users to relaunch or resume failed or otherwise interrupted runs as needed. This eliminates the need to re-execute successfully completed tasks when a workflow is executed again due to task failures or other interruptions. + +## Cache directory + +Nextflow stores all task executions to the task cache automatically, whether or not the resume or relaunch option is used. This makes it possible to resume or relaunch runs later if needed. Platform HPC and local compute environments use the default Nextflow cache directory (`.nextflow/cache`) to store the task cache. Cloud compute environments use the [cloud cache](https://www.nextflow.io/docs/latest/cache-and-resume.html#cache-stores) mechanism to store the task cache in a sub-folder of the pipeline work directory. 
+ +To override the default cloud cache location in cloud compute environments, specify an alternate directory with the [cache](https://www.nextflow.io/docs/latest/process.html#process-cache) directive in your Nextflow configuration file (either in the **Advanced options > Nextflow config file** field on the launch form, or in the `nextflow.config` file in your pipeline repository). + + + + +To customize the cache location used in your AWS Batch and Amazon EKS compute environments, specify an alternate cache directory in your Nextflow configuration: + +```groovy +cloudcache { + enabled = true + path = 's3://your-bucket/.cache' + } +``` + +The new cache directory must be accessible with the credentials associated with your compute environment. An alternate cloud storage location can be specified if you include the necessary credentials for that location in your Nextflow configuration. **This is not recommended for production environments**. + + + + +To customize the cache location used in your Azure Batch compute environments, specify an alternate cache directory in your Nextflow configuration: + +```groovy +cloudcache { + enabled = true + path = 'az://your-container/.cache' + } +``` + +The new cache directory must be accessible with the credentials associated with your compute environment. An alternate cloud storage location can be specified if you include the necessary credentials for that location in your Nextflow configuration. **This is not recommended for production environments**. + + + + + +To customize the cache location used in your Google Cloud Batch and Google Kubernetes Engine compute environments, specify an alternate cache directory in your Nextflow configuration: + +```groovy +cloudcache { + enabled = true + path = 'gs://your-bucket/.cache' + } +``` + +The new cache directory must be accessible with the credentials associated with your compute environment. 
An alternate cloud storage location can be specified if you include the necessary credentials for that location in your Nextflow configuration. **This is not recommended for production environments**. + + + + + +Kubernetes compute environments do not use cloud cache by default. To specify a cloud storage cache directory, include the cloud cache path and necessary credentials for that location in your Nextflow configuration. **This is not recommended for production environments**. + +
+ AWS S3 + + ```groovy + // Specify cloud storage credentials + aws { + accessKey = '' + secretKey = '' + region = '' + } + // Set the cloud cache path + cloudcache { + enabled = true + path = 's3://your-bucket/.cache' + } + ``` + +
+ +
+ Azure Blob Storage + + ```groovy + // Specify cloud storage credentials + azure { + storage { + accountName = '' + accountKey = '' + } + } + // Set the cloud cache path + cloudcache { + enabled = true + path = 'az://your-container/.cache' + } + ``` + +
+ +
+ Google Cloud Storage + + 1. See [these instructions](../compute-envs/google-cloud-batch#iam) to set up IAM and create a JSON key file for the custom service account with permissions to your Google Cloud storage account. + 2. If you run the [gcloud CLI authentication flow](https://nextflow.io/docs/edge/google.html#credentials) with `gcloud auth application-default login`, your Application Default Credentials are written to `$HOME/.config/gcloud/application_default_credentials.json` and picked up by Nextflow automatically. Otherwise, declare the `GOOGLE_APPLICATION_CREDENTIALS` environment variable explicitly with the local path to your service account credentials file created in the previous step. + 3. Add the following to the **Nextflow Config file** field when you [launch](../launch/launchpad#launch-form) your pipeline: + + ```groovy + // Specify cloud storage credentials + google { + location = '' + project = '' + batch.serviceAccountEmail = '' + } + // Set the cloud cache path + cloudcache { + enabled = true + path = 'gs://your-bucket/.cache' + } + ``` + +
+ +
+
+ +## Relaunch a workflow run + +An effective way to troubleshoot a workflow execution is to **Relaunch** it with different parameters. Select the **Runs** tab, open the options menu to the right of the run, and select **Relaunch**. You can edit parameters, such as **Pipeline to launch** and **Revision number** before launch. Select **Launch** to execute the run from scratch. + +:::note +The **Relaunch** option is only available for runs launched from the Seqera Platform interface. +::: + +## Resume a workflow run + +Seqera uses Nextflow's **resume** functionality to resume a workflow run with the same parameters, using the cached results of previously completed tasks and only executing failed and pending tasks. Select **Resume** from the options menu to the right of the run of your choice to launch a resumed run of the same workflow, with the option to edit some parameters before launch. Unlike a relaunch, you cannot edit the pipeline to launch or the work directory during a run resume. + +:::note +The **Resume** option is only available for runs launched from the Seqera Platform interface. +::: + +:::tip +For a detailed explanation of the Nextflow resume feature, see _Demystifying Nextflow resume_ ([Part 1](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html) and [Part 2](https://www.nextflow.io/blog/2019/troubleshooting-nextflow-resume.html)) in the Nextflow blog. +::: + +#### Change compute environment during run resume + +Users with appropriate permissions can change the compute environment when resuming a run. The new compute environment must have access to the original run work directory. This means that the new compute environment must have a work directory that matches the root path of the original pipeline work directory. For example, if the original pipeline work directory is `s3://foo/work/12345`, the new compute environment must have access to `s3://foo/work`. 
\ No newline at end of file diff --git a/platform-enterprise/launch/launchpad.md b/platform-enterprise/launch/launchpad.md new file mode 100644 index 000000000..ce98f508a --- /dev/null +++ b/platform-enterprise/launch/launchpad.md @@ -0,0 +1,155 @@ +--- +title: "Launch pipelines" +description: "Curate and launch workflows" +date: "21 Apr 2023" +tags: [launchpad] +--- + +View, configure, and launch pipelines from your workspace **Launchpad**. + +## Launchpad + +The **Launchpad** enables workspace users to launch pre-configured pipelines, add new pipelines, or perform a quick launch of unsaved pipelines. Use the **Sort by:** dropdown to sort pipelines, either by name or most-recently updated. + +:::note +A pipeline consists of a Nextflow workflow repository, a compute environment, and pipeline parameters. +::: + +The list layout is the default **Launchpad** view. Use the toggle next to the **Search** field to switch between the list and tile views. Both views display the compute environment of each pipeline for easy reference. + +## Launch form + +:::note +In Platform Enterprise version 24.2, the stepped launch form described below is enabled for all user and organization workspaces by default. You can disable the new launch form in some or all organization workspaces with the `TOWER_STEPPED_LAUNCH_FORM_ALLOWED_WORKSPACES` [environment variable](../../version-24.2/enterprise/configuration/overview#core-features). + +Platform Cloud accounts use the new launch form in all user and organization workspaces. +::: + +The launch form is used to launch pipelines and to add pipelines to the **Launchpad**. Select **Launch** next to a saved pipeline in the list, or select **launch a run without configuration** to perform a quick launch of an unsaved pipeline. 
+ +The launch form consists of [General config](#general-config), [Run parameters](#run-parameters), and [Advanced options](#advanced-options) sections to specify your run parameters before execution, and an execution summary. Use section headings or select the **Previous** and **Next** buttons at the bottom of the page to navigate between sections. + +For saved pipelines, **General config** and **Run parameters** fields are prefilled and can be edited before launch. + +### General config + +- **Pipeline to launch**: A Git repository name or URL. For saved pipelines, this is prefilled and cannot be edited. Private repositories require [access credentials](../credentials/overview). + :::note + Nextflow pipelines are Git repositories that can reside on any public or private Git-hosting platform. See [Git integration](../git/overview) in the Seqera docs and [Pipeline sharing](https://www.nextflow.io/docs/latest/sharing.html) in the Nextflow docs for more details. + ::: +- **Revision number**: A valid repository commit ID, tag, or branch name. For saved pipelines, this is prefilled and cannot be edited. +- **Config profiles**: One or more [configuration profile](https://www.nextflow.io/docs/latest/config.html#config-profiles) names to use for the execution. Config profiles must be defined in the `nextflow.config` file in the pipeline repository. +- **Workflow run name**: A unique identifier for the run, pre-filled with a random name. This can be customized. +- **Labels**: Assign new or existing [labels](../labels/overview) to the run. +- **Compute environment**: The [compute environment](../compute-envs/overview) where the run will be launched. +- **Work directory**: The cloud storage or file system path where pipeline scratch data is stored. Seqera will create a scratch sub-folder if only a cloud bucket location is specified. Use file system paths for local or HPC compute environments. 
+ :::note + The credentials associated with the compute environment must have access to the work directory. + ::: + +#### Config profiles + +The dropdown of available config profiles is populated by inspecting the Nextflow configuration in the pipeline repository. A limited form of static analysis is used to detect profiles in the main configuration and included configurations that match any of the following patterns: + +- Includes with a static string: + ```groovy + includeConfig 'conf/profiles.config' + includeConfig 'http://...' + ``` + +- Includes with dynamic string that depends on parameters defined in the config: + ```groovy + includeConfig params.custom_config + includeConfig "${params.custom_config_base}/nfcore_custom.config" + ``` + +- Includes with a ternary expression: + ```groovy + includeConfig params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" + ``` + + :::note + Only the "true" branch is inspected. + ::: + +- Includes within a try-catch statement: + ```groovy + try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" + } catch (Exception e) { + // ... + } + ``` + +### Run parameters + +There are three ways to enter **Run parameters** prior to launch: + +- The **Input form view** displays form fields to enter text, select attributes from dropdowns, and browse input and output locations with [Data Explorer](../data/data-explorer). +- The **Config view** displays a raw schema that you can edit directly. Select JSON or YAML format from the **View as** dropdown. +- **Upload params file** allows you to upload a JSON or YAML file with run parameters. + +Seqera uses a `nextflow_schema.json` file in the root of the pipeline repository to dynamically create a form with the necessary pipeline parameters. Most pipelines contain at least input and output parameters: + +- **input** +Specify compatible input [datasets](../data/datasets) manually or from the dropdown menu. 
Select **Browse** to view the available datasets or browse for files in [Data Explorer](../data/data-explorer). The Data Explorer tab allows you to select input datasets that match your [pipeline schema](../pipeline-schema/overview) `mimetype` criteria (`text/csv` for CSV files, or `text/tsv` for TSV files). + +- **outdir** +Specify the output directory where run results will be saved manually, or select **Browse** to choose a cloud storage directory using [Data Explorer](../data/data-explorer). + +The remaining fields will vary for each pipeline, dependent on the parameters specified in the pipeline schema. + +### Advanced options + +Enter [resource labels](../resource-labels/overview), [pipeline secrets](../secrets/overview), and [advanced options](../launch/advanced) before launch. + +#### Resource labels + +Use resource labels to tag the computing resources created during the workflow execution. While resource labels for the run are inherited from the compute environment and pipeline, admins can override them from the launch form. Applied resource label names must be unique. + +#### Pipeline secrets + +Secrets are used to store keys and tokens used by workflow tasks to interact with external systems. Enter the names of any stored user or workspace secrets required for the workflow execution. + +:::note +In AWS Batch compute environments, Seqera passes stored secrets to jobs as part of the Seqera-created job definition. Seqera secrets cannot be used in Nextflow processes that use a [custom job definition](https://www.nextflow.io/docs/latest/aws.html#custom-job-definition). +::: + +#### Advanced options + +See [Advanced options](../launch/advanced). + +After you have filled the necessary launch details, select **Launch**. The **Runs** tab shows your new run in a **submitted** status at the top of the list. 
Select the run name to navigate to the [**View Workflow Run**](../monitoring/overview) page and view the configuration, parameters, status of individual tasks, and run report. + +:::note +For more information on relaunch and resume, see [Nextflow cache and resume](./cache-resume). +::: + +## Add new pipeline + +From the **Launchpad**, select **Add pipeline** to add a new pipeline with pre-saved parameters to your workspace. The fields on the new pipeline form are similar to the pipeline launch form. + +See [Add pipelines](../getting-started/quickstart-demo/add-pipelines) for instructions to add pipelines to your workspace via [Seqera Pipelines](https://seqera.io/pipelines) or the Launchpad. + +:::note +Pipeline names must be unique per workspace. +::: +:::tip +To create your own customized Nextflow schema for your pipeline, see [Pipeline schema](../pipeline-schema/overview) and the `nf-core` workflows that have adopted this. [nf-core/eager](https://github.com/nf-core/eager/blob/master/nextflow_schema.json) and [nf-core/rnaseq](https://github.com/nf-core/rnaseq/blob/master/nextflow_schema.json) are good examples. +::: + +## Email notifications + +You can receive email notifications upon completion or failure of a workflow execution. + +Select **Your profile** from the user menu, then toggle **Send notification email on workflow completion** at the bottom of the page. + +## Edit pipeline + +Workspace maintainers can edit existing pipeline details. Select the options menu next to the pipeline in the **Launchpad** list, then select **Edit** to load the pipeline parameters form with pre-filled existing pipeline details to be edited. See [Add from the Launchpad](../getting-started/quickstart-demo/add-pipelines#add-from-the-launchpad) for more information on the pipeline parameters form fields. + +Select **Update** when you are ready to save the updated pipeline. + +:::note +Pipeline names must be unique per workspace. 
+::: diff --git a/platform-enterprise/limits/overview.md b/platform-enterprise/limits/overview.md new file mode 100644 index 000000000..61ccae8be --- /dev/null +++ b/platform-enterprise/limits/overview.md @@ -0,0 +1,45 @@ +--- +title: "Usage limits" +description: "An overview of Seqera Cloud usage limits" +date: "19 Feb 2025" +tags: [limits] +--- + +Seqera Platform elements and features have default limits per organization and workspace. + +### Organizations + +| Description | Basic | Cloud Pro + Enterprise | +| ----------------------- | ----- | ---------------------- | +| Members | 3 | 50, or per license | +| Workspaces | 50 | 50, or per license | +| Teams | 20 | 20, or per license | +| Run history | 250 | 250, or per license | +| Active runs | 3 | 100, or per license | +| Running Studio sessions | 1 | 1000, or per license | + +:::info +Academic institutions and commercial organizations evaluating Seqera Platform are subject to custom usage limits. [Contact us](https://seqera.io/contact-us/) for more information. +::: + +### Workspaces + +| Description | Basic | Cloud Pro + Enterprise | +| ------------ | ----- | ---------------------- | +| Participants | 3 | 50, or per license | +| Pipelines | 100 | 100, or per license | +| Datasets | 100 | 1000, or per license | +| Labels | 1000 | 1000, or per license | + +:::note +Some Enterprise instances on older licenses are limited to 100 labels per workspace. [Contact support](mailto:support@seqera.io) to upgrade your license. +::: + +### Datasets + +| Description | Default limit | +| -------------------- | ------------- | +| File size | 10 MB | +| Versions per dataset | 100 | + +If you need higher limits and capabilities, [contact us](https://seqera.io/contact-us/) to discuss your application requirements. 
diff --git a/platform-enterprise/monitoring/audit-logs.md b/platform-enterprise/monitoring/audit-logs.md new file mode 100644 index 000000000..b3ca3bd7f --- /dev/null +++ b/platform-enterprise/monitoring/audit-logs.md @@ -0,0 +1,33 @@ +--- +title: "Audit logs" +description: An overview of application event audit logs in the Admin panel +date: "08 Apr 2024" +tags: [logging, audit logs, admin panel] +--- + +Root users can view application event audit logs from the [Admin panel](../administration/overview) **Audit logs** tab. + +:::info +Application event audit logs are retained for 365 days by default. In Platform Enterprise, this retention period can be [customized](../../version-24.2/enterprise/configuration/overview#logging). +::: + +### Audit log event format + +Audit log entries record the following event details: + +- **Type**: A brief event description, such as `user_sign_in`, `credentials_created`, etc. +- **Target**: ID of the resource associated with the event, such as ID of created credentials, etc. +- **Principal**: ID of the user that performed the action. User IDs for user-initiated events, `system` for Seqera-initiated events. +- **Status**: Additional event information, such as workflow completion status, user sign-in method, etc. +- **Organization ID** +- **Organization name** +- **Workspace ID** +- **Workspace name** +- **Client IP**: IP address of user/client initiating the event. Empty for Seqera-initiated events. +- **Creation date**: Event timestamp in `YYYY-MM-DD-HH-MM-SS` format. + +### Audit log events + +Audit logs include administration, security, and application resource events. 
+ +::table{file=configtables/log_events.yml} diff --git a/platform-enterprise/monitoring/cloud-costs.md b/platform-enterprise/monitoring/cloud-costs.md new file mode 100644 index 000000000..5c93f6dbe --- /dev/null +++ b/platform-enterprise/monitoring/cloud-costs.md @@ -0,0 +1,72 @@ +--- +title: "Monitoring cloud costs" +description: Guidelines for monitoring Seqera Platform cloud expenditure +date: "12 Apr 2023" +tags: [aws, gcp, azure, cloud costs, cost, billing, alerts] +--- + +Monitor cloud costs to manage resources effectively and prevent unexpected expenses when running pipelines in Seqera Platform. + +## Resource labels + +Use [Resource labels](../resource-labels/overview) in your compute environments to annotate and track the actual cloud resources consumed by a pipeline run. Resource labels are applied to the resources spawned during a run and sent to your cloud provider in `key=value` format. + +## Seqera cost estimate + +Run details include an **Estimated cost** display. This is the total estimated compute cost of all tasks in the pipeline run. + +The Seqera cost estimator should only be used for at-a-glance heuristic purposes. For accounting and legal cost reporting, use resource labels and leverage your compute platform's native cost reporting tools. + +The compute cost of a task is computed as follows: + +$$ +\text{Task cost} = \text{VM hourly rate} \times \text{VM fraction} \times \text{Task runtime} +$$ + +$$ +\quad \text{VM fraction} = \text{max} ( \frac{\text{Task CPUs}}{\text{VM CPUs}}, \frac{\text{Task memory}}{\text{VM memory}} ) +$$ + +$$ +\quad \text{Task runtime} = ( \text{Task complete} - \text{Task start} ) +$$ + +See also: **cost**, **start**, **complete**, **cpus**, and **memory** in the task table. + +Seqera uses a database of prices for AWS, Azure, and Google Cloud, across all instance types, regions, and zones, to fetch the VM price for each task. This database is updated periodically to reflect the most recent prices. 
+ +:::note +Prior to version 22.4.x, the cost estimate used `realtime` instead of `complete` and `start` to measure the task runtime. The `realtime` metric tends to underestimate the billable runtime because it doesn't include the time required to stage input and output files. +::: + +The estimated cost is subject to several limitations: + +- It doesn't account for the cost of storage, network, the head job, or how tasks are mapped to VMs. As a result, it tends to underestimate the true cost of a pipeline run. + +- On a resumed pipeline run, the cost of cached tasks is included in the estimated cost. This estimate is an aggregation of all compute costs associated with the run. As a result, the total cost of multiple attempts of a pipeline run tends to overestimate the actual cost, because the cost of cached tasks may be counted multiple times. + +For accurate cost accounting, you should use the cost reporting tools for your cloud provider. + +## Cloud provider cost monitoring and alerts + +AWS, Google Cloud, and Microsoft Azure provide cost alerting and budgeting tools to enable effective cloud resource management and prevent unexpected costs. + +### AWS + +- **Budgets**: [AWS Budgets](https://docs.aws.amazon.com/cost-management/latest/userguide/budgets-managing-costs.html) lets you set custom cost and usage budgets with alerts when costs or usage exceed pre-defined thresholds. Set up notifications via email or SNS (Simple Notification Service) to receive alerts when budget thresholds are reached. + +- **Cost Explorer**: [AWS Cost Explorer](https://docs.aws.amazon.com/cost-management/latest/userguide/ce-what-is.html) provides cost management tools to visualize, understand, and manage your AWS costs and usage over time. 
+ +- **Cost Anomaly Detection**: [AWS Cost Anomaly Detection](https://docs.aws.amazon.com/cost-management/latest/userguide/getting-started-ad.html) uses machine learning models to detect and alert on anomalous spend patterns in your deployed AWS services. + +### Google Cloud + +- **Budgets and budget alerts**: [Budgets](https://cloud.google.com/billing/docs/how-to/budgets) allow you to set budget thresholds for your GCP projects. When costs exceed these thresholds, you can receive alerts via email, SMS, or notifications in the Google Cloud Console. + +- **Cost management tools**: [Cloud Billing](https://cloud.google.com/billing/docs/onboarding-checklist) provides cost management tools such as billing reports and spend visualization to help you analyze and understand your GCP costs. + +### Microsoft Azure + +- **Cost Management**: [Microsoft Cost Management](https://learn.microsoft.com/en-us/azure/cost-management-billing/costs/overview-cost-management) is a suite of FinOps tools that help organizations analyze, monitor, and optimize their Microsoft Cloud costs. + +- **Cost alerts**: Create [alerts](https://learn.microsoft.com/en-us/azure/cost-management-billing/costs/overview-cost-management#monitor-costs-with-alerts) for usage anomalies and costs that exceed pre-defined thresholds. diff --git a/platform-enterprise/monitoring/configtables/log_events.yml b/platform-enterprise/monitoring/configtables/log_events.yml new file mode 100644 index 000000000..c496bbe00 --- /dev/null +++ b/platform-enterprise/monitoring/configtables/log_events.yml @@ -0,0 +1,69 @@ +--- +- + Resource: 'Access tokens' + Events logged: 'Add, delete' + Note: 'Log entry includes the access token ID.' +- + Resource: 'Compute environments' + Events logged: 'Add, edit, delete' + Note: 'Log entry includes the compute environment ID. Edit event entries do not include the edited parameters.' 
+- + Resource: 'Credentials' + Events logged: 'Add, edit, delete, access' + Note: 'Log entry includes the credential ID. A log entry is also created each time the credentials are accessed by the application.' +- + Resource: 'Data Explorer cloud buckets' + Events logged: 'Add, edit, remove, hide, show' + Note: 'Events for public and private buckets are logged.' +- + Resource: 'Studios sessions' + Events logged: 'Add, start, connect, disconnect, stop, delete' + Note: 'Does not include temporary states (starting, stopping, deleting).' +- + Resource: 'Studios custom environments' + Events logged: 'Build start, build success, build fail' + Note: 'Applies only to Wave-enabled custom environment creation.' +- + Resource: 'Data Explorer files' + Events logged: 'Download, upload, preview' + Note: 'Events for public and private bucket files are logged.' +- + Resource: 'Organizations' + Events logged: 'Add, delete' + Note: 'Log entry includes the organization ID.' +- + Resource: 'Participants' + Events logged: 'Add, update role, delete' + Note: 'Log entry includes the participant ID.' +- + Resource: 'Pipelines' + Events logged: 'Add, edit, delete' + Note: 'Log entry includes the pipeline ID. Edit event entries do not include the edited parameters.' +- + Resource: 'Pipeline actions' + Events logged: 'Add, delete' + Note: 'Log entry includes the pipeline action ID.' +- + Resource: 'Pipeline secrets' + Events logged: 'Add, edit, delete' + Note: 'Log entry includes the pipeline secret ID.' +- + Resource: 'Runs' + Events logged: 'Launch, create, relaunch, resume, status change, complete, delete, drop' + Note: 'Relaunched/resumed runs are stored with the initial launch run ID. Deleted runs are marked for deletion before being dropped from the runs database by the application backend (constituting two events).' +- + Resource: 'Teams' + Events logged: 'Add, delete' + Note: 'Log entry includes the team ID.' 
+- + Resource: 'Users' + Events logged: 'Add, edit, delete' + Note: 'Log entry includes the user ID. See [user deletion](https://docs.seqera.io/platform/24.1/data-privacy/overview#user-deletion).' +- + Resource: 'User sessions' + Events logged: 'Login' + Note: 'Login event entries include the login IP address.' +- + Resource: 'Workspaces' + Events logged: 'Add, delete' + Note: 'Log entry includes the workspace ID.' diff --git a/platform-enterprise/monitoring/dashboard.md b/platform-enterprise/monitoring/dashboard.md new file mode 100644 index 000000000..9f9b192b3 --- /dev/null +++ b/platform-enterprise/monitoring/dashboard.md @@ -0,0 +1,56 @@ +--- +title: "Dashboard" +description: "View pipeline run status overview in Seqera Platform." +date: "21 Apr 2023" +tags: [dashboard, pipeline runs, monitoring] +--- + +The Seqera Platform **Dashboard** is accessed from the user menu and provides an overview of: + +- Pipeline runs in your personal and organization workspaces. +- Studio sessions in your organization workspaces only. + +## Pipelines + +You can explore the status of pipelines in your personal and in organizational workspaces. On the **Dashboard** page, select **Pipelines**. + +### Filters and summary + +The **Dashboard** view defaults to all organizations and workspaces you can access. Select the **View** dropdown menu to filter by specific organizations and workspaces, or to view statistics for your personal workspace only. You can filter by time, including a custom date range of up to 12 months. To filter the set of pipelines, select **Filter**. When a filter is applied, the button icon and color changes. + +### Export data + +Select **Export data** in the filter panel near the top of the page to export dashboard data, based on the filters you have applied, in a CSV file. + +### Pipelines per organization + +The pipeline totals for your selected filters are displayed for each organization that you have access to. 
Depending on the filter selected, each card details a separate workspace or organization. Total pipelines for each organization are arranged by workspace and status. + +For a detailed view, you can do one of the following: + +- Select a pipeline integer value in the table to navigate to a list filtered by the status and time range selected. +- Select a workspace name in the table to navigate to a list filtered by the workspace selected. + +## Studios + +You can explore the status of Studio sessions in your organizational workspaces. On the **Dashboard** page, select **Studios**. The following statuses are listed with the number of Studio sessions in each status: + +- `Building` +- `Build-failed` +- `Starting` +- `Running` +- `Stopping` +- `Stopped` +- `Errored` + +### Filters and summary + +The **Dashboard** view defaults to all organizations and workspaces you can access. Select the **View** dropdown menu to filter by organizations and workspaces. Select a status in the table to navigate to a list filtered by the status selected. + +### Export data + +Select **Export data** in the view panel near the top of the page to export a CSV of the dashboard data for the selected organizations and workspaces. + + + +[ds]: ../studios/index diff --git a/platform-enterprise/monitoring/overview.md b/platform-enterprise/monitoring/overview.md new file mode 100644 index 000000000..8a35cf2d3 --- /dev/null +++ b/platform-enterprise/monitoring/overview.md @@ -0,0 +1,96 @@ +--- +title: "Overview" +description: "Monitoring pipeline runs in Seqera Platform." +date: "11 Apr 2024" +tags: [runs, monitoring] +--- + +Workflow executions submitted in Seqera Platform can be monitored wherever you have an internet connection. + +The **Runs** tab contains all previous runs in the workspace. Each new or resumed run is given a random name such as _grave_williams_. Each row corresponds to a specific run. 
As a run executes, it can transition through the following states: + +- `submitted`: Pending execution +- `running`: Running +- `succeeded`: Completed successfully +- `failed`: Successfully executed, where at least one task failed with a `terminate` [error strategy](https://www.nextflow.io/docs/latest/process.html#errorstrategy) +- `cancelled`: Stopped manually during execution +- `unknown`: Indeterminate status + +Select the name of a run from the list to display that run's [execution details](./run-details). + +## Save run as pipeline + +_Available from version 23.1_ + +From the **Runs** list, any run can be saved as a new pipeline for future use, regardless of run status. Select the item menu next to any run in the list, then select **Save as pipeline**. In the dialog box shown, you can edit the pipeline name, add labels, and **Save**. + +You can **Review and edit** any run details prior to saving the pipeline. After you've saved the pipeline, it is listed on the **Launchpad** and can be run from the same workspace where it was created. + +## All runs view + +The **All runs** page, accessed from the user menu, provides a comprehensive overview of the runs accessible to a user across the entire Seqera instance. This facilitates overall status monitoring and early detection of execution issues from a single view, split across organizations and workspaces. + +The **All runs** view defaults to all organizations and workspaces you can access. Select the dropdown next to **View** to filter by specific organizations and workspaces, or to view runs from your personal workspace only. + +### Search + +The **Search workflow** bar allows you to filter by one or more `keyword:value` entries: + +- `status` +- `label` +- `workflowId` +- `runName` +- `username` +- `projectName` +- `after`: YYYY-MM-DD +- `before`: YYYY-MM-DD +- `sessionId` +- `is:starred` + +The search field populates with available suggestions when entering valid keywords. 
Suggested results for `label:` include available labels from all workspaces. Labels present in multiple workspaces are only suggested once. + +Search covers all workflow runs inside a workspace, enabling easy retrieval of complex queries. Enter a search query in the **Search workflow** field to search and filter the runs in a workspace. The search text is interpreted by identifying all valid substrings formatted by `keyword:value`, combining all the rest in a single freeform text string, and then using all these search criteria to filter the runs. + +For example: + +`rnaseq username:john_doe status:succeeded after:2024-01-01` + +will retrieve all runs from the workspace that meet the following criteria: + +- Ended successfully (`status:succeeded`) +- Launched by user john_doe (`username:john_doe`) +- Include `rnaseq` in the data fields covered by the free text search +- Submitted after January 1, 2024 + +The freetext search uses a _partial_ match to find runs, so it will search for `*freetext*`. The `keyword:value` item uses an _exact_ match to filter runs, so `username:john` will not retrieve runs launched by `john_doe`. + +:::caution +Filtering elements are combined with **AND** logic. This means that queries like `status:succeeded, status:submitted` are formally valid but return an empty list because a workflow can only have one status. + +The freeform text result of all the `keyword:value` pairs is merged into a unique string that includes spaces. This may result in an empty list of results if the search query contains typos. +::: + +:::note +Keywords corresponding to dates (`after` or `before`) are automatically converted to valid ISO-8601, taking your timezone into account. Partial dates are also supported: `before:2022-5` is automatically converted to `before:2022-05-01T00:00:00.000Z`. +::: + +Seqera will suggest matching keywords while you type. Valid values are also suggested for some keywords, when supported. 
+ +### Search keywords + +- **Freeform text** + + The search box allows you to search for partial matches with `project name`, `run name`, `session id`, or `manifest name`. Use wildcards (`*`) before or after keywords to filter results. + +- **Exact match keywords** + + - `workflowId:3b7ToXeH9GvESr`: Search workflows with a specific workflow ID. + - `runName:happy_einstein`: Search workflows with a specific run name. + - `sessionId:85d35eae-21ea-4294-bc92-xxxxxxxxxxxx`: Search workflows with a specific session ID. + - `projectName:nextflow-io/hello`: Search workflows with a specific project name. + - `userName:john_doe`: Search workflows by a specific user. + - `status:succeeded`: Search workflows with a specific status (`submitted`, `running`, `succeeded`, `failed`, `cancelled`, `unknown`). + - `before:2024-01-01`: Search workflows submitted on or before the given date in YYYY-MM-DD format. + - `after:2024-01-01`: Search workflows submitted on or after the given date in YYYY-MM-DD format. + - `label:label1 label:label2`: Search workflows with specific labels. + - `is:starred`: Search workflows that have been starred by the user. diff --git a/platform-enterprise/monitoring/run-details.md b/platform-enterprise/monitoring/run-details.md new file mode 100644 index 000000000..3d547a564 --- /dev/null +++ b/platform-enterprise/monitoring/run-details.md @@ -0,0 +1,167 @@ +--- +title: "Run details" +description: "Monitoring a Nextflow pipeline executed through Seqera Platform." +date: "21 Apr 2023" +tags: [logging, monitoring, execution] +--- + +Select a workflow run from the **Runs** list to view execution details. 
This view contains: + +- [Run information](#run-information) with real-time Nextflow execution details +- [General summary](#general-summary) and [task status](#task-status) +- List of pipeline [processes](#processes) +- [Aggregated stats](#aggregate-stats), [load](#load), and [utilization](#utilization) +- [Tasks](#tasks) and [metrics](#metrics) + +### Run information + +This section contains details about the Nextflow execution: + +- The Nextflow **Command line** executed. +- The pipeline **Parameters** (taken from the configuration `params` scope). +- The **Configuration** files and the final resolved configuration. +- The **Execution log** from the main Nextflow process, updated in real-time. +- Available **Reports**, if any are [configured](../reports/overview). + +### General summary + +The **General** panel displays top-level information about a pipeline run: + +- Unique workflow run ID +- Run name +- Timestamp of run start +- Project revision and Git commit ID +- Nextflow session ID +- Username of the launcher +- Work directory path + + :::tip + If your work directory resides in cloud storage, select the work directory path in the **General** panel to browse its contents in [Data Explorer](../data/data-explorer). + ::: + +- Container image +- Executor +- Compute environment name +- Resource labels +- Nextflow version + +Hover over each item with the cursor to view a description. Hover over the compute environment name for more compute environment details. 
+ +### Task status + +The **Status** panel provides a real-time status of all tasks in the pipeline run: + +- **pending**: The task has been created, but not yet submitted to an executor +- **submitted**: The task has been submitted to an executor, but is not yet running +- **running**: The task has been launched by an executor (the precise definition of "running" may vary for each executor) +- **cached**: A previous (and valid) execution of the task was found and used instead of executing the task again (See [Cache and resume](../launch/cache-resume)) +- **completed**: The task completed successfully +- **failed**: The task failed + +### Processes + +The **Processes** panel displays the status of each process in a pipeline run. In Nextflow, a process is an individual step in a pipeline, while a task is a particular invocation of a process for given input data. In the panel, each process is shown with a progress bar indicating how many tasks have been completed for that process. + +The progress bar is color-coded based on task status (**created**, **submitted**, **completed**, **failed**). + +Select a process to navigate to the [Tasks](#tasks) panel and filter the table contents by the selected process. + +### Aggregate stats + +The **Aggregate stats** panel displays a real-time summary of the resources used by a pipeline run. + +#### Wall time + +The _wall time_ is the duration of the entire workflow run, from submission to completion. + +#### CPU time + +The _CPU time_ is the total CPU time used by all tasks. It is based on the CPUs _requested_, not the actual CPU usage. The CPU time of an individual task is computed as follows: + +$$ +\text{CPU time (CPU-hours)} = \text{Task CPUs} \times \text{Task runtime} +$$ + +The runtime of an individual task is computed as follows: + +$$ +\text{Task runtime} = \text{Task complete} - \text{Task start} +$$ + +See also: **cpus**, **start**, and **complete** in the task table. 
+ +#### Total memory + +The _total memory_ is the total memory used by all tasks. It is based on the memory _requested_, not the actual memory usage. + +See also: **peakRss** in the task table. + +#### Read and write + +The _read_ and _write_ are the total amount of data read from and written to storage. + +See also: **readBytes** and **writeBytes** in the task table. + +#### Estimated cost + +See [Cloud costs](../monitoring/cloud-costs#seqera-cost-estimate). + +### Load + +The **Load** panel displays the current number of running tasks and CPU cores vs the maximum number of tasks and CPU cores for the entire pipeline run. + +These metrics measure the level of parallelism achieved by the pipeline. Use these metrics to determine whether your pipeline runs are fully utilizing the capacity of your compute environment. + +### Utilization + +The **Utilization** panel displays the average resource utilization of all tasks that have completed successfully in a pipeline run. The CPU and memory efficiency of a task are computed as follows: + +$$ +\text{CPU efficiency (\%)} = \text{CPU usage (\%)} \times \text{Task CPUs} +$$ + +$$ +\text{Memory efficiency (\%)} = \frac{ \text{Peak memory usage} }{ \text{Task memory} } \times \text{100 \%} +$$ + +See also: **pcpu**, **cpus**, **peakRss**, and **memory** in the task table. + +These metrics measure how efficiently the pipeline is using its compute resources. Low utilization indicates that the pipeline may be over-requesting resources for some tasks. + +### Tasks + +The **Tasks** panel shows all the tasks that were executed in a pipeline run. + +#### Search and filter tasks + +Use the search bar to filter tasks with substrings in the table columns such as **process**, **tag**, **hash**, and **status**. For example, if you enter `succeeded` in the **Search task** field, the table displays only tasks that succeeded. + +#### Task details + +Select a task in the task table to open the **Task details** dialog. 
The dialog has three tabs: + +- **About** + - **Name**: Process name and tag + - **Command**: Task script, defined in the pipeline process + - **Status**: Exit code, task status, attempts + - **Work directory**: Directory where the task was executed + - **Environment**: Environment variables supplied to the task + - **Execution time**: Metrics for task submission, start, and completion time + - **Resources requested**: Metrics for the resources requested by the task + - **Resources used**: Metrics for the resources used by the task + +- **Execution log** + + The **Execution log** tab provides a real-time log of the selected task's execution. Task execution and other logs (such as `stdout` and `stderr`) are available for download from here, if still available in your compute environment. + +- **Data Explorer** + + If the pipeline work directory is in cloud storage, this tab shows a [Data Explorer](../data/data-explorer) view of the task's work directory location with the files associated with the task. + +### Metrics + +The **Metrics** panel displays interactive plots for CPU usage, memory usage, task duration, and I/O usage, grouped by process. These metrics include succeeded and failed tasks. Use these plots to quickly inspect a pipeline run to determine the resources requested and consumed by each process. + +:::tip +Hover the cursor over each box plot to show more details. +::: \ No newline at end of file diff --git a/platform-enterprise/orgs-and-teams/organizations.md b/platform-enterprise/orgs-and-teams/organizations.md new file mode 100644 index 000000000..5eabc5861 --- /dev/null +++ b/platform-enterprise/orgs-and-teams/organizations.md @@ -0,0 +1,92 @@ +--- +title: "Organizations" +description: "Manage organizations in Seqera Platform." +date: "21 Apr 2023" +tags: [organizations, administration] +--- + +Organizations are the top-level structure and contain workspaces, members, and teams. 
You can create multiple organizations, each of which can contain multiple workspaces with shared users and resources. This means you can customize and organize the use of resources while maintaining an access control layer for users associated with a workspace. + +Organization owners can add or remove members from an organization or workspace, and can allocate specific access roles within workspaces. Teams provide a way to group users and participants together, such as `workflow-developers` or `analysts`, and apply access control for all users within that team. + +You can also add external collaborators to an organization. + +### Create an organization + +1. From the user menu, select [Your organizations](https://cloud.seqera.io/orgs), then **Add Organization**. +2. Enter a **Name** and **Full name** for your organization. +3. Enter any other optional fields as needed: **Description**, **Location**, **Website URL**, and **Logo**. +4. Select **Add**. + +### Edit an organization + +:::note +From version 23.2, **organization owners** can edit their organization name, either from the organizations page or the [Admin panel](../administration/overview). +::: + +As an **organization owner**, access the organization page from the organizations and workspaces dropdown, or open the user menu and select **Your organizations** to view and edit your organizations. As a root user, you can also edit organizations from the [Admin panel](../administration/overview). + +Open the **Settings** tab on the organization page, and select **Edit** in the **Edit Organization** row. Update the settings and select **Update** to save. + +### Organization resource usage tracking + +Select **Usage overview** next to the organization and workspace selector dropdown to view a window with the following usage details: + +- **Run history**: The total number of pipeline runs. +- **Concurrent runs**: Total simultaneous pipeline runs. 
+- **Running Studio sessions**: Number of concurrent running Studio sessions. +- **Users**: Total users per organization. + +Organization resource usage information is also displayed on the organization's **Settings** tab, under **Usage**. + +Select **Contact us to upgrade** if you need to increase your Platform usage limits for your organization. + +:::info +Usage limits differ per organization and [subscription type](https://seqera.io/pricing/). [Contact us](https://seqera.io/contact-us/) to discuss your needs. +::: + +## Members + +You can view the list of all organization **Members** from the organization's page. Once an organization is created, the user who created the organization is the default owner of that organization. You can invite or add additional members to the organization from the organization page or the [Admin panel](../administration/overview). + +Seqera provides access control for members of an organization by classifying them either as an **Owner** or a **Member**. Each organization can have multiple owners and members. + +### Add a member + +To add a new member to an organization: + +1. Go to the **Members** tab of the organization menu. +2. Select **Add member**. +3. Enter the name or email address of the user you'd like to add to the organization. + +An email invitation will be sent to the user. Once they accept the invitation, they can switch to the organization (or organization workspace) from the workspace dropdown. + +:::note +For information about what happens when a user deletes their account, see [user deletion](../data-privacy/overview#user-deletion). +::: + +## Teams + +**Teams** allow organization **owners** to group members and collaborators together into a single unit and to manage them as a whole. + +### Create a new team + +To create a new team within an organization: + +1. Go to the **Teams** tab of the organization menu. +2. Select **Add Team**. +3. Enter the **Name** of the team. +4. 
Optionally, add the **Description** and the team's **Avatar**. +5. Select **Add**. + +To start adding members to your team, select **Edit > Members of team > Add member** and enter the name or email address of the organization members or collaborators. + +## Collaborators + +**Collaborators** are users who are invited to an organization's workspace, but are not members of that organization. As a result, their access is limited to that organization workspace. You can view the list of all organization **Collaborators** from the organization's page. + +New collaborators to an organization's workspace can be added as **Participants** from the workspace page. See [User roles](./roles) to learn more about participant access levels. + +:::note
**Collaborators** can only be added from a workspace. For more information, see [workspace management](./workspace-management#create-a-new-workspace). +::: diff --git a/platform-enterprise/orgs-and-teams/roles.md b/platform-enterprise/orgs-and-teams/roles.md new file mode 100644 index 000000000..11c1c9090 --- /dev/null +++ b/platform-enterprise/orgs-and-teams/roles.md @@ -0,0 +1,77 @@ +--- +title: "User roles" +description: "Understand the various roles in Seqera Platform." +date: "10 Jun 2024" +tags: [roles, user-roles] +--- + +Organization owners can assign role-based access levels to individual **participants** and **teams** in an organization workspace. + +:::tip +You can group **members** and **collaborators** into **teams** and apply a role to that team. Members and collaborators inherit the access role of the team. +::: + +### Organization user roles + +- **Owner**: After an organization is created, the user who created the organization is the default owner of that organization. Additional users can be assigned as organization owners. Owners have full read/write access to modify members, teams, collaborators, and settings within an organization. +- **Member**: A member is a user who is internal to the organization. 
Members have an organization role and can operate in one or more organization workspaces. In each workspace, members have a participant role that defines the permissions granted to them within that workspace. + +### Workspace participant roles + +| Permission / Role | Owner | Admin | Maintain | Launch | Connect | View | +|--------------------------------------------|:-------:|:-------:|:----------:|:--------:|:---------:|:------:| +| **Organization: Settings:** Add, edit, delete | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **Organization: Workspaces:** Add, delete | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **Organization: Workspaces:** Edit, change visibility | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| **Organization: Members:** Add, delete, change role | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **Organization: Teams:** Add, edit, delete | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **Organization: Teams: Members:** Add, remove | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **Organization: Teams: Workspaces:** Add, remove, change role | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **Organization: Collaborators:** Add, edit, delete | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| **Organization: Managed identities:** Add, delete | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **Organization: Managed identities:** Edit | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| **Organization: Managed identities: Users:** Manage credentials | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **Workspace: Settings: Studios:** Edit session lifespan | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| **Workspace: Settings: Labels & Resource Labels:** Add, edit, delete | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Compute environments:** Add, rename, make primary, duplicate, delete | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| **Workspace: Actions:** Add, edit, delete | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Credentials:** Add, edit, delete | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Secrets:** Add, edit, delete | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Participants:** Add, remove, change role | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Pipelines:** Launch | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | +| **Workspace: Pipelines:** View | 
✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Workspace: Pipelines:** Define input/output parameters | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | +| **Workspace: Pipelines:** Modify execution configurations | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Pipelines:** Add, edit, duplicate, delete | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | +| **Workspace: Pipelines:** Modify resource labels | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Pipelines:** Create, modify, delete | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| **Workspace: Pipelines: Run:** Apply labels, relaunch, save as new pipeline | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Pipelines: Run:** Resume, delete, star (favourite) | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | +| **Workspace: Pipelines:** Modify resource labels | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Datasets:** Add, edit | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | +| **Workspace: Datasets:** Delete | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Data Explorer:** Upload, download, preview data | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Data Explorer:** Attach, edit, remove buckets | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Data Explorer:** Hide/unhide buckets | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Data Explorer:** Edit bucket metadata | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Studios:** Add, edit, delete a studio | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Studios:** List/search/view studios | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Workspace: Studios:** Connect to a running session | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| **Workspace: Studios:** Add, edit, delete studio | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Studios:** Edit studio resource labels | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Studios:** Start, stop studio session | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Studios:** Add as new (duplicate studio) | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace: Studios: Checkpoints:** Edit studio checkpoint name | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| **Workspace:** View (read-only) resources | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +### Role inheritance + +If a user is concurrently assigned to a workspace as both a named 
**participant** and member of a **team**, Seqera assigns the higher of the two privilege sets. + +Example: + +- If the participant role is Launch and the team role is Admin, the user will have Admin rights. +- If the participant role is Admin and the team role is Launch, the user will have Admin rights. +- If the participant role is Launch and the team role is Launch, the user will have Launch rights. + +As a best practice, use teams as the primary vehicle for assigning rights within a workspace and only add named participants when one-off privilege escalations are deemed necessary. diff --git a/platform-enterprise/orgs-and-teams/workspace-management.md b/platform-enterprise/orgs-and-teams/workspace-management.md new file mode 100644 index 000000000..dcb9e1b9d --- /dev/null +++ b/platform-enterprise/orgs-and-teams/workspace-management.md @@ -0,0 +1,91 @@ +--- +title: "Workspaces" +description: "Manage users and teams for an organization in Seqera Platform." +date: "24 Apr 2023" +tags: [workspaces, teams, users, administration] +--- + +Each user has a unique **user workspace** to manage resources such as pipelines, compute environments, and credentials. You can also create multiple workspaces within an organization context and associate each of these workspaces with dedicated teams of users, while providing fine-grained access control for each of the teams. + +**Organization workspaces** extend the functionality of user workspaces by adding the ability to fine-tune access levels for specific members, collaborators, or teams. This is achieved by managing **participants** in the organization workspaces. + +Organizations consist of members, while workspaces consist of participants. + +:::note +A workspace participant may be a member of the workspace organization or a collaborator within that workspace only. Collaborators count toward the total number of workspace participants. See [Usage limits](../limits/overview). 
+::: + +## Create a new workspace + +Organization owners and admins can create a new workspace within an organization: + +1. Go to the **Workspaces** tab of the organization page. +2. Select **Add Workspace**. +3. Enter the **Name** and **Full name** for the workspace. +4. Optionally, add a **Description** for the workspace. +5. Under **Visibility**, select either **Private** or **Shared**. Private visibility means that workspace pipelines are only accessible to workspace participants. +6. Select **Add**. + +:::tip +As a workspace owner, you can modify optional workspace fields after workspace creation. You can either select **Edit** on an organization's workspaces list or the **Settings** tab within the workspace page. +::: + +Apart from the **Participants** tab, the _organization_ workspace is similar to the _user_ workspace. As such, the relation to [runs](../launch/launchpad), [actions](../pipeline-actions/overview), [compute environments](../compute-envs/overview), and [credentials](../credentials/overview) is the same. + +## Edit a workspace + +:::note +From version 23.2, **workspace owners** can edit their workspace name, either from the workspace settings tab or the [Admin panel](../administration/overview). +::: + +Open the **Settings** tab on the workspace page and select **Edit Workspace**. Make your updates and select **Update** to save changes. + +## Add a new participant + +A new workspace participant can be an existing organization member, team, or collaborator. To add a new participant to a workspace: + +1. Go to the **Participants** tab in the workspace menu. +2. Select **Add participant**. +3. Enter the **Name** of the new participant. +4. Optionally, update the participant **role**. + +## Workspace run monitoring + +To allow users executing pipelines from the command line to share their runs with a given workspace, see [deployment options](../getting-started/deployment-options#nextflow--with-tower). 
+ +Seqera Platform introduces the concept of shared workspaces as a solution for synchronization and resource sharing within an organization. A shared workspace enables the creation of pipelines in a centralized location, making them accessible to all members of an organization. + +The benefits of using a shared workspace within an organization include: + +- **Define once and share everywhere**: Set up shared resources once and automatically share them across the organization. +- **Centralize the management of key resources**: Organization administrators can ensure the correct pipeline configuration is used in all areas of an organization without the need to replicate pipelines across multiple workspaces. +- **Immediate update adoption**: Updated parameters for a shared pipeline become immediately available across the entire organization, reducing the risk of pipeline discrepancies. +- **Computational resource provision**: Pipelines in shared workflows can be shared along with the required computational resources. This eliminates the need to duplicate resource setup in individual workspaces across the organization. Shared workspaces centralize and simplify resource sharing within an organization. + +### Create a shared workspace + +Creating a shared workspace is similar to the creation of a private workspace, with the exception of the **Visibility** option, which must be set to **Shared**. + +### Create a shared pipeline + +When you create a pipeline in a shared workspace, associating it with a [compute environment](../compute-envs/overview) is optional. + +If a compute environment from the shared workspace is associated with the pipeline, it will be available to users in other organization workspaces to launch the shared pipeline with the associated compute environment by default. 
+ +### Use shared pipelines from a private workspace + +Once a pipeline is set up in a shared workspace and associated with a compute environment in that workspace, any user can launch the pipeline from an organization workspace using the shared workspace's compute environment. This eliminates the need for users to replicate shared compute environments in their private workspaces. + +:::note +The shared compute environment will not be available to launch other pipelines limited to that specific private workspace. +::: + +If a pipeline from a shared workspace is shared **without** an associated compute environment, users can run it from other organization workspaces. By default, the **primary** compute environment of the launching workspace will be selected. + +### Make shared pipelines visible in a private workspace + +:::note +Pipelines from _all_ shared workspaces are visible when the visibility is set to **Shared workspaces**. +::: + +To view pipelines from shared workspaces, go to the [Launchpad](../launch/launchpad) and set the **Filter > Pipelines from** option to **This and shared workspaces**. \ No newline at end of file diff --git a/platform-enterprise/pipeline-actions/overview.md b/platform-enterprise/pipeline-actions/overview.md new file mode 100644 index 000000000..f4cdd7ce6 --- /dev/null +++ b/platform-enterprise/pipeline-actions/overview.md @@ -0,0 +1,52 @@ +--- +title: "Pipeline actions" +description: "Automate executions with pipeline actions and webhooks in Seqera Platform." +date: "24 Apr 2023" +tags: [actions, webhooks, automation] +--- + +Actions enable event-based pipeline execution, such as triggering a pipeline launch with a GitHub webhook whenever the pipeline repository is updated. Seqera Platform currently offers support for native **GitHub webhooks** and a general **Tower webhook** that can be invoked programmatically. + +### GitHub webhooks + +A **GitHub webhook** listens for any changes made in the pipeline repository. 
When a change occurs, it triggers the launch of the pipeline automatically. + +:::note +You must sign in to Seqera using GitHub authentication to create a GitHub webhook action. If you're signed in via Google, the **Add** button in step 6 below will be inactive. +::: + +To create a new action, select the **Actions** tab and select **Add Action**. + +1. Enter a **Name** for your action. +1. Select **GitHub webhook** as the **Event source**. +1. Select the **Compute environment** where the pipeline will be executed. +1. Select the **Pipeline to launch** and (optionally) the **Revision number**. +1. Enter the **Work directory**, the **Config profiles**, and the **Pipeline parameters**. +1. Select **Add**. + +The pipeline action is now set up. When a new commit occurs for the selected repository and revision, an event is triggered and the pipeline is launched. + +Workspace maintainers can edit pipeline actions. Select **Edit** from the options menu to the right of the action on the **Actions** list to load the action details. + +Select **Update** to save the updated pipeline action. + +:::note +Workspace maintainers can edit the names of existing pipeline actions from the **Edit Action** page. +::: + +### Tower launch hooks + +A **Tower launch hook** creates a custom endpoint URL which can be used to trigger the execution of your pipeline programmatically from a script or web service. + +To create a new action, select the **Actions** tab and select **Add Action**. + +1. Enter a **Name** for your action. +1. Select **Tower launch hook** as the event source. +1. Select the **Compute environment** to execute your pipeline. +1. Enter the **Pipeline to launch** and (optionally) the **Revision number**. +1. Enter the **Work directory**, the **Config profiles**, and the **Pipeline parameters**. +1. Select **Add**. + +The pipeline action is now set up and the new endpoint can be used to launch the corresponding pipeline programmatically. 
+ +When you create a **Tower launch hook**, you also create an **access token** for launching pipelines. Access tokens can be managed on the [tokens page](https://cloud.seqera.io/tokens), which is also accessible from the user menu. diff --git a/platform-enterprise/pipeline-optimization/overview.md b/platform-enterprise/pipeline-optimization/overview.md new file mode 100644 index 000000000..40ed101bd --- /dev/null +++ b/platform-enterprise/pipeline-optimization/overview.md @@ -0,0 +1,62 @@ +--- +title: "Pipeline resource optimization" +description: "Optimize the resource usage of your pipelines to save time and money." +date: "12 Feb 2024" +tags: [compute, resource, optimization] +--- + +**Available from version 23.3.0** + +Pipeline resource optimization takes the resource usage information from previous workflow runs to optimize subsequent runs. + +When a run completes successfully, an _optimized profile_ is created. This profile consists of Nextflow configuration settings for each process and each resource directive (where applicable): `cpus`, `memory`, and `time`. The optimized setting for a given process and resource directive is based on the maximum use of that resource across all tasks in that process. + +:::caution +Due to the variability of production pipeline data inputs, optimization results may vary per run. The optimization profile can be updated or removed from your pipeline if you experience unexpected results. Contact [support](https://support.seqera.io) for further assistance. +::: + +## Optimize a pipeline + +On the **Launchpad**, each pipeline that can be optimized shows a lightbulb icon. Any pipeline with at least one successful run can be optimized. + +1. Select the lightbulb icon to open the **Customize optimization profile** menu. + +2. Under the **Optimization profile** tab, select a previous run from the dropdown. The list contains all successful runs. + +3. Select which **Targets** to optimize. + +4. 
Enable **Retry with dynamic resources** for failed tasks to be retried with increased resources. This option is useful if an optimized setting is too low and causes a task to fail. + +5. Select the **Optimized configuration** tab to preview your configuration. + +6. Select **Save** to save the optimized configuration and enable it for the pipeline. All subsequent launches of the pipeline will use the optimized configuration. + +You can also toggle the optimized profile from the pipeline detail page. + +### Verify the optimized configuration + +You can verify the optimized configuration of a given run by inspecting the resource usage plots for that run and these fields in the run's task table: + +- CPU usage: `pcpu` +- Memory usage: `peakRss` +- Runtime: `start` and `complete` + +### Override the optimized configuration + +While the optimized configuration is applied after the base configuration of the pipeline, it can be overridden by the **Nextflow config file** text box. Ensure there are no conflicting settings in this text box, unless you explicitly want to override some optimization settings. + +### Handle large variations in resource usage + +Each optimized profile is calibrated to a specific run, so it can only be used safely for "similar" runs. Whether a new run is "similar" is subjective, but in general, an optimized profile should only be used for runs that use the same [**compute environment**](../compute-envs/overview) and have similar task-level resource requirements. + +However, it's common for a pipeline to process input files that vary widely in size. In this case, the task-level resource requirements may vary widely for a given process, and the optimized profile may not be accurate or efficient. + +The best way to handle this variation is to create multiple optimized profiles for specific ranges of input sizes. Here is an example strategy: + +1. Separate your input files into "bins" based on their size, e.g., _small_, _medium_, and _large_. 
Duplicate your pipeline in the **Launchpad** for each bin. + +2. For each bin, run the pipeline with a few representative samples from that bin. When the run completes, Seqera automatically creates an optimized profile for it. + +3. Configure and enable the optimized profile for each pipeline. + +You now have multiple optimized profiles to handle a variety of input sizes. Although this example uses three bins, you can use as many or as few bins as you need to handle the variation of your input data. diff --git a/platform-enterprise/pipeline-schema/_images/pipeline_schema_overview.png b/platform-enterprise/pipeline-schema/_images/pipeline_schema_overview.png new file mode 100644 index 000000000..7afee6566 Binary files /dev/null and b/platform-enterprise/pipeline-schema/_images/pipeline_schema_overview.png differ diff --git a/platform-enterprise/pipeline-schema/overview.md b/platform-enterprise/pipeline-schema/overview.md new file mode 100644 index 000000000..33d16f746 --- /dev/null +++ b/platform-enterprise/pipeline-schema/overview.md @@ -0,0 +1,34 @@ +--- +title: "Pipeline schema" +description: "Introduction to pipeline schema in Seqera Platform." +date: "24 Apr 2023" +tags: [pipeline, schema] +--- + +Pipeline schema files describe the structure and validation constraints of your workflow parameters. They are used to validate parameters before launch to prevent software or pipelines from failing in unexpected ways at runtime. + +You can populate the parameters in the pipeline by uploading a YAML or JSON file, or in the Seqera Platform interface. The platform uses your pipeline schema to build a bespoke launchpad parameters form. + +See [nf-core/rnaseq](https://github.com/nf-core/rnaseq/blob/e049f51f0214b2aef7624b9dd496a404a7c34d14/nextflow_schema.json) as an example of the pipeline parameters that can be represented by a JSON schema file. 
+ +### Building pipeline schema files + +The pipeline schema is based on [json-schema.org](https://json-schema.org/) syntax, with some additional conventions. While you can create your pipeline schema manually, we highly recommend using [nf-core tools](https://nf-co.re/tools/#pipeline-schema), a toolset for developing Nextflow pipelines built by the nf-core community. + +When you run the `nf-core schema build` command in your pipeline root directory, the tool collects your pipeline parameters and gives you interactive prompts about missing or unexpected parameters. If no existing schema file is found, the tool creates one for you. The `schema build` commands include the option to validate and lint your schema file according to best practice guidelines from the nf-core community. + +:::note +The nf-core community creates the schema builder but it can be used with any Nextflow pipeline. +::: + +### Customize pipeline schema + +When the skeleton pipeline schema file has been built with `nf-core schema build`, the command line tool will prompt you to open a [graphical schema editor](https://nf-co.re/pipeline_schema_builder) on the nf-core website. + +![nf-core schema builder interface](./_images/pipeline_schema_overview.png) + +Leave the command line tool running in the background as it checks the status of your schema on the website. When you select **Finished** on the schema editor page, your changes are saved to the schema file locally. + +:::note +Your pipeline schema contains a `mimetype` field that specifies the accepted file type for input [datasets](../data/datasets). When you launch a pipeline from the [Launchpad](../launch/launchpad), the input field drop-down will only show datasets that match the required file type (either `text/csv` or `text/tsv`). 
+::: diff --git a/platform-enterprise/platform-enterprise.md b/platform-enterprise/platform-enterprise.md new file mode 100644 index 000000000..fe3e6898d --- /dev/null +++ b/platform-enterprise/platform-enterprise.md @@ -0,0 +1,30 @@ +--- +title: "Seqera Platform Enterprise" +description: "Introduction to Seqera Platform Enterprise." +date: "24 Apr 2023" +--- + +Seqera Platform Enterprise is an intuitive, centralized command post designed to make scientific analysis accessible at any scale. + +Seqera acts as a pane of glass to effortlessly launch, manage, monitor, and collaborate on scalable [Nextflow](https://www.nextflow.io) data analysis using your own computing resources and infrastructure. Researchers can focus on the science that matters, rather than worrying about infrastructure engineering. + +Seqera helps organizations: + +- Launch, manage, and monitor portable Nextflow pipelines from anywhere in real-time +- Enable non-technical users to run pipelines via the intuitive Launchpad interface +- Easily provision and leverage cloud-based and HPC compute environments +- Share pipelines and data and collaborate securely between local and remote teams +- Access a [curated library](https://seqera.io/pipelines/) of production-proven Nextflow community pipelines from [nf-core](https://nf-co.re/) and others +- [Automate](./getting-started/quickstart-demo/automation) complex tasks as part of broader enterprise processes + +:::tip +Request a [**demo**](https://seqera.io/demo "Seqera Enterprise Demo") to explore using Seqera Enterprise in your own on-premises or cloud environment. +::: + +### What is Nextflow? + +[Nextflow](https://www.nextflow.io) is a framework for the development of data workflows. It enables engineers and data scientists to create and securely deploy custom, parallel data applications to the cloud or traditional on-premises infrastructure. 
Nextflow is characterized by its powerful dataflow programming paradigm and execution engines that allow for transparent deployment. + +Nextflow is both a programming workflow language and an execution runtime that supports a wide range of execution platforms, including popular traditional grid scheduling systems such as Slurm and IBM LSF, and cloud services such as AWS, Azure, and Google Cloud Batch. + +See the [Nextflow documentation](https://www.nextflow.io/docs/latest/) to learn more. diff --git a/platform-enterprise/reports/overview.md b/platform-enterprise/reports/overview.md new file mode 100644 index 000000000..c4ba99b1c --- /dev/null +++ b/platform-enterprise/reports/overview.md @@ -0,0 +1,120 @@ +--- +title: "Reports" +description: "Overview of pipeline reports in Seqera Platform." +date: "24 Apr 2023" +tags: [pipeline, schema] +--- + +Most Nextflow pipelines will generate reports or output files which are useful to inspect at the end of the pipeline execution. Reports may be in various formats (e.g. HTML, PDF, TXT) and would typically contain quality control (QC) metrics that would be important to assess the integrity of the results. + +**Reports** allow you to directly visualise supported file types or to download them via the user interface (see [Limitations](#limitations)). This saves users the time and effort of having to retrieve and visualize output files from their local storage. + +### Visualize reports + +Available reports are listed in a **Reports** tab on the **Runs** page. You can select a report from the table to view or download it (see [Limitations](#limitations) for supported file types and sizes). + +To open a report preview, the file must be smaller than 10 MB. + +You can download a report directly or from the provided file path. Reports larger than 25MB cannot be downloaded directly — the option to download from file path is given instead. 
+ +### Configure reports + +Create a config file that defines the paths to a selection of output files published by the pipeline for Seqera to render reports. There are 2 ways to provide the config file, both of which have to be in YAML format: + +1. **Pipeline repository**: If a file called `tower.yml` exists in the root of the pipeline repository then this will be fetched automatically before the pipeline execution. +2. **Seqera Platform interface**: Provide the YAML definition within the **Advanced options > Seqera Cloud config file** box when: + - Creating a pipeline in the Launchpad. + - Amending the launch settings during pipeline launch. This is available to users with the **Maintain** role only. + +:::caution +Any configuration provided in the interface will override configuration supplied in the pipeline repository. +::: + +### Configure reports for Nextflow CLI runs + +The reports and log files for pipeline runs launched with Nextflow CLI (`nextflow run -with-tower`) can be accessed directly in the Seqera UI. The files generated by the run must be accessible to your Seqera workspace primary compute environment. Specify your workspace prior to launch by setting the `TOWER_WORKSPACE_ID` environment variable. Reports are listed under the **Reports** tab on the run details page. + +Execution logs are available in the **Logs** tab by default, provided the output files are accessible to your workspace primary compute environment. To specify additional report files to be made available, your pipeline repository root folder must include a `tower.yml` file that specifies the files to be included (see below). + +### Reports implementation + +Pipeline reports need to be specified using YAML syntax: + +```yaml +reports: + : + display: text to display (required) + mimeType: file mime type (optional) +``` + +### Path pattern + +Only the published files (using the Nextflow `publishDir` directive) are candidate files for Seqera reports. 
The path pattern is used to match published files to a report entry. It can be a partial path, a glob expression, or just a file name. + +Examples of valid path patterns are: + +- `multiqc.html`: This will match all the published files with this name. +- `**/multiqc.html`: This is a glob expression that matches any subfolder. It's equivalent to the previous expression. +- `results/output.txt`: This will match all the `output.txt` files inside any `results` folder. +- `*_output.tsv`: This will match any file that ends with `_output.tsv`. + +:::caution +To use `*` in your path pattern, you must wrap the pattern in double quotes for valid YAML syntax. +::: + +### Display + +Display defines the title that will be shown on the website. If there are multiple files that match the same pattern, a suffix will be added automatically. The suffix is the minimum difference between all the matching paths. For example, given this report definition: + +```yaml +reports: + "**/out/sheet.tsv": + display: "Data sheet" +``` + +For paths `/workdir/sample1/out/sheet.tsv` and `/workdir/sample2/out/sheet.tsv`, both match the path pattern. The final display name for these paths will be _Data sheet (sample1)_ and _Data sheet (sample2)_. + +### MIME type + +By default, the MIME type is deduced from the file extension, so you don't need to explicitly define it. Optionally, you can define it to force a viewer, for example showing a `txt` file as a `tsv`. It is important that it is a valid MIME-type text, otherwise it will be ignored and the extension will be used instead. + +### Built-in reports + +Nextflow can generate a number of built-in reports: + +- [Execution report](https://nextflow.io/docs/latest/tracing.html#execution-report) +- [Execution timeline](https://nextflow.io/docs/latest/tracing.html#timeline-report) +- [Trace file](https://nextflow.io/docs/latest/tracing.html#trace-report) +- [Workflow diagram](https://nextflow.io/docs/latest/tracing.html#dag-visualisation) (i.e. 
DAG) + +In Nextflow version 24.03.0-edge and later, these reports can be included as pipeline reports in Seqera Platform. Specify them in `tower.yml` like any other file: + +```yaml +reports: + "report.html": + display: "Nextflow execution report" + "timeline.html": + display: "Nextflow execution timeline" + "trace.txt": + display: "Nextflow trace file" + "dag.html": + display: "Nextflow workflow diagram" +``` + +:::note +The filenames must match any custom filenames defined in the Nextflow config: + +- Execution report: `report.file` +- Execution timeline: `timeline.file` +- Trace file: `trace.file` +- Workflow diagram: `dag.file` + +::: + +### Limitations + +The current reports implementation limits rendering to the following formats: `HTML`, `csv`, `tsv`, `pdf`, and `txt`. In-page rendering/report preview is restricted to files smaller than 10 MB. Larger files need to be downloaded first. + +The download is restricted to files smaller than 25 MB. Files larger than 25 MB need to be downloaded from the path. + +YAML formatting validation checks both the `tower.yml` file inside the repository and the UI configuration box. The validation phase will produce an error message if you try to launch a pipeline with non-compliant YAML definitions. diff --git a/platform-enterprise/resource-labels/overview.md b/platform-enterprise/resource-labels/overview.md new file mode 100644 index 000000000..ab1bf56e1 --- /dev/null +++ b/platform-enterprise/resource-labels/overview.md @@ -0,0 +1,203 @@ +--- +title: "Resource labels" +description: "Instructions to use resource labels in Seqera Platform." +date: "24 Apr 2023" +tags: [resource labels, labels] +--- + +From version 22.3.0, Seqera supports resource labels in compute environments and other elements of your Seqera instance. This offers a flexible tagging system for annotation and tracking of the cloud services consumed by a run. 
+Resource labels are sent to the service provider for each cloud compute environment in `key=value` format. + +Resource labels are applied to elements during: + +- Compute environment creation with Batch Forge +- Submission +- Execution + +### Create and apply labels + +Resource labels can be created, applied, and edited by a workspace admin or owner. When applying a label, users can select from existing labels or add new labels on the fly. + +#### Resource labels applied to a compute environment + +Admins can assign a set of resource labels when creating a compute environment. All runs executed using the compute environment will be tagged with its resource labels. Resource labels applied to a compute environment are displayed on the compute environment details page. + +Apply resource labels when you create a new compute environment. + +:::caution +Once the compute environment has been created, its resource labels cannot be edited. +::: + +If a resource label is applied to a compute environment, all runs in that compute environment will inherit it. Likewise, all cloud resources generated during the workflow execution will be tagged with the same resource label. + +#### Resource labels applied to pipelines, actions, and runs + +**Available from Seqera Platform version 22.4.0** + +Admins can override the default resource labels inherited from the compute environment when creating and editing pipelines, actions, and runs on the fly. The custom resource labels associated with each element will propagate to the associated resources in the cloud environment without altering the default resource labels associated with the compute environment. + +When an admin adds or edits the resource labels associated with a pipeline, action, or run, the **submission and execution time** resource labels are altered. This does not affect the resource labels for resources spawned at (compute environment) **creation time**. 
+ +For example, the resource label `name=ce1` is set during AWS Batch compute environment creation. If you create the resource label `pipeline=pipeline1` while creating a pipeline which uses the same AWS Batch compute environment, the EC2 instances associated with that compute environment will still contain only the `name=ce1` label, while the Job Definitions associated with the pipeline will inherit the `pipeline=pipeline1` resource label. + +If a maintainer changes the compute environment associated with a pipeline or run, the **Resource labels** field is updated with the resource labels from the new compute environment. + +### Search and filter with labels + +Search and filter pipelines and runs using one or more resource labels. The resource label search uses a `label:key=value` format. + +### Overview of resource labels in a workspace + +Select a workspace's **Settings** tab to view all the resource labels used in that workspace. Resource labels can only be edited or deleted by admins and only if they're not already associated with **any** resource. This applies to resource labels associated with compute environments and runs. When you add or edit a resource label, you can optionally set the **"Use as default in compute environment form"** option. Workspace default resource labels are prefilled in the **Resource labels** field when creating a new compute environment in that workspace. + +The deletion of a resource label from a workspace has no influence on the cloud environment. + +### Resource label propagation to cloud environments + +:::note +You can't assign multiple resource labels, using the same key, to the same resource — regardless of whether this option is supported by the destination cloud provider. +::: + +Resource labels are only available for cloud environments that use a resource tagging system. Seqera supports AWS, Google Batch, Google Life Sciences, Azure, and Kubernetes. HPC compute environments do not support resource labels. 
+ +Note that the cloud provider credentials you use must have the appropriate roles or permissions to tag resources in your environment. + +When a run is executed in a compute environment with associated resource labels, Seqera Platform propagates the labels to a set of resources (listed below), while Nextflow distributes the labels for the resources spawned at runtime. + +If the compute environment is created through Forge, the compute environment will propagate the tags to the resources generated by the Forge execution. + +:::caution +Resource label propagation is one-way and not synchronized with the cloud environment. This means that Seqera attaches tags to cloud resources, but isn't aware if those tags are changed or deleted directly in the cloud environment. +::: + +### AWS + +When the compute environment is created with Forge, the following resources will be tagged using the labels associated with the compute environment: + +**Forge creation time** + +- FSX Filesystems (does not cascade to files) +- EFS Filesystems (does not cascade to files) +- Batch Compute Environment +- Batch Queue(s) +- ComputeResource (EC2 instances, including EBS volumes) +- Service role +- Spot Fleet role +- Execution role +- Instance Profile role +- Launch template + +**Submission time** + +- Jobs and Job Definitions +- Tasks (via the `propagateTags` parameter on Job Definitions) + +**Execution time** + +- Work Tasks (via the `propagateTags` parameter on Job Definitions) + +At execution time, when the jobs are submitted to Batch, the requests are set up to propagate tags to all the instances and volumes created by the head job. + +The [`forge-policy.json`](https://github.com/seqeralabs/nf-tower-aws/blob/master/forge/forge-policy.json) file contains the roles needed for Batch Forge-created AWS compute environments to tag AWS resources. Specifically, the required roles are `iam:TagRole`, `iam:TagInstanceProfile`, and `batch:TagResource`. 
+ +To view and manage the resource labels applied to AWS resources by Seqera and Nextflow, go to the [AWS Tag Editor](https://docs.aws.amazon.com/tag-editor/latest/userguide/find-resources-to-tag.html) (as an administrative user) and follow these steps: + +Under **Find resources to tag**, search for the resource label key and value in the relevant search fields under **Tags**. Your search can be further refined by AWS region and resource type. Then select **Search resources**. **Resource search results** display all the resources tagged with your given resource label key and/or value. + +**Include Seqera resource labels in AWS billing reports** + +To include the cost information associated with your resource labels in your AWS billing reports: + +1. [Activate](https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/activating-tags.html) the associated tags in the **AWS Billing and Cost Management console**. Newly-applied tags may take up to 24 hours to appear on your cost allocation tags page. +2. When your tags are activated and displayed in **Billing and Cost Management > Cost allocation tags**, you can apply them when you create [cost allocation reports](https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/configurecostallocreport.html#allocation-viewing). + +#### AWS limits + +- Resource label keys and values must contain a minimum of 2 and a maximum of 39 alphanumeric characters (each), separated by dashes or underscores. +- The key and value cannot begin or end with dashes `-` or underscores `_`. +- The key and value cannot contain a consecutive combination of `-` or `_` characters (`--`, `__`, `-_`, etc.) +- A maximum of 25 resource labels can be applied to each resource. +- A maximum of 1000 resource labels can be used in each workspace. +- Keys and values cannot start with `aws` or `user`, as these are reserved prefixes appended to tags by AWS. +- Keys and values are case-sensitive in AWS. 
See [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions) for more information on AWS resource tagging. + +### Google Batch and Google Life Sciences + +When the compute environment is created with Forge, the following resources will be tagged using the labels associated with the compute environment: + +**Submission time** + +- Job (Batch) +- RunPipeline (Life Sciences) + +**Execution time** + +- AllocationPolicy (Batch) +- VirtualMachine (Life Sciences) +- RunPipeline (Life Sciences) + +#### GCP limits + +- Resource label keys and values must contain a minimum of 2 and a maximum of 39 alphanumeric characters (each), separated by dashes or underscores. +- The key and value cannot begin or end with dashes `-` or underscores `_`. +- The key and value cannot contain a consecutive combination of `-` or `_` characters (`--`, `__`, `-_`, etc.) +- A maximum of 25 resource labels can be applied to each resource. +- A maximum of 1000 resource labels can be used in each workspace. +- Keys and values in Google Cloud Resource Manager may contain only lowercase letters. Resource labels created with uppercase characters are changed to lowercase before propagating to Google Cloud. + +See [here](https://cloud.google.com/resource-manager/docs/creating-managing-labels#requirements) for more information on Google Cloud Resource Manager labeling. + +### Azure + +:::note +The labeling system on Azure Cloud uses the term metadata to refer to resource and other labels. +::: + +When creating an Azure Batch compute environment with Forge, resource labels are added to the Pool parameters — this adds a set of `key=value` metadata pairs to the Azure Batch Pool. + +#### Azure limits + +- Resource label keys and values must contain a minimum of 2 and a maximum of 39 alphanumeric characters (each), separated by dashes or underscores. +- The key and value cannot begin or end with dashes `-` or underscores `_`. 
+- The key and value cannot contain a consecutive combination of `-` or `_` characters (`--`, `__`, `-_`, etc.) +- A maximum of 25 resource labels can be applied to each resource. +- A maximum of 1000 resource labels can be used in each workspace. +- Keys are case-insensitive, but values are case-sensitive. +- Microsoft advises against using a non-English language in your resource labels, as this can lead to decoding progress failure while loading your VM's metadata. + +See [here](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/tag-resources?tabs=json) for more information on Azure Resource Manager tagging. + +### Kubernetes + +Both the Head pod and Work pod specs will contain the set of labels associated with the compute environment in addition to the standard labels applied by Seqera Platform and Nextflow. + +:::caution +Currently, tagging with resource labels is not available for the files created during a workflow execution. The cloud instances are the elements being tagged. +::: + +The following resources will be tagged using the labels associated with the compute environment: + +**Forge creation time** + +- Deployment +- PodTemplate + +**Submission time** + +- Head Pod Metadata + +**Execution time** + +- Run Pod Metadata + +#### Kubernetes limits + +- Resource label keys and values must contain a minimum of 2 and a maximum of 39 alphanumeric characters (each), separated by dashes or underscores. +- The key and value cannot begin or end with dashes `-` or underscores `_`. +- The key and value cannot contain a consecutive combination of `-` or `_` characters (`--`, `__`, `-_`, etc.) +- A maximum of 25 resource labels can be applied to each resource. +- A maximum of 1000 resource labels can be used in each workspace. + +See [here](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set) for more information on Kubernetes object labeling. 
diff --git a/platform-enterprise/secrets/overview.md b/platform-enterprise/secrets/overview.md new file mode 100644 index 000000000..35c05b2ec --- /dev/null +++ b/platform-enterprise/secrets/overview.md @@ -0,0 +1,197 @@ +--- +title: "Secrets" +description: "Instructions to use secrets in Seqera Platform." +date: "24 Apr 2023" +tags: [pipeline, secrets] +--- + +**Secrets** store the keys and tokens used by workflow tasks to interact with external systems, such as a password to connect to an external database or an API token. Seqera Platform relies on third-party secret manager services to maintain security between the workflow execution context and the secret container. This means that no secure data is transmitted from your Seqera instance to the compute environment. + +:::note +AWS, Google Cloud, and HPC compute environments are currently supported. See [AWS Secrets Manager](https://docs.aws.amazon.com/secretsmanager/index.html) and [Google Secret Manager](https://cloud.google.com/secret-manager/docs/overview) for more information. +::: + +## Pipeline secrets + +To create a pipeline secret, go to a workspace (private or shared) and select the **Secrets** tab in the navigation bar. Available secrets are listed here and users with appropriate [permissions](../orgs-and-teams/roles) (maintainer, admin, or owner) can create or update secret values. + +:::note +Multi-line secrets must be base64-encoded. +::: + +Select **Add Pipeline Secret** and enter a name and value for the secret. Then select **Add**. + +## User secrets + +Listing, creating, and updating secrets for users is the same as secrets in a workspace. You can access user secrets from **Your secrets** in the user menu. + +:::caution +Secrets defined by a user have higher priority and will override any secrets with the same name defined in a workspace. +::: + +## Use secrets in workflows + +When you launch a new workflow, all secrets are sent to the corresponding secrets manager for the compute environment. 
Nextflow downloads these secrets internally when they're referenced in the pipeline code. See [Nextflow secrets](https://www.nextflow.io/docs/edge/secrets.html#process-secrets) for more information. + +Secrets are automatically deleted from the secret manager when the pipeline completes, successfully or unsuccessfully. + +:::note +In AWS Batch compute environments, Seqera passes stored secrets to jobs as part of the Seqera-created job definition. Seqera secrets cannot be used in Nextflow processes that use a [custom job definition](https://www.nextflow.io/docs/latest/aws.html#custom-job-definition). +::: + +## AWS Secrets Manager integration + +Seqera and associated AWS Batch IAM Roles require additional permissions to interact with AWS Secrets Manager. + +### Seqera instance permissions + +Augment the existing instance [permissions](https://github.com/seqeralabs/nf-tower-aws) with this policy: + +**IAM Permissions** + +Augment the permissions given to Seqera with the following Sid: + +```json + { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AllowTowerEnterpriseSecrets", + "Effect": "Allow", + "Action": [ + "secretsmanager:DeleteSecret", + "secretsmanager:ListSecrets", + "secretsmanager:CreateSecret" + ], + "Resource": "*" + } + ] + } +``` + +### ECS Agent permissions + +The ECS Agent uses the [Batch Execution role](https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html#create-execution-role) to communicate with AWS Secrets Manager. + +- If your AWS Batch compute environment does not have an assigned execution role, create one. +- If your AWS Batch compute environment already has an assigned execution role, augment it. + +**IAM permissions** + +1. Add the [`AmazonECSTaskExecutionRolePolicy` managed policy](https://docs.aws.amazon.com/aws-managed-policy/latest/reference/AmazonECSTaskExecutionRolePolicy.html). +1. 
Add this inline policy: + +```json + { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AllowECSAgentToRetrieveSecrets", + "Effect": "Allow", + "Action": "secretsmanager:GetSecretValue", + "Resource": "arn:aws:secretsmanager::*:secret:tower-*" + } + ] + } +``` + +:::note +Including `tower-*` in the Resource ARN above limits access to Platform secrets only (as opposed to all secrets in the given region). +::: + +**IAM trust relationship** + +```json + { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AllowECSTaskAssumption", + "Effect": "Allow", + "Principal": { + "Service": "ecs-tasks.amazonaws.com" + }, + "Action": "sts:AssumeRole" + } + ] + } +``` + +### Compute permissions + +The Nextflow head job must communicate with AWS Secrets Manager. Its permissions are inherited either from a custom role assigned during the [AWS Batch CE creation process](../compute-envs/aws-batch#advanced-options), or from its host [EC2 instance](https://docs.aws.amazon.com/batch/latest/userguide/instance_IAM_role.html). 
+ +Augment your Nextflow head job permissions source with one of the following policies: + +**EC2 Instance role** + +Add this policy to your EC2 Instance role: + +```json + { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AllowNextflowHeadJobToAccessSecrets", + "Effect": "Allow", + "Action": "secretsmanager:ListSecrets", + "Resource": "*" + } + ] + } +``` + +**Custom IAM role** + +Add this policy to your custom IAM role (specifying `YOUR_ACCOUNT` and `YOUR_BATCH_CLUSTER`): + +```json + { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AllowNextflowHeadJobToAccessSecrets", + "Effect": "Allow", + "Action": "secretsmanager:ListSecrets", + "Resource": "*" + }, + { + "Sid": "AllowNextflowHeadJobToPassRoles", + "Effect": "Allow", + "Action": [ + "iam:GetRole", + "iam:PassRole" + ], + "Resource": "arn:aws:iam::YOUR_ACCOUNT:role/YOUR_BATCH_CLUSTER-ExecutionRole" + } + ] + } +``` + +Add this trust policy to your custom IAM role: + +```json + { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AllowECSTaskAssumption", + "Effect": "Allow", + "Principal": { + "Service": "ecs-tasks.amazonaws.com" + }, + "Action": "sts:AssumeRole" + } + ] + } +``` + +## Google Secret Manager integration + +You must [enable Google Secret Manager](https://cloud.google.com/secret-manager/docs/configuring-secret-manager) in the same project that your Google compute environment credentials have access to. Your compute environment credentials require additional IAM permissions to interact with Google Secret Manager. + +### IAM permissions + +See the [Google documentation](https://cloud.google.com/secret-manager/docs/access-control) for permission configuration instructions to integrate with Google Secret Manager. + +Seqera Platform requires `roles/secretmanager.admin` permissions in the project where it will manage your secrets. Ensure that your compute environment contains credentials with this access role for the same `project_id` listed in the service account JSON file. 
diff --git a/platform-enterprise/seqerakit/commands.md b/platform-enterprise/seqerakit/commands.md new file mode 100644 index 000000000..f13581675 --- /dev/null +++ b/platform-enterprise/seqerakit/commands.md @@ -0,0 +1,126 @@ +--- +title: "Commands" +description: "Seqerakit command options" +date: "21 Oct 2024" +tags: [seqerakit, cli, automation, commands] +--- + +Use the `--help` or `-h` option to list available commands and options: + +```shell-session +seqerakit --help +``` + +### Input + +Seqerakit supports input through paths to YAML configuration files or directly from standard input (`stdin`). + +- Using file path: + + ```shell-session + seqerakit file.yaml + ``` + +- Using `stdin`: + + ```shell-session + cat file.yaml | seqerakit - + ``` + +See [Define your YAML file using CLI options](./yaml-configuration#yaml-configuration-options) for guidance on formatting your input YAML files. + +### Dryrun + +Confirm that your configuration and command are correct before creating resources in your Seqera account, particularly when automating the end-to-end creation of multiple entities at once. To print the commands that would be executed with Platform CLI when using a YAML file, run your `seqerakit` command with the `--dryrun` option: + +```shell-session +seqerakit file.yaml --dryrun +``` + +### Specify targets + +When using a YAML file as an input that defines multiple resources, use the `--targets` option to specify which resources to create. This option accepts a comma-separated list of resource names. + +Supported resource names include: + +- `actions` +- `compute-envs` +- `credentials` +- `datasets` +- `labels` +- `launch` +- `members` +- `organizations` +- `participants` +- `pipelines` +- `secrets` +- `teams` +- `workspaces` + +For example, given a `test.yaml` file that defines the following resources: + +```yaml +workspaces: + - name: 'workspace-1' + organization: 'seqerakit' +... 
+compute-envs: + - name: 'compute-env' + type: 'aws-batch forge' + workspace: 'seqerakit/workspace-1' +... +pipelines: + - name: 'hello-world' + url: 'https://github.com/nextflow-io/hello' + workspace: 'seqerakit/workspace-1' + compute-env: 'compute-env' +... +``` + +You can target the creation of `pipelines` only by running: + +```shell-session +seqerakit test.yaml --targets pipelines +``` + +This command will create only the pipelines defined in the YAML file and ignore `workspaces` and `compute-envs`. + +To create both workspaces and pipelines, run: + +```shell-session +seqerakit test.yaml --targets workspaces,pipelines +``` + +### Delete resources + +Instead of adding or creating resources, specify the `--delete` option to recursively delete resources in your YAML file: + +```shell-session +seqerakit file.yaml --delete +``` + +For example, if you have a `file.yaml` that defines an organization, workspace, team, credentials, and compute environment that have already been created, run `seqerakit file.yaml --delete` to recursively delete the same resources. + +### Use `tw`-specific CLI options + +Specify `tw`-specific CLI options with the `--cli=` option: + +```shell-session +seqerakit file.yaml --cli="--arg1 --arg2" +``` + +See [CLI commands](../cli/commands) or run `tw -h` for the full list of options. + +:::note +The `--verbose` option for `tw` CLI is currently not supported in `seqerakit` commands. +::: + +#### Example: HTTP-only connections + +The Platform CLI expects to connect to a Seqera instance that is secured by a TLS certificate. If your Seqera Enterprise instance does not present a certificate, you must run your `tw` commands with the `--insecure` option. 
+ +To use `tw`-specific CLI options such as `--insecure`, use the `--cli=` option, followed by the options to use enclosed in double quotes: + +```shell-session +seqerakit file.yaml --cli="--insecure" +``` diff --git a/platform-enterprise/seqerakit/installation.md b/platform-enterprise/seqerakit/installation.md new file mode 100644 index 000000000..11250195a --- /dev/null +++ b/platform-enterprise/seqerakit/installation.md @@ -0,0 +1,117 @@ +--- +title: "Installation" +description: "Seqerakit installation options" +date: "21 Oct 2024" +tags: [seqerakit, cli, automation, installation] +--- + + +Seqerakit is a Python wrapper that sets [Platform CLI](../cli/overview) command options using YAML configuration files. Individual commands and configuration parameters can be chained together to automate the end-to-end creation of all Seqera Platform entities. + +As an extension of the Platform CLI, Seqerakit enables: + +- **Infrastructure as code**: Users manage and provision their infrastructure from the command line. +- **Simple configuration**: All Platform CLI command-line options can be defined in simple YAML format. +- **Automation**: End-to-end creation of Seqera entities, from adding an organization to launching pipelines. + +### Installation + +Seqerakit has three dependencies: + +1. [Seqera Platform CLI (`>=0.10.1`)](https://github.com/seqeralabs/tower-cli/releases) +2. [Python (`>=3.8`)](https://www.python.org/downloads/) +3. 
[PyYAML](https://pypi.org/project/PyYAML/) + +#### Pip + +If you already have [Platform CLI](../cli/installation) and Python installed on your system, install Seqerakit directly from [PyPI](https://pypi.org/project/seqerakit/): + +```bash +pip install seqerakit +``` + +Overwrite an existing installation to use the latest version: + +```bash +pip install --upgrade --force-reinstall seqerakit +``` + +#### Conda + +To install `seqerakit` and its dependencies via Conda, first configure the correct channels: + +```bash +conda config --add channels bioconda +conda config --add channels conda-forge +conda config --set channel_priority strict +``` + +Then create a conda environment with `seqerakit` installed: + +```bash +conda create -n seqerakit seqerakit +conda activate seqerakit +``` + +#### Local development installation + +Install the development branch of `seqerakit` on your local machine to test the latest features and updates: + +1. You must have [Python](https://www.python.org/downloads/) and [Git](https://git-scm.com/downloads) installed on your system. +1. To install directly from pip: + ```bash + pip install git+https://github.com/seqeralabs/seqera-kit.git@dev + ``` +1. Alternatively, clone the repository locally and install manually: + ```bash + git clone https://github.com/seqeralabs/seqera-kit.git + cd seqera-kit + git checkout dev + pip install . + ``` +1. Verify your installation: + ```bash + pip show seqerakit + ``` + +### Configuration + +Create a [Seqera](https://cloud.seqera.io/tokens) access token via **Your Tokens** in the user menu. + +Seqerakit reads your access token from the `TOWER_ACCESS_TOKEN` environment variable: + +```bash +export TOWER_ACCESS_TOKEN=<your-access-token> +``` + +For Enterprise installations, specify the custom API endpoint used to connect to Seqera. Export the API endpoint environment variable: + +```bash +export TOWER_API_ENDPOINT=<your-enterprise-api-endpoint> +``` + +By default, this is set to `https://api.cloud.seqera.io` to connect to Seqera Cloud. 
+ +### Usage + +To confirm the installation of `seqerakit`, configuration of the Platform CLI, and connection to Seqera is working as expected, run this command: + +```bash +seqerakit --info +``` + +This runs the `tw info` command under the hood. + +Use `--version` or `-v` to retrieve the current version of your `seqerakit` installation: + +```bash +seqerakit --version +``` + +Use the `--help` or `-h` option to list the available commands and their associated options: + +```bash +seqerakit --help +``` + +See [Commands](./commands) for detailed instructions to use Seqerakit. diff --git a/platform-enterprise/seqerakit/templates.mdx b/platform-enterprise/seqerakit/templates.mdx new file mode 100644 index 000000000..c14bc2ba8 --- /dev/null +++ b/platform-enterprise/seqerakit/templates.mdx @@ -0,0 +1,200 @@ +--- +title: "Templates" +description: "Seqerakit YAML configuration file templates and instructions" +date: "8 Nov 2024" +tags: [seqerakit, cli, automation, yaml, configuration] +toc_max_heading_level: 4 +--- + +import CodeBlock from '@theme/CodeBlock'; +import Organizations from '!!raw-loader!./templates/organizations.yaml'; +import Members from '!!raw-loader!./templates/members.yaml'; +import Workspaces from '!!raw-loader!./templates/workspaces.yaml'; +import Teams from '!!raw-loader!./templates/teams.yaml'; +import Participants from '!!raw-loader!./templates/participants.yaml'; +import ComputeEnvironments from '!!raw-loader!./templates/compute-envs.yaml'; +import Pipelines from '!!raw-loader!./templates/pipelines.yaml'; +import Credentials from '!!raw-loader!./templates/credentials.yaml'; +import Datasets from '!!raw-loader!./templates/datasets.yaml'; +import Labels from '!!raw-loader!./templates/labels.yaml'; +import Launch from '!!raw-loader!./templates/launch.yaml'; +import Secrets from '!!raw-loader!./templates/secrets.yaml'; +import Actions from '!!raw-loader!./templates/actions.yaml'; +import EndToEnd from '!!raw-loader!./templates/seqerakit-e2e.yaml'; + 
+Customize YAML configuration templates to use in `seqerakit` commands to create, update, or delete Seqera resources. Create or delete multiple resources with a single command by combining them into a single configuration file. + +To use the templates on this page: +1. Copy the template text or download the YAML files you need. +1. Edit the values to specify your resource details, and save as a `.yaml` file. +1. Specify the YAML template file in your `seqerakit` commands: + - To create the resources specified in the file: + ```shell-session + seqerakit file.yaml + ``` + - To delete the existing resources specified in the file: + ```shell-session + seqerakit file.yaml --delete + ``` + +:::info +See [Specify targets](./commands#specify-targets) to create or delete only selected resources from configuration templates that contain multiple resource entries. +::: + +See [End-to-end example](#end-to-end-example) for a template that contains examples of all Seqera resources that can be created with Seqerakit. + +### Administration + +Manage organizations, organization members, workspaces, teams, and participants. + +#### Organizations + +Add or delete organizations. + + +{Organizations} + + +[Download organizations.yaml](./templates/organizations.yaml) + +#### Members + +Add or delete organization members. + + +{Members} + + +[Download members.yaml](./templates/members.yaml) + +#### Workspaces + +Add or delete workspaces. + + +{Workspaces} + + +[Download workspaces.yaml](./templates/workspaces.yaml) + +#### Teams + +Add or delete teams. + + +{Teams} + + +[Download teams.yaml](./templates/teams.yaml) + +#### Participants + +Add or delete participants in workspaces and teams. + + +{Participants} + + +[Download participants.yaml](./templates/participants.yaml) + +### Credentials + +Add or delete compute environment, Git, and container registry credentials in workspaces. 
+ + +{Credentials} + + +[Download credentials.yaml](./templates/credentials.yaml) + +### Compute environments + +Add or delete compute environments. + + +{ComputeEnvironments} + + +[Download compute-envs.yaml](./templates/compute-envs.yaml) + +### Pipelines + +Add or delete pipelines in workspace Launchpads. + + +{Pipelines} + + +[Download pipelines.yaml](./templates/pipelines.yaml) + +### Launch + +Launch a Nextflow pipeline. + + +{Launch} + + +[Download launch.yaml](./templates/launch.yaml) + +### Datasets + +Add or delete workspace datasets for pipeline input data. + + +{Datasets} + + +[Download datasets.yaml](./templates/datasets.yaml) + +### Labels + +Add or delete labels and resource labels to apply to workspace compute environments, pipelines, and runs. + + +{Labels} + + +[Download labels.yaml](./templates/labels.yaml) + +### Secrets + +Add or delete user and workspace secrets. + + +{Secrets} + + +[Download secrets.yaml](./templates/secrets.yaml) + +### Actions + +Add or delete pipeline actions. + + +{Actions} + + +[Download actions.yaml](./templates/actions.yaml) + +### End-to-end example + +A template to create the following resources: + +- An organization +- A workspace +- A team +- Participants +- Credentials +- Secrets +- Compute environments +- Datasets +- Pipelines + +The template also contains `launch` entries to launch saved pipelines. 
+ + +{EndToEnd} + + +[Download seqerakit-e2e.yaml](./templates/seqerakit-e2e.yaml) \ No newline at end of file diff --git a/platform-enterprise/seqerakit/templates/actions.yaml b/platform-enterprise/seqerakit/templates/actions.yaml new file mode 100644 index 000000000..5b65933ea --- /dev/null +++ b/platform-enterprise/seqerakit/templates/actions.yaml @@ -0,0 +1,24 @@ +## To see the full list of options available, run: "tw actions add -h" +actions: + - type: 'github' # required + name: 'your-github-action' # required + pipeline: 'https://github.com/my_username/my_repo' # required + workspace: 'your-organization/workspace-1' # required + compute-env: 'your-aws-compute-environment' # required + work-dir: 's3://your-bucket' # required + revision: 'main' # required + profile: 'test' # optional + params: # optional + outdir: 's3://your-bucket/results' + overwrite: True # optional + - type: 'tower' # required + name: 'your-tower-action' # required + pipeline: 'https://github.com/my_username/my_repo' # required + workspace: 'your-organization/workspace-1' # required + compute-env: 'your-aws-compute-environment' # required + work-dir: 's3://your-bucket' # required + revision: 'main' # required + profile: 'test' # optional + params: # optional + outdir: 's3://your-bucket/results' + overwrite: True # optional diff --git a/platform-enterprise/seqerakit/templates/compute-envs.yaml b/platform-enterprise/seqerakit/templates/compute-envs.yaml new file mode 100644 index 000000000..237f66421 --- /dev/null +++ b/platform-enterprise/seqerakit/templates/compute-envs.yaml @@ -0,0 +1,36 @@ +## To see the full list of options available, run: "tw compute-envs add -h" +## The options required to create compute environments can be specified: +## 1. Explicitly in this file +## 2. 
Via a JSON file exported from Seqera with the "tw compute-envs export" command +compute-envs: +# To create a compute environment from a JSON configuration file (AWS Example) + - name: 'your-aws-compute-environment' # required + workspace: 'your-organization/workspace-1' # required + credentials: 'your-aws-credentials' # required + wait: 'AVAILABLE' # optional + file-path: './compute-envs/aws_compute_environment.json' # required + overwrite: True # optional + +# To create a compute environment with options specified through YAML (AWS Example) + - type: aws-batch # required + config-mode: forge # required for AWS and Azure (forge or manual) + name: 'your-aws-compute-environment' # required + workspace: 'your-organization/workspace-1' # required + credentials: 'your-aws-credentials' # required + region: 'eu-west-1' # required + work-dir: 's3://your-bucket' # required + provisioning-model: 'SPOT' # optional + fusion-v2: False # optional + wave: False # optional + fargate: False # optional + fast-storage: False # optional + instance-types: 'c6i,r6i,m6i' # optional, comma-separated list + no-ebs-auto-scale: True # optional + max-cpus: 500 # required + labels: 'label1,label2' # optional, comma-separated list + vpc-id: 'vpc-1234567890' # optional + subnets: 'subnet-1234567890,subnet-1234567891' # optional, comma-separated list + security-groups: 'sg-1234567890,sg-1234567891' # optional, comma-separated list + allow-buckets: 's3://your-bucket,s3://your-other-bucket' # optional, comma-separated list + wait: 'AVAILABLE' # optional + overwrite: False # optional \ No newline at end of file diff --git a/platform-enterprise/seqerakit/templates/credentials.yaml b/platform-enterprise/seqerakit/templates/credentials.yaml new file mode 100644 index 000000000..e63db59eb --- /dev/null +++ b/platform-enterprise/seqerakit/templates/credentials.yaml @@ -0,0 +1,49 @@ +## To see the full list of options available, run: "tw credentials add -h" +## To avoid exposing sensitive information 
about your credentials, +## use environment variables to supply passwords and secret keys. +credentials: + - type: 'github' # required + name: 'your-github-credentials' # required + workspace: 'your-organization/workspace-1' # required + username: 'your-username' # required + password: '$SEQPLATFORM_GITHUB_PASSWORD' # required + overwrite: True # optional + + - type: 'container-reg' # required + name: 'your-dockerhub-credentials' # required + workspace: 'your-organization/workspace-1' # required + username: 'your-username' # required + password: '$DOCKERHUB_PASSWORD' # required + registry: 'docker.io' # required + overwrite: True # optional + + - type: 'google' # required + name: 'your-google-credentials' # required + workspace: 'your-organization/workspace-1' # required + key: '$GOOGLE_KEY' # required + overwrite: True # optional + + - type: 'aws' # required + name: 'your-aws-credentials' # required + workspace: 'your-organization/workspace-1' # required + access-key: '$AWS_ACCESS_KEY_ID' # required + secret-key: '$AWS_SECRET_ACCESS_KEY' # required + assume-role-arn: '$AWS_ASSUME_ROLE_ARN' # required + overwrite: True # optional + + - type: 'azure' # required + name: 'your-azure-credentials' # required + workspace: 'your-organization/workspace-1' # required + batch-key: '$AZURE_BATCH_KEY' # required + batch-name: 'your-batch-name' # required + storage-key: '$AZURE_STORAGE_KEY' # required + storage-name: 'your-storage-name' # required + overwrite: True # optional + + - type: 'codecommit' # required + name: 'your-codecommit-credentials' # required + workspace: 'your-organization/workspace-1' # required + access-key: '$CODECOMMIT_USER' # required + secret-key: '$CODECOMMIT_PASSWORD' # required + base-url: '$CODECOMMIT_BASEURL' # optional + overwrite: False # optional diff --git a/platform-enterprise/seqerakit/templates/datasets.yaml b/platform-enterprise/seqerakit/templates/datasets.yaml new file mode 100644 index 000000000..b93364ddc --- /dev/null +++ 
b/platform-enterprise/seqerakit/templates/datasets.yaml @@ -0,0 +1,8 @@ +## To see the full list of options available, run: "tw datasets add -h" +datasets: + - name: 'your-dataset' # required + description: 'Your dataset description' # optional + header: true # optional + workspace: 'your-organization/workspace-1' # required + file-path: './datasets/dataset.csv' # required + overwrite: True # optional \ No newline at end of file diff --git a/platform-enterprise/seqerakit/templates/labels.yaml b/platform-enterprise/seqerakit/templates/labels.yaml new file mode 100644 index 000000000..f8496a7f4 --- /dev/null +++ b/platform-enterprise/seqerakit/templates/labels.yaml @@ -0,0 +1,6 @@ +## To see the full list of options available, run: "tw labels add -h" +labels: + - name: 'label_name' # required + value: 'label_value' # required + workspace: 'your-organization/workspace-1' # required + overwrite: True # optional diff --git a/platform-enterprise/seqerakit/templates/launch.yaml b/platform-enterprise/seqerakit/templates/launch.yaml new file mode 100644 index 000000000..745c63c04 --- /dev/null +++ b/platform-enterprise/seqerakit/templates/launch.yaml @@ -0,0 +1,23 @@ +## To see the full list of options available, run: "tw launch -h" +## The options will vary if you are launching a pipeline: +## 1. Pre-configured in the Seqera Launchpad with most options already specified +## 2. 
From source via a remote Git repository URL where you must explicitly specify all launch options +## Note: overwrite is not supported for "tw launch" + +launch: + - name: 'launchpad-launch' # required + workspace: 'your-organization/workspace-1' # required + pipeline: 'nf-core-rnaseq' # required + params: # optional + outdir: 's3://your-bucket/results' + + - name: 'remote-launch' # required + workspace: 'your-organization/workspace-1' # required + compute-env: 'your-aws-compute-environment' # required + pipeline: 'https://github.com/my_username/my_repo' # required + work-dir: 's3://your-bucket' # optional + profile: 'test' # optional + revision: 'main' # optional + params-file: './pipelines/params.yml' # optional + config: './pipelines/nextflow.config' # optional + pre-run: './pipelines/pre-run.txt' # optional diff --git a/platform-enterprise/seqerakit/templates/members.yaml b/platform-enterprise/seqerakit/templates/members.yaml new file mode 100644 index 000000000..f06b9f4af --- /dev/null +++ b/platform-enterprise/seqerakit/templates/members.yaml @@ -0,0 +1,5 @@ +## To see the full list of options available, run: "tw members add -h" +members: + - user: 'user@domain.com' # required + organization: 'your-organization' # required + overwrite: True # optional \ No newline at end of file diff --git a/platform-enterprise/seqerakit/templates/organizations.yaml b/platform-enterprise/seqerakit/templates/organizations.yaml new file mode 100644 index 000000000..6cf0518d2 --- /dev/null +++ b/platform-enterprise/seqerakit/templates/organizations.yaml @@ -0,0 +1,8 @@ +## To see the full list of options available, run: "tw organizations add -h" +organizations: + - name: 'your-organization' # required + full-name: 'Your Organization LLC' # required + description: 'Your organization description' # optional + location: 'Global' # optional + website: 'https://domain.com/' # optional + overwrite: True # optional \ No newline at end of file diff --git 
a/platform-enterprise/seqerakit/templates/participants.yaml b/platform-enterprise/seqerakit/templates/participants.yaml new file mode 100644 index 000000000..3c5e77da6 --- /dev/null +++ b/platform-enterprise/seqerakit/templates/participants.yaml @@ -0,0 +1,10 @@ +## To see the full list of options available, run: "tw participants add -h" +participants: + - name: 'your-team' # required + type: 'TEAM' # required + workspace: 'your-organization/workspace-1' # required + role: 'ADMIN' # required + - name: 'team-member@domain.com' # required + type: 'MEMBER' # required + workspace: 'your-organization/workspace-1' # required + role: 'LAUNCH' # required \ No newline at end of file diff --git a/platform-enterprise/seqerakit/templates/pipelines.yaml b/platform-enterprise/seqerakit/templates/pipelines.yaml new file mode 100644 index 000000000..017d72527 --- /dev/null +++ b/platform-enterprise/seqerakit/templates/pipelines.yaml @@ -0,0 +1,24 @@ +## To see the full list of options available, run: "tw pipelines add -h" +## The options required to create pipelines can be specified: +## 1. Explicitly in this file +## 2. 
Via a JSON file exported from Seqera with the "tw pipelines export" command +pipelines: + - name: 'your-first-pipeline' # required + workspace: 'your-organization/workspace-1' # required + description: 'Options specified in this file' # optional + compute-env: 'your-aws-compute-environment' # required + work-dir: 's3://your-bucket' # optional + profile: 'test' # optional + revision: 'main' # required + params: # optional + outdir: 's3://your-bucket/results' + config: './pipelines/nextflow.config' # optional + pre-run: './pipelines/pre-run.txt' # optional + url: 'https://github.com/my_username/my_repo' # required + overwrite: True # optional + - name: 'your-second-pipeline' # required + workspace: 'your-organization/workspace-1' # required + description: 'Options specified via JSON file' # optional + compute-env: 'your-aws-compute-environment' # required + file-path: './pipelines/pipeline.json' # required + overwrite: True # optional diff --git a/platform-enterprise/seqerakit/templates/secrets.yaml b/platform-enterprise/seqerakit/templates/secrets.yaml new file mode 100644 index 000000000..743faba3b --- /dev/null +++ b/platform-enterprise/seqerakit/templates/secrets.yaml @@ -0,0 +1,6 @@ +## To see the full list of options available, run: "tw secrets add -h" +secrets: + - name: 'your-secret' # required + workspace: 'your-organization/workspace-1' # required + value: 'your-secret-value' # required + overwrite: True # optional \ No newline at end of file diff --git a/platform-enterprise/seqerakit/templates/seqerakit-e2e.yaml b/platform-enterprise/seqerakit/templates/seqerakit-e2e.yaml new file mode 100644 index 000000000..fcdea506e --- /dev/null +++ b/platform-enterprise/seqerakit/templates/seqerakit-e2e.yaml @@ -0,0 +1,148 @@ +organizations: + - name: 'your-organization' # required + full-name: 'Your organization LLC' # required + description: 'Organization created E2E with seqerakit CLI scripting' # optional + location: 'Global' # optional + website: 
'https://yourdomain.com/' # optional + overwrite: True # optional + +teams: + - name: 'pipelines-team' # required + organization: 'your-organization' # required + description: 'Pipelines team @ Your organization' # optional + members: # optional + - 'user1@domain.com' + - 'user2@domain.com' + - 'user3@domain.com' + overwrite: True # optional + +workspaces: + - name: 'workspace-1' # required + full-name: 'Workspace one' # required + organization: 'your-organization' # required + description: 'Workspace created E2E with seqerakit CLI scripting' # optional + visibility: 'PRIVATE' # optional + overwrite: True # optional + +participants: + - name: 'pipelines-team' # required + type: 'TEAM' # required + workspace: 'your-organization/workspace-1' # required + role: 'ADMIN' # required + overwrite: True # optional + - name: 'user@domain.com' # required + type: 'MEMBER' # required + workspace: 'your-organization/workspace-1' # required + role: 'LAUNCH' # required + overwrite: True # optional + +credentials: + - type: 'github' # required + name: 'github-credentials' # required + workspace: 'your-organization/workspace-1' # required + username: 'user1' # required + password: '$TOWER_GITHUB_PASSWORD' # required + overwrite: True # optional + - type: 'container-reg' # required + name: 'dockerhub-credentials' # required + workspace: 'your-organization/workspace-1' # required + username: 'user1' # required + password: '$DOCKERHUB_PASSWORD' # required + registry: 'docker.io' # required + overwrite: True # optional + - type: 'aws' # required + name: 'aws-credentials' # required + workspace: 'your-organization/workspace-1' # required + access-key: '$AWS_ACCESS_KEY_ID' # required + secret-key: '$AWS_SECRET_ACCESS_KEY' # required + assume-role-arn: '$AWS_ASSUME_ROLE_ARN' # required + overwrite: True # optional + +secrets: + - name: 'SENTIEON_LICENSE_BASE64' # required + workspace: 'your-organization/workspace-1' # required + value: '$SENTIEON_LICENSE_BASE64' # required + overwrite: 
True # optional + +compute-envs: +# To create a compute environment with options specified through YAML (AWS Example) + - type: aws-batch # required + config-mode: forge # required for AWS and Azure + name: 'your-aws-compute-environment' # required + workspace: 'your-organization/workspace-1' # required + credentials: 'your-aws-credentials' # required + region: 'eu-west-1' # required + work-dir: 's3://your-bucket' # required + provisioning-model: 'SPOT' # optional + fusion-v2: False # optional + wave: False # optional + fargate: False # optional + fast-storage: False # optional + instance-types: 'c6i,r6i,m6i' # optional, comma-separated list + no-ebs-auto-scale: True # optional + max-cpus: 500 # required + labels: 'label1,label2' # optional, comma-separated list + vpc-id: 'vpc-1234567890' # optional + subnets: 'subnet-1234567890,subnet-1234567891' # optional, comma-separated list + security-groups: 'sg-1234567890,sg-1234567891' # optional, comma-separated list + allow-buckets: 's3://your-bucket,s3://your-other-bucket' # optional, comma-separated list + wait: 'AVAILABLE' # optional + overwrite: False # optional + +# To create a compute environment from a JSON configuration file (AWS Example) + - name: 'your-aws-compute-environment' # required + workspace: 'your-organization/workspace-1' # required + credentials: 'your-aws-credentials' # required + wait: 'AVAILABLE' # optional + file-path: './compute-envs/your_aws_compute_environment.json' # required + overwrite: True # optional + +datasets: + - name: 'rnaseq_samples' # required + description: 'Samplesheet to run the nf-core/rnaseq pipeline end-to-end' # optional + header: true # optional + workspace: 'your-organization/workspace-1' # required + file-path: 'path/to/rnaseq_samples.csv' # required + overwrite: True # optional + +pipelines: +# To create a pipeline with options specified through YAML + - name: 'nf-core-rnaseq' # required + url: 'https://github.com/nf-core/rnaseq' # required + workspace: 
'your-organization/workspace-1' # required + description: 'RNA sequencing analysis pipeline with gene/isoform counts and extensive quality control.' # optional + compute-env: 'your-aws-compute-environment' # required + work-dir: 's3://your-bucket/pipeline/working/directory' # optional + profile: 'test' # optional + revision: '3.12.0' # required + params: # optional + outdir: 's3://your-bucket/nf-core-rnaseq/results' + config: 'path/to/nextflow.config' # optional + pre-run: 'path/to/pipeline/pre_run.txt' # optional + overwrite: True # optional + +# To create a pipeline from a JSON configuration file + - name: 'nf-core-sarek' # required + workspace: 'your-organization/workspace-1' # required + compute-env: 'your-aws-compute-environment' # required + file-path: 'path/to/nf-core-sarek_pipeline.json' # required + overwrite: True # optional + +launch: +# To launch a preconfigured pipeline from your workspace Launchpad + - name: 'nf-core-rnaseq-launchpad' # required + workspace: 'your-organization/workspace-1' # required + pipeline: 'nf-core-rnaseq' # required + compute-env: 'your-aws-compute-environment' # optional (defaults to workspace primary compute environment) + +# To launch an unsaved pipeline, include the pipeline repository URL and launch details + - name: 'nf-core-rnaseq-remote' # required + workspace: 'your-organization/workspace-1' # required + pipeline: 'https://github.com/nf-core/rnaseq' # required + compute-env: 'your-aws-compute-environment' # optional (defaults to workspace primary compute environment) + work-dir: 's3://your-bucket/pipeline/working/directory' # optional + profile: 'test' # optional + revision: '3.12.0' # optional + params-file: 'path/to/pipelines/nf_core_rnaseq_params.yml' # optional + config: 'path/to/pipelines/nextflow.config' # optional + pre-run: 'path/to/pipelines/pre_run.txt' # optional diff --git a/platform-enterprise/seqerakit/templates/teams.yaml b/platform-enterprise/seqerakit/templates/teams.yaml new file mode 100644 index 
000000000..b1258de24 --- /dev/null +++ b/platform-enterprise/seqerakit/templates/teams.yaml @@ -0,0 +1,8 @@ +## To see the full list of options available, run: "tw teams add -h" +teams: + - name: 'your-team' # required + organization: 'your-organization' # required + description: 'Your team description.' # optional + members: # optional + - 'team-member@domain.com' + overwrite: True # optional \ No newline at end of file diff --git a/platform-enterprise/seqerakit/templates/workspaces.yaml b/platform-enterprise/seqerakit/templates/workspaces.yaml new file mode 100644 index 000000000..ca9392340 --- /dev/null +++ b/platform-enterprise/seqerakit/templates/workspaces.yaml @@ -0,0 +1,8 @@ +## To see the full list of options available, run: "tw workspaces add -h" +workspaces: + - name: 'workspace-1' # required + full-name: 'Workspace one' # required + organization: 'your-organization' # required + description: 'Your workspace description' # optional + visibility: 'PRIVATE' # optional + overwrite: True # optional \ No newline at end of file diff --git a/platform-enterprise/seqerakit/yaml-configuration.md b/platform-enterprise/seqerakit/yaml-configuration.md new file mode 100644 index 000000000..126390f17 --- /dev/null +++ b/platform-enterprise/seqerakit/yaml-configuration.md @@ -0,0 +1,154 @@ +--- +title: "YAML configuration" +description: "seqerakit YAML configuration file options" +date: "21 Oct 2024" +tags: [seqerakit, cli, automation, yaml, configuration] +--- + +Seqerakit supports the creation and deletion of the following Seqera Platform resources, listed here with their respective Platform CLI resource names: + +- Pipeline actions: `actions` +- Compute environments: `compute-envs` +- Credentials: `credentials` +- Datasets: `datasets` +- Labels (including resource labels): `labels` +- Pipeline launch: `launch` +- Organization members: `members` +- Organizations: `organizations` +- Workspace and team participants: `participants` +- Pipelines: `pipelines` +- Pipeline 
secrets: `secrets` +- Teams: `teams` +- Workspaces: `workspaces` + +To determine the options to provide as definitions in your YAML file, run the Platform CLI help command for the resource you want to create. + +1. Retrieve CLI options: + + Obtain a list of available CLI options for defining your YAML file with the Platform CLI `help` command. For example, to add a pipeline to your workspace, view the options for adding a pipeline: + + ```shell-session + tw pipelines add -h + ``` + + ```shell-session + Usage: tw pipelines add [OPTIONS] PIPELINE_URL + + Add a workspace pipeline. + + Parameters: + * PIPELINE_URL Nextflow pipeline URL. + + Options: + * -n, --name= Pipeline name. + -w, --workspace= Workspace numeric identifier (TOWER_WORKSPACE_ID as default) or workspace reference as OrganizationName/WorkspaceName + -d, --description= Pipeline description. + --labels=[,...] List of labels seperated by coma. + -c, --compute-env= Compute environment name. + --work-dir= Path where the pipeline scratch data is stored. + -p, --profile=[,...] Comma-separated list of one or more configuration profile names you want to use for this pipeline execution. + --params-file= Pipeline parameters in either JSON or YML format. + --revision= A valid repository commit Id, tag or branch name. + ... + ``` + +1. Define key-value pairs in YAML: + + Translate each CLI option into a key-value pair in the YAML file. The structure of your YAML file should reflect the hierarchy and format of the CLI options. For example: + + ```yaml + pipelines: + - name: 'my_first_pipeline' + url: 'https://github.com/username/my_pipeline' + workspace: 'my_organization/my_workspace' + description: 'My test pipeline' + labels: 'yeast,test_data' + compute-env: 'my_compute_environment' + work-dir: 's3://my_bucket' + profile: 'test' + params-file: '/path/to/params.yaml' + revision: '1.0' + ``` + + In this example: + + - The keys (`name`, `url`, `workspace`, and so forth) are the keys derived from the CLI options. 
+ - The corresponding values are user-defined. + +#### Best practices + +- The indentation and structure of the YAML file must be correct — YAML is sensitive to formatting. +- Use quotes around strings that contain special characters or spaces. +- To list multiple values (such as multiple `labels`, `instance-types`, or `allow-buckets`), separate values with commas. This is shown with `labels` in the preceding example. +- For complex configurations, see [Templates](./templates). + +### Templates + +See [Templates](./templates) for YAML file templates for each of the entities that can be created in Seqera. + +### YAML Configuration Options + +Some options handled specially by `seqerakit` or not exposed as `tw` CLI options can be provided in your YAML configuration file. + +#### Pipeline parameters using `params` and `params-file` + +To specify pipeline parameters, use `params:` to specify a list of parameters or `params-file:` to point to a parameters file. + +For example, to specify pipeline parameters within your YAML: + +```yaml +params: + outdir: 's3://path/to/outdir' + fasta: 's3://path/to/reference.fasta' +``` + +To specify a file containing pipeline parameters: + +```yaml +params-file: '/path/to/my/parameters.yaml' +``` + +Or provide both: + +```yaml +params-file: '/path/to/my/parameters.yaml' +params: + outdir: 's3://path/to/outdir' + fasta: 's3://path/to/reference.fasta' +``` + +:::note +If duplicate parameters are provided, the parameters provided as key-value pairs inside the `params` nested dictionary of the YAML file will take precedence **over** values in the `params-file`. +::: + +#### Overwrite + +For every entity defined in your YAML file, specify `overwrite: True` to overwrite any existing Seqera entities of the same name. + +Seqerakit will first check to see if the name of the entity exists. If so, it will invoke a `tw delete` command before attempting to create it based on the options defined in the YAML file. 
+ +```shell-session +DEBUG:root: Overwrite is set to 'True' for organizations + +DEBUG:root: Running command: tw -o json organizations list +DEBUG:root: The attempted organizations resource already exists. Overwriting. + +DEBUG:root: Running command: tw organizations delete --name $SEQERA_ORGANIZATION_NAME +DEBUG:root: Running command: tw organizations add --name $SEQERA_ORGANIZATION_NAME --full-name $SEQERA_ORGANIZATION_NAME --description 'Example of an organization' +``` + +#### Specify JSON configuration files with `file-path` + +The Platform CLI allows the export and import of entities through JSON configuration files for pipelines and compute environments. To use these files to add a pipeline or compute environment to a workspace, use the `file-path` key to specify a path to a JSON configuration file. + +An example of the `file-path` option is provided in the [compute-envs.yaml](./templates/compute-envs.yaml) template: + +```yaml +compute-envs: + - name: 'my_aws_compute_environment' # required + workspace: 'my_organization/my_workspace' # required + credentials: 'my_aws_credentials' # required + wait: 'AVAILABLE' # optional + file-path: './compute-envs/my_aws_compute_environment.json' # required + overwrite: True +``` diff --git a/platform-enterprise/sidebar.json b/platform-enterprise/sidebar.json new file mode 100644 index 000000000..a80a47dc0 --- /dev/null +++ b/platform-enterprise/sidebar.json @@ -0,0 +1,262 @@ +{ + "platformSidebar": [ + "platform-enterprise", + { + "type": "category", + "label": "Installation", + "collapsed": true, + "link": { + "type": "doc", + "id": "enterprise/overview" + }, + "items": [ + { + "type": "category", + "label": "Prerequisites", + "collapsed": true, + "items": [ + "enterprise/prerequisites/aws", + "enterprise/prerequisites/azure", + "enterprise/prerequisites/gcp", + "enterprise/prerequisites/on-prem" + ] + }, + { + "type": "category", + "label": "Configuration", + "collapsed": true, + "items": [ + 
"enterprise/configuration/overview", + "enterprise/configuration/authentication", + "enterprise/configuration/aws_parameter_store", + "enterprise/configuration/networking", + "enterprise/configuration/reverse_proxy", + "enterprise/configuration/ssl_tls", + "enterprise/configuration/pipeline_optimization", + "enterprise/configuration/wave", + "enterprise/studios" + ] + }, + { + "type": "category", + "label": "Deployment", + "collapsed": true, + "items": [ + "enterprise/docker-compose", + "enterprise/kubernetes", + "enterprise/testing", + "enterprise/upgrade" + ] + }, + { + "type": "category", + "label": "Advanced", + "collapsed": true, + "items": [ + "enterprise/advanced-topics/db-docker-to-RDS", + "enterprise/advanced-topics/use-iam-role", + "enterprise/advanced-topics/custom-launch-container", + "enterprise/advanced-topics/firewall-configuration", + "enterprise/advanced-topics/seqera-container-images" + ] + }, + "enterprise/general_troubleshooting" + ] + }, + { + "type": "category", + "label": "Tutorials", + "collapsed": true, + "items": [ + "getting-started/rnaseq", + "getting-started/proteinfold", + "getting-started/studios", + "tutorials/retry-strategy" + ] + }, + { + "type": "category", + "label": "Get started", + "collapsed": true, + "items": [ + "getting-started/deployment-options", + "getting-started/workspace-setup", + "getting-started/quickstart-demo/add-pipelines", + "getting-started/quickstart-demo/add-data", + "getting-started/quickstart-demo/launch-pipelines", + "getting-started/quickstart-demo/monitor-runs", + "getting-started/quickstart-demo/automation" + ] + }, + { + "type": "category", + "label": "Credentials", + "items": [ + "credentials/overview", + "git/overview", + { + "type": "category", + "label": "Container registry credentials", + "items": [ + "credentials/docker_hub_registry_credentials", + "credentials/aws_registry_credentials", + "credentials/azure_registry_credentials", + "credentials/google_registry_credentials", + 
"credentials/quay_registry_credentials", + "credentials/gitlab_registry_credentials", + "credentials/github_registry_credentials", + "credentials/gitea_registry_credentials" + ] + }, + "credentials/managed_identities", + "credentials/ssh_credentials", + "credentials/agent_credentials" + ] + }, + { + "type": "category", + "label": "Compute", + "items": [ + "compute-envs/overview", + { + "type": "category", + "label": "Cloud", + "items": [ + "compute-envs/aws-batch", + "compute-envs/azure-batch", + "compute-envs/google-cloud-batch", + "compute-envs/google-cloud-lifesciences", + "compute-envs/eks", + "compute-envs/gke", + "monitoring/cloud-costs" + ] + }, + "compute-envs/k8s", + "compute-envs/hpc", + "resource-labels/overview", + "supported_software/fusion/overview", + "supported_software/agent/overview", + "supported_software/dragen/overview", + { + "type": "category", + "label": "Advanced options", + "items":[ + "enterprise/advanced-topics/manual-aws-batch-setup", + "enterprise/advanced-topics/manual-azure-batch-setup" + ] + } + ] + }, + { + "type": "category", + "label": "Data", + "items": [ + "data/data-explorer", + "data/datasets" + ] + }, + { + "type": "category", + "label": "Pipelines", + "items": [ + "pipeline-schema/overview", + "launch/launchpad", + "labels/overview", + { + "type": "category", + "label": "Monitor runs", + "items": [ + "monitoring/overview", + "monitoring/dashboard", + "monitoring/run-details" + ] + }, + "reports/overview", + "launch/cache-resume", + "secrets/overview", + "pipeline-optimization/overview", + "launch/advanced" + ] + }, + { + "type": "category", + "label": "Studios", + "items": [ + "studios/overview", + "studios/custom-envs", + "studios/managing" + ] + }, + { + "type": "category", + "label": "Administration", + "items": [ + "administration/overview", + "orgs-and-teams/organizations", + "orgs-and-teams/workspace-management", + "orgs-and-teams/roles", + "monitoring/audit-logs" + ] + }, + { + "type": "category", + "label": 
"Developer tools", + "collapsed": true, + "items": [ + "api/overview", + { + "type": "category", + "label": "CLI", + "collapsed": true, + "items": [ + "cli/overview", + "cli/installation", + "cli/commands" + ] + }, + { + "type": "category", + "label": "Seqerakit", + "collapsed": true, + "items": [ + "seqerakit/installation", + "seqerakit/commands", + "seqerakit/yaml-configuration", + "seqerakit/templates" + ] + }, + "pipeline-actions/overview" + ] + }, + { + "type": "category", + "label": "Reference", + "collapsed": true, + "items": [ + "functionality_matrix/overview", + "data-privacy/overview", + "limits/overview" + ] + }, + { + "type": "link", + "label": "Enterprise changelog", + "href": "/changelog/tags/seqera-enterprise" + }, + { + "type": "category", + "label": "Troubleshooting", + "collapsed": true, + "items": [ + "troubleshooting_and_faqs/troubleshooting", + "troubleshooting_and_faqs/api_and_cli", + "troubleshooting_and_faqs/studios_troubleshooting", + "troubleshooting_and_faqs/nextflow", + "troubleshooting_and_faqs/aws_troubleshooting", + "troubleshooting_and_faqs/azure_troubleshooting", + "troubleshooting_and_faqs/workspaces_troubleshooting", + "troubleshooting_and_faqs/datasets_troubleshooting" + ] + } + ] + } diff --git a/platform-enterprise/studios/_images/studios_collaboration.png b/platform-enterprise/studios/_images/studios_collaboration.png new file mode 100644 index 000000000..1bdd95a7a Binary files /dev/null and b/platform-enterprise/studios/_images/studios_collaboration.png differ diff --git a/platform-enterprise/studios/_images/studios_notebook_fusion.png b/platform-enterprise/studios/_images/studios_notebook_fusion.png new file mode 100644 index 000000000..758698fff Binary files /dev/null and b/platform-enterprise/studios/_images/studios_notebook_fusion.png differ diff --git a/platform-enterprise/studios/custom-envs.md b/platform-enterprise/studios/custom-envs.md new file mode 100644 index 000000000..190c527ac --- /dev/null +++ 
b/platform-enterprise/studios/custom-envs.md @@ -0,0 +1,133 @@ +--- +title: "Custom environments" +description: "Custom environments for Studios" +date: "1 Oct 2024" +tags: [environments, custom, studio] +--- + +In addition to the Seqera-provided container template images, you can provide your own custom container environments by augmenting the Seqera-provided images with a list of Conda packages or by providing your own base container template image. + +Studios uses the [Wave][wave-home] service to build custom container template images. + +## Conda packages + +### Prerequisites + +- Wave must be configured. For more information, see [Wave containers][wave]. + + :::note + To augment Seqera-provided images, Enterprise deployments must either allow access to the public Wave server, or self-host their own Wave server. + ::: + +### Conda package syntax {#conda-package-syntax} + +When adding a new Studio, you can customize its configuration to install a list of Conda packages to the template image. The supported schema is identical to that used by the Conda `environment.yml` file. For more information on the Conda environment file, see [Creating an environment file manually][env-manually]. + +```yaml title="Example environment.yml file" +channels: + - conda-forge +dependencies: + - numpy + - pip: + - matplotlib + - seaborn +``` + +To create a Studio with custom Conda packages, see [Add a Studio][add-s]. + +## Custom container template image {#custom-containers} + +For advanced use cases, you can build your own container template image. + +:::note +Public container registries are supported by default. Amazon Elastic Container Registry (ECR) is the only supported private container registry.
+::: + +### Prerequisites + +- Access to a container image repository, either a public container registry or a private Amazon ECR repository +- A container template image + +### Dockerfile configuration {#dockerfile} + +For your custom template container image, you must use a Seqera-provided base image and include several additional build steps for compatibility with Studios. + +To create a Studio with a custom template image, see [Add a Studio][add-s]. + +#### Ports + +The container must use the value of the `CONNECT_TOOL_PORT` environment variable as the listening port for any interactive software you include in your custom container. + +#### Signals + +Upon termination, the container's main process must handle the `SIGTERM` signal and perform any necessary cleanup. After a 30-second grace period, the container receives the `SIGKILL` signal. + +#### Minimal Dockerfile + +The minimal Dockerfile includes directives to accomplish the following: + +- Pull a Seqera-provided base image with prerequisite binaries +- Copy the `connect` binary into the build +- Set the container entry point + +Customize the following Dockerfile to include any additional software that you require: + +```docker title="Minimal Dockerfile" +# Add a default Connect client version. Can be overridden by build arg +ARG CONNECT_CLIENT_VERSION="0.8" + +# Seqera base image +FROM public.cr.seqera.io/platform/connect-client:${CONNECT_CLIENT_VERSION} AS connect + +# Your base image (replace with the image you want to extend) +FROM ubuntu:20.04 + +# 1. Add connect binary +COPY --from=connect /usr/bin/connect-client /usr/bin/connect-client + +# 2. Install connect dependencies +RUN /usr/bin/connect-client --install + +# 3. Configure connect as the entrypoint +ENTRYPOINT ["/usr/bin/connect-client", "--entrypoint"] +``` + +For example, to run a basic Python-based HTTP server, build a container from the following Dockerfile. When a Studio runs the custom template environment, the value for the `CONNECT_TOOL_PORT` environment variable is provided dynamically.
+ +```docker title="Example Dockerfile with Python HTTP server" +# Add a default Connect client version. Can be overridden by build arg +ARG CONNECT_CLIENT_VERSION="0.8" + +# Seqera base image +FROM public.cr.seqera.io/platform/connect-client:${CONNECT_CLIENT_VERSION} AS connect + +FROM ubuntu:20.04 +RUN apt-get update --yes && apt-get install --yes --no-install-recommends python3 + +COPY --from=connect /usr/bin/connect-client /usr/bin/connect-client +RUN /usr/bin/connect-client --install +ENTRYPOINT ["/usr/bin/connect-client", "--entrypoint"] + +CMD ["/usr/bin/bash", "-c", "python3 -m http.server $CONNECT_TOOL_PORT"] +``` + +### Getting started with custom container template images + +You can review a series of example custom studio environment container template images [here][custom-studios-examples]. + +### Inspect container augmentation build status {#build-status} + +You can inspect the progress of a custom container template image build, including any errors if the build fails. A link to the [Wave service][wave-home] container build report is always available for builds. + +If the custom container template image build fails, the Studio session has the **build-failed** status. The details about build failures are available when inspecting the session details in the **Error report** tab. + +To inspect the status of an ongoing build, or a successful or failed build, complete the following steps: + +1. Select the **Studios** tab in Seqera Platform. +1. From the list of sessions, select the name of the session with **building** or **build-failed** status that you want to inspect, and then select **View**. +1. In the **Details** tab, scroll to **Build reports** and select **Summary** to open the Wave service container build report for your build. +1. Optional: If the build failed, select the **Error report** tab to view the errors associated with the build failure.
+ + +{/* links */} +[add-s]: ./managing#add-a-studio +[wave]: ../../version-24.2/enterprise/configuration/wave +[custom-studios-examples]: https://github.com/seqeralabs/custom-studios-examples +[wave-home]: https://seqera.io/wave/ +[env-manually]: https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-file-manually diff --git a/platform-enterprise/studios/managing.md b/platform-enterprise/studios/managing.md new file mode 100644 index 000000000..c5a089722 --- /dev/null +++ b/platform-enterprise/studios/managing.md @@ -0,0 +1,212 @@ +--- +title: "Manage Studio sessions" +description: "Manage Studio sessions." +date: "6 February 2025" +tags: [data, session, studios] +--- + +Select the **Studios** tab in Platform to view all Studio sessions. The list includes the name, cloud provider, analysis template, region, author, creation date, and status of each session. In this view, you can add a new Studio and start, stop, or connect to an existing session. Dynamically filter the list of Studios using the search bar to search by name (default), author username, or compute environment name. Select a Studio to open a detailed view that displays configuration information. + +## Add a Studio + +This functionality is available to users with the **Maintain** role and above. + +To add a new Studio, select the **Studios** tab in your workspace then select **Add Studio**. + +### Compute and Data + + 1. Customize the following fields: + - **Select compute environment**: Only AWS Batch (without Fargate enabled) is supported. + - **Define resource labels**: Any [resource label](../labels/overview) already defined for the compute environment is added by default. Additional custom resource labels can be added or removed as needed. + - **CPUs allocated**: The default allocation is 2 CPUs. + - **GPUs allocated**: Available only if the selected compute environment has GPU support enabled. 
For more information about GPUs on AWS, see [Amazon ECS task definitions for GPU workloads][aws-gpu]. The default allocation is 0 GPUs. + - **Maximum memory allocated**: The default allocation is 8192 MiB of memory. + 1. Select **Mount data**, and then from the **Mount data** modal, select data to mount. Select **Mount data** to confirm your selection. + + :::tip + Datasets are mounted using the [Fusion file system](https://docs.seqera.io/fusion) and are available at `/workspace/data/`. Mounted data doesn't need to match the compute environment or region of the cloud provider of the Studio. However, this might cause increased costs or errors. + ::: + + 1. Select **Next**. + +### General config + + - To use one of the Seqera-provided container templates, complete the following steps: + + 1. Customize the following fields: + - **Container template**: Select a template from the dropdown list. + - **Studio name** + - Optional: **Description** + 1. Optional: Select **Install Conda packages** to enter or upload a list of Conda packages to include with the Studio. For more information on the syntax for specifying Conda packages, see [Conda package syntax][conda-syntax]. + 1. **Collaboration**: By default, all Studios are collaborative. This means all workspace users with the launch role and above can connect to the session. You can toggle **Private** on which means that only the workspace user who created the Studio can connect to it. When **Private** is on, workspace administrators can still start, stop, and delete sessions but cannot connect to them. + 1. **Session lifespan**: Depending on your workspace settings, you may be able to choose between the following options. + - **Stop the session automatically after a predefined period of time.** + - If there is an existing defined session lifespan workspace setting, you won't be able to edit this. If no workspace setting is defined, you can edit this field. The minimum value is 1 hour and the maximum is 120 hours. 
The default value is 8 hours. + - If you change the default value, the change applies only to that session. Once you've stopped the session, the value returns to default. + - **Keep the session running until it's manually stopped or encounters an error which ends the session.** + 1. Select **Next**. + + - To use a custom container template image that you supply, complete the following steps: + + 1. Customize the following fields: + - **Container template**: Select **Prebuilt container image** from the list. For information about providing your own template, see [Custom container template image][custom-image]. + + :::tip + If you select the **Prebuilt container image** template, you cannot select **Install Conda packages** as these options are mutually exclusive. + ::: + + - **Studio name** + - Optional: **Description** + 1. Select **Next**. + +### Summary + + 1. Ensure that the specified configuration is correct. + 1. Save your configuration: + - To not immediately start the session, select **Add only**. + - If you want to save and immediately start the Studio, select **Add and start**. + +You'll be returned to the Studios landing page that displays the list of Studio sessions in your workspace. Select a Studio to inspect its configuration details. The Studio you created will be listed with a status of either **stopped** or **starting**, based on whether you elected to **Add** it or to **Add and start** a session as well. + +:::note +By default, sessions only have read permissions to mounted data paths. Write permissions can be added for specific cloud storage buckets during the compute environment configuration by defining additional **Allowed S3 Buckets**. This means that data can be written from the session back to the cloud storage path(s) mounted. If a new file is uploaded to the cloud storage bucket path while a session is running, the file may not be available to the session immediately. 
+::: + +## Start a Studio session + +This functionality is available to users with the **Maintain** role and above. + +A Studio session needs to be started before you can connect to it. Select the three dots next to the status message for the Studio you want to start, then select **Start**. You can optionally change the configuration of the Studio, then select **Start in new tab**. A new browser tab will open that displays the startup state of the session. Once the session is running, you can connect to it. A session will run until it is stopped manually or it encounters a technical issue. + +:::note +A session consumes resources until it's **stopped**. +::: + +Once a Studio session is in a **running** state, you can connect to it, obtain a public link to the session to share with collaborators inside your workspace, and stop it. + +## Start an existing Studio as a new session + +This functionality is available to users with the **Maintain** role and above. + +You can use any existing Studio as the foundation for adding a new session. This functionality creates a clone of the session, including its checkpoint history, preserving any modifications made to the original Studio. When you create a session in this way, future changes are isolated from the original session. + +When adding a new session from an existing session or checkpoint, the following fields cannot be changed: + +- **Studio template** +- **Original Studio session and checkpoint** +- **Compute environment** +- **Installed Conda packages** +- **Session duration** + +To add a new session from an existing **stopped** session, complete the steps described in [Add a Studio](#add-a-studio). + +Additionally, you can add a new session from any existing Studio checkpoint except the currently running checkpoint. From the detail page, select the **Checkpoints** tab and in the **Actions** column, select **Add as new Studio**. 
This is useful for interactive analysis experimentation without impacting the state of the original Studio. + +## Connect to a Studio + +This functionality is available to all user roles excluding the **View** role. + +To connect to a running session, select the three dots next to the status message and choose **Connect**. + +:::warning +An active connection to a session will not prevent administrative actions that might disrupt that connection. For example, a session can be stopped by another workspace user while you are active in the session, the underlying credentials can be changed, or the compute environment can be deleted. These are independent actions and the user in the session won't be alerted to any changes - the only alert will be a server connection error in the active session browser tab. +::: + +Once connected, the session will display the status of **running** in the list, and any connected user's avatar will be displayed under the status in both the list of Studios and in each Studio's detail page. + +## Collaborate in a Studio session + +This functionality is available to all user roles excluding the **View** role. + +To share a link to a running session with collaborators inside your workspace, select the three dots next to the status message for the session you want to share, then select **Copy Studio URL**. Using this link, other authenticated users can access the session directly. + +![](./_images/studios_collaboration.png) + +:::note +Collaborators need valid workspace permissions to connect to the running Studio. +::: + +## Stop a Studio session + +This functionality is available to users with the **Maintain** role and above. + +To stop a running session, select the three dots next to the status message and then select **Stop**. The status will change from **running** to **stopped**. When a session is stopped, the compute resources it's using are deallocated. You can stop a session at any time, except when it is **starting**. 
+
+Stopping a running session creates a new checkpoint.
+
+## Restart a stopped session
+
+This functionality is available to users with the **Maintain** role and above. When you restart a stopped session, the session uses the most recent checkpoint.
+
+## Start a new session from a checkpoint
+
+This functionality is available to users with the **Maintain** role and above.
+
+You can start a new session from an existing stopped session. This will inherit the history of the parent checkpoint state. From the list of **stopped** Studios in your workspace, select the three dots next to the status message for the Studio you want to start and select **Add as new**. Alternatively, select the **Checkpoints** tab on the detail page, select the three dots in the **Actions** column, and then select **Add as new Studio** to start a new session.
+
+## Delete a Studio
+
+This functionality is available to users with the **Maintain** role and above.
+
+You can only delete a Studio when it's **stopped**. Select the three dots next to the status message and then select **Delete**. The Studio is deleted immediately and can't be recovered.
+
+## Limit Studio access to a specific cloud bucket subdirectory {#cloud-bucket-subdirectory}
+
+For a cloud bucket that is writeable, as enabled by including the bucket in a compute environment's **Allowed S3 bucket** list, you can limit write access to that bucket from within a Studio session.
+
+To limit read-write access to a specific subdirectory, complete the following steps:
+
+1. From your Seqera instance, select the **Data Explorer** tab.
+1. Select **Add Cloud Bucket**.
+1. Complete the following fields:
+    - **Provider**: Select your cloud provider.
+    - **Bucket path**: Enter the full path to the subdirectory of the bucket that you want to use with your Studio, such as `s3://1000genomes/data`.
+    - **Name**: Enter a name for this cloud bucket, such as *1000-genomes-data-dir*, to indicate the bucket name and subdirectory path. 
+ - **Credentials**: Select your provider credentials. + - Optional: **Description**: Enter a description for this cloud bucket. +1. Select **Add** to create a custom data-link to a subdirectory in the cloud bucket. + +When defining a new Studio, you can configure the **Mounted data** by selecting the custom data-link created by the previous steps. + +## Migrate a Studio from an earlier container image template + +As Studios matures and new versions of JupyterLab, R-IDE, Visual Studio Code, and Xpra are released, new Seqera-provided image templates will be periodically released including updated versions of Seqera Connect. The most recent container template images will be tagged `recommended` and earlier template images will be tagged `deprecated`. + +:::info +Temporary container templates tagged with `experimental` are not supported and should not be used in production environments. +::: + +To migrate a Studio to a more recent container version and Seqera Connect: + +:::tip +Always use the `recommended` tagged template image for new Studios. Only two earlier minor versions of [Seqera Connect][connect] are supported by Seqera. +::: + +1. Select the Studio that you wish to migrate from the list of Studios. +1. Select **Add as new**. By default this selects the latest session checkpoint. +1. In the **General config** section, change the image template selection in the dropdown list to use the `latest` tagged version of the same interactive environment. +1. For the **Summary** section, ensure that the specified configuration is correct. +1. Immediately start the new, duplicated Studio session by selecting **Add and start**. +1. **Connect** to the new running Studio session. + 1. Make a note of any package or environment errors displayed. +1. **Stop** the running Studio session. +1. Go back to the original Studio: + 1. **Start** the session. + 1. **Connect** to the session. + 1. Uninstall any packages related to the errors: + 1. 
JupyterLab: Execute `!pip uninstall ` to uninstall Python packages or `apt remove ` to uninstall system-level packages.
+        1. R-IDE: Execute `uninstall.packages("")` to uninstall R packages or `apt remove ` to uninstall system-level packages.
+        1. Visual Studio Code: Select the **Manage** gear button at the right of an extension entry and then choose **Uninstall** from the dropdown menu.
+        1. Xpra: Use `apt remove ` to uninstall system-level packages.
+    1. **Stop** the running Studio session. A new checkpoint is created.
+1. Repeat Step 2, **Add as new**, using the new, most recently created checkpoint from the steps above.
+
+:::warning
+Due to the nature of fully customizable, containerized applications, users can modify environments leading to a variety of configurations and outcomes. This is therefore a best effort to support Studio migrations and a successful outcome is not guaranteed.
+:::
+
+{/* links */}
+[aws-gpu]: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-gpu.html
+[conda-syntax]: ./custom-envs#conda-package-syntax
+[custom-image]: ./custom-envs#custom-containers
+[connect]: ./overview#container-image-templates
diff --git a/platform-enterprise/studios/overview.md b/platform-enterprise/studios/overview.md
new file mode 100644
index 000000000..c6f06f30f
--- /dev/null
+++ b/platform-enterprise/studios/overview.md
@@ -0,0 +1,188 @@
+---
+title: "Overview"
+description: "Studios public preview."
+date: "6 February 2025"
+tags: [studios]
+---
+
+Studios is a unified platform where you can host a combination of container images and compute environments for interactive analysis using your preferred tools, like JupyterLab, an R-IDE, Visual Studio Code IDEs, or Xpra remote desktops. Each Studio session is an individual interactive environment that encapsulates the live environment for dynamic data analysis.
+
+On Seqera Cloud, the free tier permits only one running Studio session at a time. To run simultaneous sessions, [contact Seqera][contact] for a Seqera Cloud Pro license. 
+
+:::note
+Studios in Enterprise is not enabled by default. You can enable Studios in the [environment variables configuration](../../version-25.1/enterprise/studios).
+:::
+
+## Requirements
+
+Before you get started, you need the following:
+
+- Valid credentials to access your cloud storage data resources.
+- At least the **Maintain** role set of permissions.
+- A compute environment with sufficient resources. This is highly dependent on the volume of data you wish to process, but we recommend at least 2 CPUs allocated with 8192 MB of memory. See [AWS Batch](../compute-envs/aws-batch) for more information about compute environment configuration.
+- [Data Explorer](../data/data-explorer) is enabled.
+
+:::note
+Currently, Studios only supports AWS Batch compute environments that **do not** have Fargate enabled.
+:::
+
+## Limitations
+
+If you configured your AWS Batch compute environment to include an EFS file system with **EFS file system > EFS mount path**, the mount path must be explicitly specified. The mount path cannot be the same as your compute environment work directory. If the EFS file system is mounted as your compute environment work directory, snapshots cannot be saved and sessions fail. To mount an EFS volume in a Studio session (for example, if your organization has a custom, managed, and standardized software stack in an EFS volume), add the EFS volume to the compute environment (system ID and mount path). The volume will be available at the specified mount path in the session.
+
+For more information on AWS Batch configuration, see [AWS Batch][aws-batch].
+
+## Container image templates
+
+There are four container image templates provided: JupyterLab, R-IDE, Visual Studio Code, and Xpra. The image templates install a very limited number of packages when the Studio session container is built. You can install additional packages as needed during a Studio session. 
+ +The image template tag includes the version of the analysis application, an optional incompatibility flag, and the Seqera Connect version. Connect is the proprietary Seqera web server client that manages communication with the container. The tag string looks like this: + +```ignore title="Image template tag" +-[u]- +``` + +- ``: Third-party analysis application that follows its own semantic versioning `..`, such as `4.2.5` for JupyterLab. +- ``: Optional analysis application update version, such as `u1`, for instances where a backwards incompatible change is introduced. +- ``: Seqera Connect client version, such as `0.7` or `0.7.0`. + +Additionally, the Seqera Connect client version string has the format: + +```ignore title="Seqera version tag subset" +.. +``` + +- ``: Signifies major version changes in the underlying Seqera Connect client. +- ``: Signifies breaking changes in the underlying Seqera Connect client. +- ``: Signifies patch (non-breaking) changes in the underlying Seqera Connect client. + +When pushed to the container registry, an image template is tagged with the following tags: + +- `-.`, such as `4.2.3-0.7`. When adding a new container template image this is the tag displayed in Seqera Platform. +- `-..`, such as `4.2.3-0.7.1`. + +To view the latest versions of the images, see [public.cr.seqera.io](https://public.cr.seqera.io/). You can also augment the Seqera-provided image templates or use your own custom container image templates. This approach is recommended for managing reproducible analysis environments. For more information, see [Custom environments][custom-envs]. + +**JupyterLab 4.2.5** + +The default user is the `root` account. 
The following [conda-forge](https://conda-forge.org/) packages are available by default: + +- `python=3.13.0` +- `pip=24.2` +- `jedi-language-server=0.41.4` +- `jupyterlab=4.2.5` +- `jupyter-collaboration=1.2.0` +- `jupyterlab-git=0.50.1` +- `jupytext=1.16.4` +- `jupyter-dash=0.4.2` +- `ipywidgets=7.8.4` +- `pandas[all]=2.2.3` +- `scikit-learn=1.5.2` +- `statsmodels=0.14.4` +- `itables=2.2.2` +- `seaborn[stats]=0.13.2` +- `altair=5.4.1` +- `plotly=5.24.1` +- `r-ggplot2=3.5.1` +- `nb_black=1.0.7` +- `qgrid=1.3.1` + +To install additional Python packages during a running Studio session, execute `!pip install ` commands in your notebook environment. Additional system-level packages can be installed in a terminal window using `apt install `. + +To see the list of all JupyterLab image templates available, including security scan results or to inspect the container specification, see [public.cr.seqera.io/repo/platform/data-studio-jupyter][ds-jupyter]. + +**R-IDE 4.4.1** + +The default user is the `root` account. To install R packages during a running Studio session, execute `install.packages("")` commands in your notebook environment. Additional system-level packages can be installed in a terminal window using `apt install `. + +To see the list of all R-IDE image templates available, including security scan results or to inspect the container specification, see [https://public.cr.seqera.io/repo/platform/data-studio-ride][ds-ride]. + +**Visual Studio Code 1.93.1** + +[Visual Studio Code][def-vsc] is an integrated development environment (IDE) that supports many programming languages. The default user is the `root` account. The container template image ships with the latest stable version of [Nextflow] and the [VS Code extension for Nextflow][nf-lang-server] to make troubleshooting Nextflow workflows easier. To install additional extensions during a running Studio session, select **Extensions**. 
Additional system-level packages can be installed in a terminal window using `apt install `. + +To see the list of all Visual Studio Code image templates available, including security scan results or to inspect the container specification, see [public.cr.seqera.io/platform/data-studio-vscode][ds-vscode]. + +**Xpra 6.2.0** + +[Xpra][def-xpra], known as _screen for X_, allows you to run X11 programs by giving you remote access to individual graphical applications. The container template image also installs NVIDIA Linux x64 (AMD64/EM64T) drivers for Ubuntu 22.04 for running GPU-enabled applications. To use these GPU drivers, your compute environment must specify GPU instance families. + +The default user is the `root` account. The image is based on `ubuntu:jammy`. Additional system-level packages can be installed during a running Studio session in a terminal window using `apt install `. + +To see the list of all Xpra image templates available, including security scan results or to inspect the container specification, see [public.cr.seqera.io/repo/platform/data-studio-xpra][ds-xpra]. + +## Studio session statuses + +Sessions have the following possible statuses: + +- **building**: When a custom environment is building the template image for a new session. The [Wave] service performs the build action. For more information on this status, see [Inspect custom container template build status][build-status]. +- **build-failed**: When a custom environment build has failed. This is a non-recoverable error. Logs are provided to assist with troubleshooting. For more information on this status, see [Inspect custom container template build status][build-status]. +- **starting**: The Studio is initializing. +- **running**: When a session is **running**, you can connect to it, copy the URL, or stop it. In addition, the session can continue to process requests/run computations in the absence of an ongoing connection. 
+- **stopping**: The recently-running session is in the process of being stopped.
+- **stopped**: When a session is stopped, the associated compute resources are deallocated. You can start or delete the session when it's in this state.
+- **errored**: This state most often indicates that there has been an error starting the session but it is in a **stopped** state.
+
+:::note
+There might be errors reported by the session itself but these will be overwritten with a **running** status if the session is still running.
+:::
+
+## Studio session data links
+
+You can configure a Studio session to mount one or more data links, where cloud buckets that you have configured in your compute environment are read-only, or read-write available to the session.
+
+If your compute environment includes a cloud bucket in the **Allowed S3 bucket** list, the bucket is writeable from within a session when that bucket is included as a data link.
+
+You can limit write access to just a subdirectory of a bucket by creating a custom data-link for only that subdirectory in Data Explorer, and then mount the data-link to the Studio session. For example, if you have the following S3 buckets:
+
+- `s3://biopharmaX`: Entire bucket
+- `s3://biopharmaX/experiments/project-A/experiment-1/data`: Subdirectory to mount in a Studio session
+
+Mounted data links are exposed at the `/workspace/data/` directory path inside a Studio session. For example, the bucket subdirectory `s3://biopharmaX/experiments/project-A/experiment-1/data`, when mounted as a data link, is exposed at `/workspace/data/biopharmax-project-a-experiment-1-data`.
+
+For more information, see [Limit Studio access to a specific cloud bucket subdirectory][cloud-bucket-subdirectory].
+
+## Studio session checkpoints
+
+When starting a Studio session, a *checkpoint* is automatically created. 
A checkpoint saves all changes made to the root filesystem and stores it in the attached compute environment's pipeline work directory in the `.studios/checkpoints` folder with a unique name. The current checkpoint is updated every five minutes during a session. + +:::warning +Checkpoints vary in size depending on libraries installed in your session environment. This can potentially result in many large files stored in the compute environment's pipeline work directory and saved to cloud storage. This storage will incur costs based on the cloud provider. Due to the architecture of Studios, you cannot delete any checkpoint files to save on storage costs. Deleting a Studio session's checkpoints will result in a corrupted Studio session that cannot be started nor recovered. +::: + +When you stop and start a session, or start a new session from a previously created checkpoint, changes such as installed software packages and configuration files are restored and made available. Changes made to mounted data are not included in a checkpoint. + +Checkpoints can be renamed and the name has to be unique per Studio. Spaces in checkpoint names are converted to underscores automatically. + +Checkpoint files in the compute environment work directory may be shared by multiple Studios. Each checkpoint file is cleaned up asynchronously after the last Studio referencing the checkpoint is deleted. + +:::note +The cleanup process is a best effort and not guaranteed. Seqera attempts to remove the checkpoint, but it can fail if, for example, the compute environment credentials used do not have sufficient permissions to delete objects from storage buckets. +::: + +## Session volume automatic resizing + +By default, a session allocates an initial 2 GB of storage. Available disk space is continually monitored and if the available space drops below a 1 GB threshold, the file system is dynamically resized to include an additional 2 GB of available disk space. 
+ +This approach ensures that a session doesn't initially include unnecessary free disk space, while providing the flexibility to accommodate installation of large software packages required for data analysis. + +The maximum storage allocation for a session is limited by the compute environment disk boot size. By default, this is 30 GB. This limit is shared by all sessions running in the same compute environment. If the maximum allocation size is reached, it is possible to reclaim storage space using a snapshot. + +Stop the active session to trigger a snapshot from the active volume. The snapshot is uploaded to cloud storage with Fusion. When you start from the newly saved snapshot, all previous data is loaded, and the newly started session will have 2 GB of available space. + +{/* links */} +[contact]: https://support.seqera.io/ +[aws-batch]: ../compute-envs/aws-batch +[custom-envs]: ./custom-envs +[build-status]: ./custom-envs#build-status +[cloud-bucket-subdirectory]: ./managing#cloud-bucket-subdirectory +[ds-jupyter]: https://public.cr.seqera.io/repo/platform/data-studio-jupyter +[ds-ride]: https://public.cr.seqera.io/repo/platform/data-studio-ride +[def-vsc]: https://code.visualstudio.com/ +[Nextflow]: https://nextflow.io/ +[nf-lang-server]: https://marketplace.visualstudio.com/items?itemName=nextflow.nextflow +[ds-vscode]: https://public.cr.seqera.io/repo/platform/data-studio-vscode +[def-xpra]: https://github.com/Xpra-org/xpra +[ds-xpra]: https://public.cr.seqera.io/repo/platform/data-studio-xpra +[Wave]: https://seqera.io/wave/ +[build-status]: ./custom-envs#build-status diff --git a/platform-enterprise/supported_software/agent/overview.md b/platform-enterprise/supported_software/agent/overview.md new file mode 100644 index 000000000..deed55306 --- /dev/null +++ b/platform-enterprise/supported_software/agent/overview.md @@ -0,0 +1,69 @@ +--- +title: "Tower Agent" +description: "Instructions to use Tower Agent." 
+date: "24 Apr 2023" +tags: [agent] +--- + +Tower Agent enables Seqera Platform to launch pipelines on HPC clusters that don't allow direct access through an SSH client. + +Tower Agent is a standalone process that runs on a node that can submit jobs to the cluster (e.g., a login node). It establishes an authenticated secure reverse connection with Seqera, allowing Seqera to submit and monitor new jobs. The jobs are submitted on behalf of the user running the agent. + +### Installation + +Tower Agent is distributed as a single executable file to simply download and execute. + +1. Download the latest release from [GitHub](https://github.com/seqeralabs/tower-agent) and make the file executable: + + ```bash + curl -fSL https://github.com/seqeralabs/tower-agent/releases/latest/download/tw-agent-linux-x86_64 > tw-agent + chmod +x ./tw-agent + ``` + +2. (Optional) Move it to a folder that's in your $PATH. + +### Quickstart + +Before running the Agent: + +1. Create a [**personal access token**](../../api/overview#authentication). + +2. Create [Tower Agent credentials](../../credentials/agent_credentials) in a Seqera Platform workspace. + +:::note +To share a single Tower Agent instance with all members of a workspace, create a Tower Agent credential with **Shared agent** enabled. +::: + +When you create the credentials, you receive an _Agent Connection ID_. You can use the default ID or enter a custom ID. The connection ID in the workspace credentials must match the ID entered when you run the agent. + +The agent should always be running in order to accept incoming requests from Seqera. We recommend that you use a terminal multiplexer like [tmux](https://github.com/tmux/tmux) or [GNU Screen](https://www.gnu.org/software/screen/), so that it keeps running even if you close your SSH session. 
+ +```bash +export TOWER_ACCESS_TOKEN= +./tw-agent +``` + +### Tips + +- If you're using the agent with Seqera Platform Enterprise (on-prem), you can set the API URL using the `TOWER_API_ENDPOINT` environment variable or the `--url` option. +- By default, the Agent uses the folder `${HOME}/work` as the Nextflow work directory. You can change this directory using the `--work-dir` option. +- The work directory _must_ exist before running the agent. +- You can also change the work directory in Seqera when you create a compute environment or launch a pipeline. + +### Usage + +```bash +Usage: tw-agent [OPTIONS] AGENT_CONNECTION_ID + +Nextflow Tower Agent + +Parameters: +* AGENT_CONNECTION_ID Agent connection ID to identify this agent. + +Options: +* -t, --access-token= Tower personal access token. If not provided, the TOWER_ACCESS_TOKEN variable will be used. + -u, --url= Tower server API endpoint URL. If not provided TOWER_API_ENDPOINT variable will be used [default: https://api.cloud.seqera.io]. + -w, --work-dir= Default path where the pipeline scratch data is stored. It can be changed when launching a pipeline from Tower [default: ~/work]. + -h, --help Show this help message and exit. + -V, --version Print version information and exit. +``` diff --git a/platform-enterprise/supported_software/dragen/overview.md b/platform-enterprise/supported_software/dragen/overview.md new file mode 100644 index 000000000..9344c7ae8 --- /dev/null +++ b/platform-enterprise/supported_software/dragen/overview.md @@ -0,0 +1,70 @@ +--- +title: "Illumina DRAGEN" +description: "Instructions to integrate Illumina DRAGEN with Seqera Platform." +date: "24 Apr 2023" +tags: [dragen, integration] +--- + +DRAGEN is a platform provided by Illumina that offers accurate, comprehensive, and efficient secondary analysis of next-generation sequencing (NGS) data with a significant speed increase over tools that are commonly used for such tasks. 
+ +The improved performance offered by DRAGEN is possible due to the use of Illumina proprietary algorithms in conjunction with a special type of hardware accelerator called field programmable gate arrays (FPGAs). For example, when using AWS, FPGAs are available via the [F1 instance type](https://aws.amazon.com/ec2/instance-types/f1/). + +## Run DRAGEN on Seqera Platform + +We have extended the [Batch Forge](../../compute-envs/aws-batch?h=forge#batch-forge) feature for AWS Batch to support DRAGEN. Batch Forge ensures that all of the appropriate components and settings are automatically provisioned when creating an AWS Batch [compute environment](../../compute-envs/aws-batch#batch-forge). + +When deploying data analysis workflows, some tasks will need to use normal instance types (e.g., for non-DRAGEN processing of samples) and others will need to be executed on F1 instances. If the DRAGEN feature is enabled, Batch Forge will create an additional AWS Batch compute queue which only uses F1 instances, to which DRAGEN tasks will be dispatched. + +## Getting started + +To showcase the capability of this integration, we have implemented a proof of concept pipeline called [nf-dragen](https://github.com/seqeralabs/nf-dragen). To run it, sign into Seqera Platform, navigate to the [Community Showcase](https://tower.nf/orgs/community/workspaces/showcase/launchpad) and select the "nf-dragen" pipeline. + +You can run this pipeline at your convenience without any extra setup. Note however that it will be deployed in the compute environment owned by the Community Showcase. + +To deploy the pipeline on your own AWS cloud infrastructure, follow the instructions in the next section. + +## Deploy DRAGEN in your own workspace + +DRAGEN is a commercial technology provided by Illumina, so you will need to purchase a license from them. To run on Seqera, you will need to obtain the following information from Illumina: + +1. DRAGEN AWS private AMI ID +2. DRAGEN license username +3. 
DRAGEN license password + +Batch Forge automates most of the tasks required to set up an AWS Batch compute environment. See [AWS Batch](../../compute-envs/aws-batch) for more details. + +In order to enable support for DRAGEN acceleration, simply toggle the **Enable DRAGEN** option when setting up the compute environment via Batch Forge. + +In the **DRAGEN AMI ID** field, enter the AWS AMI ID provided by Illumina. Then select the instance type from the drop-down menu. + +:::note +The Region you select must contain DRAGEN F1 instances. +::: + +## Pipeline implementation and deployment + +See the [dragen.nf](https://github.com/seqeralabs/nf-dragen/blob/master/modules/local/dragen.nf) module implemented in the [nf-dragen](https://github.com/seqeralabs/nf-dragen) pipeline for reference. Any Nextflow processes that run DRAGEN must: + +1. Define the `dragen` label in your Nextflow process: + + The `label` directive allows you to annotate a process with mnemonic identifiers of your choice. Seqera will use the `dragen` label to determine which processes need to be executed on DRAGEN F1 instances. + + ``` + process DRAGEN { + label 'dragen' + + + } + ``` + + See the [Nextflow label docs](https://www.nextflow.io/docs/latest/process.html?highlight=label#label) for more information. + +2. Define secrets in Seqera: + + At Seqera, we use secrets to safely encrypt sensitive information when running licensed software via Nextflow. This enables our team to use the DRAGEN software safely via the `nf-dragen` pipeline without the need to configure the license key. These secrets will be provided securely to the `--lic-server` option when running DRAGEN on the CLI to validate the license. + + In the nf-dragen pipeline, we have defined two secrets called `DRAGEN_USERNAME` and `DRAGEN_PASSWORD`, which you can add to Seqera from the [Secrets](../../secrets/overview) tab. 
+ +## Limitations + +DRAGEN integration with Seqera Platform is currently only available for use on AWS, however, we plan to extend the functionality to other supported platforms like Azure in the future. diff --git a/platform-enterprise/supported_software/fusion/overview.md b/platform-enterprise/supported_software/fusion/overview.md new file mode 100644 index 000000000..db096cc96 --- /dev/null +++ b/platform-enterprise/supported_software/fusion/overview.md @@ -0,0 +1,33 @@ +--- +title: "Fusion v2 file system" +description: "Fusion file system" +--- + +Fusion v2 is a lightweight container-based client that enables containerized tasks to access data in Amazon S3, Google Cloud, or Azure Blob Storage buckets using POSIX file access semantics. Depending on your data handling requirements, Fusion can improve pipeline throughput and reduce cloud computing costs. + +See [here](https://docs.seqera.io/fusion) for more information on Fusion's features. + +### Fusion mechanics + +The Fusion file system implements a lazy download and upload algorithm that runs in the background to transfer files in parallel to and from object storage into a container-local temporary folder. This means that the performance of the disk volume used to carry out your computation is key to achieving maximum performance. + +By default, Fusion uses the container `/tmp` directory as a temporary cache, so the size of the volume can be much lower than the actual needs of your pipeline processes. Fusion has a built-in garbage collector that constantly monitors remaining disk space and deletes old cached entries when necessary. + +### Fusion performance and cost considerations + +Fusion v2 improves pipeline throughput for containerized tasks by simplifying direct access to cloud data storage. 
Compute instance performance, local storage, and networking influence pipeline execution — the following guidelines are important when creating a compute environment that uses Fusion: + +- Fusion requires compute instances with attached local storage: + - We recommend at least 200 GB storage with a random read speed of 1000 MBps or more. Machines with local disks that do not meet this requirement may encounter issues where local storage cannot keep up with streaming data. +- Based on internal benchmarking, we recommend instances with 16 vCPUs and 128 GB memory or more for large, long-lived production pipelines. Seqera benchmarking runs of [nf-core/rnaseq](https://github.com/nf-core/rnaseq) used profile `test_full`, consisting of an input dataset with 16 FASTQ files and a total size of approximately 123.5 GB. +- Dedicated networking and fast I/O influence pipeline performance and are important to consider when selecting compute instances. + +### Configure Seqera Platform compute environments with Fusion + +See the compute environment page for your cloud provider for Fusion configuration instructions: + +- [AWS Batch](../../compute-envs/aws-batch.md) +- [Amazon EKS](../../compute-envs/eks.md) +- [Azure Batch](../../compute-envs/azure-batch.md) +- [Google Cloud Batch](../../compute-envs/google-cloud-batch.md) +- [Google Kubernetes Engine](../../compute-envs/gke.md) diff --git a/platform-enterprise/troubleshooting_and_faqs/_images/studios_notebook_fusion.png b/platform-enterprise/troubleshooting_and_faqs/_images/studios_notebook_fusion.png new file mode 100644 index 000000000..758698fff Binary files /dev/null and b/platform-enterprise/troubleshooting_and_faqs/_images/studios_notebook_fusion.png differ diff --git a/platform-enterprise/troubleshooting_and_faqs/api_and_cli.md b/platform-enterprise/troubleshooting_and_faqs/api_and_cli.md new file mode 100644 index 000000000..9314d69aa --- /dev/null +++ b/platform-enterprise/troubleshooting_and_faqs/api_and_cli.md @@ -0,0 
+1,70 @@
+---
+title: "Developer tools"
+description: "API and CLI troubleshooting with Seqera Platform."
+date: "26 August 2024"
+tags: [faq, help, aws help, aws troubleshooting]
+---
+
+## API
+
+### Maximum results returned
+
+Use pagination to fetch the results in smaller chunks through multiple API calls with `max` and `offset` parameters. The error below indicates that you have run into the maximum result limit:
+
+`{object} length parameter cannot be greater than 100 (current value={value_sent})`
+
+To remedy this, see the example requests below:
+
+```
+curl -X GET "https://$TOWER_SERVER_URL/workflow/$WORKFLOW_ID/tasks?workspaceId=$WORKSPACE_ID&max=100" \
+    -H "Accept: application/json" \
+    -H "Authorization: Bearer $TOWER_ACCESS_TOKEN"
+curl -X GET "https://$TOWER_SERVER_URL/workflow/$WORKFLOW_ID/tasks?workspaceId=$WORKSPACE_ID&max=100&offset=100" \
+    -H "Accept: application/json" \
+    -H "Authorization: Bearer $TOWER_ACCESS_TOKEN"
+```
+
+## tw CLI
+
+**Connection errors when creating or viewing AWS Batch compute environments with `tw compute-envs` commands**
+
+Versions of tw CLI earlier than v0.8 do not support the `SPOT_PRICE_CAPACITY_OPTIMIZED` [allocation strategy](../compute-envs/aws-batch#advanced-options) in AWS Batch. Creating or viewing AWS Batch compute environments with this allocation strategy will lead to errors. This issue was [addressed in CLI v0.9](https://github.com/seqeralabs/tower-cli/issues/332).
+
+**Segfault errors**
+
+Users of legacy tw CLI versions may experience segmentation faults in older operating systems.
+
+To resolve segfault errors, first upgrade your tw CLI to the latest available version. If errors persist, use our alternative Java [JAR-based solution](https://github.com/seqeralabs/tower-cli/releases/download/v0.8.0/tw.jar).
+
+**Insecure HTTP errors**
+
+The error _ERROR: You are trying to connect to an insecure server: `http://hostname:port/api` if you want to force the connection use '--insecure'. 
NOT RECOMMENDED!_ indicates that your Seqera host accepts connections using insecure HTTP instead of HTTPS. If your host cannot be configured to accept HTTPS connections, add the `--insecure` flag **before** your CLI command: + +``` +tw --insecure info +``` + +:::caution +HTTP must not be used in production environments. +::: + +**Resume/relaunch runs with tw CLI** + +Runs can be [relaunched](../launch/cache-resume#relaunch-a-workflow-run) with `tw runs relaunch` command. + +``` +tw runs relaunch -i 3adMwRdD75ah6P -w 161372824019700 + + Workflow 5fUvqUMB89zr2W submitted at [org / private] workspace. + + +tw runs list -w 161372824019700 + + Pipeline runs at [org / private] workspace: + + ID | Status | Project Name | Run Name | Username | Submit Date + ----------------+-----------+----------------+-----------------+-------------+------------------------------- + 5fUvqUMB89zr2W | SUBMITTED | nf/hello | magical_darwin | seqera-user | Tue, 10 Sep 2022 14:40:52 GMT + 3adMwRdD75ah6P | SUCCEEDED | nf/hello | high_hodgkin | seqera-user | Tue, 10 Sep 2022 13:10:50 GMT + +``` diff --git a/platform-enterprise/troubleshooting_and_faqs/aws_troubleshooting.md b/platform-enterprise/troubleshooting_and_faqs/aws_troubleshooting.md new file mode 100644 index 000000000..6b91cd88a --- /dev/null +++ b/platform-enterprise/troubleshooting_and_faqs/aws_troubleshooting.md @@ -0,0 +1,95 @@ +--- +title: "AWS" +description: "AWS troubleshooting with Seqera Platform." +date: "26 August 2024" +tags: [faq, help, aws help, aws troubleshooting] +--- + +### Elastic Block Store (EBS) + +**EBS Autoscaling: EBS volumes remain active after job completion** + +The EBS autoscaling solution relies on an AWS-provided script which runs on each container host. This script performs AWS EC2 API requests to delete EBS volumes when the jobs using those volumes have been completed. 
+ +When running large Batch clusters (hundreds of compute nodes or more), EC2 API rate limits may cause the deletion of unattached EBS volumes to fail. Volumes that remain active after Nextflow jobs have been completed will incur additional costs and should therefore be manually deleted. You can monitor your AWS account for any orphaned EBS volumes via the EC2 console or with a Lambda function. See [Controlling your AWS costs by deleting unused Amazon EBS volumes](https://aws.amazon.com/blogs/mt/controlling-your-aws-costs-by-deleting-unused-amazon-ebs-volumes/) for more information. + +### Elastic Container Service (ECS) + +**ECS Agent Docker image pull frequency** + +As part of the AWS Batch creation process, Batch Forge will set ECS Agent parameters in the EC2 launch template that is created for your cluster's EC2 instances: + +- For clients using Seqera Enterprise v22.01 or later: + - Any AWS Batch environment created by Batch Forge will set the ECS Agent's `ECS_IMAGE_PULL_BEHAVIOUR` to `once`. +- For clients using Seqera Enterprise v21.12 or earlier: + - Any AWS Batch environment created by Batch Forge will set the ECS Agent's `ECS_IMAGE_PULL_BEHAVIOUR` to `default`. + +See the [AWS ECS documentation](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-agent-config.html) for an in-depth explanation of this difference. + +:::note +This behaviour can't be changed within Seqera Platform. +::: + +### Container errors + +**CannotPullContainerError: Error response from daemon: error parsing HTTP 429 response body: invalid character 'T' looking for beginning of value: "Too Many Requests (HAP429)"** + +Docker Hub imposes a rate limit of 100 anonymous pulls per 6 hours. 
Add the following to your launch template to avoid this issue: + +`echo ECS_IMAGE_PULL_BEHAVIOR=once >> /etc/ecs/ecs.config` + +**CannotInspectContainerError** + +If your run fails with an _Essential container in task exited - CannotInspectContainerError: Could not transition to inspecting; timed out after waiting 30s_ error, try the following: + +1. Upgrade your [ECS Agent](https://github.com/aws/amazon-ecs-agent/releases) to [1.54.1](https://github.com/aws/amazon-ecs-agent/pull/2940) or newer. See [Check for ECS Container Instance Agent Version](https://www.trendmicro.com/cloudoneconformity/knowledge-base/aws/ECS/latest-agent-version.html) for instructions to check your ECS Agent version. +2. Provision more storage for your EC2 instance (preferably via EBS-autoscaling to ensure scalability). +3. If the error is accompanied by _command exit status: 123_ and a _permissions denied_ error tied to a system command, ensure that the ECS Agent binary is set to be executable (`chmod u+x`). + +## Queues + +**Multiple AWS Batch queues for a single job execution** + +Although you can only create/identify a single work queue during the definition of your AWS Batch compute environment in Seqera, you can spread tasks across multiple queues when your job is sent to Batch for execution via your pipeline configuration. Add the following snippet to your `nextflow.config`, or the **Advanced Features > Nextflow config file** field of the Seqera Launch UI, for processes to be distributed across two AWS Batch queues, depending on the assigned name. + +```bash +# nextflow.config + +process { + withName: foo { + queue: `TowerForge-1jJRSZmHyrrCvCVEOhmL3c-work` + } +} + +process { + withName: bar { + queue: `custom-second-queue` + } +} +``` + +## Storage + +**Enable pipelines to write to S3 buckets that enforces AES256 server-side encryption** + +:::note +This solution requires Seqera v21.10.4 and Nextflow [22.04.0](https://github.com/nextflow-io/nextflow/releases/tag/v22.04.0) or later. 
+::: + +If you need to save files to an S3 bucket with a policy that [enforces AES256 server-side encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingServerSideEncryption.html), the [nf-launcher](https://quay.io/repository/seqeralabs/nf-launcher?tab=tags) script which invokes the Nextflow head job requires additional configuration: + +1. Add the following configuration to the **Advanced options > Nextflow config file** textbox of the **Launch Pipeline** screen: + + ``` + aws { + client { + storageEncryption = 'AES256' + } + } + ``` + +2. Add the following configuration to the **Advanced options > Pre-run script** textbox of the **Launch Pipeline** screen: + + ```bash + export TOWER_AWS_SSE=AES256 + ``` diff --git a/platform-enterprise/troubleshooting_and_faqs/azure_troubleshooting.md b/platform-enterprise/troubleshooting_and_faqs/azure_troubleshooting.md new file mode 100644 index 000000000..377fc786b --- /dev/null +++ b/platform-enterprise/troubleshooting_and_faqs/azure_troubleshooting.md @@ -0,0 +1,51 @@ +--- +title: "Azure" +description: "Azure troubleshooting with Seqera Platform." +date: "26 August 2024" +tags: [faq, help, azure help, azure troubleshooting] +--- + +### Batch compute environments + +**Use separate Batch pools for head and compute nodes** + +The default Azure Batch implementation in Seqera Platform uses a single pool for head and compute nodes. This means that all jobs spawn dedicated/on-demand VMs by default. To save cloud costs by using low priority VMs for compute jobs, specify separate pools for head and compute jobs: + +1. Create two Batch pools in Azure: + - One Dedicated + - One [Low priority](https://learn.microsoft.com/en-us/azure/batch/batch-spot-vms#differences-between-spot-and-low-priority-vms) + +:::note +Both pools must meet the requirements of a pre-existing pool as detailed in the [Nextflow documentation](https://www.nextflow.io/docs/latest/azure.html#requirements-on-pre-existing-named-pools). +::: + +2. 
Create a manual [Azure Batch](../compute-envs/azure-batch#manual) compute environment in Seqera Platform. +3. In **Compute pool name**, specify your dedicated Batch pool. +4. Specify the Low priority pool using the `process.queue` [directive](https://www.nextflow.io/docs/latest/process.html#queue) in your `nextflow.config` file either via the launch form, or your pipeline repository's `nextflow.config` file. + +### Azure Kubernetes Service (AKS) + +**... /.git/HEAD.lock: Operation not supported** + +This error can occur if your Nextflow pod uses an Azure Files-type (SMB) persistent volume as its storage medium. By default, the `jgit` library used by Nextflow attempts a filesystem link operation which [is not supported](https://docs.microsoft.com/en-us/azure/storage/files/files-smb-protocol?tabs=azure-portal#limitations) by Azure Files (SMB). + +To avoid this problem, add the following code snippet in your pipeline's [**Pre-run script**](../launch/advanced#pre-and-post-run-scripts) field: + +```bash +cat < ~/.gitconfig +[core] + supportsatomicfilecreation = true +EOT +``` + +### SSL + +**Problem with the SSL CA cert** + +This can occur if a tool/library in your task container requires SSL certificates to validate the identity of an external data source. Mount SSL certificates into the container to resolve this issue. See [SSL/TLS](../enterprise/configuration/ssl_tls#configure-seqera-to-trust-your-private-certificate) for more information. + +**Azure SQL database error: _Connections using insecure transport are prohibited while --require_secure_transport=ON_** + +This error is due to Azure's default MySQL behavior which enforces the SSL connections between your server and client application, as detailed in [SSL/TLS connectivity in Azure Database for MySQL](https://learn.microsoft.com/en-us/azure/mysql/single-server/concepts-ssl-connection-security). 
To fix this, append `useSSL=true&enabledSslProtocolSuites=TLSv1.2&trustServerCertificate=true` to your `TOWER_DB_URL` connection string. For example: + +`TOWER_DB_URL: jdbc:mysql://mysql:3306/tower?permitMysqlScheme=true/azuredatabase.com/tower?serverTimezone=UTC&useSSL=true&enabledSslProtocolSuites=TLSv1.2&trustServerCertificate=true` diff --git a/platform-enterprise/troubleshooting_and_faqs/datasets_troubleshooting.md b/platform-enterprise/troubleshooting_and_faqs/datasets_troubleshooting.md new file mode 100644 index 000000000..afa2ac8ae --- /dev/null +++ b/platform-enterprise/troubleshooting_and_faqs/datasets_troubleshooting.md @@ -0,0 +1,51 @@ +--- +title: "Datasets" +description: "Dataset troubleshooting with Seqera Platform." +date: "26 August 2024" +tags: [faq, help, database, dataset] +--- + +### API: Dataset upload failure + +When uploading datasets via the Seqera UI or CLI, some steps are automatically done on your behalf. To upload datasets via the Seqera API, additional steps are required: + +1. Explicitly define the MIME type of the file being uploaded. +2. Make two calls to the API: + 1. Create a dataset object. + 2. Upload the samplesheet to the dataset object. 
+ +Example: + +**Step 1: Create the dataset object.** + +```bash +curl -X POST "https://api.cloud.seqera.io/workspaces/$WORKSPACE_ID/datasets/" -H "Content-Type: application/json" -H "Authorization: Bearer $TOWER_ACCESS_TOKEN" --data '{"name":"placeholder", "description":"A placeholder for the data we will submit in the next call"}' +``` + +**Step 2: Upload the datasheet into the dataset object.** + +```bash +curl -X POST "https://api.cloud.seqera.io/workspaces/$WORKSPACE_ID/datasets/$DATASET_ID/upload" -H "Accept: application/json" -H "Authorization: Bearer $TOWER_ACCESS_TOKEN" -H "Content-Type: multipart/form-data" -F "file=@samplesheet_full.csv; type=text/csv" +``` + +:::tip +You can also use the [tower-cli](https://github.com/seqeralabs/tower-cli) to upload the dataset to a particular workspace: + + ```bash + tw datasets add --name "cli_uploaded_samplesheet" ./samplesheet_full.csv + ``` +::: + +### Datasets converted to 'application/vnd.ms-excel' data type + +This is a known issue when using Firefox browser with Seqera versions older than 22.2.0. You can either upgrade to 22.2.0 or higher, or use Chrome. + +Seqera displays this error for this issue: + +``` +"Given file is not a dataset file. Detected media type: 'application/vnd.ms-excel'. Allowed types: 'text/csv, text/tab-separated-values'" +``` + +### TSV-formatted datasets not shown + +An issue was identified in Seqera version 22.2 which caused TSV datasets to be unavailable in the input data drop-down menu on the launch screen. This has been fixed in version 22.4.1. \ No newline at end of file diff --git a/platform-enterprise/troubleshooting_and_faqs/nextflow.md b/platform-enterprise/troubleshooting_and_faqs/nextflow.md new file mode 100644 index 000000000..abbf3f4b5 --- /dev/null +++ b/platform-enterprise/troubleshooting_and_faqs/nextflow.md @@ -0,0 +1,228 @@ +--- +title: "Nextflow" +description: "Nextflow troubleshooting with Seqera Platform." 
+date: "26 August 2024" +tags: [faq, help, nextflow help, nextflow troubleshooting] +--- + +### Nextflow configuration + +**Default Nextflow DSL version in Seqera Platform** + +From [Nextflow 22.03.0-edge](https://github.com/nextflow-io/nextflow/releases/tag/v22.03.0-edge), DSL2 is the default syntax. + +To minimize disruption on existing pipelines, version 22.1.x and later are configured to default Nextflow head jobs to DSL1 for a transition period (end date to be confirmed). You can force your Nextflow head job to use DSL2 syntax via any of the following techniques: + +- Add `export NXF_DEFAULT_DSL=2` in the **Advanced Features > Pre-run script** field of the Platform launch screen. +- Specify `nextflow.enable.dsl = 2` at the top of your Nextflow workflow file. +- Provide the `-dsl2` flag when invoking the Nextflow CLI (e.g., `nextflow run ... -dsl2`). + +**Invoke Nextflow CLI run arguments during Seqera launch** + +From [Nextflow v22.09.1-edge](https://github.com/nextflow-io/nextflow/releases/tag/v22.09.1-edge), you can specify [Nextflow CLI run arguments](https://www.nextflow.io/docs/latest/cli.html?highlight=dump#run) when invoking a pipeline from Seqera. Set the `NXF_CLI_OPTS` environment variable using a [pre-run script](../launch/advanced#pre-and-post-run-scripts): + +``` +export NXF_CLI_OPTS='-dump-hashes' +``` + +**Cloud compute environment execution: `--outdir` artefacts not available** + +Nextflow resolves relative paths against the current working directory. In a classic grid HPC, this normally corresponds to a subdirectory of the `$HOME` directory. In a cloud execution environment, however, the path will be resolved relative to the _container file system_, meaning files will be lost when the container is terminated. See [here](https://github.com/nextflow-io/nextflow/issues/2661#issuecomment-1047259845) for more details. 
+ +Specify the absolute path to your persistent storage using the `NXF_FILE_ROOT` environment variable in your [`nextflow.config`](../launch/advanced#nextflow-config-file) file. This resolves the relative paths defined in your Netflow script so that output files are written to your stateful storage, rather than ephemeral container storage. + +**Nextflow: Ignore Singularity cache** + +To ignore the Singularity cache, add this configuration item to your workflow: `process.container = 'file:///some/singularity/image.sif'`. + +**Nextflow error: _WARN: Cannot read project manifest ... path=nextflow.config_** + +This error can occur when executing a pipeline where the source Git repository's default branch is not populated with `main.nf` and `nextflow.config` files, regardless of whether the invoked pipeline is using a non-default revision/branch (e.g., `dev`). + +Currently, you can resolve this by creating empty `main.nf` and `nextflow.config` files in the default branch. This allows the pipeline to run and use the content of the `main.nf` and `nextflow.config` in your target revision. + +**Use multiple Nextflow configuration files for different environments** + +The main `nextflow.config` file is always imported by default. Instead of managing multiple `nextflow.config` files (each customized for an environment), you can create unique environment config files and import them as [config profiles](https://www.nextflow.io/docs/latest/config.html#config-profiles) in the main `nextflow.config`. + +Example: + +``` + + +profiles { + test { includeConfig 'conf/test.config' } + prod { includeConfig 'conf/prod.config' } + uat { includeConfig 'conf/uat.config' } +} + + +``` + +**AWS S3 upload file size limits** + +You may encounter _`WARN: Failed to publish file: s3://`_ log messages. These are often related to AWS S3 object size limitations when using the multipart upload feature. 
+ +See the [AWS documentation](https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html) for more information, particularly _maximum number of parts per upload_. + +The following configuration is suggested to overcome AWS limitations: + +- Head Job CPUs: 16 +- Head Job Memory: 60000 +- [Pre-run script](../launch/advanced#pre-and-post-run-scripts): `export NXF_OPTS="-Xms20G -Xmx40G"` +- Increase chunk size and slow down the number of transfers using `nextflow.config`: + + ``` + aws { + batch { + maxParallelTransfers = 5 + maxTransferAttempts = 3 + delayBetweenAttempts = 30 + } + client { + uploadChunkSize = '200MB' + maxConnections = 10 + maxErrorRetry = 10 + uploadMaxThreads = 10 + uploadMaxAttempts = 10 + uploadRetrySleep = '10 sec' + } + } + ``` + +**Nextflow unable to parse a params file from Seqera** + +Ephemeral endpoints can only be consumed once. Nextflow versions older than 22.04 may try to call the same endpoint more than once, resulting in an error: + +_Cannot parse params file: /ephemeral/example.json - Cause: Server returned HTTP response code: 403 for URL: https://api.tower.nf/ephemeral/example.json_ + +To resolve this problem, upgrade Nextflow to version 22.04.x or later. + +**Prevent Nextflow from uploading intermediate files from local scratch to AWS S3 work directory** + +Nextflow will only unstage files/folders that have been explicitly defined as process outputs. If your workflow has processes that generate folder-type outputs, ensure that the process also purges any intermediate files in those folders. Otherwise, the intermediate files are copied as part of the task unstaging process, resulting in additional storage costs and lengthened pipeline execution times. + +**Values specified in Git repository `nextflow.config` change during Seqera launch** + +Some values specified in your pipeline repository's `nextflow.config` may change when the pipeline is invoked via Seqera. 
This occurs because Seqera is configured with a set of default values that override the pipeline configuration. For example, the following code block is specified in your `nextflow.config`: + +``` +aws { + region = 'us-east-1' + client { + uploadChunkSize = 209715200 // 200 MB + } + ... +} +``` + +When the job instantiates on the AWS Batch compute environment, the `uploadChunkSize` is changed: + +``` +aws { + region = 'us-east-1' + client { + uploadChunkSize = 10485760 // 10 MB + } + ... +} +``` + +This change occurs because Seqera superimposes its 10 MB default value rather than the value specified in your `nextflow.config`. + +To force the Seqera-invoked job to use your `nextflow.config` value, add the configuration setting in the workspace Launch screen's [**Nextflow config file** field](../launch/launchpad). For the example above, you would add `aws.client.uploadChunkSize = 209715200 // 200 MB`. + +Nextflow configuration values affected by this behaviour include: + +- `aws.client.uploadChunkSize` +- `aws.client.storageEncryption` + +**Fusion v1 execution: _Missing output file(s) [X] expected by process [Y]_ error** + +Fusion v1 has a limitation which causes tasks that run for less than 60 seconds to fail as the output file generated by the task is not yet detected by Nextflow. This is a limitation inherited from a Goofys driver used by the Fusion v1 implementation. [Fusion v2](../../version-25.1/supported_software/fusion/overview) resolves this issue. + +If you can't update to Fusion v2, this issue can be addressed by instructing Nextflow to wait for 60 seconds after the task completes. 
+ +From **Pipeline settings > Advanced options > Nextflow config file** add this line to your Nextflow configuration: + +``` +process.afterScript = 'sleep 60' +``` + +**Jobs remain in RUNNING status when a pipeline run is canceled** + +Your instance's behavior when canceling a run depends on the Nextflow [`errorStrategy`](https://www.nextflow.io/docs/latest/process.html#errorstrategy) defined in your process script. If the process `errorStrategy` is set to `finish`, an orderly pipeline shutdown is initiated when you cancel (or otherwise interrupt) a run. This instructs Nextflow to wait for the completion of any submitted jobs. To ensure that all jobs are terminated when your run is canceled, set `errorStrategy` to `terminate` in your Nextflow config. For example: + +``` +process terminateError { + errorStrategy 'terminate' + script: + +} +``` + +**Cached tasks run from scratch during pipeline relaunch** + +When relaunching a pipeline, Seqera relies on Nextflow's `resume` functionality for the continuation of a workflow execution. This skips previously completed tasks and uses a cached result in downstream tasks, rather than running the completed tasks again. The unique ID (hash) of the task is calculated using a composition of the task's: + +- Input values +- Input files +- Command line string +- Container ID +- Conda environment +- Environment modules +- Any executed scripts in the bin directory + +A change in any of these values results in a changed task hash. Changing the task hash value means that the task will be run again when the pipeline is relaunched. To aid debugging efforts when a relaunch behaves unexpectedly, run the pipeline twice with `dumpHashes=true` set in your Nextflow config file (from **Advanced options > Nextflow config file** in the Pipeline settings). This will instruct Nextflow to dump the task hashes for both executions in the `nextflow.log` file. 
Compare the log files to determine the point at which the hashes diverge in your pipeline when it is resumed. + +See [here](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html) for more information on the Nextflow `resume` mechanism. + +**Run failure: _o.h.e.jdbc.spi.SqlExceptionHelper - Incorrect string value_ error** + +``` + [scheduled-executor-thread-2] - WARN o.h.e.jdbc.spi.SqlExceptionHelper - SQL Error: 1366, SQLState: HY000 + + [scheduled-executor-thread-2] - ERROR o.h.e.jdbc.spi.SqlExceptionHelper - (conn=34) Incorrect string value: '\xF0\x9F\x94\x8D |...' for column 'error_report' at row 1 + + [scheduled-executor-thread-2] - ERROR i.s.t.service.job.JobSchedulerImpl - Unable to save status of job id=18165; name=nf-workflow-26uD5XXXXXXXX; opId=nf-workflow-26uD5XXXXXXXX; status=UNKNOWN +``` + +Runs will fail if your Nextflow script or Nextflow config contain illegal characters (such as emojis or other non-UTF8 characters). Validate your script and config files for any illegal characters before atttempting to run again. + +**Run failures: Nextflow script exceeds 64KiB** + +The Groovy shell used by Nextflow to execute your workflow has a hard limit on string size (64KiB). Check the size of your scripts with the `ls -llh` command. If the size is greater than 65,535 bytes, consider these mitigation techniques: + +1. Remove any unnecessary code or comments from the script. +2. Move long script bodies into a separate script file in the pipeline `/bin` directory. +3. Consider using DSL2 so you can move each function, process, and workflow definition into its own script and include these scripts as [modules](https://www.nextflow.io/docs/latest/dsl2.html#modules). + +### Nextflow Launcher + +**Seqera Platform / [nf-launcher image](https://quay.io/repository/seqeralabs/nf-launcher?tab=tags) compatibility** + +Your Seqera installation knows the nf-launcher image version it needs and specifies this value automatically when launching a pipeline. 
+ +If you're restricted from using public container registries, see Seqera Enterprise release instructions for the specific image to set as the default when invoking pipelines. + +**Specify Nextflow version** + +Each Seqera Platform release uses a specific nf-launcher image by default. This image is loaded with a specific Nextflow version that any workflow run in the container uses by default. Force your jobs to use a newer/older version of Nextflow with one of the following strategies: + +- Use a [pre-run script](../launch/advanced#pre-and-post-run-scripts) to set the desired Nextflow version. For example: `export NXF_VER=22.08.0-edge` +- For jobs executing in an AWS Batch compute environment, create a [custom job definition](../enterprise/advanced-topics/custom-launch-container) which references a different nf-launcher image. + +### Spot instance failures and retries in Nextflow + +Up to version 24.10, Nextflow silently retried Spot instance failures up to five times when using AWS Batch or Google Batch. These retries were controlled by cloud-specific configuration parameters (e.g., `aws.batch.maxSpotAttempts`) and happened in cloud infrastructure without explicit visibility to Nextflow. + +From version 24.10, the default Spot reclamation retry setting changed to `0` on AWS and Google. By default, no _internal_ retries are attempted on these platforms. Spot reclamations now lead to an immediate failure, exposed to Nextflow in the same way as other generic failures (returning, for example, `exit code 1` on AWS). Nextflow will treat these failures like any other job failure unless you actively configure a retry strategy. + +**Impact on existing workflows** + +If you rely on silent Spot retries (the previous default behavior), you may now see more tasks fail with the following characteristics: + +- **AWS**: Generic failure with `exit code 1`. You may see messages indicating the host machine was terminated. 
+- **Google**: Spot reclamation typically produces a specific code, but is now surfaced as a recognizable task failure in Nextflow logs. + +Since the default for Spot retries is now zero, you must actively enable a retry strategy if you want Nextflow to handle reclaimed Spot instances automatically. For more information, see [manage Spot interruptions](../tutorials/retry-strategy). diff --git a/platform-enterprise/troubleshooting_and_faqs/studios_troubleshooting.md b/platform-enterprise/troubleshooting_and_faqs/studios_troubleshooting.md new file mode 100644 index 000000000..6f088f7c5 --- /dev/null +++ b/platform-enterprise/troubleshooting_and_faqs/studios_troubleshooting.md @@ -0,0 +1,123 @@ +--- +title: "Studios" +description: "Studios troubleshooting with Seqera Platform." +date: "26 August 2024" +tags: [faq, help, studios, troubleshooting] +--- + +## View all mounted datasets + +In your interactive analysis environment, open a new terminal and type `ls -la /workspace/data`. This displays all the mounted datasets available in the current session. + +![](./_images/studios_notebook_fusion.png) + +## Enabling AI coding assistants in Studios + +VS Code, RStudio, and Jupyter environments natively integrate with [GitHub Copilot][gh-copilot]. Enabling it requires a GitHub Account and an active Copilot subscription. + +- **VS Code:** To enable GitHub Copilot in your VS Code session, install the extension and then sign in with your GitHub account. [Learn more][vscode-blog]. +- **RStudio:** To enable GitHub Copilot in your RStudio session requires RStudio configuration changes. By default, the Studio session user has root permissions, so configuration changes are possible. You will need to restart the RStudio once the required changes have been made. [Learn more][posit-ghcopilot-guide]. +- **Jupyter:** [Notebook Intelligence (NBI)][nbi] is an AI coding assistant and extensible AI framework for Jupyter. It can use GitHub Copilot or AI models from any other LLM Provider. 
[Learn more][nbi-blog]. + +## Session size limited by compute environment advanced options: Head job CPUs and Head job memory + +When adding a compute environment, setting the Advanced options **Head job CPUs** and **Head job memory** for Nextflow **also applies** to any Studio session created in the compute environment. This is because Studio sessions are managed by the Nextflow runner job. To avoid artifically constraining the resources of your Studio sessions, **do not define these optional compute environment settings**. + +## Rebuild of a failed custom Studios environment: rebuilding from cache + +Occasionally, building a custom Studios image using the Wave service will fail. This is typically due to conflicting libraries. When attempting to rebuild the image, if it reuses the same name and tag, Studios and Wave will use the cached version (if available). Changing the version number and/or tag will ensure that the custom image is freshly pulled again. + +This is determined by the configuration of the Elastic Container Service (ECS) agent defined by the `ECS_IMAGE_PULL_BEHAVIOR` environment variable. In the case of the Seqera Platform Cloud, when creating the compute environment this is set to the value **once**. Enterprise installations of Seqera Platform may be configured differently. Contact your organization's administrator to learn more. + +## Session is stuck in **starting** + +If your Studio session doesn't advance from **starting** status to **running** status within 30 minutes, and you have access to the AWS Console for your organization, check that the AWS Batch compute environment associated with the session is in the **ENABLED** state with a **VALID** status. You can also check the **Compute resources** settings. Contact your organization's AWS administrator if you don't have access to the AWS Console. + +If sufficient compute environment resources are unavailable, **Stop** the session and any others that may be running before trying again. 
If you have access to the AWS Console for your organization, you can terminate a specific session from the AWS Batch Jobs page (filtering by compute environment queue). + +## Session status is **errored** + +The **errored** status is generally related to issues encountered when creating the Studio session resources in the compute environment (e.g., invalid credentials, insufficient permissions, network issues). It can also be related to insufficient compute resources, which are set in your compute environment configuration. Contact your organization's AWS administrator if you don't have access to the AWS Console. Also contact your Seqera account executive so we can investigate the issue. + +## Session can't be **stopped** + +If you're not able to stop a session, it's usually because the Batch job running the session failed for some reason. In this case, and if you have access to the AWS Console for your organization, you can stop the session from the compute environment screen. Contact your organization's AWS administrator if you don't have access to the AWS Console. Also contact your Seqera account executive so we can investigate the issue. + +## Session performance is poor + +A slow or unresponsive session may be due to its AWS Batch compute environment being utilized for other jobs, such as running Nextflow pipelines. The compute environment is responsible for scheduling jobs to the available compute resources. Sessions compete for resources with the Nextflow pipeline head job and Seqera does not currently have an established pattern of precedence. + +If you have access to the AWS Console for your organization, check the jobs associated with the AWS Batch compute environment and compare the resources allocated with its **Compute resources** settings. + +## Memory allocation of the session is exceeded + +The running container in the AWS Batch compute environment inherits the memory limits specified by the session configuration when adding or starting the session. 
The kernel then handles the memory as if running natively on Linux. Linux can overcommit memory, leading to possible out-of-memory errors in a container environment. The kernel has protections in place to prevent this, but it can happen, and in this case, the process is killed. This can manifest as a performance lag, killed subprocesses, or at worst, a killed session. + +Running sessions have automated snapshots created every five minutes, so if the running container is killed only those changes made after the prior snapshot creation will be lost. + +## All datasets are read-only + +By default, AWS Batch compute environments that are created with Batch Forge restrict access to S3 to the working directory only, unless additional **Allowed S3 Buckets** are specified. If the compute environment does not have write access to the mounted dataset, it will be mounted as read-only. + +## My session with GPU isn't starting + +Check whether the instance type you selected [supports GPU](https://aws.amazon.com/ec2/instance-types/). If you specify multiple GPUs make sure that multi-GPU instances can be launched by your compute environment and are not limited by the maximum CPU config that you've set. + +## R-IDE session initializes with error + +Connecting to a running R-IDE session with R version 4.4.1 (2024-06-14) -- "Race for Your Life" returns a `[rsession-root]` error similar to the following: + +``` +ERROR system error 2 (No such file or directory) [path:/sys/fs/cgroup/memory/memory.limit_in_bytes]; OCCURRED AT rstudio::core::Error rstudio::core::FilePath::openForRead(std::shared_ptr >&) +... +``` + +This is displayed because logging is set to `stderr` by default to ensure all logs are shown during the session, and can safely be ignored. 
This is a known limitation and has been discussed in the Trivy community [here](https://github.com/aquasecurity/trivy/discussions/6112).
GitHub PATs are typically longer than passwords and include a `ghp_` prefix. For example: `ghp_IqIMNOZH6zOwIEB4T9A2g4EHMy8Ji42q4HA`
Confirm that your PAT is providing the elevated threshold and transactions are being charged against it: + + `curl -H "Authorization: token ghp_LONG_ALPHANUMERIC_PAT" -H "Accept: application/vnd.github.v3+json" https://api.github.com/rate_limit` + +**_Row was updated or deleted by another transaction (or unsaved-value mapping was incorrect)_ error** + +This error can occur if incorrect configuration values are assigned to the `backend` and `cron` containers' [`MICRONAUT_ENVIRONMENTS`](../enterprise/configuration/overview#compute-environments) environment variable. You may see other unexpected system behavior, like two exact copies of the same Nextflow job submitted to the executor for scheduling. + +Verify the following: + +1. The `MICRONAUT_ENVIRONMENTS` environment variable associated with the `backend` container: + - Contains `prod,redis,ha` + - Does not contain `cron` +2. The `MICRONAUT_ENVIRONMENTS` environment variable associated with the `cron` container: + - Contains `prod,redis,cron` + - Does not contain `ha` +3. You don't have another copy of the `MICRONAUT_ENVIRONMENTS` environment variable defined elsewhere in your application (such as a `tower.env` file or Kubernetes `ConfigMap`). +4. If you're using a separate container/pod to execute `migrate-db.sh`, ensure there's no `MICRONAUT_ENVIRONMENTS` environment variable assigned to it. + +**_No such variable_ error** + +This error can occur if you execute a DSL1-based Nextflow workflow using [Nextflow 22.03.0-edge](https://github.com/nextflow-io/nextflow/releases/tag/v22.03.0-edge) or later. + +**Sleep commands in Nextflow workflows** + +The `sleep` commands in your Nextflow workflows may differ in behavior depending on where they are: + +- If used within an `errorStrategy` block, the Groovy sleep function will be used (which takes its value in milliseconds). +- If used within a process script block, that language's sleep binary/method will be used. 
For example, [this bash script](https://www.nextflow.io/docs/latest/metrics.html?highlight=sleep) uses the bash sleep binary, which takes its value in seconds. + + +**Large number of batch job definitions** + +Platform normally looks for an existing job definition that matches your workflow requirement. If nothing matches, it recreates the job definition. You can use a simple bash script to clear job definitions. You can tailor this according to your needs, e.g., deregister only job definitions older than x days. + +```bash +jobs=$(aws --region eu-west-1 batch describe-job-definitions | jq -r .jobDefinitions[].jobDefinitionArn) + +for x in $jobs; do + echo "Deregister $x"; + sleep 0.01; + aws --region eu-west-1 batch deregister-job-definition --job-definition $x; +done +``` + +## Containers + +**Use rootless containers in Nextflow pipelines** + +Most containers use the root user by default. However, some users prefer to define a non-root user in the container to minimize the risk of privilege escalation. Because Nextflow and its tasks use a shared work directory to manage input and output data, using rootless containers can lead to file permissions errors in some environments: + +``` +touch: cannot touch '/fsx/work/ab/27d78d2b9b17ee895b88fcee794226/.command.begin': Permission denied +``` + +This should not occur when using AWS Batch from Seqera version 22.1.0. In other situations, you can avoid this issue by forcing all task containers to run as root. Add one of the following snippets to your [Nextflow configuration](../launch/advanced#nextflow-config-file): + +``` +// cloud executors +process.containerOptions = "--user 0:0" + +// Kubernetes +k8s.securityContext = [ + "runAsUser": 0, + "runAsGroup": 0 +] +``` + +## Databases + +**Seqera Enterprise 22.2.0: Database connection failure** + +Seqera Enterprise 22.2.0 introduced a breaking change whereby the `TOWER_DB_DRIVER` is now required to be `org.mariadb.jdbc.Driver`. 
+ +If you use Amazon Aurora as your database solution, you may encounter a _java.sql.SQLNonTransientConnectionException: ... could not load system variables_ error, likely due to a [known error](https://jira.mariadb.org/browse/CONJ-824) tracked within the MariaDB project. + +Please modify the Seqera Enterprise configuration as follows to try resolving the problem: + +1. Ensure your `TOWER_DB_DRIVER` uses the specified MariaDB URI. +2. Modify your `TOWER_DB_URL` to: `TOWER_DB_URL=jdbc:mysql://YOUR_DOMAIN:YOUR_PORT/YOUR_TOWER_DB?usePipelineAuth=false&useBatchMultiSend=false` + +## Email and TLS + +**TLS errors** + +Nextflow and Seqera Platform both have the ability to interact with email providers on your behalf. These providers often require TLS connections, with many now requiring at least TLSv1.2. + +TLS connection errors can occur due to variability in the [default TLS version specified by your JDK distribution](https://aws.amazon.com/blogs/opensource/tls-1-0-1-1-changes-in-openjdk-and-amazon-corretto/). If you encounter any of the following errors, there is likely a mismatch between your default TLS version and what is supported by the email provider: + +- _Unexpected error sending mail ... TLS 1.0 and 1.1 are not supported. Please upgrade/update your client to support TLS 1.2_ +- _ERROR nextflow.script.WorkflowMetadata - Failed to invoke 'workflow.onComplete' event handler ... javax.net.ssl.SSLHandshakeException: No appropriate protocol (protocol is disabled or cipher suites are inappropriate)_ + +To fix the problem, try the following: + +1. Set a JDK environment variable to force Nextflow and Seqera containers to use TLSv1.2 by default: + + ``` + export JAVA_OPTIONS="-Dmail.smtp.ssl.protocols=TLSv1.2" + ``` + +2. Add this parameter to your [nextflow.config file](../launch/advanced#nextflow-config-file): + + ``` + mail { + smtp.ssl.protocols = 'TLSv1.2' + } + ``` + +3. 
- '*@seqera.io'
Previously functioning Seqera Enterprise email integration with Google SMTP is likely to encounter errors as of May 30, 2022, due to a [security posture change](https://support.google.com/accounts/answer/6010255#more-secure-apps-how&zippy=%2Cuse-more-secure-apps) implemented by Google.
+ +## Logging + +**v22.3.1: Broken Nextflow log file** + +A Seqera Launcher issue has been identified that affects the Nextflow log file download in version 22.3.1. A patch was released in version 22.3.2 that addresses this behavior. Update to version 22.3.2 or later. + +## Miscellaneous + +**Maximum parallel Seqera browser tabs** + +Due to a limitation of [server-side event technology implementation in HTTP/1.1](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events), up to five tabs can be open simultaneously (per browser product). Any more will remain stuck in a loading state. + +## Monitoring + +**Integration with 3rd-party Java-based Application Performance Monitoring (APM) solutions** + +Mount the APM solution's JAR file in Seqera's `backend` container and set the agent JVM option via the `JAVA_OPTS` env variable. + +**Retrieve the trace logs for a Seqera-based workflow run** + +Although it's not possible to directly download the trace logs via Seqera, you can configure your workflow to export the file to persistent storage: + +1. Set this block in your [`nextflow.config`](../launch/advanced#nextflow-config-file): + + ```nextflow + trace { + enabled = true + } + ``` + +2. Add a copy command to your pipeline's **Advanced options > Post-run script** field: + + ``` + aws s3 cp ./trace.txt s3://MY_BUCKET/trace/trace.txt + ``` + +**Runs monitoring: Seqera Platform intermittently reports _Live events sync offline_** + +Seqera Platform uses [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) to push real-time updates to your browser. The client must establish a connection to the server's `/api/live` endpoint to initiate the stream of data, and this connection can occasionally fail due to factors like network latency. + +To resolve the issue, try reloading the Platform browser tab to reinitiate the client's connection to the server. 
channel.sql.fromQuery("select * from test", db: "athena", emitColumns:true).view()
+ +For **AWS Batch**, modify your EC2 Launch Template using [these AWS instructions](https://aws.amazon.com/blogs/compute/how-to-authenticate-private-container-registries-using-aws-batch/). + +:::note +This solution requires Docker Engine [17.07 or greater](https://docs.docker.com/engine/release-notes/17.07/), to use `--password-stdin`.
+ + You may need to add additional commands to your Launch template, depending on your security posture:
+ `cp /root/.docker/config.json /home/ec2-user/.docker/config.json && chmod 777 /home/ec2-user/.docker/config.json` +::: + +For **Azure Batch**, create a **Container registry**-type credential in your Seqera workspace and associate it with the Azure Batch compute environment defined in the same workspace. + +For **Kubernetes**, use an `imagePullSecret`, per [#2827](https://github.com/nextflow-io/nextflow/issues/2827). + +**Nextflow error: _Remote resource not found_** + +This error can occur if the Nextflow head job fails to retrieve the necessary repository credentials from Seqera. If your Nextflow log contains an entry like `DEBUG nextflow.scm.RepositoryProvider - Request [credentials -:-]`, check the protocol of your instance's `TOWER_SERVER_URL` configuration value. This must be set to `https` rather than `http` (unless you are using `TOWER_ENABLE_UNSAFE_MODE` to allow HTTP connections to Seqera in a test environment). + +## Secrets + +**_Missing AWS execution role arn_ error during Seqera launch** + +The [ECS Agent must have access](https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html) to retrieve secrets from the AWS Secrets Manager. Secrets-using pipelines launched from your instance in an AWS Batch compute environment will encounter this error if an IAM Execution Role is not provided. See [Secrets](../secrets/overview) for more information. + +**AWS Batch task failures with secrets** + +You may encounter errors when executing pipelines that use secrets via AWS Batch: + +- If you use `nf-sqldb` version 0.4.1 or earlier and have secrets in your `nextflow.config`, you may encounter _nextflow.secret.MissingSecretException: Unknown config secret_ errors in your Nextflow log. 
+ Resolve this error by explicitly defining the `xpack-amzn` plugin in your configuration: + + ``` + plugins { + id 'xpack-amzn' + id 'nf-sqldb' + } + ``` + +- If you have two or more processes that use the same container image, but only a subset of these processes use secrets, your secret-using processes may fail during the initial run and then succeed when resumed. This is due to a bug in how Nextflow (22.07.1-edge and earlier) registers jobs with AWS Batch. + + To resolve the issue, upgrade your Nextflow to version 22.08.0-edge or later. If you cannot upgrade, use the following as workarounds: + + - Use a different container image for each process. + - Define the same set of secrets in each process that uses the same container image. + +## Tower Agent + +**"_Unexpected Exception in WebSocket [...]: Operation timed out java.io.IOException: Operation timed out_" error** + +We have improved Tower Agent reconnection logic with the release of version 0.5.0. [Update your Tower Agent version](https://github.com/seqeralabs/tower-agent) before relaunching your pipeline. + +## Google + +**VM preemption causes task interruptions** + +Running your pipelines on preemptible VMs provides significant cost savings, but increases the likelihood that a task will be interrupted before completion. It is a recommended best practice to implement a retry strategy when you encounter [exit codes](https://cloud.google.com/life-sciences/docs/troubleshooting#retrying_after_encountering_errors) that are commonly related to preemption. For example: + +```config +process { + errorStrategy = { task.exitStatus in [8,10,14] ? 'retry' : 'finish' } + maxRetries = 3 + maxErrors = '-1' +} +``` + +**Seqera Service account permissions for Google Life Sciences and GKE** + +The following roles must be granted to the `nextflow-service-account`: + +1. Cloud Life Sciences Workflows Runner +2. Service Account User +3. Service Usage Consumer +4. 
Storage Object Admin + +For detailed information, see [this guide](https://cloud.google.com/life-sciences/docs/tutorials/nextflow#create_a_service_account_and_add_roles). + +## Kubernetes + +**_Invalid value: "xxx": must be less or equal to memory limit_ error** + +This error may be encountered when you specify a value in the **Head Job memory** field during the creation of a Kubernetes-type compute environment. + +If you receive an error that includes _field: spec.containers[x].resources.requests_ and _message: Invalid value: "xxx": must be less than or equal to memory limit_, your Kubernetes cluster may be configured with [system resource limits](https://kubernetes.io/docs/tasks/administer-cluster/manage-resources/) which deny the Nextflow head job's resource request. To isolate the component causing the problem, try to launch a pod directly on your cluster via your Kubernetes administration solution. For example: + +```yaml +--- +apiVersion: v1 +kind: Pod +metadata: + name: debug + labels: + app: debug +spec: + containers: + - name: debug + image: busybox + command: ["sh", "-c", "sleep 10"] + resources: + requests: + memory: "xxxMi" # or "xxxGi" + restartPolicy: Never +``` + +## On-prem HPC + +**_java: command not found_ error** + +When submitting jobs to your on-prem HPC (using either SSH or Tower Agent authentication), the following error may appear in your Nextflow logs, even with Java on your `PATH` environment variable: + +``` +java: command not found +Nextflow is trying to use the Java VM defined for the following environment variables: + JAVA_CMD: java + NXF_OPTS: +``` + +Possible reasons for this error: + +1. The queue where the Nextflow head job runs is in a different environment/node than your login node userspace. +2. If your HPC cluster uses modules, the Java module may not be loaded by default. + +To troubleshoot: + +1. Open an interactive session with the head job queue. +2. Launch the Nextflow job from the interactive session. +3. 
- Add `module load <your_java_module>` in the **Advanced Features > Pre-run script** field when creating your HPC compute environment in Seqera.
+date: "26 August 2024" +tags: [faq, help, workspaces help, workspaces troubleshooting] +--- + +**Seqera-invoked pipeline contacting a workspace other than the launch workspace** + +You may encounter this entry in your Nextflow log: + +*Unexpected response for request `http://TOWER_SERVER_URL/api/trace/TRACE_ID/begin?workspaceId=WORKSPACE_ID`* + +If the workspace ID in this message differs from your launch workspace, Seqera retrieved an incorrect Seqera access token from a Nextflow configuration file: + +- A Seqera access token may be hardcoded in the `tower.accessToken` block of your `nextflow.config` (either from the Git repository or an override value in the Seqera launch form). +- In an HPC cluster compute environment, the credential user's home directory may contain a stateful `nextflow.config` with a hardcoded access token (e.g., `~/.nextflow/config`). diff --git a/platform-enterprise/tutorials/retry-strategy.md b/platform-enterprise/tutorials/retry-strategy.md new file mode 100644 index 000000000..cd16e5535 --- /dev/null +++ b/platform-enterprise/tutorials/retry-strategy.md @@ -0,0 +1,79 @@ +--- +title: "Manage AWS Spot interruptions in Seqera Platform" +description: "Managing AWS Spot Interruptions in Seqera Platform." +date: "16 Jul 2024" +tags: [aws, spot, platform, fusion, retry] +--- + +In AWS Batch environments that use Spot instances, tasks can be interrupted when instances are reclaimed, and this is a normal part of how Spot instances operate. The frequency of interruptions can be highly variable, based on factors including the wider demand on AWS services. AWS offers an insight into the frequency of Spot reclamations with their **instance-advisor** service which you can find [here](https://aws.amazon.com/ec2/spot/instance-advisor/). 
In Seqera Platform, Spot reclamations will sometimes manifest with logging messages like `Host EC2 (instance i-0282b396e52b4c95d) terminated` and will produce non-specific exit codes such as `143` (representing `SIGTERM`) or even no exit code at all (`-`), depending on the order in which the underlying AWS components have been destroyed.
+ +### Use retry strategies for Spot Interruptions + +#### Handle retries in Nextflow by setting `errorStrategy` and `maxRetries` + +A simple generic retry strategy at the Nextflow level can be more appropriate where run times are sufficiently low that retries are likely to succeed. This can be configured as follows: + +```bash +process { + errorStrategy = 'retry' + maxRetries = 3 +} +``` + +This example configuration will apply to all types of job failure. Because Spot reclamations do not produce diagnostic exit codes, it is currently not possible to configure retries at the Nextflow level specifically for reclamations. Note that, given the escalating costs of repeated retries, an On-Demand queue is likely a more cost-effective option than very large numbers of retries. If you still see failures after applying configuration like this, solutions involving On-Demand queues are likely to be more effective at limiting costs and runtimes. + +#### Handle retries in AWS by setting `aws.batch.maxSpotAttempts` + +If all processes in your workflow have runtimes short enough to feasibly complete before reclamation, you can consider configuring automatic retries in case of interruption: + +`aws.batch.maxSpotAttempts = 3` + +This is a global setting (not configurable per process) that in this example allows a job to retry up to three times on a new Spot instance if the original instance is reclaimed. Retries happen automatically within AWS and restart the task from the beginning. Because this occurs behind the scenes, you won't see any evidence of the retries within the Platform. In fact, as far as Nextflow (and Platform) is concerned, only one attempt has occurred, and it will submit the task again to AWS, up to any `maxRetries` configuration you have in place (see above). The total number of retries in that case will be `maxRetries` * `aws.batch.maxSpotAttempts`. 
For a long-running process being preempted repeatedly, this can represent very significant costs in time and compute.
diff --git a/platform-enterprise_versions.json b/platform-enterprise_versions.json index 808dc8b90..6d658c951 100644 --- a/platform-enterprise_versions.json +++ b/platform-enterprise_versions.json @@ -1 +1 @@ -["25.1", "24.2", "24.1", "23.4", "23.3", "23.2", "23.1"] \ No newline at end of file +["25.1", "24.2", "24.1", "23.4", "23.3"] \ No newline at end of file diff --git a/src/modules/Homepage/Resources/Platform.tsx b/src/modules/Homepage/Resources/Platform.tsx index 89a2ab778..4edb8a259 100644 --- a/src/modules/Homepage/Resources/Platform.tsx +++ b/src/modules/Homepage/Resources/Platform.tsx @@ -1,8 +1,6 @@ import React from "react"; import Link from "@docusaurus/Link"; -import platform_enterprise_latest_version from "@site/platform-enterprise_latest_version"; - type Props = {}; const Platform: React.FC = () => { @@ -34,7 +32,7 @@ const Platform: React.FC = () => {
  • For installation and configuration, the Seqera Platform{" "} deployment guide {" "} diff --git a/src/pages/platform-enterprise/index.tsx b/src/pages/platform-enterprise/index.tsx deleted file mode 100644 index 394771849..000000000 --- a/src/pages/platform-enterprise/index.tsx +++ /dev/null @@ -1,12 +0,0 @@ -import React from "react"; -import { Redirect } from "react-router-dom"; - -import platform_enterprise_latest_version from "@site/platform-enterprise_latest_version"; - -export default function Platform(): JSX.Element { - return ( - - ); -} diff --git a/src/pages/platform-enterprise/latest.tsx b/src/pages/platform-enterprise/latest.tsx index 4f0be5856..c6dbfc73c 100644 --- a/src/pages/platform-enterprise/latest.tsx +++ b/src/pages/platform-enterprise/latest.tsx @@ -4,18 +4,22 @@ import { useLocation, useHistory } from "@docusaurus/router"; import platform_enterprise_latest_version from "@site/platform-enterprise_latest_version"; export default function Platform(): JSX.Element { - const match = useLocation(); + const location = useLocation(); const history = useHistory(); - const { pathname } = match; - const actualPath = pathname.replace( - "latest", - platform_enterprise_latest_version, - ); + const { pathname } = location; + + // Only perform the replacement if the path actually contains "latest" + const containsLatest = pathname.includes("latest"); + const actualPath = containsLatest ? pathname.replace("latest", "") : pathname; - useEffect(function redirectToActualPath() { - if (typeof window === "undefined") return; - history.push(actualPath); - }, []); + useEffect( + function redirectToActualPath() { + // Only redirect if we actually changed the path + if (typeof window === "undefined" || pathname === actualPath) return; + history.push(actualPath); + }, + [pathname, actualPath, history], + ); return
    ; } diff --git a/src/theme/DocSidebar/Desktop/ProductSwitcher/VersionSwitcher.tsx b/src/theme/DocSidebar/Desktop/ProductSwitcher/VersionSwitcher.tsx index bf4ead85c..82c608c44 100644 --- a/src/theme/DocSidebar/Desktop/ProductSwitcher/VersionSwitcher.tsx +++ b/src/theme/DocSidebar/Desktop/ProductSwitcher/VersionSwitcher.tsx @@ -38,15 +38,21 @@ const VersionSwitcher = ({ isOpen, setIsOpen }) => { if (typeof window === "undefined") return null; if (!versions) return null; - if (!location.pathname.startsWith("/platform-enterprise/")) return null; + if (!location.pathname.startsWith("/platform-enterprise")) return null; const items = versions.filter( - (version) => version.label !== currentVersion.label, + (version) => version.label !== currentVersion?.label, ); - // Extract the part of the URL after the current version - const currentVersionPrefix = `/platform-enterprise/${currentVersion.label}`; - const urlSuffix = location.pathname.replace(currentVersionPrefix, ""); + let urlSuffix = ""; + + if ( + currentVersion && + location.pathname.startsWith(`/platform-enterprise/${currentVersion.label}`) + ) { + const currentVersionPrefix = `/platform-enterprise/${currentVersion.label}`; + urlSuffix = location.pathname.replace(currentVersionPrefix, ""); + } return (
    @@ -57,8 +63,10 @@ const VersionSwitcher = ({ isOpen, setIsOpen }) => { })} > - v{currentVersion.label}{" "} - {currentVersion.label == versions[0].label ? " (current)" : ""} + {currentVersion ? `v${currentVersion.label}` : "Version"}{" "} + {currentVersion && currentVersion.label === versions[0].label + ? " (current)" + : ""} {isOpen && ( diff --git a/src/theme/Navbar/Layout/SeqeraHeader/HeaderDesktop/NavItems/index.jsx b/src/theme/Navbar/Layout/SeqeraHeader/HeaderDesktop/NavItems/index.jsx index 39c5bd4d1..2d25edfb3 100644 --- a/src/theme/Navbar/Layout/SeqeraHeader/HeaderDesktop/NavItems/index.jsx +++ b/src/theme/Navbar/Layout/SeqeraHeader/HeaderDesktop/NavItems/index.jsx @@ -99,7 +99,7 @@ const NavItems = ({ isDark = false, hideMenu }) => {
  • - + Enterprise
  • diff --git a/static/_redirects b/static/_redirects index fc9d4eb6e..84b92482a 100644 --- a/static/_redirects +++ b/static/_redirects @@ -3,6 +3,7 @@ # Fix top nav 404 for Cloud and Enterprise after index page rename /platform-cloud /platform-cloud/platform-cloud 301 /platform-enterprise/:version /platform-enterprise/:version/platform-enterprise 301 +/platform-enterprise /platform-enterprise/platform-enterprise 301 # Add redirects to account for Wave docs refresh /wave/cli/install /wave/cli 301