diff --git a/docs/requirements.txt b/docs/requirements.txt
index 53fc1f3..873a68b 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,2 +1,5 @@
sphinx==7.1.2
sphinx-rtd-theme==1.3.0rc1
+sphinx-material
+sphinx-book-theme
+sphinx-toolbox
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 65aa4ce..6d2d9c8 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -3,11 +3,13 @@
# -- Project information
project = 'Cypienta'
-copyright = '2024, Cypienta'
+copyright = '2025, Cypienta'
author = 'Cypienta'
-release = '0.1'
-version = '0.1.0'
+# release = '1.0'
+release = '0.9'
+# version = '1.0.0'
+version = '0.9.0'
# -- General configuration
@@ -17,6 +19,7 @@
'sphinx.ext.autodoc',
'sphinx.ext.autosummary',
'sphinx.ext.intersphinx',
+ 'sphinx_toolbox.sidebar_links',
]
intersphinx_mapping = {
@@ -29,7 +32,9 @@
# -- Options for HTML output
-html_theme = 'sphinx_rtd_theme'
+# html_theme = 'sphinx_rtd_theme'
+# html_theme = 'sphinx_material'
+html_theme = "sphinx_book_theme"
# -- Options for EPUB output
-epub_show_urls = 'footnote'
+# epub_show_urls = 'footnote'
diff --git a/docs/source/dag/dags.rst b/docs/source/dag/dags.rst
new file mode 100644
index 0000000..458da18
--- /dev/null
+++ b/docs/source/dag/dags.rst
@@ -0,0 +1,69 @@
+Overview of Airflow DAGs
+=================================
+
+Functionality of DAGs
+---------------------------------
+
+The fleet of Airflow DAGs is responsible for the end-to-end flow of the Cypienta Correlation Pipeline.
+
+#. **s3_trigger:**
+
+ - The DAG is triggered from the periodic schedule in the Cypienta UI.
+   - Gets the list of files in the upload folders for multiple data sources under ``mapping/input/``. If the list of files is not empty, the skip_input DAG is triggered; otherwise, it exits.
+
+#. **skip_input:**
+
+ - The DAG is triggered by s3_trigger DAG so that the list of input files can be processed.
+ - Maintains a queue of input files to be processed by the pipeline.
+   - The DAG is also triggered by pipeline_part_1 and pipeline_part_2 to clear the input queue.
+
+#. **pipeline_part_1:**
+
+ - The DAG is triggered by skip_input DAG to process the input files.
+ - It triggers the following tasks in sequence:
+
+ - enrich_with_technique
+ - update_lookup_table - trigger update_lookup_table DAG
+
+ - The DAG then triggers pipeline_part_2 concurrently, one for each clustering agent to be processed.
+
+#. **pipeline_part_2:**
+
+ - The DAG is triggered by pipeline_part_1 DAG to process the batches per clustering agent.
+ - It triggers the following tasks in sequence:
+
+     - clustering part 1 - batches for clustering part 1 are run concurrently
+
+   - The DAG then triggers pipeline_part_3 concurrently, one per clustering agent to be processed. Each clustering agent DAG run processes only a single batch of data in sequential order.
+
+#. **pipeline_part_3:**
+
+ - The DAG is triggered by pipeline_part_2 DAG to process the batches in sequential order per clustering agent.
+ - It triggers the following tasks in sequence:
+
+ - clustering part 2
+ - retrigger pipeline_part_3 if there are more batches to process
+
+ - Once the current batch is processed successfully, it triggers the pipeline_part_4 DAG for the pertinent clustering agent.
+
+#. **pipeline_part_4:**
+
+ - The DAG is triggered by pipeline_part_3 DAG.
+ - It triggers the following tasks in sequence:
+
+     - flow - batches for flows are run concurrently
+ - create campaign
+
+#. **snapshot:**
+
+ - The DAG is triggered by the pipeline_part_4 DAG to create a snapshot of the current state of the pipeline.
+ - It snapshots the database and restarts it.
+
+#. **restore:**
+
+   - This DAG is triggered by the failure callback of the pipeline_part_1, pipeline_part_2, pipeline_part_3, and pipeline_part_4 DAGs.
+ - It restores the database to the last saved snapshot and restarts it.
+
+#. **update_lookup_table:**
+
+ - The DAG is triggered by the pipeline_part_1 DAG to update the lookup table for techniques.
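+
+For reference, the same DAGs can be listed and triggered manually from the Airflow CLI inside the Airflow container; a minimal sketch, using the DAG ids described above:
+
+.. code-block:: shell
+
+   # List all registered DAGs and their paused/active status
+   airflow dags list
+
+   # Manually trigger the entry-point DAG instead of waiting for the schedule
+   airflow dags trigger s3_trigger
+
+   # Show recent runs of a specific DAG in the pipeline
+   airflow dags list-runs -d pipeline_part_1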
diff --git a/docs/source/deploy_ui/airflow.rst b/docs/source/deploy_ui/airflow.rst
new file mode 100644
index 0000000..8367598
--- /dev/null
+++ b/docs/source/deploy_ui/airflow.rst
@@ -0,0 +1,51 @@
+Airflow Configuration and Errors
+================================
+
+Airflow Errors
+--------------
+
+1. Go to the Cypienta UI and login with your credentials.
+
+ .. image:: resources/ui_login.png
+ :alt: Login to UI
+ :align: center
+
+
+ .. note::
+ The default credentials are present in :doc:`start_using` page.
+
+2. On the left hand side panel, click on ``Airflow``.
+
+ .. image:: resources/bastet_airflow.png
+ :alt: Airflow
+ :align: center
+
+3. The ``Error`` tab shows the Airflow Error List, which lists the errors that have occurred in the Airflow pipeline.
+
+4. Click on the ``Go to Airflow`` link at the top right to go to the Airflow UI.
+
+
+Airflow Scheduler
+-----------------
+
+1. On the Cypienta UI, on the left hand side panel, click on ``Airflow``.
+
+ .. image:: resources/bastet_airflow.png
+ :alt: Airflow
+ :align: center
+
+2. Click on the tab ``Schedule``. The default schedule is set up using a cron expression that triggers the pipeline every 5 hours.
+
+ .. image:: resources/airflow_schedule.png
+ :alt: Airflow
+ :align: center
+
+3. To edit the pipeline schedule, click on the ``Edit`` button. The schedule can be edited using the cron format.
+
+ .. image:: resources/airflow_schedule_edit.png
+ :alt: Airflow
+ :align: center
+
+ .. note::
+
+      To learn more about cron expressions and creating a schedule, `Click here `__.
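+
+   For example, the default schedule described above, which triggers the pipeline every 5 hours, can be written as the following cron expression (the five fields are minute, hour, day of month, month, and day of week):
+
+   .. code-block:: shell
+
+      # minute  hour  day-of-month  month  day-of-week
+      0 */5 * * *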
\ No newline at end of file
diff --git a/docs/source/deploy_ui/mapping.rst b/docs/source/deploy_ui/mapping.rst
new file mode 100644
index 0000000..3350033
--- /dev/null
+++ b/docs/source/deploy_ui/mapping.rst
@@ -0,0 +1,94 @@
+Map Alert fields to Cypienta Internal Format
+============================================
+
+1. Go to the Cypienta UI and login with your credentials.
+
+ .. image:: resources/ui_login.png
+ :alt: Login to UI
+ :align: center
+
+
+ .. note::
+ The default ``Username`` is ``cypienta`` and the default ``Password`` is ``cypienta``
+
+2. On the left hand side panel, click on ``Add Alerts``.
+
+ .. image:: resources/add_alerts.png
+ :alt: Add Alerts
+ :align: center
+
+3. Drag and drop a file or click on the drag and drop area to upload a file. Once the file is selected, click on ``Upload File``.
+
+ .. note::
+ The maximum file size that can be uploaded is 10MB. The file should be in CSV/XML/JSON format.
+
+      CSV: The file must have the ``.csv`` extension. The first row is treated as the header row, and the file must contain at least 1 alert.
+
+      JSON: The file must have the ``.json`` extension. The file must contain a JSON list of alerts with at least 1 alert.
+
+      XML: The file must have the ``.xml`` extension. The file must have a list element for ``alerts`` and contain at least 1 alert.
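+
+      For illustration, a minimal CSV upload could look like the file below; the column names are examples only and are mapped to the internal format in the next step.
+
+      .. code-block:: shell
+
+         # Create a minimal CSV input: a header row plus one alert
+         cat > sample_alerts.csv <<'EOF'
+         alert_id,timestamp,alert_name,source_ip,destination_ip
+         1001,2024-01-01T10:15:00Z,Suspicious login attempt,10.0.0.4,10.0.0.7
+         EOF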
+
+4. Once the file is uploaded, the system will automatically map and suggest the fields to the internal format.
+
+ .. image:: resources/alerts_mapping.png
+ :alt: Alerts Mapping
+ :align: center
+
+5. There are 5 required fields that must be mapped to the internal format. The fields are:
+
+ - ``Id``: This field is used to uniquely identify the alert.
+ - ``Time``: This field is used to represent the time of the alert and must be in datetime format.
+ - ``Name``: Human readable alert text.
+ - ``Src``: Source of the alerts for network alerts.
+ - ``Dst``: Destination of the alerts for network alerts.
+
+ The remaining fields are optional and can be mapped as per the requirement.
+
+ - ``Event_feature``
+ - ``Node_feature``
+
+   The mapping of fields to the internal format is presented in an interactive UI element partitioned into two sections. The left section shows the fields from the uploaded file that are not yet selected for any internal mapping, and the right section shows the fields chosen for the internal format.
+   Click on the plus button to map a field to the internal format, or on the minus button to remove a field from the internal format.
+   To select all the fields currently in the ``Unused fields`` section, click on the ``Choose all`` button. To remove all the fields currently in the ``Chosen fields`` section, click on the ``Remove all`` button.
+
+ The top 3 values for the fields in the ``Chosen fields`` section are displayed as sample values, if available.
+
+   All chosen fields for required field mappings have a drop down for selecting a priority. If the user wants to select multiple source fields for a required field, the priority determines which value is used. The field value with priority 1 has the highest priority; if it is found to be non-empty, the other values are ignored.
+
+   For node feature field mapping, each selected field has a drop down with the options ``src``, ``dst``, and ``both``. The user must select one option for each field to associate the node feature with the source, the destination, or both.
+
+
+ .. image:: resources/node_feature.png
+ :alt: Alerts Mapping
+ :align: center
+
+6. Once all the required fields have at least 1 chosen field, click on ``Activate mapping for ingestion`` to save the mapping and start the ingestion process.
+
+ .. image:: resources/activate_mapping.png
+ :alt: Activate Mapping
+ :align: center
+
+7. Click on ``OK`` to confirm saving the mapping.
+
+ .. image:: resources/save_mapping.png
+ :alt: Confirm Mapping
+ :align: center
+
+8. Give a unique name to the mapping and then click ``OK``. A recommended mapping name is the source type of the alerts.
+
+ .. image:: resources/save_mapping_name.png
+ :alt: Mapping Name
+ :align: center
+
+9. Once the mapping is saved, an alert with a bucket prefix will appear. Note down this bucket prefix for future use. Alerts can then be uploaded directly to the bucket prefix path to automatically map them to the internal format and start the ingestion process (see the example below the screenshot).
+ Click on ``OK`` to close the alert.
+
+ .. image:: resources/mapping_saved.png
+ :alt: Bucket Prefix
+ :align: center
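+
+   For example, future alert files can be uploaded to the noted prefix from the AWS CLI; the bucket name, prefix, and file name below are placeholders for the values shown in the alert:
+
+   .. code-block:: shell
+
+      # Upload an alert file directly to the mapping's bucket prefix
+      aws s3 cp alerts.json s3://<bucket-name>/<bucket-prefix>/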
+
+10. Another alert shows that the pipeline has been triggered with the uploaded input file. Click on ``OK`` to close the alert.
+
+ .. image:: resources/pipeline_triggered.png
+ :alt: Pipeline Triggered
+ :align: center
\ No newline at end of file
diff --git a/docs/source/deploy_ui/resources/activate_mapping.png b/docs/source/deploy_ui/resources/activate_mapping.png
new file mode 100644
index 0000000..d495187
Binary files /dev/null and b/docs/source/deploy_ui/resources/activate_mapping.png differ
diff --git a/docs/source/deploy_ui/resources/add_alerts.png b/docs/source/deploy_ui/resources/add_alerts.png
new file mode 100644
index 0000000..4eab461
Binary files /dev/null and b/docs/source/deploy_ui/resources/add_alerts.png differ
diff --git a/docs/source/deploy_ui/resources/add_label.png b/docs/source/deploy_ui/resources/add_label.png
new file mode 100644
index 0000000..bf7fb5e
Binary files /dev/null and b/docs/source/deploy_ui/resources/add_label.png differ
diff --git a/docs/source/deploy_ui/resources/add_rule.png b/docs/source/deploy_ui/resources/add_rule.png
new file mode 100644
index 0000000..9ed47d8
Binary files /dev/null and b/docs/source/deploy_ui/resources/add_rule.png differ
diff --git a/docs/source/deploy_ui/resources/airflow_schedule.png b/docs/source/deploy_ui/resources/airflow_schedule.png
new file mode 100644
index 0000000..0201680
Binary files /dev/null and b/docs/source/deploy_ui/resources/airflow_schedule.png differ
diff --git a/docs/source/deploy_ui/resources/airflow_schedule_edit.png b/docs/source/deploy_ui/resources/airflow_schedule_edit.png
new file mode 100644
index 0000000..2399a02
Binary files /dev/null and b/docs/source/deploy_ui/resources/airflow_schedule_edit.png differ
diff --git a/docs/source/deploy_ui/resources/alerts_mapping.png b/docs/source/deploy_ui/resources/alerts_mapping.png
new file mode 100644
index 0000000..2600717
Binary files /dev/null and b/docs/source/deploy_ui/resources/alerts_mapping.png differ
diff --git a/docs/source/deploy_ui/resources/bastet_airflow.png b/docs/source/deploy_ui/resources/bastet_airflow.png
new file mode 100644
index 0000000..89679c5
Binary files /dev/null and b/docs/source/deploy_ui/resources/bastet_airflow.png differ
diff --git a/docs/source/deploy_ui/resources/gen_ai_add_key.png b/docs/source/deploy_ui/resources/gen_ai_add_key.png
new file mode 100644
index 0000000..0addc89
Binary files /dev/null and b/docs/source/deploy_ui/resources/gen_ai_add_key.png differ
diff --git a/docs/source/deploy_ui/resources/gen_ai_create_summary.png b/docs/source/deploy_ui/resources/gen_ai_create_summary.png
new file mode 100644
index 0000000..ac4363a
Binary files /dev/null and b/docs/source/deploy_ui/resources/gen_ai_create_summary.png differ
diff --git a/docs/source/deploy_ui/resources/gen_ai_summary.png b/docs/source/deploy_ui/resources/gen_ai_summary.png
new file mode 100644
index 0000000..de023de
Binary files /dev/null and b/docs/source/deploy_ui/resources/gen_ai_summary.png differ
diff --git a/docs/source/deploy_ui/resources/mapping_saved.png b/docs/source/deploy_ui/resources/mapping_saved.png
new file mode 100644
index 0000000..3b6c5c8
Binary files /dev/null and b/docs/source/deploy_ui/resources/mapping_saved.png differ
diff --git a/docs/source/deploy_ui/resources/new_label.png b/docs/source/deploy_ui/resources/new_label.png
new file mode 100644
index 0000000..0aa58f4
Binary files /dev/null and b/docs/source/deploy_ui/resources/new_label.png differ
diff --git a/docs/source/deploy_ui/resources/new_rule.png b/docs/source/deploy_ui/resources/new_rule.png
new file mode 100644
index 0000000..3e6f543
Binary files /dev/null and b/docs/source/deploy_ui/resources/new_rule.png differ
diff --git a/docs/source/deploy_ui/resources/node_feature.png b/docs/source/deploy_ui/resources/node_feature.png
new file mode 100644
index 0000000..5c46f32
Binary files /dev/null and b/docs/source/deploy_ui/resources/node_feature.png differ
diff --git a/docs/source/deploy_ui/resources/pipeline_triggered.png b/docs/source/deploy_ui/resources/pipeline_triggered.png
new file mode 100644
index 0000000..61413ad
Binary files /dev/null and b/docs/source/deploy_ui/resources/pipeline_triggered.png differ
diff --git a/docs/source/deploy_ui/resources/save_mapping.png b/docs/source/deploy_ui/resources/save_mapping.png
new file mode 100644
index 0000000..01f4c88
Binary files /dev/null and b/docs/source/deploy_ui/resources/save_mapping.png differ
diff --git a/docs/source/deploy_ui/resources/save_mapping_name.png b/docs/source/deploy_ui/resources/save_mapping_name.png
new file mode 100644
index 0000000..f3bf45f
Binary files /dev/null and b/docs/source/deploy_ui/resources/save_mapping_name.png differ
diff --git a/docs/source/deploy_ui/resources/select_labels.png b/docs/source/deploy_ui/resources/select_labels.png
new file mode 100644
index 0000000..1ff613a
Binary files /dev/null and b/docs/source/deploy_ui/resources/select_labels.png differ
diff --git a/docs/source/deploy_ui/resources/select_rules.png b/docs/source/deploy_ui/resources/select_rules.png
new file mode 100644
index 0000000..1d9002f
Binary files /dev/null and b/docs/source/deploy_ui/resources/select_rules.png differ
diff --git a/docs/source/deploy_ui/resources/ui_tune_cluster_config.png b/docs/source/deploy_ui/resources/ui_tune_cluster_config.png
new file mode 100644
index 0000000..ac5a23c
Binary files /dev/null and b/docs/source/deploy_ui/resources/ui_tune_cluster_config.png differ
diff --git a/docs/source/deploy_ui/resources/view_labels.png b/docs/source/deploy_ui/resources/view_labels.png
new file mode 100644
index 0000000..ff8dc3d
Binary files /dev/null and b/docs/source/deploy_ui/resources/view_labels.png differ
diff --git a/docs/source/deploy_ui/start_using.rst b/docs/source/deploy_ui/start_using.rst
index 9361a0e..643e820 100644
--- a/docs/source/deploy_ui/start_using.rst
+++ b/docs/source/deploy_ui/start_using.rst
@@ -3,6 +3,10 @@ Start using Cypienta UI
Once all your resources are deployed and the ECS app is up and in ``Running`` status. You can start using the Cypienta UI.
+.. note::
+   The Cypienta UI will include only those events that have MITRE ATT&CK techniques associated with them or recognized by the ``enrich with technique`` step of the pipeline. Events with no associated techniques are not used in the clustering step and will not be visible in the UI.
+
+
Start using Cypienta UI
-----------------------
@@ -26,6 +30,9 @@ Start using Cypienta UI
.. image:: resources/home_page.png
:alt: Home page
:align: center
+
+ .. note::
+ The default ``Username`` is ``cypienta`` and the default ``Password`` is ``cypienta``
How to use the Hide feature for events in UI
@@ -37,6 +44,11 @@ How to use the Hide feature for events in UI
:alt: Campaign list
:align: center
+
+ .. note::
+      The ``Campaigns`` page shows all the clusters that were created from the events. Each event is present in only one cluster. Clusters with a single event are not visible in the UI.
+      The ``Flows`` page shows all the flows that were created from the events. Each event can be present in zero or more flows.
+
2. Click on any campaign that you want to modify:
.. image:: resources/hide_open_campaign.png
@@ -66,6 +78,7 @@ How to use the Hide feature for events in UI
You can see the list of hidden events now has an event that was selected earlier and hide action was taken.
+
Edit recognized techniques for events
-------------------------------------
@@ -141,4 +154,84 @@ How to use "Cut Events" feature
.. image:: resources/cut_completed.png
:alt: cut completed
+ :align: center
+
+
+How to add Rules and Labels for campaigns
+-----------------------------------------
+
+1. On the left hand side panel, click on ``Cluster`` drop down and select ``Rules``.
+
+ .. image:: resources/select_rules.png
+ :alt: select rules
+ :align: center
+
+2. Click on ``Add Rule`` button to add a new rule.
+
+ .. image:: resources/add_rule.png
+ :alt: add rule
+ :align: center
+
+3. Fill in the details for the rule. Give a distinguishable name to the rule. Select the metric on which you want to set a rule.
+ Select the condition and value for the rule. Do not select any of the campaigns in the ``Campaigns`` field and click on ``Save``.
+
+ .. image:: resources/new_rule.png
+ :alt: add rule details
+ :align: center
+
+4. Now, to utilize the new rule, we need to add a label to the campaigns. On the left hand side panel, click on the ``Cluster`` drop down and select ``Labels``.
+
+ .. image:: resources/select_labels.png
+ :alt: select labels
+ :align: center
+
+5. Click on ``Add Label`` button to add a new label.
+
+ .. image:: resources/add_label.png
+ :alt: add label
+ :align: center
+
+6. Fill in the details for the label. Give a distinguishable name to the label, which will be applied to all campaigns. Select the rules that you want to apply to the label and click on ``Save``.
+
+ .. image:: resources/new_label.png
+ :alt: add label details
+ :align: center
+
+7. Now go back to the ``Clusters`` page to see the list of Campaigns and you will see the label applied to all the campaigns.
+
+ .. image:: resources/view_labels.png
+ :alt: label applied
+ :align: center
+
+ .. note::
+ Applying new or edited rules or labels to all campaigns may take some time. Refresh the campaigns page to check if the changes have been applied.
+
+
+Generate summary using OpenAI
+------------------------------
+
+1. On the left hand side panel, click on ``GenAI``.
+
+ .. image:: resources/gen_ai_add_key.png
+ :alt: gen ai config
+ :align: center
+
+2. Add your API key in the input field and click on ``Add API key``.
+
+3. On the left hand side panel, click on ``Campaigns``.
+
+ .. image:: resources/campaign_list.png
+ :alt: Campaign list
+ :align: center
+
+4. Select any campaign for which you want to generate a summary. Click on ``Generate Summary`` button.
+
+ .. image:: resources/gen_ai_create_summary.png
+ :alt: gen ai summary
+ :align: center
+
+5. Click on the ``Diamond`` tab and view the summary created for your selected campaign.
+
+ .. image:: resources/gen_ai_summary.png
+ :alt: gen ai summary
:align: center
\ No newline at end of file
diff --git a/docs/source/deploy_ui/tune_config.rst b/docs/source/deploy_ui/tune_config.rst
new file mode 100644
index 0000000..bd6b961
--- /dev/null
+++ b/docs/source/deploy_ui/tune_config.rst
@@ -0,0 +1,23 @@
+Tune Cluster Model Config
+================================
+
+
+1. Go to the Cypienta UI and login with your credentials.
+
+ .. image:: resources/ui_login.png
+ :alt: Login to UI
+ :align: center
+
+
+ .. note::
+ The default credentials are present in :doc:`start_using` page.
+
+2. On the left hand side panel, click on ``Config``.
+
+ .. image:: resources/ui_tune_cluster_config.png
+ :alt: Airflow
+ :align: center
+
+3. You may choose which cluster model parameters to give more or less weight, and tune the model accordingly.
+
+4. Click on the ``Save`` button to apply the changes for the next pipeline run.
diff --git a/docs/source/elastic/elastic.rst b/docs/source/elastic/elastic.rst
index a994b13..a816893 100644
--- a/docs/source/elastic/elastic.rst
+++ b/docs/source/elastic/elastic.rst
@@ -1,6 +1,11 @@
Configure Elastic
=================
+Prerequisites
+-------------
+
+Make sure that you have deployed the Cypienta application as detailed in :doc:`../getting_started/deploy` before integrating.
+
Logstash pipeline from Elastic Search to AWS S3
-----------------------------------------------
diff --git a/docs/source/getting_started/deploy.rst b/docs/source/getting_started/deploy.rst
index 2431013..4956ed6 100644
--- a/docs/source/getting_started/deploy.rst
+++ b/docs/source/getting_started/deploy.rst
@@ -1,70 +1,26 @@
AWS Deployment
==============
-.. _setup_lambda_repository:
-
-Setup Lambda repository
------------------------
-
-1. Navigate to the AWS console, and select ``CloudShell`` at the bottom left of the console. Open the cloud shell in the region you want to deploy.
-
-2. Store the AWS Account ID, and ECR repository name to environment variable in cloud shell.
-
- .. code-block:: shell
-
- $ export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query "Account" --output text)
-
- # Replace value with ECR repository name you want to give
- $ export REPO_NAME="cypienta-vrl-lambda"
-
-3. Pull the Cypienta VRL Lambda image from the AWS public repository using the following command.
-
- .. code-block:: shell
-
- $ docker pull public.ecr.aws/p2d2x2s3/cypienta/vrl-lambda:v0.1
-
-4. Once the image pull is completed, create an ECR repository to push the Cypienta VRL Lambda image.
-
- .. code-block:: shell
-
- $ aws ecr create-repository --repository-name ${REPO_NAME}
-
-5. After successfully create ECR repository, you can navigate to ECR private repository to view the responsitory you just created.
-
- .. image:: resources/lambda_ecr.png
- :alt: lambda ecr repo
- :align: center
-
-6. Run the following commands to push the image to ECR repository.
-
- .. code-block:: shell
-
- $ export ECR_URI="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com"
- $ aws ecr get-login-password --region ${AWS_REGION} | docker login --username AWS --password-stdin ${ECR_URI}
- $ docker tag public.ecr.aws/p2d2x2s3/cypienta/vrl-lambda:v0.1 ${ECR_URI}/${REPO_NAME}:v0.1
- $ docker push ${ECR_URI}/${REPO_NAME}:v0.1
-
-7. Copy the ECR Image URI and make a note of it to use in CloudFormation template
-
- .. code-block:: shell
-
- $ echo ${ECR_URI}/${REPO_NAME}:v0.1
-
+.. _deploy_cloud_formation:
Deploy resources using the Cloud Formation template
---------------------------------------------------
-1. Clone the Github repo
+1. On your local machine, download the template file from GitHub (`Template file `__), or use the following command to download the ``template.yaml`` file.
.. code-block:: shell
- $ git clone -b v0.7 https://github.com/cypienta/Lambda.git
+ $ wget https://github.com/cypienta/AWS/raw/v0.9/template.yaml
.. note::
- This command will clone the repository and checkout the branch ``v0.7``
+      Run this command on your local machine to download the ``template.yaml`` file.
2. Navigate to the AWS console, and search for ``CloudFormation``.
+ .. note::
+      The UI component deployed from this template is only supported in the following AWS Regions. Make sure that you create the stack in a supported region.
+ Supported AWS regions: eu-north-1, ap-south-1, eu-west-3, us-east-2, eu-west-1, eu-central-1, sa-east-1, ap-east-1, us-east-1, ap-northeast-2, eu-west-2, ap-northeast-1, us-west-2, us-west-1, ap-southeast-1, ap-southeast-2, ca-central-1
+
3. Click on ``Stacks`` on the left hand side panel, and click on ``Create stack`` dropdown. Select ``With new resources (standard)`` to start creating a stack
.. image:: resources/create_stack_start.png
@@ -82,26 +38,7 @@ Deploy resources using the Cloud Formation template
Give a name to the stack in ``Stack name``.
- Fill in the following parameter values as they require user input:
-
- **BucketName:** The name of S3 bucket that you want to create.
- (required to change as the current value populated may not be
- valid). Follow these
- `rules `__
- for naming a bucket. Constraint of the bucket name by AWS is that
- the bucket name must be globally unique. So note that your cloud
- formation stack may fail if the name provided is already taken. You
- can see the failure reasons by clicking on the stack that was
- created and clicking on the ``Events`` tab.
-
- **TechniqueModelARN:** The ARN of the subscribed model package for
- ATTACK Technique detector. Use version 0.4 Product ARN for the region in which CloudFormation stack is created.
-
- **ClusterModelARN:** The ARN of the subscribed model package for
- Temporal Clustering. Use version 0.6 Product ARN for the region in which CloudFormation stack is created.
-
- **FlowModelARN:** The ARN of the subscribed model package for MITRE
- flow detector. Use version 0.6 Product ARN for the region in which CloudFormation stack is created.
+   All parameter values are pre-filled for a quick user experience. Some of the parameters are:
**SuperuserEmail:** The email for admin user for UI
@@ -109,12 +46,6 @@ Deploy resources using the Cloud Formation template
**SuperuserPassword:** The password of the admin user for UI
- **WebContainerImage:** The container image of the subscribed marketplace UI product with tag ``market*``. The ``Web container image`` noted in the section :doc:`subscribe`.
-
- **NginxContainerImage:** The container image of the subscribed marketplace UI product with tag ``nginx-market*``. The ``Nginx container image`` noted in the section :doc:`subscribe`.
-
- **VRLLambdaImage:** The container image of the VRL Lambda that was pushed to ECR private repository in :ref:`setup_lambda_repository`
-
The constraints for choosing the ``Cpu`` and ``Memory`` for the cluster can be found `here `__
Recommended value for parameter **ChunkSize** is below ``100000``.
@@ -128,7 +59,11 @@ Deploy resources using the Cloud Formation template
failure options``, select ``Roll back all stack resources`` for
``Behaviour on provisioning failure``. Select ``Delete all newly
created resources`` for ``Delete newly created resources during a
- rollback``. And then click on ``Next``.
+   rollback``. Expand ``Stack creation options - optional`` and, under ``Timeout``, enter ``20`` to set a maximum timeout of 20 minutes for the stack. Then click on ``Next``.
+
+ .. image:: resources/stack_timeout.png
+ :alt: stack timeout
+ :align: center
8. Now in the ``Review and create`` page, you can review your parameters.
At the bottom of the page, select all checkboxes for ``I
@@ -138,10 +73,35 @@ Deploy resources using the Cloud Formation template
9. You can monitor the events of the cloud stack by clicking on the
recently created cloud stack and going to the ``Events`` tab.
+ .. note::
+ **Resource Creation Time:** The cloud stack will take approximately 15 minutes to complete the creation of all the resources.
+
10. Once the cloud stack is completed successfully. You can start using
- the products.
+ the products. Click on the ``Outputs`` tab for the recently created cloud
+ stack and note down the load balancer URL for the UI under ``CypientaUI``.
+    The load balancer URL for Airflow is under ``CypientaAirflow``.
+    The name of the S3 bucket is under ``CypientaBucket``.
+ Click on the link to open the UI.
+
+ .. image:: resources/template_output.png
+ :alt: lb url
+ :align: center
+
+ .. note::
+ The default credentials for Cypienta UI: Default ``Username`` is ``cypienta`` and the default ``Password`` is ``cypienta``
+
+ The default credentials for Cypienta Airflow: Default ``Username`` is ``cypienta`` and the default ``Password`` is ``cypienta``
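+
+    The same output values can also be read from the command line; a minimal sketch, replacing the stack name with the one you created:
+
+    .. code-block:: shell
+
+       # Print the CypientaUI, CypientaAirflow, and CypientaBucket outputs of the stack
+       aws cloudformation describe-stacks \
+           --stack-name <your-stack-name> \
+           --query "Stacks[0].Outputs"
+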
Now all your resources are ready to be used.
-You may now go to the step :doc:`end_to_end_test` to start testing
-your application.
+
+Handling Multiple Inputs
+-------------------------
+
+The pipeline processes files in the input folder in batches.
+The files are processed at a scheduled time, which can be set up in the Cypienta UI. Once a batch is finished processing, the
+pipeline automatically starts with the next batch of files in the queue.
+
+.. note::
+
+   **Handling Large Input Files:** Currently the pipeline can handle up to 100,000 events in a single input file. Be mindful of the size of each input file.
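+
+   If a file is larger than that, one option is to split it before uploading; a sketch assuming the file is a single JSON list of alerts and ``jq`` is available:
+
+   .. code-block:: shell
+
+      # Split a large JSON list of alerts into two smaller input files
+      jq '.[:100000]' large_input.json > input_part_1.json
+      jq '.[100000:]' large_input.json > input_part_2.json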
diff --git a/docs/source/getting_started/end_to_end_test.rst b/docs/source/getting_started/end_to_end_test.rst
index dc003a2..58ed0cf 100644
--- a/docs/source/getting_started/end_to_end_test.rst
+++ b/docs/source/getting_started/end_to_end_test.rst
@@ -1,8 +1,8 @@
-End to End Test
+End to End Test (non-CEF input format)
==================================================
How to test end-to-end
---------------------------
+----------------------
1. Navigate to the AWS console and search for ``S3``. Select the S3 bucket
that you created, and click on ``Create folder``. Set the name of the folder as ``input`` and create the folder.
@@ -72,34 +72,21 @@ How to test end-to-end
All fields are required unless mentioned otherwise.
-3. Upload input json file to the s3 bucket in path: ``s3://{bucket-name}/input/``. The name of the input file does not matter to the end-to-end flow. Note that if you upload a file with the same name, it will be overwritten in S3 bucket.
+3. Update the environment variables for the lambda functions ``enrich_with_technique`` and ``process_flow``: set the variable ``map_cef_to_internal`` to ``false``.
- 1. Once you upload the input file. Lets say ``input.json``. Then the control flow will be as follows:
+ .. note::
+      To access the environment variables for a lambda function, navigate to the AWS console and search for ``Lambda``. Select the lambda function that you want to edit. Click on the ``Configuration`` tab and select ``Environment variables``
+      from the left panel under ``Configuration``. Click on the ``Edit`` button to open the edit page and update the pertinent values.
- - Enrich_with_technique: lambda function
- - Transform-job-tech-{unique-id}: Batch transform job
+ .. note::
+      If the node features are already in encoded format, skip the encoding of node features by updating an environment variable of ``enrich_with_technique``: set ``encode_node_feature`` to ``false``.
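+
+    The current values can also be inspected from the AWS CLI before editing them in the console; a sketch, where the function name is a placeholder for the one created by your stack:
+
+    .. code-block:: shell
+
+       # Print the environment variables currently set on the lambda function
+       aws lambda get-function-configuration \
+           --function-name <enrich-with-technique-function-name> \
+           --query "Environment.Variables"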
- - Reads input from: ``{bucket}/intermediate/{unique-id}/input_classification.json``
- - Output to: ``{bucket}/response/classification_out/{unique-id}/input_classification.json.out``
+4. Upload the input JSON file to the S3 bucket at the path ``s3://{bucket-name}/input/``. The name of the input file does not matter to the end-to-end flow. Note that if you upload a file with the same name, it will be overwritten in the S3 bucket.
- - Process_enriched_with_technique: lambda function
- - Create_cluster: lambda function
- - Transform-job-cluster-{unique-id}: Batch transform job
-
- - Reads input from: ``{bucket}/output/classification/{unique-id}/input.zip``
- - Output to: ``{bucket}/response/cluster_out/{unique-id}/input.zip.out``
-
- - Process_cluster: lambda function
- - Create_flow: lambda function
- - Transform-job-flow-{unique-id}: Batch transform job
-
- - Reads input from: ``{bucket}/output/cluster/{unique-id}/input_flow.json``
- - Output to: ``{bucket}/response/flow_out/{unique-id}/input_flow.json.out``
-
- - Process_flow: lambda function
+    1. Once you upload the input file, you can use AWS Step Functions to monitor the flow of your input.
2. You can use the Amazon SageMaker console and navigate to Inference → Batch transform jobs, to view the created jobs for your input.
3. You can monitor the progress on CloudWatch logs for each lambda function and transform job created.
-4. Wait for a complete output to show up on the S3 bucket. ``s3://alert-detector/output/flow/{unique-id}/``
\ No newline at end of file
+5. The final output will be written to the S3 bucket under the prefix ``s3://alert-detector/output/``
\ No newline at end of file
diff --git a/docs/source/getting_started/end_to_end_test_cef.rst b/docs/source/getting_started/end_to_end_test_cef.rst
new file mode 100644
index 0000000..695852b
--- /dev/null
+++ b/docs/source/getting_started/end_to_end_test_cef.rst
@@ -0,0 +1,56 @@
+End to End Test (CEF input format)
+==================================================
+
+How to test end-to-end
+----------------------
+
+1. Navigate to the AWS console and search for ``S3``. Select the S3 bucket
+ that you created and navigate to the folder ``mapping/input/cypienta_cef/``.
+
+ .. note::
+ The folder structure should be as follows:
+ ``s3://{bucket-name}/mapping/input/cypienta_cef/``
+
+2. Sample input json file:
+
+ .. code-block:: JSON
+
+ [
+ {
+ "_cd": "318:6",
+ "_eventType": "tech",
+ "_time": "1568916650",
+ "id": "318:6",
+ "sourceAddressIPv6": "10.0.0.4",
+ "destinationAddress": "10.0.0.4",
+ "description": "initial execution of malicious document calls wmic to execute the file with regsvr32"
+ ...
+ }, ...
+ ]
+
+ View the `sample input file `__ for your reference
+
+ Input data JSON description:
+
+ .. code-block:: JSON
+
+ [
+ {
+ "_cd": "318:6", // id for Event in splunk - optional
+ "_eventType": "tech", // event type - optional
+ "_time": "1568916650", // time from splunk
+ "id": "318:6", // internal id for Event
+ "sourceAddressIPv6": "10.0.0.4", // source address
+ "destinationAddress": "10.0.0.4", // destination address
+ "description": "initial execution of malicious document calls wmic to execute the file with regsvr32" // description of the event
+ ... // other cef fields
+ }, ...
+ ]
+
+   All fields are required unless mentioned otherwise. If the value for a field is not present, keep an empty string as the value.
+
+3. Upload the input JSON file to the S3 bucket at the path ``s3://{bucket-name}/mapping/input/cypienta_cef/``. The name of the input file does not matter to the end-to-end flow. Note that if you upload a file with the same name, it will be overwritten in the S3 bucket.
+
+   1. Once you upload the input file, you can use Airflow to monitor the flow of your input.
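+
+   For example, the upload can be done from the AWS CLI; the bucket name is a placeholder for the bucket created by the stack:
+
+   .. code-block:: shell
+
+      # Upload the CEF-format input file to the mapping prefix watched by the pipeline
+      aws s3 cp input.json s3://<bucket-name>/mapping/input/cypienta_cef/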
+
+4. The final output will be written to the S3 bucket under the prefix ``s3://{bucket-name}/output/``
\ No newline at end of file
diff --git a/docs/source/getting_started/prerequisites.rst b/docs/source/getting_started/prerequisites.rst
index adfe4a0..825a88e 100644
--- a/docs/source/getting_started/prerequisites.rst
+++ b/docs/source/getting_started/prerequisites.rst
@@ -5,15 +5,20 @@ Permissions
-----------
Make sure that you have the required permissions for resources for the IAM user you will be using.
-- SageMaker
-- Lambda
- S3
- ECS
- EC2
- ECR
- IAM
- CloudFormation
+- Lambda
+
+To confirm that you have the required permissions for the resources necessary to run the
+pipeline, you can check with the following script. To run the script, the IAM user must have the ``iam:SimulatePrincipalPolicy`` permission.
+
+.. code-block:: console
+
+   $ wget -O- https://raw.githubusercontent.com/cypienta/AWS/v0.9.2/check_permissions.py | python
Quotas
------
@@ -21,24 +26,16 @@ Quotas
Instance types
~~~~~~~~~~~~~~
-Verify your instance type quotas by going to the AWS console. Search for ``Service Quotas``, and select SageMaker from the AWS Services list. Search for ``transform job usage``. You will require a GPU instance type for ``ATTACK Technique Detector`` and ``Temporal Clustering``, so look at the supported and recommended instance types for the product before subscribing and request for an increase of quota if found to be less than 1. The recommended GPU instance types are p2 and p3. The ``MITRE ATTACK Flow Detector`` requires a CPU-based instance type such as c5.
+Verify your instance type quotas by going to the AWS console. Search for ``Service Quotas``, and select ``Amazon Elastic Compute Cloud (Amazon EC2)`` from the AWS Services list. Search for ``Running On-Demand G and VT instances`` or ``Running On-Demand P instances``. You will require a GPU instance type for ``ATTACK Technique Detector`` and ``Temporal Clustering``, so look at the supported and recommended instance types for the product before subscribing and request an increase if the quota is less than 1. The recommended GPU instance family is g4dn. The ``MITRE ATTACK Flow Detector`` requires a CPU-based instance type such as c5.
.. note::
Example:
- Given the target region, go to service quotas or visit https://us-east-2.console.aws.amazon.com/servicequotas/home/services/sagemaker/quotas
- - Search and select "ml.p2.xlarge for transform job usage" or visit https://us-east-2.console.aws.amazon.com/servicequotas/home/services/sagemaker/quotas/L-89843D09
- - If the applied account-level quota value is less than 1, request an increase to at least 1.
- - Search and select "ml.p3.2xlarge for transform job usage" or visit https://us-east-2.console.aws.amazon.com/servicequotas/home/services/sagemaker/quotas/L-45F58E7E
- - If the applied account-level quota value is less than 1, request an increase to at least 1.
- - Search and select "ml.c5.4xlarge for transform job usage" or visit https://us-east-2.console.aws.amazon.com/servicequotas/home/services/sagemaker/quotas/L-89843D09
- - If the applied account-level quota value is less than 1, request an increase to at least 1.
-
-.. note::
- To check for the supported and recommended instance type. On the AWS marketplace model product page, scroll down to the ``Pricing`` section and click on ``Model Batch Transform`` under ``Software Pricing``.
+ - Search and select "Running On-Demand G and VT instances" or visit https://us-east-2.console.aws.amazon.com/servicequotas/home/services/ec2/quotas/L-DB2E81BA
+ - If the applied account-level quota value is less than 4, request an increase to at least 4.
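+
+   The applied quota value can also be checked from the AWS CLI using the quota code above; a minimal sketch:
+
+   .. code-block:: shell
+
+      # Applied account-level quota for "Running On-Demand G and VT instances"
+      aws service-quotas get-service-quota \
+          --service-code ec2 \
+          --quota-code L-DB2E81BA \
+          --query "Quota.Value"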
-Lambda concurrent executions
+VPC and Internet Gateways
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Verify that quota limit for ``Concurrent executions`` for AWS Lambda function. On your AWS console for the region where you want to deploy your resources, Search for ``Service Quotas``, and select ``AWS Lambda`` from the AWS Services list. Search for quota name ``Concurrent executions``. Make sure that the applied account-level quota value is more than 12 to allow reserved concurrency for the enrich_with_technique, update_lookup_table lambda function. If the value is not greater than 10, select the ``Concurrent executions`` and click on ``Request increase at account level`` and set to any value greater than 10.
-
+Verify that there is enough quota to create 1 VPC, 1 Internet Gateway, and 2 public subnets per template deployment. On your AWS console for the region where you want to deploy your resources, search for ``Service Quotas`` and select ``Amazon Virtual Private Cloud (Amazon VPC)`` from the AWS Services list. Search for the relevant quota names and make sure that the applied account-level quota values leave room to create at least 1 VPC, 1 Internet Gateway, and 2 public subnets.
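+
+The applied VPC-related quotas can also be listed from the AWS CLI; a sketch, filtering the output for the VPC, Internet Gateway, and subnet quotas as needed:
+
+.. code-block:: shell
+
+   # List the applied account-level quotas for Amazon VPC
+   aws service-quotas list-service-quotas --service-code vpc \
+       --query "Quotas[].{Name:QuotaName,Value:Value}" --output table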
diff --git a/docs/source/getting_started/resources/cluster_confirm.png b/docs/source/getting_started/resources/cluster_confirm.png
new file mode 100644
index 0000000..774325e
Binary files /dev/null and b/docs/source/getting_started/resources/cluster_confirm.png differ
diff --git a/docs/source/getting_started/resources/cluster_container_images.png b/docs/source/getting_started/resources/cluster_container_images.png
new file mode 100644
index 0000000..ea24f9e
Binary files /dev/null and b/docs/source/getting_started/resources/cluster_container_images.png differ
diff --git a/docs/source/getting_started/resources/cluster_subscribe.png b/docs/source/getting_started/resources/cluster_subscribe.png
new file mode 100644
index 0000000..f7614c8
Binary files /dev/null and b/docs/source/getting_started/resources/cluster_subscribe.png differ
diff --git a/docs/source/getting_started/resources/cluster_to_launch.png b/docs/source/getting_started/resources/cluster_to_launch.png
new file mode 100644
index 0000000..2897daf
Binary files /dev/null and b/docs/source/getting_started/resources/cluster_to_launch.png differ
diff --git a/docs/source/getting_started/resources/embed_confirm.png b/docs/source/getting_started/resources/embed_confirm.png
new file mode 100644
index 0000000..c049321
Binary files /dev/null and b/docs/source/getting_started/resources/embed_confirm.png differ
diff --git a/docs/source/getting_started/resources/embed_container_images.png b/docs/source/getting_started/resources/embed_container_images.png
new file mode 100644
index 0000000..63aac0f
Binary files /dev/null and b/docs/source/getting_started/resources/embed_container_images.png differ
diff --git a/docs/source/getting_started/resources/embed_subscribe.png b/docs/source/getting_started/resources/embed_subscribe.png
new file mode 100644
index 0000000..be11d8b
Binary files /dev/null and b/docs/source/getting_started/resources/embed_subscribe.png differ
diff --git a/docs/source/getting_started/resources/embed_to_launch.png b/docs/source/getting_started/resources/embed_to_launch.png
new file mode 100644
index 0000000..e361a3b
Binary files /dev/null and b/docs/source/getting_started/resources/embed_to_launch.png differ
diff --git a/docs/source/getting_started/resources/failed_dag_task.png b/docs/source/getting_started/resources/failed_dag_task.png
new file mode 100644
index 0000000..ce0c096
Binary files /dev/null and b/docs/source/getting_started/resources/failed_dag_task.png differ
diff --git a/docs/source/getting_started/resources/failed_task_clear.png b/docs/source/getting_started/resources/failed_task_clear.png
new file mode 100644
index 0000000..333bdd2
Binary files /dev/null and b/docs/source/getting_started/resources/failed_task_clear.png differ
diff --git a/docs/source/getting_started/resources/failed_task_clear_task.png b/docs/source/getting_started/resources/failed_task_clear_task.png
new file mode 100644
index 0000000..7df2dbf
Binary files /dev/null and b/docs/source/getting_started/resources/failed_task_clear_task.png differ
diff --git a/docs/source/getting_started/resources/flow_confirm.png b/docs/source/getting_started/resources/flow_confirm.png
new file mode 100644
index 0000000..cd345ad
Binary files /dev/null and b/docs/source/getting_started/resources/flow_confirm.png differ
diff --git a/docs/source/getting_started/resources/flow_container_images.png b/docs/source/getting_started/resources/flow_container_images.png
new file mode 100644
index 0000000..1f08cce
Binary files /dev/null and b/docs/source/getting_started/resources/flow_container_images.png differ
diff --git a/docs/source/getting_started/resources/flow_subscribe.png b/docs/source/getting_started/resources/flow_subscribe.png
new file mode 100644
index 0000000..406148c
Binary files /dev/null and b/docs/source/getting_started/resources/flow_subscribe.png differ
diff --git a/docs/source/getting_started/resources/flow_to_launch.png b/docs/source/getting_started/resources/flow_to_launch.png
new file mode 100644
index 0000000..6fac3b6
Binary files /dev/null and b/docs/source/getting_started/resources/flow_to_launch.png differ
diff --git a/docs/source/getting_started/resources/lb_url.png b/docs/source/getting_started/resources/lb_url.png
new file mode 100644
index 0000000..06ea6cc
Binary files /dev/null and b/docs/source/getting_started/resources/lb_url.png differ
diff --git a/docs/source/getting_started/resources/model_arn_cluster.png b/docs/source/getting_started/resources/model_arn_cluster.png
deleted file mode 100644
index 399a68b..0000000
Binary files a/docs/source/getting_started/resources/model_arn_cluster.png and /dev/null differ
diff --git a/docs/source/getting_started/resources/model_arn_flow.png b/docs/source/getting_started/resources/model_arn_flow.png
deleted file mode 100644
index d53e22e..0000000
Binary files a/docs/source/getting_started/resources/model_arn_flow.png and /dev/null differ
diff --git a/docs/source/getting_started/resources/model_arn_tech.png b/docs/source/getting_started/resources/model_arn_tech.png
deleted file mode 100644
index a014b19..0000000
Binary files a/docs/source/getting_started/resources/model_arn_tech.png and /dev/null differ
diff --git a/docs/source/getting_started/resources/pipeline_confirm.png b/docs/source/getting_started/resources/pipeline_confirm.png
new file mode 100644
index 0000000..c7145af
Binary files /dev/null and b/docs/source/getting_started/resources/pipeline_confirm.png differ
diff --git a/docs/source/getting_started/resources/pipeline_container_images.png b/docs/source/getting_started/resources/pipeline_container_images.png
new file mode 100644
index 0000000..e27de65
Binary files /dev/null and b/docs/source/getting_started/resources/pipeline_container_images.png differ
diff --git a/docs/source/getting_started/resources/pipeline_subscribe.png b/docs/source/getting_started/resources/pipeline_subscribe.png
new file mode 100644
index 0000000..4673bba
Binary files /dev/null and b/docs/source/getting_started/resources/pipeline_subscribe.png differ
diff --git a/docs/source/getting_started/resources/pipeline_to_launch.png b/docs/source/getting_started/resources/pipeline_to_launch.png
new file mode 100644
index 0000000..df56ec4
Binary files /dev/null and b/docs/source/getting_started/resources/pipeline_to_launch.png differ
diff --git a/docs/source/getting_started/resources/stack_timeout.png b/docs/source/getting_started/resources/stack_timeout.png
new file mode 100644
index 0000000..1b06fd7
Binary files /dev/null and b/docs/source/getting_started/resources/stack_timeout.png differ
diff --git a/docs/source/getting_started/resources/subscribe_to_technique_detector.png b/docs/source/getting_started/resources/subscribe_to_technique_detector.png
deleted file mode 100644
index e2dbb5d..0000000
Binary files a/docs/source/getting_started/resources/subscribe_to_technique_detector.png and /dev/null differ
diff --git a/docs/source/getting_started/resources/technique_confirm.png b/docs/source/getting_started/resources/technique_confirm.png
new file mode 100644
index 0000000..455cffb
Binary files /dev/null and b/docs/source/getting_started/resources/technique_confirm.png differ
diff --git a/docs/source/getting_started/resources/technique_container_images.png b/docs/source/getting_started/resources/technique_container_images.png
new file mode 100644
index 0000000..f4b2cd3
Binary files /dev/null and b/docs/source/getting_started/resources/technique_container_images.png differ
diff --git a/docs/source/getting_started/resources/technique_subscribe.png b/docs/source/getting_started/resources/technique_subscribe.png
new file mode 100644
index 0000000..972401b
Binary files /dev/null and b/docs/source/getting_started/resources/technique_subscribe.png differ
diff --git a/docs/source/getting_started/resources/technique_to_launch.png b/docs/source/getting_started/resources/technique_to_launch.png
new file mode 100644
index 0000000..8c1286f
Binary files /dev/null and b/docs/source/getting_started/resources/technique_to_launch.png differ
diff --git a/docs/source/getting_started/resources/template_output.png b/docs/source/getting_started/resources/template_output.png
new file mode 100644
index 0000000..cab2674
Binary files /dev/null and b/docs/source/getting_started/resources/template_output.png differ
diff --git a/docs/source/getting_started/subscription.rst b/docs/source/getting_started/subscription.rst
index a2757c9..ce7d02c 100644
--- a/docs/source/getting_started/subscription.rst
+++ b/docs/source/getting_started/subscription.rst
@@ -1,122 +1,74 @@
Subscribing to Cypienta products on AWS Marketplace
===================================================
-ATTACK Technique Detector
+Cypienta Correlation Pipeline
-------------------------
-1. Use the `link `_ to explore the marketplace model packages in AWS. Search for ``Cypienta ATTACK Technique Detector``
+1. Subscribe to the `Cypienta Correlation Pipeline `__ by clicking on the ``View purchase options`` button.
- Click on ``Continue to Subscribe``.
-
- .. image:: resources/subscribe_to_technique_detector.png
- :alt: Subscribe to technique detector
- :align: center
-
-2. Click on the ``Accept offer`` button on the next page.
-
- .. image:: resources/accept_offer.png
- :alt: Subscribe to technique detector
- :align: center
-
-3. Click on ``Continue to configuration``. In the section ``Select your launch method``, select ``AWS CloudFormation``. Select the ``Software Version`` as ``0.4`` from the drop down. Select the ``Region`` in which you would want to deploy Cypienta products. Copy and make note of the ``Product Arn``.
-
- .. image:: resources/model_arn_tech.png
- :alt: Subscribe to technique detector
- :align: center
-
-
-Temporal Clustering
--------------------
-
-1. Use the `link `_ to explore the marketplace model packages in AWS. Search for ``Cypienta Temporal Clustering``
-
- Click on ``Continue to Subscribe``.
-
- .. image:: resources/subscribe_to_temporal_clustering.png
- :alt: Subscribe to temporal clustering
+ .. image:: resources/pipeline_subscribe.png
+ :alt: UI product subscribe
:align: center
-2. Click on the ``Accept offer`` button on the next page.
-
- .. image:: resources/accept_offer.png
- :alt: Subscribe to technique detector
- :align: center
+2. On the next page, click on the ``Accept terms`` button to agree with the terms.
-3. Click on ``Continue to configuration``. In the section ``Select your launch method``, select ``AWS CloudFormation``. Select the ``Software Version`` as ``0.6`` from the drop down. Select the ``Region`` in which you would want to deploy Cypienta products. Copy and make note of the ``Product Arn``.
+3. Wait for the subscription confirmation page to appear. Once you see the ``Continue to Configuration`` button, you are now subscribed to the product and can move to the :doc:`deploy` step.
- .. image:: resources/model_arn_cluster.png
- :alt: Subscribe to flow detector
+ .. image:: resources/pipeline_confirm.png
+ :alt: confirm subscribe
:align: center
-MITRE ATTACK Flow Detector
--------------------
+.. Optional steps
+.. ~~~~~~~~~~~~~~
-1. Use the `link `_ to explore the marketplace model packages in AWS. Search for ``Cypienta MITRE ATTACK Flow Detector``
+.. If you want to deploy the pipeline manually, follow the steps below to get the list of container images available in the product offering.
- Click on ``Continue to Subscribe``.
+.. 1. Wait for the subscription confirmation page to appear. Then click on ``Continue to Configuration``.
- .. image:: resources/subscribe_to_flow_detector.png
- :alt: Subscribe to technique detector
- :align: center
-
-2. Click on the ``Accept offer`` button on the next page.
+.. .. image:: resources/pipeline_confirm.png
+.. :alt: confirm subscribe
+.. :align: center
- .. image:: resources/accept_offer.png
- :alt: Subscribe to technique detector
- :align: center
+.. 2. Select the ``Fulfillment option`` as ``ECS``. Select the ``Software version`` as ``v0.9``. Then click on ``Continue to Launch``
-3. Click on ``Continue to configuration``. In the section ``Select your launch method``, select ``AWS CloudFormation``. Select the ``Software Version`` as ``0.6`` from the drop down. Select the ``Region`` in which you would want to deploy Cypienta products. Copy and make note of the ``Product Arn``.
-
- .. image:: resources/model_arn_flow.png
- :alt: Subscribe to technique detector
- :align: center
+.. .. image:: resources/pipeline_to_launch.png
+.. :alt: to launch
+.. :align: center
+.. 3. Click on the Copy button in the ``Container images`` section and make note of the ``CONTAINER_IMAGES``
-Cypienta User Interface (UI)
-----------------------------
-
-1. Subscribe to the `Cypienta User Interface (UI) `__ by clicking on ``Continue to Subscribe`` button.
-
- .. image:: resources/ui_product.png
- :alt: UI product subscribe
- :align: center
+.. .. image:: resources/pipeline_container_images.png
+.. :alt: container images
+.. :align: center
-2. On the next page, click on ``Accept terms`` button to agree with the terms.
+.. Make note of the ``CONTAINER_IMAGES`` from the copied snippet:
-3. Wait for the subscription confirmation page to appear. Then click on ``Continue to Configuration``.
+.. .. code-block::
+
+.. aws ecr get-login-password \
+.. --region us-east-1 | docker login \
+.. --username AWS \
+.. --password-stdin 709825985650.dkr.ecr.us-east-1.amazonaws.com
+
+.. CONTAINER_IMAGES="709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-cluster-part-1:v0.9.1,709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-cluster-part-2:v0.9.1,709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-ui-nginx:v0.9.1,709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-flow-detector:v0.9.1,709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-lambda-function:v0.9.1,709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-technique-detector:v0.9.1,709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-airflow:v0.9.1,709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-ui:v0.9.1"
- .. image:: resources/confirm_subscribe.png
- :alt: confirm subscribe
- :align: center
+.. for i in $(echo $CONTAINER_IMAGES | sed "s/,/ /g"); do docker pull $i; done
-4. Select the ``Fulfillment option`` as ``ECS``. Select the ``Software version`` as ``v0.1.2``. Then click on ``Continue to Launch``
+.. Here the model container images are:
- .. image:: resources/to_launch.png
- :alt: to launch
- :align: center
+.. - **Technique container image:** ``709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-technique-detector:v0.9.1.1``
-5. Click on the Copy button in the ``Container images`` section and make note of the ``CONTAINER_IMAGES``
+.. - **Cluster part 1 container image:** ``709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-cluster-part-1:v0.9.1.1``
- .. image:: resources/container_images.png
- :alt: container images
- :align: center
+.. - **Cluster part 2 container image:** ``709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-cluster-part-2:v0.9.1.1``
- Make note of the ``CONTAINER_IMAGES`` from the copied snippet:
+.. - **Flow detector container image:** ``709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-flow-detector:v0.9.1.1``
- .. code-block::
-
- aws ecr get-login-password \
- --region us-east-1 | docker login \
- --username AWS \
- --password-stdin 709825985650.dkr.ecr.us-east-1.amazonaws.com
-
- CONTAINER_IMAGES="709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/cytech:nginx-marketv0.0.3,709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/cytech:marketv0.1.2"
+.. - **Lambda function container image:** ``709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-lambda-function:v0.9.1.1``
- for i in $(echo $CONTAINER_IMAGES | sed "s/,/ /g"); do docker pull $i; done
+.. - **Airflow container image:** ``709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-airflow:v0.9.1.1``
- Here the two images are:
+.. - **UI container image:** ``709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-ui:v0.9.1.1``
- - **Web container image:** 709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/cytech:marketv0.1.2
-
- - **Nginx container image:** 709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/cytech:nginx-marketv0.0.3
+.. - **UI-nginx container image:** ``709825985650.dkr.ecr.us-east-1.amazonaws.com/cypienta/pipeline-ui-nginx:v0.9.1.1``
diff --git a/docs/source/getting_started/troubleshoot.rst b/docs/source/getting_started/troubleshoot.rst
index dd760d3..fc3841a 100644
--- a/docs/source/getting_started/troubleshoot.rst
+++ b/docs/source/getting_started/troubleshoot.rst
@@ -1,6 +1,7 @@
Troubleshoot
========
+
ERROR: ResourceLimitExceeded for batch transform job instance type.
-------------------------------------------------------------------
@@ -22,3 +23,170 @@ AWS console for the region you are using. Search for ``Service Quotas``
3. Select the required instance type from the list and click on ``Request
increase at account level``.
+
+
+How to delete the stack
+-----------------------
+
+1. Navigate to the AWS console and search for ``CloudWatch``. Make sure you are in the same region in which you created the CloudFormation stack.
+
+2. On the left hand side panel, under ``Logs``, click on ``Log groups``. Select the check boxes for all the log groups that were created by the CloudFormation stack, click on the ``Actions`` dropdown, click on ``Delete log group(s)``, and then click on the ``Delete`` button.
+
+3. Next, search for ``S3`` in the AWS console search bar.
+
+4. Select the bucket that was created from the CloudFormation stack and click on ``Empty``. Type in ``permanently delete`` in the confirmation box and click on ``Empty``.
+
+5. Now search for ``CloudFormation`` in the AWS console search bar.
+
+6. Open the stack that you want to delete and click on ``Delete``. Wait for the entire stack to be deleted before you move on to creating a new stack.
+
+ .. note::
+ If there are any failures in deleting the stack, then click ``Retry delete``.
+
+ To speed up stack deletion, follow the optional steps below:
+
+ 1. Navigate to the AWS console, search for ``ECS``, and select ``Elastic Container Service``.
+
+ 2. Click on the ECS cluster deployed from the stack. Select all the services from the ``Services`` tab and click on ``Delete service``. Check the box for ``Force delete``, type ``delete`` in the confirmation box, and then click on ``Delete``.
+
+ 3. Navigate to the AWS console and search for ``EC2``.
+
+ 4. Manually reduce the desired capacity of the Auto Scaling Groups with the name ``-*``. Select each Auto Scaling Group, open the ``Actions`` dropdown, and select ``Edit``. Reduce the ``Desired capacity`` to ``0`` and the ``Min desired capacity`` to ``0``, then click on ``Update``.
+
+ 5. Manually delete the running EC2 instances with the name ``* - ``. Select all the pertinent instances, click on the ``Instance state`` dropdown, and click on ``Terminate instance``.
+
+7. In the EC2 service, navigate to ``Volumes`` under ``Elastic Block Store`` and select all the volumes with the name ``_*``. Then click on ``Actions`` and click on ``Delete volume``. A scripted alternative to the bucket and stack deletion steps is sketched below.
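+
+The same cleanup can also be scripted. Below is a minimal boto3 sketch covering only the bucket-emptying and stack-deletion steps; the bucket and stack names are placeholders for the ones from your deployment, and it is not a substitute for verifying the deletions in the console.
+
+.. code-block:: python
+
+    import boto3
+
+    BUCKET = "my-cypienta-bucket"   # placeholder: the bucket created by the stack
+    STACK = "my-cypienta-stack"     # placeholder: your CloudFormation stack name
+
+    s3 = boto3.resource("s3")
+    cfn = boto3.client("cloudformation")
+
+    # Empty the bucket first; CloudFormation cannot delete a non-empty bucket.
+    bucket = s3.Bucket(BUCKET)
+    bucket.objects.all().delete()
+    bucket.object_versions.all().delete()  # also removes old versions if versioning was enabled
+
+    # Delete the stack and wait for the deletion to finish.
+    cfn.delete_stack(StackName=STACK)
+    cfn.get_waiter("stack_delete_complete").wait(StackName=STACK)
+    print("Stack deleted")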
+
+
+Common Mistakes
+----------------
+
+Some of the common errors that can result in the failure of the CloudFormation stack:
+
+- Duplicate S3 bucket name
+- Incorrect ARN for Models/UI
+- Incorrect Image for VRL Lambda
+
+
+Duplicate S3 bucket name
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+In the case of a duplicate S3 bucket name, delete the failed CloudFormation stack,
+then choose a new globally unique S3 bucket name and recreate the stack.
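+
+Because S3 bucket names are globally unique, you can check whether a candidate name is free before recreating the stack. A minimal boto3 sketch, using a hypothetical candidate name:
+
+.. code-block:: python
+
+    import boto3
+    from botocore.exceptions import ClientError
+
+    def bucket_name_available(name: str) -> bool:
+        """Return True if no bucket with this name exists on S3."""
+        try:
+            boto3.client("s3").head_bucket(Bucket=name)
+            return False  # the bucket exists and belongs to your account
+        except ClientError as err:
+            # "404" -> the name is free; "403" -> it exists in another account
+            return err.response["Error"]["Code"] == "404"
+
+    print(bucket_name_available("my-candidate-cypienta-bucket"))  # hypothetical name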
+
+
+Incorrect ARN for Models/UI
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In the case of an incorrect ARN for models/UI, delete the failed CloudFormation stack,
+then confirm the ARNs for all models and UI components as described in :doc:`subscription`, and recreate the stack.
+
+
+Incorrect Image for VRL Lambda
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In the case of an incorrect image for VRL lambda, delete the failed CloudFormation stack,
+then ensure that you have the correct ECR Image URI and version number, and recreate the stack.
+
+
+Common errors
+-------------
+
+
+CapacityError: Unable to provision requested ML compute capacity. Please retry using a different ML instance type.
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If the SageMaker batch transform job fails for ``transform-job-cluster-*`` with the error
+``CapacityError: Unable to provision requested ML compute capacity. Please retry using a different ML instance type.``
+the batch transform job can be retriggered manually. Follow the steps below to retrigger:
+
+1. Open the lambda function ``create_cluster``.
+
+2. Click on the ``Configuration`` tab, then click on ``Environment variables``. Click on the ``Edit`` button and click on ``Add environment variable``. Under the ``Key`` text field enter ``batch_transform_job_suffix``, and under the ``Value`` text field enter any unique value of at most 3 characters, for example ``1``. Then click on the ``Save`` button.
+
+3. Open the S3 bucket created by the CloudFormation stack. Navigate to ``scratch/output/classification//``.
+
+4. Select the ``input.json``, click on ``Actions``, click on ``Copy``. On the Copy page, click on ``Browse S3``, click on ``Choose destination``, and then click on ``Copy``.
+
+5. This will trigger a new batch transform job.
+
+If the SageMaker batch transform job fails for ``transform-job-flow-*`` with the error
+``CapacityError: Unable to provision requested ML compute capacity. Please retry using a different ML instance type.``
+the batch transform job can be retriggered manually. Follow the steps below to retrigger:
+
+1. Open the lambda function ``create_flow``.
+
+2. Click on the ``Configuration`` tab, then click on ``Environment variables``. Click on the ``Edit`` button and click on ``Add environment variable``. Under the ``Key`` text field enter ``batch_transform_job_suffix``, and under the ``Value`` text field enter any unique value of at most 3 characters, for example ``1``. Then click on the ``Save`` button.
+
+3. Open the S3 bucket created by the CloudFormation stack. Navigate to ``scratch/output/cluster//``.
+
+4. Select the ``input_flow.json``, click on ``Actions``, click on ``Copy``. On the Copy page, click on ``Browse S3``, click on ``Choose destination``, and then click on ``Copy``.
+
+5. This will trigger a new batch transform job. A scripted version of both retrigger procedures is sketched below.
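+
+Both retrigger procedures can also be scripted with boto3. The sketch below sets the environment variable and then re-copies the input object onto itself, which is one way to fire the S3 event notification again. The function name, bucket, and key are placeholders; the S3 key shown here is hypothetical and must be replaced with the actual object path from your bucket.
+
+.. code-block:: python
+
+    import boto3
+
+    FUNCTION = "create_cluster"            # or "create_flow"
+    BUCKET = "my-cypienta-bucket"          # placeholder: bucket created by the stack
+    KEY = "scratch/output/classification/.../input.json"   # placeholder input object
+
+    lambda_client = boto3.client("lambda")
+    s3 = boto3.client("s3")
+
+    # 1) Add or overwrite the batch_transform_job_suffix variable (3 characters max).
+    config = lambda_client.get_function_configuration(FunctionName=FUNCTION)
+    env = config.get("Environment", {}).get("Variables", {})
+    env["batch_transform_job_suffix"] = "1"
+    lambda_client.update_function_configuration(
+        FunctionName=FUNCTION,
+        Environment={"Variables": env},
+    )
+
+    # 2) Re-copy the input object so the S3 event notification fires again.
+    s3.copy_object(
+        Bucket=BUCKET,
+        Key=KEY,
+        CopySource={"Bucket": BUCKET, "Key": KEY},
+        MetadataDirective="REPLACE",   # required when copying an object onto itself
+    )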
+
+
+Airflow Task failure
+~~~~~~~~~~~~~~~~~~~~
+
+If any DAG run in Airflow has failed, you can manually rerun the failed task from the UI or through the REST API (sketched after these steps). Follow the steps below to rerun the task from the UI:
+
+1. Login to the Airflow UI.
+
+2. Navigate to the DAG that has failed.
+
+3. On the left hand side panel, which shows all the DAG runs, select the failed DAG task.
+
+ .. image:: resources/failed_dag_task.png
+ :alt: failed_dag_task
+ :align: center
+
+4. Click on the ``Clear task`` button on the top right corner of the page.
+
+ .. image:: resources/failed_task_clear_task.png
+ :alt: failed_task_clear_task
+ :align: center
+
+5. Click on the ``Clear`` button to clear the task.
+
+ .. image:: resources/failed_task_clear.png
+ :alt: failed_task_clear
+ :align: center
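+
+The same "clear and re-run" action is available through the Airflow stable REST API. Below is a minimal sketch using Python and the third-party ``requests`` package; the URL, credentials, and DAG id are placeholders, and the authentication must match how your Airflow instance is configured.
+
+.. code-block:: python
+
+    import requests
+
+    AIRFLOW_URL = "http://my-airflow-host:8080"   # placeholder: Airflow webserver URL
+    AUTH = ("airflow_user", "airflow_password")   # placeholder: basic-auth credentials
+    DAG_ID = "pipeline_part_2"                    # placeholder: DAG with the failed task
+
+    # Clear only the failed task instances of the DAG so they get re-run.
+    resp = requests.post(
+        f"{AIRFLOW_URL}/api/v1/dags/{DAG_ID}/clearTaskInstances",
+        auth=AUTH,
+        json={
+            "dry_run": False,       # set to True first to preview what would be cleared
+            "only_failed": True,
+            "reset_dag_runs": True,
+        },
+    )
+    resp.raise_for_status()
+    print(resp.json())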
+
+
+S3 schema
+---------
+
+Folder structure
+~~~~~~~~~~~~~~~~
+
+The S3 bucket folder structure is as follows:
+
+.. code-block:: text
+
+    bucket/
+    ├── input/
+    │   └── cypienta_cef/
+    │       └── input.json
+    ├── output/
+    │   └── 2024-08-08 21:22:52 +0000/
+    │       ├── cluster.json
+    │       ├── event.json
+    │       └── flow.json
+    ├── scratch/
+    ├── splunk_input/
+    │   ├── scratch/
+    │   └── input/
+    ├── splunk/
+    └── README.rst
+
+**input/:** The input folder contains all the files that will be processed by the Cypienta pipeline. Once a file is created in this folder, it is added to the queue to be processed in a step function execution. There is one step function execution per file in the input folder, run in sequential order. The status of the current execution can be viewed in the Airflow UI.
+
+**output/:** The output folder contains event, cluster, flow output for the input that was processed by the Cypienta pipeline.
+
+**scratch/:** The folder contains necessary files that are required for proper functioning of the Cypienta pipeline.
+
+**splunk_input/:** This folder is created once the connector from Splunk to S3 is configured. If you do not see a file under the ``splunk_input/input/`` prefix path, then your connector has either not triggered yet or an error occurred. Once a file in the ``splunk_input/input/`` folder is preprocessed for use in the Cypienta pipeline, a file with the same name is created under the ``input/`` folder. You can check the progress in the logs of the ``splunk_input`` lambda function. If the mapping was not able to extract the fields that Cypienta requires, a ``splunk_input/scratch/transformed_failed/{input_file_name}/`` folder will be created.
+
+**splunk/:** This folder will contain the files with data that needs to be added to Splunk. If you see a file in this folder but not in Splunk, check your connector settings. If you do not see the file at all, check the ``process_flow`` lambda function logs for errors. A quick way to check which of these folders contain files is sketched below.
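+
+To quickly check which of these folders have been populated (for example, whether the Splunk connector has written anything under ``splunk_input/input/``), you can list the prefixes with boto3. A minimal sketch with a placeholder bucket name:
+
+.. code-block:: python
+
+    import boto3
+
+    BUCKET = "my-cypienta-bucket"   # placeholder: bucket created by the CloudFormation stack
+    PREFIXES = ["input/", "output/", "splunk_input/input/", "splunk/"]
+
+    s3 = boto3.client("s3")
+    for prefix in PREFIXES:
+        resp = s3.list_objects_v2(Bucket=BUCKET, Prefix=prefix, MaxKeys=5)
+        print(f"{prefix}: {resp.get('KeyCount', 0)} object(s), showing up to 5")
+        for obj in resp.get("Contents", []):
+            print("   ", obj["Key"], obj["LastModified"])
+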
diff --git a/docs/source/index.rst b/docs/source/index.rst
index e1a48f6..7bcf037 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -3,50 +3,115 @@ Cypienta
Welcome to the official documentation for Cypienta. This guide will help you understand and utilize the powerful features of our software, ensuring a smooth and efficient experience from subscription to installation.
-In this documentation, you will find detailed instructions for:
-- **Prerequisites**: A list of prerequisites for installation and usage of the product
-- **Subscribing to Cypienta products on AWS Marketplace**: A step-by-step guide to help you subscribe to our offering on AWS Marketplace.
-- **AWS Deployment**: Detailed instructions to ensure a smooth deployment process to AWS.
-- **Sample Test**: An example of how to use the product with a sample test.
-- **Troubleshooting and Support**: Solutions to common issues and information on how to get further assistance.
+Getting Started
+---------------
-.. toctree::
- :maxdepth: 1
+:doc:`getting_started/prerequisites`: Prerequisites for deploying Cypienta on AWS.
+
+:doc:`getting_started/subscription`: Subscription details for Cypienta.
+
+:doc:`getting_started/deploy`: Deploy Cypienta on AWS.
+
+:doc:`deploy_ui/mapping`: Mapping custom data fields to Cypienta input format.
+
+:doc:`deploy_ui/airflow`: Airflow configuration and errors.
+
+:doc:`getting_started/troubleshoot`: Troubleshooting common issues.
+
+
+Cypienta UI
+---------------
+
+:doc:`deploy_ui/start_using`: Start using Cypienta UI.
+
+:doc:`deploy_ui/tune_config`: Tune Cluster Model Config.
+
+
+Splunk Integration
+------------------
+
+:doc:`splunk/splunk`: Integrating Splunk with Cypienta to provide data as input.
+
+:doc:`splunk/output`: Integrating Splunk with Cypienta to receive the output on Splunk SOAR.
+
+
+Elastic Integration
+-------------------
+
+:doc:`elastic/elastic`: Integrating Elastic with Cypienta to provide data as input.
+
+:doc:`elastic/output`: Integrating Elastic with Cypienta to receive the output in Elastic.
+
+
+JIRA Integration
+----------------
+
+:doc:`jira/jira`: Integrating JIRA with Cypienta to create tickets from the output.
+
+
+Pipeline Overview
+-----------------
+
+:doc:`dag/dags`: Overview of the pipeline.
+
+
+Open-Source Lambda Functions
+----------------------------
+
+:doc:`lambda/functions`: Overview of the open-source lambda functions.
+
+
+Test with CEF input format
+--------------------------
+
+:doc:`getting_started/end_to_end_test_cef`: Test Cypienta with CEF input format.
+
+
+.. sidebar-links::
:caption: Getting Started
getting_started/prerequisites
getting_started/subscription
getting_started/deploy
+ deploy_ui/mapping
+ deploy_ui/airflow
getting_started/troubleshoot
-.. toctree::
- :maxdepth: 1
- :caption: Ex Integrations (SIEM, XDR, SOAR)
+.. sidebar-links::
+ :caption: Cypienta UI
+
+ deploy_ui/start_using
+ deploy_ui/tune_config
+
+.. sidebar-links::
+ :caption: Splunk Integration
splunk/splunk
- splunk/vrl
splunk/output
+
+.. sidebar-links::
+ :caption: Elastic Integration
+
elastic/elastic
- elastic/vrl
elastic/output
-.. toctree::
- :maxdepth: 1
- :caption: Cypienta UI
+.. sidebar-links::
+ :caption: JIRA Integration
- deploy_ui/start_using
+ jira/jira
+
+.. sidebar-links::
+ :caption: Pipeline Overview
-.. toctree::
- :maxdepth: 1
+ dag/dags
+
+.. sidebar-links::
:caption: Open-Source Lambda Functions
lambda/functions
+.. sidebar-links::
+ :caption: Test with CEF input format
-.. toctree::
- :maxdepth: 1
- :caption: Data Mapping & Transfromations
-
- vrl/transform
- vrl/ai
+ getting_started/end_to_end_test_cef
\ No newline at end of file
diff --git a/docs/source/jira/jira.rst b/docs/source/jira/jira.rst
new file mode 100644
index 0000000..a221f66
--- /dev/null
+++ b/docs/source/jira/jira.rst
@@ -0,0 +1,87 @@
+Configure JIRA
+=================
+
+Prerequisites
+-------------
+
+Make sure that you have deployed the Cypienta application detailed in :doc:`../getting_started/deploy` before integrating.
+
+Make sure that you note the JIRA API token, JIRA Username, JIRA URL, and JIRA Project key before setting up the lambda function.
+
+Setup JIRA connection
+-----------------------------------------------
+
+Follow the steps below to add a lambda function that will create the JIRA issues.
+
+Create lambda function
+~~~~~~~~~~~~~~~~~~~~~~
+
+1. Navigate to the AWS console in the same region where your Cypienta application is deployed. Search for ``Lambda`` in the search bar and click on ``Create function``.
+
+2. Select ``Author from scratch`` and enter the ``Function name`` as ``create_jira``, with a desired prefix and suffix. For ``Runtime`` select ``Python 3.11``, and select the ``Architecture`` as ``x86_64``. Expand ``Change default execution role`` and select ``Use an existing role``. Click on ``Create function`` to create the function.
+
+ .. image:: resources/create_function.png
+ :alt: create function
+ :align: center
+
+3. Scroll to the bottom of the lambda function that was just created and find the section ``Layers``. Click on ``Add a layer`` to add a layer.
+
+ .. image:: resources/add_layer.png
+ :alt: add layer
+ :align: center
+
+4. Select ``AWS layers`` as ``Layer source``. Select ``AWSSDKPandas-Python311`` as ``AWS layers`` and select the version that is available in the dropdown. Click on ``Add`` to add the layer to the lambda function.
+
+ .. image:: resources/layer_config.png
+ :alt: layer config
+ :align: center
+
+5. Now select the ``Configuration`` tab on the lambda function overview page and select ``General configuration`` from the left hand side panel. Click on the ``Edit`` button to modify the values.
+
+6. Set the ``Memory``, ``Ephemeral storage``, and ``Timeout`` fields to the maximum values available, and then click on ``Save``.
+
+ .. image:: resources/general_config.png
+ :alt: general config
+ :align: center
+
+7. Again on the ``Configuration`` tab of the lambda function overview page, select ``Triggers`` from the left hand side panel. Click on the ``Add trigger`` button to add a trigger.
+
+8. For the trigger configuration, select the source as ``S3``. From the bucket dropdown, select the bucket that was created for the Cypienta application. Select ``All object create events`` for ``Event types``. Add Prefix as ``scratch/output/flow/`` and suffix as ``.json``. Check the box to acknowledge the message for recursive invocation and click on ``Add``.
+
+ .. image:: resources/trigger.png
+ :alt: add trigger
+ :align: center
+
+9. Back on the ``Configuration`` tab, select ``Environment variables`` from the left hand side panel. Click on ``Edit`` to add variables.
+
+10. To add environment variables, click on the ``Add environment variable`` button and enter the following key-value pairs. Finally, click on ``Save``.
+
+ - Key: ``cluster_or_flow``
+ Value: ``cluster``
+
+ - Key: ``event_threshold``
+ Value: ``2``
+ Description: The minimum number of events that must be present in a cluster for which a JIRA issue is to be created.
+
+ - Key: ``jira_api_token``
+ Value: ````
+
+ - Key: ``jira_lookup_object``
+ Value: ``scratch/jira/issues.csv``
+
+ - Key: ``jira_project_key``
+ Value: ````
+
+ - Key: ``jira_url``
+ Value: ````
+ Sample value: ``https://cypienta-demo.atlassian.net``
+
+ - Key: ``jira_username``
+ Value: ````
+
+11. Get the lambda function `create_jira `__ and copy and paste the code into the ``Code source`` section of the ``Code`` tab. Click on the ``Deploy`` button to save the lambda function.
+
+
+Now all the new clusters created will be pushed to the JIRA project that was configured in the environment variables of the lambda function. A minimal outline of such a function is sketched below for reference.
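+
+The outline below shows how such a function could read the environment variables above and create an issue through the JIRA REST API. This is a minimal sketch using only the Python standard library, not the actual ``create_jira`` code; the summary, description, and issue type are hypothetical.
+
+.. code-block:: python
+
+    import base64
+    import json
+    import os
+    import urllib.request
+
+    def lambda_handler(event, context):
+        url = os.environ["jira_url"].rstrip("/") + "/rest/api/2/issue"
+        credentials = f'{os.environ["jira_username"]}:{os.environ["jira_api_token"]}'
+        token = base64.b64encode(credentials.encode()).decode()
+
+        payload = {
+            "fields": {
+                "project": {"key": os.environ["jira_project_key"]},
+                "summary": "Cypienta cluster alert",      # hypothetical summary
+                "description": json.dumps(event)[:1000],  # hypothetical description
+                "issuetype": {"name": "Task"},            # hypothetical issue type
+            }
+        }
+        request = urllib.request.Request(
+            url,
+            data=json.dumps(payload).encode(),
+            headers={"Authorization": f"Basic {token}",
+                     "Content-Type": "application/json"},
+            method="POST",
+        )
+        with urllib.request.urlopen(request) as response:
+            return json.loads(response.read())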
+
+Refer to :doc:`../splunk/splunk` for integrating JIRA with Splunk SOAR.
diff --git a/docs/source/jira/resources/add_layer.png b/docs/source/jira/resources/add_layer.png
new file mode 100644
index 0000000..c59326f
Binary files /dev/null and b/docs/source/jira/resources/add_layer.png differ
diff --git a/docs/source/jira/resources/create_function.png b/docs/source/jira/resources/create_function.png
new file mode 100644
index 0000000..e0bbb13
Binary files /dev/null and b/docs/source/jira/resources/create_function.png differ
diff --git a/docs/source/jira/resources/general_config.png b/docs/source/jira/resources/general_config.png
new file mode 100644
index 0000000..7debd00
Binary files /dev/null and b/docs/source/jira/resources/general_config.png differ
diff --git a/docs/source/jira/resources/layer_config.png b/docs/source/jira/resources/layer_config.png
new file mode 100644
index 0000000..08fd67d
Binary files /dev/null and b/docs/source/jira/resources/layer_config.png differ
diff --git a/docs/source/jira/resources/trigger.png b/docs/source/jira/resources/trigger.png
new file mode 100644
index 0000000..d8b5479
Binary files /dev/null and b/docs/source/jira/resources/trigger.png differ
diff --git a/docs/source/lambda/functions.rst b/docs/source/lambda/functions.rst
index 3043aaa..286f400 100644
--- a/docs/source/lambda/functions.rst
+++ b/docs/source/lambda/functions.rst
@@ -4,62 +4,117 @@ Overview of Lambda
Functionality of Lambda Functions
---------------------------------
-The fleet of lambda functions will be responsible for end-to-end flow for the Cypienta Sagemaker products.
+The fleet of lambda functions will be responsible for end-to-end flow for the Cypienta Correlation Pipeline.
-1. **enrich_with_technique:**
+#. **create_mapping:**
-- Get the input data from the ``input/`` S3 folder
-- Chunk the input, sanitize it in format as required for cluster model, encode node_features, encode other_attributes_dict, create mappings for internal ids to user given ids, mappings for chunk unique id to internal ids.
-- Enrich input with techniques. If the lookup table does not contain the specific technique, then start technique classification transform job per chunk
+ - Gets a file that was uploaded to Field mapping in the UI.
+ - Suggests the mapping for the file and responds to the API call.
-2. **process_enriched_with_technique:**
+#. **preprocess_input:**
-- Get response from technique transform job and enrich the input with recognized techniques
-- Create input for the clustering model by adding node features if present. And save the resulting file to S3
+ - Transforms the uploaded file in the ``mapping/input//`` S3 folder to JSON format.
+ - Maps the uploaded file to the internal format using the corresponding saved field mappings.
+ - Saves a raw copy of the input file to the ``mapping/raw//`` S3 folder.
+ - Saves the transformed input file to the ``mapping/input//`` S3 folder.
-3. **update_lookup_table:**
+#. **skip_input:**
-- Update technique lookup table.
+ - Get the transformed input files from the ``input//`` S3 folder.
+ - Check if a pipeline execution is running. If no execution is running, start one. Else, add the current input files to the queue.
-4. **create_cluster:**
+#. **enrich_with_technique:**
-- Read the input file saved to S3. If this is the first batch for the input file, then start clustering transform job. Else skip the file.
+ - Get the input data from the ``input//`` S3 folder.
+ - Merge and sort the data from multiple data sources by time, and batch the data (a minimal sketch of this merge-and-batch step appears after this list).
+ - Encode node_features, encode other_attributes_dict, and create mappings from internal ids to user-given ids and from internal ids to data sources.
+ - Sanitize the data into the format required by the cluster model.
+ - Enrich the input with techniques. If the lookup table does not contain the specific technique, then start a technique classification transform job per batch.
-5. **process_cluster:**
+#. **start_tech_task:**
-- Read the response from clustering model.
-- Check if there is another batch that needs to run after the current response. If yes, then create input for the next batch, save to S3, and start clustering transform job. Else, extract agg_alert.json, cluster.json (for internal scratch) to S3, and create input for flow model and save to S3.
+ - Get the input and output S3 paths for the technique classification model.
-6. **create_flow:**
+#. **process_enriched_with_technique:**
-- Triggered by input saved to s3 for flow model. Create flow transform job
+ - Get the response from the technique model and enrich the input with the recognized techniques.
+ - Create input for the clustering model by adding node features if present, and save the resulting file to S3.
-7. **process_flow:**
+#. **update_lookup_table:**
-- Read response from the flow model. Save the flow_output.json to s3 (for internal scratch)
-- Clean up flow.json, cluster.json for user and save to ``output/`` folder.
-- Create enrich_alert_input.json and save to S3 (for internal scratch)
+ - Update technique lookup table.
-8. **create_campaign:**
+#. **create_embedding:**
-- Read enrich_alert_input.json and create campaigns on UI
+ - Read the input file saved to S3.
+ - Filter out alerts that do not have any techniques.
+ - Filter out batch files that do not have more than a threshold number of alerts.
+ - Skip processing the current input any further if there are no batches left to process after filtering.
+ - Create the sequential order of batches to be processed by the pipeline_part_2 DAG.
-9. **create_jira:**
+#. **start_embedding_task:**
-- Read enrich_alert_input.json
-- Read lookup for the JIRA issue to cluster id.
-- If the cluster id already has JIRA created, and the status is ``open`` / ``in progress`` / ``to do``, overwrite the description with new details. If the status is not ``open`` / ``in progress`` / ``to do``, then create new JIRA issue with updated summary and description
-- If the cluster id does not have JIRA created, then create JIRA issue with summary, description and attachment to subset of involved alerts
+ - Get the input and output S3 paths for the cluster part 1 model.
-10. **create_case:**
+#. **process_embedding:**
-- Read enrich_alert_input.json
-- Read lookup for the Elastic case to cluster id.
-- If the cluster id already has case created, and the status is ``open`` / ``in progress``, overwrite the description with new details. If the status is not ``open`` / ``in progress``, then create new case with updated summary and description
-- If the cluster id does not have case created, then create case with summary, description.
+ - Get the response from the cluster part 1 model.
+ - Create input for the clustering part 2 model by adding node features if present, and save the resulting file to S3.
-11. **save_feedback:**
+#. **create_cluster:**
-- Triggered by cut action performed on UI.
-- Fetch involved events and campaigns from UI and update weights for node and event attributes.
-- Create cluster ticket output for involved clusters, and save feedback.
+ - Read the input file saved to S3 for the current batch and start the clustering part 2 model.
+
+#. **process_cluster:**
+
+ - Read the response from the clustering part 2 model.
+ - Check if there is another batch that needs to run after the current response.
+ - If yes, then create the input for the next batch and save it to S3.
+ - Else, extract agg_alert.json and cluster.json (for internal scratch) to S3, and create the input for the flow model and save it to S3.
+
+.. #. **create_flow:**
+
+.. - Triggered by input saved to s3 for flow model. Create flow transform job
+
+#. **process_flow:**
+
+ - Read the response from the flow model. Save the flow_output.json to S3 (for internal scratch).
+ - Clean up flow.json and cluster.json for the user and save them to the ``output/`` folder.
+
+#. **create_campaign:**
+
+ - Create or update campaigns on the UI.
+ - Delete older campaigns if the threshold on the number of campaigns on the UI is reached.
+ - Update the Pipeline dashboard.
+ - Update global metrics.
+ - Update node feature and event attribute weights.
+
+#. **delete_event:**
+
+ - Delete older campaigns and the relevant events from the database if the threshold on the number of campaigns on the UI is reached.
+
+#. **save_feedback:**
+
+ - Triggered by a cut action performed on the UI.
+ - Fetch the involved events and campaigns from the UI and update the weights for node and event attributes.
+ - Create cluster ticket output for the involved clusters, and save the feedback.
+
+#. **restore:**
+
+ - Check if there are any database snapshots to revert to.
+ - If yes, then restore the database to the last saved snapshot and restart it.
+ - Else, clear the database and restart it.
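+
+As a rough illustration of the merge, sort, and batch step described under **enrich_with_technique**, the sketch below merges alerts from several data sources, sorts them by timestamp, and splits them into fixed-size batches. The field names and batch size are hypothetical and not the ones used by the pipeline.
+
+.. code-block:: python
+
+    from typing import Dict, List
+
+    def merge_sort_and_batch(
+        sources: Dict[str, List[dict]],
+        batch_size: int = 500,          # hypothetical batch size
+    ) -> List[List[dict]]:
+        """Merge alerts from all data sources, sort them by time, and batch them."""
+        merged = []
+        for source_name, alerts in sources.items():
+            for alert in alerts:
+                # Remember which data source each alert came from.
+                merged.append({**alert, "data_source": source_name})
+
+        # Sort the combined alerts chronologically ("timestamp" is a hypothetical field).
+        merged.sort(key=lambda alert: alert["timestamp"])
+
+        # Split into fixed-size batches for downstream processing.
+        return [merged[i:i + batch_size] for i in range(0, len(merged), batch_size)]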
+
+.. 13. **create_jira:**
+
+.. - Read enrich_alert_input.json
+.. - Read lookup for the JIRA issue to cluster id.
+.. - If the cluster id already has JIRA created, and the status is ``open`` / ``in progress`` / ``to do``, overwrite the description with new details. If the status is not ``open`` / ``in progress`` / ``to do``, then create new JIRA issue with updated summary and description
+.. - If the cluster id does not have JIRA created, then create JIRA issue with summary, description and attachment to subset of involved alerts
+
+.. 14. **create_case:**
+
+.. - Read enrich_alert_input.json
+.. - Read lookup for the Elastic case to cluster id.
+.. - If the cluster id already has case created, and the status is ``open`` / ``in progress``, overwrite the description with new details. If the status is not ``open`` / ``in progress``, then create new case with updated summary and description
+.. - If the cluster id does not have case created, then create case with summary, description.
diff --git a/docs/source/splunk/splunk.rst b/docs/source/splunk/splunk.rst
index 72b646c..4a5b96a 100644
--- a/docs/source/splunk/splunk.rst
+++ b/docs/source/splunk/splunk.rst
@@ -1,6 +1,11 @@
Configure Splunk
================
+Prerequisites
+-------------
+
+Make sure that you have deployed the Cypienta application detailed in :doc:`../getting_started/deploy` before integrating.
+
Getting AWS Access key
----------------------
@@ -85,6 +90,11 @@ To get search results of Splunk to AWS S3. Follow the steps below:
:alt: Configure action for alert
:align: center
+
+ .. note::
+ To make a quick test of the alert setup, select ``Run on Cron Schedule`` and type in the cron schedule with reference to the UTC-0 timezone. For example, if the current time is 13:46 UTC-0, then the ``Cron Expression`` can be set to 2 minutes past the current time:
+ ``48 * * * *``. This cron expression will run the alert at the 48th minute past every hour, in reference to UTC-0. If this is a one-off test, make sure to use a dummy bucket and path to save the output, and disable the alert once the test is successful.
+
8. Add the ``Bucket name`` which was created using the CloudFormation template to save the results. For ``Object key``, enter ``splunk_input/input/%d-%b-%Y %H:%M:%S.json``. Select ``Account`` that you created on the configuration page from the dropdown. Finally click ``Save``.
.. note::
diff --git a/docs/source/vrl/ai.rst b/docs/source/vrl/ai.rst
index 84a9580..0fe3690 100644
--- a/docs/source/vrl/ai.rst
+++ b/docs/source/vrl/ai.rst
@@ -1,4 +1,4 @@
Using an AI model to generate mappings
----------------------------
+======================================
https://github.com/cypienta/data_mapper_model
diff --git a/docs/source/vrl/transform.rst b/docs/source/vrl/transform.rst
index fd99b0d..c63bdca 100644
--- a/docs/source/vrl/transform.rst
+++ b/docs/source/vrl/transform.rst
@@ -1,5 +1,5 @@
VRL mapping language & engine
----------------------------
+=============================
Once you have the Elastic or Splunk logs stored in S3 in JSON Lines format, you
can use the `VRL (Vector Remap
@@ -8,7 +8,7 @@ your data to meet the format expected by the Cypienta end-to-end
processing.
VRL Transformations
-~~~~~~~~~~~~~~~~~~~
+-------------------
To apply a transformation to your log source using the VRL tool, you
need to specify a VRL program file to transform your data as a string in
@@ -16,7 +16,7 @@ the ``transforms`` key in your ``log_source.yml`` file. Write your VRL
transformation script and save it as a ``.vrl`` file. Here, ``program.vrl``
Example: parsing JSON
-^^^^^^^^^^^^^^^^^^^^^
+~~~~~~~~~~~~~~~~~~~~~
To look at a simple example. Let's assume the following event.
@@ -208,7 +208,7 @@ The resulting event:
}
Writing transformation VRL expressions
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The input to your VRL expression is a single record from your data
source. The output of the VRL expression is the transformed record.
@@ -217,7 +217,7 @@ source. The output of the VRL expression is the transformed record.
Using an AI model to generate mappings
-~~~~~~~~~~~~~~~~~~~
+--------------------------------------
https://github.com/cypienta/data_mapper_model
diff --git a/lumache.py b/lumache.py
deleted file mode 100644
index 3ea7ce9..0000000
--- a/lumache.py
+++ /dev/null
@@ -1,23 +0,0 @@
-"""
-Lumache - Python library for cooks and food lovers.
-"""
-
-__version__ = "0.1.0"
-
-
-class InvalidKindError(Exception):
- """Raised if the kind is invalid."""
- pass
-
-
-def get_random_ingredients(kind=None):
- """
- Return a list of random ingredients as strings.
-
- :param kind: Optional "kind" of ingredients.
- :type kind: list[str] or None
- :raise lumache.InvalidKindError: If the kind is invalid.
- :return: The ingredients list.
- :rtype: list[str]
- """
- return ["shells", "gorgonzola", "parsley"]