From 6504a7e807a3d795300bde4b99fa0aaa13a730b9 Mon Sep 17 00:00:00 2001
From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com>
Date: Thu, 16 Apr 2026 22:28:53 +0530
Subject: [PATCH 01/22] feat(workflow-engine): Add support for callback hooks
---
press/hooks.py | 3 +-
.../doctype/press_workflow/decorators.py | 2 +-
.../press_workflow/press_workflow.json | 70 +++++++++-
.../doctype/press_workflow/press_workflow.py | 125 +++++++++++++++++-
4 files changed, 189 insertions(+), 11 deletions(-)
diff --git a/press/hooks.py b/press/hooks.py
index b6a214b3259..61041d0d501 100644
--- a/press/hooks.py
+++ b/press/hooks.py
@@ -260,7 +260,6 @@
"press.press.doctype.invoice.invoice.finalize_draft_invoices",
"press.press.doctype.invoice.invoice.finalize_razorpay_mandate_invoices",
"press.press.doctype.agent_job.agent_job.fail_old_jobs",
- "press.press.doctype.press_job.press_job.fail_stuck_press_jobs",
"press.press.doctype.site_update.site_update.mark_stuck_updates_as_fatal",
"press.press.doctype.deploy_candidate_build.deploy_candidate_build.cleanup_build_directories",
"press.press.doctype.deploy_candidate_build.deploy_candidate_build.check_builds_status",
@@ -310,7 +309,6 @@
"press.press.doctype.press_webhook_log.press_webhook_log.process",
"press.press.doctype.telegram_message.telegram_message.send_telegram_message",
"press.press.doctype.agent_update.agent_update.process_bulk_agent_update",
- "press.press.doctype.press_job.press_job.process_failed_callbacks",
"press.press.doctype.server_snapshot_recovery.server_snapshot_recovery.resume_warmed_up_restorations",
"press.press.doctype.server_snapshot.server_snapshot.move_pending_snapshots_to_processing",
"press.press.doctype.bench.bench.process_bench_queue",
@@ -350,6 +348,7 @@
"press.press.doctype.app.app.poll_new_releases",
"press.utils.jobs.alert_on_zombie_rq_jobs",
"press.saas.doctype.product_trial.product_trial.replenish_standby_sites",
+ "press.workflow_engine.doctype.press_workflow.press_workflow.retry_workflow_callbacks",
],
"* * * * *": [
"press.press.doctype.virtual_disk_snapshot.virtual_disk_snapshot.sync_physical_backup_snapshots",
diff --git a/press/workflow_engine/doctype/press_workflow/decorators.py b/press/workflow_engine/doctype/press_workflow/decorators.py
index cd6122359a7..679eaca1975 100644
--- a/press/workflow_engine/doctype/press_workflow/decorators.py
+++ b/press/workflow_engine/doctype/press_workflow/decorators.py
@@ -176,7 +176,7 @@ def run_as_workflow(self, *args: Any, **kwargs: Any) -> str:
"args": PressWorkflowObject.store(args) if args else None,
"kwargs": PressWorkflowObject.store(kwargs) if kwargs else None,
"linked_doctype": instance.doctype, # type: ignore
- "linked_docname": instance.name, # type: ignore
+ "linked_docname": str(instance.name), # type: ignore
"main_method_name": self._wrapped.__name__,
"main_method_title": method_title(self._wrapped),
"steps": [
diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.json b/press/workflow_engine/doctype/press_workflow/press_workflow.json
index e3503645a35..9267e6adefe 100644
--- a/press/workflow_engine/doctype/press_workflow/press_workflow.json
+++ b/press/workflow_engine/doctype/press_workflow/press_workflow.json
@@ -30,9 +30,18 @@
"output",
"column_break_lhnh",
"exception",
+ "callback_section",
+ "max_no_of_callback_attempts",
+ "column_break_amxx",
+ "callback_status",
+ "column_break_gvim",
+ "no_of_callback_attempts",
+ "column_break_gteb",
+ "callback_next_retry_at",
"section_break_xglm",
"stdout",
- "traceback"
+ "traceback",
+ "callback_traceback"
],
"fields": [
{
@@ -98,6 +107,7 @@
"read_only": 1
},
{
+ "depends_on": "eval: doc.key_value_store.length > 0",
"fieldname": "kv_storage_section",
"fieldtype": "Section Break",
"label": "KV Storage"
@@ -207,6 +217,62 @@
"label": "Steps",
"options": "Press Workflow Step",
"read_only": 1
+ },
+ {
+ "fieldname": "callback_section",
+ "fieldtype": "Section Break",
+ "label": "Callback"
+ },
+ {
+ "default": "5",
+ "fieldname": "max_no_of_callback_attempts",
+ "fieldtype": "Int",
+ "label": "Maximum Attempts",
+ "reqd": 1,
+ "set_only_once": 1
+ },
+ {
+ "fieldname": "column_break_amxx",
+ "fieldtype": "Column Break"
+ },
+ {
+ "default": "0",
+ "fieldname": "no_of_callback_attempts",
+ "fieldtype": "Int",
+ "label": "Attempts",
+ "non_negative": 1,
+ "read_only": 1,
+ "reqd": 1
+ },
+ {
+ "fieldname": "column_break_gvim",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "callback_next_retry_at",
+ "fieldtype": "Datetime",
+ "label": "Next Retry At",
+ "read_only": 1,
+ "search_index": 1
+ },
+ {
+ "fieldname": "callback_traceback",
+ "fieldtype": "Long Text",
+ "label": "Callback Traceback"
+ },
+ {
+ "default": "Pending",
+ "fieldname": "callback_status",
+ "fieldtype": "Select",
+ "label": "Status",
+ "options": "Pending\nSuccess\nFailure\nFatal",
+ "read_only": 1,
+ "reqd": 1,
+ "search_index": 1
+ },
+ {
+ "fieldname": "column_break_gteb",
+ "fieldtype": "Column Break"
}
],
"grid_page_length": 50,
@@ -217,7 +283,7 @@
"link_fieldname": "workflow"
}
],
- "modified": "2026-03-11 00:51:08.486677",
+ "modified": "2026-04-16 22:25:25.102297",
"modified_by": "Administrator",
"module": "Workflow Engine",
"name": "Press Workflow",
diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.py b/press/workflow_engine/doctype/press_workflow/press_workflow.py
index 25ec0d8a493..3c10461e41f 100644
--- a/press/workflow_engine/doctype/press_workflow/press_workflow.py
+++ b/press/workflow_engine/doctype/press_workflow/press_workflow.py
@@ -38,14 +38,13 @@ class PressWorkflow(Document):
if TYPE_CHECKING:
from frappe.types import DF
- from press.workflow_engine.doctype.press_workflow_kv.press_workflow_kv import (
- PressWorkflowKV,
- )
- from press.workflow_engine.doctype.press_workflow_step.press_workflow_step import (
- PressWorkflowStep,
- )
+ from press.workflow_engine.doctype.press_workflow_kv.press_workflow_kv import PressWorkflowKV
+ from press.workflow_engine.doctype.press_workflow_step.press_workflow_step import PressWorkflowStep
args: DF.Link | None
+ callback_next_retry_at: DF.Datetime | None
+ callback_status: DF.Literal["Pending", "Success", "Failure", "Fatal"]
+ callback_traceback: DF.LongText | None
duration: DF.Duration | None
end: DF.Datetime | None
exception: DF.Link | None
@@ -55,6 +54,8 @@ class PressWorkflow(Document):
linked_doctype: DF.Link
main_method_name: DF.Data
main_method_title: DF.Data
+ max_no_of_callback_attempts: DF.Int
+ no_of_callback_attempts: DF.Int
output: DF.Link | None
start: DF.Datetime | None
status: DF.Literal["Queued", "Running", "Success", "Failure", "Fatal"]
@@ -63,9 +64,16 @@ class PressWorkflow(Document):
traceback: DF.LongText | None
# end: auto-generated types
+ def before_save(self):
+ if self.linked_docname:
+ self.linked_docname = str(self.linked_docname)
+
def after_insert(self):
enqueue_workflow(self.name) # type: ignore
+ def on_trash(self):
+ frappe.db.delete("Press Workflow Task", {"workflow": self.name})
+
def run(self): # noqa: C901 - best to keep it in one place
if not self.linked_doctype or not self.linked_docname:
frappe.throw("Cannot run flow without linked_doctype and linked_docname", frappe.ValidationError)
@@ -138,6 +146,75 @@ def run(self): # noqa: C901 - best to keep it in one place
self.update_skipped_steps_status(save=False)
self.save()
+ self.execute_callback_in_background()
+
+ def execute_callback_in_background(self):
+ frappe.enqueue_doc(
+ self.doctype,
+ self.name,
+ method="execute_callback",
+ queue="default",
+ timeout=300,
+ deduplicate=True,
+ enqueue_after_commit=True,
+ job_id=f"press_workflow||{self.name}||execute_callback",
+ )
+
+ def execute_callback(self):
+ """
+ If the workflow reached its termination state, execute callback
+ - on_workflow_success(doc:PressWorkflow) if status is Success
+ - on_workflow_failure(doc:PressWorkflow) if status is Failure
+ """
+
+ if self.status not in ["Success", "Failure"]:
+ return
+
+ if not frappe.db.exists(self.linked_doctype, self.linked_docname):
+ return
+
+ reference_doc: WorkflowBuilder = frappe.get_doc(self.linked_doctype, self.linked_docname) # type: ignore
+ callback_method = {
+ "Success": "on_workflow_success",
+ "Failure": "on_workflow_failure",
+ }[self.status]
+
+ if not hasattr(reference_doc, callback_method):
+ self.callback_status = "Success"
+ self.save()
+ return
+
+ try:
+ getattr(reference_doc, callback_method)(self)
+ self.callback_status = "Success"
+ self.save()
+ except Exception as e:
+ frappe.log_error(
+ f"Error executing workflow callback {callback_method}",
+ message=str(e),
+ reference_doctype=self.linked_doctype,
+ reference_name=self.linked_docname,
+ )
+
+ self.no_of_callback_attempts += 1
+ if self.no_of_callback_attempts >= self.max_no_of_callback_attempts:
+ self.callback_status = "Fatal"
+ self.callback_traceback = frappe.get_traceback()
+ else:
+ self.callback_status = "Failure"
+ self.callback_next_retry_at = frappe.utils.add_minutes(
+ now_datetime(), 2**self.no_of_callback_attempts
+ )
+
+ self.save()
+
+ if self.callback_status == "Fatal":
+ frappe.log_error(
+ f"Workflow {self.name} has reached max callback retry attempts and is marked as Fatal",
+ reference_doctype="Press Workflow",
+ reference_name=self.name,
+ )
+
def update_skipped_steps_status(self, save: bool = True): # noqa: C901 - best to keep it in one place
is_updated = False
@@ -225,3 +302,39 @@ def retry_workflows():
reference_doctype="Press Workflow",
reference_name=workflow_name,
)
+
+
+def retry_workflow_callbacks():
+ workflows = frappe.get_all(
+ "Press Workflow",
+ filters={
+ "callback_status": "Failure",
+ "callback_next_retry_at": ("<=", now_datetime()),
+ },
+ pluck="name",
+ order_by="modified asc",
+ )
+
+ # Include workflows with no callback_next_retry_at set
+ # and in Pending or Failure state
+ workflows += frappe.get_all(
+ "Press Workflow",
+ filters={
+ "callback_status": ("in", ["Pending", "Failure"]),
+ "callback_next_retry_at": None,
+ },
+ pluck="name",
+ order_by="modified asc",
+ )
+
+ for workflow_name in workflows:
+ try:
+ workflow: PressWorkflow = frappe.get_doc("Press Workflow", workflow_name)
+ workflow.execute_callback_in_background()
+ except Exception as e:
+ frappe.log_error(
+ "Error retrying workflow callback",
+ message=str(e),
+ reference_doctype="Press Workflow",
+ reference_name=workflow_name,
+ )
From 612da86d8fe0d98ee1db866fcbadbb769ebbf8c1 Mon Sep 17 00:00:00 2001
From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com>
Date: Thu, 16 Apr 2026 22:38:46 +0530
Subject: [PATCH 02/22] feat(press-job): Integrate Workflow Engine and create
wrapper
---
.../press_job/jobs/reset_swap_on_server.py | 31 ++
press/press/doctype/press_job/press_job.json | 95 +-----
press/press/doctype/press_job/press_job.py | 283 +++++-------------
press/press/doctype/server/server.py | 3 +-
4 files changed, 122 insertions(+), 290 deletions(-)
create mode 100644 press/press/doctype/press_job/jobs/reset_swap_on_server.py
diff --git a/press/press/doctype/press_job/jobs/reset_swap_on_server.py b/press/press/doctype/press_job/jobs/reset_swap_on_server.py
new file mode 100644
index 00000000000..fb444f1a0ce
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/reset_swap_on_server.py
@@ -0,0 +1,31 @@
+from contextlib import suppress
+
+import frappe
+
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class ResetSwapOnServerJob(PressJob):
+ @flow
+ def execute(self):
+ if self.status == "Pending":
+ self.status = "Running"
+ self.save()
+
+ with suppress(Exception):
+ self.send_telegram_notification()
+
+ self.reset_swap()
+
+ @task
+ def send_telegram_notification(self):
+ telegram_message = frappe.get_doc("Press Settings").telegram_message
+ telegram_message.enqueue(
+ f"Resetting swap on [{self.server}]({frappe.utils.get_url_to_form(self.server_type, self.server)})",
+ "Information",
+ )
+
+ @task(queue="long", timeout=1200)
+ def reset_swap(self):
+ self.server_doc.reset_swap(now=True)
diff --git a/press/press/doctype/press_job/press_job.json b/press/press/doctype/press_job/press_job.json
index ffcf4c982d2..57e58006a72 100644
--- a/press/press/doctype/press_job/press_job.json
+++ b/press/press/doctype/press_job/press_job.json
@@ -15,17 +15,8 @@
"section_break_7",
"server_type",
"server",
- "virtual_machine",
- "column_break_11",
- "arguments",
- "callback_section",
- "callback_executed",
- "callback_failed",
- "callback_retry_limit_reached",
- "callback_failure_issue_resolved",
- "column_break_zynz",
- "callback_failure_count",
- "next_callback_retry_at"
+ "column_break_fhyz",
+ "virtual_machine"
],
"fields": [
{
@@ -35,9 +26,9 @@
"in_standard_filter": 1,
"label": "Job Type",
"options": "Press Job Type",
- "read_only": 1,
"reqd": 1,
- "search_index": 1
+ "search_index": 1,
+ "set_only_once": 1
},
{
"fieldname": "status",
@@ -50,14 +41,6 @@
"reqd": 1,
"search_index": 1
},
- {
- "default": "{}",
- "fieldname": "arguments",
- "fieldtype": "Code",
- "label": "Arguments",
- "read_only": 1,
- "reqd": 1
- },
{
"fieldname": "column_break_3",
"fieldtype": "Column Break"
@@ -91,8 +74,8 @@
"fieldtype": "Link",
"label": "Server Type",
"options": "DocType",
- "read_only": 1,
- "search_index": 1
+ "search_index": 1,
+ "set_only_once": 1
},
{
"fieldname": "server",
@@ -101,8 +84,8 @@
"in_standard_filter": 1,
"label": "Server",
"options": "server_type",
- "read_only": 1,
- "search_index": 1
+ "search_index": 1,
+ "set_only_once": 1
},
{
"fetch_if_empty": 1,
@@ -112,74 +95,26 @@
"in_standard_filter": 1,
"label": "Virtual Machine",
"options": "Virtual Machine",
- "read_only": 1
+ "set_only_once": 1
},
{
- "fieldname": "column_break_11",
+ "fieldname": "column_break_fhyz",
"fieldtype": "Column Break"
- },
- {
- "default": "0",
- "fieldname": "callback_failure_count",
- "fieldtype": "Int",
- "label": "Callback Failure Count",
- "read_only": 1,
- "reqd": 1
- },
- {
- "default": "0",
- "fieldname": "callback_failed",
- "fieldtype": "Check",
- "label": "Callback Failed",
- "read_only": 1
- },
- {
- "default": "0",
- "fieldname": "callback_retry_limit_reached",
- "fieldtype": "Check",
- "label": "Callback Retry Limit Reached",
- "read_only": 1
- },
- {
- "fieldname": "next_callback_retry_at",
- "fieldtype": "Datetime",
- "label": "Next Callback Retry At",
- "read_only": 1
- },
- {
- "fieldname": "callback_section",
- "fieldtype": "Section Break",
- "label": "Callback"
- },
- {
- "default": "0",
- "fieldname": "callback_failure_issue_resolved",
- "fieldtype": "Check",
- "label": "Callback Failure Issue Resolved",
- "read_only": 1
- },
- {
- "fieldname": "column_break_zynz",
- "fieldtype": "Column Break"
- },
- {
- "default": "0",
- "fieldname": "callback_executed",
- "fieldtype": "Check",
- "label": "Callback Executed",
- "read_only": 1
}
],
"grid_page_length": 50,
- "in_create": 1,
"index_web_pages_for_search": 1,
"links": [
{
"link_doctype": "Press Job Step",
"link_fieldname": "job"
+ },
+ {
+ "link_doctype": "Press Workflow",
+ "link_fieldname": "linked_docname"
}
],
- "modified": "2026-03-17 19:26:46.940966",
+ "modified": "2026-04-16 22:33:03.958588",
"modified_by": "Administrator",
"module": "Press",
"name": "Press Job",
diff --git a/press/press/doctype/press_job/press_job.py b/press/press/doctype/press_job/press_job.py
index 0336ed0faa9..f019bbd417d 100644
--- a/press/press/doctype/press_job/press_job.py
+++ b/press/press/doctype/press_job/press_job.py
@@ -1,16 +1,36 @@
+from __future__ import annotations
+
# Copyright (c) 2022, Frappe and contributors
# For license information, please see license.txt
-
-import json
+from typing import TYPE_CHECKING
import frappe
-from frappe.model.document import Document
-from frappe.utils import add_days, add_to_date
+from frappe.utils import now_datetime
+
+from press.workflow_engine.doctype.press_workflow.workflow_builder import WorkflowBuilder
+
+if TYPE_CHECKING:
+ from press.press.doctype.database_server.database_server import DatabaseServer
+ from press.press.doctype.server.server import Server
+ from press.press.doctype.virtual_machine.virtual_machine import VirtualMachine
+ from press.workflow_engine.doctype.press_workflow.press_workflow import PressWorkflow
+
+_JOBS_REGISTRY: dict[str, type] = {}
+
-from press.press.doctype.press_job_step.press_job_step import safe_exec
+def _init_jobs_registry() -> None:
+ global _JOBS_REGISTRY
+ if _JOBS_REGISTRY:
+ return
+ from press.press.doctype.press_job.jobs.reset_swap_on_server import ResetSwapOnServerJob
-class PressJob(Document):
+ _JOBS_REGISTRY = {
+ "Reset Swap": ResetSwapOnServerJob,
+ }
+
+
+class PressJob(WorkflowBuilder):
# begin: auto-generated types
# This code is auto-generated. Do not modify anything in this block.
@@ -19,17 +39,10 @@ class PressJob(Document):
if TYPE_CHECKING:
from frappe.types import DF
- arguments: DF.Code
- callback_executed: DF.Check
- callback_failed: DF.Check
- callback_failure_count: DF.Int
- callback_failure_issue_resolved: DF.Check
- callback_retry_limit_reached: DF.Check
duration: DF.Duration | None
end: DF.Datetime | None
job_type: DF.Link
name: DF.Int | None
- next_callback_retry_at: DF.Datetime | None
server: DF.DynamicLink | None
server_type: DF.Link | None
start: DF.Datetime | None
@@ -37,6 +50,23 @@ class PressJob(Document):
virtual_machine: DF.Link | None
# end: auto-generated types
+ @property
+ def server_doc(self) -> "Server | DatabaseServer":
+ if hasattr(self, "_server_doc") and self._server_doc: # type: ignore
+ return self._server_doc # type: ignore
+ self._server_doc = frappe.get_doc(self.server_type, self.server)
+ return self._server_doc
+
+ @property
+ def virtual_machine_doc(self) -> VirtualMachine | None:
+ if not self.virtual_machine:
+ return None
+
+ if hasattr(self, "_virtual_machine_doc") and self._virtual_machine_doc: # type: ignore
+ return self._virtual_machine_doc # type: ignore
+ self._virtual_machine_doc = frappe.get_doc("Virtual Machine", self.virtual_machine)
+ return self._virtual_machine_doc # type: ignore
+
def before_insert(self):
frappe.db.get_value(self.server_type, self.server, "status", for_update=True)
if existing_jobs := frappe.db.get_all(
@@ -53,212 +83,47 @@ def before_insert(self):
)
def after_insert(self):
- self.create_press_job_steps()
- self.execute()
+ self.start_workflow()
def on_update(self):
if self.has_value_changed("status"):
- self.process_callback(save=True)
-
- def on_change(self):
- self.publish_update()
+ save = False
+ if self.status == "Running" and not self.start:
+ self.start = now_datetime()
+ save = True
+
+ if self.status in ["Success", "Failure"]:
+ if not self.start:
+ self.start = now_datetime()
+ if not self.end:
+ self.end = now_datetime()
+ save = True
- def create_press_job_steps(self):
- job_type = frappe.get_doc("Press Job Type", self.job_type)
- for step in job_type.steps:
- doc = frappe.get_doc(
- {
- "doctype": "Press Job Step",
- "job": self.name,
- "status": "Pending",
- "job_type": self.job_type,
- "step_name": step.step_name,
- "wait_until_true": step.wait_until_true,
- }
- )
- doc.insert()
+ if save:
+ self.save()
- def execute(self):
- self.status = "Running"
- self.start = frappe.utils.now_datetime()
- self.save()
- self.next()
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.workflow_name = self.job_type
+ _init_jobs_registry()
+ if self.job_type in _JOBS_REGISTRY:
+ self.__class__ = _JOBS_REGISTRY[self.job_type]
- def fail(self, arguments=None):
- self.status = "Failure"
- pending_steps = frappe.get_all("Press Job Step", {"job": self.name, "status": "Pending"})
- for step in pending_steps:
- frappe.db.set_value("Press Job Step", step.name, "status", "Skipped")
- self.end = frappe.utils.now_datetime()
- self.duration = (self.end - self.start).total_seconds()
- self.save()
+ def start_workflow(self) -> str:
+ if not hasattr(self, "execute"):
+ raise NotImplementedError("Press Job implementation must have an execute method")
+ return self.execute.run_as_workflow()
- def succeed(self):
+ def on_workflow_success(self, workflow: "PressWorkflow"):
self.status = "Success"
- self.end = frappe.utils.now_datetime()
- self.duration = (self.end - self.start).total_seconds()
- self.save()
-
- @frappe.whitelist()
- def next(self, arguments=None):
- if arguments:
- old_arguments = json.loads(self.arguments)
- old_arguments.update(arguments)
- self.arguments = json.dumps(old_arguments, indent=2)
- self.status = "Running"
self.save()
- next_step = self.next_step
-
- if not next_step:
- self.succeed()
- return
-
- frappe.enqueue_doc("Press Job Step", next_step, "execute", enqueue_after_commit=True)
- @frappe.whitelist()
- def force_continue(self):
- for step in frappe.get_all(
- "Press Job Step",
- {"job": self.name, "status": ("in", ("Failure", "Skipped"))},
- pluck="name",
- ):
- frappe.db.set_value("Press Job Step", step, "status", "Pending")
- self.next()
-
- @frappe.whitelist()
- def force_fail(self):
- for step in frappe.get_all(
- "Press Job Step",
- {"job": self.name, "status": "Pending"},
- pluck="name",
- ):
- frappe.db.set_value("Press Job Step", step, "status", "Failure")
- frappe.db.set_value("Press Job", self.name, "status", "Failure")
-
- @property
- def next_step(self):
- return frappe.db.get_value(
- "Press Job Step",
- {"job": self.name, "status": "Pending"},
- "name",
- order_by="name asc",
- as_dict=True,
- )
-
- def detail(self):
- steps = frappe.get_all(
- "Press Job Step",
- filters={"job": self.name},
- fields=["name", "step_name", "status", "start", "end", "duration"],
- order_by="name asc",
- )
+ if hasattr(self, "on_press_job_success"):
+ self.on_press_job_success(workflow)
- for index, step in enumerate(steps):
- if step.status == "Pending" and index and steps[index - 1].status == "Success":
- step.status = "Running"
-
- return {
- "name": self.name,
- "job_type": self.job_type,
- "server": self.server,
- "server_type": self.server_type,
- "virtual_machine": self.virtual_machine,
- "status": self.status,
- "steps": steps,
- }
-
- def publish_update(self):
- frappe.publish_realtime(
- "press_job_update", doctype=self.doctype, docname=self.name, message=self.detail()
- )
-
- @frappe.whitelist()
- def mark_callback_failure_issue_resolved(self):
- self.callback_failure_issue_resolved = True
+ def on_workflow_failure(self, workflow: "PressWorkflow"):
+ self.status = "Failure"
self.save()
- def process_callback(self, save: bool = False): # noqa: C901
- if self.status not in ["Success", "Failure"]:
- return
-
- if self.callback_executed or self.callback_failure_issue_resolved:
- return
-
- job_type = frappe.db.get_value(
- "Press Job Type", self.job_type, ["callback_script", "callback_max_retry"], as_dict=True
- )
- if not job_type.callback_script:
- self.callback_executed = True
- if save:
- self.save()
- # No callback script defined, so just mark as executed
- return
-
- if self.callback_failed and self.callback_failure_count >= (job_type.callback_max_retry or 0):
- self.callback_retry_limit_reached = True
- self.next_callback_retry_at = None
- if save:
- self.save()
- return
-
- local = {"arguments": frappe._dict(json.loads(self.arguments)), "doc": self}
- current_user = frappe.session.user
- try:
- frappe.set_user("Administrator")
- safe_exec(job_type.callback_script, _locals=local)
- self.callback_failed = False
- self.callback_executed = True
- self.next_callback_retry_at = None
- self.callback_failure_issue_resolved = False
- except Exception:
- frappe.log_error(f"Error executing callback script for {self.name}")
- self.callback_failed = True
- self.callback_failure_count += 1
- self.next_callback_retry_at = add_to_date(None, minutes=5)
- finally:
- frappe.set_user(current_user)
-
- if save:
- self.save()
-
- def on_trash(self):
- frappe.db.delete("Press Job Step", {"job": self.name})
-
-
-def fail_stuck_press_jobs():
- jobs = frappe.get_all(
- "Press Job",
- filters={
- "status": ("in", ["Running", "Pending"]),
- "creation": ("<", add_days(None, -1)),
- },
- pluck="name",
- limit=100,
- )
- for job_name in jobs:
- job = PressJob("Press Job", job_name)
- job.force_fail()
- frappe.db.commit()
-
-
-def process_failed_callbacks():
- jobs = frappe.get_all(
- "Press Job",
- filters={
- "status": ("in", ["Success", "Failure"]),
- "callback_failed": True,
- "callback_executed": False,
- "callback_failure_issue_resolved": False,
- "callback_retry_limit_reached": False,
- "next_callback_retry_at": ("<", frappe.utils.now_datetime()),
- },
- pluck="name",
- )
- for job_name in jobs:
- frappe.enqueue_doc(
- "Press Job",
- job_name,
- "process_callback",
- enqueue_after_commit=True,
- save=True,
- )
+ if hasattr(self, "on_press_job_failure"):
+ self.on_press_job_failure(workflow)
diff --git a/press/press/doctype/server/server.py b/press/press/doctype/server/server.py
index e38e4e0edda..384e2ca7ea8 100644
--- a/press/press/doctype/server/server.py
+++ b/press/press/doctype/server/server.py
@@ -1529,7 +1529,7 @@ def increase_swap_locked(self, swap_size=4):
self._increase_swap(swap_size)
@frappe.whitelist()
- def reset_swap(self, swap_size=1):
+ def reset_swap(self, swap_size=1, now: bool = False):
"""
Replace existing swap files with new swap file of given size
"""
@@ -1540,6 +1540,7 @@ def reset_swap(self, swap_size=1):
queue="long",
timeout=1200,
**{"swap_size": swap_size},
+ now=now,
)
def reset_swap_locked(self, swap_size=1):
From 5cf1ced6d44149a0a5e5693c995318334862773c Mon Sep 17 00:00:00 2001
From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com>
Date: Thu, 16 Apr 2026 23:29:02 +0530
Subject: [PATCH 03/22] feat(press-job): Move all press job types to code
---
press/fixtures/press_job_type.json | 85 -----
.../database_server/database_server.py | 23 +-
.../doctype/press_job/jobs/archive_server.py | 63 ++++
.../doctype/press_job/jobs/attach_volume.py | 17 +
.../jobs/auto_scale_application_server.py | 16 +
.../auto_scale_down_application_server.py | 19 +
.../jobs/auto_scale_up_application_server.py | 18 +
.../doctype/press_job/jobs/create_server.py | 350 ++++++++++++++++++
.../press_job/jobs/create_server_snapshot.py | 68 ++++
.../press_job/jobs/increase_disk_size.py | 106 ++++++
.../doctype/press_job/jobs/increase_swap.py | 27 ++
.../press_job/jobs/prune_docker_system.py | 27 ++
.../press_job/jobs/prune_mirror_registry.py | 15 +
.../press_job/jobs/remove_on_prem_failover.py | 58 +++
.../press_job/jobs/reset_swap_on_server.py | 4 -
.../doctype/press_job/jobs/resize_server.py | 101 +++++
.../jobs/resume_services_after_snapshot.py | 47 +++
.../press_job/jobs/setup_on_prem_failover.py | 100 +++++
.../doctype/press_job/jobs/snapshot_disk.py | 106 ++++++
.../press_job/jobs/stop_and_start_server.py | 50 +++
.../jobs/trigger_build_server_cleanup.py | 18 +
.../doctype/press_job/jobs/upgrade_mariadb.py | 27 ++
.../press/doctype/press_job/jobs/warn_disk.py | 13 +
press/press/doctype/press_job/press_job.json | 13 +-
press/press/doctype/press_job/press_job.py | 66 +++-
.../registry_server/registry_server.py | 11 +-
press/press/doctype/server/server.py | 43 ++-
.../tls_certificate/tls_certificate.py | 6 +-
.../doctype/press_workflow/exceptions.py | 4 +-
.../press_workflow/workflow_builder.py | 14 +
30 files changed, 1398 insertions(+), 117 deletions(-)
create mode 100644 press/press/doctype/press_job/jobs/archive_server.py
create mode 100644 press/press/doctype/press_job/jobs/attach_volume.py
create mode 100644 press/press/doctype/press_job/jobs/auto_scale_application_server.py
create mode 100644 press/press/doctype/press_job/jobs/auto_scale_down_application_server.py
create mode 100644 press/press/doctype/press_job/jobs/auto_scale_up_application_server.py
create mode 100644 press/press/doctype/press_job/jobs/create_server.py
create mode 100644 press/press/doctype/press_job/jobs/create_server_snapshot.py
create mode 100644 press/press/doctype/press_job/jobs/increase_disk_size.py
create mode 100644 press/press/doctype/press_job/jobs/increase_swap.py
create mode 100644 press/press/doctype/press_job/jobs/prune_docker_system.py
create mode 100644 press/press/doctype/press_job/jobs/prune_mirror_registry.py
create mode 100644 press/press/doctype/press_job/jobs/remove_on_prem_failover.py
create mode 100644 press/press/doctype/press_job/jobs/resize_server.py
create mode 100644 press/press/doctype/press_job/jobs/resume_services_after_snapshot.py
create mode 100644 press/press/doctype/press_job/jobs/setup_on_prem_failover.py
create mode 100644 press/press/doctype/press_job/jobs/snapshot_disk.py
create mode 100644 press/press/doctype/press_job/jobs/stop_and_start_server.py
create mode 100644 press/press/doctype/press_job/jobs/trigger_build_server_cleanup.py
create mode 100644 press/press/doctype/press_job/jobs/upgrade_mariadb.py
create mode 100644 press/press/doctype/press_job/jobs/warn_disk.py
diff --git a/press/fixtures/press_job_type.json b/press/fixtures/press_job_type.json
index db909307562..5da8cd90430 100644
--- a/press/fixtures/press_job_type.json
+++ b/press/fixtures/press_job_type.json
@@ -329,91 +329,6 @@
}
]
},
- {
- "callback_max_retry": 0,
- "callback_script": null,
- "docstatus": 0,
- "doctype": "Press Job Type",
- "modified": "2025-08-31 20:54:46.857348",
- "name": "Create Server (old)",
- "steps": [
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.provision()\n",
- "step_name": "Create Server",
- "wait_until_true": 0
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.sync()\nresult = (machine.status == \"Running\", False)\n",
- "step_name": "Wait for Server to start",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.ping_ansible()\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Ping Server\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", False)\n",
- "step_name": "Wait for Server to be accessible",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.wait_for_cloud_init()",
- "step_name": "Check Cloud Init status",
- "wait_until_true": 0
- },
- {
- "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Wait for Cloud Init to finish\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status in (\"Success\", \"Failure\"), False)",
- "step_name": "Wait for Cloud Init to finish",
- "wait_until_true": 1
- },
- {
- "script": "provider = frappe.db.get_value(doc.server_type, doc.server, 'provider')\nif provider == \"Hetzner\" and doc.server_type != \"Proxy Server\":\n vm = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\n vm.attach_new_volume(100)\n vm.sync()\n server = frappe.get_doc(doc.server_type, doc.server)\n server.validate_mounts()\n server.save()\n server.mount_volumes()\n",
- "step_name": "Create and mount volumes",
- "wait_until_true": 0
- },
- {
- "script": "provider = frappe.db.get_value(doc.server_type, doc.server, 'provider')\nif provider == 'Hetzner' and doc.server_type != \"Proxy Server\":\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Mount Volumes\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", False)\nelse:\n result = (True, False)",
- "step_name": "Wait for volumes to mount",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"Hetzner\" :\n if server.doctype == \"Server\":\n server.setup_docker()\n elif server.doctype == \"Database Server\":\n server.set_mariadb_mount_dependency()\n ",
- "step_name": "Configure apps for mounts",
- "wait_until_true": 0
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.update_tls_certificate()",
- "step_name": "Update TLS Certificate",
- "wait_until_true": 0
- },
- {
- "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Setup TLS Certificates\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status in (\"Success\", \"Failure\"), False)",
- "step_name": "Wait for TLS Certificate to be updated",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.update_agent_ansible()",
- "step_name": "Update Agent Ansible",
- "wait_until_true": 0
- },
- {
- "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Update Agent\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status in (\"Success\", \"Failure\"), False)",
- "step_name": "Wait for Agent to be updated",
- "wait_until_true": 1
- },
- {
- "script": "if doc.server_type == \"Database Server\":\n server = frappe.get_doc(\"Database Server\", doc.server)\n server.upgrade_mariadb()",
- "step_name": "Upgrade MariaDB",
- "wait_until_true": 0
- },
- {
- "script": "if doc.server_type == \"Database Server\":\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Upgrade MariaDB\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\nelse:\n result = (True,)",
- "step_name": "Wait for MariaDB Upgrade to Complete",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.set_additional_config()",
- "step_name": "Set additional config",
- "wait_until_true": 0
- }
- ]
- },
{
"callback_max_retry": 0,
"callback_script": "",
diff --git a/press/press/doctype/database_server/database_server.py b/press/press/doctype/database_server/database_server.py
index f3c3cc04c21..30ed747284a 100644
--- a/press/press/doctype/database_server/database_server.py
+++ b/press/press/doctype/database_server/database_server.py
@@ -547,7 +547,7 @@ def _restart_mariadb(self):
def stop_mariadb(self):
frappe.enqueue_doc(self.doctype, self.name, "_stop_mariadb", timeout=1800)
- def _stop_mariadb(self):
+ def _stop_mariadb(self, throw_on_failure: bool = False):
ansible = Ansible(
playbook="stop_mariadb.yml",
server=self,
@@ -558,8 +558,12 @@ def _stop_mariadb(self):
},
)
play = ansible.run()
- if play.status == "Failure":
+ if play.status != "Success":
log_error("MariaDB Stop Error", server=self.name)
+ if throw_on_failure:
+ frappe.throw(f"Failed to stop MariaDB on server: {self.name}")
+
+ return play
@frappe.whitelist()
def run_upgrade_mariadb_job(self):
@@ -568,7 +572,7 @@ def run_upgrade_mariadb_job(self):
def upgrade_mariadb(self):
frappe.enqueue_doc(self.doctype, self.name, "_upgrade_mariadb", timeout=1800)
- def _upgrade_mariadb(self):
+ def _upgrade_mariadb(self, throw_on_failure: bool = False):
ansible = Ansible(
playbook="upgrade_mariadb.yml",
server=self,
@@ -579,8 +583,10 @@ def _upgrade_mariadb(self):
},
)
play = ansible.run()
- if play.status == "Failure":
+ if play.status != "Success":
log_error("MariaDB Upgrade Error", server=self.name)
+ if throw_on_failure:
+ frappe.throw(f"Failed to upgrade MariaDB on server: {self.name}")
return play
def _downgrade_mariadb_to_10_6(self):
@@ -1252,7 +1258,7 @@ def prepare_mariadb_replica(self):
self.doctype, self.name, "_prepare_mariadb_replica", queue="long", timeout=1200, at_front=True
)
- def _prepare_mariadb_replica(self):
+ def _prepare_mariadb_replica(self, throw_on_failure: bool = False):
if self.is_primary:
return
@@ -1271,8 +1277,13 @@ def _prepare_mariadb_replica(self):
"mariadb_server_id": self.server_id,
},
)
- ansible.run()
+ play = ansible.run()
+ if play.status != "Success":
+ raise Exception("Failed to prepare MariaDB replica")
except Exception:
+ if throw_on_failure:
+ raise
+
log_error("MariaDB Prepare Replica Exception", server=self.as_dict())
def configure_replication(self, gtid_slave_pos: str | None = None):
diff --git a/press/press/doctype/press_job/jobs/archive_server.py b/press/press/doctype/press_job/jobs/archive_server.py
new file mode 100644
index 00000000000..f5be638da93
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/archive_server.py
@@ -0,0 +1,63 @@
+from contextlib import suppress
+
+import frappe
+
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class ArchiveServerJob(PressJob):
+ @flow
+ def execute(self):
+ self.disable_termination_protection()
+ self.terminate_virtual_machine()
+ self.wait_for_virtual_machine_to_terminate()
+
+ @task
+ def disable_termination_protection(self):
+ self.virtual_machine_doc.disable_termination_protection()
+
+ @task(queue="long", timeout=600)
+ def terminate_virtual_machine(self):
+ self.virtual_machine_doc.terminate()
+
+ @task
+ def wait_for_virtual_machine_to_terminate(self):
+ with suppress(Exception):
+ self.virtual_machine_doc.sync()
+
+ if self.virtual_machine_doc.status == "Terminated":
+ return
+
+ self.defer_current_task()
+
+ def on_press_job_success(self, _):
+ if self.server_type not in ["Server", "Database Server"]:
+ return
+
+ if not self.server_doc.is_for_recovery:
+ return
+
+ recovery_record_name = None
+ if self.server_type == "Server":
+ recovery_record_name = frappe.db.get_value(
+ "Server Snapshot Recovery", {"app_server": self.server}, "name"
+ )
+ elif self.server_type == "Database Server":
+ recovery_record_name = frappe.db.get_value(
+ "Server Snapshot Recovery", {"database_server": self.server}, "name"
+ )
+
+ if not recovery_record_name:
+ return
+
+ recovery_record = frappe.get_doc(
+ "Server Snapshot Recovery",
+ recovery_record_name,
+ for_update=True,
+ )
+ if self.server_type == "Server":
+ recovery_record.app_server_archived = True
+ else:
+ recovery_record.database_server_archived = True
+ recovery_record.save()
diff --git a/press/press/doctype/press_job/jobs/attach_volume.py b/press/press/doctype/press_job/jobs/attach_volume.py
new file mode 100644
index 00000000000..f64cdb66f90
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/attach_volume.py
@@ -0,0 +1,17 @@
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class AttachVolumeJob(PressJob):
+ @flow
+ def execute(self):
+ self.attach_volume()
+
+ @task
+ def attach_volume(self):
+ machine = self.virtual_machine_doc
+
+ if machine.cloud_provider in ["AWS EC2", "OCI"]:
+ machine.attach_new_volume(machine.size, machine.iops, machine.throughput)
+ else:
+ machine.attach_volume(size=100)
diff --git a/press/press/doctype/press_job/jobs/auto_scale_application_server.py b/press/press/doctype/press_job/jobs/auto_scale_application_server.py
new file mode 100644
index 00000000000..baeb86924e9
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/auto_scale_application_server.py
@@ -0,0 +1,16 @@
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class AutoScaleApplicationServerJob(PressJob):
+ @flow
+ def execute(self):
+ if self.server_type != "Server":
+ return
+
+ self.scale_app_server()
+
+ @task
+ def scale_app_server(self):
+ """Scale Application Server"""
+ self.server_doc.scale_up()
diff --git a/press/press/doctype/press_job/jobs/auto_scale_down_application_server.py b/press/press/doctype/press_job/jobs/auto_scale_down_application_server.py
new file mode 100644
index 00000000000..55af9c393e5
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/auto_scale_down_application_server.py
@@ -0,0 +1,19 @@
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class AutoScaleDownApplicationServerJob(PressJob):
+ @flow
+ def execute(self):
+ if self.server_type != "Server":
+ return
+
+ self.scale_down()
+
+ @task
+ def scale_down(self):
+ """Scale Down Application Server"""
+ if not self.server_doc.scaled_up:
+ return
+
+ self.server_doc.scale_down(is_automatically_triggered=True)
diff --git a/press/press/doctype/press_job/jobs/auto_scale_up_application_server.py b/press/press/doctype/press_job/jobs/auto_scale_up_application_server.py
new file mode 100644
index 00000000000..4c78e7a4bd6
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/auto_scale_up_application_server.py
@@ -0,0 +1,18 @@
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class AutoScaleUpApplicationServerJob(PressJob):
+ @flow
+ def execute(self):
+ if self.server_type != "Server":
+ return
+ self.scale_up()
+
+ @task
+ def scale_up(self):
+ """Scale Up Application Server"""
+ if self.server_doc.scaled_up:
+ return
+
+ self.server_doc.scale_up(is_automatically_triggered=True)
diff --git a/press/press/doctype/press_job/jobs/create_server.py b/press/press/doctype/press_job/jobs/create_server.py
new file mode 100644
index 00000000000..add0a81b700
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/create_server.py
@@ -0,0 +1,350 @@
+import time
+from typing import TYPE_CHECKING
+
+import frappe
+
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+if TYPE_CHECKING:
+ from press.press.doctype.database_server.database_server import DatabaseServer
+ from press.press.doctype.server_snapshot_recovery.server_snapshot_recovery import ServerSnapshotRecovery
+ from press.press.doctype.virtual_machine_image.virtual_machine_image import VirtualMachineImage
+
+
+class CreateServerJob(PressJob):
+ @flow
+ def execute(self):
+ self.provision_server()
+ self.wait_for_server_to_start()
+ self.wait_for_server_to_be_accessible()
+ self.sync_default_volumes()
+
+ if self.virtual_machine_doc.data_disk_snapshot:
+ self.create_volume_from_snapshot()
+ self.attach_snapshotted_volume()
+ self.sync_attached_volumes()
+ self.mount_snapshotted_volume()
+
+ self.check_cloud_init_status()
+
+ if self.server_doc.provider == "Hetzner" and self.virtual_machine:
+ self.create_and_mount_volumes_hetzner()
+ self.configure_apps_for_mounts_hetzner()
+
+ self.update_tls_certificate()
+ self.update_agent()
+
+ if self.server_type == "Database Server" or (
+ self.server_type == "Server" and self.server_doc.is_unified_server
+ ):
+ self.upgrade_mariadb()
+
+ if self.is_setup_db_replication:
+ self.prepare_mariadb_replica()
+ self.configure_mariadb_replica()
+ self.start_mariadb_replica()
+
+ self.set_additional_config()
+
+ if self.is_fs_server:
+ self.share_benches_over_nfs()
+
+ @property
+ def is_setup_db_replication(self):
+ return self.server_type == "Database Server" and self.arguments_dict.get(
+ "setup_db_replication", False
+ )
+
+ @property
+ def is_fs_server(self):
+ return self.server.startswith("fs") and self.server_type == "Server"
+
+ @task
+ def provision_server(self):
+ machine = self.virtual_machine_doc
+ machine.provision()
+
+ @task
+ def wait_for_server_to_start(self):
+ retry_later = True
+ try:
+ self.virtual_machine_doc.sync()
+ except (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError):
+ retry_later = True
+ except Exception as e:
+ if "rate_limit_exceeded" in str(e):
+ retry_later = True
+ else:
+ raise e
+
+ if self.virtual_machine_doc.status == "Running":
+ retry_later = False
+
+ if retry_later:
+ self.defer_current_task()
+
+ @task
+ def wait_for_server_to_be_accessible(self):
+ server = self.server_doc
+ play = server.ping_ansible()
+ if not play or play.status != "Success":
+ self.defer_current_task()
+
+ self.virtual_machine_doc.reload()
+ if not self.virtual_machine_doc.private_ip_address:
+ raise Exception("Virtual machine does not have a private IP address yet")
+
+ @task
+ def sync_default_volumes(self):
+ try:
+ self.virtual_machine_doc.sync()
+ if len(self.virtual_machine_doc.volumes) > 0:
+ return
+ except (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError):
+ pass
+
+ self.defer_current_task()
+
+ @task
+ def create_volume_from_snapshot(self):
+ if not self.virtual_machine_doc.data_disk_snapshot:
+ return
+
+ max_retries = self.arguments_dict.get("max_volume_creation_retries", 6)
+ if self.kv.get("volume_creation_attempts", 0) >= max_retries:
+ raise Exception(f"Failed to create volume from snapshot after {max_retries} retries")
+
+ is_created = self.virtual_machine_doc.create_data_disk_volume_from_snapshot()
+ if is_created:
+ return
+
+ self.kv.set("volume_creation_attempts", self.kv.get("volume_creation_attempts", 0) + 1)
+ self.defer_current_task()
+
+ @task
+ def attach_snapshotted_volume(self):
+ vm = frappe.get_doc("Virtual Machine", self.virtual_machine)
+ if not vm.data_disk_snapshot:
+ return
+
+ while True:
+ is_attached = vm.check_and_attach_data_disk_snapshot_volume()
+ if is_attached:
+ return
+ time.sleep(10)
+ vm = frappe.get_doc("Virtual Machine", self.virtual_machine)
+
+ @task
+ def sync_attached_volumes(self):
+ server = self.server_doc
+ if server.provider != "AWS EC2" or not frappe.db.get_value(
+ "Virtual Machine", server.virtual_machine, "data_disk_snapshot"
+ ):
+ return
+
+ while True:
+ time.sleep(10)
+ try:
+ vm = frappe.get_doc("Virtual Machine", server.virtual_machine)
+ vm.sync()
+ if len(vm.volumes) == 0 or (vm.data_disk_snapshot_attached and len(vm.volumes) == 1):
+ continue
+ server.reload()
+ server.validate_mounts()
+ server.save()
+ break
+ except (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError):
+ continue
+
+ @task(queue="long", timeout=7200)
+ def mount_snapshotted_volume(self):
+ if self.server_doc.provider != "AWS EC2" or not self.virtual_machine_doc.data_disk_snapshot:
+ return
+
+ cleanup_db_replication_files = False
+ if self.server_type == "Database Server" and (
+ self.server_doc.is_for_recovery or self.is_setup_db_replication
+ ):
+ cleanup_db_replication_files = True
+
+ self.server_doc.mount_volumes(
+ now=True,
+ stop_docker_before_mount=self.server_type == "Server",
+ stop_mariadb_before_mount=self.server_type == "Database Server",
+ start_docker_after_mount=self.server_type == "Server" and not self.server_doc.is_for_recovery,
+ start_mariadb_after_mount=not self.is_setup_db_replication,
+ cleanup_db_replication_files=cleanup_db_replication_files,
+ rotate_additional_volume_metadata=True,
+ )
+
+ @task(queue="short")
+ def check_cloud_init_status(self):
+ self.server_doc._wait_for_cloud_init()
+
+ @task(queue="long", timeout=1200)
+ def create_and_mount_volumes_hetzner(self):
+ if self.server_doc.provider != "Hetzner" or not self.virtual_machine:
+ return
+
+ if not self.virtual_machine_doc.virtual_machine_image:
+ return
+
+ vmi: VirtualMachineImage = frappe.get_doc(
+ "Virtual Machine Image", self.virtual_machine_doc.virtual_machine_image
+ )
+ if not vmi.has_data_volume:
+ return
+
+ server = self.server_doc
+ if server.plan:
+ data_disk_size = int(frappe.db.get_value("Server Plan", server.plan, "disk"))
+ else:
+ data_disk_size = 25
+
+ self.virtual_machine_doc.attach_new_volume(data_disk_size)
+
+ max_sync_tries = 100
+ while max_sync_tries:
+ try:
+ self.virtual_machine_doc.sync()
+ break
+ except Exception as e:
+ max_sync_tries -= 1
+ if max_sync_tries <= 0:
+ raise e
+
+ server.validate_mounts()
+ server.save(ignore_version=True)
+ server.mount_volumes(now=True)
+
+ @task(queue="long", timeout=1200)
+ def configure_apps_for_mounts_hetzner(self):
+ server = self.server_doc
+ if server.provider != "Hetzner" or not getattr(server, "has_data_volume", False):
+ return
+
+ if server.doctype == "Server":
+ server.setup_docker(now=True)
+ elif server.doctype == "Database Server":
+ server.set_mariadb_mount_dependency(now=True)
+
+ @task
+ def update_tls_certificate(self):
+ self.server_doc.update_tls_certificate(throw_on_failure=True)
+
+ @task
+ def update_agent(self):
+ self.server_doc._update_agent_ansible(throw_on_failure=True)
+
+ @task(queue="long", timeout=1800)
+ def upgrade_mariadb(self):
+ if self.server_type == "Database Server":
+ play = self.server_doc._upgrade_mariadb()
+ if play.status != "Success":
+ raise Exception("Failed to upgrade MariaDB")
+
+ if self.server_type == "Server" and self.server_doc.is_unified_server:
+ database_server: DatabaseServer = frappe.get_doc(
+ "Database Server", self.server_doc.database_server
+ )
+ database_server._upgrade_mariadb()
+
+ @task(queue="long", timeout=1200)
+ def prepare_mariadb_replica(self):
+ if not self.is_setup_db_replication:
+ return
+
+ self.server_doc._prepare_mariadb_replica(throw_on_failure=True)
+
+ @task
+ def configure_mariadb_replica(self):
+ if not self.is_setup_db_replication:
+ return
+
+ self.server_doc.configure_replication()
+
+ @task
+ def start_mariadb_replica(self):
+ if not self.is_setup_db_replication:
+ return
+
+ self.server_doc.start_replication()
+
+ @task
+ def set_additional_config(self):
+ self.server_doc.set_additional_config()
+
+ @task
+ def share_benches_over_nfs(self):
+ if self.server.startswith("fs") and self.server_type == "Server":
+ primary_server = frappe.db.get_value("Server", self.server, "primary")
+ nfs_volume_attachment = frappe.get_doc(
+ {"doctype": "NFS Volume Attachment", "primary_server": primary_server}
+ )
+ nfs_volume_attachment.insert(ignore_permissions=True)
+
+ # Callbacks
+ def on_press_job_success(self, _):
+ args = self.arguments_dict
+
+ # Mark provisioning flag of the server
+ if self.server_type in ["Server", "Database Server"]:
+ self.server_doc.is_provisioning_press_job_completed = 1
+ self.server_doc.save(ignore_permissions=True)
+
+ # In case of unified server, also mark linked database server as provisioned
+ if self.server_type == "Server" and self.server_doc.is_unified_server:
+ frappe.db.set_value(
+ "Database Server",
+ self.server_doc.database_server,
+ "is_provisioning_press_job_completed",
+ 1,
+ update_modified=False,
+ )
+
+ # Update "Server Snapshot Recovery" record if this server is being provisioned for recovery
+ if self.server_type in ["Server", "Database Server"] and self.server_doc.is_for_recovery:
+ recovery_record_name = frappe.db.get_value(
+ "Server Snapshot Recovery",
+ {
+ "app_server" if self.server_type == "Server" else "database_server": self.server,
+ },
+ "name",
+ )
+ if recovery_record_name:
+ recovery_record = frappe.get_doc(
+ "Server Snapshot Recovery", recovery_record_name, for_update=True
+ )
+ if self.server_type == "Server":
+ recovery_record.is_app_server_ready = True
+ else:
+ recovery_record.is_database_server_ready = True
+ recovery_record.save()
+
+		# Resume logical replication backup if it was set up as part of server provisioning
+ if self.server_type in ["Server", "Database Server"] and "logical_replication_backup" in args:
+ frappe.get_doc("Logical Replication Backup", args.get("logical_replication_backup")).next()
+
+ def on_press_job_failure(self, _):
+ if self.server_type not in ["Server", "Database Server"]:
+ return
+
+ # Mark Server Snapshot Recovery as failed if the server provisioning was for recovery
+ if self.server_doc.is_for_recovery:
+ recovery_record_name = frappe.db.get_value(
+ "Server Snapshot Recovery",
+ {"app_server" if self.server_type == "Server" else "database_server": self.server},
+ "name",
+ )
+ if recovery_record_name:
+ recovery_record: ServerSnapshotRecovery = frappe.get_doc(
+ "Server Snapshot Recovery", recovery_record_name, for_update=True
+ )
+ recovery_record.mark_server_provisioning_as_failed()
+
+		# Mark logical replication backup as failed if it was set up as part of server provisioning
+ if "logical_replication_backup" in self.arguments_dict:
+ frappe.get_doc(
+ "Logical Replication Backup", self.arguments_dict.get("logical_replication_backup")
+ ).fail()
diff --git a/press/press/doctype/press_job/jobs/create_server_snapshot.py b/press/press/doctype/press_job/jobs/create_server_snapshot.py
new file mode 100644
index 00000000000..da5e7502749
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/create_server_snapshot.py
@@ -0,0 +1,68 @@
+from contextlib import suppress
+from typing import TYPE_CHECKING
+
+import frappe
+
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+if TYPE_CHECKING:
+ from press.press.doctype.virtual_machine_image.virtual_machine_image import VirtualMachineImage
+
+
+class CreateServerSnapshotJob(PressJob):
+ @flow
+ def execute(self):
+ self.stop_virtual_machine()
+ self.wait_for_virtual_machine_to_stop()
+ self.create_snapshot()
+ self.start_virtual_machine()
+ self.wait_for_virtual_machine_to_start()
+ self.wait_for_snapshot_complete()
+
+ @task
+ def stop_virtual_machine(self):
+ machine = self.virtual_machine_doc
+ machine.stop()
+
+ @task
+ def wait_for_virtual_machine_to_stop(self):
+ with suppress(Exception):
+ self.virtual_machine_doc.sync()
+
+ if self.virtual_machine_doc.status == "Stopped":
+ return
+
+ self.defer_current_task()
+
+ @task
+ def create_snapshot(self):
+ machine = self.virtual_machine_doc
+ self.kv.set("image", machine.create_image())
+
+ @task
+ def start_virtual_machine(self):
+ try:
+ self.virtual_machine_doc.start()
+ except Exception:
+ self.defer_current_task()
+
+ @task
+ def wait_for_virtual_machine_to_start(self):
+ with suppress(Exception):
+ self.virtual_machine_doc.sync()
+
+ if self.virtual_machine_doc.status == "Running":
+ return
+
+ self.defer_current_task()
+
+ @task
+ def wait_for_snapshot_complete(self):
+ image_name = self.kv.get("image")
+ image: VirtualMachineImage = frappe.get_doc("Virtual Machine Image", image_name) # type: ignore
+ image.sync()
+ if image.status == "Available":
+ return
+
+ self.defer_current_task()
diff --git a/press/press/doctype/press_job/jobs/increase_disk_size.py b/press/press/doctype/press_job/jobs/increase_disk_size.py
new file mode 100644
index 00000000000..c3679df1601
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/increase_disk_size.py
@@ -0,0 +1,106 @@
+from contextlib import suppress
+
+import frappe
+
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class IncreaseDiskSizeJob(PressJob):
+ @flow
+ def execute(self):
+ self.increase_disk_size()
+
+ provider = self.server_doc.provider
+ if provider == "AWS EC2":
+ self.wait_for_partition_to_resize_for_aws_ec2()
+
+ elif provider == "OCI":
+ self.wait_for_server_to_start_start_oci()
+ self.wait_for_server_to_be_accessible_oci()
+ self.add_glass_file_oci()
+
+ if self.server_type == "Server":
+ self.restart_active_benches()
+
+ @task
+ def increase_disk_size(self):
+ mountpoint = self.arguments_dict.labels.get("mountpoint")
+ self.server_doc.calculated_increase_disk_size(mountpoint=mountpoint)
+
+ if not frappe.db.get_value(self.server_type, self.server, "auto_increase_storage"):
+ return
+
+ @task
+ def wait_for_partition_to_resize_for_aws_ec2(self):
+ """Wait for partition to resize (AWS)"""
+ if self.server_doc.provider != "AWS EC2":
+ return
+
+ plays = frappe.get_all(
+ "Ansible Play",
+ {"server": self.server, "play": "Extend EC2 Volume"},
+ ["status"],
+ order_by="creation desc",
+ limit=1,
+ )
+ if not plays:
+ self.defer_current_task()
+
+ if plays[0].status == "Success":
+ return
+
+ if plays[0].status == "Failure":
+ raise Exception("Failed to extend EC2 volume")
+
+ self.defer_current_task()
+
+ @task
+ def wait_for_server_to_start_start_oci(self):
+ """Wait for server to start (OCI)"""
+ if self.server_doc.provider != "OCI":
+ return
+
+ with suppress(Exception):
+ self.virtual_machine_doc.sync()
+
+ if self.virtual_machine_doc.status == "Running":
+ return
+
+ self.defer_current_task()
+
+ @task(queue="long", timeout=600)
+ def wait_for_server_to_be_accessible_oci(self):
+ """Wait for server to be accessible (OCI)"""
+ if self.server_doc.provider != "OCI":
+ return
+
+ play = self.server_doc.ping_ansible()
+ if play and play.status == "Success":
+ return
+
+ self.defer_current_task()
+
+ @task
+ def add_glass_file_oci(self):
+ """Add glass file back (OCI)"""
+ if self.server_doc.provider != "OCI":
+ return
+
+ self.server_doc._add_glass_file()
+
+ @task
+ def restart_active_benches(self):
+ if self.server_type != "Server":
+ return
+
+ self.server_doc._start_active_benches(
+ benches=frappe.get_all(
+ "Bench",
+ {
+ "server": self.server,
+ "status": "Active",
+ },
+ pluck="name",
+ )
+ )
diff --git a/press/press/doctype/press_job/jobs/increase_swap.py b/press/press/doctype/press_job/jobs/increase_swap.py
new file mode 100644
index 00000000000..2cedc70b71a
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/increase_swap.py
@@ -0,0 +1,27 @@
+from contextlib import suppress
+
+import frappe
+
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class IncreaseSwapJob(PressJob):
+ @flow
+ def execute(self):
+ with suppress(Exception):
+ self.send_telegram_notification()
+
+ self.add_swap_on_server()
+
+ @task
+ def send_telegram_notification(self):
+ telegram_message = frappe.get_doc("Press Settings").telegram_message
+ telegram_message.enqueue(
+ f"Increasing swap on [{self.server}]({frappe.utils.get_url_to_form(self.server_type, self.server)})",
+ "Information",
+ )
+
+ @task(queue="long", timeout=1200)
+ def add_swap_on_server(self):
+ self.server_doc.increase_swap_locked(swap_size=4, throw_on_failure=True)
diff --git a/press/press/doctype/press_job/jobs/prune_docker_system.py b/press/press/doctype/press_job/jobs/prune_docker_system.py
new file mode 100644
index 00000000000..e571b9b490f
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/prune_docker_system.py
@@ -0,0 +1,27 @@
+from contextlib import suppress
+
+import frappe
+
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class PruneDockerSystemJob(PressJob):
+ @flow
+ def execute(self):
+ with suppress(Exception):
+ self.send_telegram_notification()
+
+ self.prune_docker_system()
+
+ @task
+ def send_telegram_notification(self):
+ telegram_message = frappe.get_doc("Press Settings").telegram_message
+ telegram_message.enqueue(
+ f"Pruning docker cache on [{self.server}]({frappe.utils.get_url_to_form(self.server_type, self.server)})",
+ "Information",
+ )
+
+ @task(queue="long", timeout=8000)
+ def prune_docker_system(self):
+ self.server_doc._prune_docker_system(throw_on_failure=True)
diff --git a/press/press/doctype/press_job/jobs/prune_mirror_registry.py b/press/press/doctype/press_job/jobs/prune_mirror_registry.py
new file mode 100644
index 00000000000..151598e283f
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/prune_mirror_registry.py
@@ -0,0 +1,15 @@
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class PruneMirrorRegistryJob(PressJob):
+ @flow
+ def execute(self):
+ if self.server_type != "Registry Server":
+ return
+
+ self.prune_mirror_registry()
+
+ @task(queue="long", timeout=3600)
+ def prune_mirror_registry(self):
+ self.server_doc._prune_mirror_registry(throw_on_failure=True)
diff --git a/press/press/doctype/press_job/jobs/remove_on_prem_failover.py b/press/press/doctype/press_job/jobs/remove_on_prem_failover.py
new file mode 100644
index 00000000000..5f221b1b7c7
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/remove_on_prem_failover.py
@@ -0,0 +1,58 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import frappe
+
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+if TYPE_CHECKING:
+ from press.press.doctype.on_prem_failover.on_prem_failover import OnPremFailover
+
+
+class RemoveOnPremFailoverJob(PressJob):
+ @flow
+ def execute(self):
+ self.remove_app_server_from_firewall()
+ self.remove_db_server_from_firewall()
+ self.stop_replication_from_app_server()
+ self.stop_replication_from_db_server()
+ self.delete_firewall()
+
+ @property
+ def failover_doc(self) -> OnPremFailover:
+ if not hasattr(self, "_on_prem_failover_doc") or not self._on_prem_failover_doc: # type: ignore
+ self._on_prem_failover_doc = frappe.get_doc("On-Prem Failover", self.arguments_dict.failover)
+ return self._on_prem_failover_doc # type: ignore
+
+ @task
+ def remove_app_server_from_firewall(self):
+ """Remove Wireguard Port Access from App Server"""
+ self.failover_doc.remove_app_server_from_firewall()
+
+ @task
+ def remove_db_server_from_firewall(self):
+ """Remove Wireguard Port Access from DB Server"""
+ self.failover_doc.remove_db_server_from_firewall()
+
+ @task(queue="long", timeout=1800)
+ def stop_replication_from_app_server(self):
+ """Stop Replication from App Server"""
+ self.failover_doc._stop_replication_from_app_server()
+
+ @task(queue="long", timeout=1800)
+ def stop_replication_from_db_server(self):
+ """Stop Replication from DB Server"""
+ self.failover_doc._stop_replication_from_db_server()
+
+ @task
+ def delete_firewall(self):
+ """Delete Firewall"""
+ self.failover_doc.delete_firewall()
+
+ def on_press_job_success(self, _):
+ self.failover_doc.is_db_server_failover_setup = False
+ self.failover_doc.is_app_server_failover_setup = False
+ self.failover_doc.enabled = False
+ self.failover_doc.save()
diff --git a/press/press/doctype/press_job/jobs/reset_swap_on_server.py b/press/press/doctype/press_job/jobs/reset_swap_on_server.py
index fb444f1a0ce..8d96bdc196d 100644
--- a/press/press/doctype/press_job/jobs/reset_swap_on_server.py
+++ b/press/press/doctype/press_job/jobs/reset_swap_on_server.py
@@ -9,10 +9,6 @@
class ResetSwapOnServerJob(PressJob):
@flow
def execute(self):
- if self.status == "Pending":
- self.status = "Running"
- self.save()
-
with suppress(Exception):
self.send_telegram_notification()
diff --git a/press/press/doctype/press_job/jobs/resize_server.py b/press/press/doctype/press_job/jobs/resize_server.py
new file mode 100644
index 00000000000..19cfa0f6c24
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/resize_server.py
@@ -0,0 +1,101 @@
+from __future__ import annotations
+
+from contextlib import suppress
+from typing import TYPE_CHECKING
+
+import frappe
+
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+if TYPE_CHECKING:
+ from press.press.doctype.database_server.database_server import DatabaseServer
+ from press.press.doctype.server.server import Server
+
+
+class ResizeServerJob(PressJob):
+ @flow
+ def execute(self):
+ self.stop_virtual_machine()
+ self.wait_for_virtual_machine_to_stop()
+
+ self.resize_virtual_machine()
+
+ self.start_virtual_machine()
+ self.wait_for_virtual_machine_to_start()
+
+ self.wait_for_server_to_be_accessible()
+ self.set_additional_config()
+ self.increase_disk_size()
+
+ @task
+ def stop_virtual_machine(self):
+ self.virtual_machine_doc.stop()
+
+ @task
+ def wait_for_virtual_machine_to_stop(self):
+ with suppress(Exception):
+ self.virtual_machine_doc.sync()
+
+ if self.virtual_machine_doc.status == "Stopped":
+ return
+
+ self.defer_current_task()
+
+ @task
+ def resize_virtual_machine(self):
+ self.virtual_machine_doc.resize(
+ self.arguments_dict.machine_type, self.arguments_dict.get("upgrade_disk", False)
+ )
+
+ @task
+ def start_virtual_machine(self):
+ try:
+ if self.virtual_machine_doc.status != "Running":
+ self.virtual_machine_doc.start()
+ except Exception:
+ self.defer_current_task()
+
+ @task
+ def wait_for_virtual_machine_to_start(self):
+ with suppress(Exception):
+ self.virtual_machine_doc.sync()
+
+ if self.virtual_machine_doc.status == "Running":
+ return
+
+ self.defer_current_task()
+
+ @task
+ def wait_for_server_to_be_accessible(self):
+ play = self.server_doc.ping_ansible()
+ if not play or play.status != "Success":
+ self.defer_current_task()
+
+ @task
+ def set_additional_config(self):
+ if self.server_type not in ["Server", "Database Server"]:
+ return
+
+ if self.server_type == "Server" and self.server_doc.is_unified_server:
+ server_doc: Server = frappe.get_doc("Server", self.server)
+ server_doc.auto_scale_workers()
+ db_doc: DatabaseServer = frappe.get_doc("Database Server", self.server)
+ db_doc.adjust_memory_config()
+ else:
+ if self.server_type == "Database Server":
+ self.server_doc.adjust_memory_config()
+ elif self.server_type == "Server":
+ self.server_doc.auto_scale_workers()
+
+ @task
+ def increase_disk_size(self):
+ if not self.server_doc.plan:
+ return
+
+ plan_disk_size = frappe.db.get_value("Server Plan", self.server_doc.plan, "disk")
+ if not plan_disk_size or plan_disk_size <= self.virtual_machine_doc.disk_size:
+ return
+
+ with suppress(Exception):
+ self.server_doc.increase_disk_size(increment=plan_disk_size - self.virtual_machine_doc.disk_size)
diff --git a/press/press/doctype/press_job/jobs/resume_services_after_snapshot.py b/press/press/doctype/press_job/jobs/resume_services_after_snapshot.py
new file mode 100644
index 00000000000..7b8ce7b0138
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/resume_services_after_snapshot.py
@@ -0,0 +1,47 @@
+import frappe
+
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class ResumeServicesAfterSnapshotJob(PressJob):
+    """Press Job that restarts services stopped for a consistent snapshot.
+
+    Docker is restarted on app servers and MariaDB on database servers; the
+    matching ``*_services_started`` flag on the Server Snapshot is set either
+    way, so the snapshot doc records that services are back up.
+    """
+
+    @flow
+    def execute(self):
+        self.start_docker_daemon()
+        self.start_mariadb_service()
+
+    @task(queue="long", timeout=1200)
+    def start_docker_daemon(self):
+        server_snapshot = self.arguments_dict.get("server_snapshot")
+
+        # Docker is only stopped on app servers during consistent snapshots.
+        if self.server_type == "Server" and self.arguments_dict.get("is_consistent_snapshot", False):
+            server = frappe.get_doc("Server", self.server)
+            output = server.ansible_run("systemctl start docker")
+            if not (output and output.get("status") == "Success"):
+                raise Exception("Failed to start docker daemon")
+
+        # update_modified=False avoids timestamp conflicts on the snapshot doc.
+        frappe.db.set_value(
+            "Server Snapshot",
+            server_snapshot,
+            "app_server_services_started",
+            True,
+            update_modified=False,
+        )
+
+    @task(queue="long", timeout=3600)
+    def start_mariadb_service(self):
+        server_snapshot = self.arguments_dict.get("server_snapshot")
+
+        # MariaDB is only stopped on DB servers during consistent snapshots.
+        if self.server_type == "Database Server" and self.arguments_dict.get("is_consistent_snapshot", False):
+            server = frappe.get_doc("Database Server", self.server)
+            output = server.ansible_run("systemctl start mariadb")
+            if not (output and output.get("status") == "Success"):
+                raise Exception("Failed to start mariadb service")
+
+        frappe.db.set_value(
+            "Server Snapshot",
+            server_snapshot,
+            "database_server_services_started",
+            True,
+            update_modified=False,
+        )
diff --git a/press/press/doctype/press_job/jobs/setup_on_prem_failover.py b/press/press/doctype/press_job/jobs/setup_on_prem_failover.py
new file mode 100644
index 00000000000..b89c885334d
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/setup_on_prem_failover.py
@@ -0,0 +1,100 @@
+from __future__ import annotations
+
+import time
+from typing import TYPE_CHECKING
+
+import frappe
+
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+if TYPE_CHECKING:
+ from press.press.doctype.on_prem_failover.on_prem_failover import OnPremFailover
+
+
+class SetupOnPremFailoverJob(PressJob):
+    """Press Job that prepares failover replication to an on-prem server.
+
+    Opens firewall ports, sets up WireGuard on both cloud servers, verifies
+    SSH connectivity, then replicates the app server and the database
+    (lsyncd initial sync, rsync final sync, replica configuration).
+    """
+
+    @flow
+    def execute(self):
+        self.add_app_server_to_firewall()
+        self.add_db_server_to_firewall()
+        self.setup_wireguard_on_app_server()
+        self.setup_wireguard_on_db_server()
+        self.test_connectivity()
+        self.setup_replication_for_app_server()
+        self.setup_db_lsync_for_initial_sync()
+        self.wait_for_initial_db_sync()
+        self.rsync_new_db_files()
+        self.setup_replica_in_on_prem_server()
+
+    @property
+    def failover_doc(self) -> OnPremFailover:
+        # Lazily fetch and cache the On-Prem Failover doc named in the job arguments.
+        if not hasattr(self, "_on_prem_failover_doc") or not self._on_prem_failover_doc:  # type: ignore
+            self._on_prem_failover_doc = frappe.get_doc("On-Prem Failover", self.arguments_dict.failover)
+        return self._on_prem_failover_doc  # type: ignore
+
+    @task
+    def add_app_server_to_firewall(self):
+        """Allow Wireguard Port Through Security Group on App Server"""
+        self.failover_doc.add_app_server_to_firewall()
+
+    @task
+    def add_db_server_to_firewall(self):
+        """Allow Wireguard Port Through Security Group on DB Server"""
+        self.failover_doc.add_db_server_to_firewall()
+
+    @task
+    def setup_wireguard_on_app_server(self):
+        """Setup Wireguard on App Server"""
+        self.failover_doc.setup_wireguard_on_app_server()
+
+    @task
+    def setup_wireguard_on_db_server(self):
+        """Setup Wireguard on DB Server"""
+        self.failover_doc.setup_wireguard_on_database_server()
+
+    @task(queue="long", timeout=600)
+    def test_connectivity(self):
+        """Test Connectivity to On-Prem Server"""
+        self.failover_doc.check_connectivity_to_on_premise_server()
+        self.failover_doc.reload()
+
+        # Defer (retry later) until SSH works from both the app and DB servers.
+        if (
+            self.failover_doc.is_on_prem_server_ssh_from_app_server_working
+            and self.failover_doc.is_on_prem_server_ssh_from_db_server_working
+        ):
+            return
+
+        self.defer_current_task()
+
+    @task(queue="long", timeout=3600)
+    def setup_replication_for_app_server(self):
+        """Setup Replication for App Server"""
+        self.failover_doc._setup_app_server_replica()
+
+    @task(queue="long", timeout=3600)
+    def setup_db_lsync_for_initial_sync(self):
+        """Setup Lsyncd For Initial DB Sync"""
+        self.failover_doc._setup_db_lsync_for_initial_sync()
+
+    @task
+    def wait_for_initial_db_sync(self):
+        # Defer until the scheduled lsyncd stop time has passed.
+        if (
+            self.failover_doc.db_lsyncd_stop_at
+            and frappe.utils.now_datetime() > self.failover_doc.db_lsyncd_stop_at
+        ):
+            return
+        time.sleep(1)
+        self.defer_current_task()
+
+    @task(queue="long", timeout=3600)
+    def rsync_new_db_files(self):
+        # Final rsync pass for DB files changed since the lsyncd initial sync.
+        self.failover_doc._setup_db_rsync_for_final_sync()
+
+    @task(queue="long", timeout=3600)
+    def setup_replica_in_on_prem_server(self):
+        # Configure the on-prem database as a replica of the cloud DB server.
+        self.failover_doc._setup_and_configure_database_replica()
+
+    def on_press_job_success(self, _):
+        # Mark both failover legs as set up once the whole workflow succeeds.
+        self.failover_doc.is_db_server_failover_setup = True
+        self.failover_doc.is_app_server_failover_setup = True
+        self.failover_doc.save()
diff --git a/press/press/doctype/press_job/jobs/snapshot_disk.py b/press/press/doctype/press_job/jobs/snapshot_disk.py
new file mode 100644
index 00000000000..8b6356aebed
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/snapshot_disk.py
@@ -0,0 +1,106 @@
+from __future__ import annotations
+
+from contextlib import suppress
+from typing import TYPE_CHECKING
+
+import frappe
+
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+if TYPE_CHECKING:
+ from press.press.doctype.server_snapshot.server_snapshot import ServerSnapshot
+
+
+class SnapshotDiskJob(PressJob):
+    """Press Job that snapshots a server's (non-boot) data disk.
+
+    For consistent snapshots the relevant service is stopped first (docker on
+    app servers, mariadb on database servers); services are resumed and the
+    Server Snapshot doc updated in the success / failure callbacks.
+    """
+
+    @flow
+    def execute(self):
+        self.verify_virtual_machine_status()
+
+        if self.is_consistent_snapshot:
+            if self.server_type == "Server":
+                self.stop_docker_daemon()
+            if self.server_type == "Database Server":
+                self.stop_mariadb_service()
+
+        self.flush_file_system_buffers()
+        self.snapshot_disk()
+
+    @property
+    def is_consistent_snapshot(self):
+        # Whether the caller requested a filesystem-consistent snapshot.
+        return self.arguments_dict.get("is_consistent_snapshot", False)
+
+    @task
+    def verify_virtual_machine_status(self):
+        # Refresh VM state, deferring on sync failure. Terminated / Draft
+        # machines cannot be snapshotted, so those fail the job outright.
+        try:
+            self.virtual_machine_doc.sync()
+        except Exception:
+            self.defer_current_task()
+
+        if self.virtual_machine_doc.status == "Terminated":
+            raise Exception("Can't snapshot terminated virtual machine")
+
+        if self.virtual_machine_doc.status == "Draft":
+            raise Exception("Can't snapshot draft virtual machine")
+
+    @task
+    def stop_docker_daemon(self):
+        # Guard repeated from execute() so the task is a no-op if conditions changed.
+        if not (self.server_type == "Server" and self.is_consistent_snapshot):
+            return
+
+        output = self.server_doc.ansible_run("systemctl stop docker")
+        if not (output and output.get("status") == "Success"):
+            raise Exception("Failed to stop docker daemon")
+
+    @task
+    def stop_mariadb_service(self):
+        # Guard repeated from execute() so the task is a no-op if conditions changed.
+        if not (self.server_type == "Database Server" and self.is_consistent_snapshot):
+            return
+
+        output = self.server_doc.ansible_run("systemctl stop mariadb")
+        if not (output and output.get("status") == "Success"):
+            raise Exception("Failed to stop mariadb service")
+
+    @task
+    def flush_file_system_buffers(self):
+        # `sync` flushes dirty pages so the snapshot captures committed data.
+        output = self.server_doc.ansible_run("sync")
+        if not (output and output.get("status") == "Success"):
+            raise Exception("Failed to flush file system buffers to disk")
+
+    @task
+    def snapshot_disk(self):
+        machine = self.virtual_machine_doc
+        machine.create_snapshots(exclude_boot_volume=True, dedicated_snapshot=True)
+
+        # Exactly one disk snapshot is expected since the boot volume is
+        # excluded; record it on the Server Snapshot doc.
+        field_name = "app_server_snapshot" if self.server_type == "Server" else "database_server_snapshot"
+        no_of_snapshots = len(machine.flags.created_snapshots)
+        if no_of_snapshots != 1:
+            raise Exception(f"Expected 1 disk snapshot. Found: {no_of_snapshots}")
+
+        frappe.db.set_value(
+            "Server Snapshot",
+            self.arguments_dict.get("server_snapshot"),
+            field_name,
+            machine.flags.created_snapshots[0],
+            update_modified=False,
+        )
+
+    def _resume_services(self) -> ServerSnapshot:
+        # Resume whichever service this job's server type stopped.
+        snapshot = frappe.get_doc("Server Snapshot", self.arguments_dict.get("server_snapshot"))
+        if self.server_type == "Server":
+            snapshot.resume_app_server_services()
+        elif self.server_type == "Database Server":
+            snapshot.resume_database_server_services()
+
+        return snapshot
+
+    def on_press_job_success(self, workflow):
+        snapshot = self._resume_services()
+        snapshot.sync(now=False)
+
+    def on_press_job_failure(self, workflow):
+        # On failure: resume services, mark the snapshot failed and
+        # best-effort delete any disk snapshots that were created.
+        snapshot = self._resume_services()
+        frappe.db.set_value("Server Snapshot", snapshot.name, "status", "Failure", update_modified=False)
+        for s in snapshot.snapshots:
+            with suppress(Exception):
+                frappe.get_doc("Virtual Disk Snapshot", s).delete_snapshot(ignore_validation=True)
diff --git a/press/press/doctype/press_job/jobs/stop_and_start_server.py b/press/press/doctype/press_job/jobs/stop_and_start_server.py
new file mode 100644
index 00000000000..74d93d7502b
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/stop_and_start_server.py
@@ -0,0 +1,50 @@
+from contextlib import suppress
+
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class StopAndStartServerJob(PressJob):
+    """Press Job that power-cycles a server's virtual machine.
+
+    Stops the VM, waits until it is Stopped, starts it again, waits until it
+    is Running, and finally waits for the server to answer an Ansible ping.
+    """
+
+    @flow
+    def execute(self):
+        self.stop_virtual_machine()
+        self.wait_for_virtual_machine_to_stop()
+
+        self.start_virtual_machine()
+        self.wait_for_virtual_machine_to_start()
+
+        self.wait_for_server_to_be_accessible()
+
+    @task
+    def stop_virtual_machine(self):
+        self.virtual_machine_doc.stop()
+
+    @task
+    def wait_for_virtual_machine_to_stop(self):
+        # Poll VM state (sync errors suppressed); defer until status is "Stopped".
+        with suppress(Exception):
+            self.virtual_machine_doc.sync()
+
+        if self.virtual_machine_doc.status == "Stopped":
+            return
+
+        self.defer_current_task()
+
+    @task
+    def start_virtual_machine(self):
+        self.virtual_machine_doc.start()
+
+    @task
+    def wait_for_virtual_machine_to_start(self):
+        # Poll VM state (sync errors suppressed); defer until status is "Running".
+        with suppress(Exception):
+            self.virtual_machine_doc.sync()
+
+        if self.virtual_machine_doc.status == "Running":
+            return
+
+        self.defer_current_task()
+
+    @task
+    def wait_for_server_to_be_accessible(self):
+        # Defer until an Ansible ping play succeeds against the server.
+        play = self.server_doc.ping_ansible()
+        if not play or play.status != "Success":
+            self.defer_current_task()
diff --git a/press/press/doctype/press_job/jobs/trigger_build_server_cleanup.py b/press/press/doctype/press_job/jobs/trigger_build_server_cleanup.py
new file mode 100644
index 00000000000..42bd972444f
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/trigger_build_server_cleanup.py
@@ -0,0 +1,18 @@
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class TriggerBuildServerCleanupJob(PressJob):
+    """Press Job that prunes the docker system on a build server.
+
+    Only applies to app servers flagged with ``use_for_build``.
+    """
+
+    @flow
+    def execute(self):
+        if self.server_type != "Server" or not self.server_doc.use_for_build:
+            return
+
+        self.trigger_build_server_cleanup()
+
+    @task
+    def trigger_build_server_cleanup(self):
+        # NOTE(review): guard repeated from execute(); presumably because the
+        # task runs later than the flow check — confirm.
+        if not self.server_doc.use_for_build:
+            return
+
+        self.server_doc.prune_docker_system()
diff --git a/press/press/doctype/press_job/jobs/upgrade_mariadb.py b/press/press/doctype/press_job/jobs/upgrade_mariadb.py
new file mode 100644
index 00000000000..831fe61cb3a
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/upgrade_mariadb.py
@@ -0,0 +1,27 @@
+import frappe
+
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class UpgradeMariaDBJob(PressJob):
+    """Press Job that upgrades MariaDB on a database server.
+
+    MariaDB is stopped, a VM snapshot is taken as a rollback point, and then
+    the upgrade itself is run.
+    """
+
+    @flow
+    def execute(self):
+        self.stop_mariadb()
+        self.create_server_snapshot()
+        self.upgrade_mariadb()
+
+    @task(queue="long", timeout=1800)
+    def stop_mariadb(self):
+        self.server_doc._stop_mariadb(throw_on_failure=True)
+
+    @task
+    def create_server_snapshot(self):
+        self.virtual_machine_doc.create_snapshots()
+
+        # Annotate the newest snapshot so it is identifiable as the rollback point.
+        snapshot = frappe.get_last_doc("Virtual Disk Snapshot", {"virtual_machine": self.virtual_machine})
+        snapshot.add_comment(text="Before MariaDB Upgrade")
+
+    @task(queue="long", timeout=1800)
+    def upgrade_mariadb(self):
+        self.server_doc._upgrade_mariadb(throw_on_failure=True)
diff --git a/press/press/doctype/press_job/jobs/warn_disk.py b/press/press/doctype/press_job/jobs/warn_disk.py
new file mode 100644
index 00000000000..a7823982428
--- /dev/null
+++ b/press/press/doctype/press_job/jobs/warn_disk.py
@@ -0,0 +1,13 @@
+from press.press.doctype.press_job.press_job import PressJob
+from press.workflow_engine.doctype.press_workflow.decorators import flow, task
+
+
+class WarnDiskJob(PressJob):
+    """Press Job that recommends a disk size increase for a mountpoint.
+
+    Registered in the job registry as "Warn disk at 80%".
+    """
+
+    @flow
+    def execute(self):
+        self.send_warning()
+
+    @task
+    def send_warning(self):
+        # assumes job arguments carry a `labels` dict (alert labels) — TODO confirm
+        mountpoint = self.arguments_dict.labels.get("mountpoint")
+        self.server_doc.recommend_disk_increase(mountpoint=mountpoint)
diff --git a/press/press/doctype/press_job/press_job.json b/press/press/doctype/press_job/press_job.json
index 57e58006a72..546418bf126 100644
--- a/press/press/doctype/press_job/press_job.json
+++ b/press/press/doctype/press_job/press_job.json
@@ -15,8 +15,9 @@
"section_break_7",
"server_type",
"server",
+ "virtual_machine",
"column_break_fhyz",
- "virtual_machine"
+ "arguments"
],
"fields": [
{
@@ -100,6 +101,14 @@
{
"fieldname": "column_break_fhyz",
"fieldtype": "Column Break"
+ },
+ {
+ "default": "{}",
+ "fieldname": "arguments",
+ "fieldtype": "Small Text",
+ "label": "Arguments",
+ "reqd": 1,
+ "set_only_once": 1
}
],
"grid_page_length": 50,
@@ -114,7 +123,7 @@
"link_fieldname": "linked_docname"
}
],
- "modified": "2026-04-16 22:33:03.958588",
+ "modified": "2026-04-16 23:16:27.885432",
"modified_by": "Administrator",
"module": "Press",
"name": "Press Job",
diff --git a/press/press/doctype/press_job/press_job.py b/press/press/doctype/press_job/press_job.py
index f019bbd417d..1e416ddc8e0 100644
--- a/press/press/doctype/press_job/press_job.py
+++ b/press/press/doctype/press_job/press_job.py
@@ -1,7 +1,9 @@
-from __future__ import annotations
-
# Copyright (c) 2022, Frappe and contributors
# For license information, please see license.txt
+from __future__ import annotations
+
+import json
+from functools import cached_property
from typing import TYPE_CHECKING
import frappe
@@ -23,10 +25,60 @@ def _init_jobs_registry() -> None:
if _JOBS_REGISTRY:
return
+ from press.press.doctype.press_job.jobs.archive_server import ArchiveServerJob
+ from press.press.doctype.press_job.jobs.attach_volume import AttachVolumeJob
+ from press.press.doctype.press_job.jobs.auto_scale_application_server import (
+ AutoScaleApplicationServerJob,
+ )
+ from press.press.doctype.press_job.jobs.auto_scale_down_application_server import (
+ AutoScaleDownApplicationServerJob,
+ )
+ from press.press.doctype.press_job.jobs.auto_scale_up_application_server import (
+ AutoScaleUpApplicationServerJob,
+ )
+ from press.press.doctype.press_job.jobs.create_server import CreateServerJob
+ from press.press.doctype.press_job.jobs.create_server_snapshot import CreateServerSnapshotJob
+ from press.press.doctype.press_job.jobs.increase_disk_size import IncreaseDiskSizeJob
+ from press.press.doctype.press_job.jobs.increase_swap import IncreaseSwapJob
+ from press.press.doctype.press_job.jobs.prune_docker_system import PruneDockerSystemJob
+ from press.press.doctype.press_job.jobs.prune_mirror_registry import PruneMirrorRegistryJob
+ from press.press.doctype.press_job.jobs.remove_on_prem_failover import RemoveOnPremFailoverJob
from press.press.doctype.press_job.jobs.reset_swap_on_server import ResetSwapOnServerJob
+ from press.press.doctype.press_job.jobs.resize_server import ResizeServerJob
+ from press.press.doctype.press_job.jobs.resume_services_after_snapshot import (
+ ResumeServicesAfterSnapshotJob,
+ )
+ from press.press.doctype.press_job.jobs.setup_on_prem_failover import SetupOnPremFailoverJob
+ from press.press.doctype.press_job.jobs.snapshot_disk import SnapshotDiskJob
+ from press.press.doctype.press_job.jobs.stop_and_start_server import StopAndStartServerJob
+ from press.press.doctype.press_job.jobs.trigger_build_server_cleanup import (
+ TriggerBuildServerCleanupJob,
+ )
+ from press.press.doctype.press_job.jobs.upgrade_mariadb import UpgradeMariaDBJob
+ from press.press.doctype.press_job.jobs.warn_disk import WarnDiskJob
_JOBS_REGISTRY = {
+ "Archive Server": ArchiveServerJob,
+ "Attach Volume": AttachVolumeJob,
+ "Auto Scale Application Server": AutoScaleApplicationServerJob,
+ "Auto Scale Down Application Server": AutoScaleDownApplicationServerJob,
+ "Auto Scale Up Application Server": AutoScaleUpApplicationServerJob,
+ "Create Server": CreateServerJob,
+ "Create Server Snapshot": CreateServerSnapshotJob,
+ "Increase Disk Size": IncreaseDiskSizeJob,
+ "Increase Swap": IncreaseSwapJob,
+ "Prune Docker system": PruneDockerSystemJob,
+ "Prune Mirror Registry": PruneMirrorRegistryJob,
+ "Remove On-Prem Failover": RemoveOnPremFailoverJob,
"Reset Swap": ResetSwapOnServerJob,
+ "Resize Server": ResizeServerJob,
+ "Resume Services After Snapshot": ResumeServicesAfterSnapshotJob,
+ "Setup On-Prem Failover": SetupOnPremFailoverJob,
+ "Snapshot Disk": SnapshotDiskJob,
+ "Stop and Start Server": StopAndStartServerJob,
+ "Trigger Build Server Cleanup": TriggerBuildServerCleanupJob,
+ "Upgrade MariaDB": UpgradeMariaDBJob,
+ "Warn disk at 80%": WarnDiskJob,
}
@@ -39,6 +91,7 @@ class PressJob(WorkflowBuilder):
if TYPE_CHECKING:
from frappe.types import DF
+ arguments: DF.SmallText
duration: DF.Duration | None
end: DF.Datetime | None
job_type: DF.Link
@@ -50,6 +103,10 @@ class PressJob(WorkflowBuilder):
virtual_machine: DF.Link | None
# end: auto-generated types
+ @cached_property
+ def arguments_dict(self) -> "frappe._dict":
+ return frappe._dict(json.loads(self.get("arguments") or "{}"))
+
@property
def server_doc(self) -> "Server | DatabaseServer":
if hasattr(self, "_server_doc") and self._server_doc: # type: ignore
@@ -110,8 +167,13 @@ def __init__(self, *args, **kwargs):
self.__class__ = _JOBS_REGISTRY[self.job_type]
def start_workflow(self) -> str:
+ if self.status != "Pending":
+ frappe.throw("Only jobs with Pending status can be started")
+
if not hasattr(self, "execute"):
raise NotImplementedError("Press Job implementation must have an execute method")
+ self.start = now_datetime()
+ self.status = "Running"
return self.execute.run_as_workflow()
def on_workflow_success(self, workflow: "PressWorkflow"):
diff --git a/press/press/doctype/registry_server/registry_server.py b/press/press/doctype/registry_server/registry_server.py
index 9e71dca1979..e6d774d05bb 100644
--- a/press/press/doctype/registry_server/registry_server.py
+++ b/press/press/doctype/registry_server/registry_server.py
@@ -133,8 +133,9 @@ def prune_mirror_registry(self):
frappe.enqueue_doc(self.doctype, self.name, "_prune_mirror_registry", queue="long", timeout=3600)
- def _prune_mirror_registry(self):
+ def _prune_mirror_registry(self, throw_on_failure: bool = False):
try:
+ assert self.docker_data_mountpoint, "Docker data mountpoint is required to prune mirror registry"
ansible = Ansible(
playbook="prune_mirror_registry.yml",
server=self,
@@ -147,9 +148,15 @@ def _prune_mirror_registry(self):
"registry_container": "registry-registry-1",
},
)
- ansible.run()
+ play = ansible.run()
+ if play.status != "Success" and throw_on_failure:
+ frappe.throw("Failed to prune mirror registry") # nosemgrep
+ return play
except Exception:
log_error("Mirror Registry Prune Failed", server=self.as_dict())
+ if throw_on_failure:
+ frappe.throw("Failed to prune mirror registry") # nosemgrep
+ return None
@frappe.whitelist()
def show_registry_password(self):
diff --git a/press/press/doctype/server/server.py b/press/press/doctype/server/server.py
index 384e2ca7ea8..4f839cf5b33 100644
--- a/press/press/doctype/server/server.py
+++ b/press/press/doctype/server/server.py
@@ -871,7 +871,7 @@ def install_exporters(self):
frappe.enqueue_doc(self.doctype, self.name, "_install_exporters", queue="long", timeout=1200)
@frappe.whitelist()
- def ping_ansible(self):
+ def ping_ansible(self) -> AnsiblePlay | None:
try:
ansible = Ansible(
playbook="ping.yml",
@@ -879,15 +879,16 @@ def ping_ansible(self):
user=self._ssh_user(),
port=self._ssh_port(),
)
- ansible.run()
+ return ansible.run()
except Exception:
log_error("Server Ping Exception", server=self.as_dict())
+ return None
@frappe.whitelist()
def update_agent_ansible(self):
frappe.enqueue_doc(self.doctype, self.name, "_update_agent_ansible")
- def _update_agent_ansible(self):
+ def _update_agent_ansible(self, throw_on_failure: bool = False):
try:
agent_branch = frappe.get_value("Press Settings", "Press Settings", "branch")
if not agent_branch:
@@ -905,8 +906,12 @@ def _update_agent_ansible(self):
user=self._ssh_user(),
port=self._ssh_port(),
)
- ansible.run()
- except Exception:
+ play = ansible.run()
+ if throw_on_failure and play.status != "Success":
+ raise Exception("Failed to update agent")
+ except Exception as e:
+ if throw_on_failure:
+ raise e
log_error("Agent Update Exception", server=self.as_dict())
@frappe.whitelist()
@@ -1502,7 +1507,7 @@ def increase_swap(self, swap_size=4):
**{"swap_size": swap_size},
)
- def _increase_swap(self, swap_size=4):
+ def _increase_swap(self, swap_size=4, throw_on_failure: bool = False):
"""Increase swap by size defined"""
from press.api.server import calculate_swap
@@ -1520,13 +1525,19 @@ def _increase_swap(self, swap_size=4):
"swap_file": swap_file_name,
},
)
- ansible.run()
+ play = ansible.run()
+ if play.status != "Success" and throw_on_failure:
+ raise Exception("Failed to increase swap")
+ return play
except Exception:
+ if throw_on_failure:
+ raise
log_error("Increase swap exception", doc=self)
+ return None
- def increase_swap_locked(self, swap_size=4):
+ def increase_swap_locked(self, swap_size=4, throw_on_failure: bool = False):
with filelock(f"{self.name}-swap-update"):
- self._increase_swap(swap_size)
+ self._increase_swap(swap_size, throw_on_failure=throw_on_failure)
@frappe.whitelist()
def reset_swap(self, swap_size=1, now: bool = False):
@@ -1631,7 +1642,7 @@ def _set_swappiness(self):
log_error("Swappiness Setup Exception", doc=self)
@frappe.whitelist()
- def update_tls_certificate(self):
+ def update_tls_certificate(self, throw_on_failure: bool = False):
from press.press.doctype.tls_certificate.tls_certificate import (
update_server_tls_certifcate,
)
@@ -1648,7 +1659,7 @@ def update_tls_certificate(self):
certificate = frappe.get_last_doc("TLS Certificate", filters)
- update_server_tls_certifcate(self, certificate)
+ update_server_tls_certifcate(self, certificate, throw_on_failure=throw_on_failure)
@frappe.whitelist()
def show_agent_version(self) -> str:
@@ -2300,7 +2311,7 @@ def prune_docker_system(self):
timeout=8000,
)
- def _prune_docker_system(self):
+ def _prune_docker_system(self, throw_on_failure: bool = False):
try:
ansible = Ansible(
playbook="docker_system_prune.yml",
@@ -2308,9 +2319,15 @@ def _prune_docker_system(self):
user=self._ssh_user(),
port=self._ssh_port(),
)
- ansible.run()
+ play = ansible.run()
+ if play.status != "Success" and throw_on_failure:
+ frappe.throw("Failed to prune docker system") # nosemgrep
+ return play
except Exception:
log_error("Prune Docker System Exception", doc=self)
+ if throw_on_failure:
+ frappe.throw("Failed to prune docker system") # nosemgrep
+ return None
def get_nat_gateway_ip(self):
if hasattr(self, "nat_server") and self.nat_server:
diff --git a/press/press/doctype/tls_certificate/tls_certificate.py b/press/press/doctype/tls_certificate/tls_certificate.py
index fb5b3f109ed..6f2e4250ed2 100644
--- a/press/press/doctype/tls_certificate/tls_certificate.py
+++ b/press/press/doctype/tls_certificate/tls_certificate.py
@@ -414,7 +414,7 @@ def notify_custom_tls_renewal():
)
-def update_server_tls_certifcate(server, certificate):
+def update_server_tls_certifcate(server, certificate, throw_on_failure: bool = False):
try:
proxysql_admin_password = None
if server.doctype == "Proxy Server":
@@ -441,8 +441,10 @@ def update_server_tls_certifcate(server, certificate):
# to avoid causing TimestampMismatchError in other important tasks
update_modified=False,
)
- except Exception:
+ except Exception as e:
log_error("TLS Setup Exception", server=server.as_dict())
+ if throw_on_failure:
+ raise Exception(f"Failed to update TLS certificate on {server.doctype} {server.name}") from e
def retrigger_failed_wildcard_tls_callbacks():
diff --git a/press/workflow_engine/doctype/press_workflow/exceptions.py b/press/workflow_engine/doctype/press_workflow/exceptions.py
index 8174a391be0..0ecf212241b 100644
--- a/press/workflow_engine/doctype/press_workflow/exceptions.py
+++ b/press/workflow_engine/doctype/press_workflow/exceptions.py
@@ -1,11 +1,13 @@
# Copyright (c) 2026, Frappe and contributors
# For license information, please see license.txt
+from __future__ import annotations
+
class PressWorkflowTaskEnqueued(Exception):
"""Raised when a task is enqueued and the flow needs to be paused."""
- def __init__(self, message: str, workflow_name: str, task_name: str):
+ def __init__(self, message: str, workflow_name: str, task_name: str | None = None):
super().__init__(message)
self.workflow_name = workflow_name
self.task_name = task_name
diff --git a/press/workflow_engine/doctype/press_workflow/workflow_builder.py b/press/workflow_engine/doctype/press_workflow/workflow_builder.py
index a607cf7d57a..b9ea57cd914 100644
--- a/press/workflow_engine/doctype/press_workflow/workflow_builder.py
+++ b/press/workflow_engine/doctype/press_workflow/workflow_builder.py
@@ -189,3 +189,17 @@ def resolve_context(self) -> None:
if self.kv_store_type != "in_memory":
self.kv_store_type = "in_memory"
self.kv_store_reference = None
+
+ def defer_current_task(self, message: str = "User has requested to defer the task later.") -> None:
+ if not self.flags.in_press_workflow_execution:
+ return
+
+ assert self.workflow_name is not None, "Workflow name must be set to defer current task"
+
+ raise PressWorkflowTaskEnqueued(
+ "User has requested to retry the task later.",
+ self.workflow_name,
+ self.flags.current_press_workflow_task
+ if hasattr(self.flags, "current_press_workflow_task")
+ else None,
+ )
From 2ac199bcea53dbfbb9e1559d760d1bccdf68b28f Mon Sep 17 00:00:00 2001
From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com>
Date: Fri, 17 Apr 2026 02:46:59 +0530
Subject: [PATCH 04/22] feat(workflow-engine): Allow press admin and member in
permissions
---
press/press/doctype/press_job/press_job.py | 4 +++-
press/press/doctype/site/archive.py | 2 +-
.../press_workflow/press_workflow.json | 20 ++++++++++++++++-
.../press_workflow_object.json | 22 ++++++++++++++++++-
.../press_workflow_task.json | 22 ++++++++++++++++++-
5 files changed, 65 insertions(+), 5 deletions(-)
diff --git a/press/press/doctype/press_job/press_job.py b/press/press/doctype/press_job/press_job.py
index 1e416ddc8e0..8c447ce7f18 100644
--- a/press/press/doctype/press_job/press_job.py
+++ b/press/press/doctype/press_job/press_job.py
@@ -168,7 +168,9 @@ def __init__(self, *args, **kwargs):
def start_workflow(self) -> str:
if self.status != "Pending":
- frappe.throw("Only jobs with Pending status can be started")
+        frappe.throw(
+            "Only jobs with Pending status can be started. Please wait and retry after some time."
+        )
if not hasattr(self, "execute"):
raise NotImplementedError("Press Job implementation must have an execute method")
diff --git a/press/press/doctype/site/archive.py b/press/press/doctype/site/archive.py
index c56a1cd8e36..80c07137cd7 100644
--- a/press/press/doctype/site/archive.py
+++ b/press/press/doctype/site/archive.py
@@ -82,7 +82,7 @@ def delete_offsite_backups_for_archived_sites():
offsite_backups DESC
""",
as_dict=True,
- )
+ ) # nosemgrep
for site in archived_sites:
try:
frappe.get_doc("Site", site.site).delete_offsite_backups()
diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.json b/press/workflow_engine/doctype/press_workflow/press_workflow.json
index 9267e6adefe..c36cadf458c 100644
--- a/press/workflow_engine/doctype/press_workflow/press_workflow.json
+++ b/press/workflow_engine/doctype/press_workflow/press_workflow.json
@@ -283,7 +283,7 @@
"link_fieldname": "workflow"
}
],
- "modified": "2026-04-16 22:25:25.102297",
+ "modified": "2026-04-17 02:44:53.455627",
"modified_by": "Administrator",
"module": "Workflow Engine",
"name": "Press Workflow",
@@ -301,6 +301,24 @@
"role": "System Manager",
"share": 1,
"write": 1
+ },
+ {
+ "email": 1,
+ "export": 1,
+ "print": 1,
+ "report": 1,
+ "role": "Press Admin",
+ "share": 1,
+ "write": 1
+ },
+ {
+ "email": 1,
+ "export": 1,
+ "print": 1,
+ "report": 1,
+ "role": "Press Member",
+ "share": 1,
+ "write": 1
}
],
"row_format": "Dynamic",
diff --git a/press/workflow_engine/doctype/press_workflow_object/press_workflow_object.json b/press/workflow_engine/doctype/press_workflow_object/press_workflow_object.json
index b24708a6002..75061a8b09e 100644
--- a/press/workflow_engine/doctype/press_workflow_object/press_workflow_object.json
+++ b/press/workflow_engine/doctype/press_workflow_object/press_workflow_object.json
@@ -54,7 +54,7 @@
],
"grid_page_length": 50,
"links": [],
- "modified": "2026-03-03 21:32:26.447603",
+ "modified": "2026-04-17 02:45:58.030816",
"modified_by": "Administrator",
"module": "Workflow Engine",
"name": "Press Workflow Object",
@@ -72,6 +72,26 @@
"role": "System Manager",
"share": 1,
"write": 1
+ },
+ {
+ "create": 1,
+ "email": 1,
+ "export": 1,
+ "print": 1,
+ "report": 1,
+ "role": "Press Admin",
+ "share": 1,
+ "write": 1
+ },
+ {
+ "create": 1,
+ "email": 1,
+ "export": 1,
+ "print": 1,
+ "report": 1,
+ "role": "Press Member",
+ "share": 1,
+ "write": 1
}
],
"row_format": "Dynamic",
diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json
index 1f9b7c93b29..e9b72c61577 100644
--- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json
+++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json
@@ -179,7 +179,7 @@
],
"grid_page_length": 50,
"links": [],
- "modified": "2026-04-14 16:25:01.842528",
+ "modified": "2026-04-17 02:45:26.200457",
"modified_by": "Administrator",
"module": "Workflow Engine",
"name": "Press Workflow Task",
@@ -197,6 +197,26 @@
"role": "System Manager",
"share": 1,
"write": 1
+ },
+ {
+ "create": 1,
+ "email": 1,
+ "export": 1,
+ "print": 1,
+ "report": 1,
+ "role": "Press Admin",
+ "share": 1,
+ "write": 1
+ },
+ {
+ "create": 1,
+ "email": 1,
+ "export": 1,
+ "print": 1,
+ "report": 1,
+ "role": "Press Member",
+ "share": 1,
+ "write": 1
}
],
"row_format": "Dynamic",
From 9eb85872dfd57f0c21c566b30cd24845da3e7430 Mon Sep 17 00:00:00 2001
From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com>
Date: Fri, 17 Apr 2026 02:49:38 +0530
Subject: [PATCH 05/22] fix(ci): Add OpenSSL in mypy ignore
---
mypy.ini | 2 ++
1 file changed, 2 insertions(+)
diff --git a/mypy.ini b/mypy.ini
index 4bc5a28bfeb..a9a09818af0 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -47,3 +47,5 @@ ignore_missing_imports = true
ignore_missing_imports = true
[mypy-PIL.*]
ignore_missing_imports = true
+[mypy-OpenSSL.*]
+ignore_missing_imports = true
\ No newline at end of file
From 97c88341848e0cd220f78f0386141da444eebde9 Mon Sep 17 00:00:00 2001
From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com>
Date: Fri, 17 Apr 2026 02:56:25 +0530
Subject: [PATCH 06/22] feat(press-job): Delete the step and scripts from
fixture
---
press/fixtures/press_job_type.json | 642 +-----------------
press/press/doctype/press_job/press_job.json | 5 +-
press/press/doctype/press_job/press_job.py | 2 +-
.../press_job_type/press_job_type.json | 28 +-
.../doctype/press_job_type/press_job_type.py | 5 -
5 files changed, 36 insertions(+), 646 deletions(-)
diff --git a/press/fixtures/press_job_type.json b/press/fixtures/press_job_type.json
index 5da8cd90430..3ebee370db8 100644
--- a/press/fixtures/press_job_type.json
+++ b/press/fixtures/press_job_type.json
@@ -1,722 +1,134 @@
[
{
- "callback_max_retry": 1,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2025-11-26 15:28:08.243873",
- "name": "Auto Scale Application Server",
- "steps": [
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type == \"Server\":\n server.scale_up()",
- "step_name": "Auto Scale Application Server",
- "wait_until_true": 0
- }
- ]
+ "name": "Auto Scale Application Server"
},
{
- "callback_max_retry": 5,
- "callback_script": "snapshot = frappe.get_doc(\"Server Snapshot\", arguments.get(\"server_snapshot\"))\nif doc.server_type == \"Server\":\n\tsnapshot.resume_app_server_services()\nelif doc.server_type == \"Database Server\":\n\tsnapshot.resume_database_server_services()\n\nif doc.status == \"Failure\":\n frappe.db.set_value(\"Server Snapshot\", snapshot.name, \"status\", \"Failure\", update_modified=False)\n for s in snapshot.snapshots:\n try:\n frappe.get_doc(\"Virtual Disk Snapshot\", s).delete_snapshot(ignore_validation=True)\n except:\n pass\n \nelse:\n snapshot.sync(now=False)",
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2026-04-14 11:33:58.268508",
- "name": "Snapshot Disk",
- "steps": [
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\n\nwhile True:\n try:\n machine.sync()\n finally:\n break\n\nif machine.status == \"Terminated\":\n raise Exception(\"Can't snapshot terminated virtual machine\")\nelif machine.status == \"Draft\":\n raise Exception(\"Can't snapshot draft virtual machine\")",
- "step_name": "Verify Virtual Machine Status",
- "wait_until_true": 0
- },
- {
- "script": "if doc.server_type == \"Server\" and arguments.get(\"is_consistent_snapshot\", False):\n server = frappe.get_doc(\"Server\", doc.server)\n output = server.ansible_run(\"systemctl stop docker\")\n if not (output and output.get(\"status\") == \"Success\"):\n raise Exception(\"Failed to stop docker daemon\")\nelse:\n result = (False, False) # Skipped\n",
- "step_name": "Stop Docker Daemon",
- "wait_until_true": 0
- },
- {
- "script": "if doc.server_type == \"Database Server\" and arguments.get(\"is_consistent_snapshot\", False):\n server = frappe.get_doc(\"Database Server\", doc.server)\n output = server.ansible_run(\"systemctl stop mariadb\")\n if not (output and output.get(\"status\") == \"Success\"):\n raise Exception(\"Failed to stop mariadb service\")\n result = (True, False)\nelse:\n result = (False, False) # Skipped\n",
- "step_name": "Stop MariaDB Service",
- "wait_until_true": 0
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\noutput = server.ansible_run(\"sync\")\nif not (output and output.get(\"status\") == \"Success\"):\n raise Exception(\"Failed to flush file system buffers to disk\")\n",
- "step_name": "Flush File System Buffers to Disk",
- "wait_until_true": 0
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.create_snapshots(exclude_boot_volume=True, dedicated_snapshot=True)\n\nfield_name = \"app_server_snapshot\" if doc.server_type == \"Server\" else \"database_server_snapshot\"\nno_of_snapshots = len(machine.flags.created_snapshots)\nif no_of_snapshots != 1:\n raise Exception(\"Expected 1 disk snapshot. Found : \"+str(no_of_snapshots))\n \nfrappe.db.set_value(\"Server Snapshot\", arguments.get(\"server_snapshot\"), field_name, machine.flags.created_snapshots[0], update_modified=False)",
- "step_name": "Snapshot Disk",
- "wait_until_true": 0
- }
- ]
+ "name": "Snapshot Disk"
},
{
- "callback_max_retry": 0,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2025-08-12 16:24:10.555919",
- "name": "Attach Volume",
- "steps": [
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nprint(machine.as_dict())\nif machine.cloud_provider in [\"AWS EC2\", \"OCI\"]:\n machine.attach_new_volume(machine.size, machine.iops, machine.throughput)\nelse:\n machine.attach_volume(size=100)",
- "step_name": "Attach Volume",
- "wait_until_true": 0
- }
- ]
+ "name": "Attach Volume"
},
{
- "callback_max_retry": 0,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2024-02-05 17:08:00.514456",
- "name": "Create Server Snapshot",
- "steps": [
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.stop()\n",
- "step_name": "Stop Virtual Machine",
- "wait_until_true": 0
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.sync()\nresult = (machine.status == \"Stopped\", False)",
- "step_name": "Wait for Virtual Machine to Stop",
- "wait_until_true": 1
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\narguments.image = machine.create_image()",
- "step_name": "Create Snapshot",
- "wait_until_true": 0
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\ntry:\n machine.start()\n result = (True, False)\nexcept:\n result = (False, False)",
- "step_name": "Start Virtual Machine",
- "wait_until_true": 1
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.sync()\nresult = (machine.status == \"Running\", False)",
- "step_name": "Wait for Virtual Machine to Start",
- "wait_until_true": 1
- },
- {
- "script": "image = frappe.get_doc(\"Virtual Machine Image\", arguments.image)\nimage.sync()\nresult = (image.status == \"Available\", False)",
- "step_name": "Wait for Snapshot to Complete",
- "wait_until_true": 1
- }
- ]
+ "name": "Create Server Snapshot"
},
{
- "callback_max_retry": 0,
- "callback_script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type in [\"Server\", \"Database Server\"] and server.is_for_recovery:\n filter_field = \"app_server\" if doc.server_type == \"Server\" else \"database_server\"\n recovery_record_name = frappe.db.get_value(\"Server Snapshot Recovery\", {filter_field: doc.server}, \"name\")\n if recovery_record_name:\n recovery_record = frappe.get_doc(\"Server Snapshot Recovery\", recovery_record_name, for_update=True)\n \n if doc.status == \"Success\":\n if doc.server_type == \"Server\":\n recovery_record.app_server_archived = True\n else:\n recovery_record.database_server_archived = True\n recovery_record.save()",
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2025-09-08 11:42:40.490054",
- "name": "Archive Server",
- "steps": [
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.disable_termination_protection()",
- "step_name": "Disable Termination Protection",
- "wait_until_true": 0
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.terminate()",
- "step_name": "Terminate Virtual Machine",
- "wait_until_true": 0
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\ntry:\n # Usually machine sync never fails\n # It can fail weirdly due to TimestampMismatchError or lock timeout\n # Don't fail this whole thing just because of that\n # Ignore the errors of sync and keep retrying\n machine.sync()\nexcept:\n pass\nresult = (machine.status == \"Terminated\", False)",
- "step_name": "Wait for Virtual Machine to Terminate",
- "wait_until_true": 1
- }
- ]
+ "name": "Archive Server"
},
{
- "callback_max_retry": 0,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2024-01-05 13:40:21.038901",
- "name": "Upgrade MariaDB",
- "steps": [
- {
- "script": "server = frappe.get_doc(\"Database Server\", doc.server)\nserver.stop_mariadb()",
- "step_name": "Stop MariaDB",
- "wait_until_true": 0
- },
- {
- "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Stop MariaDB\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", False)\n",
- "step_name": "Wait for MariaDB to Stop",
- "wait_until_true": 1
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.create_snapshots()\nsnapshot = frappe.get_last_doc(\"Virtual Disk Snapshot\", {\"virtual_machine\": doc.virtual_machine})\nsnapshot.add_comment(text=\"Before MariaDB Upgrade\")",
- "step_name": "Create Server Snapshot",
- "wait_until_true": 0
- },
- {
- "script": "server = frappe.get_doc(\"Database Server\", doc.server)\nserver.upgrade_mariadb()",
- "step_name": "Upgrade MariaDB",
- "wait_until_true": 0
- },
- {
- "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Upgrade MariaDB\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\n",
- "step_name": "Wait for MariaDB Upgrade to Complete",
- "wait_until_true": 1
- }
- ]
+ "name": "Upgrade MariaDB"
},
{
- "callback_max_retry": 0,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2025-02-14 14:30:00.676187",
- "name": "Increase Disk Size",
- "steps": [
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nmountpoint = arguments.labels.get(\"mountpoint\")\nserver.calculated_increase_disk_size(mountpoint=mountpoint)",
- "step_name": "Increase Disk Size",
- "wait_until_true": 0
- },
- {
- "script": "should_auto_increase = frappe.db.get_value(doc.server_type, doc.server, \"auto_increase_storage\")\nif not should_auto_increase:\n result = (True, False)\n\nelse:\n if frappe.db.get_value(doc.server_type, doc.server, \"provider\") == \"AWS EC2\":\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Extend EC2 Volume\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\n else:\n result = (True, False)",
- "step_name": "Wait for partition to resize (AWS)",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"OCI\":\n machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\n machine.sync()\n result = (machine.status == \"Running\", False)\nelse:\n result = (True, False)",
- "step_name": "Wait for server to start (OCI)",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"OCI\":\n server.ping_ansible()\n\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Ping Server\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", False)\nelse:\n result = (True, False)\n",
- "step_name": "Wait for server to be accessible (OCI)",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"OCI\":\n server.add_glass_file()\nelse:\n result = (True, False)\n # handled for aws already in extend playbook",
- "step_name": "Add glass file back (OCI)",
- "wait_until_true": 0
- },
- {
- "script": "if doc.server_type == 'Server':\n server = frappe.get_doc(doc.server_type, doc.server)\n server.start_active_benches()\n",
- "step_name": "Restart Active Benches",
- "wait_until_true": 0
- }
- ]
+ "name": "Increase Disk Size"
},
{
- "callback_max_retry": 0,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2025-11-12 09:35:41.121169",
- "name": "Prune Docker system",
- "steps": [
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\ntelegram_message = frappe.get_doc(\"Press Settings\").telegram_message\ntelegram_message.enqueue(f\"Pruning docker cache on [{server.name}]({frappe.utils.get_url_to_form(server.doctype, server.name)})\", \"Information\")\nserver.prune_docker_system()",
- "step_name": "Prune Docker system",
- "wait_until_true": 0
- },
- {
- "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Prune Docker System\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\n",
- "step_name": "Wait for docker system prune",
- "wait_until_true": 1
- }
- ]
+ "name": "Prune Docker system"
},
{
- "callback_max_retry": 0,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2025-11-12 09:35:21.819679",
- "name": "Increase Swap",
- "steps": [
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\ntelegram_message = frappe.get_doc(\"Press Settings\").telegram_message\ntelegram_message.enqueue(f\"Increasing swap on [{server.name}]({frappe.utils.get_url_to_form(server.doctype, server.name)})\", \"Information\")\nserver.increase_swap(4)",
- "step_name": "Add swap on server",
- "wait_until_true": 0
- },
- {
- "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Increase Swap\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")",
- "step_name": "Wait for swap to be added",
- "wait_until_true": 1
- }
- ]
+ "name": "Increase Swap"
},
{
- "callback_max_retry": 0,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2024-12-06 10:59:08.032149",
- "name": "Stop and Start Server",
- "steps": [
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.stop()",
- "step_name": "Stop Virtual Machine",
- "wait_until_true": 0
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.sync()\nresult = (machine.status == \"Stopped\", False)",
- "step_name": "Wait for Virtual Machine to Stop",
- "wait_until_true": 1
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\ntry:\n machine.start()\n result = (True, False)\nexcept:\n result = (False, False)",
- "step_name": "Start Virtual Machine",
- "wait_until_true": 1
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.sync()\nresult = (machine.status == \"Running\", False)",
- "step_name": "Wait for Virtual Machine to Start",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.ping_ansible()\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Ping Server\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", False)\n",
- "step_name": "Wait for Server to be accessible",
- "wait_until_true": 1
- }
- ]
+ "name": "Stop and Start Server"
},
{
- "callback_max_retry": 0,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2025-11-12 09:34:59.467479",
- "name": "Reset Swap",
- "steps": [
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\ntelegram_message = frappe.get_doc(\"Press Settings\").telegram_message\ntelegram_message.enqueue(f\"Resetting swap on [{server.name}]({frappe.utils.get_url_to_form(server.doctype, server.name)})\", \"Information\")\nserver.reset_swap()",
- "step_name": "Reset swap on server",
- "wait_until_true": 0
- },
- {
- "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Reset Swap\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")",
- "step_name": "Wait for swap to be reset",
- "wait_until_true": 1
- }
- ]
+ "name": "Reset Swap"
},
{
- "callback_max_retry": 0,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2025-07-11 15:20:56.780290",
- "name": "Warn disk at 80%",
- "steps": [
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nmountpoint = arguments.labels.get(\"mountpoint\")\nserver.recommend_disk_increase(mountpoint=mountpoint)",
- "step_name": "Send Warning",
- "wait_until_true": 0
- }
- ]
+ "name": "Warn disk at 80%"
},
{
- "callback_max_retry": 0,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2025-07-25 21:43:11.895128",
- "name": "Trigger Build Server Cleanup",
- "steps": [
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif server.use_for_build:\n server.prune_docker_system()\n \n",
- "step_name": "Trigger Build Server Cleanup",
- "wait_until_true": 0
- }
- ]
+ "name": "Trigger Build Server Cleanup"
+ },
+ {
+ "docstatus": 0,
+ "doctype": "Press Job Type",
+ "modified": "2025-08-31 20:54:46.857348",
+ "name": "Create Server (old)"
},
{
- "callback_max_retry": 0,
- "callback_script": "",
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2025-09-08 11:36:55.450275",
- "name": "Resume Services After Snapshot",
- "steps": [
- {
- "script": "if doc.server_type == \"Server\" and arguments.get(\"is_consistent_snapshot\", False):\n server = frappe.get_doc(\"Server\", doc.server)\n output = server.ansible_run(\"systemctl start docker\")\n if not (output and output.get(\"status\") == \"Success\"):\n raise Exception(\"Failed to start docker daemon\")\n\n frappe.db.set_value(\"Server Snapshot\", arguments.get(\"server_snapshot\"), \"app_server_services_started\", True, update_modified=False)\nelse:\n result = (False, False) # Skipped\n # As it wasn't a consistent snapshot services were never stopped\n frappe.db.set_value(\"Server Snapshot\", arguments.get(\"server_snapshot\"), \"app_server_services_started\", True, update_modified=False)",
- "step_name": "Start Docker Daemon",
- "wait_until_true": 0
- },
- {
- "script": "if doc.server_type == \"Database Server\" and arguments.get(\"is_consistent_snapshot\", False):\n server = frappe.get_doc(\"Database Server\", doc.server)\n output = server.ansible_run(\"systemctl start mariadb\")\n if not (output and output.get(\"status\") == \"Success\"):\n raise Exception(\"Failed to start mariadb service\")\n frappe.db.set_value(\"Server Snapshot\", arguments.get(\"server_snapshot\"), \"database_server_services_started\", True, update_modified=False)\n\nelse:\n result = (False, False) # Skipped\n # As it wasn't a consistent snapshot services were never stopped\n frappe.db.set_value(\"Server Snapshot\", arguments.get(\"server_snapshot\"), \"database_server_services_started\", True, update_modified=False)",
- "step_name": "Start MariaDB Service",
- "wait_until_true": 0
- }
- ]
+ "name": "Resume Services After Snapshot"
},
{
- "callback_max_retry": 1,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2025-11-12 11:06:08.243873",
- "name": "Prune Mirror Registry",
- "steps": [
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type == \"Registry Server\":\n server.prune_mirror_registry()",
- "step_name": "Prune Mirror Registry",
- "wait_until_true": 0
- }
- ]
+ "name": "Prune Mirror Registry"
},
{
- "callback_max_retry": 1,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2025-12-18 15:28:08.243873",
- "name": "Auto Scale Up Application Server",
- "steps": [
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type == \"Server\" and not server.scaled_up:\n server.scale_up(is_automatically_triggered=True)",
- "step_name": "Auto Scale Up Application Server",
- "wait_until_true": 0
- }
- ]
+ "name": "Auto Scale Up Application Server"
},
{
- "callback_max_retry": 1,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2025-12-18 15:28:08.243873",
- "name": "Auto Scale Down Application Server",
- "steps": [
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type == \"Server\" and server.scaled_up:\n server.scale_down(is_automatically_triggered=True)",
- "step_name": "Auto Scale Down Application Server",
- "wait_until_true": 0
- }
- ]
+ "name": "Auto Scale Down Application Server"
},
{
- "callback_max_retry": 0,
- "callback_script": null,
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2026-02-22 22:29:46.984146",
- "name": "Resize Server",
- "steps": [
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.stop()",
- "step_name": "Stop Virtual Machine",
- "wait_until_true": 0
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.sync()\nresult = (machine.status == \"Stopped\", False)",
- "step_name": "Wait for Virtual Machine to Stop",
- "wait_until_true": 1
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.resize(arguments.machine_type, arguments.get(\"upgrade_disk\", None))",
- "step_name": "Resize Virtual Machine",
- "wait_until_true": 0
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\ntry:\n machine.start()\n result = (True, False)\nexcept:\n result = (False, False)",
- "step_name": "Start Virtual Machine",
- "wait_until_true": 1
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.sync()\nresult = (machine.status == \"Running\", False)",
- "step_name": "Wait for Virtual Machine to Start",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.ping_ansible()\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Ping Server\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", False)\n",
- "step_name": "Wait for Server to be accessible",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif server.is_unified_server:\n server_doc = frappe.get_doc(\"Server\", doc.server)\n db_doc = frappe.get_doc(\"Database Server\", doc.server)\n\n server_doc.auto_scale_workers()\n db_doc.adjust_memory_config()\n\nelse:\n if doc.server_type == \"Database Server\":\n server.adjust_memory_config()\n\n elif doc.server_type == \"Server\":\n server.auto_scale_workers()\n",
- "step_name": "Set additional config",
- "wait_until_true": 0
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\n\nserver = frappe.get_doc(doc.server_type, doc.server)\nif server.plan:\n plan_disk_size = frappe.db.get_value(\"Server Plan\", server.plan, \"disk\")\n if plan_disk_size and plan_disk_size > machine.disk_size:\n try:\n server.increase_disk_size(increment=plan_disk_size - machine.disk_size)\n except:\n pass",
- "step_name": "Increase Disk Size",
- "wait_until_true": 0
- }
- ]
+ "name": "Resize Server"
},
{
- "callback_max_retry": 1,
- "callback_script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type in [\"Server\", \"Database Server\"] and server.is_for_recovery:\n filter_field = \"app_server\" if doc.server_type == \"Server\" else \"database_server\"\n recovery_record_name = frappe.db.get_value(\"Server Snapshot Recovery\", {filter_field: doc.server}, \"name\")\n if recovery_record_name:\n recovery_record = frappe.get_doc(\"Server Snapshot Recovery\", recovery_record_name, for_update=True)\n \n if doc.status == \"Success\":\n if doc.server_type == \"Server\":\n recovery_record.is_app_server_ready = True\n else:\n recovery_record.is_database_server_ready = True\n recovery_record.save()\n else:\n recovery_record.mark_server_provisioning_as_failed()\n \nif doc.server_type in [\"Server\", \"Database Server\"] and \"logical_replication_backup\" in arguments:\n if doc.status == \"Success\":\n frappe.get_doc(\"Logical Replication Backup\", arguments.get(\"logical_replication_backup\")).next()\n if doc.status == \"Failure\":\n frappe.get_doc(\"Logical Replication Backup\", arguments.get(\"logical_replication_backup\")).fail()\n \nif doc.server_type in [\"Server\", \"Database Server\"] and doc.status == \"Success\":\n server.is_provisioning_press_job_completed = 1\n server.save(ignore_permissions=True)\n \n if server.is_unified_server:\n frappe.db.set_value(\"Database Server\", server.database_server, \"is_provisioning_press_job_completed\", 1, update_modified =False)",
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2026-04-08 15:12:31.123007",
- "name": "Create Server",
- "steps": [
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.provision()\n",
- "step_name": "Create Server",
- "wait_until_true": 0
- },
- {
- "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\ntry:\n # Usually machine sync never fails\n # It can fail weirdly due to TimestampMismatchError or lock timeout\n # Don't fail this whole thing just because of that\n # Ignore the errors of sync and keep retrying\n machine.sync()\n\nexcept (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError):\n result = (False, False)\nexcept Exception as e:\n if \"rate_limit_exceeded\" in str(e):\n result = (False, False)\n else:\n raise e\nelse:\n result = (machine.status == \"Running\", False)\n",
- "step_name": "Wait for Server to start",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.ping_ansible()\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Ping Server\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n\nvirtual_machine = frappe.get_doc(\"Virtual Machine\", server.virtual_machine)\n\nresult = (plays and plays[0].status == \"Success\" and virtual_machine.private_ip_address != '', False)\n",
- "step_name": "Wait for Server to be accessible",
- "wait_until_true": 1
- },
- {
- "script": "try:\n vm = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\n vm.sync()\n if len(vm.volumes) > 0:\n result = (True, False)\n else:\n result = (False, False)\nexcept (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError):\n result = (False, False)\nexcept Exception as e:\n raise e",
- "step_name": "Sync Default Volumes",
- "wait_until_true": 1
- },
- {
- "script": "vm = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nif vm.data_disk_snapshot:\n is_created = vm.create_data_disk_volume_from_snapshot()\n if is_created:\n result = (True, False)\n else:\n arguments.update({\"max_volume_creation_retries\": arguments.get(\"max_volume_creation_retries\", 6)-1})\n if arguments.get(\"max_volume_creation_retries\") <= 0:\n result = (False, True)\n result = (False, False)",
- "step_name": "Create Volume From Snapshot",
- "wait_until_true": 1
- },
- {
- "script": "vm = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nif vm.data_disk_snapshot:\n is_attached = vm.check_and_attach_data_disk_snapshot_volume()\n if is_attached:\n result = (True, False)\n else:\n result = (False, False)",
- "step_name": "Attach Snapshotted Volume",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"AWS EC2\" and frappe.db.get_value(\"Virtual Machine\", server.virtual_machine, \"data_disk_snapshot\"):\n try:\n vm = frappe.get_doc(\"Virtual Machine\", server.virtual_machine)\n vm.sync()\n \n if len(vm.volumes) == 0 or (vm.data_disk_snapshot_attached and len(vm.volumes) == 1):\n result = (False, False)\n else:\n server.reload()\n server.validate_mounts()\n server.save()\n result = (True, False)\n except (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError):\n result = (False, False)\n except Exception as e:\n raise e\nelse:\n result = (True, False)",
- "step_name": "Sync Attached Volumes",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"AWS EC2\" and frappe.db.get_value(\"Virtual Machine\", server.virtual_machine, \"data_disk_snapshot\"):\n cleanup_db_replication_files = False\n if doc.server_type == \"Database Server\" and (server.is_for_recovery or arguments.get(\"setup_db_replication\", False)):\n cleanup_db_replication_files = True\n server.mount_volumes(\n now=False,\n stop_docker_before_mount=doc.server_type == \"Server\",\n stop_mariadb_before_mount=doc.server_type == \"Database Server\",\n # If server is in recovery mode, don't start docker and containers\n # Because If site gets active, background job witll be started and that can modify data\n start_docker_after_mount=doc.server_type == \"Server\" and not server.is_for_recovery,\n # If goal is to create replica server, don't start database\n # As we need to do some additional config before starting database\n start_mariadb_after_mount=doc.server_type == \"Database Server\" and not arguments.get(\"setup_db_replication\", False),\n cleanup_db_replication_files=cleanup_db_replication_files,\n # It's important to change uuid, labels of attached disk\n # There is high chance that the root disk and data disk might have same disk info\n rotate_additional_volume_metadata=True\n )\nelse:\n result = (False, False)",
- "step_name": "Mount Data Disk",
- "wait_until_true": 0
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"AWS EC2\" and frappe.db.get_value(\"Virtual Machine\", server.virtual_machine, \"data_disk_snapshot\"):\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Mount Volumes\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\nelse:\n result = None",
- "step_name": "Wait for Data Disk Mount to Complete",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.wait_for_cloud_init()",
- "step_name": "Check Cloud Init status",
- "wait_until_true": 0
- },
- {
- "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Wait for Cloud Init to finish\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status in (\"Success\", \"Failure\"), False)",
- "step_name": "Wait for Cloud Init to finish",
- "wait_until_true": 1
- },
- {
- "script": "provider = frappe.db.get_value(doc.server_type, doc.server, 'provider')\nif provider == \"Hetzner\" and doc.virtual_machine:\n vm = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\n server = frappe.get_doc(doc.server_type, doc.server)\n \n if vm.virtual_machine_image:\n vmi = frappe.get_doc(\"Virtual Machine Image\", vm.virtual_machine_image)\n\n if vmi.has_data_volume:\n # If VMI has data volume, then only proceed with attaching disk\n if server.plan:\n data_disk_size = int(frappe.db.get_value(\"Server Plan\", server.plan ,\"disk\"))\n else:\n data_disk_size = 25\n \n vm.attach_new_volume(data_disk_size)\n \n # Do Virtual Machine Sync\n # Until we got no error\n max_sync_tries = 100\n while max_sync_tries:\n try:\n vm.sync()\n break\n except Exception as e:\n max_sync_tries = max_sync_tries - 1\n if max_sync_tries <=0 :\n raise e\n \n server.validate_mounts()\n server.save(ignore_version=True) # To avoid timestamp mismatch errors\n server.mount_volumes(now=False)\n result = (True, False)\n else:\n result = (False, False)\n",
- "step_name": "Create and mount volumes (Hetzner)",
- "wait_until_true": 0
- },
- {
- "script": "provider = frappe.db.get_value(doc.server_type, doc.server, 'provider')\nif provider == \"Hetzner\" and doc.virtual_machine:\n vm = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\n\n if vm.virtual_machine_image:\n vmi = frappe.get_doc(\"Virtual Machine Image\", vm.virtual_machine_image)\n if vmi.has_data_volume:\n # Check for running ansible play\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Mount Volumes\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", False)\n else:\n result = None\n",
- "step_name": "Wait for volumes to mount",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"Hetzner\" and server.has_data_volume:\n if server.doctype == \"Server\":\n server.setup_docker()\n elif server.doctype == \"Database Server\":\n server.set_mariadb_mount_dependency()\nelse:\n result = (False, False)",
- "step_name": "Configure apps for mounts (Hetzner)",
- "wait_until_true": 0
- },
- {
- "script": "play_statuses = frappe.db.get_all(\"Ansible Play\", filters={\n \"server_type\": doc.server_type,\n \"server\": doc.server,\n \"play\": (\"in\", [\"Install Docker\", \"Setup MariaDB Mount Dependency\"])\n}, pluck=\"status\")\n\nall_completed = True\nis_failure = False\n\nfor status in play_statuses:\n if status == \"Failure\":\n is_failure = True\n \n all_completed = all_completed and status == \"Success\"\n \nif all_completed:\n result = (True, False)\nelif is_failure:\n result = (False, True)\nelse:\n result = (False, False)",
- "step_name": "Wait For Dependent Plays To Finish (Hetzner)",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.update_tls_certificate()",
- "step_name": "Update TLS Certificate",
- "wait_until_true": 0
- },
- {
- "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Setup TLS Certificates\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status in (\"Success\", \"Failure\"), False)",
- "step_name": "Wait for TLS Certificate to be updated",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.update_agent_ansible()",
- "step_name": "Update Agent Ansible",
- "wait_until_true": 0
- },
- {
- "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Update Agent\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status in (\"Success\", \"Failure\"), False)",
- "step_name": "Wait for Agent to be updated",
- "wait_until_true": 1
- },
- {
- "script": "\nif doc.server_type == \"Database Server\" and arguments.get(\n\t\"setup_db_replication\", False\n):\n\tpass\n",
- "step_name": "Stop MariaDB Slave",
- "wait_until_true": 0
- },
- {
- "script": "if doc.server_type == \"Database Server\":\n server = frappe.get_doc(\"Database Server\", doc.server)\n server.upgrade_mariadb()\n\nif doc.is_unified_server:\n database_server = frappe.get_doc(\"Database Server\", doc.database_server)\n database_server.upgrade_mariadb()",
- "step_name": "Upgrade MariaDB",
- "wait_until_true": 0
- },
- {
- "script": "if doc.server_type == \"Database Server\" or doc.is_unified_server:\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Upgrade MariaDB\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\nelse:\n result = (True,)",
- "step_name": "Wait for MariaDB Upgrade to Complete",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type == \"Database Server\" and arguments.get(\n\t\"setup_db_replication\", False\n):\n\tserver.prepare_mariadb_replica()\n\tresult = (True, False)\nelse:\n\tresult = (False, False)\n",
- "step_name": "Prepare MariaDB Replica",
- "wait_until_true": 0
- },
- {
- "script": "if doc.server_type == \"Database Server\" and arguments.get(\"setup_db_replication\", False):\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Prepare MariaDB Replica\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\nelse:\n result = (True,)",
- "step_name": "Wait for MariaDB Replica to Be Prepared",
- "wait_until_true": 1
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type == \"Database Server\" and arguments.get(\n\t\"setup_db_replication\", False\n):\n\tserver.configure_replication()\n\tresult = (True, False)\nelse:\n\tresult = (False, False)\n",
- "step_name": "Configure MariaDB Replica",
- "wait_until_true": 0
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type == \"Database Server\" and arguments.get(\n\t\"setup_db_replication\", False\n):\n\tserver.start_replication()\n\tresult = (True, False)\nelse:\n\tresult = (False, False)\n",
- "step_name": "Start MariaDB Replica",
- "wait_until_true": 0
- },
- {
- "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.set_additional_config()",
- "step_name": "Set additional config",
- "wait_until_true": 0
- },
- {
- "script": "if doc.server.startswith(\"fs\") and doc.server_type == \"Server\":\n primary_server = frappe.db.get_value(\"Server\", doc.server, \"primary\")\n nfs_volume_attachment = frappe.get_doc(\n\t {\"doctype\": \"NFS Volume Attachment\", \"primary_server\": primary_server}\n )\n nfs_volume_attachment.insert(ignore_permissions=True)\n frappe.db.commit()",
- "step_name": "Share benches over NFS",
- "wait_until_true": 0
- }
- ]
+ "name": "Create Server"
},
{
- "callback_max_retry": 1,
- "callback_script": "if doc.status == \"Success\":\n failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n failover.is_db_server_failover_setup = False\n failover.is_app_server_failover_setup = False\n failover.enabled = False\n failover.save()",
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2026-03-18 17:20:29.325620",
- "name": "Remove On-Prem Failover",
- "steps": [
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.remove_app_server_from_firewall()",
- "step_name": "Remove Wireguard Port Access from App Server",
- "wait_until_true": 0
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.remove_db_server_from_firewall()",
- "step_name": "Remove Wireguard Port Access from DB Server",
- "wait_until_true": 0
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.stop_replication_from_app_server()",
- "step_name": "Stop Replication from App Server",
- "wait_until_true": 0
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": failover.app_server, \"play\": \"Stop App Server Replication to On-Premise\", \"creation\": (\">=\", doc.creation)}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\n",
- "step_name": "Wait for Stop Replication from App Server",
- "wait_until_true": 1
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.stop_replication_from_db_server()",
- "step_name": "Stop Replication from DB Server",
- "wait_until_true": 0
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": failover.database_server, \"play\": \"Stop Database Server Replication to On-Premise\", \"creation\": (\">=\", doc.creation)}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\n",
- "step_name": "Wait for Stop Replication from DB Server",
- "wait_until_true": 1
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.delete_firewall()",
- "step_name": "Delete Firewall",
- "wait_until_true": 0
- }
- ]
+ "name": "Remove On-Prem Failover"
},
{
- "callback_max_retry": 1,
- "callback_script": "if doc.status == \"Success\":\n failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n failover.is_db_server_failover_setup = True\n failover.is_app_server_failover_setup = True\n failover.save()",
"docstatus": 0,
"doctype": "Press Job Type",
"modified": "2026-03-18 17:17:19.436686",
- "name": "Setup On-Prem Failover",
- "steps": [
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.add_app_server_to_firewall()",
- "step_name": "Allow Wireguard Port Through Security Group on App Server",
- "wait_until_true": 0
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.add_db_server_to_firewall()",
- "step_name": "Allow Wireguard Port Through Security Group on DB Server",
- "wait_until_true": 0
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.setup_wireguard_on_app_server()",
- "step_name": "Setup Wireguard on App Server",
- "wait_until_true": 0
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.setup_wireguard_on_database_server()",
- "step_name": "Setup Wireguard on DB Server",
- "wait_until_true": 0
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.check_connectivity_to_on_premise_server()\n\nfailover.reload()\nif failover.is_on_prem_server_ssh_from_app_server_working and failover.is_on_prem_server_ssh_from_db_server_working:\n result = (True, False)\nelse:\n result = (False, False)",
- "step_name": "Test Connectivity to On-Prem Server",
- "wait_until_true": 1
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.setup_app_server_replica()",
- "step_name": "Setup Replication for App Server",
- "wait_until_true": 0
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": failover.app_server, \"play\": \"Setup App Server Replication Sync\", \"creation\": (\">=\", doc.creation)}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\n",
- "step_name": "Wait For Replication Setup for App Server",
- "wait_until_true": 1
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.setup_db_lsync_for_initial_sync()",
- "step_name": "Setup Lsyncd For Initial DB Sync",
- "wait_until_true": 0
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": failover.database_server, \"play\": \"Setup Database Lsyncd for On-Premise Failover\", \"creation\": (\">=\", doc.creation)}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")",
- "step_name": "Wait For Setup Lsyncd For Initial DB Sync",
- "wait_until_true": 1
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n\nif failover.db_lsyncd_stop_at and (frappe.utils.now_datetime() > failover.db_lsyncd_stop_at):\n result = (True, False)\nelse:\n result = (False, False)",
- "step_name": "Wait For Initial DB Sync To Complete",
- "wait_until_true": 1
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.setup_db_rsync_for_final_sync()",
- "step_name": "Rsync New DB Files",
- "wait_until_true": 0
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": failover.database_server, \"play\": \"Final Database Sync for On-Premise Failover\", \"creation\": (\">=\", doc.creation)}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")",
- "step_name": "Wait For Rsync New DB Files",
- "wait_until_true": 1
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.setup_and_configure_database_replica()\n",
- "step_name": "Setup Replica In On-Prem Server",
- "wait_until_true": 0
- },
- {
- "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": failover.database_server, \"play\": \"Setup Replica on On-Premise Server\", \"creation\": (\">=\", doc.creation)}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")",
- "step_name": "Wait For Setup Replica In On-Prem Server",
- "wait_until_true": 1
- }
- ]
+ "name": "Setup On-Prem Failover"
}
]
\ No newline at end of file
diff --git a/press/press/doctype/press_job/press_job.json b/press/press/doctype/press_job/press_job.json
index 546418bf126..68b9a6b57c4 100644
--- a/press/press/doctype/press_job/press_job.json
+++ b/press/press/doctype/press_job/press_job.json
@@ -22,11 +22,10 @@
"fields": [
{
"fieldname": "job_type",
- "fieldtype": "Link",
+ "fieldtype": "Data",
"in_list_view": 1,
"in_standard_filter": 1,
"label": "Job Type",
- "options": "Press Job Type",
"reqd": 1,
"search_index": 1,
"set_only_once": 1
@@ -123,7 +122,7 @@
"link_fieldname": "linked_docname"
}
],
- "modified": "2026-04-16 23:16:27.885432",
+ "modified": "2026-04-17 02:56:56.915694",
"modified_by": "Administrator",
"module": "Press",
"name": "Press Job",
diff --git a/press/press/doctype/press_job/press_job.py b/press/press/doctype/press_job/press_job.py
index 8c447ce7f18..552beb92114 100644
--- a/press/press/doctype/press_job/press_job.py
+++ b/press/press/doctype/press_job/press_job.py
@@ -94,7 +94,7 @@ class PressJob(WorkflowBuilder):
arguments: DF.SmallText
duration: DF.Duration | None
end: DF.Datetime | None
- job_type: DF.Link
+ job_type: DF.Data
name: DF.Int | None
server: DF.DynamicLink | None
server_type: DF.Link | None
diff --git a/press/press/doctype/press_job_type/press_job_type.json b/press/press/doctype/press_job_type/press_job_type.json
index 0e350db1652..65a21feba46 100644
--- a/press/press/doctype/press_job_type/press_job_type.json
+++ b/press/press/doctype/press_job_type/press_job_type.json
@@ -7,37 +7,21 @@
"editable_grid": 1,
"engine": "InnoDB",
"field_order": [
- "steps",
- "callback_script",
- "callback_max_retry"
+ "disclaimer"
],
"fields": [
- {
- "fieldname": "steps",
- "fieldtype": "Table",
- "label": "Steps",
- "options": "Press Job Type Step",
- "reqd": 1
- },
- {
- "description": "The callback function will be called once Press Job reaches the terminating state [Success, Failure].",
- "fieldname": "callback_script",
- "fieldtype": "Code",
- "label": "Callback Script",
- "options": "Python"
- },
{
"default": "1",
- "fieldname": "callback_max_retry",
- "fieldtype": "Int",
- "in_list_view": 1,
- "label": "Callback Max Retry"
+ "fieldname": "disclaimer",
+ "fieldtype": "HTML",
+ "label": "Disclaimer",
+ "options": "The steps and callback script for press job has been deprecated and moved to code.
\n
Please check press/press/doctype/press_job/jobs folder for more info
" } ], "grid_page_length": 50, "index_web_pages_for_search": 1, "links": [], - "modified": "2025-07-31 13:52:28.892322", + "modified": "2026-04-17 02:55:16.274403", "modified_by": "Administrator", "module": "Press", "name": "Press Job Type", diff --git a/press/press/doctype/press_job_type/press_job_type.py b/press/press/doctype/press_job_type/press_job_type.py index aef0ee11eae..0a0c5dfb090 100644 --- a/press/press/doctype/press_job_type/press_job_type.py +++ b/press/press/doctype/press_job_type/press_job_type.py @@ -14,11 +14,6 @@ class PressJobType(Document): if TYPE_CHECKING: from frappe.types import DF - from press.press.doctype.press_job_type_step.press_job_type_step import PressJobTypeStep - - callback_max_retry: DF.Int - callback_script: DF.Code | None - steps: DF.Table[PressJobTypeStep] # end: auto-generated types pass From 78ca7ca9821e5cf44cc9013a7e7f6d6188bf988c Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 17 Apr 2026 03:04:31 +0530 Subject: [PATCH 07/22] refactor(release-pipeline): Use defer_current_task function for retry --- .../release_pipeline/release_pipeline.py | 44 ++++--------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/press/press/doctype/release_pipeline/release_pipeline.py b/press/press/doctype/release_pipeline/release_pipeline.py index eb1c4633d8d..e8aabe9feab 100644 --- a/press/press/doctype/release_pipeline/release_pipeline.py +++ b/press/press/doctype/release_pipeline/release_pipeline.py @@ -19,7 +19,6 @@ ) from press.press.doctype.bench_update.bench_update import get_bench_update from press.workflow_engine.doctype.press_workflow.decorators import flow, task -from press.workflow_engine.doctype.press_workflow.exceptions import PressWorkflowTaskEnqueued from press.workflow_engine.doctype.press_workflow.workflow_builder import WorkflowBuilder if typing.TYPE_CHECKING: @@ -169,12 +168,6 @@ def workflow_name(self) -> str: "Press Workflow", {"linked_doctype": 
"Release Pipeline", "linked_docname": self.name}, "name" ) - def get_task_name(self, func): - """Get task name for the given function""" - return frappe.db.get_value( - "Press Workflow Task", {"method_name": func.__name__, "workflow": self.workflow_name}, "name" - ) - @task def validate_app_hashes(self, apps: list[dict[str, str]]): """Validate App Hashes""" @@ -245,10 +238,8 @@ def _check_for_scheduled_build_retries(self, deploy_candidate_build: str): if deploy_candidate_build_doc.should_build_retry(exc=None, job=agent_job): self.update_pipeline_status("Retrying") - raise PressWorkflowTaskEnqueued( - f"Build {deploy_candidate_build} has scheduled retries. Waiting for retries to complete.", - self.workflow_name, - self.get_task_name(self.monitor_pre_build_validation), + self.defer_current_task( + f"Build {deploy_candidate_build} has scheduled retries. Waiting for retries to complete." ) def _get_latest_retried_build(self, deploy_candidate_build: str) -> str: @@ -274,7 +265,6 @@ def _get_latest_retried_build(self, deploy_candidate_build: str) -> str: @task def monitor_pre_build_validation(self, deploy_candidate_build: str): """Monitors the Deploy Candidate Build until the remote build job is created.""" - task_name = self.get_task_name(self.monitor_pre_build_validation) deploy_candidate_build_status = frappe.db.get_value( "Deploy Candidate Build", deploy_candidate_build, "status" ) @@ -288,10 +278,8 @@ def monitor_pre_build_validation(self, deploy_candidate_build: str): "Please check the build logs for more details." 
) - raise PressWorkflowTaskEnqueued( - f"Waiting for remote build job to be enqueued for Deploy Candidate Build {deploy_candidate_build}", - self.workflow_name, - task_name, + self.defer_current_task( + f"Waiting for remote build job to be enqueued for Deploy Candidate Build {deploy_candidate_build}" ) @task @@ -312,10 +300,8 @@ def monitor_build_success(self, deploy_candidate_build: str): f"Remote build failed for Deploy Candidate Build {deploy_candidate_build}. Please check the build logs for more details." ) - raise PressWorkflowTaskEnqueued( - f"Waiting for build to complete for Deploy Candidate Build {deploy_candidate_build}", - self.workflow_name, - self.get_task_name(self.monitor_build_success), + self.defer_current_task( + f"Waiting for build to complete for Deploy Candidate Build {deploy_candidate_build}" ) def _is_active_bench_work_in_progress(self, builds: list[str]) -> bool: @@ -510,11 +496,7 @@ def orchestrate_build_monitoring(self, deploy_candidate: str, primary_build: str if not secondary_build: # Wait for sometime for the secondary build to be created in case of any delays in build scheduling - raise PressWorkflowTaskEnqueued( - f"Waiting for secondary build creation for {deploy_candidate}", - self.workflow_name, - self.get_task_name(self.monitor_build_success), - ) + self.defer_current_task(f"Waiting for secondary build to be created for {deploy_candidate}") self.monitor_pre_build_validation(secondary_build) self.monitor_build_success(secondary_build) @@ -535,22 +517,14 @@ def monitor_bench_creation(self, deploy_candidate_build: str): # This should take care of the retries as well. 
if self._is_active_bench_work_in_progress(builds): - raise PressWorkflowTaskEnqueued( - "Benches in progress, Waiting...", - self.workflow_name, - self.get_task_name(self.monitor_bench_creation), - ) + self.defer_current_task("Benches in progress, Waiting...") # Just another safety lock to ensure no early failures occur statues = frappe.db.get_all("Bench", {"build": ["in", builds]}, pluck="status") in_transition = [status for status in statues if status in BENCH_TRANSITION_STATES] if in_transition: - raise PressWorkflowTaskEnqueued( - "Benches are in transition states...", - self.workflow_name, - self.get_task_name(self.monitor_bench_creation), - ) + self.defer_current_task("Benches are in transition states...") self._finalize_pipeline_status(builds=builds, expected_count=expected) From d141bec0f2813d2cbd1772a9580a50b0c88ca77d Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Sat, 18 Apr 2026 20:47:32 +0530 Subject: [PATCH 08/22] fix(server): Fix server creation tests --- press/api/tests/test_server.py | 82 +++++++++++++------ press/press/doctype/press_job/press_job.py | 15 ++++ press/utils/test.py | 69 ++++++++-------- .../doctype/press_workflow/press_workflow.py | 5 +- .../press_workflow/workflow_builder.py | 3 +- 5 files changed, 117 insertions(+), 57 deletions(-) diff --git a/press/api/tests/test_server.py b/press/api/tests/test_server.py index 241eb206550..e8e3e1cdb57 100644 --- a/press/api/tests/test_server.py +++ b/press/api/tests/test_server.py @@ -3,6 +3,8 @@ from __future__ import annotations +from typing import TYPE_CHECKING +from unittest import mock from unittest.mock import MagicMock, Mock, patch import frappe @@ -14,6 +16,7 @@ from press.press.doctype.cluster.cluster import Cluster from press.press.doctype.cluster.test_cluster import create_test_cluster from press.press.doctype.database_server.database_server import DatabaseServer +from press.press.doctype.press_job.jobs.resize_server import 
ResizeServerJob from press.press.doctype.proxy_server.test_proxy_server import create_test_proxy_server from press.press.doctype.server.server import BaseServer from press.press.doctype.team.test_team import create_test_press_admin_team @@ -27,6 +30,9 @@ from press.runner import Ansible from press.utils.test import foreground_enqueue_doc_with_user +if TYPE_CHECKING: + from press.press.doctype.press_job.press_job import PressJob + def create_test_server_plan( document_type: str, @@ -67,53 +73,68 @@ def unavailable_check_machine_availability(self: Cluster, machine_type: str, ins return False -def successful_sync(self: VirtualMachine): +def successful_sync_with_memory(memory): + def _sync(self): + return successful_sync(self, memory) + + return _sync + + +def successful_sync(self: VirtualMachine, memory: int | None = None): self.status = "Running" if not self.volumes: self.append( "volumes", {"volume_id": "vol-123456", "size": 20, "volume_type": "gp2", "device": "/dev/sda1"} ) + if memory: + self.ram = memory self.save() self.update_servers() -def successful_ping_ansible(self: BaseServer): - create_test_ansible_play("Ping Server", "ping.yml", self.doctype, self.name) +def successful_ping_ansible(self: BaseServer, *args, **kwargs): + return create_test_ansible_play("Ping Server", "ping.yml", self.doctype, self.name) -def successful_upgrade_mariadb(self: DatabaseServer): - create_test_ansible_play("Upgrade MariaDB", "upgrade_mariadb.yml", self.doctype, self.name) +def successful_upgrade_mariadb(self: DatabaseServer, *args, **kwargs): + return create_test_ansible_play("Upgrade MariaDB", "upgrade_mariadb.yml", self.doctype, self.name) -def successful_upgrade_mariadb_patched(self: DatabaseServer): - create_test_ansible_play( +def successful_upgrade_mariadb_patched(self: DatabaseServer, *args, **kwargs): + return create_test_ansible_play( "Upgrade MariaDB Patched", "upgrade_mariadb_patched.yml", self.doctype, self.name ) -def successful_tls_certificate(self: BaseServer): - 
create_test_ansible_play("Setup TLS Certificates", "tls.yml", self.doctype, self.name) +def successful_tls_certificate(self: BaseServer, *args, **kwargs): + return create_test_ansible_play("Setup TLS Certificates", "tls.yml", self.doctype, self.name) -def successful_update_agent_ansible(self: BaseServer): - create_test_ansible_play("Update Agent", "update_agent.yml", self.doctype, self.name) +def successful_update_agent_ansible(self: BaseServer, *args, **kwargs): + return create_test_ansible_play("Update Agent", "update_agent.yml", self.doctype, self.name) -def successful_wait_for_cloud_init(self: BaseServer): - create_test_ansible_play( +def successful_wait_for_cloud_init(self: BaseServer, *args, **kwargs): + return create_test_ansible_play( "Wait for Cloud Init to finish", "wait_for_cloud_init.yml", self.doctype, self.name ) @patch.object(VirtualMachineImage, "client", new=MagicMock()) @patch.object(VirtualMachine, "client", new=MagicMock()) +@patch.object(VirtualMachine, "provision", new=successful_provision) +@patch.object(VirtualMachine, "sync", new=successful_sync) @patch.object(Ansible, "run", new=Mock()) @patch.object(BaseServer, "ping_ansible", new=successful_ping_ansible) @patch.object(DatabaseServer, "upgrade_mariadb", new=successful_upgrade_mariadb) -@patch.object(DatabaseServer, "upgrade_mariadb_patched", new=successful_upgrade_mariadb_patched) +@patch.object(DatabaseServer, "_upgrade_mariadb", new=successful_upgrade_mariadb) +@patch.object(DatabaseServer, "upgrade_mariadb_patched", new=successful_upgrade_mariadb) +@patch.object(DatabaseServer, "_upgrade_mariadb_patched", new=successful_upgrade_mariadb_patched) @patch.object(BaseServer, "wait_for_cloud_init", new=successful_wait_for_cloud_init) +@patch.object(BaseServer, "_wait_for_cloud_init", new=successful_wait_for_cloud_init) @patch.object(BaseServer, "update_tls_certificate", new=successful_tls_certificate) @patch.object(BaseServer, "update_agent_ansible", new=successful_update_agent_ansible) 
+@patch.object(BaseServer, "_update_agent_ansible", new=successful_update_agent_ansible) @patch.object(Cluster, "check_machine_availability", new=available_check_machine_availability) class TestAPIServer(FrappeTestCase): @patch.object(Cluster, "provision_on_aws_ec2", new=Mock()) @@ -140,12 +161,13 @@ def test_create_new_server_creates_pending_server_and_db_server(self): create_test_virtual_machine_image( cluster=self.cluster, series="f" ) # call from here and not setup, so mocks work + frappe.set_user(self.team.user) - servers_before = self._get_doc_count("Server", "Pending", self.team.name) - db_servers_before = self._get_doc_count("Database Server", "Pending", self.team.name) + servers_before = self._get_doc_count("Server", "Active", self.team.name) + db_servers_before = self._get_doc_count("Database Server", "Active", self.team.name) - new( + response = new( { "cluster": self.cluster.name, "db_plan": self.db_plan.name, @@ -154,8 +176,20 @@ def test_create_new_server_creates_pending_server_and_db_server(self): } ) - servers_after = self._get_doc_count("Server", "Pending", self.team.name) - db_servers_after = self._get_doc_count("Database Server", "Pending", self.team.name) + server_name = response["server"] + database_server_name = frappe.db.get_value("Server", server_name, "database_server") + + create_app_server_press_job: PressJob = frappe.get_last_doc( + "Press Job", {"server_type": "Server", "server": server_name} + ) + create_db_server_press_job: PressJob = frappe.get_last_doc( + "Press Job", {"server_type": "Database Server", "server": database_server_name} + ) + self.assertEqual(create_app_server_press_job.status, "Success") + self.assertEqual(create_db_server_press_job.status, "Success") + + servers_after = self._get_doc_count("Server", "Active", self.team.name) + db_servers_after = self._get_doc_count("Database Server", "Active", self.team.name) self.assertEqual(servers_before + 1, servers_after) self.assertEqual(db_servers_before + 1, db_servers_after) 
@@ -232,6 +266,7 @@ def test_new_fn_creates_server_with_active_subscription(self): @patch.object(VirtualMachine, "provision", new=successful_provision) @patch.object(VirtualMachine, "sync", new=successful_sync) + @patch.object(ResizeServerJob, "wait_for_virtual_machine_to_stop", new=mock.Mock()) def test_change_plan_changes_plan_of_server_and_updates_subscription_doc(self): create_test_virtual_machine_image(cluster=self.cluster, series="m") create_test_virtual_machine_image( @@ -259,10 +294,11 @@ def test_change_plan_changes_plan_of_server_and_updates_subscription_doc(self): "Press Job", {"status": "Running"}, "status", "Success" ) # Mark running jobs as success as extra steps we don't check - change_plan( - server.name, - app_plan_2.name, - ) + with patch.object(VirtualMachine, "sync", new=successful_sync_with_memory(app_plan_2.memory)): + change_plan( + server.name, + app_plan_2.name, + ) server.reload() app_subscription = frappe.get_doc( diff --git a/press/press/doctype/press_job/press_job.py b/press/press/doctype/press_job/press_job.py index 552beb92114..a5148da225b 100644 --- a/press/press/doctype/press_job/press_job.py +++ b/press/press/doctype/press_job/press_job.py @@ -124,6 +124,21 @@ def virtual_machine_doc(self) -> VirtualMachine | None: self._virtual_machine_doc = frappe.get_doc("Virtual Machine", self.virtual_machine) return self._virtual_machine_doc # type: ignore + @property + def steps(self) -> list[dict[str, str]]: + try: + workflow = frappe.get_last_doc("Press Workflow", {"linked_docname": self.name}) + return [ + { + "method": step.step_method, + "title": step.step_title, + "status": step.status, + } + for step in workflow.steps + ] + except frappe.DoesNotExistError: + return [] + def before_insert(self): frappe.db.get_value(self.server_type, self.server, "status", for_update=True) if existing_jobs := frappe.db.get_all( diff --git a/press/utils/test.py b/press/utils/test.py index 7de9df6d505..4f8e907618a 100644 --- a/press/utils/test.py +++ 
b/press/utils/test.py @@ -1,5 +1,6 @@ """Utility methods for writing tests""" +import os import sys from collections.abc import Callable from urllib.parse import urlparse, urlunparse @@ -7,6 +8,8 @@ import frappe import requests +_workflow_log_buffer: list[str] = [] + def foreground_enqueue_doc_with_user(run_as_user: str): def wrapper(*args, **kwargs): @@ -47,7 +50,7 @@ def foreground_enqueue_doc( getattr(frappe.get_doc(doctype, docname), method)(**kwargs) -def _foreground_run_workflow_doc(doctype: str, docname: str, job_id: str) -> None: +def _foreground_run_workflow_doc(doctype: str, docname: str, job_id: str, max_retries: int = 50) -> None: # noqa: C901 """ Tracks in-flight job IDs to prevent direct recursion. When the same job_id is re-enqueued while it is already on the call-stack the request is deferred; @@ -63,17 +66,21 @@ def _foreground_run_workflow_doc(doctype: str, docname: str, job_id: str) -> Non in_flight: set = frappe.local._fg_wf_in_flight pending: dict = frappe.local._fg_wf_pending + log_immediate = os.environ.get("PRESS_LOG_WORKFLOW_DEBUG_INFO") in ("1", "true", "True") + + def _log(msg: str) -> None: + _workflow_log_buffer.append(msg) + if log_immediate: + print(msg, file=sys.stderr, flush=True) + if job_id in in_flight: # Already executing this job - defer until the outermost call drains it. 
- print( - f"[FG] DEFER {job_id} (in-flight: {sorted(in_flight)})", - file=sys.stderr, - flush=True, - ) + _log(f"[WORKFLOW] DEFER {job_id} (in-flight: {sorted(in_flight)})") pending[job_id] = (doctype, docname) return - print(f"[FG] START {job_id}", file=sys.stderr, flush=True) + _log(f"[WORKFLOW] START {job_id}") + in_flight.add(job_id) method_title = "unknown_method" try: @@ -83,46 +90,38 @@ def _foreground_run_workflow_doc(doctype: str, docname: str, job_id: str) -> Non if hasattr(doc, "main_method_title") else (doc.method_title if hasattr(doc, "method_title") else "unknown_method") ) - print( - f"[FG] RUN {job_id} {method_title} | status={getattr(doc, 'status', '?')}", - file=sys.stderr, - flush=True, - ) + _log(f"[WORKFLOW] RUN {job_id} {method_title} | status={getattr(doc, 'status', '?')}") doc.run() - print( - f"[FG] DONE {job_id} {method_title} | status={getattr(frappe.get_doc(doctype, docname), 'status', '?')}", - file=sys.stderr, - flush=True, + _log( + f"[WORKFLOW] DONE {job_id} {method_title} | status={getattr(frappe.get_doc(doctype, docname), 'status', '?')}" ) # Drain any re-enqueue requests that arrived while this job was running. 
retry = 0 while job_id in pending: retry += 1 + if retry > max_retries: + _log( + f"[WORKFLOW] MAX RETRIES EXCEEDED for {job_id} {method_title} | pending={list(pending.keys())}" + ) + break pending.pop(job_id) - print(f"[FG] RETRY {job_id} {method_title} (#{retry})", file=sys.stderr, flush=True) + _log(f"[WORKFLOW] RETRY {job_id} {method_title} (#{retry})") doc = frappe.get_doc(doctype, docname) - print( - f"[FG] RUN {job_id} {method_title} | status={getattr(doc, 'status', '?')} (retry #{retry})", - file=sys.stderr, - flush=True, + _log( + f"[WORKFLOW] RUN {job_id} {method_title} | status={getattr(doc, 'status', '?')} (retry #{retry})" ) doc.run() - print( - f"[FG] DONE {job_id} {method_title} | status={getattr(frappe.get_doc(doctype, docname), 'status', '?')} (retry #{retry})", - file=sys.stderr, - flush=True, + _log( + f"[WORKFLOW] DONE {job_id} {method_title} | status={getattr(frappe.get_doc(doctype, docname), 'status', '?')} (retry #{retry})" ) + except Exception: + raise finally: - print( - f"[FG] FINISH {job_id} {method_title} | pending={list(pending.keys())}", - file=sys.stderr, - flush=True, - ) + _log(f"[WORKFLOW] FINISH {job_id} {method_title} | pending={list(pending.keys())}") in_flight.discard(job_id) def foreground_enqueue_task(task_name: str) -> None: - print(f"[FG] enqueue_task({task_name})", file=sys.stderr, flush=True) _foreground_run_workflow_doc( "Press Workflow Task", task_name, @@ -131,12 +130,18 @@ def foreground_enqueue_task(task_name: str) -> None: def foreground_enqueue_workflow(workflow_name: str) -> None: - print(f"[FG] enqueue_workflow({workflow_name})", file=sys.stderr, flush=True) + log_immediate = os.environ.get("PRESS_LOG_WORKFLOW_DEBUG_INFO") in ("1", "true", "True") + _workflow_log_buffer.clear() _foreground_run_workflow_doc( "Press Workflow", workflow_name, f"press_workflow||{workflow_name}||run", ) + if not log_immediate: + doc = frappe.get_doc("Press Workflow", workflow_name) + if getattr(doc, "status", None) == "Failure": + for 
msg in _workflow_log_buffer: + print(msg, file=sys.stderr, flush=True) def foreground_enqueue( diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.py b/press/workflow_engine/doctype/press_workflow/press_workflow.py index 3c10461e41f..22c2b0bef46 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.py +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.py @@ -146,7 +146,10 @@ def run(self): # noqa: C901 - best to keep it in one place self.update_skipped_steps_status(save=False) self.save() - self.execute_callback_in_background() + if frappe.flags.in_test: + self.execute_callback() + else: + self.execute_callback_in_background() def execute_callback_in_background(self): frappe.enqueue_doc( diff --git a/press/workflow_engine/doctype/press_workflow/workflow_builder.py b/press/workflow_engine/doctype/press_workflow/workflow_builder.py index b9ea57cd914..ae7bf5906c1 100644 --- a/press/workflow_engine/doctype/press_workflow/workflow_builder.py +++ b/press/workflow_engine/doctype/press_workflow/workflow_builder.py @@ -26,6 +26,7 @@ ) if TYPE_CHECKING: + from press.workflow_engine.doctype.press_workflow.press_workflow import PressWorkflow from press.workflow_engine.doctype.press_workflow_task.press_workflow_task import ( PressWorkflowTask, ) @@ -180,7 +181,7 @@ def resolve_context(self) -> None: current_workflow = getattr(frappe.flags, "current_press_workflow", None) if current_workflow: self.workflow_name = str(current_workflow) - self.workflow_doc = frappe.get_doc("Press Workflow", self.workflow_name) # type: ignore + self.workflow_doc: PressWorkflow = frappe.get_doc("Press Workflow", self.workflow_name) # type: ignore if self.kv_store_type != "workflow_store": # Store type is changing — discard any cached in-memory store. 
self.kv_store_type = "workflow_store" From ea0d7856d6ba6b3af6e749022be228531e5acd75 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Sat, 18 Apr 2026 21:11:23 +0530 Subject: [PATCH 09/22] fix(release-pipeline): Don't override workflow_name The workflow_name param is configured by WorkflowBuilder class automatically --- press/press/doctype/release_pipeline/release_pipeline.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/press/press/doctype/release_pipeline/release_pipeline.py b/press/press/doctype/release_pipeline/release_pipeline.py index e8aabe9feab..3e0b917b12a 100644 --- a/press/press/doctype/release_pipeline/release_pipeline.py +++ b/press/press/doctype/release_pipeline/release_pipeline.py @@ -162,12 +162,6 @@ def update_pipeline_status( def release_group_doc(self) -> "ReleaseGroup": return frappe.get_doc("Release Group", self.release_group) - @cached_property - def workflow_name(self) -> str: - return frappe.db.get_value( - "Press Workflow", {"linked_doctype": "Release Pipeline", "linked_docname": self.name}, "name" - ) - @task def validate_app_hashes(self, apps: list[dict[str, str]]): """Validate App Hashes""" @@ -498,6 +492,7 @@ def orchestrate_build_monitoring(self, deploy_candidate: str, primary_build: str # Wait for sometime for the secondary build to be created in case of any delays in build scheduling self.defer_current_task(f"Waiting for secondary build to be created for {deploy_candidate}") + assert secondary_build, "Secondary build should be present for candidates requiring 2 builds" self.monitor_pre_build_validation(secondary_build) self.monitor_build_success(secondary_build) From 01f375dfec7a17b24b21382371a356d2c3dbb56d Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Sat, 18 Apr 2026 21:37:15 +0530 Subject: [PATCH 10/22] feat(press-job): Add option to retry --- .../doctype/press_job/jobs/create_server.py | 2 + 
press/press/doctype/press_job/press_job.js | 40 ++++++++----------- press/press/doctype/press_job/press_job.py | 11 +++++ 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/press/press/doctype/press_job/jobs/create_server.py b/press/press/doctype/press_job/jobs/create_server.py index add0a81b700..fcdc1e101bb 100644 --- a/press/press/doctype/press_job/jobs/create_server.py +++ b/press/press/doctype/press_job/jobs/create_server.py @@ -63,6 +63,8 @@ def is_fs_server(self): @task def provision_server(self): machine = self.virtual_machine_doc + if machine.status != "Draft": + return machine.provision() @task diff --git a/press/press/doctype/press_job/press_job.js b/press/press/doctype/press_job/press_job.js index e25deb104ec..41afd743164 100644 --- a/press/press/doctype/press_job/press_job.js +++ b/press/press/doctype/press_job/press_job.js @@ -3,29 +3,21 @@ frappe.ui.form.on('Press Job', { refresh: function (frm) { - [ - [__('Force Continue'), 'force_continue', frm.doc.status === 'Failure'], - [__('Force Fail'), 'force_fail', frm.doc.status === 'Running'], - [ - __('Mark Callback Failure Issue Resolved'), - 'mark_callback_failure_issue_resolved', - frm.doc.callback_failed && - !frm.doc.callback_executed && - !frm.doc.callback_failure_issue_resolved, - ], - ].forEach(([label, method, condition]) => { - if (condition) { - frm.add_custom_button( - label, - () => { - frappe.confirm( - `Are you sure you want to ${label.toLowerCase()}?`, - () => frm.call(method).then(() => frm.refresh()), - ); - }, - __('Actions'), - ); - } - }); + [[__('Retry'), 'retry', frm.doc.status === 'Failure']].forEach( + ([label, method, condition]) => { + if (condition) { + frm.add_custom_button( + label, + () => { + frappe.confirm( + `Are you sure you want to ${label.toLowerCase()}?`, + () => frm.call(method).then(() => frm.refresh()), + ); + }, + __('Actions'), + ); + } + }, + ); }, }); diff --git a/press/press/doctype/press_job/press_job.py 
b/press/press/doctype/press_job/press_job.py index a5148da225b..c5156796de2 100644 --- a/press/press/doctype/press_job/press_job.py +++ b/press/press/doctype/press_job/press_job.py @@ -206,3 +206,14 @@ def on_workflow_failure(self, workflow: "PressWorkflow"): if hasattr(self, "on_press_job_failure"): self.on_press_job_failure(workflow) + + @frappe.whitelist() + def retry(self): + if self.status != "Failure": + frappe.throw("Only workflows in Failure state can be retried.") # nosemgrep + return + + self.status = "Pending" + self.save() + self.start_workflow() + frappe.db.commit() # nosemgrep From d4bc7516f286b47bac90a289fd557daf6d6f3266 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:15:33 +0530 Subject: [PATCH 11/22] feat(press-job): Make functions idempotent --- .../doctype/press_job/jobs/archive_server.py | 12 ++++ .../doctype/press_job/jobs/create_server.py | 59 +++++++++++-------- .../press_job/jobs/create_server_snapshot.py | 12 ++++ .../doctype/press_job/jobs/resize_server.py | 24 +++++++- .../press_job/jobs/stop_and_start_server.py | 12 ++++ 5 files changed, 91 insertions(+), 28 deletions(-) diff --git a/press/press/doctype/press_job/jobs/archive_server.py b/press/press/doctype/press_job/jobs/archive_server.py index f5be638da93..e5c99555bb2 100644 --- a/press/press/doctype/press_job/jobs/archive_server.py +++ b/press/press/doctype/press_job/jobs/archive_server.py @@ -15,10 +15,22 @@ def execute(self): @task def disable_termination_protection(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Terminated": + return + self.virtual_machine_doc.disable_termination_protection() @task(queue="long", timeout=600) def terminate_virtual_machine(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Terminated": + return + self.virtual_machine_doc.terminate() @task diff --git 
a/press/press/doctype/press_job/jobs/create_server.py b/press/press/doctype/press_job/jobs/create_server.py index fcdc1e101bb..78f7ce794b4 100644 --- a/press/press/doctype/press_job/jobs/create_server.py +++ b/press/press/doctype/press_job/jobs/create_server.py @@ -1,4 +1,4 @@ -import time +import contextlib from typing import TYPE_CHECKING import frappe @@ -113,6 +113,10 @@ def create_volume_from_snapshot(self): if not self.virtual_machine_doc.data_disk_snapshot: return + if self.virtual_machine_doc.data_disk_snapshot_volume_id: + # Volume has already been created from the snapshot, proceed to attach it + return + max_retries = self.arguments_dict.get("max_volume_creation_retries", 6) if self.kv.get("volume_creation_attempts", 0) >= max_retries: raise Exception(f"Failed to create volume from snapshot after {max_retries} retries") @@ -126,38 +130,41 @@ def create_volume_from_snapshot(self): @task def attach_snapshotted_volume(self): - vm = frappe.get_doc("Virtual Machine", self.virtual_machine) - if not vm.data_disk_snapshot: + if not self.virtual_machine_doc.data_disk_snapshot: return - while True: - is_attached = vm.check_and_attach_data_disk_snapshot_volume() - if is_attached: - return - time.sleep(10) - vm = frappe.get_doc("Virtual Machine", self.virtual_machine) + if self.virtual_machine_doc.data_disk_snapshot_attached: + # Volume has already been attached, proceed to sync it + return + + try: + self.virtual_machine_doc.check_and_attach_data_disk_snapshot_volume() + except (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError): + self.defer_current_task() @task def sync_attached_volumes(self): - server = self.server_doc - if server.provider != "AWS EC2" or not frappe.db.get_value( - "Virtual Machine", server.virtual_machine, "data_disk_snapshot" - ): + if not self.virtual_machine_doc.data_disk_snapshot: return - while True: - time.sleep(10) - try: - vm = frappe.get_doc("Virtual Machine", server.virtual_machine) - vm.sync() - if 
len(vm.volumes) == 0 or (vm.data_disk_snapshot_attached and len(vm.volumes) == 1): - continue - server.reload() - server.validate_mounts() - server.save() - break - except (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError): - continue + with contextlib.suppress( + frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError + ): + self.virtual_machine_doc.sync() + if ( + self.virtual_machine_doc.data_disk_snapshot_attached + and len(self.virtual_machine_doc.volumes) == 1 + ) or ( + not self.virtual_machine_doc.data_disk_snapshot_attached + and len(self.virtual_machine_doc.volumes) == 0 + ): + self.defer_current_task() + return + + server = self.server_doc + server.reload() + server.validate_mounts() + server.save() @task(queue="long", timeout=7200) def mount_snapshotted_volume(self): diff --git a/press/press/doctype/press_job/jobs/create_server_snapshot.py b/press/press/doctype/press_job/jobs/create_server_snapshot.py index da5e7502749..584fb3954c5 100644 --- a/press/press/doctype/press_job/jobs/create_server_snapshot.py +++ b/press/press/doctype/press_job/jobs/create_server_snapshot.py @@ -23,6 +23,12 @@ def execute(self): @task def stop_virtual_machine(self): machine = self.virtual_machine_doc + with suppress(Exception): + machine.sync() + + if machine.status == "Stopped": + return + machine.stop() @task @@ -42,6 +48,12 @@ def create_snapshot(self): @task def start_virtual_machine(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Running": + return + try: self.virtual_machine_doc.start() except Exception: diff --git a/press/press/doctype/press_job/jobs/resize_server.py b/press/press/doctype/press_job/jobs/resize_server.py index 19cfa0f6c24..c12acf2d169 100644 --- a/press/press/doctype/press_job/jobs/resize_server.py +++ b/press/press/doctype/press_job/jobs/resize_server.py @@ -30,6 +30,12 @@ def execute(self): @task def stop_virtual_machine(self): 
+ with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Stopped": + return + self.virtual_machine_doc.stop() @task @@ -44,15 +50,29 @@ def wait_for_virtual_machine_to_stop(self): @task def resize_virtual_machine(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if ( + self.arguments_dict.get("upgrade_disk", False) + and self.virtual_machine_doc.machine_type == self.arguments_dict.machine_type + ): + return + self.virtual_machine_doc.resize( self.arguments_dict.machine_type, self.arguments_dict.get("upgrade_disk", False) ) @task def start_virtual_machine(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Running": + return + try: - if self.virtual_machine_doc.status != "Running": - self.virtual_machine_doc.start() + self.virtual_machine_doc.start() except Exception: self.defer_current_task() diff --git a/press/press/doctype/press_job/jobs/stop_and_start_server.py b/press/press/doctype/press_job/jobs/stop_and_start_server.py index 74d93d7502b..5dbbdc55d1b 100644 --- a/press/press/doctype/press_job/jobs/stop_and_start_server.py +++ b/press/press/doctype/press_job/jobs/stop_and_start_server.py @@ -17,6 +17,12 @@ def execute(self): @task def stop_virtual_machine(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Stopped": + return + self.virtual_machine_doc.stop() @task @@ -31,6 +37,12 @@ def wait_for_virtual_machine_to_stop(self): @task def start_virtual_machine(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Running": + return + self.virtual_machine_doc.start() @task From 3fc42f001f1c334b987664134e475b5890940560 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:43:10 +0530 Subject: [PATCH 12/22] feat(press-workflow): Add Support for requesting force 
failure --- .../doctype/press_workflow/press_workflow.js | 25 +++++++++++++++---- .../press_workflow/press_workflow.json | 13 ++++++++-- .../doctype/press_workflow/press_workflow.py | 12 +++++++++ .../press_workflow_task.py | 6 ++++- 4 files changed, 48 insertions(+), 8 deletions(-) diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.js b/press/workflow_engine/doctype/press_workflow/press_workflow.js index 6f82d82f35d..9e7f1967396 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.js +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.js @@ -1,8 +1,23 @@ // Copyright (c) 2026, Frappe and contributors // For license information, please see license.txt -// frappe.ui.form.on("Press Workflow", { -// refresh(frm) { - -// }, -// }); +frappe.ui.form.on('Press Workflow', { + refresh(frm) { + if (frm.doc.status === 'Running') { + frm.add_custom_button( + 'Force Fail', + () => { + frappe.confirm( + 'Are you sure you want to force fail this workflow? 
This action cannot be undone.', + () => { + frm.call('force_fail').then(() => { + frm.reload_doc(); + }); + }, + ); + }, + 'Actions', + ); + } + }, +}); diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.json b/press/workflow_engine/doctype/press_workflow/press_workflow.json index 306ac0b52b7..0d6b9a10455 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.json +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.json @@ -6,6 +6,7 @@ "engine": "InnoDB", "field_order": [ "status", + "is_force_failure_requested", "column_break_lkci", "linked_doctype", "column_break_xuyw", @@ -41,7 +42,7 @@ "section_break_xglm", "stdout", "traceback", - "callback_traceback" + "callback_traceback", "workflow_traceback" ], "fields": [ @@ -274,9 +275,17 @@ { "fieldname": "column_break_gteb", "fieldtype": "Column Break" + }, + { "fieldname": "workflow_traceback", "fieldtype": "Long Text", "label": "Workflow Traceback" + }, + { + "default": "0", + "fieldname": "is_force_failure_requested", + "fieldtype": "Check", + "label": "Force Failure Requested" } ], "grid_page_length": 50, @@ -287,7 +296,7 @@ "link_fieldname": "workflow" } ], - "modified": "2026-04-23 19:16:29.284785", + "modified": "2026-04-24 11:33:09.864201", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow", diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.py b/press/workflow_engine/doctype/press_workflow/press_workflow.py index 94ac2b910aa..efb796f7c49 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.py +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.py @@ -48,6 +48,7 @@ class PressWorkflow(Document): duration: DF.Duration | None end: DF.Datetime | None exception: DF.Link | None + is_force_failure_requested: DF.Check key_value_store: DF.Table[PressWorkflowKV] kwargs: DF.Link | None linked_docname: DF.DynamicLink @@ -75,6 +76,14 @@ def after_insert(self): def on_trash(self): 
frappe.db.delete("Press Workflow Task", {"workflow": self.name}) + @frappe.whitelist() + def force_fail(self): + if self.status in ["Success", "Failure", "Fatal"]: + frappe.throw("Cannot force fail a workflow that has already completed.") + return + + frappe.db.set_value(self.doctype, self.name, "is_force_failure_requested", True) + def run(self): # noqa: C901 - best to keep it in one place if not self.linked_doctype or not self.linked_docname: frappe.throw("Cannot run flow without linked_doctype and linked_docname", frappe.ValidationError) @@ -111,6 +120,9 @@ def run(self): # noqa: C901 - best to keep it in one place frappe.db.commit() # nosemgrep try: + if self.is_force_failure_requested: + raise Exception("Workflow was forcefully failed based on user request.") + with redirect_stdout(buffer): result = getattr(reference_doc, self.main_method_name)(*args, **kwargs) diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py index 33b664a94e2..e8f605ff8ff 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py @@ -91,7 +91,7 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac workflow_info = frappe.get_value( "Press Workflow", self.workflow, - ["name", "status", "linked_docname", "linked_doctype"], + ["name", "status", "linked_docname", "linked_doctype", "is_force_failure_requested"], as_dict=True, ) @@ -142,6 +142,10 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac existing_task_signature = reference_doc.current_task_signature try: reference_doc.current_task_signature = self.signature + + if workflow_info.is_force_failure_requested: + raise Exception("Workflow was forcefully failed based on user request.") + with redirect_stdout(buffer): result = getattr(reference_doc, self.method_name)(*args, 
**kwargs) From 02ea86cc47fb54eb46c93a2653413f5870f91869 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 13:14:44 +0530 Subject: [PATCH 13/22] feat(workflow-kv): Store primitive types as json data To prevent creating too many workflow object --- .../doctype/press_workflow/press_workflow.py | 2 +- .../press_workflow_kv/press_workflow_kv.json | 19 ++- .../press_workflow_kv/press_workflow_kv.py | 23 ++-- press/workflow_engine/test_utils.py | 32 +++++ press/workflow_engine/utils.py | 128 +++++++++++++++++- 5 files changed, 188 insertions(+), 16 deletions(-) diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.py b/press/workflow_engine/doctype/press_workflow/press_workflow.py index efb796f7c49..912f4001a95 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.py +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.py @@ -79,7 +79,7 @@ def on_trash(self): @frappe.whitelist() def force_fail(self): if self.status in ["Success", "Failure", "Fatal"]: - frappe.throw("Cannot force fail a workflow that has already completed.") + frappe.throw("Cannot force fail a workflow that has already completed.") # nosemgrep return frappe.db.set_value(self.doctype, self.name, "is_force_failure_requested", True) diff --git a/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json b/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json index 2d828c74343..57f63a65a2d 100644 --- a/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json +++ b/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json @@ -6,7 +6,8 @@ "engine": "InnoDB", "field_order": [ "key", - "value" + "value", + "type" ], "fields": [ { @@ -18,11 +19,19 @@ "search_index": 1 }, { + "description": "Actual value or link to the object", "fieldname": "value", - "fieldtype": "Link", + "fieldtype": "Data", + "in_list_view": 1, + "label": "Value" + }, + { + 
"default": "object", + "fieldname": "type", + "fieldtype": "Select", "in_list_view": 1, - "label": "Value", - "options": "Press Workflow Object", + "label": "Type", + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject", "reqd": 1 } ], @@ -30,7 +39,7 @@ "index_web_pages_for_search": 1, "istable": 1, "links": [], - "modified": "2026-03-03 21:15:51.697093", + "modified": "2026-04-24 12:02:00.810732", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow KV", diff --git a/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.py b/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.py index a4d89b28d1b..4dc6dbfc8f1 100644 --- a/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.py +++ b/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.py @@ -7,9 +7,7 @@ import frappe from frappe.model.document import Document -from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( - PressWorkflowObject, -) +from press.workflow_engine.utils import deserialize_value, serialize_and_store_value class KVStoreInterface(abc.ABC): @@ -43,10 +41,16 @@ def set(self, key: str, value: Any, throw_on_error: bool = True): kv_doc.parenttype = self.parent_type kv_doc.key = key - if kv_doc.value: + if kv_doc.value and kv_doc.type == "object": frappe.db.set_value("Press Workflow Object", str(kv_doc.value), "deleted", True) - kv_doc.value = PressWorkflowObject.store(value, throw_on_error=throw_on_error) + value_type, value = serialize_and_store_value(value, throw_on_error=throw_on_error) + if value_type is None: + self.delete(key) + return + + kv_doc.type = value_type + kv_doc.value = value kv_doc.save(ignore_permissions=True) def get(self, key: str) -> Any | None: @@ -54,11 +58,11 @@ def get(self, key: str) -> Any | None: if not kv_name: return None - object_name = frappe.db.get_value("Press Workflow KV", kv_name, "value") - if not object_name: + value, value_type = 
frappe.db.get_value("Press Workflow KV", kv_name, ["value", "type"]) + if not value: return None - return PressWorkflowObject.get_object(str(object_name)) + return deserialize_value(value_type, value) def delete(self, key: str): kv_name = self._get_kv_record_name(key) @@ -111,5 +115,6 @@ class PressWorkflowKV(Document): parent: DF.Data parentfield: DF.Data parenttype: DF.Data - value: DF.Link + type: DF.Literal["int", "float", "string", "tuple", "list", "dict", "object"] + value: DF.Data | None # end: auto-generated types diff --git a/press/workflow_engine/test_utils.py b/press/workflow_engine/test_utils.py index 7a9e83e47b2..33a3d7ce8b6 100644 --- a/press/workflow_engine/test_utils.py +++ b/press/workflow_engine/test_utils.py @@ -10,9 +10,11 @@ _canonicalize, calculate_duration, called_methods_in_order, + deserialize_value, generate_function_signature, is_func_accept_task_id, method_title, + serialize_and_store_value, ) @@ -144,3 +146,33 @@ def my_func(a, b=2, task_id=None): sig4 = generate_function_signature(my_func, args=(1,), kwargs={"task_id": "123"}) # In this implementation, the payload structure incorporates task_id so the digest will be different. 
self.assertNotEqual(sig1, sig4) + + def test_serialize_deserialize_json_types(self): + cases = [ + (True, "bool"), + (7, "int"), + (1.5, "float"), + ("value", "string"), + ((1, "a"), "tuple"), + ([1, "a"], "list"), + ({"a": 1}, "dict"), + ] + + for original, expected_type in cases: + with self.subTest(value=original, value_type=expected_type): + value_type, serialized_value = serialize_and_store_value(original) + self.assertEqual(value_type, expected_type) + deserialized_value = deserialize_value(value_type, serialized_value) + self.assertEqual(type(deserialized_value), type(original)) + self.assertEqual(deserialized_value, original) + + def test_serialize_deserialize_exception_as_object(self): + original = ValueError("something went wrong") + value_type, serialized_value = serialize_and_store_value(original) + + self.assertEqual(value_type, "object") + self.assertIsNotNone(serialized_value) + + deserialized = deserialize_value(value_type, serialized_value) + self.assertIsInstance(deserialized, ValueError) + self.assertEqual(str(deserialized), str(original)) diff --git a/press/workflow_engine/utils.py b/press/workflow_engine/utils.py index c97d82048b7..153824a679a 100644 --- a/press/workflow_engine/utils.py +++ b/press/workflow_engine/utils.py @@ -10,7 +10,7 @@ import textwrap from collections.abc import Callable from datetime import datetime -from typing import Any +from typing import Any, Literal from frappe.model.document import Document from frappe.utils import get_datetime @@ -150,3 +150,129 @@ def generate_function_signature(func: Callable, args: tuple, kwargs: dict) -> st blob = json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8") return hashlib.sha256(blob).hexdigest() + + +def _is_serializable_value(value: Any) -> bool: + """Return True if value can round-trip through JSON without loss.""" + if value is None or isinstance(value, bool | str): + return True + if isinstance(value, int): # after bool: bool subclasses int + return True + 
if isinstance(value, float): + return math.isfinite(value) + if isinstance(value, list | tuple): + return all(_is_serializable_value(v) for v in value) + if isinstance(value, dict): + return all(isinstance(k, str) for k in value) and all( + _is_serializable_value(v) for v in value.values() + ) + return False + + +ValueType = Literal["bool", "int", "float", "string", "tuple", "list", "dict", "object"] + + +def get_type_of_value( + value: Any, +) -> ValueType | None: + if value is None: + return None + + value_type = type(value) + primitive_types: dict[type[Any], ValueType] = { + bool: "bool", + int: "int", + str: "string", + } + primitive_type = primitive_types.get(value_type) + if primitive_type: + return primitive_type + + if value_type is float: + return "float" if math.isfinite(value) else "object" + + container_types: dict[type[Any], ValueType] = { + tuple: "tuple", + list: "list", + dict: "dict", + } + container_type = container_types.get(value_type) + if container_type: + return container_type if _is_serializable_value(value) else "object" + + return "object" + + +def serialize_and_store_value( + value: Any, + throw_on_error: bool = True, +) -> tuple[ValueType | None, str | None]: + """ + Serialize a value to a string for storage, along with its type. + If the value is not JSON-serializable, it will be stored as a PressWorkflowObject and the type will be "object". 
+ """ + + from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import PressWorkflowObject + + value_type = get_type_of_value(value) + if value_type is None: + return None, None + + if value_type == "object": + return value_type, PressWorkflowObject.store(value, throw_on_error=throw_on_error) + + try: + serialized_value = json.dumps(value, sort_keys=True, separators=(",", ":")) + return value_type, serialized_value + except (TypeError, ValueError): + # Fallback to pickling for non-JSON-serializable objects + return "object", PressWorkflowObject.store(value) + + +def deserialize_value( + value_type: ValueType | None, + serialized_value: str | None, +) -> Any: + """ + Deserialize a value from its serialized form based on its type. + + Args: + value_type: The type of the value. + serialized_value: The serialized representation of the value. + + Returns: + The deserialized value. + """ + if value_type is None: + return None + + if value_type == "object": + assert serialized_value is not None + from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( + PressWorkflowObject, + ) + + return PressWorkflowObject.get_object(serialized_value) + + try: + value = json.loads(serialized_value) if serialized_value is not None else None + except (TypeError, ValueError) as e: + raise ValueError(f"Cannot deserialize value of type {value_type!r}") from e + + if value is None: + return None + + value_casters: dict[str, Callable[[Any], Any]] = { + "bool": bool, + "int": int, + "float": float, + "string": str, + "tuple": tuple, + "list": list, + "dict": dict, + } + + try: + return value_casters[value_type](value) + except (KeyError, TypeError, ValueError) as e: + raise ValueError(f"Cannot deserialize value of type {value_type!r}") from e From 72ca62e81d6f5c1cf3364f05b84b199df23b53bb Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 13:52:04 +0530 Subject: [PATCH 14/22] 
feat(workflow-engine): Store args and kwargs as json if possible --- .../doctype/press_workflow/decorators.py | 13 +++--- .../press_workflow/press_workflow.json | 43 ++++++++++++++----- .../doctype/press_workflow/press_workflow.py | 19 +++++--- .../press_workflow/workflow_builder.py | 9 +++- .../press_workflow_kv/press_workflow_kv.json | 5 ++- .../press_workflow_task.json | 26 ++++++++++- .../press_workflow_task.py | 17 +++++--- 7 files changed, 98 insertions(+), 34 deletions(-) diff --git a/press/workflow_engine/doctype/press_workflow/decorators.py b/press/workflow_engine/doctype/press_workflow/decorators.py index 679eaca1975..c23a7622e83 100644 --- a/press/workflow_engine/doctype/press_workflow/decorators.py +++ b/press/workflow_engine/doctype/press_workflow/decorators.py @@ -11,13 +11,11 @@ from frappe.model.document import Document from press.workflow_engine.doctype.press_workflow.workflow_builder import WorkflowBuilder -from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( - PressWorkflowObject, -) from press.workflow_engine.utils import ( called_methods_in_order, is_func_accept_task_id, method_title, + serialize_and_store_value, ) if typing.TYPE_CHECKING: @@ -169,12 +167,17 @@ def run_as_workflow(self, *args: Any, **kwargs: Any) -> str: seen: set[str] = set() methods = [m for m in methods if not (m[0] in seen or seen.add(m[0]))] # type: ignore[func-returns-value] + args_type, args_value = serialize_and_store_value(args) + kwargs_type, kwargs_value = serialize_and_store_value(kwargs) + return ( frappe.get_doc( { "doctype": "Press Workflow", - "args": PressWorkflowObject.store(args) if args else None, - "kwargs": PressWorkflowObject.store(kwargs) if kwargs else None, + "args": args_value, + "args_type": args_type, + "kwargs": kwargs_value, + "kwargs_type": kwargs_type, "linked_doctype": instance.doctype, # type: ignore "linked_docname": str(instance.name), # type: ignore "main_method_name": self._wrapped.__name__, diff --git 
a/press/workflow_engine/doctype/press_workflow/press_workflow.json b/press/workflow_engine/doctype/press_workflow/press_workflow.json index 0d6b9a10455..6f564454759 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.json +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.json @@ -13,10 +13,12 @@ "linked_docname", "section_break_bicj", "main_method_name", - "main_method_title", - "column_break_ccie", "args", + "args_type", + "column_break_ccie", + "main_method_title", "kwargs", + "kwargs_type", "kv_storage_section", "key_value_store", "section_break_zpgq", @@ -29,6 +31,7 @@ "steps", "section_break_pfpj", "output", + "output_type", "column_break_lhnh", "exception", "callback_section", @@ -96,9 +99,9 @@ }, { "fieldname": "output", - "fieldtype": "Link", + "fieldtype": "Data", "label": "Output", - "options": "Press Workflow Object", + "length": 1000, "read_only": 1 }, { @@ -184,16 +187,16 @@ }, { "fieldname": "args", - "fieldtype": "Link", + "fieldtype": "Data", "label": "Args", - "options": "Press Workflow Object", + "length": 1000, "set_only_once": 1 }, { "fieldname": "kwargs", - "fieldtype": "Link", + "fieldtype": "Data", "label": "Kwargs", - "options": "Press Workflow Object", + "length": 1000, "set_only_once": 1 }, { @@ -286,6 +289,26 @@ "fieldname": "is_force_failure_requested", "fieldtype": "Check", "label": "Force Failure Requested" + }, + { + "fieldname": "args_type", + "fieldtype": "Select", + "label": "Args Type", + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject", + "set_only_once": 1 + }, + { + "fieldname": "kwargs_type", + "fieldtype": "Select", + "label": "Kwargs Type", + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject", + "set_only_once": 1 + }, + { + "fieldname": "output_type", + "fieldtype": "Select", + "label": "Output Type", + "read_only": 1 } ], "grid_page_length": 50, @@ -296,7 +319,7 @@ "link_fieldname": "workflow" } ], - "modified": "2026-04-24 11:33:09.864201", + "modified": "2026-04-24 
13:36:34.775783", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow", @@ -339,4 +362,4 @@ "sort_field": "creation", "sort_order": "DESC", "states": [] -} \ No newline at end of file +} diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.py b/press/workflow_engine/doctype/press_workflow/press_workflow.py index 912f4001a95..ffce70a7d5a 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.py +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.py @@ -20,7 +20,7 @@ from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( PressWorkflowObject, ) -from press.workflow_engine.utils import calculate_duration +from press.workflow_engine.utils import calculate_duration, serialize_and_store_value if TYPE_CHECKING: from press.workflow_engine.doctype.press_workflow.workflow_builder import WorkflowBuilder @@ -41,7 +41,8 @@ class PressWorkflow(Document): from press.workflow_engine.doctype.press_workflow_kv.press_workflow_kv import PressWorkflowKV from press.workflow_engine.doctype.press_workflow_step.press_workflow_step import PressWorkflowStep - args: DF.Link | None + args: DF.Data | None + args_type: DF.Literal["int", "float", "string", "tuple", "list", "dict", "object"] callback_next_retry_at: DF.Datetime | None callback_status: DF.Literal["Pending", "Success", "Failure", "Fatal"] callback_traceback: DF.LongText | None @@ -50,14 +51,16 @@ class PressWorkflow(Document): exception: DF.Link | None is_force_failure_requested: DF.Check key_value_store: DF.Table[PressWorkflowKV] - kwargs: DF.Link | None + kwargs: DF.Data | None + kwargs_type: DF.Literal["int", "float", "string", "tuple", "list", "dict", "object"] linked_docname: DF.DynamicLink linked_doctype: DF.Link main_method_name: DF.Data main_method_title: DF.Data max_no_of_callback_attempts: DF.Int no_of_callback_attempts: DF.Int - output: DF.Link | None + output: DF.Data | None + output_type: DF.Literal[None] 
start: DF.Datetime | None status: DF.Literal["Queued", "Running", "Success", "Failure", "Fatal"] stdout: DF.LongText | None @@ -102,7 +105,8 @@ def run(self): # noqa: C901 - best to keep it in one place self.save() return - output = None + output_value = None + output_type = None exception = None workflow_exception_traceback = None status = "Running" @@ -127,7 +131,7 @@ def run(self): # noqa: C901 - best to keep it in one place result = getattr(reference_doc, self.main_method_name)(*args, **kwargs) if result is not None: - output = PressWorkflowObject.store(result) # type: ignore + output_type, output_value = serialize_and_store_value(result) status = "Success" except PressWorkflowTaskEnqueued: # This is expected when a task is enqueued. @@ -150,7 +154,8 @@ def run(self): # noqa: C901 - best to keep it in one place self.duration = calculate_duration(self.start, self.end) self.status = status - self.output = output + self.output = output_value + self.output_type = output_type self.stdout = (self.stdout or "") + buffer.getvalue() if frappe.flags.in_test and self.stdout: diff --git a/press/workflow_engine/doctype/press_workflow/workflow_builder.py b/press/workflow_engine/doctype/press_workflow/workflow_builder.py index 049a142d38f..edfda65a724 100644 --- a/press/workflow_engine/doctype/press_workflow/workflow_builder.py +++ b/press/workflow_engine/doctype/press_workflow/workflow_builder.py @@ -23,6 +23,7 @@ generate_function_signature, is_func_accept_task_id, method_title, + serialize_and_store_value, ) if TYPE_CHECKING: @@ -86,8 +87,12 @@ def run_task( # noqa: C901 task_doc.method_title = method_title(wrapped) # type: ignore task_doc.signature = signature # type: ignore - task_doc.args = PressWorkflowObject.store(args) if args else None # type: ignore - task_doc.kwargs = PressWorkflowObject.store(kwargs) if kwargs else None # type: ignore + args_type, args_value = serialize_and_store_value(args) + kwargs_type, kwargs_value = serialize_and_store_value(kwargs) + 
task_doc.args = args_value + task_doc.args_type = args_type + task_doc.kwargs = kwargs_value + task_doc.kwargs_type = kwargs_type task_doc.status = "Queued" # type: ignore task_doc.queue = queue # type: ignore task_doc.timeout = timeout or 0 # type: ignore diff --git a/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json b/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json index 57f63a65a2d..fd7b913a385 100644 --- a/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json +++ b/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json @@ -23,7 +23,8 @@ "fieldname": "value", "fieldtype": "Data", "in_list_view": 1, - "label": "Value" + "label": "Value", + "length": 1000 }, { "default": "object", @@ -39,7 +40,7 @@ "index_web_pages_for_search": 1, "istable": 1, "links": [], - "modified": "2026-04-24 12:02:00.810732", + "modified": "2026-04-24 13:24:11.790831", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow KV", diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json index 463ee0d37f1..3053ea93d5f 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json @@ -14,9 +14,12 @@ "method_name", "method_title", "args", + "args_type", "kwargs", + "kwargs_type", "column_break_fiyw", "output", + "output_type", "exception", "signature", "section_break_jvoo", @@ -182,11 +185,30 @@ "fieldtype": "Long Text", "label": "Traceback", "read_only": 1 + }, + { + "fieldname": "args_type", + "fieldtype": "Select", + "label": "Args Type", + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject" + }, + { + "fieldname": "kwargs_type", + "fieldtype": "Data", + "label": "Kwargs Type", + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject" + }, + { + "fieldname": 
"output_type", + "fieldtype": "Select", + "label": "Output Type", + "length": 1000, + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject" } ], "grid_page_length": 50, "links": [], - "modified": "2026-04-23 19:21:35.153779", + "modified": "2026-04-24 13:38:30.338341", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow Task", @@ -231,4 +253,4 @@ "sort_field": "creation", "sort_order": "DESC", "states": [] -} \ No newline at end of file +} diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py index e8f605ff8ff..8928b66f5db 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py @@ -16,7 +16,7 @@ from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( PressWorkflowObject, ) -from press.workflow_engine.utils import calculate_duration +from press.workflow_engine.utils import calculate_duration, deserialize_value, serialize_and_store_value if TYPE_CHECKING: from press.workflow_engine.doctype.press_workflow.workflow_builder import WorkflowBuilder @@ -32,13 +32,16 @@ class PressWorkflowTask(Document): from frappe.types import DF args: DF.Link | None + args_type: DF.Literal["int", "float", "string", "tuple", "list", "dict", "object"] duration: DF.Duration | None end: DF.Datetime | None exception: DF.Link | None kwargs: DF.Link | None + kwargs_type: DF.Data | None method_name: DF.Data method_title: DF.Data output: DF.Link | None + output_type: DF.Literal["int", "float", "string", "tuple", "list", "dict", "object"] parent_task: DF.Link | None queue: DF.Data | None signature: DF.Data @@ -103,8 +106,8 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac reference_doc.flags.current_press_workflow_task = self.name try: - args = 
PressWorkflowObject.get_object(self.args) if self.args else () - kwargs = PressWorkflowObject.get_object(self.kwargs) if self.kwargs else {} + args = deserialize_value(self.args) if self.args else () + kwargs = deserialize_value(self.kwargs) if self.kwargs else {} except Exception as e: self.exception = PressWorkflowObject.store(e, throw_on_error=False) self.status = "Failure" @@ -133,7 +136,8 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac if not frappe.flags.in_test: frappe.db.commit() # nosemgrep - output = None + output_value = None + output_type = None exception = None exception_traceback = None status = "Running" @@ -150,7 +154,7 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac result = getattr(reference_doc, self.method_name)(*args, **kwargs) if result is not None: - output = PressWorkflowObject.store(result) + output_type, output_value = serialize_and_store_value(result) status = "Success" except PressWorkflowTaskEnqueued: @@ -174,7 +178,8 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac self.duration = calculate_duration(self.start, self.end) self.status = status - self.output = output + self.output = output_value + self.output_type = output_type self.exception = exception self.stdout = (self.stdout or "") + buffer.getvalue() self.traceback = exception_traceback or getattr(self, "traceback", None) From 4e064bd6d963a88bfa450eeab51f15351cce3ad9 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 14:10:41 +0530 Subject: [PATCH 15/22] refactor(release-pipeline): Use workflow failure callback --- .../doctype/release_pipeline/release_pipeline.py | 6 +++++- .../press_workflow_task/press_workflow_task.py | 12 +----------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/press/press/doctype/release_pipeline/release_pipeline.py b/press/press/doctype/release_pipeline/release_pipeline.py 
index fadcec4b479..6b05e2d8b70 100644 --- a/press/press/doctype/release_pipeline/release_pipeline.py +++ b/press/press/doctype/release_pipeline/release_pipeline.py @@ -180,9 +180,10 @@ def update_pipeline_status( "Failure", "Retrying", ], + ignore_permissions: bool = False, ): self.status = status - self.save() + self.save(ignore_permissions=ignore_permissions) if self.status == "Failure": self.send_failure_notification() @@ -625,3 +626,6 @@ def create_release( workflow_status = frappe.db.get_value("Press Workflow", self.workflow, "status") if workflow_status == "Failure": self.update_pipeline_status("Failure") + + def on_workflow_failure(self): + self.update_pipeline_status("Failure", ignore_permissions=True) diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py index 8928b66f5db..e3380a9aa7e 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py @@ -4,7 +4,7 @@ from __future__ import annotations import io -from contextlib import redirect_stdout, suppress +from contextlib import redirect_stdout from typing import TYPE_CHECKING import frappe @@ -78,13 +78,6 @@ def update_tracked_step_status(self): }.get(self.status, "Pending"), ) - def _mark_reference_doc_as_failed(self, reference_doc: WorkflowBuilder): - """In case the link document has a status field try and mark it as failure to reflect the workflow failure.""" - with suppress(Exception): # Try your best but don't fail - if hasattr(reference_doc, "status"): - reference_doc.status = "Failure" - reference_doc.save(ignore_permissions=True) - def run(self): # noqa: C901 - Best to keep workflow execution logic in one place assert self.name, "Task must be saved before it can be run" frappe.get_value( @@ -184,9 +177,6 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac 
self.stdout = (self.stdout or "") + buffer.getvalue() self.traceback = exception_traceback or getattr(self, "traceback", None) - if self.status == "Failure": - self._mark_reference_doc_as_failed(reference_doc) - if frappe.flags.in_test and self.stdout: print(self.stdout) From ebfba735f4e844a1fc06a61add1ef4db38fc5893 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 14:16:24 +0530 Subject: [PATCH 16/22] fix(press-workflow): In task while deserialize pass the type as well --- press/press/doctype/press_job/press_job.js | 2 +- .../doctype/press_workflow_task/press_workflow_task.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/press/press/doctype/press_job/press_job.js b/press/press/doctype/press_job/press_job.js index 41afd743164..125c8b82525 100644 --- a/press/press/doctype/press_job/press_job.js +++ b/press/press/doctype/press_job/press_job.js @@ -3,7 +3,7 @@ frappe.ui.form.on('Press Job', { refresh: function (frm) { - [[__('Retry'), 'retry', frm.doc.status === 'Failed']].forEach( + [[__('Retry'), 'retry', frm.doc.status === 'Failure']].forEach( ([label, method, condition]) => { if (condition) { frm.add_custom_button( diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py index e3380a9aa7e..eb80263070d 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py @@ -99,8 +99,8 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac reference_doc.flags.current_press_workflow_task = self.name try: - args = deserialize_value(self.args) if self.args else () - kwargs = deserialize_value(self.kwargs) if self.kwargs else {} + args = deserialize_value(self.args_type, self.args) if self.args else () + kwargs = deserialize_value(self.kwargs_type, 
self.kwargs) if self.kwargs else {} except Exception as e: self.exception = PressWorkflowObject.store(e, throw_on_error=False) self.status = "Failure" From 7d47f5d728820418c87a8de71b3db392d499f22e Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 14:48:43 +0530 Subject: [PATCH 17/22] fix(workflow-engine): Test cases --- .../doctype/press_workflow/press_workflow.py | 16 +- .../doctype/press_workflow/test_decorators.py | 187 +++++++++++ .../doctype/press_workflow/test_exceptions.py | 61 ++++ .../press_workflow/test_press_workflow.py | 136 ++++++++ .../press_workflow/test_workflow_builder.py | 211 +++++++++++++ .../press_workflow/workflow_builder.py | 26 +- .../test_press_workflow_kv.py | 67 +++- .../test_press_workflow_object.py | 42 ++- .../press_workflow_task.json | 14 +- .../press_workflow_task.py | 47 ++- .../test_press_workflow_task.py | 298 +++++++++++++++++- .../test_press_workflow_test.py | 161 +++++++++- press/workflow_engine/test_utils.py | 141 +++++++++ 13 files changed, 1330 insertions(+), 77 deletions(-) create mode 100644 press/workflow_engine/doctype/press_workflow/test_decorators.py create mode 100644 press/workflow_engine/doctype/press_workflow/test_exceptions.py create mode 100644 press/workflow_engine/doctype/press_workflow/test_workflow_builder.py diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.py b/press/workflow_engine/doctype/press_workflow/press_workflow.py index ffce70a7d5a..b6e2cbc64d3 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.py +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.py @@ -20,7 +20,11 @@ from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( PressWorkflowObject, ) -from press.workflow_engine.utils import calculate_duration, serialize_and_store_value +from press.workflow_engine.utils import ( + calculate_duration, + deserialize_value, + serialize_and_store_value, 
+) if TYPE_CHECKING: from press.workflow_engine.doctype.press_workflow.workflow_builder import WorkflowBuilder @@ -97,8 +101,8 @@ def run(self): # noqa: C901 - best to keep it in one place reference_doc.workflow_name = self.name reference_doc.flags.in_press_workflow_execution = True - args = PressWorkflowObject.get_object(self.args) if self.args else () - kwargs = PressWorkflowObject.get_object(self.kwargs) if self.kwargs else {} + args = deserialize_value(self.args_type, self.args) or () + kwargs = deserialize_value(self.kwargs_type, self.kwargs) or {} except Exception: self.status = "Fatal" self.traceback = frappe.get_traceback() @@ -227,8 +231,8 @@ def execute_callback(self): self.callback_traceback = frappe.get_traceback() else: self.callback_status = "Failure" - self.callback_next_retry_at = frappe.utils.add_minutes( - now_datetime(), 2**self.no_of_callback_attempts + self.callback_next_retry_at = frappe.utils.add_to_date( + minutes=2**self.no_of_callback_attempts ) self.save() @@ -274,7 +278,7 @@ def get_result(self): if self.status == "Success": if self.output: - return PressWorkflowObject.get_object(self.output) + return deserialize_value(self.output_type, self.output) return None if self.status == "Failure": diff --git a/press/workflow_engine/doctype/press_workflow/test_decorators.py b/press/workflow_engine/doctype/press_workflow/test_decorators.py new file mode 100644 index 00000000000..34ab8961e32 --- /dev/null +++ b/press/workflow_engine/doctype/press_workflow/test_decorators.py @@ -0,0 +1,187 @@ +# Copyright (c) 2026, Frappe and Contributors +# See license.txt + +from unittest.mock import patch + +import frappe +from frappe.model.document import Document +from frappe.tests.utils import FrappeTestCase + +from press.utils.test import foreground_enqueue, foreground_enqueue_doc +from press.workflow_engine.doctype.press_workflow.decorators import ( + BoundFlow, + _in_workflow_execution, + flow, + task, +) +from 
press.workflow_engine.doctype.press_workflow.workflow_builder import WorkflowBuilder + + +@patch("frappe.enqueue_doc", new=foreground_enqueue_doc) +@patch("frappe.enqueue", new=foreground_enqueue) +@patch("frappe.db.commit", new=lambda: None) +class TestDecorators(FrappeTestCase): + def setUp(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + self.doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 3, + "input_b": 2, + } + ).insert() + + def tearDown(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + self.doc.delete() + + def test_in_workflow_execution_true(self): + class TestDoc(WorkflowBuilder): + pass + + instance = TestDoc({"doctype": "Press Workflow Test"}) + instance.name = "test-name" + + def test_in_workflow_execution_false_no_workflow_name(self): + class TestDoc(WorkflowBuilder): + pass + + instance = TestDoc({"doctype": "Press Workflow Test"}) + instance.name = "test-name" + instance.workflow_name = None + instance.flags.in_press_workflow_execution = True + + self.assertFalse(_in_workflow_execution(instance)) + + def test_in_workflow_execution_false_no_flag(self): + class TestDoc(WorkflowBuilder): + pass + + instance = TestDoc({"doctype": "Press Workflow Test"}) + instance.name = "test-name" + instance.workflow_name = "test-workflow" + instance.flags.in_press_workflow_execution = False + + self.assertFalse(_in_workflow_execution(instance)) + + def test_in_workflow_execution_false_not_workflow_builder(self): + class NotWorkflowBuilder: + pass + + instance = NotWorkflowBuilder() + self.assertFalse(_in_workflow_execution(instance)) + + def test_task_decorator_direct_call(self): + class TestDoc(WorkflowBuilder): + @task + def my_task(self): + return "task result" + + instance = TestDoc({"doctype": "Press Workflow Test"}) + result = instance.my_task() + self.assertEqual(result, 
"task result") + + def test_task_decorator_with_queue_and_timeout(self): + class TestDoc(WorkflowBuilder): + @task(queue="long", timeout=3600) + def my_task(self): + return "task result" + + instance = TestDoc({"doctype": "Press Workflow Test"}) + result = instance.my_task() + self.assertEqual(result, "task result") + + def test_task_with_task_id(self): + class TestDoc(WorkflowBuilder): + @task + def my_task(self, task_id=None): + return f"task_id={task_id}" + + instance = TestDoc({"doctype": "Press Workflow Test"}) + result = instance.my_task.with_task_id("my-id")() + self.assertEqual(result, "task_id=my-id") + + def test_task_with_task_id_in_workflow(self): + wf_name = self.doc.main_with_task_id_passthrough.run_as_workflow() + wf = frappe.get_doc("Press Workflow", wf_name) + wf.run() + + self.assertEqual(wf.status, "Success") + self.assertEqual(wf.get_result(), 9) + + def test_flow_decorator_normal_call(self): + result = self.doc.main_success() + self.assertEqual(result, "success output") + + def test_flow_decorator_run_as_workflow(self): + wf_name = self.doc.main_success.run_as_workflow() + self.assertTrue(wf_name) + self.assertTrue(frappe.db.exists("Press Workflow", wf_name)) + + def test_flow_decorator_with_args(self): + wf_name = self.doc.flow_with_args.run_as_workflow(x=5, y=10) + wf = frappe.get_doc("Press Workflow", wf_name) + wf.run() + + self.assertEqual(wf.status, "Success") + self.assertEqual(wf.get_result(), 15) + + def test_flow_descriptor_on_non_document_raises(self): + with self.assertRaises((TypeError, RuntimeError)): + + class NotADocument: + @flow + def my_flow(self): + pass + + def test_run_as_workflow_on_non_workflow_builder_raises(self): + class TestDoc(Document): + @flow + def my_flow(self): + return "result" + + instance = TestDoc({"doctype": "Press Workflow Test"}) + instance.name = "test" + instance.doctype = "TestDoc" + + bound_flow = instance.my_flow + self.assertIsInstance(bound_flow, BoundFlow) + + with self.assertRaises(TypeError): + 
bound_flow.run_as_workflow() + + def test_flow_callable_protocol(self): + bound_flow = self.doc.main_success + self.assertTrue(callable(bound_flow)) + self.assertTrue(hasattr(bound_flow, "run_as_workflow")) + + def test_task_descriptor_class_access(self): + class TestDoc(WorkflowBuilder): + @task + def my_task(self): + return "result" + + self.assertTrue(hasattr(TestDoc, "my_task")) + + def test_task_without_task_id_strips_kwarg(self): + class TestDoc(WorkflowBuilder): + @task + def my_task(self): + return "no task_id" + + instance = TestDoc({"doctype": "Press Workflow Test"}) + result = instance.my_task() + self.assertEqual(result, "no task_id") + + def test_flow_creates_workflow_with_steps(self): + wf_name = self.doc.main_with_task.run_as_workflow() + wf = frappe.get_doc("Press Workflow", wf_name) + + self.assertEqual(wf.linked_doctype, "Press Workflow Test") + self.assertEqual(wf.linked_docname, self.doc.name) + self.assertEqual(wf.main_method_name, "main_with_task") + self.assertTrue(len(wf.steps) > 0) diff --git a/press/workflow_engine/doctype/press_workflow/test_exceptions.py b/press/workflow_engine/doctype/press_workflow/test_exceptions.py new file mode 100644 index 00000000000..daa8460c1ed --- /dev/null +++ b/press/workflow_engine/doctype/press_workflow/test_exceptions.py @@ -0,0 +1,61 @@ +# Copyright (c) 2026, Frappe and Contributors +# See license.txt + +from frappe.tests.utils import FrappeTestCase + +from press.workflow_engine.doctype.press_workflow.exceptions import ( + PressWorkflowFailedError, + PressWorkflowFatalError, + PressWorkflowRunningError, + PressWorkflowTaskEnqueued, +) + + +class TestPressWorkflowExceptions(FrappeTestCase): + def test_press_workflow_task_enqueued_with_task_name(self): + exc = PressWorkflowTaskEnqueued("Task is enqueued", "wf-001", "task-001") + self.assertEqual(str(exc), "Task is enqueued") + self.assertEqual(exc.workflow_name, "wf-001") + self.assertEqual(exc.task_name, "task-001") + + def 
test_press_workflow_task_enqueued_without_task_name(self): + exc = PressWorkflowTaskEnqueued("Task is enqueued", "wf-001") + self.assertEqual(str(exc), "Task is enqueued") + self.assertEqual(exc.workflow_name, "wf-001") + self.assertIsNone(exc.task_name) + + def test_press_workflow_running_error(self): + exc = PressWorkflowRunningError("Workflow wf-001 is currently running") + self.assertEqual(str(exc), "Workflow wf-001 is currently running") + + def test_press_workflow_failed_error(self): + exc = PressWorkflowFailedError("Workflow failed with no exception") + self.assertEqual(str(exc), "Workflow failed with no exception") + + def test_press_workflow_fatal_error_with_traceback(self): + traceback = "Traceback (most recent call last):\n File 'test.py', line 1" + exc = PressWorkflowFatalError("Fatal error occurred", traceback=traceback) + self.assertEqual(str(exc), "Fatal error occurred") + self.assertEqual(exc.traceback, traceback) + + def test_press_workflow_fatal_error_without_traceback(self): + exc = PressWorkflowFatalError("Fatal error occurred") + self.assertEqual(str(exc), "Fatal error occurred") + self.assertIsNone(exc.traceback) + + def test_exceptions_are_subclasses_of_exception(self): + self.assertTrue(issubclass(PressWorkflowTaskEnqueued, Exception)) + self.assertTrue(issubclass(PressWorkflowRunningError, Exception)) + self.assertTrue(issubclass(PressWorkflowFailedError, Exception)) + self.assertTrue(issubclass(PressWorkflowFatalError, Exception)) + + def test_catch_press_workflow_task_enqueued(self): + with self.assertRaises(PressWorkflowTaskEnqueued) as ctx: + raise PressWorkflowTaskEnqueued("Test message", "wf-001", "task-001") + self.assertEqual(ctx.exception.workflow_name, "wf-001") + self.assertEqual(ctx.exception.task_name, "task-001") + + def test_catch_press_workflow_fatal_error(self): + with self.assertRaises(PressWorkflowFatalError) as ctx: + raise PressWorkflowFatalError("Test fatal", traceback="test traceback") + 
self.assertEqual(ctx.exception.traceback, "test traceback") diff --git a/press/workflow_engine/doctype/press_workflow/test_press_workflow.py b/press/workflow_engine/doctype/press_workflow/test_press_workflow.py index 510c4dc5939..d0768612b98 100644 --- a/press/workflow_engine/doctype/press_workflow/test_press_workflow.py +++ b/press/workflow_engine/doctype/press_workflow/test_press_workflow.py @@ -107,3 +107,139 @@ def test_flow_with_args(self): wf = self.get_wf(self.doc.flow_with_args.run_as_workflow(x=4, y=5)) self.assertEqual(wf.status, "Success") self.assertEqual(wf.get_result(), 9) + + def test_force_fail(self): + with patch( + "press.workflow_engine.doctype.press_workflow.press_workflow.enqueue_workflow", + new=lambda *_args, **_kwargs: None, + ): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + "status": "Queued", + } + ).insert(ignore_permissions=True) + + wf.force_fail() + self.assertTrue(frappe.db.get_value("Press Workflow", wf.name, "is_force_failure_requested")) + + def test_force_fail_already_completed(self): + wf_name = self.doc.main_success.run_as_workflow() + wf = self.get_wf(wf_name) + self.assertEqual(wf.status, "Success") + + with self.assertRaises(frappe.ValidationError): + wf.force_fail() + + def test_on_trash_deletes_tasks(self): + wf_name = self.doc.main_with_task.run_as_workflow() + wf = self.get_wf(wf_name) + self.assertEqual(wf.status, "Success") + + tasks_before = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + self.assertTrue(len(tasks_before) > 0) + + wf.delete() + tasks_after = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + self.assertEqual(len(tasks_after), 0) + + def test_workflow_fatal_status(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": 
self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + "status": "Fatal", + "traceback": "Test traceback", + } + ).insert(ignore_permissions=True) + + from press.workflow_engine.doctype.press_workflow.exceptions import PressWorkflowFatalError + + with self.assertRaises(PressWorkflowFatalError) as ctx: + wf.get_result() + self.assertIn("fatal error", str(ctx.exception).lower()) + self.assertEqual(ctx.exception.traceback, "Test traceback") + + def test_workflow_queued_running_error(self): + with patch( + "press.workflow_engine.doctype.press_workflow.press_workflow.enqueue_workflow", + new=lambda *_args, **_kwargs: None, + ): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + "status": "Queued", + } + ).insert(ignore_permissions=True) + + from press.workflow_engine.doctype.press_workflow.exceptions import PressWorkflowRunningError + + with self.assertRaises(PressWorkflowRunningError): + wf.get_result() + + wf.reload() + wf.status = "Running" + wf.save() + with self.assertRaises(PressWorkflowRunningError): + wf.get_result() + + def test_workflow_success_with_none_output(self): + wf_name = self.doc.main_success.run_as_workflow() + wf = self.get_wf(wf_name) + self.assertEqual(wf.status, "Success") + result = wf.get_result() + self.assertEqual(result, "success output") + + def test_workflow_with_skipped_steps(self): + wf_name = self.doc.skipped_steps_flow.run_as_workflow() + wf = self.get_wf(wf_name) + self.assertEqual(wf.status, "Success") + + steps = wf.steps + self.assertTrue(len(steps) > 0) + for step in steps: + self.assertEqual(step.status, "Skipped") + + def test_workflow_as_flow_with_multiple_tasks(self): + wf_name = self.doc.main_as_flow.run_as_workflow() + wf = self.get_wf(wf_name) + self.assertEqual(wf.status, "Success") + self.assertEqual(wf.get_result(), "flow 
done") + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}, pluck="name") + self.assertTrue(len(tasks) >= 2) + + def test_workflow_with_kwargs(self): + wf_name = self.doc.flow_with_args.run_as_workflow(x=10, y=20) + wf = self.get_wf(wf_name) + self.assertEqual(wf.status, "Success") + self.assertEqual(wf.get_result(), 30) + + def test_workflow_failure_with_no_exception(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + "status": "Failure", + } + ).insert(ignore_permissions=True) + + from press.workflow_engine.doctype.press_workflow.exceptions import PressWorkflowFailedError + + with self.assertRaises(PressWorkflowFailedError) as ctx: + wf.get_result() + self.assertIn("no exception was recorded", str(ctx.exception).lower()) diff --git a/press/workflow_engine/doctype/press_workflow/test_workflow_builder.py b/press/workflow_engine/doctype/press_workflow/test_workflow_builder.py new file mode 100644 index 00000000000..83658beb8b1 --- /dev/null +++ b/press/workflow_engine/doctype/press_workflow/test_workflow_builder.py @@ -0,0 +1,211 @@ +# Copyright (c) 2026, Frappe and Contributors +# See license.txt + +from unittest.mock import patch + +import frappe +from frappe.tests.utils import FrappeTestCase + +from press.utils.test import foreground_enqueue, foreground_enqueue_doc +from press.workflow_engine.doctype.press_workflow.exceptions import PressWorkflowTaskEnqueued +from press.workflow_engine.doctype.press_workflow.workflow_builder import ( + ensure_to_resolve_context, +) +from press.workflow_engine.doctype.press_workflow_kv.press_workflow_kv import ( + InMemoryKVStore, + WorkflowKVStore, +) + + +@patch("frappe.enqueue_doc", new=foreground_enqueue_doc) +@patch("frappe.enqueue", new=foreground_enqueue) +@patch("frappe.db.commit", new=lambda: None) +class 
TestWorkflowBuilder(FrappeTestCase): + def setUp(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + frappe.db.delete("Press Workflow KV") + self.doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 3, + "input_b": 2, + } + ).insert() + + def tearDown(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + frappe.db.delete("Press Workflow KV") + self.doc.delete() + + def test_kv_property_in_memory_default(self): + kv = self.doc.kv + self.assertIsInstance(kv, InMemoryKVStore) + + def test_kv_property_set_and_get_in_memory(self): + self.doc.kv.set("test_key", "test_value") + self.assertEqual(self.doc.kv.get("test_key"), "test_value") + + def test_kv_property_delete_in_memory(self): + self.doc.kv.set("test_key", "test_value") + self.doc.kv.delete("test_key") + self.assertIsNone(self.doc.kv.get("test_key")) + + def test_kv_property_workflow_store(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + } + ).insert(ignore_permissions=True) + + self.doc.workflow_name = wf.name + self.doc.kv_store_type = "workflow_store" + self.doc.kv_store_reference = None + + kv = self.doc.kv + self.assertIsInstance(kv, WorkflowKVStore) + + def test_resolve_context_with_workflow_name(self): + self.doc.workflow_name = "test-workflow-123" + self.doc.resolve_context() + self.assertEqual(self.doc.workflow_name, "test-workflow-123") + + def test_resolve_context_with_frappe_flag(self): + self.addCleanup(lambda: frappe.flags.pop("current_press_workflow", None)) + frappe.flags.current_press_workflow = "test-workflow-from-flag" + + self.doc.workflow_name = None + self.doc.resolve_context() + + self.assertEqual(self.doc.workflow_name, 
"test-workflow-from-flag") + del frappe.flags.current_press_workflow + + def test_resolve_context_without_workflow(self): + self.doc.workflow_name = None + self.doc.resolve_context() + + self.assertIsNone(self.doc.workflow_name) + self.assertEqual(self.doc.kv_store_type, "in_memory") + + def test_defer_current_task_outside_workflow(self): + self.doc.flags.in_press_workflow_execution = False + self.doc.defer_current_task("Defer this task") + + def test_defer_current_task_inside_workflow(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + } + ).insert(ignore_permissions=True) + + self.doc.workflow_name = wf.name + self.doc.flags.in_press_workflow_execution = True + self.doc.flags.current_press_workflow_task = "task-001" + + with self.assertRaises(PressWorkflowTaskEnqueued) as ctx: + self.doc.defer_current_task("Please defer") + self.assertEqual(ctx.exception.workflow_name, wf.name) + self.assertEqual(ctx.exception.task_name, "task-001") + + def test_defer_current_task_without_task_name(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + } + ).insert(ignore_permissions=True) + + self.doc.workflow_name = wf.name + self.doc.flags.in_press_workflow_execution = True + + with self.assertRaises(PressWorkflowTaskEnqueued) as ctx: + self.doc.defer_current_task() + self.assertEqual(ctx.exception.workflow_name, wf.name) + self.assertIsNone(ctx.exception.task_name) + + def test_ensure_to_resolve_context_decorator(self): + @ensure_to_resolve_context + def my_method(self): + return "resolved" + + result = my_method(self.doc) + self.assertEqual(result, "resolved") + + def test_run_task_returns_cached_result_on_success(self): + wf = frappe.get_doc( 
+ { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_args_task", + "main_method_title": "Main With Args Task", + "steps": [ + { + "step_title": "Add", + "step_method": "add", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + self.assertEqual(wf.status, "Success") + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + self.assertEqual(len(tasks), 1) + + def test_run_task_raises_exception_on_failure(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_failing_task", + "main_method_title": "Main With Failing Task", + "steps": [ + { + "step_title": "Sample Failing Task", + "step_method": "sample_failing_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + self.assertEqual(wf.status, "Failure") + + def test_workflow_builder_attributes(self): + self.assertIsNone(self.doc.workflow_name) + self.assertIsNone(self.doc.workflow_doc) + self.assertEqual(self.doc.kv_store_type, "in_memory") + self.assertIsNone(self.doc.kv_store_reference) + self.assertIsNone(self.doc.current_task_signature) + + def test_kv_store_type_change_discards_cache(self): + self.doc.kv.set("key1", "value1") + self.doc.kv_store_reference = InMemoryKVStore() + + self.addCleanup(lambda: frappe.flags.pop("current_press_workflow", None)) + frappe.flags.current_press_workflow = "test-wf-for-kv-change" + self.doc.workflow_name = None + self.doc.resolve_context() + + self.assertEqual(self.doc.kv_store_type, "workflow_store") + self.assertIsNone(self.doc.kv_store_reference) + del frappe.flags.current_press_workflow diff --git a/press/workflow_engine/doctype/press_workflow/workflow_builder.py b/press/workflow_engine/doctype/press_workflow/workflow_builder.py index edfda65a724..8ff28f2b850 
100644 --- a/press/workflow_engine/doctype/press_workflow/workflow_builder.py +++ b/press/workflow_engine/doctype/press_workflow/workflow_builder.py @@ -17,9 +17,9 @@ from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( ObjectDeserializeError, ObjectPreviousSerializationFailedError, - PressWorkflowObject, ) from press.workflow_engine.utils import ( + deserialize_value, generate_function_signature, is_func_accept_task_id, method_title, @@ -106,19 +106,17 @@ def run_task( # noqa: C901 # Store the reference of the task in workflow doctype # If it's a nested task, ignore it if not task_doc.parent_task and ( - tracked_step := str( - frappe.db.exists( - "Press Workflow Step", - { - "parenttype": "Press Workflow", - "parent": self.workflow_name, - "step_method": wrapped.__name__, - "task": ("is", "not set"), - }, - ) + tracked_step := frappe.db.exists( + "Press Workflow Step", + { + "parenttype": "Press Workflow", + "parent": self.workflow_name, + "step_method": wrapped.__name__, + "task": ("is", "not set"), + }, ) ): - frappe.db.set_value("Press Workflow Step", tracked_step, "task", task_doc.name) + frappe.db.set_value("Press Workflow Step", str(tracked_step), "task", task_doc.name) task_name = task_doc.name assert task_name, "Task must be saved successfully before it can be run" @@ -133,12 +131,12 @@ def run_task( # noqa: C901 task_doc: PressWorkflowTask = frappe.get_doc("Press Workflow Task", task_name) # type: ignore if task_doc.status == "Success": - return PressWorkflowObject.get_object(task_doc.output) if task_doc.output else None + return deserialize_value(task_doc.output_type, task_doc.output) if task_doc.status == "Failure": if task_doc.exception: try: - exc = PressWorkflowObject.get_object(task_doc.exception) + exc = deserialize_value("object", task_doc.exception) except ObjectPreviousSerializationFailedError as e: raise RuntimeError( f"Task '{task_doc.method_title}' failed. 
Original exception could not be " diff --git a/press/workflow_engine/doctype/press_workflow_kv/test_press_workflow_kv.py b/press/workflow_engine/doctype/press_workflow_kv/test_press_workflow_kv.py index 3af2845a30b..ddd7a4aed60 100644 --- a/press/workflow_engine/doctype/press_workflow_kv/test_press_workflow_kv.py +++ b/press/workflow_engine/doctype/press_workflow_kv/test_press_workflow_kv.py @@ -36,17 +36,22 @@ def test_workflow_kv_store_set_and_get(self): def test_workflow_kv_store_update(self): self.store.set("test_key", "initial_value") initial_kv_name = self.store._get_kv_record_name("test_key") - initial_obj_name = frappe.db.get_value("Press Workflow KV", initial_kv_name, "value") + initial_type, initial_value = frappe.db.get_value( + "Press Workflow KV", initial_kv_name, ["type", "value"] + ) self.store.set("test_key", "updated_value") updated_kv_name = self.store._get_kv_record_name("test_key") - updated_obj_name = frappe.db.get_value("Press Workflow KV", updated_kv_name, "value") + _, updated_value = frappe.db.get_value("Press Workflow KV", updated_kv_name, ["type", "value"]) self.assertEqual(initial_kv_name, updated_kv_name) - self.assertNotEqual(initial_obj_name, updated_obj_name) + self.assertNotEqual(initial_value, updated_value) - is_deleted = frappe.db.get_value("Press Workflow Object", initial_obj_name, "deleted") - self.assertTrue(is_deleted) + # For JSON-serializable values, no Press Workflow Object is created. + # Only verify object deletion tracking when type is "object". 
+ if initial_type == "object": + is_deleted = frappe.db.get_value("Press Workflow Object", initial_value, "deleted") + self.assertTrue(is_deleted) value = self.store.get("test_key") self.assertEqual(value, "updated_value") @@ -54,16 +59,62 @@ def test_workflow_kv_store_update(self): def test_workflow_kv_store_delete(self): self.store.set("test_key", "to_be_deleted") kv_name = self.store._get_kv_record_name("test_key") - obj_name = frappe.db.get_value("Press Workflow KV", kv_name, "value") + obj_type, obj_name = frappe.db.get_value("Press Workflow KV", kv_name, ["type", "value"]) self.store.delete("test_key") self.assertFalse(frappe.db.exists("Press Workflow KV", kv_name)) - is_deleted = frappe.db.get_value("Press Workflow Object", obj_name, "deleted") - self.assertTrue(is_deleted) + # Only Press Workflow Object documents are marked as deleted. + # JSON-serializable values are stored directly in the KV record. + if obj_type == "object": + is_deleted = frappe.db.get_value("Press Workflow Object", obj_name, "deleted") + self.assertTrue(is_deleted) self.assertIsNone(self.store.get("test_key")) def test_workflow_kv_store_get_nonexistent(self): self.assertIsNone(self.store.get("nonexistent_key")) + + def test_in_memory_kv_store_multiple_keys(self): + store = InMemoryKVStore() + store.set("key1", "value1") + store.set("key2", "value2") + store.set("key3", "value3") + + self.assertEqual(store.get("key1"), "value1") + self.assertEqual(store.get("key2"), "value2") + self.assertEqual(store.get("key3"), "value3") + + def test_in_memory_kv_store_overwrite(self): + store = InMemoryKVStore() + store.set("key", "initial") + store.set("key", "updated") + + self.assertEqual(store.get("key"), "updated") + + def test_in_memory_kv_store_delete_nonexistent(self): + store = InMemoryKVStore() + store.delete("nonexistent") + self.assertIsNone(store.get("nonexistent")) + + def test_workflow_kv_store_with_none_value(self): + self.store.set("null_key", None) + 
self.assertIsNone(self.store.get("null_key")) + + def test_workflow_kv_store_with_complex_value(self): + value = {"nested": {"data": [1, 2, 3]}, "list": ["a", "b", "c"]} + self.store.set("complex_key", value) + retrieved = self.store.get("complex_key") + self.assertEqual(retrieved, value) + + def test_workflow_kv_store_multiple_keys(self): + self.store.set("key1", "value1") + self.store.set("key2", "value2") + + self.assertEqual(self.store.get("key1"), "value1") + self.assertEqual(self.store.get("key2"), "value2") + + def test_workflow_kv_store_delete_nonexistent(self): + self.store.delete("nonexistent_key") + self.assertIsNone(self.store.get("nonexistent_key")) diff --git a/press/workflow_engine/doctype/press_workflow_object/test_press_workflow_object.py b/press/workflow_engine/doctype/press_workflow_object/test_press_workflow_object.py index f1ef91662e6..b59a170cae5 100644 --- a/press/workflow_engine/doctype/press_workflow_object/test_press_workflow_object.py +++ b/press/workflow_engine/doctype/press_workflow_object/test_press_workflow_object.py @@ -13,8 +13,8 @@ # On IntegrationTestCase, the doctype test records and all # link-field test record dependencies are recursively loaded # Use these module variables to add/remove to/from that list -EXTRA_TEST_RECORD_DEPENDENCIES = [] # eg. ["User"] -IGNORE_TEST_RECORD_DEPENDENCIES = [] # eg. ["User"] +EXTRA_TEST_RECORD_DEPENDENCIES: list[str] = [] # eg. ["User"] +IGNORE_TEST_RECORD_DEPENDENCIES: list[str] = [] # eg. 
["User"] class MyCustomClass: @@ -78,3 +78,41 @@ def test_get_summary(self): summary = PressWorkflowObject.get_summary(doc_name) self.assertEqual(summary, str(obj)) + + def test_get_summary_nonexistent(self): + with self.assertRaises(frappe.DoesNotExistError): + PressWorkflowObject.get_summary("nonexistent-doc-name") + + def test_get_object_nonexistent(self): + with self.assertRaises(frappe.DoesNotExistError): + PressWorkflowObject.get_object("nonexistent-doc-name") + + def test_store_and_get_none_value(self): + doc_name = PressWorkflowObject.store(None) + self.assertTrue(doc_name) + retrieved = PressWorkflowObject.get_object(doc_name) + self.assertIsNone(retrieved) + + def test_store_and_get_complex_nested_object(self): + obj = { + "list_of_dicts": [{"a": 1}, {"b": 2}], + "dict_of_lists": {"x": [1, 2], "y": [3, 4]}, + "nested": {"deep": {"deeper": {"value": 42}}}, + } + doc_name = PressWorkflowObject.store(obj) + retrieved = PressWorkflowObject.get_object(doc_name) + self.assertEqual(retrieved, obj) + + def test_delete_trashed_objects(self): + from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( + delete_trashed_objects, + ) + + obj = {"key": "value"} + doc_name = PressWorkflowObject.store(obj) + + frappe.db.set_value("Press Workflow Object", doc_name, "deleted", True) + + delete_trashed_objects() + + self.assertFalse(frappe.db.exists("Press Workflow Object", doc_name)) diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json index 3053ea93d5f..8a188322945 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json @@ -131,9 +131,9 @@ }, { "fieldname": "output", - "fieldtype": "Link", + "fieldtype": "Data", "label": "Output", - "options": "Press Workflow Object", + "length": 1000, "read_only": 1 }, { @@ -146,16 
+146,16 @@ }, { "fieldname": "args", - "fieldtype": "Link", + "fieldtype": "Data", "label": "Args", - "options": "Press Workflow Object", + "length": 1000, "set_only_once": 1 }, { "fieldname": "kwargs", - "fieldtype": "Link", + "fieldtype": "Data", "label": "Kwargs", - "options": "Press Workflow Object", + "length": 1000, "set_only_once": 1 }, { @@ -208,7 +208,7 @@ ], "grid_page_length": 50, "links": [], - "modified": "2026-04-24 13:38:30.338341", + "modified": "2026-04-24 14:46:19.016442", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow Task", diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py index eb80263070d..a8ec354713f 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py @@ -31,16 +31,16 @@ class PressWorkflowTask(Document): if TYPE_CHECKING: from frappe.types import DF - args: DF.Link | None + args: DF.Data | None args_type: DF.Literal["int", "float", "string", "tuple", "list", "dict", "object"] duration: DF.Duration | None end: DF.Datetime | None exception: DF.Link | None - kwargs: DF.Link | None + kwargs: DF.Data | None kwargs_type: DF.Data | None method_name: DF.Data method_title: DF.Data - output: DF.Link | None + output: DF.Data | None output_type: DF.Literal["int", "float", "string", "tuple", "list", "dict", "object"] parent_task: DF.Link | None queue: DF.Data | None @@ -63,20 +63,39 @@ def update_tracked_step_status(self): if self.is_new(): return + if self.flags.in_insert: + # Called from run_post_save_methods() after insert, where the in-memory status + # is still the original "Queued" but the task may have already run synchronously + return + if not self.has_value_changed("status"): return - frappe.db.set_value( - "Press Workflow Step", - {"task": self.name}, - "status", - { - "Queued": 
"Pending", - "Running": "Running", - "Success": "Success", - "Failure": "Failure", - }.get(self.status, "Pending"), - ) + new_status = { + "Queued": "Pending", + "Running": "Running", + "Success": "Success", + "Failure": "Failure", + }.get(self.status, "Pending") + + # Primary lookup: find the step already linked to this task. + step_name = frappe.db.get_value("Press Workflow Step", {"task": self.name}, "name") + + if not step_name: + # Fallback: the step may not yet be linked (e.g. in synchronous test execution + # where after_insert runs the task before run_task sets the step.task reference). + step_name = frappe.db.get_value( + "Press Workflow Step", + { + "parenttype": "Press Workflow", + "parent": self.workflow, + "step_method": self.method_name, + }, + "name", + ) + + if step_name: + frappe.db.set_value("Press Workflow Step", step_name, "status", new_status) def run(self): # noqa: C901 - Best to keep workflow execution logic in one place assert self.name, "Task must be saved before it can be run" diff --git a/press/workflow_engine/doctype/press_workflow_task/test_press_workflow_task.py b/press/workflow_engine/doctype/press_workflow_task/test_press_workflow_task.py index a73f1d47daf..a924f12af17 100644 --- a/press/workflow_engine/doctype/press_workflow_task/test_press_workflow_task.py +++ b/press/workflow_engine/doctype/press_workflow_task/test_press_workflow_task.py @@ -1,20 +1,292 @@ # Copyright (c) 2026, Frappe and Contributors # See license.txt -# import frappe -from frappe.tests import IntegrationTestCase +from unittest.mock import patch -# On IntegrationTestCase, the doctype test records and all -# link-field test record dependencies are recursively loaded -# Use these module variables to add/remove to/from that list -EXTRA_TEST_RECORD_DEPENDENCIES = [] # eg. ["User"] -IGNORE_TEST_RECORD_DEPENDENCIES = [] # eg. 
["User"] +import frappe +from frappe.tests.utils import FrappeTestCase +from press.utils.test import foreground_enqueue, foreground_enqueue_doc -class IntegrationTestPressWorkflowTask(IntegrationTestCase): - """ - Integration tests for PressWorkflowTask. - Use this class for testing interactions between multiple components. - """ - pass +@patch("frappe.enqueue_doc", new=foreground_enqueue_doc) +@patch("frappe.enqueue", new=foreground_enqueue) +@patch("frappe.db.commit", new=lambda: None) +class TestPressWorkflowTask(FrappeTestCase): + def setUp(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + self.doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 3, + "input_b": 2, + } + ).insert() + + def tearDown(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + self.doc.delete() + + def test_task_after_insert_enqueues(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_task", + "main_method_title": "Main With Task", + "steps": [ + { + "step_title": "Sample Task", + "step_method": "sample_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + self.assertEqual(wf.status, "Success") + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + self.assertEqual(len(tasks), 1) + + task = frappe.get_doc("Press Workflow Task", tasks[0].name) + self.assertEqual(task.status, "Success") + self.assertEqual(task.method_name, "sample_task") + + def test_task_update_tracked_step_status(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_task", + "main_method_title": "Main With Task", + "steps": [ + { 
+ "step_title": "Sample Task", + "step_method": "sample_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + step = frappe.get_doc("Press Workflow Step", {"parent": wf.name}) + self.assertEqual(step.status, "Success") + + def test_task_failure_status(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_failing_task", + "main_method_title": "Main With Failing Task", + "steps": [ + { + "step_title": "Sample Failing Task", + "step_method": "sample_failing_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + self.assertEqual(wf.status, "Failure") + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + self.assertEqual(len(tasks), 1) + + task = frappe.get_doc("Press Workflow Task", tasks[0].name) + self.assertEqual(task.status, "Failure") + self.assertIsNotNone(task.exception) + + def test_task_with_args_and_kwargs(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_args_task", + "main_method_title": "Main With Args Task", + "steps": [ + { + "step_title": "Add", + "step_method": "add", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + self.assertEqual(wf.status, "Success") + self.assertEqual(wf.get_result(), 5) + + def test_task_with_nested_task(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_nested_task", + "main_method_title": "Main With Nested Task", + "steps": [ + { + "step_title": "Sample Nested Task", + "step_method": "sample_nested_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + self.assertEqual(wf.status, 
"Success") + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}, pluck="name") + self.assertTrue(len(tasks) >= 2) + + child_task = frappe.get_doc("Press Workflow Task", tasks[0]) + if child_task.method_name == "sample_nested_task": + self.assertIsNotNone(child_task.parent_task) + + def test_task_resume_workflow_on_success(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_task", + "main_method_title": "Main With Task", + "steps": [ + { + "step_title": "Sample Task", + "step_method": "sample_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + self.assertEqual(wf.status, "Success") + + def test_task_signature_deduplication(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_args_task", + "main_method_title": "Main With Args Task", + "steps": [ + { + "step_title": "Add", + "step_method": "add", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + self.assertEqual(len(tasks), 1) + + task = frappe.get_doc("Press Workflow Task", tasks[0].name) + self.assertIsNotNone(task.signature) + + def test_task_with_queue_and_timeout(self): + wf_name = ( + frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + "steps": [], + } + ) + .insert(ignore_permissions=True) + .name + ) + + with patch( + "press.workflow_engine.doctype.press_workflow_task.press_workflow_task.enqueue_task", + return_value=None, + ): + task_doc = frappe.new_doc("Press Workflow Task") + task_doc.workflow = wf_name + task_doc.method_name = 
"sample_task" + task_doc.method_title = "Sample Task" + task_doc.signature = "test-signature" + task_doc.args_type = "tuple" + task_doc.args = "[]" + task_doc.kwargs_type = "dict" + task_doc.kwargs = "{}" + task_doc.status = "Queued" + task_doc.queue = "long" + task_doc.timeout = 600 + task_doc.insert(ignore_permissions=True) + + self.assertEqual(task_doc.queue, "long") + self.assertEqual(task_doc.timeout, 600) + + def test_task_stdout_capture(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_noisy_task", + "main_method_title": "Main With Noisy Task", + "steps": [ + { + "step_title": "Noisy Task", + "step_method": "noisy_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + self.assertEqual(len(tasks), 1) + + task = frappe.get_doc("Press Workflow Task", tasks[0].name) + self.assertIn("hello from noisy_task", task.stdout or "") + + def test_task_duration_calculation(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_task", + "main_method_title": "Main With Task", + "steps": [ + { + "step_title": "Sample Task", + "step_method": "sample_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + task = frappe.get_doc("Press Workflow Task", tasks[0].name) + + self.assertIsNotNone(task.start) + self.assertIsNotNone(task.end) + self.assertIsNotNone(task.duration) diff --git a/press/workflow_engine/doctype/press_workflow_test/test_press_workflow_test.py b/press/workflow_engine/doctype/press_workflow_test/test_press_workflow_test.py index 9f9e22f354e..b28a34cd145 100644 --- 
a/press/workflow_engine/doctype/press_workflow_test/test_press_workflow_test.py +++ b/press/workflow_engine/doctype/press_workflow_test/test_press_workflow_test.py @@ -1,20 +1,155 @@ # Copyright (c) 2026, Frappe and Contributors # See license.txt -# import frappe -from frappe.tests import IntegrationTestCase +from unittest.mock import patch -# On IntegrationTestCase, the doctype test records and all -# link-field test record dependencies are recursively loaded -# Use these module variables to add/remove to/from that list -EXTRA_TEST_RECORD_DEPENDENCIES = [] # eg. ["User"] -IGNORE_TEST_RECORD_DEPENDENCIES = [] # eg. ["User"] +import frappe +from frappe.tests.utils import FrappeTestCase +from press.utils.test import foreground_enqueue, foreground_enqueue_doc -class IntegrationTestPressWorkflowTest(IntegrationTestCase): - """ - Integration tests for PressWorkflowTest. - Use this class for testing interactions between multiple components. - """ - pass +@patch("frappe.enqueue_doc", new=foreground_enqueue_doc) +@patch("frappe.enqueue", new=foreground_enqueue) +@patch("frappe.db.commit", new=lambda: None) +class TestPressWorkflowTestDoctype(FrappeTestCase): + def setUp(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + + def tearDown(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + + def test_create_workflow_test_doc(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 10, + "input_b": 5, + } + ).insert() + + self.assertEqual(doc.input_a, 10) + self.assertEqual(doc.input_b, 5) + doc.delete() + + def test_workflow_test_sample_task(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 1, + "input_b": 2, + } + ).insert() + + result = doc.sample_task() + self.assertEqual(result, "task done") + doc.delete() + + def 
test_workflow_test_sample_failing_task(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 1, + "input_b": 2, + } + ).insert() + + with self.assertRaises(ValueError) as ctx: + doc.sample_failing_task() + self.assertIn("task failed", str(ctx.exception)) + doc.delete() + + def test_workflow_test_add_task(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 10, + "input_b": 20, + } + ).insert() + + result = doc.add(10, 20) + self.assertEqual(result, 30) + doc.delete() + + def test_workflow_test_multiply_task(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 3, + "input_b": 4, + } + ).insert() + + result = doc.multiply(3, 4) + self.assertEqual(result, 12) + doc.delete() + + def test_workflow_test_power_task(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 2, + "input_b": 3, + } + ).insert() + + result = doc.power(2, 3) + self.assertEqual(result, 8) + doc.delete() + + def test_workflow_test_noisy_task(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 1, + "input_b": 2, + } + ).insert() + + result = doc.noisy_task() + self.assertEqual(result, "done") + doc.delete() + + def test_workflow_test_main_success_flow(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 1, + "input_b": 2, + } + ).insert() + + result = doc.main_success() + self.assertEqual(result, "success output") + doc.delete() + + def test_workflow_test_main_fail_flow(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 1, + "input_b": 2, + } + ).insert() + + with self.assertRaises(ValueError): + doc.main_fail() + doc.delete() + + def test_workflow_test_skipped_steps_flow(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 1, + "input_b": 2, + } + ).insert() + + result = doc.skipped_steps_flow() + self.assertEqual(result, "skipped") + doc.delete() diff 
--git a/press/workflow_engine/test_utils.py b/press/workflow_engine/test_utils.py index 33a3d7ce8b6..63ea43eb4df 100644 --- a/press/workflow_engine/test_utils.py +++ b/press/workflow_engine/test_utils.py @@ -12,6 +12,7 @@ called_methods_in_order, deserialize_value, generate_function_signature, + get_type_of_value, is_func_accept_task_id, method_title, serialize_and_store_value, @@ -176,3 +177,143 @@ def test_serialize_deserialize_exception_as_object(self): deserialized = deserialize_value(value_type, serialized_value) self.assertIsInstance(deserialized, ValueError) self.assertEqual(str(deserialized), str(original)) + + def test_get_type_of_value_none(self): + self.assertIsNone(get_type_of_value(None)) + + def test_get_type_of_value_bool(self): + self.assertEqual(get_type_of_value(True), "bool") + self.assertEqual(get_type_of_value(False), "bool") + + def test_get_type_of_value_int(self): + self.assertEqual(get_type_of_value(0), "int") + self.assertEqual(get_type_of_value(-100), "int") + self.assertEqual(get_type_of_value(999999), "int") + + def test_get_type_of_value_float_finite(self): + self.assertEqual(get_type_of_value(1.5), "float") + self.assertEqual(get_type_of_value(0.0), "float") + + def test_get_type_of_value_float_infinite(self): + self.assertEqual(get_type_of_value(float("inf")), "object") + self.assertEqual(get_type_of_value(float("-inf")), "object") + self.assertEqual(get_type_of_value(float("nan")), "object") + + def test_get_type_of_value_string(self): + self.assertEqual(get_type_of_value(""), "string") + self.assertEqual(get_type_of_value("hello"), "string") + + def test_get_type_of_value_tuple_serializable(self): + self.assertEqual(get_type_of_value((1, 2, 3)), "tuple") + self.assertEqual(get_type_of_value(("a", "b")), "tuple") + + def test_get_type_of_value_tuple_non_serializable(self): + self.assertEqual(get_type_of_value((float("inf"),)), "object") + + def test_get_type_of_value_list_serializable(self): + self.assertEqual(get_type_of_value([1, 
2, 3]), "list") + + def test_get_type_of_value_list_non_serializable(self): + self.assertEqual(get_type_of_value([float("inf")]), "object") + + def test_get_type_of_value_dict_serializable(self): + self.assertEqual(get_type_of_value({"a": 1, "b": 2}), "dict") + + def test_get_type_of_value_dict_non_serializable(self): + self.assertEqual(get_type_of_value({"a": float("inf")}), "object") + + def test_get_type_of_value_custom_object(self): + obj = DummyDataclass(a=1, b="test") + self.assertEqual(get_type_of_value(obj), "object") + + def test_serialize_and_store_value_none(self): + value_type, serialized_value = serialize_and_store_value(None) + self.assertIsNone(value_type) + self.assertIsNone(serialized_value) + + def test_serialize_and_store_value_object(self): + obj = DummyDataclass(a=1, b="test") + value_type, serialized_value = serialize_and_store_value(obj) + self.assertEqual(value_type, "object") + self.assertIsNotNone(serialized_value) + + def test_deserialize_value_none_type(self): + self.assertIsNone(deserialize_value(None, None)) + + def test_deserialize_value_invalid_json(self): + with self.assertRaises(ValueError): + deserialize_value("dict", "not valid json") + + def test_deserialize_value_unsupported_type(self): + with self.assertRaises(ValueError): + deserialize_value("unsupported", "value") + + def test_canonicalize_frozenset(self): + result = _canonicalize(frozenset([1, 2, 3])) + self.assertEqual(result["__type__"], "frozenset") + self.assertEqual(sorted(result["values"]), [1, 2, 3]) + + def test_generate_function_signature_with_self(self): + class MyClass: + def my_method(self, a, b): + pass + + sig = generate_function_signature(MyClass.my_method, args=(1, 2), kwargs={}) + self.assertIsInstance(sig, str) + self.assertTrue(len(sig) > 0) + + def test_generate_function_signature_different_args(self): + def my_func(a, b): + pass + + sig1 = generate_function_signature(my_func, args=(1, 2), kwargs={}) + sig2 = generate_function_signature(my_func, args=(3, 
4), kwargs={}) + self.assertNotEqual(sig1, sig2) + + def test_is_func_accept_task_id_with_kwargs(self): + def func_with_kwargs(**kwargs): + pass + + self.assertFalse(is_func_accept_task_id(func_with_kwargs)) + + def test_is_func_accept_task_id_with_variadic(self): + def func_with_variadic(*args, **kwargs): + pass + + self.assertFalse(is_func_accept_task_id(func_with_variadic)) + + def test_method_title_with_multiline_docstring(self): + def func(): + """First line + Second line + Third line + """ + pass + + self.assertEqual(method_title(func), "First line") + + def test_method_title_with_underscores(self): + def my_function_name(): + pass + + self.assertEqual(method_title(my_function_name), "My Function Name") + + def test_called_methods_in_order_with_method_name(self): + calls = called_methods_in_order(DummyClassForCallVisitor, "method_three") + self.assertEqual(len(calls), 2) + self.assertEqual(calls[0][0], "method_one") + self.assertEqual(calls[1][0], "method_two") + + def test_serialize_deserialize_empty_collections(self): + cases = [ + ([], "list"), + ((), "tuple"), + ({}, "dict"), + ] + + for original, expected_type in cases: + with self.subTest(value=original, value_type=expected_type): + value_type, serialized_value = serialize_and_store_value(original) + self.assertEqual(value_type, expected_type) + deserialized_value = deserialize_value(value_type, serialized_value) + self.assertEqual(deserialized_value, original) From 9c61826af9ddbd63604aaf2fe6a712ae43d57835 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 18:49:12 +0530 Subject: [PATCH 18/22] fix(release-pipeline): In on_workflow_failure add 2nd args --- press/press/doctype/release_pipeline/release_pipeline.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/press/press/doctype/release_pipeline/release_pipeline.py b/press/press/doctype/release_pipeline/release_pipeline.py index 1c67e777703..7839f652921 100644 --- 
a/press/press/doctype/release_pipeline/release_pipeline.py +++ b/press/press/doctype/release_pipeline/release_pipeline.py @@ -180,7 +180,6 @@ def update_pipeline_status( "Failure", "Retrying", ], - ignore_permissions: bool = False, ): # If the workflow doc touches this for any reason # Document native methods would raise a `TimeStampMismatch` error @@ -624,9 +623,5 @@ def create_release( # Just in case, make sure that we mark the pipeline as failed and notify the frontend to stop listening for deploy updates self.update_pipeline_status("Failure") - workflow_status = frappe.db.get_value("Press Workflow", self.workflow, "status") - if workflow_status == "Failure": - self.update_pipeline_status("Failure") - - def on_workflow_failure(self): - self.update_pipeline_status("Failure", ignore_permissions=True) + def on_workflow_failure(self, *args, **kwargs): + self.update_pipeline_status("Failure") From d9ee6895d9665843f033e723d2f97d868f320218 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 18:55:37 +0530 Subject: [PATCH 19/22] chore(workflow-engine): Make traceback fields read only --- .../doctype/press_workflow/press_workflow.json | 8 +++++--- .../press_workflow_task/press_workflow_task.json | 11 +++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.json b/press/workflow_engine/doctype/press_workflow/press_workflow.json index 6f564454759..d52bb95423e 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.json +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.json @@ -263,7 +263,8 @@ { "fieldname": "callback_traceback", "fieldtype": "Long Text", - "label": "Callback Traceback" + "label": "Callback Traceback", + "read_only": 1 }, { "default": "Pending", @@ -282,7 +283,8 @@ { "fieldname": "workflow_traceback", "fieldtype": "Long Text", - "label": "Workflow Traceback" + "label": "Workflow 
Traceback", + "read_only": 1 }, { "default": "0", @@ -319,7 +321,7 @@ "link_fieldname": "workflow" } ], - "modified": "2026-04-24 13:36:34.775783", + "modified": "2026-04-24 18:53:20.041521", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow", diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json index 8a188322945..22789dae75e 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json @@ -190,25 +190,28 @@ "fieldname": "args_type", "fieldtype": "Select", "label": "Args Type", - "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject" + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject", + "read_only": 1 }, { "fieldname": "kwargs_type", "fieldtype": "Data", "label": "Kwargs Type", - "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject" + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject", + "read_only": 1 }, { "fieldname": "output_type", "fieldtype": "Select", "label": "Output Type", "length": 1000, - "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject" + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject", + "read_only": 1 } ], "grid_page_length": 50, "links": [], - "modified": "2026-04-24 14:46:19.016442", + "modified": "2026-04-24 18:52:37.880235", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow Task", From 80c4ed210f23b035f19fa77f0f14d1c35b21cbf2 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 18:55:37 +0530 Subject: [PATCH 20/22] chore(workflow-engine): Make traceback fields read only --- .../doctype/press_workflow/press_workflow.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.json 
b/press/workflow_engine/doctype/press_workflow/press_workflow.json index d52bb95423e..8e3ebb043b3 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.json +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.json @@ -290,7 +290,8 @@ "default": "0", "fieldname": "is_force_failure_requested", "fieldtype": "Check", - "label": "Force Failure Requested" + "label": "Force Failure Requested", + "read_only": 1 }, { "fieldname": "args_type", @@ -321,7 +322,7 @@ "link_fieldname": "workflow" } ], - "modified": "2026-04-24 18:53:20.041521", + "modified": "2026-04-24 19:15:31.340535", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow", From 14e4ce71c24d3ab28bf4b24871b785647f429361 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 19:21:24 +0530 Subject: [PATCH 21/22] fix(workflow-engine): Load workflow_doc lazily --- .../doctype/press_workflow/workflow_builder.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/press/workflow_engine/doctype/press_workflow/workflow_builder.py b/press/workflow_engine/doctype/press_workflow/workflow_builder.py index 8ff28f2b850..d3a803de1fe 100644 --- a/press/workflow_engine/doctype/press_workflow/workflow_builder.py +++ b/press/workflow_engine/doctype/press_workflow/workflow_builder.py @@ -47,11 +47,21 @@ def wrapper(self: "WorkflowBuilder", *args, **kwargs): class WorkflowBuilder(Document): workflow_name: str | None = None - workflow_doc = None + _workflow_doc_cache: "PressWorkflow | None" = None kv_store_type: Literal["in_memory", "workflow_store"] = "in_memory" kv_store_reference: KVStoreInterface | None = None current_task_signature: str | None = None + @property + def workflow_doc(self) -> "PressWorkflow | None": + if self._workflow_doc_cache is None and self.workflow_name: + self._workflow_doc_cache = frappe.get_doc("Press Workflow", self.workflow_name) # type: ignore + return 
self._workflow_doc_cache + + @workflow_doc.setter + def workflow_doc(self, value: "PressWorkflow | None") -> None: + self._workflow_doc_cache = value + @ensure_to_resolve_context def run_task( # noqa: C901 self, @@ -184,7 +194,7 @@ def resolve_context(self) -> None: current_workflow = getattr(frappe.flags, "current_press_workflow", None) if current_workflow: self.workflow_name = str(current_workflow) - self.workflow_doc: PressWorkflow = frappe.get_doc("Press Workflow", self.workflow_name) # type: ignore + # workflow_doc will be loaded lazily on first access if self.kv_store_type != "workflow_store": # Store type is changing — discard any cached in-memory store. self.kv_store_type = "workflow_store" From af6aa4631a45fd49476ec9cfe224456db8e526bd Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 20:01:16 +0530 Subject: [PATCH 22/22] feat(press-job): Expand job step details --- press/press/doctype/press_job/press_job.py | 28 +++++++++++++++++++--- press/press/doctype/server/server.py | 10 ++++---- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/press/press/doctype/press_job/press_job.py b/press/press/doctype/press_job/press_job.py index c5156796de2..4a69ade1b51 100644 --- a/press/press/doctype/press_job/press_job.py +++ b/press/press/doctype/press_job/press_job.py @@ -127,12 +127,34 @@ def virtual_machine_doc(self) -> VirtualMachine | None: @property def steps(self) -> list[dict[str, str]]: try: - workflow = frappe.get_last_doc("Press Workflow", {"linked_docname": self.name}) + workflow: PressWorkflow = frappe.get_last_doc("Press Workflow", {"linked_docname": self.name}) + tasks = frappe.get_all( + "Press Workflow Task", + filters={"workflow": workflow.name}, + fields=[ + "name", + "method_title", + "status", + "stdout", + "creation", + "start", + "end", + "duration", + ], + ) + # Convert to a dict with task name as key for easy lookup + task_dict = {task.name: task for task in tasks} return 
[ { - "method": step.step_method, - "title": step.step_title, + "name": step.name, + "step_name": step.step_title, # backward compatibility + "step_title": step.step_title, "status": step.status, + "result": task_dict.get(step.task, {}).get("stdout", ""), + "traceback": task_dict.get(step.task, {}).get("traceback", ""), + "start": task_dict.get(step.task, {}).get("start"), + "end": task_dict.get(step.task, {}).get("end"), + "duration": task_dict.get(step.task, {}).get("duration"), } for step in workflow.steps ] diff --git a/press/press/doctype/server/server.py b/press/press/doctype/server/server.py index e6a28aca99c..578a26e2723 100644 --- a/press/press/doctype/server/server.py +++ b/press/press/doctype/server/server.py @@ -3,6 +3,7 @@ from __future__ import annotations +import contextlib import datetime import ipaddress import json @@ -3451,12 +3452,9 @@ def generate_on_prem_failover_config(self): running_press_job = next((job for job in press_jobs if job.status in ("Pending", "Running")), None) if press_jobs: for press_job in press_jobs: - press_job["steps"] = frappe.get_all( - "Press Job Step", - filters={"job": press_job.name}, - fields=["name", "step_name", "status", "result", "traceback", "start", "end", "duration"], - order_by="creation asc", - ) + press_job["steps"] = [] + with contextlib.suppress(frappe.DoesNotExistError): + press_job["steps"] = frappe.get_doc("Press Job", press_job.name).steps return { "running_press_job_type": running_press_job.job_type if running_press_job else None,