From 6504a7e807a3d795300bde4b99fa0aaa13a730b9 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Thu, 16 Apr 2026 22:28:53 +0530 Subject: [PATCH 01/22] feat(workflow-engine): Add support for callback hooks --- press/hooks.py | 3 +- .../doctype/press_workflow/decorators.py | 2 +- .../press_workflow/press_workflow.json | 70 +++++++++- .../doctype/press_workflow/press_workflow.py | 125 +++++++++++++++++- 4 files changed, 189 insertions(+), 11 deletions(-) diff --git a/press/hooks.py b/press/hooks.py index b6a214b3259..61041d0d501 100644 --- a/press/hooks.py +++ b/press/hooks.py @@ -260,7 +260,6 @@ "press.press.doctype.invoice.invoice.finalize_draft_invoices", "press.press.doctype.invoice.invoice.finalize_razorpay_mandate_invoices", "press.press.doctype.agent_job.agent_job.fail_old_jobs", - "press.press.doctype.press_job.press_job.fail_stuck_press_jobs", "press.press.doctype.site_update.site_update.mark_stuck_updates_as_fatal", "press.press.doctype.deploy_candidate_build.deploy_candidate_build.cleanup_build_directories", "press.press.doctype.deploy_candidate_build.deploy_candidate_build.check_builds_status", @@ -310,7 +309,6 @@ "press.press.doctype.press_webhook_log.press_webhook_log.process", "press.press.doctype.telegram_message.telegram_message.send_telegram_message", "press.press.doctype.agent_update.agent_update.process_bulk_agent_update", - "press.press.doctype.press_job.press_job.process_failed_callbacks", "press.press.doctype.server_snapshot_recovery.server_snapshot_recovery.resume_warmed_up_restorations", "press.press.doctype.server_snapshot.server_snapshot.move_pending_snapshots_to_processing", "press.press.doctype.bench.bench.process_bench_queue", @@ -350,6 +348,7 @@ "press.press.doctype.app.app.poll_new_releases", "press.utils.jobs.alert_on_zombie_rq_jobs", "press.saas.doctype.product_trial.product_trial.replenish_standby_sites", + 
"press.workflow_engine.doctype.press_workflow.press_workflow.retry_workflow_callbacks", ], "* * * * *": [ "press.press.doctype.virtual_disk_snapshot.virtual_disk_snapshot.sync_physical_backup_snapshots", diff --git a/press/workflow_engine/doctype/press_workflow/decorators.py b/press/workflow_engine/doctype/press_workflow/decorators.py index cd6122359a7..679eaca1975 100644 --- a/press/workflow_engine/doctype/press_workflow/decorators.py +++ b/press/workflow_engine/doctype/press_workflow/decorators.py @@ -176,7 +176,7 @@ def run_as_workflow(self, *args: Any, **kwargs: Any) -> str: "args": PressWorkflowObject.store(args) if args else None, "kwargs": PressWorkflowObject.store(kwargs) if kwargs else None, "linked_doctype": instance.doctype, # type: ignore - "linked_docname": instance.name, # type: ignore + "linked_docname": str(instance.name), # type: ignore "main_method_name": self._wrapped.__name__, "main_method_title": method_title(self._wrapped), "steps": [ diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.json b/press/workflow_engine/doctype/press_workflow/press_workflow.json index e3503645a35..9267e6adefe 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.json +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.json @@ -30,9 +30,18 @@ "output", "column_break_lhnh", "exception", + "callback_section", + "max_no_of_callback_attempts", + "column_break_amxx", + "callback_status", + "column_break_gvim", + "no_of_callback_attempts", + "column_break_gteb", + "callback_next_retry_at", "section_break_xglm", "stdout", - "traceback" + "traceback", + "callback_traceback" ], "fields": [ { @@ -98,6 +107,7 @@ "read_only": 1 }, { + "depends_on": "eval: doc.key_value_store.length > 0", "fieldname": "kv_storage_section", "fieldtype": "Section Break", "label": "KV Storage" @@ -207,6 +217,62 @@ "label": "Steps", "options": "Press Workflow Step", "read_only": 1 + }, + { + "fieldname": "callback_section", + "fieldtype": "Section 
Break", + "label": "Callback" + }, + { + "default": "5", + "fieldname": "max_no_of_callback_attempts", + "fieldtype": "Int", + "label": "Maximum Attempts", + "reqd": 1, + "set_only_once": 1 + }, + { + "fieldname": "column_break_amxx", + "fieldtype": "Column Break" + }, + { + "default": "0", + "fieldname": "no_of_callback_attempts", + "fieldtype": "Int", + "label": "Attempts", + "non_negative": 1, + "read_only": 1, + "reqd": 1 + }, + { + "fieldname": "column_break_gvim", + "fieldtype": "Column Break" + }, + { + "fieldname": "callback_next_retry_at", + "fieldtype": "Datetime", + "label": "Next Retry At", + "read_only": 1, + "search_index": 1 + }, + { + "fieldname": "callback_traceback", + "fieldtype": "Long Text", + "label": "Callback Traceback" + }, + { + "default": "Pending", + "fieldname": "callback_status", + "fieldtype": "Select", + "label": "Status", + "options": "Pending\nSuccess\nFailure\nFatal", + "read_only": 1, + "reqd": 1, + "search_index": 1 + }, + { + "fieldname": "column_break_gteb", + "fieldtype": "Column Break" } ], "grid_page_length": 50, @@ -217,7 +283,7 @@ "link_fieldname": "workflow" } ], - "modified": "2026-03-11 00:51:08.486677", + "modified": "2026-04-16 22:25:25.102297", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow", diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.py b/press/workflow_engine/doctype/press_workflow/press_workflow.py index 25ec0d8a493..3c10461e41f 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.py +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.py @@ -38,14 +38,13 @@ class PressWorkflow(Document): if TYPE_CHECKING: from frappe.types import DF - from press.workflow_engine.doctype.press_workflow_kv.press_workflow_kv import ( - PressWorkflowKV, - ) - from press.workflow_engine.doctype.press_workflow_step.press_workflow_step import ( - PressWorkflowStep, - ) + from 
press.workflow_engine.doctype.press_workflow_kv.press_workflow_kv import PressWorkflowKV + from press.workflow_engine.doctype.press_workflow_step.press_workflow_step import PressWorkflowStep args: DF.Link | None + callback_next_retry_at: DF.Datetime | None + callback_status: DF.Literal["Pending", "Success", "Failure", "Fatal"] + callback_traceback: DF.LongText | None duration: DF.Duration | None end: DF.Datetime | None exception: DF.Link | None @@ -55,6 +54,8 @@ class PressWorkflow(Document): linked_doctype: DF.Link main_method_name: DF.Data main_method_title: DF.Data + max_no_of_callback_attempts: DF.Int + no_of_callback_attempts: DF.Int output: DF.Link | None start: DF.Datetime | None status: DF.Literal["Queued", "Running", "Success", "Failure", "Fatal"] @@ -63,9 +64,16 @@ class PressWorkflow(Document): traceback: DF.LongText | None # end: auto-generated types + def before_save(self): + if self.linked_docname: + self.linked_docname = str(self.linked_docname) + def after_insert(self): enqueue_workflow(self.name) # type: ignore + def on_trash(self): + frappe.db.delete("Press Workflow Task", {"workflow": self.name}) + def run(self): # noqa: C901 - best to keep it in one place if not self.linked_doctype or not self.linked_docname: frappe.throw("Cannot run flow without linked_doctype and linked_docname", frappe.ValidationError) @@ -138,6 +146,75 @@ def run(self): # noqa: C901 - best to keep it in one place self.update_skipped_steps_status(save=False) self.save() + self.execute_callback_in_background() + + def execute_callback_in_background(self): + frappe.enqueue_doc( + self.doctype, + self.name, + method="execute_callback", + queue="default", + timeout=300, + deduplicate=True, + enqueue_after_commit=True, + job_id=f"press_workflow||{self.name}||execute_callback", + ) + + def execute_callback(self): + """ + If the workflow reached it's termination state, execute callback + - on_workflow_success(doc:PressWorkflow) if status is Success + - 
on_workflow_failure(doc:PressWorkflow) if status is Failure + """ + + if self.status not in ["Success", "Failure"]: + return + + if not frappe.db.exists(self.linked_doctype, self.linked_docname): + return + + reference_doc: WorkflowBuilder = frappe.get_doc(self.linked_doctype, self.linked_docname) # type: ignore + callback_method = { + "Success": "on_workflow_success", + "Failure": "on_workflow_failure", + }[self.status] + + if not hasattr(reference_doc, callback_method): + self.callback_status = "Success" + self.save() + return + + try: + getattr(reference_doc, callback_method)(self) + self.callback_status = "Success" + self.save() + except Exception as e: + frappe.log_error( + f"Error executing workflow callback {callback_method}", + message=str(e), + reference_doctype=self.linked_doctype, + reference_name=self.linked_docname, + ) + + self.no_of_callback_attempts += 1 + if self.no_of_callback_attempts >= self.max_no_of_callback_attempts: + self.callback_status = "Fatal" + self.callback_traceback = frappe.get_traceback() + else: + self.callback_status = "Failure" + self.callback_next_retry_at = frappe.utils.add_minutes( + now_datetime(), 2**self.no_of_callback_attempts + ) + + self.save() + + if self.callback_status == "Fatal": + frappe.log_error( + f"Workflow {self.name} has reached max callback retry attempts and is marked as Fatal", + reference_doctype="Press Workflow", + reference_name=self.name, + ) + def update_skipped_steps_status(self, save: bool = True): # noqa: C901 - best to keep it in one place is_updated = False @@ -225,3 +302,39 @@ def retry_workflows(): reference_doctype="Press Workflow", reference_name=workflow_name, ) + + +def retry_workflow_callbacks(): + workflows = frappe.get_all( + "Press Workflow", + filters={ + "callback_status": "Failure", + "callback_next_retry_at": ("<=", now_datetime()), + }, + pluck="name", + order_by="modified asc", + ) + + # Include workflows with no callback_next_retry_at_set + # and in Pending or Failure state + 
workflows += frappe.get_all( + "Press Workflow", + filters={ + "callback_status": ("in", ["Pending", "Failure"]), + "callback_next_retry_at": None, + }, + pluck="name", + order_by="modified asc", + ) + + for workflow_name in workflows: + try: + workflow: PressWorkflow = frappe.get_doc("Press Workflow", workflow_name) + workflow.execute_callback_in_background() + except Exception as e: + frappe.log_error( + "Error retrying workflow callback", + message=str(e), + reference_doctype="Press Workflow", + reference_name=workflow_name, + ) From 612da86d8fe0d98ee1db866fcbadbb769ebbf8c1 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Thu, 16 Apr 2026 22:38:46 +0530 Subject: [PATCH 02/22] feat(press-job): Integrate Workflow Engine and create wrapper --- .../press_job/jobs/reset_swap_on_server.py | 31 ++ press/press/doctype/press_job/press_job.json | 95 +----- press/press/doctype/press_job/press_job.py | 283 +++++------------- press/press/doctype/server/server.py | 3 +- 4 files changed, 122 insertions(+), 290 deletions(-) create mode 100644 press/press/doctype/press_job/jobs/reset_swap_on_server.py diff --git a/press/press/doctype/press_job/jobs/reset_swap_on_server.py b/press/press/doctype/press_job/jobs/reset_swap_on_server.py new file mode 100644 index 00000000000..fb444f1a0ce --- /dev/null +++ b/press/press/doctype/press_job/jobs/reset_swap_on_server.py @@ -0,0 +1,31 @@ +from contextlib import suppress + +import frappe + +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class ResetSwapOnServerJob(PressJob): + @flow + def execute(self): + if self.status == "Pending": + self.status = "Running" + self.save() + + with suppress(Exception): + self.send_telegram_notification() + + self.reset_swap() + + @task + def send_telegram_notification(self): + telegram_message = frappe.get_doc("Press Settings").telegram_message + 
telegram_message.enqueue( + f"Resetting swap on [{self.server}]({frappe.utils.get_url_to_form(self.server_type, self.server)})", + "Information", + ) + + @task(queue="long", timeout=1200) + def reset_swap(self): + self.server_doc.reset_swap(now=True) diff --git a/press/press/doctype/press_job/press_job.json b/press/press/doctype/press_job/press_job.json index ffcf4c982d2..57e58006a72 100644 --- a/press/press/doctype/press_job/press_job.json +++ b/press/press/doctype/press_job/press_job.json @@ -15,17 +15,8 @@ "section_break_7", "server_type", "server", - "virtual_machine", - "column_break_11", - "arguments", - "callback_section", - "callback_executed", - "callback_failed", - "callback_retry_limit_reached", - "callback_failure_issue_resolved", - "column_break_zynz", - "callback_failure_count", - "next_callback_retry_at" + "column_break_fhyz", + "virtual_machine" ], "fields": [ { @@ -35,9 +26,9 @@ "in_standard_filter": 1, "label": "Job Type", "options": "Press Job Type", - "read_only": 1, "reqd": 1, - "search_index": 1 + "search_index": 1, + "set_only_once": 1 }, { "fieldname": "status", @@ -50,14 +41,6 @@ "reqd": 1, "search_index": 1 }, - { - "default": "{}", - "fieldname": "arguments", - "fieldtype": "Code", - "label": "Arguments", - "read_only": 1, - "reqd": 1 - }, { "fieldname": "column_break_3", "fieldtype": "Column Break" @@ -91,8 +74,8 @@ "fieldtype": "Link", "label": "Server Type", "options": "DocType", - "read_only": 1, - "search_index": 1 + "search_index": 1, + "set_only_once": 1 }, { "fieldname": "server", @@ -101,8 +84,8 @@ "in_standard_filter": 1, "label": "Server", "options": "server_type", - "read_only": 1, - "search_index": 1 + "search_index": 1, + "set_only_once": 1 }, { "fetch_if_empty": 1, @@ -112,74 +95,26 @@ "in_standard_filter": 1, "label": "Virtual Machine", "options": "Virtual Machine", - "read_only": 1 + "set_only_once": 1 }, { - "fieldname": "column_break_11", + "fieldname": "column_break_fhyz", "fieldtype": "Column Break" - }, - { - 
"default": "0", - "fieldname": "callback_failure_count", - "fieldtype": "Int", - "label": "Callback Failure Count", - "read_only": 1, - "reqd": 1 - }, - { - "default": "0", - "fieldname": "callback_failed", - "fieldtype": "Check", - "label": "Callback Failed", - "read_only": 1 - }, - { - "default": "0", - "fieldname": "callback_retry_limit_reached", - "fieldtype": "Check", - "label": "Callback Retry Limit Reached", - "read_only": 1 - }, - { - "fieldname": "next_callback_retry_at", - "fieldtype": "Datetime", - "label": "Next Callback Retry At", - "read_only": 1 - }, - { - "fieldname": "callback_section", - "fieldtype": "Section Break", - "label": "Callback" - }, - { - "default": "0", - "fieldname": "callback_failure_issue_resolved", - "fieldtype": "Check", - "label": "Callback Failure Issue Resolved", - "read_only": 1 - }, - { - "fieldname": "column_break_zynz", - "fieldtype": "Column Break" - }, - { - "default": "0", - "fieldname": "callback_executed", - "fieldtype": "Check", - "label": "Callback Executed", - "read_only": 1 } ], "grid_page_length": 50, - "in_create": 1, "index_web_pages_for_search": 1, "links": [ { "link_doctype": "Press Job Step", "link_fieldname": "job" + }, + { + "link_doctype": "Press Workflow", + "link_fieldname": "linked_docname" } ], - "modified": "2026-03-17 19:26:46.940966", + "modified": "2026-04-16 22:33:03.958588", "modified_by": "Administrator", "module": "Press", "name": "Press Job", diff --git a/press/press/doctype/press_job/press_job.py b/press/press/doctype/press_job/press_job.py index 0336ed0faa9..f019bbd417d 100644 --- a/press/press/doctype/press_job/press_job.py +++ b/press/press/doctype/press_job/press_job.py @@ -1,16 +1,36 @@ +from __future__ import annotations + # Copyright (c) 2022, Frappe and contributors # For license information, please see license.txt - -import json +from typing import TYPE_CHECKING import frappe -from frappe.model.document import Document -from frappe.utils import add_days, add_to_date +from 
frappe.utils import now_datetime + +from press.workflow_engine.doctype.press_workflow.workflow_builder import WorkflowBuilder + +if TYPE_CHECKING: + from press.press.doctype.database_server.database_server import DatabaseServer + from press.press.doctype.server.server import Server + from press.press.doctype.virtual_machine.virtual_machine import VirtualMachine + from press.workflow_engine.doctype.press_workflow.press_workflow import PressWorkflow + +_JOBS_REGISTRY: dict[str, type] = {} + -from press.press.doctype.press_job_step.press_job_step import safe_exec +def _init_jobs_registry() -> None: + global _JOBS_REGISTRY + if _JOBS_REGISTRY: + return + from press.press.doctype.press_job.jobs.reset_swap_on_server import ResetSwapOnServerJob -class PressJob(Document): + _JOBS_REGISTRY = { + "Reset Swap": ResetSwapOnServerJob, + } + + +class PressJob(WorkflowBuilder): # begin: auto-generated types # This code is auto-generated. Do not modify anything in this block. @@ -19,17 +39,10 @@ class PressJob(Document): if TYPE_CHECKING: from frappe.types import DF - arguments: DF.Code - callback_executed: DF.Check - callback_failed: DF.Check - callback_failure_count: DF.Int - callback_failure_issue_resolved: DF.Check - callback_retry_limit_reached: DF.Check duration: DF.Duration | None end: DF.Datetime | None job_type: DF.Link name: DF.Int | None - next_callback_retry_at: DF.Datetime | None server: DF.DynamicLink | None server_type: DF.Link | None start: DF.Datetime | None @@ -37,6 +50,23 @@ class PressJob(Document): virtual_machine: DF.Link | None # end: auto-generated types + @property + def server_doc(self) -> "Server | DatabaseServer": + if hasattr(self, "_server_doc") and self._server_doc: # type: ignore + return self._server_doc # type: ignore + self._server_doc = frappe.get_doc(self.server_type, self.server) + return self._server_doc + + @property + def virtual_machine_doc(self) -> VirtualMachine | None: + if not self.virtual_machine: + return None + + if hasattr(self, 
"_virtual_machine_doc") and self._virtual_machine_doc: # type: ignore + return self._virtual_machine_doc # type: ignore + self._virtual_machine_doc = frappe.get_doc("Virtual Machine", self.virtual_machine) + return self._virtual_machine_doc # type: ignore + def before_insert(self): frappe.db.get_value(self.server_type, self.server, "status", for_update=True) if existing_jobs := frappe.db.get_all( @@ -53,212 +83,47 @@ def before_insert(self): ) def after_insert(self): - self.create_press_job_steps() - self.execute() + self.start_workflow() def on_update(self): if self.has_value_changed("status"): - self.process_callback(save=True) - - def on_change(self): - self.publish_update() + save = False + if self.status == "Running" and not self.start: + self.start = now_datetime() + save = True + + if self.status in ["Success", "Failure"]: + if not self.start: + self.start = now_datetime() + if not self.end: + self.end = now_datetime() + save = True - def create_press_job_steps(self): - job_type = frappe.get_doc("Press Job Type", self.job_type) - for step in job_type.steps: - doc = frappe.get_doc( - { - "doctype": "Press Job Step", - "job": self.name, - "status": "Pending", - "job_type": self.job_type, - "step_name": step.step_name, - "wait_until_true": step.wait_until_true, - } - ) - doc.insert() + if save: + self.save() - def execute(self): - self.status = "Running" - self.start = frappe.utils.now_datetime() - self.save() - self.next() + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.workflow_name = self.job_type + _init_jobs_registry() + if self.job_type in _JOBS_REGISTRY: + self.__class__ = _JOBS_REGISTRY[self.job_type] - def fail(self, arguments=None): - self.status = "Failure" - pending_steps = frappe.get_all("Press Job Step", {"job": self.name, "status": "Pending"}) - for step in pending_steps: - frappe.db.set_value("Press Job Step", step.name, "status", "Skipped") - self.end = frappe.utils.now_datetime() - self.duration = (self.end - 
self.start).total_seconds() - self.save() + def start_workflow(self) -> str: + if not hasattr(self, "execute"): + raise NotImplementedError("Press Job implementation must have an execute method") + return self.execute.run_as_workflow() - def succeed(self): + def on_workflow_success(self, workflow: "PressWorkflow"): self.status = "Success" - self.end = frappe.utils.now_datetime() - self.duration = (self.end - self.start).total_seconds() - self.save() - - @frappe.whitelist() - def next(self, arguments=None): - if arguments: - old_arguments = json.loads(self.arguments) - old_arguments.update(arguments) - self.arguments = json.dumps(old_arguments, indent=2) - self.status = "Running" self.save() - next_step = self.next_step - - if not next_step: - self.succeed() - return - - frappe.enqueue_doc("Press Job Step", next_step, "execute", enqueue_after_commit=True) - @frappe.whitelist() - def force_continue(self): - for step in frappe.get_all( - "Press Job Step", - {"job": self.name, "status": ("in", ("Failure", "Skipped"))}, - pluck="name", - ): - frappe.db.set_value("Press Job Step", step, "status", "Pending") - self.next() - - @frappe.whitelist() - def force_fail(self): - for step in frappe.get_all( - "Press Job Step", - {"job": self.name, "status": "Pending"}, - pluck="name", - ): - frappe.db.set_value("Press Job Step", step, "status", "Failure") - frappe.db.set_value("Press Job", self.name, "status", "Failure") - - @property - def next_step(self): - return frappe.db.get_value( - "Press Job Step", - {"job": self.name, "status": "Pending"}, - "name", - order_by="name asc", - as_dict=True, - ) - - def detail(self): - steps = frappe.get_all( - "Press Job Step", - filters={"job": self.name}, - fields=["name", "step_name", "status", "start", "end", "duration"], - order_by="name asc", - ) + if hasattr(self, "on_press_job_success"): + self.on_press_job_success(workflow) - for index, step in enumerate(steps): - if step.status == "Pending" and index and steps[index - 1].status == 
"Success": - step.status = "Running" - - return { - "name": self.name, - "job_type": self.job_type, - "server": self.server, - "server_type": self.server_type, - "virtual_machine": self.virtual_machine, - "status": self.status, - "steps": steps, - } - - def publish_update(self): - frappe.publish_realtime( - "press_job_update", doctype=self.doctype, docname=self.name, message=self.detail() - ) - - @frappe.whitelist() - def mark_callback_failure_issue_resolved(self): - self.callback_failure_issue_resolved = True + def on_workflow_failure(self, workflow: "PressWorkflow"): + self.status = "Failure" self.save() - def process_callback(self, save: bool = False): # noqa: C901 - if self.status not in ["Success", "Failure"]: - return - - if self.callback_executed or self.callback_failure_issue_resolved: - return - - job_type = frappe.db.get_value( - "Press Job Type", self.job_type, ["callback_script", "callback_max_retry"], as_dict=True - ) - if not job_type.callback_script: - self.callback_executed = True - if save: - self.save() - # No callback script defined, so just mark as executed - return - - if self.callback_failed and self.callback_failure_count >= (job_type.callback_max_retry or 0): - self.callback_retry_limit_reached = True - self.next_callback_retry_at = None - if save: - self.save() - return - - local = {"arguments": frappe._dict(json.loads(self.arguments)), "doc": self} - current_user = frappe.session.user - try: - frappe.set_user("Administrator") - safe_exec(job_type.callback_script, _locals=local) - self.callback_failed = False - self.callback_executed = True - self.next_callback_retry_at = None - self.callback_failure_issue_resolved = False - except Exception: - frappe.log_error(f"Error executing callback script for {self.name}") - self.callback_failed = True - self.callback_failure_count += 1 - self.next_callback_retry_at = add_to_date(None, minutes=5) - finally: - frappe.set_user(current_user) - - if save: - self.save() - - def on_trash(self): - 
frappe.db.delete("Press Job Step", {"job": self.name}) - - -def fail_stuck_press_jobs(): - jobs = frappe.get_all( - "Press Job", - filters={ - "status": ("in", ["Running", "Pending"]), - "creation": ("<", add_days(None, -1)), - }, - pluck="name", - limit=100, - ) - for job_name in jobs: - job = PressJob("Press Job", job_name) - job.force_fail() - frappe.db.commit() - - -def process_failed_callbacks(): - jobs = frappe.get_all( - "Press Job", - filters={ - "status": ("in", ["Success", "Failure"]), - "callback_failed": True, - "callback_executed": False, - "callback_failure_issue_resolved": False, - "callback_retry_limit_reached": False, - "next_callback_retry_at": ("<", frappe.utils.now_datetime()), - }, - pluck="name", - ) - for job_name in jobs: - frappe.enqueue_doc( - "Press Job", - job_name, - "process_callback", - enqueue_after_commit=True, - save=True, - ) + if hasattr(self, "on_press_job_failure"): + self.on_press_job_failure(workflow) diff --git a/press/press/doctype/server/server.py b/press/press/doctype/server/server.py index e38e4e0edda..384e2ca7ea8 100644 --- a/press/press/doctype/server/server.py +++ b/press/press/doctype/server/server.py @@ -1529,7 +1529,7 @@ def increase_swap_locked(self, swap_size=4): self._increase_swap(swap_size) @frappe.whitelist() - def reset_swap(self, swap_size=1): + def reset_swap(self, swap_size=1, now: bool = False): """ Replace existing swap files with new swap file of given size """ @@ -1540,6 +1540,7 @@ def reset_swap(self, swap_size=1): queue="long", timeout=1200, **{"swap_size": swap_size}, + now=now, ) def reset_swap_locked(self, swap_size=1): From 5cf1ced6d44149a0a5e5693c995318334862773c Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Thu, 16 Apr 2026 23:29:02 +0530 Subject: [PATCH 03/22] feat(press-job): Move all press job types to code --- press/fixtures/press_job_type.json | 85 ----- .../database_server/database_server.py | 23 +- 
.../doctype/press_job/jobs/archive_server.py | 63 ++++ .../doctype/press_job/jobs/attach_volume.py | 17 + .../jobs/auto_scale_application_server.py | 16 + .../auto_scale_down_application_server.py | 19 + .../jobs/auto_scale_up_application_server.py | 18 + .../doctype/press_job/jobs/create_server.py | 350 ++++++++++++++++++ .../press_job/jobs/create_server_snapshot.py | 68 ++++ .../press_job/jobs/increase_disk_size.py | 106 ++++++ .../doctype/press_job/jobs/increase_swap.py | 27 ++ .../press_job/jobs/prune_docker_system.py | 27 ++ .../press_job/jobs/prune_mirror_registry.py | 15 + .../press_job/jobs/remove_on_prem_failover.py | 58 +++ .../press_job/jobs/reset_swap_on_server.py | 4 - .../doctype/press_job/jobs/resize_server.py | 101 +++++ .../jobs/resume_services_after_snapshot.py | 47 +++ .../press_job/jobs/setup_on_prem_failover.py | 100 +++++ .../doctype/press_job/jobs/snapshot_disk.py | 106 ++++++ .../press_job/jobs/stop_and_start_server.py | 50 +++ .../jobs/trigger_build_server_cleanup.py | 18 + .../doctype/press_job/jobs/upgrade_mariadb.py | 27 ++ .../press/doctype/press_job/jobs/warn_disk.py | 13 + press/press/doctype/press_job/press_job.json | 13 +- press/press/doctype/press_job/press_job.py | 66 +++- .../registry_server/registry_server.py | 11 +- press/press/doctype/server/server.py | 43 ++- .../tls_certificate/tls_certificate.py | 6 +- .../doctype/press_workflow/exceptions.py | 4 +- .../press_workflow/workflow_builder.py | 14 + 30 files changed, 1398 insertions(+), 117 deletions(-) create mode 100644 press/press/doctype/press_job/jobs/archive_server.py create mode 100644 press/press/doctype/press_job/jobs/attach_volume.py create mode 100644 press/press/doctype/press_job/jobs/auto_scale_application_server.py create mode 100644 press/press/doctype/press_job/jobs/auto_scale_down_application_server.py create mode 100644 press/press/doctype/press_job/jobs/auto_scale_up_application_server.py create mode 100644 press/press/doctype/press_job/jobs/create_server.py 
create mode 100644 press/press/doctype/press_job/jobs/create_server_snapshot.py create mode 100644 press/press/doctype/press_job/jobs/increase_disk_size.py create mode 100644 press/press/doctype/press_job/jobs/increase_swap.py create mode 100644 press/press/doctype/press_job/jobs/prune_docker_system.py create mode 100644 press/press/doctype/press_job/jobs/prune_mirror_registry.py create mode 100644 press/press/doctype/press_job/jobs/remove_on_prem_failover.py create mode 100644 press/press/doctype/press_job/jobs/resize_server.py create mode 100644 press/press/doctype/press_job/jobs/resume_services_after_snapshot.py create mode 100644 press/press/doctype/press_job/jobs/setup_on_prem_failover.py create mode 100644 press/press/doctype/press_job/jobs/snapshot_disk.py create mode 100644 press/press/doctype/press_job/jobs/stop_and_start_server.py create mode 100644 press/press/doctype/press_job/jobs/trigger_build_server_cleanup.py create mode 100644 press/press/doctype/press_job/jobs/upgrade_mariadb.py create mode 100644 press/press/doctype/press_job/jobs/warn_disk.py diff --git a/press/fixtures/press_job_type.json b/press/fixtures/press_job_type.json index db909307562..5da8cd90430 100644 --- a/press/fixtures/press_job_type.json +++ b/press/fixtures/press_job_type.json @@ -329,91 +329,6 @@ } ] }, - { - "callback_max_retry": 0, - "callback_script": null, - "docstatus": 0, - "doctype": "Press Job Type", - "modified": "2025-08-31 20:54:46.857348", - "name": "Create Server (old)", - "steps": [ - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.provision()\n", - "step_name": "Create Server", - "wait_until_true": 0 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.sync()\nresult = (machine.status == \"Running\", False)\n", - "step_name": "Wait for Server to start", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, 
doc.server)\nserver.ping_ansible()\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Ping Server\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", False)\n", - "step_name": "Wait for Server to be accessible", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.wait_for_cloud_init()", - "step_name": "Check Cloud Init status", - "wait_until_true": 0 - }, - { - "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Wait for Cloud Init to finish\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status in (\"Success\", \"Failure\"), False)", - "step_name": "Wait for Cloud Init to finish", - "wait_until_true": 1 - }, - { - "script": "provider = frappe.db.get_value(doc.server_type, doc.server, 'provider')\nif provider == \"Hetzner\" and doc.server_type != \"Proxy Server\":\n vm = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\n vm.attach_new_volume(100)\n vm.sync()\n server = frappe.get_doc(doc.server_type, doc.server)\n server.validate_mounts()\n server.save()\n server.mount_volumes()\n", - "step_name": "Create and mount volumes", - "wait_until_true": 0 - }, - { - "script": "provider = frappe.db.get_value(doc.server_type, doc.server, 'provider')\nif provider == 'Hetzner' and doc.server_type != \"Proxy Server\":\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Mount Volumes\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", False)\nelse:\n result = (True, False)", - "step_name": "Wait for volumes to mount", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"Hetzner\" :\n if server.doctype == \"Server\":\n server.setup_docker()\n elif server.doctype == \"Database Server\":\n 
server.set_mariadb_mount_dependency()\n ", - "step_name": "Configure apps for mounts", - "wait_until_true": 0 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.update_tls_certificate()", - "step_name": "Update TLS Certificate", - "wait_until_true": 0 - }, - { - "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Setup TLS Certificates\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status in (\"Success\", \"Failure\"), False)", - "step_name": "Wait for TLS Certificate to be updated", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.update_agent_ansible()", - "step_name": "Update Agent Ansible", - "wait_until_true": 0 - }, - { - "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Update Agent\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status in (\"Success\", \"Failure\"), False)", - "step_name": "Wait for Agent to be updated", - "wait_until_true": 1 - }, - { - "script": "if doc.server_type == \"Database Server\":\n server = frappe.get_doc(\"Database Server\", doc.server)\n server.upgrade_mariadb()", - "step_name": "Upgrade MariaDB", - "wait_until_true": 0 - }, - { - "script": "if doc.server_type == \"Database Server\":\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Upgrade MariaDB\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\nelse:\n result = (True,)", - "step_name": "Wait for MariaDB Upgrade to Complete", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.set_additional_config()", - "step_name": "Set additional config", - "wait_until_true": 0 - } - ] - }, { "callback_max_retry": 0, "callback_script": "", diff --git 
a/press/press/doctype/database_server/database_server.py b/press/press/doctype/database_server/database_server.py index f3c3cc04c21..30ed747284a 100644 --- a/press/press/doctype/database_server/database_server.py +++ b/press/press/doctype/database_server/database_server.py @@ -547,7 +547,7 @@ def _restart_mariadb(self): def stop_mariadb(self): frappe.enqueue_doc(self.doctype, self.name, "_stop_mariadb", timeout=1800) - def _stop_mariadb(self): + def _stop_mariadb(self, throw_on_failure: bool = False): ansible = Ansible( playbook="stop_mariadb.yml", server=self, @@ -558,8 +558,12 @@ def _stop_mariadb(self): }, ) play = ansible.run() - if play.status == "Failure": + if play.status != "Success": log_error("MariaDB Stop Error", server=self.name) + if throw_on_failure: + frappe.throw(f"Failed to stop MariaDB on server: {self.name}") + + return play @frappe.whitelist() def run_upgrade_mariadb_job(self): @@ -568,7 +572,7 @@ def run_upgrade_mariadb_job(self): def upgrade_mariadb(self): frappe.enqueue_doc(self.doctype, self.name, "_upgrade_mariadb", timeout=1800) - def _upgrade_mariadb(self): + def _upgrade_mariadb(self, throw_on_failure: bool = False): ansible = Ansible( playbook="upgrade_mariadb.yml", server=self, @@ -579,8 +583,10 @@ def _upgrade_mariadb(self): }, ) play = ansible.run() - if play.status == "Failure": + if play.status != "Success": log_error("MariaDB Upgrade Error", server=self.name) + if throw_on_failure: + frappe.throw(f"Failed to upgrade MariaDB on server: {self.name}") return play def _downgrade_mariadb_to_10_6(self): @@ -1252,7 +1258,7 @@ def prepare_mariadb_replica(self): self.doctype, self.name, "_prepare_mariadb_replica", queue="long", timeout=1200, at_front=True ) - def _prepare_mariadb_replica(self): + def _prepare_mariadb_replica(self, throw_on_failure: bool = False): if self.is_primary: return @@ -1271,8 +1277,13 @@ def _prepare_mariadb_replica(self): "mariadb_server_id": self.server_id, }, ) - ansible.run() + play = ansible.run() + if 
play.status != "Success": + raise Exception("Failed to prepare MariaDB replica") except Exception: + if throw_on_failure: + raise + log_error("MariaDB Prepare Replica Exception", server=self.as_dict()) def configure_replication(self, gtid_slave_pos: str | None = None): diff --git a/press/press/doctype/press_job/jobs/archive_server.py b/press/press/doctype/press_job/jobs/archive_server.py new file mode 100644 index 00000000000..f5be638da93 --- /dev/null +++ b/press/press/doctype/press_job/jobs/archive_server.py @@ -0,0 +1,63 @@ +from contextlib import suppress + +import frappe + +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class ArchiveServerJob(PressJob): + @flow + def execute(self): + self.disable_termination_protection() + self.terminate_virtual_machine() + self.wait_for_virtual_machine_to_terminate() + + @task + def disable_termination_protection(self): + self.virtual_machine_doc.disable_termination_protection() + + @task(queue="long", timeout=600) + def terminate_virtual_machine(self): + self.virtual_machine_doc.terminate() + + @task + def wait_for_virtual_machine_to_terminate(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Terminated": + return + + self.defer_current_task() + + def on_press_job_success(self, _): + if self.server_type not in ["Server", "Database Server"]: + return + + if not self.server_doc.is_for_recovery: + return + + recovery_record_name = None + if self.server_type == "Server": + recovery_record_name = frappe.db.get_value( + "Server Snapshot Recovery", {"app_server": self.server}, "name" + ) + elif self.server_type == "Database Server": + recovery_record_name = frappe.db.get_value( + "Server Snapshot Recovery", {"database_server": self.server}, "name" + ) + + if not recovery_record_name: + return + + recovery_record = frappe.get_doc( + "Server Snapshot Recovery", + 
recovery_record_name, + for_update=True, + ) + if self.server_type == "Server": + recovery_record.app_server_archived = True + else: + recovery_record.database_server_archived = True + recovery_record.save() diff --git a/press/press/doctype/press_job/jobs/attach_volume.py b/press/press/doctype/press_job/jobs/attach_volume.py new file mode 100644 index 00000000000..f64cdb66f90 --- /dev/null +++ b/press/press/doctype/press_job/jobs/attach_volume.py @@ -0,0 +1,17 @@ +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class AttachVolumeJob(PressJob): + @flow + def execute(self): + self.attach_volume() + + @task + def attach_volume(self): + machine = self.virtual_machine_doc + + if machine.cloud_provider in ["AWS EC2", "OCI"]: + machine.attach_new_volume(machine.size, machine.iops, machine.throughput) + else: + machine.attach_volume(size=100) diff --git a/press/press/doctype/press_job/jobs/auto_scale_application_server.py b/press/press/doctype/press_job/jobs/auto_scale_application_server.py new file mode 100644 index 00000000000..baeb86924e9 --- /dev/null +++ b/press/press/doctype/press_job/jobs/auto_scale_application_server.py @@ -0,0 +1,16 @@ +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class AutoScaleApplicationServerJob(PressJob): + @flow + def execute(self): + if self.server_type != "Server": + return + + self.scale_app_server() + + @task + def scale_app_server(self): + """Scale Application Server""" + self.server_doc.scale_up() diff --git a/press/press/doctype/press_job/jobs/auto_scale_down_application_server.py b/press/press/doctype/press_job/jobs/auto_scale_down_application_server.py new file mode 100644 index 00000000000..55af9c393e5 --- /dev/null +++ b/press/press/doctype/press_job/jobs/auto_scale_down_application_server.py @@ -0,0 +1,19 @@ +from 
press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class AutoScaleDownApplicationServerJob(PressJob): + @flow + def execute(self): + if self.server_type != "Server": + return + + self.scale_down() + + @task + def scale_down(self): + """Scale Down Application Server""" + if not self.server_doc.scaled_up: + return + + self.server_doc.scale_down(is_automatically_triggered=True) diff --git a/press/press/doctype/press_job/jobs/auto_scale_up_application_server.py b/press/press/doctype/press_job/jobs/auto_scale_up_application_server.py new file mode 100644 index 00000000000..4c78e7a4bd6 --- /dev/null +++ b/press/press/doctype/press_job/jobs/auto_scale_up_application_server.py @@ -0,0 +1,18 @@ +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class AutoScaleUpApplicationServerJob(PressJob): + @flow + def execute(self): + if self.server_type != "Server": + return + self.scale_up() + + @task + def scale_up(self): + """Scale Up Application Server""" + if self.server_doc.scaled_up: + return + + self.server_doc.scale_up(is_automatically_triggered=True) diff --git a/press/press/doctype/press_job/jobs/create_server.py b/press/press/doctype/press_job/jobs/create_server.py new file mode 100644 index 00000000000..add0a81b700 --- /dev/null +++ b/press/press/doctype/press_job/jobs/create_server.py @@ -0,0 +1,350 @@ +import time +from typing import TYPE_CHECKING + +import frappe + +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + +if TYPE_CHECKING: + from press.press.doctype.database_server.database_server import DatabaseServer + from press.press.doctype.server_snapshot_recovery.server_snapshot_recovery import ServerSnapshotRecovery + from press.press.doctype.virtual_machine_image.virtual_machine_image import 
VirtualMachineImage + + +class CreateServerJob(PressJob): + @flow + def execute(self): + self.provision_server() + self.wait_for_server_to_start() + self.wait_for_server_to_be_accessible() + self.sync_default_volumes() + + if self.virtual_machine_doc.data_disk_snapshot: + self.create_volume_from_snapshot() + self.attach_snapshotted_volume() + self.sync_attached_volumes() + self.mount_snapshotted_volume() + + self.check_cloud_init_status() + + if self.server_doc.provider == "Hetzner" and self.virtual_machine: + self.create_and_mount_volumes_hetzner() + self.configure_apps_for_mounts_hetzner() + + self.update_tls_certificate() + self.update_agent() + + if self.server_type == "Database Server" or ( + self.server_type == "Server" and self.server_doc.is_unified_server + ): + self.upgrade_mariadb() + + if self.is_setup_db_replication: + self.prepare_mariadb_replica() + self.configure_mariadb_replica() + self.start_mariadb_replica() + + self.set_additional_config() + + if self.is_fs_server: + self.share_benches_over_nfs() + + @property + def is_setup_db_replication(self): + return self.server_type == "Database Server" and self.arguments_dict.get( + "setup_db_replication", False + ) + + @property + def is_fs_server(self): + return self.server.startswith("fs") and self.server_type == "Server" + + @task + def provision_server(self): + machine = self.virtual_machine_doc + machine.provision() + + @task + def wait_for_server_to_start(self): + retry_later = True + try: + self.virtual_machine_doc.sync() + except (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError): + retry_later = True + except Exception as e: + if "rate_limit_exceeded" in str(e): + retry_later = True + else: + raise e + + if self.virtual_machine_doc.status == "Running": + retry_later = False + + if retry_later: + self.defer_current_task() + + @task + def wait_for_server_to_be_accessible(self): + server = self.server_doc + play = server.ping_ansible() + if not play or play.status 
!= "Success": + self.defer_current_task() + + self.virtual_machine_doc.reload() + if not self.virtual_machine_doc.private_ip_address: + raise Exception("Virtual machine does not have a private IP address yet") + + @task + def sync_default_volumes(self): + try: + self.virtual_machine_doc.sync() + if len(self.virtual_machine_doc.volumes) > 0: + return + except (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError): + pass + + self.defer_current_task() + + @task + def create_volume_from_snapshot(self): + if not self.virtual_machine_doc.data_disk_snapshot: + return + + max_retries = self.arguments_dict.get("max_volume_creation_retries", 6) + if self.kv.get("volume_creation_attempts", 0) >= max_retries: + raise Exception(f"Failed to create volume from snapshot after {max_retries} retries") + + is_created = self.virtual_machine_doc.create_data_disk_volume_from_snapshot() + if is_created: + return + + self.kv.set("volume_creation_attempts", self.kv.get("volume_creation_attempts", 0) + 1) + self.defer_current_task() + + @task + def attach_snapshotted_volume(self): + vm = frappe.get_doc("Virtual Machine", self.virtual_machine) + if not vm.data_disk_snapshot: + return + + while True: + is_attached = vm.check_and_attach_data_disk_snapshot_volume() + if is_attached: + return + time.sleep(10) + vm = frappe.get_doc("Virtual Machine", self.virtual_machine) + + @task + def sync_attached_volumes(self): + server = self.server_doc + if server.provider != "AWS EC2" or not frappe.db.get_value( + "Virtual Machine", server.virtual_machine, "data_disk_snapshot" + ): + return + + while True: + time.sleep(10) + try: + vm = frappe.get_doc("Virtual Machine", server.virtual_machine) + vm.sync() + if len(vm.volumes) == 0 or (vm.data_disk_snapshot_attached and len(vm.volumes) == 1): + continue + server.reload() + server.validate_mounts() + server.save() + break + except (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError): + continue 
+ + @task(queue="long", timeout=7200) + def mount_snapshotted_volume(self): + if self.server_doc.provider != "AWS EC2" or not self.virtual_machine_doc.data_disk_snapshot: + return + + cleanup_db_replication_files = False + if self.server_type == "Database Server" and ( + self.server_doc.is_for_recovery or self.is_setup_db_replication + ): + cleanup_db_replication_files = True + + self.server_doc.mount_volumes( + now=True, + stop_docker_before_mount=self.server_type == "Server", + stop_mariadb_before_mount=self.server_type == "Database Server", + start_docker_after_mount=self.server_type == "Server" and not self.server_doc.is_for_recovery, + start_mariadb_after_mount=not self.is_setup_db_replication, + cleanup_db_replication_files=cleanup_db_replication_files, + rotate_additional_volume_metadata=True, + ) + + @task(queue="short") + def check_cloud_init_status(self): + self.server_doc._wait_for_cloud_init() + + @task(queue="long", timeout=1200) + def create_and_mount_volumes_hetzner(self): + if self.server_doc.provider != "Hetzner" or not self.virtual_machine: + return + + if not self.virtual_machine_doc.virtual_machine_image: + return + + vmi: VirtualMachineImage = frappe.get_doc( + "Virtual Machine Image", self.virtual_machine_doc.virtual_machine_image + ) + if not vmi.has_data_volume: + return + + server = self.server_doc + if server.plan: + data_disk_size = int(frappe.db.get_value("Server Plan", server.plan, "disk")) + else: + data_disk_size = 25 + + self.virtual_machine_doc.attach_new_volume(data_disk_size) + + max_sync_tries = 100 + while max_sync_tries: + try: + self.virtual_machine_doc.sync() + break + except Exception as e: + max_sync_tries -= 1 + if max_sync_tries <= 0: + raise e + + server.validate_mounts() + server.save(ignore_version=True) + server.mount_volumes(now=True) + + @task(queue="long", timeout=1200) + def configure_apps_for_mounts_hetzner(self): + server = self.server_doc + if server.provider != "Hetzner" or not getattr(server, 
"has_data_volume", False): + return + + if server.doctype == "Server": + server.setup_docker(now=True) + elif server.doctype == "Database Server": + server.set_mariadb_mount_dependency(now=True) + + @task + def update_tls_certificate(self): + self.server_doc.update_tls_certificate(throw_on_failure=True) + + @task + def update_agent(self): + self.server_doc._update_agent_ansible(throw_on_failure=True) + + @task(queue="long", timeout=1800) + def upgrade_mariadb(self): + if self.server_type == "Database Server": + play = self.server_doc._upgrade_mariadb() + if play.status != "Success": + raise Exception("Failed to upgrade MariaDB") + + if self.server_type == "Server" and self.server_doc.is_unified_server: + database_server: DatabaseServer = frappe.get_doc( + "Database Server", self.server_doc.database_server + ) + database_server._upgrade_mariadb() + + @task(queue="long", timeout=1200) + def prepare_mariadb_replica(self): + if not self.is_setup_db_replication: + return + + self.server_doc._prepare_mariadb_replica(throw_on_failure=True) + + @task + def configure_mariadb_replica(self): + if not self.is_setup_db_replication: + return + + self.server_doc.configure_replication() + + @task + def start_mariadb_replica(self): + if not self.is_setup_db_replication: + return + + self.server_doc.start_replication() + + @task + def set_additional_config(self): + self.server_doc.set_additional_config() + + @task + def share_benches_over_nfs(self): + if self.server.startswith("fs") and self.server_type == "Server": + primary_server = frappe.db.get_value("Server", self.server, "primary") + nfs_volume_attachment = frappe.get_doc( + {"doctype": "NFS Volume Attachment", "primary_server": primary_server} + ) + nfs_volume_attachment.insert(ignore_permissions=True) + + # Callbacks + def on_press_job_success(self, _): + args = self.arguments_dict + + # Mark provisioning flag of the server + if self.server_type in ["Server", "Database Server"]: + 
self.server_doc.is_provisioning_press_job_completed = 1 + self.server_doc.save(ignore_permissions=True) + + # In case of unified server, also mark linked database server as provisioned + if self.server_type == "Server" and self.server_doc.is_unified_server: + frappe.db.set_value( + "Database Server", + self.server_doc.database_server, + "is_provisioning_press_job_completed", + 1, + update_modified=False, + ) + + # Update "Server Snapshot Recovery" record if this server is being provisioned for recovery + if self.server_type in ["Server", "Database Server"] and self.server_doc.is_for_recovery: + recovery_record_name = frappe.db.get_value( + "Server Snapshot Recovery", + { + "app_server" if self.server_type == "Server" else "database_server": self.server, + }, + "name", + ) + if recovery_record_name: + recovery_record = frappe.get_doc( + "Server Snapshot Recovery", recovery_record_name, for_update=True + ) + if self.server_type == "Server": + recovery_record.is_app_server_ready = True + else: + recovery_record.is_database_server_ready = True + recovery_record.save() + + # Resume logical replication backup if it was setup as part of server provisioning + if self.server_type in ["Server", "Database Server"] and "logical_replication_backup" in args: + frappe.get_doc("Logical Replication Backup", args.get("logical_replication_backup")).next() + + def on_press_job_failure(self, _): + if self.server_type not in ["Server", "Database Server"]: + return + + # Mark Server Snapshot Recovery as failed if the server provisioning was for recovery + if self.server_doc.is_for_recovery: + recovery_record_name = frappe.db.get_value( + "Server Snapshot Recovery", + {"app_server" if self.server_type == "Server" else "database_server": self.server}, + "name", + ) + if recovery_record_name: + recovery_record: ServerSnapshotRecovery = frappe.get_doc( + "Server Snapshot Recovery", recovery_record_name, for_update=True + ) + recovery_record.mark_server_provisioning_as_failed() + + # Mark 
logical replication backup as failed if it was setup as part of server provisioning + if "logical_replication_backup" in self.arguments_dict: + frappe.get_doc( + "Logical Replication Backup", self.arguments_dict.get("logical_replication_backup") + ).fail() diff --git a/press/press/doctype/press_job/jobs/create_server_snapshot.py b/press/press/doctype/press_job/jobs/create_server_snapshot.py new file mode 100644 index 00000000000..da5e7502749 --- /dev/null +++ b/press/press/doctype/press_job/jobs/create_server_snapshot.py @@ -0,0 +1,68 @@ +from contextlib import suppress +from typing import TYPE_CHECKING + +import frappe + +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + +if TYPE_CHECKING: + from press.press.doctype.virtual_machine_image.virtual_machine_image import VirtualMachineImage + + +class CreateServerSnapshotJob(PressJob): + @flow + def execute(self): + self.stop_virtual_machine() + self.wait_for_virtual_machine_to_stop() + self.create_snapshot() + self.start_virtual_machine() + self.wait_for_virtual_machine_to_start() + self.wait_for_snapshot_complete() + + @task + def stop_virtual_machine(self): + machine = self.virtual_machine_doc + machine.stop() + + @task + def wait_for_virtual_machine_to_stop(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Stopped": + return + + self.defer_current_task() + + @task + def create_snapshot(self): + machine = self.virtual_machine_doc + self.kv.set("image", machine.create_image()) + + @task + def start_virtual_machine(self): + try: + self.virtual_machine_doc.start() + except Exception: + self.defer_current_task() + + @task + def wait_for_virtual_machine_to_start(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Running": + return + + self.defer_current_task() + + @task + def wait_for_snapshot_complete(self): 
+ image_name = self.kv.get("image") + image: VirtualMachineImage = frappe.get_doc("Virtual Machine Image", image_name) # type: ignore + image.sync() + if image.status == "Available": + return + + self.defer_current_task() diff --git a/press/press/doctype/press_job/jobs/increase_disk_size.py b/press/press/doctype/press_job/jobs/increase_disk_size.py new file mode 100644 index 00000000000..c3679df1601 --- /dev/null +++ b/press/press/doctype/press_job/jobs/increase_disk_size.py @@ -0,0 +1,106 @@ +from contextlib import suppress + +import frappe + +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class IncreaseDiskSizeJob(PressJob): + @flow + def execute(self): + self.increase_disk_size() + + provider = self.server_doc.provider + if provider == "AWS EC2": + self.wait_for_partition_to_resize_for_aws_ec2() + + elif provider == "OCI": + self.wait_for_server_to_start_start_oci() + self.wait_for_server_to_be_accessible_oci() + self.add_glass_file_oci() + + if self.server_type == "Server": + self.restart_active_benches() + + @task + def increase_disk_size(self): + mountpoint = self.arguments_dict.labels.get("mountpoint") + self.server_doc.calculated_increase_disk_size(mountpoint=mountpoint) + + if not frappe.db.get_value(self.server_type, self.server, "auto_increase_storage"): + return + + @task + def wait_for_partition_to_resize_for_aws_ec2(self): + """Wait for partition to resize (AWS)""" + if self.server_doc.provider != "AWS EC2": + return + + plays = frappe.get_all( + "Ansible Play", + {"server": self.server, "play": "Extend EC2 Volume"}, + ["status"], + order_by="creation desc", + limit=1, + ) + if not plays: + self.defer_current_task() + + if plays[0].status == "Success": + return + + if plays[0].status == "Failure": + raise Exception("Failed to extend EC2 volume") + + self.defer_current_task() + + @task + def wait_for_server_to_start_start_oci(self): + """Wait for server 
to start (OCI)""" + if self.server_doc.provider != "OCI": + return + + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Running": + return + + self.defer_current_task() + + @task(queue="long", timeout=600) + def wait_for_server_to_be_accessible_oci(self): + """Wait for server to be accessible (OCI)""" + if self.server_doc.provider != "OCI": + return + + play = self.server_doc.ping_ansible() + if play and play.status == "Success": + return + + self.defer_current_task() + + @task + def add_glass_file_oci(self): + """Add glass file back (OCI)""" + if self.server_doc.provider != "OCI": + return + + self.server_doc._add_glass_file() + + @task + def restart_active_benches(self): + if self.server_type != "Server": + return + + self.server_doc._start_active_benches( + benches=frappe.get_all( + "Bench", + { + "server": self.server, + "status": "Active", + }, + pluck="name", + ) + ) diff --git a/press/press/doctype/press_job/jobs/increase_swap.py b/press/press/doctype/press_job/jobs/increase_swap.py new file mode 100644 index 00000000000..2cedc70b71a --- /dev/null +++ b/press/press/doctype/press_job/jobs/increase_swap.py @@ -0,0 +1,27 @@ +from contextlib import suppress + +import frappe + +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class IncreaseSwapJob(PressJob): + @flow + def execute(self): + with suppress(Exception): + self.send_telegram_notification() + + self.add_swap_on_server() + + @task + def send_telegram_notification(self): + telegram_message = frappe.get_doc("Press Settings").telegram_message + telegram_message.enqueue( + f"Increasing swap on [{self.server}]({frappe.utils.get_url_to_form(self.server_type, self.server)})", + "Information", + ) + + @task(queue="long", timeout=1200) + def add_swap_on_server(self): + self.server_doc.increase_swap_locked(swap_size=4, throw_on_failure=True) diff --git 
a/press/press/doctype/press_job/jobs/prune_docker_system.py b/press/press/doctype/press_job/jobs/prune_docker_system.py new file mode 100644 index 00000000000..e571b9b490f --- /dev/null +++ b/press/press/doctype/press_job/jobs/prune_docker_system.py @@ -0,0 +1,27 @@ +from contextlib import suppress + +import frappe + +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class PruneDockerSystemJob(PressJob): + @flow + def execute(self): + with suppress(Exception): + self.send_telegram_notification() + + self.prune_docker_system() + + @task + def send_telegram_notification(self): + telegram_message = frappe.get_doc("Press Settings").telegram_message + telegram_message.enqueue( + f"Pruning docker cache on [{self.server}]({frappe.utils.get_url_to_form(self.server_type, self.server)})", + "Information", + ) + + @task(queue="long", timeout=8000) + def prune_docker_system(self): + self.server_doc._prune_docker_system(throw_on_failure=True) diff --git a/press/press/doctype/press_job/jobs/prune_mirror_registry.py b/press/press/doctype/press_job/jobs/prune_mirror_registry.py new file mode 100644 index 00000000000..151598e283f --- /dev/null +++ b/press/press/doctype/press_job/jobs/prune_mirror_registry.py @@ -0,0 +1,15 @@ +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class PruneMirrorRegistryJob(PressJob): + @flow + def execute(self): + if self.server_type != "Registry Server": + return + + self.prune_mirror_registry() + + @task(queue="long", timeout=3600) + def prune_mirror_registry(self): + self.server_doc._prune_mirror_registry(throw_on_failure=True) diff --git a/press/press/doctype/press_job/jobs/remove_on_prem_failover.py b/press/press/doctype/press_job/jobs/remove_on_prem_failover.py new file mode 100644 index 00000000000..5f221b1b7c7 --- /dev/null +++ 
b/press/press/doctype/press_job/jobs/remove_on_prem_failover.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import frappe + +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + +if TYPE_CHECKING: + from press.press.doctype.on_prem_failover.on_prem_failover import OnPremFailover + + +class RemoveOnPremFailoverJob(PressJob): + @flow + def execute(self): + self.remove_app_server_from_firewall() + self.remove_db_server_from_firewall() + self.stop_replication_from_app_server() + self.stop_replication_from_db_server() + self.delete_firewall() + + @property + def failover_doc(self) -> OnPremFailover: + if not hasattr(self, "_on_prem_failover_doc") or not self._on_prem_failover_doc: # type: ignore + self._on_prem_failover_doc = frappe.get_doc("On-Prem Failover", self.arguments_dict.failover) + return self._on_prem_failover_doc # type: ignore + + @task + def remove_app_server_from_firewall(self): + """Remove Wireguard Port Access from App Server""" + self.failover_doc.remove_app_server_from_firewall() + + @task + def remove_db_server_from_firewall(self): + """Remove Wireguard Port Access from DB Server""" + self.failover_doc.remove_db_server_from_firewall() + + @task(queue="long", timeout=1800) + def stop_replication_from_app_server(self): + """Stop Replication from App Server""" + self.failover_doc._stop_replication_from_app_server() + + @task(queue="long", timeout=1800) + def stop_replication_from_db_server(self): + """Stop Replication from DB Server""" + self.failover_doc._stop_replication_from_db_server() + + @task + def delete_firewall(self): + """Delete Firewall""" + self.failover_doc.delete_firewall() + + def on_press_job_success(self, _): + self.failover_doc.is_db_server_failover_setup = False + self.failover_doc.is_app_server_failover_setup = False + self.failover_doc.enabled = False + self.failover_doc.save() diff --git 
a/press/press/doctype/press_job/jobs/reset_swap_on_server.py b/press/press/doctype/press_job/jobs/reset_swap_on_server.py index fb444f1a0ce..8d96bdc196d 100644 --- a/press/press/doctype/press_job/jobs/reset_swap_on_server.py +++ b/press/press/doctype/press_job/jobs/reset_swap_on_server.py @@ -9,10 +9,6 @@ class ResetSwapOnServerJob(PressJob): @flow def execute(self): - if self.status == "Pending": - self.status = "Running" - self.save() - with suppress(Exception): self.send_telegram_notification() diff --git a/press/press/doctype/press_job/jobs/resize_server.py b/press/press/doctype/press_job/jobs/resize_server.py new file mode 100644 index 00000000000..19cfa0f6c24 --- /dev/null +++ b/press/press/doctype/press_job/jobs/resize_server.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +from contextlib import suppress +from typing import TYPE_CHECKING + +import frappe + +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + +if TYPE_CHECKING: + from press.press.doctype.database_server.database_server import DatabaseServer + from press.press.doctype.server.server import Server + + +class ResizeServerJob(PressJob): + @flow + def execute(self): + self.stop_virtual_machine() + self.wait_for_virtual_machine_to_stop() + + self.resize_virtual_machine() + + self.start_virtual_machine() + self.wait_for_virtual_machine_to_start() + + self.wait_for_server_to_be_accessible() + self.set_additional_config() + self.increase_disk_size() + + @task + def stop_virtual_machine(self): + self.virtual_machine_doc.stop() + + @task + def wait_for_virtual_machine_to_stop(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Stopped": + return + + self.defer_current_task() + + @task + def resize_virtual_machine(self): + self.virtual_machine_doc.resize( + self.arguments_dict.machine_type, self.arguments_dict.get("upgrade_disk", False) + ) + + 
@task + def start_virtual_machine(self): + try: + if self.virtual_machine_doc.status != "Running": + self.virtual_machine_doc.start() + except Exception: + self.defer_current_task() + + @task + def wait_for_virtual_machine_to_start(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Running": + return + + self.defer_current_task() + + @task + def wait_for_server_to_be_accessible(self): + play = self.server_doc.ping_ansible() + if not play or play.status != "Success": + self.defer_current_task() + + @task + def set_additional_config(self): + if self.server_type not in ["Server", "Database Server"]: + return + + if self.server_type == "Server" and self.server_doc.is_unified_server: + server_doc: Server = frappe.get_doc("Server", self.server) + server_doc.auto_scale_workers() + db_doc: DatabaseServer = frappe.get_doc("Database Server", self.server) + db_doc.adjust_memory_config() + else: + if self.server_type == "Database Server": + self.server_doc.adjust_memory_config() + elif self.server_type == "Server": + self.server_doc.auto_scale_workers() + + @task + def increase_disk_size(self): + if not self.server_doc.plan: + return + + plan_disk_size = frappe.db.get_value("Server Plan", self.server_doc.plan, "disk") + if not plan_disk_size or plan_disk_size <= self.virtual_machine_doc.disk_size: + return + + with suppress(Exception): + self.server_doc.increase_disk_size(increment=plan_disk_size - self.virtual_machine_doc.disk_size) diff --git a/press/press/doctype/press_job/jobs/resume_services_after_snapshot.py b/press/press/doctype/press_job/jobs/resume_services_after_snapshot.py new file mode 100644 index 00000000000..7b8ce7b0138 --- /dev/null +++ b/press/press/doctype/press_job/jobs/resume_services_after_snapshot.py @@ -0,0 +1,47 @@ +import frappe + +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class 
ResumeServicesAfterSnapshotJob(PressJob): + @flow + def execute(self): + self.start_docker_daemon() + self.start_mariadb_service() + + @task(queue="long", timeout=1200) + def start_docker_daemon(self): + server_snapshot = self.arguments_dict.get("server_snapshot") + + if self.server_type == "Server" and self.arguments_dict.get("is_consistent_snapshot", False): + server = frappe.get_doc("Server", self.server) + output = server.ansible_run("systemctl start docker") + if not (output and output.get("status") == "Success"): + raise Exception("Failed to start docker daemon") + + frappe.db.set_value( + "Server Snapshot", + server_snapshot, + "app_server_services_started", + True, + update_modified=False, + ) + + @task(queue="long", timeout=3600) + def start_mariadb_service(self): + server_snapshot = self.arguments_dict.get("server_snapshot") + + if self.server_type == "Database Server" and self.arguments_dict.get("is_consistent_snapshot", False): + server = frappe.get_doc("Database Server", self.server) + output = server.ansible_run("systemctl start mariadb") + if not (output and output.get("status") == "Success"): + raise Exception("Failed to start mariadb service") + + frappe.db.set_value( + "Server Snapshot", + server_snapshot, + "database_server_services_started", + True, + update_modified=False, + ) diff --git a/press/press/doctype/press_job/jobs/setup_on_prem_failover.py b/press/press/doctype/press_job/jobs/setup_on_prem_failover.py new file mode 100644 index 00000000000..b89c885334d --- /dev/null +++ b/press/press/doctype/press_job/jobs/setup_on_prem_failover.py @@ -0,0 +1,100 @@ +from __future__ import annotations + +import time +from typing import TYPE_CHECKING + +import frappe + +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + +if TYPE_CHECKING: + from press.press.doctype.on_prem_failover.on_prem_failover import OnPremFailover + + +class 
SetupOnPremFailoverJob(PressJob): + @flow + def execute(self): + self.add_app_server_to_firewall() + self.add_db_server_to_firewall() + self.setup_wireguard_on_app_server() + self.setup_wireguard_on_db_server() + self.test_connectivity() + self.setup_replication_for_app_server() + self.setup_db_lsync_for_initial_sync() + self.wait_for_initial_db_sync() + self.rsync_new_db_files() + self.setup_replica_in_on_prem_server() + + @property + def failover_doc(self) -> OnPremFailover: + if not hasattr(self, "_on_prem_failover_doc") or not self._on_prem_failover_doc: # type: ignore + self._on_prem_failover_doc = frappe.get_doc("On-Prem Failover", self.arguments_dict.failover) + return self._on_prem_failover_doc # type: ignore + + @task + def add_app_server_to_firewall(self): + """Allow Wireguard Port Through Security Group on App Server""" + self.failover_doc.add_app_server_to_firewall() + + @task + def add_db_server_to_firewall(self): + """Allow Wireguard Port Through Security Group on DB Server""" + self.failover_doc.add_db_server_to_firewall() + + @task + def setup_wireguard_on_app_server(self): + """Setup Wireguard on App Server""" + self.failover_doc.setup_wireguard_on_app_server() + + @task + def setup_wireguard_on_db_server(self): + """Setup Wireguard on DB Server""" + self.failover_doc.setup_wireguard_on_database_server() + + @task(queue="long", timeout=600) + def test_connectivity(self): + """Test Connectivity to On-Prem Server""" + self.failover_doc.check_connectivity_to_on_premise_server() + self.failover_doc.reload() + + if ( + self.failover_doc.is_on_prem_server_ssh_from_app_server_working + and self.failover_doc.is_on_prem_server_ssh_from_db_server_working + ): + return + + self.defer_current_task() + + @task(queue="long", timeout=3600) + def setup_replication_for_app_server(self): + """Setup Replication for App Server""" + self.failover_doc._setup_app_server_replica() + + @task(queue="long", timeout=3600) + def setup_db_lsync_for_initial_sync(self): + 
"""Setup Lsyncd For Initial DB Sync""" + self.failover_doc._setup_db_lsync_for_initial_sync() + + @task + def wait_for_initial_db_sync(self): + if ( + self.failover_doc.db_lsyncd_stop_at + and frappe.utils.now_datetime() > self.failover_doc.db_lsyncd_stop_at + ): + return + time.sleep(1) + self.defer_current_task() + + @task(queue="long", timeout=3600) + def rsync_new_db_files(self): + self.failover_doc._setup_db_rsync_for_final_sync() + + @task(queue="long", timeout=3600) + def setup_replica_in_on_prem_server(self): + self.failover_doc._setup_and_configure_database_replica() + + def on_press_job_success(self, _): + self.failover_doc.is_db_server_failover_setup = True + self.failover_doc.is_app_server_failover_setup = True + self.failover_doc.save() diff --git a/press/press/doctype/press_job/jobs/snapshot_disk.py b/press/press/doctype/press_job/jobs/snapshot_disk.py new file mode 100644 index 00000000000..8b6356aebed --- /dev/null +++ b/press/press/doctype/press_job/jobs/snapshot_disk.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +from contextlib import suppress +from typing import TYPE_CHECKING + +import frappe + +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + +if TYPE_CHECKING: + from press.press.doctype.server_snapshot.server_snapshot import ServerSnapshot + + +class SnapshotDiskJob(PressJob): + @flow + def execute(self): + self.verify_virtual_machine_status() + + if self.is_consistent_snapshot: + if self.server_type == "Server": + self.stop_docker_daemon() + if self.server_type == "Database Server": + self.stop_mariadb_service() + + self.flush_file_system_buffers() + self.snapshot_disk() + + @property + def is_consistent_snapshot(self): + return self.arguments_dict.get("is_consistent_snapshot", False) + + @task + def verify_virtual_machine_status(self): + try: + self.virtual_machine_doc.sync() + except Exception: + self.defer_current_task() + + if 
self.virtual_machine_doc.status == "Terminated": + raise Exception("Can't snapshot terminated virtual machine") + + if self.virtual_machine_doc.status == "Draft": + raise Exception("Can't snapshot draft virtual machine") + + @task + def stop_docker_daemon(self): + if not (self.server_type == "Server" and self.is_consistent_snapshot): + return + + output = self.server_doc.ansible_run("systemctl stop docker") + if not (output and output.get("status") == "Success"): + raise Exception("Failed to stop docker daemon") + + @task + def stop_mariadb_service(self): + if not (self.server_type == "Database Server" and self.is_consistent_snapshot): + return + + output = self.server_doc.ansible_run("systemctl stop mariadb") + if not (output and output.get("status") == "Success"): + raise Exception("Failed to stop mariadb service") + + @task + def flush_file_system_buffers(self): + output = self.server_doc.ansible_run("sync") + if not (output and output.get("status") == "Success"): + raise Exception("Failed to flush file system buffers to disk") + + @task + def snapshot_disk(self): + machine = self.virtual_machine_doc + machine.create_snapshots(exclude_boot_volume=True, dedicated_snapshot=True) + + field_name = "app_server_snapshot" if self.server_type == "Server" else "database_server_snapshot" + no_of_snapshots = len(machine.flags.created_snapshots) + if no_of_snapshots != 1: + raise Exception(f"Expected 1 disk snapshot. 
Found: {no_of_snapshots}") + + frappe.db.set_value( + "Server Snapshot", + self.arguments_dict.get("server_snapshot"), + field_name, + machine.flags.created_snapshots[0], + update_modified=False, + ) + + def _resume_services(self) -> ServerSnapshot: + snapshot = frappe.get_doc("Server Snapshot", self.arguments_dict.get("server_snapshot")) + if self.server_type == "Server": + snapshot.resume_app_server_services() + elif self.server_type == "Database Server": + snapshot.resume_database_server_services() + + return snapshot + + def on_press_job_success(self, workflow): + snapshot = self._resume_services() + snapshot.sync(now=False) + + def on_press_job_failure(self, workflow): + snapshot = self._resume_services() + frappe.db.set_value("Server Snapshot", snapshot.name, "status", "Failure", update_modified=False) + for s in snapshot.snapshots: + with suppress(Exception): + frappe.get_doc("Virtual Disk Snapshot", s).delete_snapshot(ignore_validation=True) diff --git a/press/press/doctype/press_job/jobs/stop_and_start_server.py b/press/press/doctype/press_job/jobs/stop_and_start_server.py new file mode 100644 index 00000000000..74d93d7502b --- /dev/null +++ b/press/press/doctype/press_job/jobs/stop_and_start_server.py @@ -0,0 +1,50 @@ +from contextlib import suppress + +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class StopAndStartServerJob(PressJob): + @flow + def execute(self): + self.stop_virtual_machine() + self.wait_for_virtual_machine_to_stop() + + self.start_virtual_machine() + self.wait_for_virtual_machine_to_start() + + self.wait_for_server_to_be_accessible() + + @task + def stop_virtual_machine(self): + self.virtual_machine_doc.stop() + + @task + def wait_for_virtual_machine_to_stop(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Stopped": + return + + self.defer_current_task() + + @task + def 
start_virtual_machine(self): + self.virtual_machine_doc.start() + + @task + def wait_for_virtual_machine_to_start(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Running": + return + + self.defer_current_task() + + @task + def wait_for_server_to_be_accessible(self): + play = self.server_doc.ping_ansible() + if not play or play.status != "Success": + self.defer_current_task() diff --git a/press/press/doctype/press_job/jobs/trigger_build_server_cleanup.py b/press/press/doctype/press_job/jobs/trigger_build_server_cleanup.py new file mode 100644 index 00000000000..42bd972444f --- /dev/null +++ b/press/press/doctype/press_job/jobs/trigger_build_server_cleanup.py @@ -0,0 +1,18 @@ +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class TriggerBuildServerCleanupJob(PressJob): + @flow + def execute(self): + if self.server_type != "Server" or not self.server_doc.use_for_build: + return + + self.trigger_build_server_cleanup() + + @task + def trigger_build_server_cleanup(self): + if not self.server_doc.use_for_build: + return + + self.server_doc.prune_docker_system() diff --git a/press/press/doctype/press_job/jobs/upgrade_mariadb.py b/press/press/doctype/press_job/jobs/upgrade_mariadb.py new file mode 100644 index 00000000000..831fe61cb3a --- /dev/null +++ b/press/press/doctype/press_job/jobs/upgrade_mariadb.py @@ -0,0 +1,27 @@ +import frappe + +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class UpgradeMariaDBJob(PressJob): + @flow + def execute(self): + self.stop_mariadb() + self.create_server_snapshot() + self.upgrade_mariadb() + + @task(queue="long", timeout=1800) + def stop_mariadb(self): + self.server_doc._stop_mariadb(throw_on_failure=True) + + @task + def create_server_snapshot(self): + 
self.virtual_machine_doc.create_snapshots() + + snapshot = frappe.get_last_doc("Virtual Disk Snapshot", {"virtual_machine": self.virtual_machine}) + snapshot.add_comment(text="Before MariaDB Upgrade") + + @task(queue="long", timeout=1800) + def upgrade_mariadb(self): + self.server_doc._upgrade_mariadb(throw_on_failure=True) diff --git a/press/press/doctype/press_job/jobs/warn_disk.py b/press/press/doctype/press_job/jobs/warn_disk.py new file mode 100644 index 00000000000..a7823982428 --- /dev/null +++ b/press/press/doctype/press_job/jobs/warn_disk.py @@ -0,0 +1,13 @@ +from press.press.doctype.press_job.press_job import PressJob +from press.workflow_engine.doctype.press_workflow.decorators import flow, task + + +class WarnDiskJob(PressJob): + @flow + def execute(self): + self.send_warning() + + @task + def send_warning(self): + mountpoint = self.arguments_dict.labels.get("mountpoint") + self.server_doc.recommend_disk_increase(mountpoint=mountpoint) diff --git a/press/press/doctype/press_job/press_job.json b/press/press/doctype/press_job/press_job.json index 57e58006a72..546418bf126 100644 --- a/press/press/doctype/press_job/press_job.json +++ b/press/press/doctype/press_job/press_job.json @@ -15,8 +15,9 @@ "section_break_7", "server_type", "server", + "virtual_machine", "column_break_fhyz", - "virtual_machine" + "arguments" ], "fields": [ { @@ -100,6 +101,14 @@ { "fieldname": "column_break_fhyz", "fieldtype": "Column Break" + }, + { + "default": "{}", + "fieldname": "arguments", + "fieldtype": "Small Text", + "label": "Arguments", + "reqd": 1, + "set_only_once": 1 } ], "grid_page_length": 50, @@ -114,7 +123,7 @@ "link_fieldname": "linked_docname" } ], - "modified": "2026-04-16 22:33:03.958588", + "modified": "2026-04-16 23:16:27.885432", "modified_by": "Administrator", "module": "Press", "name": "Press Job", diff --git a/press/press/doctype/press_job/press_job.py b/press/press/doctype/press_job/press_job.py index f019bbd417d..1e416ddc8e0 100644 --- 
a/press/press/doctype/press_job/press_job.py +++ b/press/press/doctype/press_job/press_job.py @@ -1,7 +1,9 @@ -from __future__ import annotations - # Copyright (c) 2022, Frappe and contributors # For license information, please see license.txt +from __future__ import annotations + +import json +from functools import cached_property from typing import TYPE_CHECKING import frappe @@ -23,10 +25,60 @@ def _init_jobs_registry() -> None: if _JOBS_REGISTRY: return + from press.press.doctype.press_job.jobs.archive_server import ArchiveServerJob + from press.press.doctype.press_job.jobs.attach_volume import AttachVolumeJob + from press.press.doctype.press_job.jobs.auto_scale_application_server import ( + AutoScaleApplicationServerJob, + ) + from press.press.doctype.press_job.jobs.auto_scale_down_application_server import ( + AutoScaleDownApplicationServerJob, + ) + from press.press.doctype.press_job.jobs.auto_scale_up_application_server import ( + AutoScaleUpApplicationServerJob, + ) + from press.press.doctype.press_job.jobs.create_server import CreateServerJob + from press.press.doctype.press_job.jobs.create_server_snapshot import CreateServerSnapshotJob + from press.press.doctype.press_job.jobs.increase_disk_size import IncreaseDiskSizeJob + from press.press.doctype.press_job.jobs.increase_swap import IncreaseSwapJob + from press.press.doctype.press_job.jobs.prune_docker_system import PruneDockerSystemJob + from press.press.doctype.press_job.jobs.prune_mirror_registry import PruneMirrorRegistryJob + from press.press.doctype.press_job.jobs.remove_on_prem_failover import RemoveOnPremFailoverJob from press.press.doctype.press_job.jobs.reset_swap_on_server import ResetSwapOnServerJob + from press.press.doctype.press_job.jobs.resize_server import ResizeServerJob + from press.press.doctype.press_job.jobs.resume_services_after_snapshot import ( + ResumeServicesAfterSnapshotJob, + ) + from press.press.doctype.press_job.jobs.setup_on_prem_failover import SetupOnPremFailoverJob + 
from press.press.doctype.press_job.jobs.snapshot_disk import SnapshotDiskJob + from press.press.doctype.press_job.jobs.stop_and_start_server import StopAndStartServerJob + from press.press.doctype.press_job.jobs.trigger_build_server_cleanup import ( + TriggerBuildServerCleanupJob, + ) + from press.press.doctype.press_job.jobs.upgrade_mariadb import UpgradeMariaDBJob + from press.press.doctype.press_job.jobs.warn_disk import WarnDiskJob _JOBS_REGISTRY = { + "Archive Server": ArchiveServerJob, + "Attach Volume": AttachVolumeJob, + "Auto Scale Application Server": AutoScaleApplicationServerJob, + "Auto Scale Down Application Server": AutoScaleDownApplicationServerJob, + "Auto Scale Up Application Server": AutoScaleUpApplicationServerJob, + "Create Server": CreateServerJob, + "Create Server Snapshot": CreateServerSnapshotJob, + "Increase Disk Size": IncreaseDiskSizeJob, + "Increase Swap": IncreaseSwapJob, + "Prune Docker system": PruneDockerSystemJob, + "Prune Mirror Registry": PruneMirrorRegistryJob, + "Remove On-Prem Failover": RemoveOnPremFailoverJob, "Reset Swap": ResetSwapOnServerJob, + "Resize Server": ResizeServerJob, + "Resume Services After Snapshot": ResumeServicesAfterSnapshotJob, + "Setup On-Prem Failover": SetupOnPremFailoverJob, + "Snapshot Disk": SnapshotDiskJob, + "Stop and Start Server": StopAndStartServerJob, + "Trigger Build Server Cleanup": TriggerBuildServerCleanupJob, + "Upgrade MariaDB": UpgradeMariaDBJob, + "Warn disk at 80%": WarnDiskJob, } @@ -39,6 +91,7 @@ class PressJob(WorkflowBuilder): if TYPE_CHECKING: from frappe.types import DF + arguments: DF.SmallText duration: DF.Duration | None end: DF.Datetime | None job_type: DF.Link @@ -50,6 +103,10 @@ class PressJob(WorkflowBuilder): virtual_machine: DF.Link | None # end: auto-generated types + @cached_property + def arguments_dict(self) -> "frappe._dict": + return frappe._dict(json.loads(self.get("arguments") or "{}")) + @property def server_doc(self) -> "Server | DatabaseServer": if 
hasattr(self, "_server_doc") and self._server_doc: # type: ignore @@ -110,8 +167,13 @@ def __init__(self, *args, **kwargs): self.__class__ = _JOBS_REGISTRY[self.job_type] def start_workflow(self) -> str: + if self.status != "Pending": + frappe.throw("Only jobs with Pending status can be started") + if not hasattr(self, "execute"): raise NotImplementedError("Press Job implementation must have an execute method") + self.start = now_datetime() + self.status = "Running" return self.execute.run_as_workflow() def on_workflow_success(self, workflow: "PressWorkflow"): diff --git a/press/press/doctype/registry_server/registry_server.py b/press/press/doctype/registry_server/registry_server.py index 9e71dca1979..e6d774d05bb 100644 --- a/press/press/doctype/registry_server/registry_server.py +++ b/press/press/doctype/registry_server/registry_server.py @@ -133,8 +133,9 @@ def prune_mirror_registry(self): frappe.enqueue_doc(self.doctype, self.name, "_prune_mirror_registry", queue="long", timeout=3600) - def _prune_mirror_registry(self): + def _prune_mirror_registry(self, throw_on_failure: bool = False): try: + assert self.docker_data_mountpoint, "Docker data mountpoint is required to prune mirror registry" ansible = Ansible( playbook="prune_mirror_registry.yml", server=self, @@ -147,9 +148,15 @@ def _prune_mirror_registry(self): "registry_container": "registry-registry-1", }, ) - ansible.run() + play = ansible.run() + if play.status != "Success" and throw_on_failure: + frappe.throw("Failed to prune mirror registry") # nosemgrep + return play except Exception: log_error("Mirror Registry Prune Failed", server=self.as_dict()) + if throw_on_failure: + frappe.throw("Failed to prune mirror registry") # nosemgrep + return None @frappe.whitelist() def show_registry_password(self): diff --git a/press/press/doctype/server/server.py b/press/press/doctype/server/server.py index 384e2ca7ea8..4f839cf5b33 100644 --- a/press/press/doctype/server/server.py +++ 
b/press/press/doctype/server/server.py @@ -871,7 +871,7 @@ def install_exporters(self): frappe.enqueue_doc(self.doctype, self.name, "_install_exporters", queue="long", timeout=1200) @frappe.whitelist() - def ping_ansible(self): + def ping_ansible(self) -> AnsiblePlay | None: try: ansible = Ansible( playbook="ping.yml", @@ -879,15 +879,16 @@ def ping_ansible(self): user=self._ssh_user(), port=self._ssh_port(), ) - ansible.run() + return ansible.run() except Exception: log_error("Server Ping Exception", server=self.as_dict()) + return None @frappe.whitelist() def update_agent_ansible(self): frappe.enqueue_doc(self.doctype, self.name, "_update_agent_ansible") - def _update_agent_ansible(self): + def _update_agent_ansible(self, throw_on_failure: bool = False): try: agent_branch = frappe.get_value("Press Settings", "Press Settings", "branch") if not agent_branch: @@ -905,8 +906,12 @@ def _update_agent_ansible(self): user=self._ssh_user(), port=self._ssh_port(), ) - ansible.run() - except Exception: + play = ansible.run() + if throw_on_failure and play.status != "Success": + raise Exception("Failed to update agent") + except Exception as e: + if throw_on_failure: + raise e log_error("Agent Update Exception", server=self.as_dict()) @frappe.whitelist() @@ -1502,7 +1507,7 @@ def increase_swap(self, swap_size=4): **{"swap_size": swap_size}, ) - def _increase_swap(self, swap_size=4): + def _increase_swap(self, swap_size=4, throw_on_failure: bool = False): """Increase swap by size defined""" from press.api.server import calculate_swap @@ -1520,13 +1525,19 @@ def _increase_swap(self, swap_size=4): "swap_file": swap_file_name, }, ) - ansible.run() + play = ansible.run() + if play.status != "Success" and throw_on_failure: + raise Exception("Failed to increase swap") + return play except Exception: + if throw_on_failure: + raise log_error("Increase swap exception", doc=self) + return None - def increase_swap_locked(self, swap_size=4): + def increase_swap_locked(self, swap_size=4, 
throw_on_failure: bool = False): with filelock(f"{self.name}-swap-update"): - self._increase_swap(swap_size) + self._increase_swap(swap_size, throw_on_failure=throw_on_failure) @frappe.whitelist() def reset_swap(self, swap_size=1, now: bool = False): @@ -1631,7 +1642,7 @@ def _set_swappiness(self): log_error("Swappiness Setup Exception", doc=self) @frappe.whitelist() - def update_tls_certificate(self): + def update_tls_certificate(self, throw_on_failure: bool = False): from press.press.doctype.tls_certificate.tls_certificate import ( update_server_tls_certifcate, ) @@ -1648,7 +1659,7 @@ def update_tls_certificate(self): certificate = frappe.get_last_doc("TLS Certificate", filters) - update_server_tls_certifcate(self, certificate) + update_server_tls_certifcate(self, certificate, throw_on_failure=throw_on_failure) @frappe.whitelist() def show_agent_version(self) -> str: @@ -2300,7 +2311,7 @@ def prune_docker_system(self): timeout=8000, ) - def _prune_docker_system(self): + def _prune_docker_system(self, throw_on_failure: bool = False): try: ansible = Ansible( playbook="docker_system_prune.yml", @@ -2308,9 +2319,15 @@ def _prune_docker_system(self): user=self._ssh_user(), port=self._ssh_port(), ) - ansible.run() + play = ansible.run() + if play.status != "Success" and throw_on_failure: + frappe.throw("Failed to prune docker system") # nosemgrep + return play except Exception: log_error("Prune Docker System Exception", doc=self) + if throw_on_failure: + frappe.throw("Failed to prune docker system") # nosemgrep + return None def get_nat_gateway_ip(self): if hasattr(self, "nat_server") and self.nat_server: diff --git a/press/press/doctype/tls_certificate/tls_certificate.py b/press/press/doctype/tls_certificate/tls_certificate.py index fb5b3f109ed..6f2e4250ed2 100644 --- a/press/press/doctype/tls_certificate/tls_certificate.py +++ b/press/press/doctype/tls_certificate/tls_certificate.py @@ -414,7 +414,7 @@ def notify_custom_tls_renewal(): ) -def 
update_server_tls_certifcate(server, certificate): +def update_server_tls_certifcate(server, certificate, throw_on_failure: bool = False): try: proxysql_admin_password = None if server.doctype == "Proxy Server": @@ -441,8 +441,10 @@ def update_server_tls_certifcate(server, certificate): # to avoid causing TimestampMismatchError in other important tasks update_modified=False, ) - except Exception: + except Exception as e: log_error("TLS Setup Exception", server=server.as_dict()) + if throw_on_failure: + raise Exception(f"Failed to update TLS certificate on {server.doctype} {server.name}") from e def retrigger_failed_wildcard_tls_callbacks(): diff --git a/press/workflow_engine/doctype/press_workflow/exceptions.py b/press/workflow_engine/doctype/press_workflow/exceptions.py index 8174a391be0..0ecf212241b 100644 --- a/press/workflow_engine/doctype/press_workflow/exceptions.py +++ b/press/workflow_engine/doctype/press_workflow/exceptions.py @@ -1,11 +1,13 @@ # Copyright (c) 2026, Frappe and contributors # For license information, please see license.txt +from __future__ import annotations + class PressWorkflowTaskEnqueued(Exception): """Raised when a task is enqueued and the flow needs to be paused.""" - def __init__(self, message: str, workflow_name: str, task_name: str): + def __init__(self, message: str, workflow_name: str, task_name: str | None = None): super().__init__(message) self.workflow_name = workflow_name self.task_name = task_name diff --git a/press/workflow_engine/doctype/press_workflow/workflow_builder.py b/press/workflow_engine/doctype/press_workflow/workflow_builder.py index a607cf7d57a..b9ea57cd914 100644 --- a/press/workflow_engine/doctype/press_workflow/workflow_builder.py +++ b/press/workflow_engine/doctype/press_workflow/workflow_builder.py @@ -189,3 +189,17 @@ def resolve_context(self) -> None: if self.kv_store_type != "in_memory": self.kv_store_type = "in_memory" self.kv_store_reference = None + + def defer_current_task(self, message: str = "User 
has requested to defer the task later.") -> None: + if not self.flags.in_press_workflow_execution: + return + + assert self.workflow_name is not None, "Workflow name must be set to defer current task" + + raise PressWorkflowTaskEnqueued( + message, + self.workflow_name, + self.flags.current_press_workflow_task + if hasattr(self.flags, "current_press_workflow_task") + else None, + ) From 2ac199bcea53dbfbb9e1559d760d1bccdf68b28f Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 17 Apr 2026 02:46:59 +0530 Subject: [PATCH 04/22] feat(workflow-engine): Allow press admin and member in permissions --- press/press/doctype/press_job/press_job.py | 4 +++- press/press/doctype/site/archive.py | 2 +- .../press_workflow/press_workflow.json | 20 ++++++++++++++++- .../press_workflow_object.json | 22 ++++++++++++++++++- .../press_workflow_task.json | 22 ++++++++++++++++++- 5 files changed, 65 insertions(+), 5 deletions(-) diff --git a/press/press/doctype/press_job/press_job.py b/press/press/doctype/press_job/press_job.py index 1e416ddc8e0..8c447ce7f18 100644 --- a/press/press/doctype/press_job/press_job.py +++ b/press/press/doctype/press_job/press_job.py @@ -168,7 +168,9 @@ def __init__(self, *args, **kwargs): def start_workflow(self) -> str: if self.status != "Pending": - frappe.throw("Only jobs with Pending status can be started") + frappe.throw( + "Only jobs with Pending status can be started.
Please wait and retry after some time." + ) if not hasattr(self, "execute"): raise NotImplementedError("Press Job implementation must have an execute method") diff --git a/press/press/doctype/site/archive.py b/press/press/doctype/site/archive.py index c56a1cd8e36..80c07137cd7 100644 --- a/press/press/doctype/site/archive.py +++ b/press/press/doctype/site/archive.py @@ -82,7 +82,7 @@ def delete_offsite_backups_for_archived_sites(): offsite_backups DESC """, as_dict=True, - ) + ) # nosemgrep for site in archived_sites: try: frappe.get_doc("Site", site.site).delete_offsite_backups() diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.json b/press/workflow_engine/doctype/press_workflow/press_workflow.json index 9267e6adefe..c36cadf458c 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.json +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.json @@ -283,7 +283,7 @@ "link_fieldname": "workflow" } ], - "modified": "2026-04-16 22:25:25.102297", + "modified": "2026-04-17 02:44:53.455627", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow", @@ -301,6 +301,24 @@ "role": "System Manager", "share": 1, "write": 1 + }, + { + "email": 1, + "export": 1, + "print": 1, + "report": 1, + "role": "Press Admin", + "share": 1, + "write": 1 + }, + { + "email": 1, + "export": 1, + "print": 1, + "report": 1, + "role": "Press Member", + "share": 1, + "write": 1 } ], "row_format": "Dynamic", diff --git a/press/workflow_engine/doctype/press_workflow_object/press_workflow_object.json b/press/workflow_engine/doctype/press_workflow_object/press_workflow_object.json index b24708a6002..75061a8b09e 100644 --- a/press/workflow_engine/doctype/press_workflow_object/press_workflow_object.json +++ b/press/workflow_engine/doctype/press_workflow_object/press_workflow_object.json @@ -54,7 +54,7 @@ ], "grid_page_length": 50, "links": [], - "modified": "2026-03-03 21:32:26.447603", + "modified": "2026-04-17 
02:45:58.030816", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow Object", @@ -72,6 +72,26 @@ "role": "System Manager", "share": 1, "write": 1 + }, + { + "create": 1, + "email": 1, + "export": 1, + "print": 1, + "report": 1, + "role": "Press Admin", + "share": 1, + "write": 1 + }, + { + "create": 1, + "email": 1, + "export": 1, + "print": 1, + "report": 1, + "role": "Press Member", + "share": 1, + "write": 1 } ], "row_format": "Dynamic", diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json index 1f9b7c93b29..e9b72c61577 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json @@ -179,7 +179,7 @@ ], "grid_page_length": 50, "links": [], - "modified": "2026-04-14 16:25:01.842528", + "modified": "2026-04-17 02:45:26.200457", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow Task", @@ -197,6 +197,26 @@ "role": "System Manager", "share": 1, "write": 1 + }, + { + "create": 1, + "email": 1, + "export": 1, + "print": 1, + "report": 1, + "role": "Press Admin", + "share": 1, + "write": 1 + }, + { + "create": 1, + "email": 1, + "export": 1, + "print": 1, + "report": 1, + "role": "Press Member", + "share": 1, + "write": 1 } ], "row_format": "Dynamic", From 9eb85872dfd57f0c21c566b30cd24845da3e7430 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 17 Apr 2026 02:49:38 +0530 Subject: [PATCH 05/22] fix(ci): Add OpenSSL in mypy ignore --- mypy.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mypy.ini b/mypy.ini index 4bc5a28bfeb..a9a09818af0 100644 --- a/mypy.ini +++ b/mypy.ini @@ -47,3 +47,5 @@ ignore_missing_imports = true ignore_missing_imports = true [mypy-PIL.*] ignore_missing_imports = true +[mypy-OpenSSL.*] 
+ignore_missing_imports = true \ No newline at end of file From 97c88341848e0cd220f78f0386141da444eebde9 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 17 Apr 2026 02:56:25 +0530 Subject: [PATCH 06/22] feat(press-job): Delete the step and scripts from fixture --- press/fixtures/press_job_type.json | 642 +----------------- press/press/doctype/press_job/press_job.json | 5 +- press/press/doctype/press_job/press_job.py | 2 +- .../press_job_type/press_job_type.json | 28 +- .../doctype/press_job_type/press_job_type.py | 5 - 5 files changed, 36 insertions(+), 646 deletions(-) diff --git a/press/fixtures/press_job_type.json b/press/fixtures/press_job_type.json index 5da8cd90430..3ebee370db8 100644 --- a/press/fixtures/press_job_type.json +++ b/press/fixtures/press_job_type.json @@ -1,722 +1,134 @@ [ { - "callback_max_retry": 1, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2025-11-26 15:28:08.243873", - "name": "Auto Scale Application Server", - "steps": [ - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type == \"Server\":\n server.scale_up()", - "step_name": "Auto Scale Application Server", - "wait_until_true": 0 - } - ] + "name": "Auto Scale Application Server" }, { - "callback_max_retry": 5, - "callback_script": "snapshot = frappe.get_doc(\"Server Snapshot\", arguments.get(\"server_snapshot\"))\nif doc.server_type == \"Server\":\n\tsnapshot.resume_app_server_services()\nelif doc.server_type == \"Database Server\":\n\tsnapshot.resume_database_server_services()\n\nif doc.status == \"Failure\":\n frappe.db.set_value(\"Server Snapshot\", snapshot.name, \"status\", \"Failure\", update_modified=False)\n for s in snapshot.snapshots:\n try:\n frappe.get_doc(\"Virtual Disk Snapshot\", s).delete_snapshot(ignore_validation=True)\n except:\n pass\n \nelse:\n snapshot.sync(now=False)", "docstatus": 0, "doctype": "Press Job Type", "modified": 
"2026-04-14 11:33:58.268508", - "name": "Snapshot Disk", - "steps": [ - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\n\nwhile True:\n try:\n machine.sync()\n finally:\n break\n\nif machine.status == \"Terminated\":\n raise Exception(\"Can't snapshot terminated virtual machine\")\nelif machine.status == \"Draft\":\n raise Exception(\"Can't snapshot draft virtual machine\")", - "step_name": "Verify Virtual Machine Status", - "wait_until_true": 0 - }, - { - "script": "if doc.server_type == \"Server\" and arguments.get(\"is_consistent_snapshot\", False):\n server = frappe.get_doc(\"Server\", doc.server)\n output = server.ansible_run(\"systemctl stop docker\")\n if not (output and output.get(\"status\") == \"Success\"):\n raise Exception(\"Failed to stop docker daemon\")\nelse:\n result = (False, False) # Skipped\n", - "step_name": "Stop Docker Daemon", - "wait_until_true": 0 - }, - { - "script": "if doc.server_type == \"Database Server\" and arguments.get(\"is_consistent_snapshot\", False):\n server = frappe.get_doc(\"Database Server\", doc.server)\n output = server.ansible_run(\"systemctl stop mariadb\")\n if not (output and output.get(\"status\") == \"Success\"):\n raise Exception(\"Failed to stop mariadb service\")\n result = (True, False)\nelse:\n result = (False, False) # Skipped\n", - "step_name": "Stop MariaDB Service", - "wait_until_true": 0 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\noutput = server.ansible_run(\"sync\")\nif not (output and output.get(\"status\") == \"Success\"):\n raise Exception(\"Failed to flush file system buffers to disk\")\n", - "step_name": "Flush File System Buffers to Disk", - "wait_until_true": 0 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.create_snapshots(exclude_boot_volume=True, dedicated_snapshot=True)\n\nfield_name = \"app_server_snapshot\" if doc.server_type == \"Server\" else 
\"database_server_snapshot\"\nno_of_snapshots = len(machine.flags.created_snapshots)\nif no_of_snapshots != 1:\n raise Exception(\"Expected 1 disk snapshot. Found : \"+str(no_of_snapshots))\n \nfrappe.db.set_value(\"Server Snapshot\", arguments.get(\"server_snapshot\"), field_name, machine.flags.created_snapshots[0], update_modified=False)", - "step_name": "Snapshot Disk", - "wait_until_true": 0 - } - ] + "name": "Snapshot Disk" }, { - "callback_max_retry": 0, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2025-08-12 16:24:10.555919", - "name": "Attach Volume", - "steps": [ - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nprint(machine.as_dict())\nif machine.cloud_provider in [\"AWS EC2\", \"OCI\"]:\n machine.attach_new_volume(machine.size, machine.iops, machine.throughput)\nelse:\n machine.attach_volume(size=100)", - "step_name": "Attach Volume", - "wait_until_true": 0 - } - ] + "name": "Attach Volume" }, { - "callback_max_retry": 0, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2024-02-05 17:08:00.514456", - "name": "Create Server Snapshot", - "steps": [ - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.stop()\n", - "step_name": "Stop Virtual Machine", - "wait_until_true": 0 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.sync()\nresult = (machine.status == \"Stopped\", False)", - "step_name": "Wait for Virtual Machine to Stop", - "wait_until_true": 1 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\narguments.image = machine.create_image()", - "step_name": "Create Snapshot", - "wait_until_true": 0 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\ntry:\n machine.start()\n result = (True, False)\nexcept:\n result = (False, False)", - "step_name": "Start Virtual Machine", - 
"wait_until_true": 1 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.sync()\nresult = (machine.status == \"Running\", False)", - "step_name": "Wait for Virtual Machine to Start", - "wait_until_true": 1 - }, - { - "script": "image = frappe.get_doc(\"Virtual Machine Image\", arguments.image)\nimage.sync()\nresult = (image.status == \"Available\", False)", - "step_name": "Wait for Snapshot to Complete", - "wait_until_true": 1 - } - ] + "name": "Create Server Snapshot" }, { - "callback_max_retry": 0, - "callback_script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type in [\"Server\", \"Database Server\"] and server.is_for_recovery:\n filter_field = \"app_server\" if doc.server_type == \"Server\" else \"database_server\"\n recovery_record_name = frappe.db.get_value(\"Server Snapshot Recovery\", {filter_field: doc.server}, \"name\")\n if recovery_record_name:\n recovery_record = frappe.get_doc(\"Server Snapshot Recovery\", recovery_record_name, for_update=True)\n \n if doc.status == \"Success\":\n if doc.server_type == \"Server\":\n recovery_record.app_server_archived = True\n else:\n recovery_record.database_server_archived = True\n recovery_record.save()", "docstatus": 0, "doctype": "Press Job Type", "modified": "2025-09-08 11:42:40.490054", - "name": "Archive Server", - "steps": [ - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.disable_termination_protection()", - "step_name": "Disable Termination Protection", - "wait_until_true": 0 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.terminate()", - "step_name": "Terminate Virtual Machine", - "wait_until_true": 0 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\ntry:\n # Usually machine sync never fails\n # It can fail weirdly due to TimestampMismatchError or lock timeout\n # Don't fail this whole thing just 
because of that\n # Ignore the errors of sync and keep retrying\n machine.sync()\nexcept:\n pass\nresult = (machine.status == \"Terminated\", False)", - "step_name": "Wait for Virtual Machine to Terminate", - "wait_until_true": 1 - } - ] + "name": "Archive Server" }, { - "callback_max_retry": 0, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2024-01-05 13:40:21.038901", - "name": "Upgrade MariaDB", - "steps": [ - { - "script": "server = frappe.get_doc(\"Database Server\", doc.server)\nserver.stop_mariadb()", - "step_name": "Stop MariaDB", - "wait_until_true": 0 - }, - { - "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Stop MariaDB\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", False)\n", - "step_name": "Wait for MariaDB to Stop", - "wait_until_true": 1 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.create_snapshots()\nsnapshot = frappe.get_last_doc(\"Virtual Disk Snapshot\", {\"virtual_machine\": doc.virtual_machine})\nsnapshot.add_comment(text=\"Before MariaDB Upgrade\")", - "step_name": "Create Server Snapshot", - "wait_until_true": 0 - }, - { - "script": "server = frappe.get_doc(\"Database Server\", doc.server)\nserver.upgrade_mariadb()", - "step_name": "Upgrade MariaDB", - "wait_until_true": 0 - }, - { - "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Upgrade MariaDB\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\n", - "step_name": "Wait for MariaDB Upgrade to Complete", - "wait_until_true": 1 - } - ] + "name": "Upgrade MariaDB" }, { - "callback_max_retry": 0, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2025-02-14 14:30:00.676187", - "name": "Increase Disk Size", - "steps": [ - { - "script": 
"server = frappe.get_doc(doc.server_type, doc.server)\nmountpoint = arguments.labels.get(\"mountpoint\")\nserver.calculated_increase_disk_size(mountpoint=mountpoint)", - "step_name": "Increase Disk Size", - "wait_until_true": 0 - }, - { - "script": "should_auto_increase = frappe.db.get_value(doc.server_type, doc.server, \"auto_increase_storage\")\nif not should_auto_increase:\n result = (True, False)\n\nelse:\n if frappe.db.get_value(doc.server_type, doc.server, \"provider\") == \"AWS EC2\":\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Extend EC2 Volume\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\n else:\n result = (True, False)", - "step_name": "Wait for partition to resize (AWS)", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"OCI\":\n machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\n machine.sync()\n result = (machine.status == \"Running\", False)\nelse:\n result = (True, False)", - "step_name": "Wait for server to start (OCI)", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"OCI\":\n server.ping_ansible()\n\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Ping Server\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", False)\nelse:\n result = (True, False)\n", - "step_name": "Wait for server to be accessible (OCI)", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"OCI\":\n server.add_glass_file()\nelse:\n result = (True, False)\n # handled for aws already in extend playbook", - "step_name": "Add glass file back (OCI)", - "wait_until_true": 0 - }, - { - "script": "if doc.server_type == 'Server':\n 
server = frappe.get_doc(doc.server_type, doc.server)\n server.start_active_benches()\n", - "step_name": "Restart Active Benches", - "wait_until_true": 0 - } - ] + "name": "Increase Disk Size" }, { - "callback_max_retry": 0, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2025-11-12 09:35:41.121169", - "name": "Prune Docker system", - "steps": [ - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\ntelegram_message = frappe.get_doc(\"Press Settings\").telegram_message\ntelegram_message.enqueue(f\"Pruning docker cache on [{server.name}]({frappe.utils.get_url_to_form(server.doctype, server.name)})\", \"Information\")\nserver.prune_docker_system()", - "step_name": "Prune Docker system", - "wait_until_true": 0 - }, - { - "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Prune Docker System\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\n", - "step_name": "Wait for docker system prune", - "wait_until_true": 1 - } - ] + "name": "Prune Docker system" }, { - "callback_max_retry": 0, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2025-11-12 09:35:21.819679", - "name": "Increase Swap", - "steps": [ - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\ntelegram_message = frappe.get_doc(\"Press Settings\").telegram_message\ntelegram_message.enqueue(f\"Increasing swap on [{server.name}]({frappe.utils.get_url_to_form(server.doctype, server.name)})\", \"Information\")\nserver.increase_swap(4)", - "step_name": "Add swap on server", - "wait_until_true": 0 - }, - { - "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Increase Swap\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")", - "step_name": "Wait 
for swap to be added", - "wait_until_true": 1 - } - ] + "name": "Increase Swap" }, { - "callback_max_retry": 0, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2024-12-06 10:59:08.032149", - "name": "Stop and Start Server", - "steps": [ - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.stop()", - "step_name": "Stop Virtual Machine", - "wait_until_true": 0 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.sync()\nresult = (machine.status == \"Stopped\", False)", - "step_name": "Wait for Virtual Machine to Stop", - "wait_until_true": 1 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\ntry:\n machine.start()\n result = (True, False)\nexcept:\n result = (False, False)", - "step_name": "Start Virtual Machine", - "wait_until_true": 1 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.sync()\nresult = (machine.status == \"Running\", False)", - "step_name": "Wait for Virtual Machine to Start", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.ping_ansible()\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Ping Server\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", False)\n", - "step_name": "Wait for Server to be accessible", - "wait_until_true": 1 - } - ] + "name": "Stop and Start Server" }, { - "callback_max_retry": 0, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2025-11-12 09:34:59.467479", - "name": "Reset Swap", - "steps": [ - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\ntelegram_message = frappe.get_doc(\"Press Settings\").telegram_message\ntelegram_message.enqueue(f\"Resetting swap on [{server.name}]({frappe.utils.get_url_to_form(server.doctype, 
server.name)})\", \"Information\")\nserver.reset_swap()", - "step_name": "Reset swap on server", - "wait_until_true": 0 - }, - { - "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Reset Swap\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")", - "step_name": "Wait for swap to be reset", - "wait_until_true": 1 - } - ] + "name": "Reset Swap" }, { - "callback_max_retry": 0, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2025-07-11 15:20:56.780290", - "name": "Warn disk at 80%", - "steps": [ - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nmountpoint = arguments.labels.get(\"mountpoint\")\nserver.recommend_disk_increase(mountpoint=mountpoint)", - "step_name": "Send Warning", - "wait_until_true": 0 - } - ] + "name": "Warn disk at 80%" }, { - "callback_max_retry": 0, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2025-07-25 21:43:11.895128", - "name": "Trigger Build Server Cleanup", - "steps": [ - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif server.use_for_build:\n server.prune_docker_system()\n \n", - "step_name": "Trigger Build Server Cleanup", - "wait_until_true": 0 - } - ] + "name": "Trigger Build Server Cleanup" + }, + { + "docstatus": 0, + "doctype": "Press Job Type", + "modified": "2025-08-31 20:54:46.857348", + "name": "Create Server (old)" }, { - "callback_max_retry": 0, - "callback_script": "", "docstatus": 0, "doctype": "Press Job Type", "modified": "2025-09-08 11:36:55.450275", - "name": "Resume Services After Snapshot", - "steps": [ - { - "script": "if doc.server_type == \"Server\" and arguments.get(\"is_consistent_snapshot\", False):\n server = frappe.get_doc(\"Server\", doc.server)\n output = server.ansible_run(\"systemctl start docker\")\n if not (output and output.get(\"status\") == 
\"Success\"):\n raise Exception(\"Failed to start docker daemon\")\n\n frappe.db.set_value(\"Server Snapshot\", arguments.get(\"server_snapshot\"), \"app_server_services_started\", True, update_modified=False)\nelse:\n result = (False, False) # Skipped\n # As it wasn't a consistent snapshot services were never stopped\n frappe.db.set_value(\"Server Snapshot\", arguments.get(\"server_snapshot\"), \"app_server_services_started\", True, update_modified=False)", - "step_name": "Start Docker Daemon", - "wait_until_true": 0 - }, - { - "script": "if doc.server_type == \"Database Server\" and arguments.get(\"is_consistent_snapshot\", False):\n server = frappe.get_doc(\"Database Server\", doc.server)\n output = server.ansible_run(\"systemctl start mariadb\")\n if not (output and output.get(\"status\") == \"Success\"):\n raise Exception(\"Failed to start mariadb service\")\n frappe.db.set_value(\"Server Snapshot\", arguments.get(\"server_snapshot\"), \"database_server_services_started\", True, update_modified=False)\n\nelse:\n result = (False, False) # Skipped\n # As it wasn't a consistent snapshot services were never stopped\n frappe.db.set_value(\"Server Snapshot\", arguments.get(\"server_snapshot\"), \"database_server_services_started\", True, update_modified=False)", - "step_name": "Start MariaDB Service", - "wait_until_true": 0 - } - ] + "name": "Resume Services After Snapshot" }, { - "callback_max_retry": 1, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2025-11-12 11:06:08.243873", - "name": "Prune Mirror Registry", - "steps": [ - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type == \"Registry Server\":\n server.prune_mirror_registry()", - "step_name": "Prune Mirror Registry", - "wait_until_true": 0 - } - ] + "name": "Prune Mirror Registry" }, { - "callback_max_retry": 1, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2025-12-18 15:28:08.243873", - 
"name": "Auto Scale Up Application Server", - "steps": [ - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type == \"Server\" and not server.scaled_up:\n server.scale_up(is_automatically_triggered=True)", - "step_name": "Auto Scale Up Application Server", - "wait_until_true": 0 - } - ] + "name": "Auto Scale Up Application Server" }, { - "callback_max_retry": 1, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2025-12-18 15:28:08.243873", - "name": "Auto Scale Down Application Server", - "steps": [ - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type == \"Server\" and server.scaled_up:\n server.scale_down(is_automatically_triggered=True)", - "step_name": "Auto Scale Down Application Server", - "wait_until_true": 0 - } - ] + "name": "Auto Scale Down Application Server" }, { - "callback_max_retry": 0, - "callback_script": null, "docstatus": 0, "doctype": "Press Job Type", "modified": "2026-02-22 22:29:46.984146", - "name": "Resize Server", - "steps": [ - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.stop()", - "step_name": "Stop Virtual Machine", - "wait_until_true": 0 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.sync()\nresult = (machine.status == \"Stopped\", False)", - "step_name": "Wait for Virtual Machine to Stop", - "wait_until_true": 1 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.resize(arguments.machine_type, arguments.get(\"upgrade_disk\", None))", - "step_name": "Resize Virtual Machine", - "wait_until_true": 0 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\ntry:\n machine.start()\n result = (True, False)\nexcept:\n result = (False, False)", - "step_name": "Start Virtual Machine", - "wait_until_true": 1 - }, - { - "script": "machine = frappe.get_doc(\"Virtual 
Machine\", doc.virtual_machine)\nmachine.sync()\nresult = (machine.status == \"Running\", False)", - "step_name": "Wait for Virtual Machine to Start", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.ping_ansible()\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Ping Server\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", False)\n", - "step_name": "Wait for Server to be accessible", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif server.is_unified_server:\n server_doc = frappe.get_doc(\"Server\", doc.server)\n db_doc = frappe.get_doc(\"Database Server\", doc.server)\n\n server_doc.auto_scale_workers()\n db_doc.adjust_memory_config()\n\nelse:\n if doc.server_type == \"Database Server\":\n server.adjust_memory_config()\n\n elif doc.server_type == \"Server\":\n server.auto_scale_workers()\n", - "step_name": "Set additional config", - "wait_until_true": 0 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\n\nserver = frappe.get_doc(doc.server_type, doc.server)\nif server.plan:\n plan_disk_size = frappe.db.get_value(\"Server Plan\", server.plan, \"disk\")\n if plan_disk_size and plan_disk_size > machine.disk_size:\n try:\n server.increase_disk_size(increment=plan_disk_size - machine.disk_size)\n except:\n pass", - "step_name": "Increase Disk Size", - "wait_until_true": 0 - } - ] + "name": "Resize Server" }, { - "callback_max_retry": 1, - "callback_script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type in [\"Server\", \"Database Server\"] and server.is_for_recovery:\n filter_field = \"app_server\" if doc.server_type == \"Server\" else \"database_server\"\n recovery_record_name = frappe.db.get_value(\"Server Snapshot Recovery\", {filter_field: doc.server}, \"name\")\n if recovery_record_name:\n 
recovery_record = frappe.get_doc(\"Server Snapshot Recovery\", recovery_record_name, for_update=True)\n \n if doc.status == \"Success\":\n if doc.server_type == \"Server\":\n recovery_record.is_app_server_ready = True\n else:\n recovery_record.is_database_server_ready = True\n recovery_record.save()\n else:\n recovery_record.mark_server_provisioning_as_failed()\n \nif doc.server_type in [\"Server\", \"Database Server\"] and \"logical_replication_backup\" in arguments:\n if doc.status == \"Success\":\n frappe.get_doc(\"Logical Replication Backup\", arguments.get(\"logical_replication_backup\")).next()\n if doc.status == \"Failure\":\n frappe.get_doc(\"Logical Replication Backup\", arguments.get(\"logical_replication_backup\")).fail()\n \nif doc.server_type in [\"Server\", \"Database Server\"] and doc.status == \"Success\":\n server.is_provisioning_press_job_completed = 1\n server.save(ignore_permissions=True)\n \n if server.is_unified_server:\n frappe.db.set_value(\"Database Server\", server.database_server, \"is_provisioning_press_job_completed\", 1, update_modified =False)", "docstatus": 0, "doctype": "Press Job Type", "modified": "2026-04-08 15:12:31.123007", - "name": "Create Server", - "steps": [ - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nmachine.provision()\n", - "step_name": "Create Server", - "wait_until_true": 0 - }, - { - "script": "machine = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\ntry:\n # Usually machine sync never fails\n # It can fail weirdly due to TimestampMismatchError or lock timeout\n # Don't fail this whole thing just because of that\n # Ignore the errors of sync and keep retrying\n machine.sync()\n\nexcept (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError):\n result = (False, False)\nexcept Exception as e:\n if \"rate_limit_exceeded\" in str(e):\n result = (False, False)\n else:\n raise e\nelse:\n result = (machine.status == \"Running\", False)\n", - 
"step_name": "Wait for Server to start", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.ping_ansible()\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Ping Server\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n\nvirtual_machine = frappe.get_doc(\"Virtual Machine\", server.virtual_machine)\n\nresult = (plays and plays[0].status == \"Success\" and virtual_machine.private_ip_address != '', False)\n", - "step_name": "Wait for Server to be accessible", - "wait_until_true": 1 - }, - { - "script": "try:\n vm = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\n vm.sync()\n if len(vm.volumes) > 0:\n result = (True, False)\n else:\n result = (False, False)\nexcept (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError):\n result = (False, False)\nexcept Exception as e:\n raise e", - "step_name": "Sync Default Volumes", - "wait_until_true": 1 - }, - { - "script": "vm = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nif vm.data_disk_snapshot:\n is_created = vm.create_data_disk_volume_from_snapshot()\n if is_created:\n result = (True, False)\n else:\n arguments.update({\"max_volume_creation_retries\": arguments.get(\"max_volume_creation_retries\", 6)-1})\n if arguments.get(\"max_volume_creation_retries\") <= 0:\n result = (False, True)\n result = (False, False)", - "step_name": "Create Volume From Snapshot", - "wait_until_true": 1 - }, - { - "script": "vm = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\nif vm.data_disk_snapshot:\n is_attached = vm.check_and_attach_data_disk_snapshot_volume()\n if is_attached:\n result = (True, False)\n else:\n result = (False, False)", - "step_name": "Attach Snapshotted Volume", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"AWS EC2\" and frappe.db.get_value(\"Virtual Machine\", server.virtual_machine, 
\"data_disk_snapshot\"):\n try:\n vm = frappe.get_doc(\"Virtual Machine\", server.virtual_machine)\n vm.sync()\n \n if len(vm.volumes) == 0 or (vm.data_disk_snapshot_attached and len(vm.volumes) == 1):\n result = (False, False)\n else:\n server.reload()\n server.validate_mounts()\n server.save()\n result = (True, False)\n except (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError):\n result = (False, False)\n except Exception as e:\n raise e\nelse:\n result = (True, False)", - "step_name": "Sync Attached Volumes", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"AWS EC2\" and frappe.db.get_value(\"Virtual Machine\", server.virtual_machine, \"data_disk_snapshot\"):\n cleanup_db_replication_files = False\n if doc.server_type == \"Database Server\" and (server.is_for_recovery or arguments.get(\"setup_db_replication\", False)):\n cleanup_db_replication_files = True\n server.mount_volumes(\n now=False,\n stop_docker_before_mount=doc.server_type == \"Server\",\n stop_mariadb_before_mount=doc.server_type == \"Database Server\",\n # If server is in recovery mode, don't start docker and containers\n # Because If site gets active, background job witll be started and that can modify data\n start_docker_after_mount=doc.server_type == \"Server\" and not server.is_for_recovery,\n # If goal is to create replica server, don't start database\n # As we need to do some additional config before starting database\n start_mariadb_after_mount=doc.server_type == \"Database Server\" and not arguments.get(\"setup_db_replication\", False),\n cleanup_db_replication_files=cleanup_db_replication_files,\n # It's important to change uuid, labels of attached disk\n # There is high chance that the root disk and data disk might have same disk info\n rotate_additional_volume_metadata=True\n )\nelse:\n result = (False, False)", - "step_name": "Mount Data Disk", - "wait_until_true": 0 - }, - { - 
"script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"AWS EC2\" and frappe.db.get_value(\"Virtual Machine\", server.virtual_machine, \"data_disk_snapshot\"):\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Mount Volumes\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\nelse:\n result = None", - "step_name": "Wait for Data Disk Mount to Complete", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.wait_for_cloud_init()", - "step_name": "Check Cloud Init status", - "wait_until_true": 0 - }, - { - "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Wait for Cloud Init to finish\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status in (\"Success\", \"Failure\"), False)", - "step_name": "Wait for Cloud Init to finish", - "wait_until_true": 1 - }, - { - "script": "provider = frappe.db.get_value(doc.server_type, doc.server, 'provider')\nif provider == \"Hetzner\" and doc.virtual_machine:\n vm = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\n server = frappe.get_doc(doc.server_type, doc.server)\n \n if vm.virtual_machine_image:\n vmi = frappe.get_doc(\"Virtual Machine Image\", vm.virtual_machine_image)\n\n if vmi.has_data_volume:\n # If VMI has data volume, then only proceed with attaching disk\n if server.plan:\n data_disk_size = int(frappe.db.get_value(\"Server Plan\", server.plan ,\"disk\"))\n else:\n data_disk_size = 25\n \n vm.attach_new_volume(data_disk_size)\n \n # Do Virtual Machine Sync\n # Until we got no error\n max_sync_tries = 100\n while max_sync_tries:\n try:\n vm.sync()\n break\n except Exception as e:\n max_sync_tries = max_sync_tries - 1\n if max_sync_tries <=0 :\n raise e\n \n server.validate_mounts()\n server.save(ignore_version=True) # To avoid 
timestamp mismatch errors\n server.mount_volumes(now=False)\n result = (True, False)\n else:\n result = (False, False)\n", - "step_name": "Create and mount volumes (Hetzner)", - "wait_until_true": 0 - }, - { - "script": "provider = frappe.db.get_value(doc.server_type, doc.server, 'provider')\nif provider == \"Hetzner\" and doc.virtual_machine:\n vm = frappe.get_doc(\"Virtual Machine\", doc.virtual_machine)\n\n if vm.virtual_machine_image:\n vmi = frappe.get_doc(\"Virtual Machine Image\", vm.virtual_machine_image)\n if vmi.has_data_volume:\n # Check for running ansible play\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Mount Volumes\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", False)\n else:\n result = None\n", - "step_name": "Wait for volumes to mount", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nif server.provider == \"Hetzner\" and server.has_data_volume:\n if server.doctype == \"Server\":\n server.setup_docker()\n elif server.doctype == \"Database Server\":\n server.set_mariadb_mount_dependency()\nelse:\n result = (False, False)", - "step_name": "Configure apps for mounts (Hetzner)", - "wait_until_true": 0 - }, - { - "script": "play_statuses = frappe.db.get_all(\"Ansible Play\", filters={\n \"server_type\": doc.server_type,\n \"server\": doc.server,\n \"play\": (\"in\", [\"Install Docker\", \"Setup MariaDB Mount Dependency\"])\n}, pluck=\"status\")\n\nall_completed = True\nis_failure = False\n\nfor status in play_statuses:\n if status == \"Failure\":\n is_failure = True\n \n all_completed = all_completed and status == \"Success\"\n \nif all_completed:\n result = (True, False)\nelif is_failure:\n result = (False, True)\nelse:\n result = (False, False)", - "step_name": "Wait For Dependent Plays To Finish (Hetzner)", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, 
doc.server)\nserver.update_tls_certificate()", - "step_name": "Update TLS Certificate", - "wait_until_true": 0 - }, - { - "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Setup TLS Certificates\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status in (\"Success\", \"Failure\"), False)", - "step_name": "Wait for TLS Certificate to be updated", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.update_agent_ansible()", - "step_name": "Update Agent Ansible", - "wait_until_true": 0 - }, - { - "script": "plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Update Agent\"}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status in (\"Success\", \"Failure\"), False)", - "step_name": "Wait for Agent to be updated", - "wait_until_true": 1 - }, - { - "script": "\nif doc.server_type == \"Database Server\" and arguments.get(\n\t\"setup_db_replication\", False\n):\n\tpass\n", - "step_name": "Stop MariaDB Slave", - "wait_until_true": 0 - }, - { - "script": "if doc.server_type == \"Database Server\":\n server = frappe.get_doc(\"Database Server\", doc.server)\n server.upgrade_mariadb()\n\nif doc.is_unified_server:\n database_server = frappe.get_doc(\"Database Server\", doc.database_server)\n database_server.upgrade_mariadb()", - "step_name": "Upgrade MariaDB", - "wait_until_true": 0 - }, - { - "script": "if doc.server_type == \"Database Server\" or doc.is_unified_server:\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Upgrade MariaDB\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\nelse:\n result = (True,)", - "step_name": "Wait for MariaDB Upgrade to Complete", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, 
doc.server)\n\nif doc.server_type == \"Database Server\" and arguments.get(\n\t\"setup_db_replication\", False\n):\n\tserver.prepare_mariadb_replica()\n\tresult = (True, False)\nelse:\n\tresult = (False, False)\n", - "step_name": "Prepare MariaDB Replica", - "wait_until_true": 0 - }, - { - "script": "if doc.server_type == \"Database Server\" and arguments.get(\"setup_db_replication\", False):\n plays = frappe.get_all(\"Ansible Play\", {\"server\": doc.server, \"play\": \"Prepare MariaDB Replica\"}, [\"status\"], order_by=\"creation desc\", limit=1)\n result = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\nelse:\n result = (True,)", - "step_name": "Wait for MariaDB Replica to Be Prepared", - "wait_until_true": 1 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type == \"Database Server\" and arguments.get(\n\t\"setup_db_replication\", False\n):\n\tserver.configure_replication()\n\tresult = (True, False)\nelse:\n\tresult = (False, False)\n", - "step_name": "Configure MariaDB Replica", - "wait_until_true": 0 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\n\nif doc.server_type == \"Database Server\" and arguments.get(\n\t\"setup_db_replication\", False\n):\n\tserver.start_replication()\n\tresult = (True, False)\nelse:\n\tresult = (False, False)\n", - "step_name": "Start MariaDB Replica", - "wait_until_true": 0 - }, - { - "script": "server = frappe.get_doc(doc.server_type, doc.server)\nserver.set_additional_config()", - "step_name": "Set additional config", - "wait_until_true": 0 - }, - { - "script": "if doc.server.startswith(\"fs\") and doc.server_type == \"Server\":\n primary_server = frappe.db.get_value(\"Server\", doc.server, \"primary\")\n nfs_volume_attachment = frappe.get_doc(\n\t {\"doctype\": \"NFS Volume Attachment\", \"primary_server\": primary_server}\n )\n nfs_volume_attachment.insert(ignore_permissions=True)\n frappe.db.commit()", - "step_name": 
"Share benches over NFS", - "wait_until_true": 0 - } - ] + "name": "Create Server" }, { - "callback_max_retry": 1, - "callback_script": "if doc.status == \"Success\":\n failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n failover.is_db_server_failover_setup = False\n failover.is_app_server_failover_setup = False\n failover.enabled = False\n failover.save()", "docstatus": 0, "doctype": "Press Job Type", "modified": "2026-03-18 17:20:29.325620", - "name": "Remove On-Prem Failover", - "steps": [ - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.remove_app_server_from_firewall()", - "step_name": "Remove Wireguard Port Access from App Server", - "wait_until_true": 0 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.remove_db_server_from_firewall()", - "step_name": "Remove Wireguard Port Access from DB Server", - "wait_until_true": 0 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.stop_replication_from_app_server()", - "step_name": "Stop Replication from App Server", - "wait_until_true": 0 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": failover.app_server, \"play\": \"Stop App Server Replication to On-Premise\", \"creation\": (\">=\", doc.creation)}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\n", - "step_name": "Wait for Stop Replication from App Server", - "wait_until_true": 1 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.stop_replication_from_db_server()", - "step_name": "Stop Replication from DB Server", - "wait_until_true": 0 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n\nplays = frappe.get_all(\"Ansible 
Play\", {\"server\": failover.database_server, \"play\": \"Stop Database Server Replication to On-Premise\", \"creation\": (\">=\", doc.creation)}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\n", - "step_name": "Wait for Stop Replication from DB Server", - "wait_until_true": 1 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.delete_firewall()", - "step_name": "Delete Firewall", - "wait_until_true": 0 - } - ] + "name": "Remove On-Prem Failover" }, { - "callback_max_retry": 1, - "callback_script": "if doc.status == \"Success\":\n failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n failover.is_db_server_failover_setup = True\n failover.is_app_server_failover_setup = True\n failover.save()", "docstatus": 0, "doctype": "Press Job Type", "modified": "2026-03-18 17:17:19.436686", - "name": "Setup On-Prem Failover", - "steps": [ - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.add_app_server_to_firewall()", - "step_name": "Allow Wireguard Port Through Security Group on App Server", - "wait_until_true": 0 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.add_db_server_to_firewall()", - "step_name": "Allow Wireguard Port Through Security Group on DB Server", - "wait_until_true": 0 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.setup_wireguard_on_app_server()", - "step_name": "Setup Wireguard on App Server", - "wait_until_true": 0 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.setup_wireguard_on_database_server()", - "step_name": "Setup Wireguard on DB Server", - "wait_until_true": 0 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", 
arguments.failover)\nfailover.check_connectivity_to_on_premise_server()\n\nfailover.reload()\nif failover.is_on_prem_server_ssh_from_app_server_working and failover.is_on_prem_server_ssh_from_db_server_working:\n result = (True, False)\nelse:\n result = (False, False)", - "step_name": "Test Connectivity to On-Prem Server", - "wait_until_true": 1 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.setup_app_server_replica()", - "step_name": "Setup Replication for App Server", - "wait_until_true": 0 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": failover.app_server, \"play\": \"Setup App Server Replication Sync\", \"creation\": (\">=\", doc.creation)}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")\n", - "step_name": "Wait For Replication Setup for App Server", - "wait_until_true": 1 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.setup_db_lsync_for_initial_sync()", - "step_name": "Setup Lsyncd For Initial DB Sync", - "wait_until_true": 0 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": failover.database_server, \"play\": \"Setup Database Lsyncd for On-Premise Failover\", \"creation\": (\">=\", doc.creation)}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")", - "step_name": "Wait For Setup Lsyncd For Initial DB Sync", - "wait_until_true": 1 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n\nif failover.db_lsyncd_stop_at and (frappe.utils.now_datetime() > failover.db_lsyncd_stop_at):\n result = (True, False)\nelse:\n result = (False, 
False)", - "step_name": "Wait For Initial DB Sync To Complete", - "wait_until_true": 1 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.setup_db_rsync_for_final_sync()", - "step_name": "Rsync New DB Files", - "wait_until_true": 0 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": failover.database_server, \"play\": \"Final Database Sync for On-Premise Failover\", \"creation\": (\">=\", doc.creation)}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")", - "step_name": "Wait For Rsync New DB Files", - "wait_until_true": 1 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\nfailover.setup_and_configure_database_replica()\n", - "step_name": "Setup Replica In On-Prem Server", - "wait_until_true": 0 - }, - { - "script": "failover = frappe.get_doc(\"On-Prem Failover\", arguments.failover)\n\nplays = frappe.get_all(\"Ansible Play\", {\"server\": failover.database_server, \"play\": \"Setup Replica on On-Premise Server\", \"creation\": (\">=\", doc.creation)}, [\"status\"], order_by=\"creation desc\", limit=1)\nresult = (plays and plays[0].status == \"Success\", plays and plays[0].status == \"Failure\")", - "step_name": "Wait For Setup Replica In On-Prem Server", - "wait_until_true": 1 - } - ] + "name": "Setup On-Prem Failover" } ] \ No newline at end of file diff --git a/press/press/doctype/press_job/press_job.json b/press/press/doctype/press_job/press_job.json index 546418bf126..68b9a6b57c4 100644 --- a/press/press/doctype/press_job/press_job.json +++ b/press/press/doctype/press_job/press_job.json @@ -22,11 +22,10 @@ "fields": [ { "fieldname": "job_type", - "fieldtype": "Link", + "fieldtype": "Data", "in_list_view": 1, "in_standard_filter": 1, "label": "Job Type", - "options": "Press Job Type", 
"reqd": 1, "search_index": 1, "set_only_once": 1 @@ -123,7 +122,7 @@ "link_fieldname": "linked_docname" } ], - "modified": "2026-04-16 23:16:27.885432", + "modified": "2026-04-17 02:56:56.915694", "modified_by": "Administrator", "module": "Press", "name": "Press Job", diff --git a/press/press/doctype/press_job/press_job.py b/press/press/doctype/press_job/press_job.py index 8c447ce7f18..552beb92114 100644 --- a/press/press/doctype/press_job/press_job.py +++ b/press/press/doctype/press_job/press_job.py @@ -94,7 +94,7 @@ class PressJob(WorkflowBuilder): arguments: DF.SmallText duration: DF.Duration | None end: DF.Datetime | None - job_type: DF.Link + job_type: DF.Data name: DF.Int | None server: DF.DynamicLink | None server_type: DF.Link | None diff --git a/press/press/doctype/press_job_type/press_job_type.json b/press/press/doctype/press_job_type/press_job_type.json index 0e350db1652..65a21feba46 100644 --- a/press/press/doctype/press_job_type/press_job_type.json +++ b/press/press/doctype/press_job_type/press_job_type.json @@ -7,37 +7,21 @@ "editable_grid": 1, "engine": "InnoDB", "field_order": [ - "steps", - "callback_script", - "callback_max_retry" + "disclaimer" ], "fields": [ - { - "fieldname": "steps", - "fieldtype": "Table", - "label": "Steps", - "options": "Press Job Type Step", - "reqd": 1 - }, - { - "description": "The callback function will be called once Press Job reaches the terminating state [Success, Failure].", - "fieldname": "callback_script", - "fieldtype": "Code", - "label": "Callback Script", - "options": "Python" - }, { "default": "1", - "fieldname": "callback_max_retry", - "fieldtype": "Int", - "in_list_view": 1, - "label": "Callback Max Retry" + "fieldname": "disclaimer", + "fieldtype": "HTML", + "label": "Disclaimer", + "options": "The steps and callback script for press job has been deprecated and moved to code.
\n

Please check press/press/doctype/press_job/jobs folder for more info

" } ], "grid_page_length": 50, "index_web_pages_for_search": 1, "links": [], - "modified": "2025-07-31 13:52:28.892322", + "modified": "2026-04-17 02:55:16.274403", "modified_by": "Administrator", "module": "Press", "name": "Press Job Type", diff --git a/press/press/doctype/press_job_type/press_job_type.py b/press/press/doctype/press_job_type/press_job_type.py index aef0ee11eae..0a0c5dfb090 100644 --- a/press/press/doctype/press_job_type/press_job_type.py +++ b/press/press/doctype/press_job_type/press_job_type.py @@ -14,11 +14,6 @@ class PressJobType(Document): if TYPE_CHECKING: from frappe.types import DF - from press.press.doctype.press_job_type_step.press_job_type_step import PressJobTypeStep - - callback_max_retry: DF.Int - callback_script: DF.Code | None - steps: DF.Table[PressJobTypeStep] # end: auto-generated types pass From 78ca7ca9821e5cf44cc9013a7e7f6d6188bf988c Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 17 Apr 2026 03:04:31 +0530 Subject: [PATCH 07/22] refactor(release-pipeline): Use defer_current_task function for retry --- .../release_pipeline/release_pipeline.py | 44 ++++--------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/press/press/doctype/release_pipeline/release_pipeline.py b/press/press/doctype/release_pipeline/release_pipeline.py index eb1c4633d8d..e8aabe9feab 100644 --- a/press/press/doctype/release_pipeline/release_pipeline.py +++ b/press/press/doctype/release_pipeline/release_pipeline.py @@ -19,7 +19,6 @@ ) from press.press.doctype.bench_update.bench_update import get_bench_update from press.workflow_engine.doctype.press_workflow.decorators import flow, task -from press.workflow_engine.doctype.press_workflow.exceptions import PressWorkflowTaskEnqueued from press.workflow_engine.doctype.press_workflow.workflow_builder import WorkflowBuilder if typing.TYPE_CHECKING: @@ -169,12 +168,6 @@ def workflow_name(self) -> str: "Press Workflow", {"linked_doctype": 
"Release Pipeline", "linked_docname": self.name}, "name" ) - def get_task_name(self, func): - """Get task name for the given function""" - return frappe.db.get_value( - "Press Workflow Task", {"method_name": func.__name__, "workflow": self.workflow_name}, "name" - ) - @task def validate_app_hashes(self, apps: list[dict[str, str]]): """Validate App Hashes""" @@ -245,10 +238,8 @@ def _check_for_scheduled_build_retries(self, deploy_candidate_build: str): if deploy_candidate_build_doc.should_build_retry(exc=None, job=agent_job): self.update_pipeline_status("Retrying") - raise PressWorkflowTaskEnqueued( - f"Build {deploy_candidate_build} has scheduled retries. Waiting for retries to complete.", - self.workflow_name, - self.get_task_name(self.monitor_pre_build_validation), + self.defer_current_task( + f"Build {deploy_candidate_build} has scheduled retries. Waiting for retries to complete." ) def _get_latest_retried_build(self, deploy_candidate_build: str) -> str: @@ -274,7 +265,6 @@ def _get_latest_retried_build(self, deploy_candidate_build: str) -> str: @task def monitor_pre_build_validation(self, deploy_candidate_build: str): """Monitors the Deploy Candidate Build until the remote build job is created.""" - task_name = self.get_task_name(self.monitor_pre_build_validation) deploy_candidate_build_status = frappe.db.get_value( "Deploy Candidate Build", deploy_candidate_build, "status" ) @@ -288,10 +278,8 @@ def monitor_pre_build_validation(self, deploy_candidate_build: str): "Please check the build logs for more details." 
) - raise PressWorkflowTaskEnqueued( - f"Waiting for remote build job to be enqueued for Deploy Candidate Build {deploy_candidate_build}", - self.workflow_name, - task_name, + self.defer_current_task( + f"Waiting for remote build job to be enqueued for Deploy Candidate Build {deploy_candidate_build}" ) @task @@ -312,10 +300,8 @@ def monitor_build_success(self, deploy_candidate_build: str): f"Remote build failed for Deploy Candidate Build {deploy_candidate_build}. Please check the build logs for more details." ) - raise PressWorkflowTaskEnqueued( - f"Waiting for build to complete for Deploy Candidate Build {deploy_candidate_build}", - self.workflow_name, - self.get_task_name(self.monitor_build_success), + self.defer_current_task( + f"Waiting for build to complete for Deploy Candidate Build {deploy_candidate_build}" ) def _is_active_bench_work_in_progress(self, builds: list[str]) -> bool: @@ -510,11 +496,7 @@ def orchestrate_build_monitoring(self, deploy_candidate: str, primary_build: str if not secondary_build: # Wait for sometime for the secondary build to be created in case of any delays in build scheduling - raise PressWorkflowTaskEnqueued( - f"Waiting for secondary build creation for {deploy_candidate}", - self.workflow_name, - self.get_task_name(self.monitor_build_success), - ) + self.defer_current_task(f"Waiting for secondary build to be created for {deploy_candidate}") self.monitor_pre_build_validation(secondary_build) self.monitor_build_success(secondary_build) @@ -535,22 +517,14 @@ def monitor_bench_creation(self, deploy_candidate_build: str): # This should take care of the retries as well. 
if self._is_active_bench_work_in_progress(builds): - raise PressWorkflowTaskEnqueued( - "Benches in progress, Waiting...", - self.workflow_name, - self.get_task_name(self.monitor_bench_creation), - ) + self.defer_current_task("Benches in progress, Waiting...") # Just another safety lock to ensure no early failures occur statues = frappe.db.get_all("Bench", {"build": ["in", builds]}, pluck="status") in_transition = [status for status in statues if status in BENCH_TRANSITION_STATES] if in_transition: - raise PressWorkflowTaskEnqueued( - "Benches are in transition states...", - self.workflow_name, - self.get_task_name(self.monitor_bench_creation), - ) + self.defer_current_task("Benches are in transition states...") self._finalize_pipeline_status(builds=builds, expected_count=expected) From d141bec0f2813d2cbd1772a9580a50b0c88ca77d Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Sat, 18 Apr 2026 20:47:32 +0530 Subject: [PATCH 08/22] fix(server): Fix server creation tests --- press/api/tests/test_server.py | 82 +++++++++++++------ press/press/doctype/press_job/press_job.py | 15 ++++ press/utils/test.py | 69 ++++++++-------- .../doctype/press_workflow/press_workflow.py | 5 +- .../press_workflow/workflow_builder.py | 3 +- 5 files changed, 117 insertions(+), 57 deletions(-) diff --git a/press/api/tests/test_server.py b/press/api/tests/test_server.py index 241eb206550..e8e3e1cdb57 100644 --- a/press/api/tests/test_server.py +++ b/press/api/tests/test_server.py @@ -3,6 +3,8 @@ from __future__ import annotations +from typing import TYPE_CHECKING +from unittest import mock from unittest.mock import MagicMock, Mock, patch import frappe @@ -14,6 +16,7 @@ from press.press.doctype.cluster.cluster import Cluster from press.press.doctype.cluster.test_cluster import create_test_cluster from press.press.doctype.database_server.database_server import DatabaseServer +from press.press.doctype.press_job.jobs.resize_server import 
ResizeServerJob from press.press.doctype.proxy_server.test_proxy_server import create_test_proxy_server from press.press.doctype.server.server import BaseServer from press.press.doctype.team.test_team import create_test_press_admin_team @@ -27,6 +30,9 @@ from press.runner import Ansible from press.utils.test import foreground_enqueue_doc_with_user +if TYPE_CHECKING: + from press.press.doctype.press_job.press_job import PressJob + def create_test_server_plan( document_type: str, @@ -67,53 +73,68 @@ def unavailable_check_machine_availability(self: Cluster, machine_type: str, ins return False -def successful_sync(self: VirtualMachine): +def successful_sync_with_memory(memory): + def _sync(self): + return successful_sync(self, memory) + + return _sync + + +def successful_sync(self: VirtualMachine, memory: int | None = None): self.status = "Running" if not self.volumes: self.append( "volumes", {"volume_id": "vol-123456", "size": 20, "volume_type": "gp2", "device": "/dev/sda1"} ) + if memory: + self.ram = memory self.save() self.update_servers() -def successful_ping_ansible(self: BaseServer): - create_test_ansible_play("Ping Server", "ping.yml", self.doctype, self.name) +def successful_ping_ansible(self: BaseServer, *args, **kwargs): + return create_test_ansible_play("Ping Server", "ping.yml", self.doctype, self.name) -def successful_upgrade_mariadb(self: DatabaseServer): - create_test_ansible_play("Upgrade MariaDB", "upgrade_mariadb.yml", self.doctype, self.name) +def successful_upgrade_mariadb(self: DatabaseServer, *args, **kwargs): + return create_test_ansible_play("Upgrade MariaDB", "upgrade_mariadb.yml", self.doctype, self.name) -def successful_upgrade_mariadb_patched(self: DatabaseServer): - create_test_ansible_play( +def successful_upgrade_mariadb_patched(self: DatabaseServer, *args, **kwargs): + return create_test_ansible_play( "Upgrade MariaDB Patched", "upgrade_mariadb_patched.yml", self.doctype, self.name ) -def successful_tls_certificate(self: BaseServer): - 
create_test_ansible_play("Setup TLS Certificates", "tls.yml", self.doctype, self.name) +def successful_tls_certificate(self: BaseServer, *args, **kwargs): + return create_test_ansible_play("Setup TLS Certificates", "tls.yml", self.doctype, self.name) -def successful_update_agent_ansible(self: BaseServer): - create_test_ansible_play("Update Agent", "update_agent.yml", self.doctype, self.name) +def successful_update_agent_ansible(self: BaseServer, *args, **kwargs): + return create_test_ansible_play("Update Agent", "update_agent.yml", self.doctype, self.name) -def successful_wait_for_cloud_init(self: BaseServer): - create_test_ansible_play( +def successful_wait_for_cloud_init(self: BaseServer, *args, **kwargs): + return create_test_ansible_play( "Wait for Cloud Init to finish", "wait_for_cloud_init.yml", self.doctype, self.name ) @patch.object(VirtualMachineImage, "client", new=MagicMock()) @patch.object(VirtualMachine, "client", new=MagicMock()) +@patch.object(VirtualMachine, "provision", new=successful_provision) +@patch.object(VirtualMachine, "sync", new=successful_sync) @patch.object(Ansible, "run", new=Mock()) @patch.object(BaseServer, "ping_ansible", new=successful_ping_ansible) @patch.object(DatabaseServer, "upgrade_mariadb", new=successful_upgrade_mariadb) -@patch.object(DatabaseServer, "upgrade_mariadb_patched", new=successful_upgrade_mariadb_patched) +@patch.object(DatabaseServer, "_upgrade_mariadb", new=successful_upgrade_mariadb) +@patch.object(DatabaseServer, "upgrade_mariadb_patched", new=successful_upgrade_mariadb) +@patch.object(DatabaseServer, "_upgrade_mariadb_patched", new=successful_upgrade_mariadb_patched) @patch.object(BaseServer, "wait_for_cloud_init", new=successful_wait_for_cloud_init) +@patch.object(BaseServer, "_wait_for_cloud_init", new=successful_wait_for_cloud_init) @patch.object(BaseServer, "update_tls_certificate", new=successful_tls_certificate) @patch.object(BaseServer, "update_agent_ansible", new=successful_update_agent_ansible) 
+@patch.object(BaseServer, "_update_agent_ansible", new=successful_update_agent_ansible) @patch.object(Cluster, "check_machine_availability", new=available_check_machine_availability) class TestAPIServer(FrappeTestCase): @patch.object(Cluster, "provision_on_aws_ec2", new=Mock()) @@ -140,12 +161,13 @@ def test_create_new_server_creates_pending_server_and_db_server(self): create_test_virtual_machine_image( cluster=self.cluster, series="f" ) # call from here and not setup, so mocks work + frappe.set_user(self.team.user) - servers_before = self._get_doc_count("Server", "Pending", self.team.name) - db_servers_before = self._get_doc_count("Database Server", "Pending", self.team.name) + servers_before = self._get_doc_count("Server", "Active", self.team.name) + db_servers_before = self._get_doc_count("Database Server", "Active", self.team.name) - new( + response = new( { "cluster": self.cluster.name, "db_plan": self.db_plan.name, @@ -154,8 +176,20 @@ def test_create_new_server_creates_pending_server_and_db_server(self): } ) - servers_after = self._get_doc_count("Server", "Pending", self.team.name) - db_servers_after = self._get_doc_count("Database Server", "Pending", self.team.name) + server_name = response["server"] + database_server_name = frappe.db.get_value("Server", server_name, "database_server") + + create_app_server_press_job: PressJob = frappe.get_last_doc( + "Press Job", {"server_type": "Server", "server": server_name} + ) + create_db_server_press_job: PressJob = frappe.get_last_doc( + "Press Job", {"server_type": "Database Server", "server": database_server_name} + ) + self.assertEqual(create_app_server_press_job.status, "Success") + self.assertEqual(create_db_server_press_job.status, "Success") + + servers_after = self._get_doc_count("Server", "Active", self.team.name) + db_servers_after = self._get_doc_count("Database Server", "Active", self.team.name) self.assertEqual(servers_before + 1, servers_after) self.assertEqual(db_servers_before + 1, db_servers_after) 
@@ -232,6 +266,7 @@ def test_new_fn_creates_server_with_active_subscription(self): @patch.object(VirtualMachine, "provision", new=successful_provision) @patch.object(VirtualMachine, "sync", new=successful_sync) + @patch.object(ResizeServerJob, "wait_for_virtual_machine_to_stop", new=mock.Mock()) def test_change_plan_changes_plan_of_server_and_updates_subscription_doc(self): create_test_virtual_machine_image(cluster=self.cluster, series="m") create_test_virtual_machine_image( @@ -259,10 +294,11 @@ def test_change_plan_changes_plan_of_server_and_updates_subscription_doc(self): "Press Job", {"status": "Running"}, "status", "Success" ) # Mark running jobs as success as extra steps we don't check - change_plan( - server.name, - app_plan_2.name, - ) + with patch.object(VirtualMachine, "sync", new=successful_sync_with_memory(app_plan_2.memory)): + change_plan( + server.name, + app_plan_2.name, + ) server.reload() app_subscription = frappe.get_doc( diff --git a/press/press/doctype/press_job/press_job.py b/press/press/doctype/press_job/press_job.py index 552beb92114..a5148da225b 100644 --- a/press/press/doctype/press_job/press_job.py +++ b/press/press/doctype/press_job/press_job.py @@ -124,6 +124,21 @@ def virtual_machine_doc(self) -> VirtualMachine | None: self._virtual_machine_doc = frappe.get_doc("Virtual Machine", self.virtual_machine) return self._virtual_machine_doc # type: ignore + @property + def steps(self) -> list[dict[str, str]]: + try: + workflow = frappe.get_last_doc("Press Workflow", {"linked_docname": self.name}) + return [ + { + "method": step.step_method, + "title": step.step_title, + "status": step.status, + } + for step in workflow.steps + ] + except frappe.DoesNotExistError: + return [] + def before_insert(self): frappe.db.get_value(self.server_type, self.server, "status", for_update=True) if existing_jobs := frappe.db.get_all( diff --git a/press/utils/test.py b/press/utils/test.py index 7de9df6d505..4f8e907618a 100644 --- a/press/utils/test.py +++ 
b/press/utils/test.py @@ -1,5 +1,6 @@ """Utility methods for writing tests""" +import os import sys from collections.abc import Callable from urllib.parse import urlparse, urlunparse @@ -7,6 +8,8 @@ import frappe import requests +_workflow_log_buffer: list[str] = [] + def foreground_enqueue_doc_with_user(run_as_user: str): def wrapper(*args, **kwargs): @@ -47,7 +50,7 @@ def foreground_enqueue_doc( getattr(frappe.get_doc(doctype, docname), method)(**kwargs) -def _foreground_run_workflow_doc(doctype: str, docname: str, job_id: str) -> None: +def _foreground_run_workflow_doc(doctype: str, docname: str, job_id: str, max_retries: int = 50) -> None: # noqa: C901 """ Tracks in-flight job IDs to prevent direct recursion. When the same job_id is re-enqueued while it is already on the call-stack the request is deferred; @@ -63,17 +66,21 @@ def _foreground_run_workflow_doc(doctype: str, docname: str, job_id: str) -> Non in_flight: set = frappe.local._fg_wf_in_flight pending: dict = frappe.local._fg_wf_pending + log_immediate = os.environ.get("PRESS_LOG_WORKFLOW_DEBUG_INFO") in ("1", "true", "True") + + def _log(msg: str) -> None: + _workflow_log_buffer.append(msg) + if log_immediate: + print(msg, file=sys.stderr, flush=True) + if job_id in in_flight: # Already executing this job - defer until the outermost call drains it. 
- print( - f"[FG] DEFER {job_id} (in-flight: {sorted(in_flight)})", - file=sys.stderr, - flush=True, - ) + _log(f"[WORKFLOW] DEFER {job_id} (in-flight: {sorted(in_flight)})") pending[job_id] = (doctype, docname) return - print(f"[FG] START {job_id}", file=sys.stderr, flush=True) + _log(f"[WORKFLOW] START {job_id}") + in_flight.add(job_id) method_title = "unknown_method" try: @@ -83,46 +90,38 @@ def _foreground_run_workflow_doc(doctype: str, docname: str, job_id: str) -> Non if hasattr(doc, "main_method_title") else (doc.method_title if hasattr(doc, "method_title") else "unknown_method") ) - print( - f"[FG] RUN {job_id} {method_title} | status={getattr(doc, 'status', '?')}", - file=sys.stderr, - flush=True, - ) + _log(f"[WORKFLOW] RUN {job_id} {method_title} | status={getattr(doc, 'status', '?')}") doc.run() - print( - f"[FG] DONE {job_id} {method_title} | status={getattr(frappe.get_doc(doctype, docname), 'status', '?')}", - file=sys.stderr, - flush=True, + _log( + f"[WORKFLOW] DONE {job_id} {method_title} | status={getattr(frappe.get_doc(doctype, docname), 'status', '?')}" ) # Drain any re-enqueue requests that arrived while this job was running. 
retry = 0 while job_id in pending: retry += 1 + if retry > max_retries: + _log( + f"[WORKFLOW] MAX RETRIES EXCEEDED for {job_id} {method_title} | pending={list(pending.keys())}" + ) + break pending.pop(job_id) - print(f"[FG] RETRY {job_id} {method_title} (#{retry})", file=sys.stderr, flush=True) + _log(f"[WORKFLOW] RETRY {job_id} {method_title} (#{retry})") doc = frappe.get_doc(doctype, docname) - print( - f"[FG] RUN {job_id} {method_title} | status={getattr(doc, 'status', '?')} (retry #{retry})", - file=sys.stderr, - flush=True, + _log( + f"[WORKFLOW] RUN {job_id} {method_title} | status={getattr(doc, 'status', '?')} (retry #{retry})" ) doc.run() - print( - f"[FG] DONE {job_id} {method_title} | status={getattr(frappe.get_doc(doctype, docname), 'status', '?')} (retry #{retry})", - file=sys.stderr, - flush=True, + _log( + f"[WORKFLOW] DONE {job_id} {method_title} | status={getattr(frappe.get_doc(doctype, docname), 'status', '?')} (retry #{retry})" ) + except Exception: + raise finally: - print( - f"[FG] FINISH {job_id} {method_title} | pending={list(pending.keys())}", - file=sys.stderr, - flush=True, - ) + _log(f"[WORKFLOW] FINISH {job_id} {method_title} | pending={list(pending.keys())}") in_flight.discard(job_id) def foreground_enqueue_task(task_name: str) -> None: - print(f"[FG] enqueue_task({task_name})", file=sys.stderr, flush=True) _foreground_run_workflow_doc( "Press Workflow Task", task_name, @@ -131,12 +130,18 @@ def foreground_enqueue_task(task_name: str) -> None: def foreground_enqueue_workflow(workflow_name: str) -> None: - print(f"[FG] enqueue_workflow({workflow_name})", file=sys.stderr, flush=True) + log_immediate = os.environ.get("PRESS_LOG_WORKFLOW_DEBUG_INFO") in ("1", "true", "True") + _workflow_log_buffer.clear() _foreground_run_workflow_doc( "Press Workflow", workflow_name, f"press_workflow||{workflow_name}||run", ) + if not log_immediate: + doc = frappe.get_doc("Press Workflow", workflow_name) + if getattr(doc, "status", None) == "Failure": + for 
msg in _workflow_log_buffer: + print(msg, file=sys.stderr, flush=True) def foreground_enqueue( diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.py b/press/workflow_engine/doctype/press_workflow/press_workflow.py index 3c10461e41f..22c2b0bef46 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.py +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.py @@ -146,7 +146,10 @@ def run(self): # noqa: C901 - best to keep it in one place self.update_skipped_steps_status(save=False) self.save() - self.execute_callback_in_background() + if frappe.flags.in_test: + self.execute_callback() + else: + self.execute_callback_in_background() def execute_callback_in_background(self): frappe.enqueue_doc( diff --git a/press/workflow_engine/doctype/press_workflow/workflow_builder.py b/press/workflow_engine/doctype/press_workflow/workflow_builder.py index b9ea57cd914..ae7bf5906c1 100644 --- a/press/workflow_engine/doctype/press_workflow/workflow_builder.py +++ b/press/workflow_engine/doctype/press_workflow/workflow_builder.py @@ -26,6 +26,7 @@ ) if TYPE_CHECKING: + from press.workflow_engine.doctype.press_workflow.press_workflow import PressWorkflow from press.workflow_engine.doctype.press_workflow_task.press_workflow_task import ( PressWorkflowTask, ) @@ -180,7 +181,7 @@ def resolve_context(self) -> None: current_workflow = getattr(frappe.flags, "current_press_workflow", None) if current_workflow: self.workflow_name = str(current_workflow) - self.workflow_doc = frappe.get_doc("Press Workflow", self.workflow_name) # type: ignore + self.workflow_doc: PressWorkflow = frappe.get_doc("Press Workflow", self.workflow_name) # type: ignore if self.kv_store_type != "workflow_store": # Store type is changing — discard any cached in-memory store. 
self.kv_store_type = "workflow_store" From ea0d7856d6ba6b3af6e749022be228531e5acd75 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Sat, 18 Apr 2026 21:11:23 +0530 Subject: [PATCH 09/22] fix(release-pipeline): Don't override workflow_name The workflow_name param is configured by WorkflowBuilder class automatically --- press/press/doctype/release_pipeline/release_pipeline.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/press/press/doctype/release_pipeline/release_pipeline.py b/press/press/doctype/release_pipeline/release_pipeline.py index e8aabe9feab..3e0b917b12a 100644 --- a/press/press/doctype/release_pipeline/release_pipeline.py +++ b/press/press/doctype/release_pipeline/release_pipeline.py @@ -162,12 +162,6 @@ def update_pipeline_status( def release_group_doc(self) -> "ReleaseGroup": return frappe.get_doc("Release Group", self.release_group) - @cached_property - def workflow_name(self) -> str: - return frappe.db.get_value( - "Press Workflow", {"linked_doctype": "Release Pipeline", "linked_docname": self.name}, "name" - ) - @task def validate_app_hashes(self, apps: list[dict[str, str]]): """Validate App Hashes""" @@ -498,6 +492,7 @@ def orchestrate_build_monitoring(self, deploy_candidate: str, primary_build: str # Wait for sometime for the secondary build to be created in case of any delays in build scheduling self.defer_current_task(f"Waiting for secondary build to be created for {deploy_candidate}") + assert secondary_build, "Secondary build should be present for candidates requiring 2 builds" self.monitor_pre_build_validation(secondary_build) self.monitor_build_success(secondary_build) From 01f375dfec7a17b24b21382371a356d2c3dbb56d Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Sat, 18 Apr 2026 21:37:15 +0530 Subject: [PATCH 10/22] feat(press-job): Add option to retry --- .../doctype/press_job/jobs/create_server.py | 2 + 
press/press/doctype/press_job/press_job.js | 40 ++++++++----------- press/press/doctype/press_job/press_job.py | 11 +++++ 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/press/press/doctype/press_job/jobs/create_server.py b/press/press/doctype/press_job/jobs/create_server.py index add0a81b700..fcdc1e101bb 100644 --- a/press/press/doctype/press_job/jobs/create_server.py +++ b/press/press/doctype/press_job/jobs/create_server.py @@ -63,6 +63,8 @@ def is_fs_server(self): @task def provision_server(self): machine = self.virtual_machine_doc + if machine.status != "Draft": + return machine.provision() @task diff --git a/press/press/doctype/press_job/press_job.js b/press/press/doctype/press_job/press_job.js index e25deb104ec..41afd743164 100644 --- a/press/press/doctype/press_job/press_job.js +++ b/press/press/doctype/press_job/press_job.js @@ -3,29 +3,21 @@ frappe.ui.form.on('Press Job', { refresh: function (frm) { - [ - [__('Force Continue'), 'force_continue', frm.doc.status === 'Failure'], - [__('Force Fail'), 'force_fail', frm.doc.status === 'Running'], - [ - __('Mark Callback Failure Issue Resolved'), - 'mark_callback_failure_issue_resolved', - frm.doc.callback_failed && - !frm.doc.callback_executed && - !frm.doc.callback_failure_issue_resolved, - ], - ].forEach(([label, method, condition]) => { - if (condition) { - frm.add_custom_button( - label, - () => { - frappe.confirm( - `Are you sure you want to ${label.toLowerCase()}?`, - () => frm.call(method).then(() => frm.refresh()), - ); - }, - __('Actions'), - ); - } - }); + [[__('Retry'), 'retry', frm.doc.status === 'Failed']].forEach( + ([label, method, condition]) => { + if (condition) { + frm.add_custom_button( + label, + () => { + frappe.confirm( + `Are you sure you want to ${label.toLowerCase()}?`, + () => frm.call(method).then(() => frm.refresh()), + ); + }, + __('Actions'), + ); + } + }, + ); }, }); diff --git a/press/press/doctype/press_job/press_job.py 
b/press/press/doctype/press_job/press_job.py index a5148da225b..c5156796de2 100644 --- a/press/press/doctype/press_job/press_job.py +++ b/press/press/doctype/press_job/press_job.py @@ -206,3 +206,14 @@ def on_workflow_failure(self, workflow: "PressWorkflow"): if hasattr(self, "on_press_job_failure"): self.on_press_job_failure(workflow) + + @frappe.whitelist() + def retry(self): + if self.status != "Failure": + frappe.throw("Only workflows in Failure state can be retried.") # nosemgrep + return + + self.status = "Pending" + self.save() + self.start_workflow() + frappe.db.commit() # nosemgrep From d4bc7516f286b47bac90a289fd557daf6d6f3266 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:15:33 +0530 Subject: [PATCH 11/22] feat(press-job): Make functions idempotent --- .../doctype/press_job/jobs/archive_server.py | 12 ++++ .../doctype/press_job/jobs/create_server.py | 59 +++++++++++-------- .../press_job/jobs/create_server_snapshot.py | 12 ++++ .../doctype/press_job/jobs/resize_server.py | 24 +++++++- .../press_job/jobs/stop_and_start_server.py | 12 ++++ 5 files changed, 91 insertions(+), 28 deletions(-) diff --git a/press/press/doctype/press_job/jobs/archive_server.py b/press/press/doctype/press_job/jobs/archive_server.py index f5be638da93..e5c99555bb2 100644 --- a/press/press/doctype/press_job/jobs/archive_server.py +++ b/press/press/doctype/press_job/jobs/archive_server.py @@ -15,10 +15,22 @@ def execute(self): @task def disable_termination_protection(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Terminated": + return + self.virtual_machine_doc.disable_termination_protection() @task(queue="long", timeout=600) def terminate_virtual_machine(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Terminated": + return + self.virtual_machine_doc.terminate() @task diff --git 
a/press/press/doctype/press_job/jobs/create_server.py b/press/press/doctype/press_job/jobs/create_server.py index fcdc1e101bb..78f7ce794b4 100644 --- a/press/press/doctype/press_job/jobs/create_server.py +++ b/press/press/doctype/press_job/jobs/create_server.py @@ -1,4 +1,4 @@ -import time +import contextlib from typing import TYPE_CHECKING import frappe @@ -113,6 +113,10 @@ def create_volume_from_snapshot(self): if not self.virtual_machine_doc.data_disk_snapshot: return + if self.virtual_machine_doc.data_disk_snapshot_volume_id: + # Volume has already been created from the snapshot, proceed to attach it + return + max_retries = self.arguments_dict.get("max_volume_creation_retries", 6) if self.kv.get("volume_creation_attempts", 0) >= max_retries: raise Exception(f"Failed to create volume from snapshot after {max_retries} retries") @@ -126,38 +130,41 @@ def create_volume_from_snapshot(self): @task def attach_snapshotted_volume(self): - vm = frappe.get_doc("Virtual Machine", self.virtual_machine) - if not vm.data_disk_snapshot: + if not self.virtual_machine_doc.data_disk_snapshot: return - while True: - is_attached = vm.check_and_attach_data_disk_snapshot_volume() - if is_attached: - return - time.sleep(10) - vm = frappe.get_doc("Virtual Machine", self.virtual_machine) + if self.virtual_machine_doc.data_disk_snapshot_attached: + # Volume has already been attached, proceed to sync it + return + + try: + self.virtual_machine_doc.check_and_attach_data_disk_snapshot_volume() + except (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError): + self.defer_current_task() @task def sync_attached_volumes(self): - server = self.server_doc - if server.provider != "AWS EC2" or not frappe.db.get_value( - "Virtual Machine", server.virtual_machine, "data_disk_snapshot" - ): + if not self.virtual_machine_doc.data_disk_snapshot: return - while True: - time.sleep(10) - try: - vm = frappe.get_doc("Virtual Machine", server.virtual_machine) - vm.sync() - if 
len(vm.volumes) == 0 or (vm.data_disk_snapshot_attached and len(vm.volumes) == 1): - continue - server.reload() - server.validate_mounts() - server.save() - break - except (frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError): - continue + with contextlib.suppress( + frappe.QueryDeadlockError, frappe.QueryTimeoutError, frappe.TimestampMismatchError + ): + self.virtual_machine_doc.sync() + if ( + self.virtual_machine_doc.data_disk_snapshot_attached + and len(self.virtual_machine_doc.volumes) == 1 + ) or ( + not self.virtual_machine_doc.data_disk_snapshot_attached + and len(self.virtual_machine_doc.volumes) == 0 + ): + self.defer_current_task() + return + + server = self.server_doc + server.reload() + server.validate_mounts() + server.save() @task(queue="long", timeout=7200) def mount_snapshotted_volume(self): diff --git a/press/press/doctype/press_job/jobs/create_server_snapshot.py b/press/press/doctype/press_job/jobs/create_server_snapshot.py index da5e7502749..584fb3954c5 100644 --- a/press/press/doctype/press_job/jobs/create_server_snapshot.py +++ b/press/press/doctype/press_job/jobs/create_server_snapshot.py @@ -23,6 +23,12 @@ def execute(self): @task def stop_virtual_machine(self): machine = self.virtual_machine_doc + with suppress(Exception): + machine.sync() + + if machine.status == "Stopped": + return + machine.stop() @task @@ -42,6 +48,12 @@ def create_snapshot(self): @task def start_virtual_machine(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Running": + return + try: self.virtual_machine_doc.start() except Exception: diff --git a/press/press/doctype/press_job/jobs/resize_server.py b/press/press/doctype/press_job/jobs/resize_server.py index 19cfa0f6c24..c12acf2d169 100644 --- a/press/press/doctype/press_job/jobs/resize_server.py +++ b/press/press/doctype/press_job/jobs/resize_server.py @@ -30,6 +30,12 @@ def execute(self): @task def stop_virtual_machine(self): 
+ with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Stopped": + return + self.virtual_machine_doc.stop() @task @@ -44,15 +50,29 @@ def wait_for_virtual_machine_to_stop(self): @task def resize_virtual_machine(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if ( + self.arguments_dict.get("upgrade_disk", False) + and self.virtual_machine_doc.machine_type == self.arguments_dict.machine_type + ): + return + self.virtual_machine_doc.resize( self.arguments_dict.machine_type, self.arguments_dict.get("upgrade_disk", False) ) @task def start_virtual_machine(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Running": + return + try: - if self.virtual_machine_doc.status != "Running": - self.virtual_machine_doc.start() + self.virtual_machine_doc.start() except Exception: self.defer_current_task() diff --git a/press/press/doctype/press_job/jobs/stop_and_start_server.py b/press/press/doctype/press_job/jobs/stop_and_start_server.py index 74d93d7502b..5dbbdc55d1b 100644 --- a/press/press/doctype/press_job/jobs/stop_and_start_server.py +++ b/press/press/doctype/press_job/jobs/stop_and_start_server.py @@ -17,6 +17,12 @@ def execute(self): @task def stop_virtual_machine(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Stopped": + return + self.virtual_machine_doc.stop() @task @@ -31,6 +37,12 @@ def wait_for_virtual_machine_to_stop(self): @task def start_virtual_machine(self): + with suppress(Exception): + self.virtual_machine_doc.sync() + + if self.virtual_machine_doc.status == "Running": + return + self.virtual_machine_doc.start() @task From 3fc42f001f1c334b987664134e475b5890940560 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:43:10 +0530 Subject: [PATCH 12/22] feat(press-workflow): Add Support for requesting force 
failure --- .../doctype/press_workflow/press_workflow.js | 25 +++++++++++++++---- .../press_workflow/press_workflow.json | 13 ++++++++-- .../doctype/press_workflow/press_workflow.py | 12 +++++++++ .../press_workflow_task.py | 6 ++++- 4 files changed, 48 insertions(+), 8 deletions(-) diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.js b/press/workflow_engine/doctype/press_workflow/press_workflow.js index 6f82d82f35d..9e7f1967396 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.js +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.js @@ -1,8 +1,23 @@ // Copyright (c) 2026, Frappe and contributors // For license information, please see license.txt -// frappe.ui.form.on("Press Workflow", { -// refresh(frm) { - -// }, -// }); +frappe.ui.form.on('Press Workflow', { + refresh(frm) { + if (frm.doc.status === 'Running') { + frm.add_custom_button( + 'Force Fail', + () => { + frappe.confirm( + 'Are you sure you want to force fail this workflow? 
This action cannot be undone.', + () => { + frm.call('force_fail').then(() => { + frm.reload_doc(); + }); + }, + ); + }, + 'Actions', + ); + } + }, +}); diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.json b/press/workflow_engine/doctype/press_workflow/press_workflow.json index 306ac0b52b7..0d6b9a10455 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.json +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.json @@ -6,6 +6,7 @@ "engine": "InnoDB", "field_order": [ "status", + "is_force_failure_requested", "column_break_lkci", "linked_doctype", "column_break_xuyw", @@ -41,7 +42,7 @@ "section_break_xglm", "stdout", "traceback", - "callback_traceback" + "callback_traceback", "workflow_traceback" ], "fields": [ @@ -274,9 +275,17 @@ { "fieldname": "column_break_gteb", "fieldtype": "Column Break" + }, + { "fieldname": "workflow_traceback", "fieldtype": "Long Text", "label": "Workflow Traceback" + }, + { + "default": "0", + "fieldname": "is_force_failure_requested", + "fieldtype": "Check", + "label": "Force Failure Requested" } ], "grid_page_length": 50, @@ -287,7 +296,7 @@ "link_fieldname": "workflow" } ], - "modified": "2026-04-23 19:16:29.284785", + "modified": "2026-04-24 11:33:09.864201", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow", diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.py b/press/workflow_engine/doctype/press_workflow/press_workflow.py index 94ac2b910aa..efb796f7c49 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.py +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.py @@ -48,6 +48,7 @@ class PressWorkflow(Document): duration: DF.Duration | None end: DF.Datetime | None exception: DF.Link | None + is_force_failure_requested: DF.Check key_value_store: DF.Table[PressWorkflowKV] kwargs: DF.Link | None linked_docname: DF.DynamicLink @@ -75,6 +76,14 @@ def after_insert(self): def on_trash(self): 
frappe.db.delete("Press Workflow Task", {"workflow": self.name}) + @frappe.whitelist() + def force_fail(self): + if self.status in ["Success", "Failure", "Fatal"]: + frappe.throw("Cannot force fail a workflow that has already completed.") + return + + frappe.db.set_value(self.doctype, self.name, "is_force_failure_requested", True) + def run(self): # noqa: C901 - best to keep it in one place if not self.linked_doctype or not self.linked_docname: frappe.throw("Cannot run flow without linked_doctype and linked_docname", frappe.ValidationError) @@ -111,6 +120,9 @@ def run(self): # noqa: C901 - best to keep it in one place frappe.db.commit() # nosemgrep try: + if self.is_force_failure_requested: + raise Exception("Workflow was forcefully failed based on user request.") + with redirect_stdout(buffer): result = getattr(reference_doc, self.main_method_name)(*args, **kwargs) diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py index 33b664a94e2..e8f605ff8ff 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py @@ -91,7 +91,7 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac workflow_info = frappe.get_value( "Press Workflow", self.workflow, - ["name", "status", "linked_docname", "linked_doctype"], + ["name", "status", "linked_docname", "linked_doctype", "is_force_failure_requested"], as_dict=True, ) @@ -142,6 +142,10 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac existing_task_signature = reference_doc.current_task_signature try: reference_doc.current_task_signature = self.signature + + if workflow_info.is_force_failure_requested: + raise Exception("Workflow was forcefully failed based on user request.") + with redirect_stdout(buffer): result = getattr(reference_doc, self.method_name)(*args, 
**kwargs) From 02ea86cc47fb54eb46c93a2653413f5870f91869 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 13:14:44 +0530 Subject: [PATCH 13/22] feat(workflow-kv): Store primitive types as json data To prevent creating too many workflow object --- .../doctype/press_workflow/press_workflow.py | 2 +- .../press_workflow_kv/press_workflow_kv.json | 19 ++- .../press_workflow_kv/press_workflow_kv.py | 23 ++-- press/workflow_engine/test_utils.py | 32 +++++ press/workflow_engine/utils.py | 128 +++++++++++++++++- 5 files changed, 188 insertions(+), 16 deletions(-) diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.py b/press/workflow_engine/doctype/press_workflow/press_workflow.py index efb796f7c49..912f4001a95 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.py +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.py @@ -79,7 +79,7 @@ def on_trash(self): @frappe.whitelist() def force_fail(self): if self.status in ["Success", "Failure", "Fatal"]: - frappe.throw("Cannot force fail a workflow that has already completed.") + frappe.throw("Cannot force fail a workflow that has already completed.") # nosemgrep return frappe.db.set_value(self.doctype, self.name, "is_force_failure_requested", True) diff --git a/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json b/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json index 2d828c74343..57f63a65a2d 100644 --- a/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json +++ b/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json @@ -6,7 +6,8 @@ "engine": "InnoDB", "field_order": [ "key", - "value" + "value", + "type" ], "fields": [ { @@ -18,11 +19,19 @@ "search_index": 1 }, { + "description": "Actual value or link to the object", "fieldname": "value", - "fieldtype": "Link", + "fieldtype": "Data", + "in_list_view": 1, + "label": "Value" + }, + { + 
"default": "object", + "fieldname": "type", + "fieldtype": "Select", "in_list_view": 1, - "label": "Value", - "options": "Press Workflow Object", + "label": "Type", + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject", "reqd": 1 } ], @@ -30,7 +39,7 @@ "index_web_pages_for_search": 1, "istable": 1, "links": [], - "modified": "2026-03-03 21:15:51.697093", + "modified": "2026-04-24 12:02:00.810732", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow KV", diff --git a/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.py b/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.py index a4d89b28d1b..4dc6dbfc8f1 100644 --- a/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.py +++ b/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.py @@ -7,9 +7,7 @@ import frappe from frappe.model.document import Document -from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( - PressWorkflowObject, -) +from press.workflow_engine.utils import deserialize_value, serialize_and_store_value class KVStoreInterface(abc.ABC): @@ -43,10 +41,16 @@ def set(self, key: str, value: Any, throw_on_error: bool = True): kv_doc.parenttype = self.parent_type kv_doc.key = key - if kv_doc.value: + if kv_doc.value and kv_doc.type == "object": frappe.db.set_value("Press Workflow Object", str(kv_doc.value), "deleted", True) - kv_doc.value = PressWorkflowObject.store(value, throw_on_error=throw_on_error) + value_type, value = serialize_and_store_value(value, throw_on_error=throw_on_error) + if value_type is None: + self.delete(key) + return + + kv_doc.type = value_type + kv_doc.value = value kv_doc.save(ignore_permissions=True) def get(self, key: str) -> Any | None: @@ -54,11 +58,11 @@ def get(self, key: str) -> Any | None: if not kv_name: return None - object_name = frappe.db.get_value("Press Workflow KV", kv_name, "value") - if not object_name: + value, value_type = 
frappe.db.get_value("Press Workflow KV", kv_name, ["value", "type"]) + if not value: return None - return PressWorkflowObject.get_object(str(object_name)) + return deserialize_value(value_type, value) def delete(self, key: str): kv_name = self._get_kv_record_name(key) @@ -111,5 +115,6 @@ class PressWorkflowKV(Document): parent: DF.Data parentfield: DF.Data parenttype: DF.Data - value: DF.Link + type: DF.Literal["int", "float", "string", "tuple", "list", "dict", "object"] + value: DF.Data | None # end: auto-generated types diff --git a/press/workflow_engine/test_utils.py b/press/workflow_engine/test_utils.py index 7a9e83e47b2..33a3d7ce8b6 100644 --- a/press/workflow_engine/test_utils.py +++ b/press/workflow_engine/test_utils.py @@ -10,9 +10,11 @@ _canonicalize, calculate_duration, called_methods_in_order, + deserialize_value, generate_function_signature, is_func_accept_task_id, method_title, + serialize_and_store_value, ) @@ -144,3 +146,33 @@ def my_func(a, b=2, task_id=None): sig4 = generate_function_signature(my_func, args=(1,), kwargs={"task_id": "123"}) # In this implementation, the payload structure incorporates task_id so the digest will be different. 
self.assertNotEqual(sig1, sig4) + + def test_serialize_deserialize_json_types(self): + cases = [ + (True, "bool"), + (7, "int"), + (1.5, "float"), + ("value", "string"), + ((1, "a"), "tuple"), + ([1, "a"], "list"), + ({"a": 1}, "dict"), + ] + + for original, expected_type in cases: + with self.subTest(value=original, value_type=expected_type): + value_type, serialized_value = serialize_and_store_value(original) + self.assertEqual(value_type, expected_type) + deserialized_value = deserialize_value(value_type, serialized_value) + self.assertEqual(type(deserialized_value), type(original)) + self.assertEqual(deserialized_value, original) + + def test_serialize_deserialize_exception_as_object(self): + original = ValueError("something went wrong") + value_type, serialized_value = serialize_and_store_value(original) + + self.assertEqual(value_type, "object") + self.assertIsNotNone(serialized_value) + + deserialized = deserialize_value(value_type, serialized_value) + self.assertIsInstance(deserialized, ValueError) + self.assertEqual(str(deserialized), str(original)) diff --git a/press/workflow_engine/utils.py b/press/workflow_engine/utils.py index c97d82048b7..153824a679a 100644 --- a/press/workflow_engine/utils.py +++ b/press/workflow_engine/utils.py @@ -10,7 +10,7 @@ import textwrap from collections.abc import Callable from datetime import datetime -from typing import Any +from typing import Any, Literal from frappe.model.document import Document from frappe.utils import get_datetime @@ -150,3 +150,129 @@ def generate_function_signature(func: Callable, args: tuple, kwargs: dict) -> st blob = json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8") return hashlib.sha256(blob).hexdigest() + + +def _is_serializable_value(value: Any) -> bool: + """Return True if value can round-trip through JSON without loss.""" + if value is None or isinstance(value, bool | str): + return True + if isinstance(value, int): # after bool: bool subclasses int + return True + 
if isinstance(value, float): + return math.isfinite(value) + if isinstance(value, list | tuple): + return all(_is_serializable_value(v) for v in value) + if isinstance(value, dict): + return all(isinstance(k, str) for k in value) and all( + _is_serializable_value(v) for v in value.values() + ) + return False + + +ValueType = Literal["bool", "int", "float", "string", "tuple", "list", "dict", "object"] + + +def get_type_of_value( + value: Any, +) -> ValueType | None: + if value is None: + return None + + value_type = type(value) + primitive_types: dict[type[Any], ValueType] = { + bool: "bool", + int: "int", + str: "string", + } + primitive_type = primitive_types.get(value_type) + if primitive_type: + return primitive_type + + if value_type is float: + return "float" if math.isfinite(value) else "object" + + container_types: dict[type[Any], ValueType] = { + tuple: "tuple", + list: "list", + dict: "dict", + } + container_type = container_types.get(value_type) + if container_type: + return container_type if _is_serializable_value(value) else "object" + + return "object" + + +def serialize_and_store_value( + value: Any, + throw_on_error: bool = True, +) -> tuple[ValueType | None, str | None]: + """ + Serialize a value to a string for storage, along with its type. + If the value is not JSON-serializable, it will be stored as a PressWorkflowObject and the type will be "object". 
+ """ + + from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import PressWorkflowObject + + value_type = get_type_of_value(value) + if value_type is None: + return None, None + + if value_type == "object": + return value_type, PressWorkflowObject.store(value, throw_on_error=throw_on_error) + + try: + serialized_value = json.dumps(value, sort_keys=True, separators=(",", ":")) + return value_type, serialized_value + except (TypeError, ValueError): + # Fallback to pickling for non-JSON-serializable objects + return "object", PressWorkflowObject.store(value) + + +def deserialize_value( + value_type: ValueType | None, + serialized_value: str | None, +) -> Any: + """ + Deserialize a value from its serialized form based on its type. + + Args: + value_type: The type of the value. + serialized_value: The serialized representation of the value. + + Returns: + The deserialized value. + """ + if value_type is None: + return None + + if value_type == "object": + assert serialized_value is not None + from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( + PressWorkflowObject, + ) + + return PressWorkflowObject.get_object(serialized_value) + + try: + value = json.loads(serialized_value) if serialized_value is not None else None + except (TypeError, ValueError) as e: + raise ValueError(f"Cannot deserialize value of type {value_type!r}") from e + + if value is None: + return None + + value_casters: dict[str, Callable[[Any], Any]] = { + "bool": bool, + "int": int, + "float": float, + "string": str, + "tuple": tuple, + "list": list, + "dict": dict, + } + + try: + return value_casters[value_type](value) + except (KeyError, TypeError, ValueError) as e: + raise ValueError(f"Cannot deserialize value of type {value_type!r}") from e From 72ca62e81d6f5c1cf3364f05b84b199df23b53bb Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 13:52:04 +0530 Subject: [PATCH 14/22] 
feat(workflow-engine): Store args and kwargs as json if possible --- .../doctype/press_workflow/decorators.py | 13 +++--- .../press_workflow/press_workflow.json | 43 ++++++++++++++----- .../doctype/press_workflow/press_workflow.py | 19 +++++--- .../press_workflow/workflow_builder.py | 9 +++- .../press_workflow_kv/press_workflow_kv.json | 5 ++- .../press_workflow_task.json | 26 ++++++++++- .../press_workflow_task.py | 17 +++++--- 7 files changed, 98 insertions(+), 34 deletions(-) diff --git a/press/workflow_engine/doctype/press_workflow/decorators.py b/press/workflow_engine/doctype/press_workflow/decorators.py index 679eaca1975..c23a7622e83 100644 --- a/press/workflow_engine/doctype/press_workflow/decorators.py +++ b/press/workflow_engine/doctype/press_workflow/decorators.py @@ -11,13 +11,11 @@ from frappe.model.document import Document from press.workflow_engine.doctype.press_workflow.workflow_builder import WorkflowBuilder -from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( - PressWorkflowObject, -) from press.workflow_engine.utils import ( called_methods_in_order, is_func_accept_task_id, method_title, + serialize_and_store_value, ) if typing.TYPE_CHECKING: @@ -169,12 +167,17 @@ def run_as_workflow(self, *args: Any, **kwargs: Any) -> str: seen: set[str] = set() methods = [m for m in methods if not (m[0] in seen or seen.add(m[0]))] # type: ignore[func-returns-value] + args_type, args_value = serialize_and_store_value(args) + kwargs_type, kwargs_value = serialize_and_store_value(kwargs) + return ( frappe.get_doc( { "doctype": "Press Workflow", - "args": PressWorkflowObject.store(args) if args else None, - "kwargs": PressWorkflowObject.store(kwargs) if kwargs else None, + "args": args_value, + "args_type": args_type, + "kwargs": kwargs_value, + "kwargs_type": kwargs_type, "linked_doctype": instance.doctype, # type: ignore "linked_docname": str(instance.name), # type: ignore "main_method_name": self._wrapped.__name__, diff --git 
a/press/workflow_engine/doctype/press_workflow/press_workflow.json b/press/workflow_engine/doctype/press_workflow/press_workflow.json index 0d6b9a10455..6f564454759 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.json +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.json @@ -13,10 +13,12 @@ "linked_docname", "section_break_bicj", "main_method_name", - "main_method_title", - "column_break_ccie", "args", + "args_type", + "column_break_ccie", + "main_method_title", "kwargs", + "kwargs_type", "kv_storage_section", "key_value_store", "section_break_zpgq", @@ -29,6 +31,7 @@ "steps", "section_break_pfpj", "output", + "output_type", "column_break_lhnh", "exception", "callback_section", @@ -96,9 +99,9 @@ }, { "fieldname": "output", - "fieldtype": "Link", + "fieldtype": "Data", "label": "Output", - "options": "Press Workflow Object", + "length": 1000, "read_only": 1 }, { @@ -184,16 +187,16 @@ }, { "fieldname": "args", - "fieldtype": "Link", + "fieldtype": "Data", "label": "Args", - "options": "Press Workflow Object", + "length": 1000, "set_only_once": 1 }, { "fieldname": "kwargs", - "fieldtype": "Link", + "fieldtype": "Data", "label": "Kwargs", - "options": "Press Workflow Object", + "length": 1000, "set_only_once": 1 }, { @@ -286,6 +289,26 @@ "fieldname": "is_force_failure_requested", "fieldtype": "Check", "label": "Force Failure Requested" + }, + { + "fieldname": "args_type", + "fieldtype": "Select", + "label": "Args Type", + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject", + "set_only_once": 1 + }, + { + "fieldname": "kwargs_type", + "fieldtype": "Select", + "label": "Kwargs Type", + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject", + "set_only_once": 1 + }, + { + "fieldname": "output_type", + "fieldtype": "Select", + "label": "Output Type", + "read_only": 1 } ], "grid_page_length": 50, @@ -296,7 +319,7 @@ "link_fieldname": "workflow" } ], - "modified": "2026-04-24 11:33:09.864201", + "modified": "2026-04-24 
13:36:34.775783", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow", @@ -339,4 +362,4 @@ "sort_field": "creation", "sort_order": "DESC", "states": [] -} \ No newline at end of file +} diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.py b/press/workflow_engine/doctype/press_workflow/press_workflow.py index 912f4001a95..ffce70a7d5a 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.py +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.py @@ -20,7 +20,7 @@ from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( PressWorkflowObject, ) -from press.workflow_engine.utils import calculate_duration +from press.workflow_engine.utils import calculate_duration, serialize_and_store_value if TYPE_CHECKING: from press.workflow_engine.doctype.press_workflow.workflow_builder import WorkflowBuilder @@ -41,7 +41,8 @@ class PressWorkflow(Document): from press.workflow_engine.doctype.press_workflow_kv.press_workflow_kv import PressWorkflowKV from press.workflow_engine.doctype.press_workflow_step.press_workflow_step import PressWorkflowStep - args: DF.Link | None + args: DF.Data | None + args_type: DF.Literal["int", "float", "string", "tuple", "list", "dict", "object"] callback_next_retry_at: DF.Datetime | None callback_status: DF.Literal["Pending", "Success", "Failure", "Fatal"] callback_traceback: DF.LongText | None @@ -50,14 +51,16 @@ class PressWorkflow(Document): exception: DF.Link | None is_force_failure_requested: DF.Check key_value_store: DF.Table[PressWorkflowKV] - kwargs: DF.Link | None + kwargs: DF.Data | None + kwargs_type: DF.Literal["int", "float", "string", "tuple", "list", "dict", "object"] linked_docname: DF.DynamicLink linked_doctype: DF.Link main_method_name: DF.Data main_method_title: DF.Data max_no_of_callback_attempts: DF.Int no_of_callback_attempts: DF.Int - output: DF.Link | None + output: DF.Data | None + output_type: DF.Literal[None] 
start: DF.Datetime | None status: DF.Literal["Queued", "Running", "Success", "Failure", "Fatal"] stdout: DF.LongText | None @@ -102,7 +105,8 @@ def run(self): # noqa: C901 - best to keep it in one place self.save() return - output = None + output_value = None + output_type = None exception = None workflow_exception_traceback = None status = "Running" @@ -127,7 +131,7 @@ def run(self): # noqa: C901 - best to keep it in one place result = getattr(reference_doc, self.main_method_name)(*args, **kwargs) if result is not None: - output = PressWorkflowObject.store(result) # type: ignore + output_type, output_value = serialize_and_store_value(result) status = "Success" except PressWorkflowTaskEnqueued: # This is expected when a task is enqueued. @@ -150,7 +154,8 @@ def run(self): # noqa: C901 - best to keep it in one place self.duration = calculate_duration(self.start, self.end) self.status = status - self.output = output + self.output = output_value + self.output_type = output_type self.stdout = (self.stdout or "") + buffer.getvalue() if frappe.flags.in_test and self.stdout: diff --git a/press/workflow_engine/doctype/press_workflow/workflow_builder.py b/press/workflow_engine/doctype/press_workflow/workflow_builder.py index 049a142d38f..edfda65a724 100644 --- a/press/workflow_engine/doctype/press_workflow/workflow_builder.py +++ b/press/workflow_engine/doctype/press_workflow/workflow_builder.py @@ -23,6 +23,7 @@ generate_function_signature, is_func_accept_task_id, method_title, + serialize_and_store_value, ) if TYPE_CHECKING: @@ -86,8 +87,12 @@ def run_task( # noqa: C901 task_doc.method_title = method_title(wrapped) # type: ignore task_doc.signature = signature # type: ignore - task_doc.args = PressWorkflowObject.store(args) if args else None # type: ignore - task_doc.kwargs = PressWorkflowObject.store(kwargs) if kwargs else None # type: ignore + args_type, args_value = serialize_and_store_value(args) + kwargs_type, kwargs_value = serialize_and_store_value(kwargs) + 
task_doc.args = args_value + task_doc.args_type = args_type + task_doc.kwargs = kwargs_value + task_doc.kwargs_type = kwargs_type task_doc.status = "Queued" # type: ignore task_doc.queue = queue # type: ignore task_doc.timeout = timeout or 0 # type: ignore diff --git a/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json b/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json index 57f63a65a2d..fd7b913a385 100644 --- a/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json +++ b/press/workflow_engine/doctype/press_workflow_kv/press_workflow_kv.json @@ -23,7 +23,8 @@ "fieldname": "value", "fieldtype": "Data", "in_list_view": 1, - "label": "Value" + "label": "Value", + "length": 1000 }, { "default": "object", @@ -39,7 +40,7 @@ "index_web_pages_for_search": 1, "istable": 1, "links": [], - "modified": "2026-04-24 12:02:00.810732", + "modified": "2026-04-24 13:24:11.790831", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow KV", diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json index 463ee0d37f1..3053ea93d5f 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json @@ -14,9 +14,12 @@ "method_name", "method_title", "args", + "args_type", "kwargs", + "kwargs_type", "column_break_fiyw", "output", + "output_type", "exception", "signature", "section_break_jvoo", @@ -182,11 +185,30 @@ "fieldtype": "Long Text", "label": "Traceback", "read_only": 1 + }, + { + "fieldname": "args_type", + "fieldtype": "Select", + "label": "Args Type", + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject" + }, + { + "fieldname": "kwargs_type", + "fieldtype": "Data", + "label": "Kwargs Type", + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject" + }, + { + "fieldname": 
"output_type", + "fieldtype": "Select", + "label": "Output Type", + "length": 1000, + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject" } ], "grid_page_length": 50, "links": [], - "modified": "2026-04-23 19:21:35.153779", + "modified": "2026-04-24 13:38:30.338341", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow Task", @@ -231,4 +253,4 @@ "sort_field": "creation", "sort_order": "DESC", "states": [] -} \ No newline at end of file +} diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py index e8f605ff8ff..8928b66f5db 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py @@ -16,7 +16,7 @@ from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( PressWorkflowObject, ) -from press.workflow_engine.utils import calculate_duration +from press.workflow_engine.utils import calculate_duration, deserialize_value, serialize_and_store_value if TYPE_CHECKING: from press.workflow_engine.doctype.press_workflow.workflow_builder import WorkflowBuilder @@ -32,13 +32,16 @@ class PressWorkflowTask(Document): from frappe.types import DF args: DF.Link | None + args_type: DF.Literal["int", "float", "string", "tuple", "list", "dict", "object"] duration: DF.Duration | None end: DF.Datetime | None exception: DF.Link | None kwargs: DF.Link | None + kwargs_type: DF.Data | None method_name: DF.Data method_title: DF.Data output: DF.Link | None + output_type: DF.Literal["int", "float", "string", "tuple", "list", "dict", "object"] parent_task: DF.Link | None queue: DF.Data | None signature: DF.Data @@ -103,8 +106,8 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac reference_doc.flags.current_press_workflow_task = self.name try: - args = 
PressWorkflowObject.get_object(self.args) if self.args else () - kwargs = PressWorkflowObject.get_object(self.kwargs) if self.kwargs else {} + args = deserialize_value(self.args) if self.args else () + kwargs = deserialize_value(self.kwargs) if self.kwargs else {} except Exception as e: self.exception = PressWorkflowObject.store(e, throw_on_error=False) self.status = "Failure" @@ -133,7 +136,8 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac if not frappe.flags.in_test: frappe.db.commit() # nosemgrep - output = None + output_value = None + output_type = None exception = None exception_traceback = None status = "Running" @@ -150,7 +154,7 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac result = getattr(reference_doc, self.method_name)(*args, **kwargs) if result is not None: - output = PressWorkflowObject.store(result) + output_type, output_value = serialize_and_store_value(result) status = "Success" except PressWorkflowTaskEnqueued: @@ -174,7 +178,8 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac self.duration = calculate_duration(self.start, self.end) self.status = status - self.output = output + self.output = output_value + self.output_type = output_type self.exception = exception self.stdout = (self.stdout or "") + buffer.getvalue() self.traceback = exception_traceback or getattr(self, "traceback", None) From 4e064bd6d963a88bfa450eeab51f15351cce3ad9 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 14:10:41 +0530 Subject: [PATCH 15/22] refactor(release-pipeline): Use workflow failure callback --- .../doctype/release_pipeline/release_pipeline.py | 6 +++++- .../press_workflow_task/press_workflow_task.py | 12 +----------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/press/press/doctype/release_pipeline/release_pipeline.py b/press/press/doctype/release_pipeline/release_pipeline.py 
index fadcec4b479..6b05e2d8b70 100644 --- a/press/press/doctype/release_pipeline/release_pipeline.py +++ b/press/press/doctype/release_pipeline/release_pipeline.py @@ -180,9 +180,10 @@ def update_pipeline_status( "Failure", "Retrying", ], + ignore_permissions: bool = False, ): self.status = status - self.save() + self.save(ignore_permissions=ignore_permissions) if self.status == "Failure": self.send_failure_notification() @@ -625,3 +626,6 @@ def create_release( workflow_status = frappe.db.get_value("Press Workflow", self.workflow, "status") if workflow_status == "Failure": self.update_pipeline_status("Failure") + + def on_workflow_failure(self): + self.update_pipeline_status("Failure", ignore_permissions=True) diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py index 8928b66f5db..e3380a9aa7e 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py @@ -4,7 +4,7 @@ from __future__ import annotations import io -from contextlib import redirect_stdout, suppress +from contextlib import redirect_stdout from typing import TYPE_CHECKING import frappe @@ -78,13 +78,6 @@ def update_tracked_step_status(self): }.get(self.status, "Pending"), ) - def _mark_reference_doc_as_failed(self, reference_doc: WorkflowBuilder): - """In case the link document has a status field try and mark it as failure to reflect the workflow failure.""" - with suppress(Exception): # Try your best but don't fail - if hasattr(reference_doc, "status"): - reference_doc.status = "Failure" - reference_doc.save(ignore_permissions=True) - def run(self): # noqa: C901 - Best to keep workflow execution logic in one place assert self.name, "Task must be saved before it can be run" frappe.get_value( @@ -184,9 +177,6 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac 
self.stdout = (self.stdout or "") + buffer.getvalue() self.traceback = exception_traceback or getattr(self, "traceback", None) - if self.status == "Failure": - self._mark_reference_doc_as_failed(reference_doc) - if frappe.flags.in_test and self.stdout: print(self.stdout) From ebfba735f4e844a1fc06a61add1ef4db38fc5893 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 14:16:24 +0530 Subject: [PATCH 16/22] fix(press-workflow): In task while deserialize pass the type as well --- press/press/doctype/press_job/press_job.js | 2 +- .../doctype/press_workflow_task/press_workflow_task.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/press/press/doctype/press_job/press_job.js b/press/press/doctype/press_job/press_job.js index 41afd743164..125c8b82525 100644 --- a/press/press/doctype/press_job/press_job.js +++ b/press/press/doctype/press_job/press_job.js @@ -3,7 +3,7 @@ frappe.ui.form.on('Press Job', { refresh: function (frm) { - [[__('Retry'), 'retry', frm.doc.status === 'Failed']].forEach( + [[__('Retry'), 'retry', frm.doc.status === 'Failure']].forEach( ([label, method, condition]) => { if (condition) { frm.add_custom_button( diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py index e3380a9aa7e..eb80263070d 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py @@ -99,8 +99,8 @@ def run(self): # noqa: C901 - Best to keep workflow execution logic in one plac reference_doc.flags.current_press_workflow_task = self.name try: - args = deserialize_value(self.args) if self.args else () - kwargs = deserialize_value(self.kwargs) if self.kwargs else {} + args = deserialize_value(self.args_type, self.args) if self.args else () + kwargs = deserialize_value(self.kwargs_type, 
self.kwargs) if self.kwargs else {} except Exception as e: self.exception = PressWorkflowObject.store(e, throw_on_error=False) self.status = "Failure" From 7d47f5d728820418c87a8de71b3db392d499f22e Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 14:48:43 +0530 Subject: [PATCH 17/22] fix(workflow-engine): Test cases --- .../doctype/press_workflow/press_workflow.py | 16 +- .../doctype/press_workflow/test_decorators.py | 187 +++++++++++ .../doctype/press_workflow/test_exceptions.py | 61 ++++ .../press_workflow/test_press_workflow.py | 136 ++++++++ .../press_workflow/test_workflow_builder.py | 211 +++++++++++++ .../press_workflow/workflow_builder.py | 26 +- .../test_press_workflow_kv.py | 67 +++- .../test_press_workflow_object.py | 42 ++- .../press_workflow_task.json | 14 +- .../press_workflow_task.py | 47 ++- .../test_press_workflow_task.py | 298 +++++++++++++++++- .../test_press_workflow_test.py | 161 +++++++++- press/workflow_engine/test_utils.py | 141 +++++++++ 13 files changed, 1330 insertions(+), 77 deletions(-) create mode 100644 press/workflow_engine/doctype/press_workflow/test_decorators.py create mode 100644 press/workflow_engine/doctype/press_workflow/test_exceptions.py create mode 100644 press/workflow_engine/doctype/press_workflow/test_workflow_builder.py diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.py b/press/workflow_engine/doctype/press_workflow/press_workflow.py index ffce70a7d5a..b6e2cbc64d3 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.py +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.py @@ -20,7 +20,11 @@ from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( PressWorkflowObject, ) -from press.workflow_engine.utils import calculate_duration, serialize_and_store_value +from press.workflow_engine.utils import ( + calculate_duration, + deserialize_value, + serialize_and_store_value, 
+) if TYPE_CHECKING: from press.workflow_engine.doctype.press_workflow.workflow_builder import WorkflowBuilder @@ -97,8 +101,8 @@ def run(self): # noqa: C901 - best to keep it in one place reference_doc.workflow_name = self.name reference_doc.flags.in_press_workflow_execution = True - args = PressWorkflowObject.get_object(self.args) if self.args else () - kwargs = PressWorkflowObject.get_object(self.kwargs) if self.kwargs else {} + args = deserialize_value(self.args_type, self.args) or () + kwargs = deserialize_value(self.kwargs_type, self.kwargs) or {} except Exception: self.status = "Fatal" self.traceback = frappe.get_traceback() @@ -227,8 +231,8 @@ def execute_callback(self): self.callback_traceback = frappe.get_traceback() else: self.callback_status = "Failure" - self.callback_next_retry_at = frappe.utils.add_minutes( - now_datetime(), 2**self.no_of_callback_attempts + self.callback_next_retry_at = frappe.utils.add_to_date( + minutes=2**self.no_of_callback_attempts ) self.save() @@ -274,7 +278,7 @@ def get_result(self): if self.status == "Success": if self.output: - return PressWorkflowObject.get_object(self.output) + return deserialize_value(self.output_type, self.output) return None if self.status == "Failure": diff --git a/press/workflow_engine/doctype/press_workflow/test_decorators.py b/press/workflow_engine/doctype/press_workflow/test_decorators.py new file mode 100644 index 00000000000..34ab8961e32 --- /dev/null +++ b/press/workflow_engine/doctype/press_workflow/test_decorators.py @@ -0,0 +1,187 @@ +# Copyright (c) 2026, Frappe and Contributors +# See license.txt + +from unittest.mock import patch + +import frappe +from frappe.model.document import Document +from frappe.tests.utils import FrappeTestCase + +from press.utils.test import foreground_enqueue, foreground_enqueue_doc +from press.workflow_engine.doctype.press_workflow.decorators import ( + BoundFlow, + _in_workflow_execution, + flow, + task, +) +from 
press.workflow_engine.doctype.press_workflow.workflow_builder import WorkflowBuilder + + +@patch("frappe.enqueue_doc", new=foreground_enqueue_doc) +@patch("frappe.enqueue", new=foreground_enqueue) +@patch("frappe.db.commit", new=lambda: None) +class TestDecorators(FrappeTestCase): + def setUp(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + self.doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 3, + "input_b": 2, + } + ).insert() + + def tearDown(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + self.doc.delete() + + def test_in_workflow_execution_true(self): + class TestDoc(WorkflowBuilder): + pass + + instance = TestDoc({"doctype": "Press Workflow Test"}) + instance.name = "test-name" + + def test_in_workflow_execution_false_no_workflow_name(self): + class TestDoc(WorkflowBuilder): + pass + + instance = TestDoc({"doctype": "Press Workflow Test"}) + instance.name = "test-name" + instance.workflow_name = None + instance.flags.in_press_workflow_execution = True + + self.assertFalse(_in_workflow_execution(instance)) + + def test_in_workflow_execution_false_no_flag(self): + class TestDoc(WorkflowBuilder): + pass + + instance = TestDoc({"doctype": "Press Workflow Test"}) + instance.name = "test-name" + instance.workflow_name = "test-workflow" + instance.flags.in_press_workflow_execution = False + + self.assertFalse(_in_workflow_execution(instance)) + + def test_in_workflow_execution_false_not_workflow_builder(self): + class NotWorkflowBuilder: + pass + + instance = NotWorkflowBuilder() + self.assertFalse(_in_workflow_execution(instance)) + + def test_task_decorator_direct_call(self): + class TestDoc(WorkflowBuilder): + @task + def my_task(self): + return "task result" + + instance = TestDoc({"doctype": "Press Workflow Test"}) + result = instance.my_task() + self.assertEqual(result, 
"task result") + + def test_task_decorator_with_queue_and_timeout(self): + class TestDoc(WorkflowBuilder): + @task(queue="long", timeout=3600) + def my_task(self): + return "task result" + + instance = TestDoc({"doctype": "Press Workflow Test"}) + result = instance.my_task() + self.assertEqual(result, "task result") + + def test_task_with_task_id(self): + class TestDoc(WorkflowBuilder): + @task + def my_task(self, task_id=None): + return f"task_id={task_id}" + + instance = TestDoc({"doctype": "Press Workflow Test"}) + result = instance.my_task.with_task_id("my-id")() + self.assertEqual(result, "task_id=my-id") + + def test_task_with_task_id_in_workflow(self): + wf_name = self.doc.main_with_task_id_passthrough.run_as_workflow() + wf = frappe.get_doc("Press Workflow", wf_name) + wf.run() + + self.assertEqual(wf.status, "Success") + self.assertEqual(wf.get_result(), 9) + + def test_flow_decorator_normal_call(self): + result = self.doc.main_success() + self.assertEqual(result, "success output") + + def test_flow_decorator_run_as_workflow(self): + wf_name = self.doc.main_success.run_as_workflow() + self.assertTrue(wf_name) + self.assertTrue(frappe.db.exists("Press Workflow", wf_name)) + + def test_flow_decorator_with_args(self): + wf_name = self.doc.flow_with_args.run_as_workflow(x=5, y=10) + wf = frappe.get_doc("Press Workflow", wf_name) + wf.run() + + self.assertEqual(wf.status, "Success") + self.assertEqual(wf.get_result(), 15) + + def test_flow_descriptor_on_non_document_raises(self): + with self.assertRaises((TypeError, RuntimeError)): + + class NotADocument: + @flow + def my_flow(self): + pass + + def test_run_as_workflow_on_non_workflow_builder_raises(self): + class TestDoc(Document): + @flow + def my_flow(self): + return "result" + + instance = TestDoc({"doctype": "Press Workflow Test"}) + instance.name = "test" + instance.doctype = "TestDoc" + + bound_flow = instance.my_flow + self.assertIsInstance(bound_flow, BoundFlow) + + with self.assertRaises(TypeError): + 
bound_flow.run_as_workflow() + + def test_flow_callable_protocol(self): + bound_flow = self.doc.main_success + self.assertTrue(callable(bound_flow)) + self.assertTrue(hasattr(bound_flow, "run_as_workflow")) + + def test_task_descriptor_class_access(self): + class TestDoc(WorkflowBuilder): + @task + def my_task(self): + return "result" + + self.assertTrue(hasattr(TestDoc, "my_task")) + + def test_task_without_task_id_strips_kwarg(self): + class TestDoc(WorkflowBuilder): + @task + def my_task(self): + return "no task_id" + + instance = TestDoc({"doctype": "Press Workflow Test"}) + result = instance.my_task() + self.assertEqual(result, "no task_id") + + def test_flow_creates_workflow_with_steps(self): + wf_name = self.doc.main_with_task.run_as_workflow() + wf = frappe.get_doc("Press Workflow", wf_name) + + self.assertEqual(wf.linked_doctype, "Press Workflow Test") + self.assertEqual(wf.linked_docname, self.doc.name) + self.assertEqual(wf.main_method_name, "main_with_task") + self.assertTrue(len(wf.steps) > 0) diff --git a/press/workflow_engine/doctype/press_workflow/test_exceptions.py b/press/workflow_engine/doctype/press_workflow/test_exceptions.py new file mode 100644 index 00000000000..daa8460c1ed --- /dev/null +++ b/press/workflow_engine/doctype/press_workflow/test_exceptions.py @@ -0,0 +1,61 @@ +# Copyright (c) 2026, Frappe and Contributors +# See license.txt + +from frappe.tests.utils import FrappeTestCase + +from press.workflow_engine.doctype.press_workflow.exceptions import ( + PressWorkflowFailedError, + PressWorkflowFatalError, + PressWorkflowRunningError, + PressWorkflowTaskEnqueued, +) + + +class TestPressWorkflowExceptions(FrappeTestCase): + def test_press_workflow_task_enqueued_with_task_name(self): + exc = PressWorkflowTaskEnqueued("Task is enqueued", "wf-001", "task-001") + self.assertEqual(str(exc), "Task is enqueued") + self.assertEqual(exc.workflow_name, "wf-001") + self.assertEqual(exc.task_name, "task-001") + + def 
test_press_workflow_task_enqueued_without_task_name(self): + exc = PressWorkflowTaskEnqueued("Task is enqueued", "wf-001") + self.assertEqual(str(exc), "Task is enqueued") + self.assertEqual(exc.workflow_name, "wf-001") + self.assertIsNone(exc.task_name) + + def test_press_workflow_running_error(self): + exc = PressWorkflowRunningError("Workflow wf-001 is currently running") + self.assertEqual(str(exc), "Workflow wf-001 is currently running") + + def test_press_workflow_failed_error(self): + exc = PressWorkflowFailedError("Workflow failed with no exception") + self.assertEqual(str(exc), "Workflow failed with no exception") + + def test_press_workflow_fatal_error_with_traceback(self): + traceback = "Traceback (most recent call last):\n File 'test.py', line 1" + exc = PressWorkflowFatalError("Fatal error occurred", traceback=traceback) + self.assertEqual(str(exc), "Fatal error occurred") + self.assertEqual(exc.traceback, traceback) + + def test_press_workflow_fatal_error_without_traceback(self): + exc = PressWorkflowFatalError("Fatal error occurred") + self.assertEqual(str(exc), "Fatal error occurred") + self.assertIsNone(exc.traceback) + + def test_exceptions_are_subclasses_of_exception(self): + self.assertTrue(issubclass(PressWorkflowTaskEnqueued, Exception)) + self.assertTrue(issubclass(PressWorkflowRunningError, Exception)) + self.assertTrue(issubclass(PressWorkflowFailedError, Exception)) + self.assertTrue(issubclass(PressWorkflowFatalError, Exception)) + + def test_catch_press_workflow_task_enqueued(self): + with self.assertRaises(PressWorkflowTaskEnqueued) as ctx: + raise PressWorkflowTaskEnqueued("Test message", "wf-001", "task-001") + self.assertEqual(ctx.exception.workflow_name, "wf-001") + self.assertEqual(ctx.exception.task_name, "task-001") + + def test_catch_press_workflow_fatal_error(self): + with self.assertRaises(PressWorkflowFatalError) as ctx: + raise PressWorkflowFatalError("Test fatal", traceback="test traceback") + 
self.assertEqual(ctx.exception.traceback, "test traceback") diff --git a/press/workflow_engine/doctype/press_workflow/test_press_workflow.py b/press/workflow_engine/doctype/press_workflow/test_press_workflow.py index 510c4dc5939..d0768612b98 100644 --- a/press/workflow_engine/doctype/press_workflow/test_press_workflow.py +++ b/press/workflow_engine/doctype/press_workflow/test_press_workflow.py @@ -107,3 +107,139 @@ def test_flow_with_args(self): wf = self.get_wf(self.doc.flow_with_args.run_as_workflow(x=4, y=5)) self.assertEqual(wf.status, "Success") self.assertEqual(wf.get_result(), 9) + + def test_force_fail(self): + with patch( + "press.workflow_engine.doctype.press_workflow.press_workflow.enqueue_workflow", + new=lambda *_args, **_kwargs: None, + ): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + "status": "Queued", + } + ).insert(ignore_permissions=True) + + wf.force_fail() + self.assertTrue(frappe.db.get_value("Press Workflow", wf.name, "is_force_failure_requested")) + + def test_force_fail_already_completed(self): + wf_name = self.doc.main_success.run_as_workflow() + wf = self.get_wf(wf_name) + self.assertEqual(wf.status, "Success") + + with self.assertRaises(frappe.ValidationError): + wf.force_fail() + + def test_on_trash_deletes_tasks(self): + wf_name = self.doc.main_with_task.run_as_workflow() + wf = self.get_wf(wf_name) + self.assertEqual(wf.status, "Success") + + tasks_before = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + self.assertTrue(len(tasks_before) > 0) + + wf.delete() + tasks_after = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + self.assertEqual(len(tasks_after), 0) + + def test_workflow_fatal_status(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": 
self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + "status": "Fatal", + "traceback": "Test traceback", + } + ).insert(ignore_permissions=True) + + from press.workflow_engine.doctype.press_workflow.exceptions import PressWorkflowFatalError + + with self.assertRaises(PressWorkflowFatalError) as ctx: + wf.get_result() + self.assertIn("fatal error", str(ctx.exception).lower()) + self.assertEqual(ctx.exception.traceback, "Test traceback") + + def test_workflow_queued_running_error(self): + with patch( + "press.workflow_engine.doctype.press_workflow.press_workflow.enqueue_workflow", + new=lambda *_args, **_kwargs: None, + ): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + "status": "Queued", + } + ).insert(ignore_permissions=True) + + from press.workflow_engine.doctype.press_workflow.exceptions import PressWorkflowRunningError + + with self.assertRaises(PressWorkflowRunningError): + wf.get_result() + + wf.reload() + wf.status = "Running" + wf.save() + with self.assertRaises(PressWorkflowRunningError): + wf.get_result() + + def test_workflow_success_with_none_output(self): + wf_name = self.doc.main_success.run_as_workflow() + wf = self.get_wf(wf_name) + self.assertEqual(wf.status, "Success") + result = wf.get_result() + self.assertEqual(result, "success output") + + def test_workflow_with_skipped_steps(self): + wf_name = self.doc.skipped_steps_flow.run_as_workflow() + wf = self.get_wf(wf_name) + self.assertEqual(wf.status, "Success") + + steps = wf.steps + self.assertTrue(len(steps) > 0) + for step in steps: + self.assertEqual(step.status, "Skipped") + + def test_workflow_as_flow_with_multiple_tasks(self): + wf_name = self.doc.main_as_flow.run_as_workflow() + wf = self.get_wf(wf_name) + self.assertEqual(wf.status, "Success") + self.assertEqual(wf.get_result(), "flow 
done") + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}, pluck="name") + self.assertTrue(len(tasks) >= 2) + + def test_workflow_with_kwargs(self): + wf_name = self.doc.flow_with_args.run_as_workflow(x=10, y=20) + wf = self.get_wf(wf_name) + self.assertEqual(wf.status, "Success") + self.assertEqual(wf.get_result(), 30) + + def test_workflow_failure_with_no_exception(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + "status": "Failure", + } + ).insert(ignore_permissions=True) + + from press.workflow_engine.doctype.press_workflow.exceptions import PressWorkflowFailedError + + with self.assertRaises(PressWorkflowFailedError) as ctx: + wf.get_result() + self.assertIn("no exception was recorded", str(ctx.exception).lower()) diff --git a/press/workflow_engine/doctype/press_workflow/test_workflow_builder.py b/press/workflow_engine/doctype/press_workflow/test_workflow_builder.py new file mode 100644 index 00000000000..83658beb8b1 --- /dev/null +++ b/press/workflow_engine/doctype/press_workflow/test_workflow_builder.py @@ -0,0 +1,211 @@ +# Copyright (c) 2026, Frappe and Contributors +# See license.txt + +from unittest.mock import patch + +import frappe +from frappe.tests.utils import FrappeTestCase + +from press.utils.test import foreground_enqueue, foreground_enqueue_doc +from press.workflow_engine.doctype.press_workflow.exceptions import PressWorkflowTaskEnqueued +from press.workflow_engine.doctype.press_workflow.workflow_builder import ( + ensure_to_resolve_context, +) +from press.workflow_engine.doctype.press_workflow_kv.press_workflow_kv import ( + InMemoryKVStore, + WorkflowKVStore, +) + + +@patch("frappe.enqueue_doc", new=foreground_enqueue_doc) +@patch("frappe.enqueue", new=foreground_enqueue) +@patch("frappe.db.commit", new=lambda: None) +class 
TestWorkflowBuilder(FrappeTestCase): + def setUp(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + frappe.db.delete("Press Workflow KV") + self.doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 3, + "input_b": 2, + } + ).insert() + + def tearDown(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + frappe.db.delete("Press Workflow KV") + self.doc.delete() + + def test_kv_property_in_memory_default(self): + kv = self.doc.kv + self.assertIsInstance(kv, InMemoryKVStore) + + def test_kv_property_set_and_get_in_memory(self): + self.doc.kv.set("test_key", "test_value") + self.assertEqual(self.doc.kv.get("test_key"), "test_value") + + def test_kv_property_delete_in_memory(self): + self.doc.kv.set("test_key", "test_value") + self.doc.kv.delete("test_key") + self.assertIsNone(self.doc.kv.get("test_key")) + + def test_kv_property_workflow_store(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + } + ).insert(ignore_permissions=True) + + self.doc.workflow_name = wf.name + self.doc.kv_store_type = "workflow_store" + self.doc.kv_store_reference = None + + kv = self.doc.kv + self.assertIsInstance(kv, WorkflowKVStore) + + def test_resolve_context_with_workflow_name(self): + self.doc.workflow_name = "test-workflow-123" + self.doc.resolve_context() + self.assertEqual(self.doc.workflow_name, "test-workflow-123") + + def test_resolve_context_with_frappe_flag(self): + self.addCleanup(lambda: frappe.flags.pop("current_press_workflow", None)) + frappe.flags.current_press_workflow = "test-workflow-from-flag" + + self.doc.workflow_name = None + self.doc.resolve_context() + + self.assertEqual(self.doc.workflow_name, 
"test-workflow-from-flag") + del frappe.flags.current_press_workflow + + def test_resolve_context_without_workflow(self): + self.doc.workflow_name = None + self.doc.resolve_context() + + self.assertIsNone(self.doc.workflow_name) + self.assertEqual(self.doc.kv_store_type, "in_memory") + + def test_defer_current_task_outside_workflow(self): + self.doc.flags.in_press_workflow_execution = False + self.doc.defer_current_task("Defer this task") + + def test_defer_current_task_inside_workflow(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + } + ).insert(ignore_permissions=True) + + self.doc.workflow_name = wf.name + self.doc.flags.in_press_workflow_execution = True + self.doc.flags.current_press_workflow_task = "task-001" + + with self.assertRaises(PressWorkflowTaskEnqueued) as ctx: + self.doc.defer_current_task("Please defer") + self.assertEqual(ctx.exception.workflow_name, wf.name) + self.assertEqual(ctx.exception.task_name, "task-001") + + def test_defer_current_task_without_task_name(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + } + ).insert(ignore_permissions=True) + + self.doc.workflow_name = wf.name + self.doc.flags.in_press_workflow_execution = True + + with self.assertRaises(PressWorkflowTaskEnqueued) as ctx: + self.doc.defer_current_task() + self.assertEqual(ctx.exception.workflow_name, wf.name) + self.assertIsNone(ctx.exception.task_name) + + def test_ensure_to_resolve_context_decorator(self): + @ensure_to_resolve_context + def my_method(self): + return "resolved" + + result = my_method(self.doc) + self.assertEqual(result, "resolved") + + def test_run_task_returns_cached_result_on_success(self): + wf = frappe.get_doc( 
+ { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_args_task", + "main_method_title": "Main With Args Task", + "steps": [ + { + "step_title": "Add", + "step_method": "add", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + self.assertEqual(wf.status, "Success") + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + self.assertEqual(len(tasks), 1) + + def test_run_task_raises_exception_on_failure(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_failing_task", + "main_method_title": "Main With Failing Task", + "steps": [ + { + "step_title": "Sample Failing Task", + "step_method": "sample_failing_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + self.assertEqual(wf.status, "Failure") + + def test_workflow_builder_attributes(self): + self.assertIsNone(self.doc.workflow_name) + self.assertIsNone(self.doc.workflow_doc) + self.assertEqual(self.doc.kv_store_type, "in_memory") + self.assertIsNone(self.doc.kv_store_reference) + self.assertIsNone(self.doc.current_task_signature) + + def test_kv_store_type_change_discards_cache(self): + self.doc.kv.set("key1", "value1") + self.doc.kv_store_reference = InMemoryKVStore() + + self.addCleanup(lambda: frappe.flags.pop("current_press_workflow", None)) + frappe.flags.current_press_workflow = "test-wf-for-kv-change" + self.doc.workflow_name = None + self.doc.resolve_context() + + self.assertEqual(self.doc.kv_store_type, "workflow_store") + self.assertIsNone(self.doc.kv_store_reference) + del frappe.flags.current_press_workflow diff --git a/press/workflow_engine/doctype/press_workflow/workflow_builder.py b/press/workflow_engine/doctype/press_workflow/workflow_builder.py index edfda65a724..8ff28f2b850 
100644 --- a/press/workflow_engine/doctype/press_workflow/workflow_builder.py +++ b/press/workflow_engine/doctype/press_workflow/workflow_builder.py @@ -17,9 +17,9 @@ from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( ObjectDeserializeError, ObjectPreviousSerializationFailedError, - PressWorkflowObject, ) from press.workflow_engine.utils import ( + deserialize_value, generate_function_signature, is_func_accept_task_id, method_title, @@ -106,19 +106,17 @@ def run_task( # noqa: C901 # Store the reference of the task in workflow doctype # If it's a nested task, ignore it if not task_doc.parent_task and ( - tracked_step := str( - frappe.db.exists( - "Press Workflow Step", - { - "parenttype": "Press Workflow", - "parent": self.workflow_name, - "step_method": wrapped.__name__, - "task": ("is", "not set"), - }, - ) + tracked_step := frappe.db.exists( + "Press Workflow Step", + { + "parenttype": "Press Workflow", + "parent": self.workflow_name, + "step_method": wrapped.__name__, + "task": ("is", "not set"), + }, ) ): - frappe.db.set_value("Press Workflow Step", tracked_step, "task", task_doc.name) + frappe.db.set_value("Press Workflow Step", str(tracked_step), "task", task_doc.name) task_name = task_doc.name assert task_name, "Task must be saved successfully before it can be run" @@ -133,12 +131,12 @@ def run_task( # noqa: C901 task_doc: PressWorkflowTask = frappe.get_doc("Press Workflow Task", task_name) # type: ignore if task_doc.status == "Success": - return PressWorkflowObject.get_object(task_doc.output) if task_doc.output else None + return deserialize_value(task_doc.output_type, task_doc.output) if task_doc.status == "Failure": if task_doc.exception: try: - exc = PressWorkflowObject.get_object(task_doc.exception) + exc = deserialize_value("object", task_doc.exception) except ObjectPreviousSerializationFailedError as e: raise RuntimeError( f"Task '{task_doc.method_title}' failed. 
Original exception could not be " diff --git a/press/workflow_engine/doctype/press_workflow_kv/test_press_workflow_kv.py b/press/workflow_engine/doctype/press_workflow_kv/test_press_workflow_kv.py index 3af2845a30b..ddd7a4aed60 100644 --- a/press/workflow_engine/doctype/press_workflow_kv/test_press_workflow_kv.py +++ b/press/workflow_engine/doctype/press_workflow_kv/test_press_workflow_kv.py @@ -36,17 +36,22 @@ def test_workflow_kv_store_set_and_get(self): def test_workflow_kv_store_update(self): self.store.set("test_key", "initial_value") initial_kv_name = self.store._get_kv_record_name("test_key") - initial_obj_name = frappe.db.get_value("Press Workflow KV", initial_kv_name, "value") + initial_type, initial_value = frappe.db.get_value( + "Press Workflow KV", initial_kv_name, ["type", "value"] + ) self.store.set("test_key", "updated_value") updated_kv_name = self.store._get_kv_record_name("test_key") - updated_obj_name = frappe.db.get_value("Press Workflow KV", updated_kv_name, "value") + _, updated_value = frappe.db.get_value("Press Workflow KV", updated_kv_name, ["type", "value"]) self.assertEqual(initial_kv_name, updated_kv_name) - self.assertNotEqual(initial_obj_name, updated_obj_name) + self.assertNotEqual(initial_value, updated_value) - is_deleted = frappe.db.get_value("Press Workflow Object", initial_obj_name, "deleted") - self.assertTrue(is_deleted) + # For JSON-serializable values, no Press Workflow Object is created. + # Only verify object deletion tracking when type is "object". 
+ if initial_type == "object": + is_deleted = frappe.db.get_value("Press Workflow Object", initial_value, "deleted") + self.assertTrue(is_deleted) value = self.store.get("test_key") self.assertEqual(value, "updated_value") @@ -54,16 +59,62 @@ def test_workflow_kv_store_update(self): def test_workflow_kv_store_delete(self): self.store.set("test_key", "to_be_deleted") kv_name = self.store._get_kv_record_name("test_key") - obj_name = frappe.db.get_value("Press Workflow KV", kv_name, "value") + obj_type, obj_name = frappe.db.get_value("Press Workflow KV", kv_name, ["type", "value"]) self.store.delete("test_key") self.assertFalse(frappe.db.exists("Press Workflow KV", kv_name)) - is_deleted = frappe.db.get_value("Press Workflow Object", obj_name, "deleted") - self.assertTrue(is_deleted) + # Only Press Workflow Object documents are marked as deleted. + # JSON-serializable values are stored directly in the KV record. + if obj_type == "object": + is_deleted = frappe.db.get_value("Press Workflow Object", obj_name, "deleted") + self.assertTrue(is_deleted) self.assertIsNone(self.store.get("test_key")) def test_workflow_kv_store_get_nonexistent(self): self.assertIsNone(self.store.get("nonexistent_key")) + + def test_in_memory_kv_store_multiple_keys(self): + store = InMemoryKVStore() + store.set("key1", "value1") + store.set("key2", "value2") + store.set("key3", "value3") + + self.assertEqual(store.get("key1"), "value1") + self.assertEqual(store.get("key2"), "value2") + self.assertEqual(store.get("key3"), "value3") + + def test_in_memory_kv_store_overwrite(self): + store = InMemoryKVStore() + store.set("key", "initial") + store.set("key", "updated") + + self.assertEqual(store.get("key"), "updated") + + def test_in_memory_kv_store_delete_nonexistent(self): + store = InMemoryKVStore() + store.delete("nonexistent") + self.assertIsNone(store.get("nonexistent")) + + def test_workflow_kv_store_with_none_value(self): + self.store.set("null_key", None) + 
self.assertIsNone(self.store.get("null_key")) + + def test_workflow_kv_store_with_complex_value(self): + value = {"nested": {"data": [1, 2, 3]}, "list": ["a", "b", "c"]} + self.store.set("complex_key", value) + retrieved = self.store.get("complex_key") + self.assertEqual(retrieved, value) + + def test_workflow_kv_store_multiple_keys(self): + self.store.set("key1", "value1") + self.store.set("key2", "value2") + + self.assertEqual(self.store.get("key1"), "value1") + self.assertEqual(self.store.get("key2"), "value2") + + def test_workflow_kv_store_delete_nonexistent(self): + self.store.delete("nonexistent_key") + self.assertIsNone(self.store.get("nonexistent_key")) diff --git a/press/workflow_engine/doctype/press_workflow_object/test_press_workflow_object.py b/press/workflow_engine/doctype/press_workflow_object/test_press_workflow_object.py index f1ef91662e6..b59a170cae5 100644 --- a/press/workflow_engine/doctype/press_workflow_object/test_press_workflow_object.py +++ b/press/workflow_engine/doctype/press_workflow_object/test_press_workflow_object.py @@ -13,8 +13,8 @@ # On IntegrationTestCase, the doctype test records and all # link-field test record dependencies are recursively loaded # Use these module variables to add/remove to/from that list -EXTRA_TEST_RECORD_DEPENDENCIES = [] # eg. ["User"] -IGNORE_TEST_RECORD_DEPENDENCIES = [] # eg. ["User"] +EXTRA_TEST_RECORD_DEPENDENCIES: list[str] = [] # eg. ["User"] +IGNORE_TEST_RECORD_DEPENDENCIES: list[str] = [] # eg. 
["User"] class MyCustomClass: @@ -78,3 +78,41 @@ def test_get_summary(self): summary = PressWorkflowObject.get_summary(doc_name) self.assertEqual(summary, str(obj)) + + def test_get_summary_nonexistent(self): + with self.assertRaises(frappe.DoesNotExistError): + PressWorkflowObject.get_summary("nonexistent-doc-name") + + def test_get_object_nonexistent(self): + with self.assertRaises(frappe.DoesNotExistError): + PressWorkflowObject.get_object("nonexistent-doc-name") + + def test_store_and_get_none_value(self): + doc_name = PressWorkflowObject.store(None) + self.assertTrue(doc_name) + retrieved = PressWorkflowObject.get_object(doc_name) + self.assertIsNone(retrieved) + + def test_store_and_get_complex_nested_object(self): + obj = { + "list_of_dicts": [{"a": 1}, {"b": 2}], + "dict_of_lists": {"x": [1, 2], "y": [3, 4]}, + "nested": {"deep": {"deeper": {"value": 42}}}, + } + doc_name = PressWorkflowObject.store(obj) + retrieved = PressWorkflowObject.get_object(doc_name) + self.assertEqual(retrieved, obj) + + def test_delete_trashed_objects(self): + from press.workflow_engine.doctype.press_workflow_object.press_workflow_object import ( + delete_trashed_objects, + ) + + obj = {"key": "value"} + doc_name = PressWorkflowObject.store(obj) + + frappe.db.set_value("Press Workflow Object", doc_name, "deleted", True) + + delete_trashed_objects() + + self.assertFalse(frappe.db.exists("Press Workflow Object", doc_name)) diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json index 3053ea93d5f..8a188322945 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json @@ -131,9 +131,9 @@ }, { "fieldname": "output", - "fieldtype": "Link", + "fieldtype": "Data", "label": "Output", - "options": "Press Workflow Object", + "length": 1000, "read_only": 1 }, { @@ -146,16 
+146,16 @@ }, { "fieldname": "args", - "fieldtype": "Link", + "fieldtype": "Data", "label": "Args", - "options": "Press Workflow Object", + "length": 1000, "set_only_once": 1 }, { "fieldname": "kwargs", - "fieldtype": "Link", + "fieldtype": "Data", "label": "Kwargs", - "options": "Press Workflow Object", + "length": 1000, "set_only_once": 1 }, { @@ -208,7 +208,7 @@ ], "grid_page_length": 50, "links": [], - "modified": "2026-04-24 13:38:30.338341", + "modified": "2026-04-24 14:46:19.016442", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow Task", diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py index eb80263070d..a8ec354713f 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.py @@ -31,16 +31,16 @@ class PressWorkflowTask(Document): if TYPE_CHECKING: from frappe.types import DF - args: DF.Link | None + args: DF.Data | None args_type: DF.Literal["int", "float", "string", "tuple", "list", "dict", "object"] duration: DF.Duration | None end: DF.Datetime | None exception: DF.Link | None - kwargs: DF.Link | None + kwargs: DF.Data | None kwargs_type: DF.Data | None method_name: DF.Data method_title: DF.Data - output: DF.Link | None + output: DF.Data | None output_type: DF.Literal["int", "float", "string", "tuple", "list", "dict", "object"] parent_task: DF.Link | None queue: DF.Data | None @@ -63,20 +63,39 @@ def update_tracked_step_status(self): if self.is_new(): return + if self.flags.in_insert: + # Called from run_post_save_methods() after insert, where the in-memory status + # is still the original "Queued" but the task may have already run synchronously + return + if not self.has_value_changed("status"): return - frappe.db.set_value( - "Press Workflow Step", - {"task": self.name}, - "status", - { - "Queued": 
"Pending", - "Running": "Running", - "Success": "Success", - "Failure": "Failure", - }.get(self.status, "Pending"), - ) + new_status = { + "Queued": "Pending", + "Running": "Running", + "Success": "Success", + "Failure": "Failure", + }.get(self.status, "Pending") + + # Primary lookup: find the step already linked to this task. + step_name = frappe.db.get_value("Press Workflow Step", {"task": self.name}, "name") + + if not step_name: + # Fallback: the step may not yet be linked (e.g. in synchronous test execution + # where after_insert runs the task before run_task sets the step.task reference). + step_name = frappe.db.get_value( + "Press Workflow Step", + { + "parenttype": "Press Workflow", + "parent": self.workflow, + "step_method": self.method_name, + }, + "name", + ) + + if step_name: + frappe.db.set_value("Press Workflow Step", step_name, "status", new_status) def run(self): # noqa: C901 - Best to keep workflow execution logic in one place assert self.name, "Task must be saved before it can be run" diff --git a/press/workflow_engine/doctype/press_workflow_task/test_press_workflow_task.py b/press/workflow_engine/doctype/press_workflow_task/test_press_workflow_task.py index a73f1d47daf..a924f12af17 100644 --- a/press/workflow_engine/doctype/press_workflow_task/test_press_workflow_task.py +++ b/press/workflow_engine/doctype/press_workflow_task/test_press_workflow_task.py @@ -1,20 +1,292 @@ # Copyright (c) 2026, Frappe and Contributors # See license.txt -# import frappe -from frappe.tests import IntegrationTestCase +from unittest.mock import patch -# On IntegrationTestCase, the doctype test records and all -# link-field test record dependencies are recursively loaded -# Use these module variables to add/remove to/from that list -EXTRA_TEST_RECORD_DEPENDENCIES = [] # eg. ["User"] -IGNORE_TEST_RECORD_DEPENDENCIES = [] # eg. 
["User"] +import frappe +from frappe.tests.utils import FrappeTestCase +from press.utils.test import foreground_enqueue, foreground_enqueue_doc -class IntegrationTestPressWorkflowTask(IntegrationTestCase): - """ - Integration tests for PressWorkflowTask. - Use this class for testing interactions between multiple components. - """ - pass +@patch("frappe.enqueue_doc", new=foreground_enqueue_doc) +@patch("frappe.enqueue", new=foreground_enqueue) +@patch("frappe.db.commit", new=lambda: None) +class TestPressWorkflowTask(FrappeTestCase): + def setUp(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + self.doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 3, + "input_b": 2, + } + ).insert() + + def tearDown(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + self.doc.delete() + + def test_task_after_insert_enqueues(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_task", + "main_method_title": "Main With Task", + "steps": [ + { + "step_title": "Sample Task", + "step_method": "sample_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + self.assertEqual(wf.status, "Success") + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + self.assertEqual(len(tasks), 1) + + task = frappe.get_doc("Press Workflow Task", tasks[0].name) + self.assertEqual(task.status, "Success") + self.assertEqual(task.method_name, "sample_task") + + def test_task_update_tracked_step_status(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_task", + "main_method_title": "Main With Task", + "steps": [ + { 
+ "step_title": "Sample Task", + "step_method": "sample_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + step = frappe.get_doc("Press Workflow Step", {"parent": wf.name}) + self.assertEqual(step.status, "Success") + + def test_task_failure_status(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_failing_task", + "main_method_title": "Main With Failing Task", + "steps": [ + { + "step_title": "Sample Failing Task", + "step_method": "sample_failing_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + self.assertEqual(wf.status, "Failure") + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + self.assertEqual(len(tasks), 1) + + task = frappe.get_doc("Press Workflow Task", tasks[0].name) + self.assertEqual(task.status, "Failure") + self.assertIsNotNone(task.exception) + + def test_task_with_args_and_kwargs(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_args_task", + "main_method_title": "Main With Args Task", + "steps": [ + { + "step_title": "Add", + "step_method": "add", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + self.assertEqual(wf.status, "Success") + self.assertEqual(wf.get_result(), 5) + + def test_task_with_nested_task(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_nested_task", + "main_method_title": "Main With Nested Task", + "steps": [ + { + "step_title": "Sample Nested Task", + "step_method": "sample_nested_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + self.assertEqual(wf.status, 
"Success") + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}, pluck="name") + self.assertTrue(len(tasks) >= 2) + + child_task = frappe.get_doc("Press Workflow Task", tasks[0]) + if child_task.method_name == "sample_nested_task": + self.assertIsNotNone(child_task.parent_task) + + def test_task_resume_workflow_on_success(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_task", + "main_method_title": "Main With Task", + "steps": [ + { + "step_title": "Sample Task", + "step_method": "sample_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + self.assertEqual(wf.status, "Success") + + def test_task_signature_deduplication(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_args_task", + "main_method_title": "Main With Args Task", + "steps": [ + { + "step_title": "Add", + "step_method": "add", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + self.assertEqual(len(tasks), 1) + + task = frappe.get_doc("Press Workflow Task", tasks[0].name) + self.assertIsNotNone(task.signature) + + def test_task_with_queue_and_timeout(self): + wf_name = ( + frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_success", + "main_method_title": "Main Success", + "steps": [], + } + ) + .insert(ignore_permissions=True) + .name + ) + + with patch( + "press.workflow_engine.doctype.press_workflow_task.press_workflow_task.enqueue_task", + return_value=None, + ): + task_doc = frappe.new_doc("Press Workflow Task") + task_doc.workflow = wf_name + task_doc.method_name = 
"sample_task" + task_doc.method_title = "Sample Task" + task_doc.signature = "test-signature" + task_doc.args_type = "tuple" + task_doc.args = "[]" + task_doc.kwargs_type = "dict" + task_doc.kwargs = "{}" + task_doc.status = "Queued" + task_doc.queue = "long" + task_doc.timeout = 600 + task_doc.insert(ignore_permissions=True) + + self.assertEqual(task_doc.queue, "long") + self.assertEqual(task_doc.timeout, 600) + + def test_task_stdout_capture(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_noisy_task", + "main_method_title": "Main With Noisy Task", + "steps": [ + { + "step_title": "Noisy Task", + "step_method": "noisy_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + wf.reload() + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + self.assertEqual(len(tasks), 1) + + task = frappe.get_doc("Press Workflow Task", tasks[0].name) + self.assertIn("hello from noisy_task", task.stdout or "") + + def test_task_duration_calculation(self): + wf = frappe.get_doc( + { + "doctype": "Press Workflow", + "linked_doctype": "Press Workflow Test", + "linked_docname": self.doc.name, + "main_method_name": "main_with_task", + "main_method_title": "Main With Task", + "steps": [ + { + "step_title": "Sample Task", + "step_method": "sample_task", + "status": "Pending", + } + ], + } + ).insert(ignore_permissions=True) + + tasks = frappe.get_all("Press Workflow Task", filters={"workflow": wf.name}) + task = frappe.get_doc("Press Workflow Task", tasks[0].name) + + self.assertIsNotNone(task.start) + self.assertIsNotNone(task.end) + self.assertIsNotNone(task.duration) diff --git a/press/workflow_engine/doctype/press_workflow_test/test_press_workflow_test.py b/press/workflow_engine/doctype/press_workflow_test/test_press_workflow_test.py index 9f9e22f354e..b28a34cd145 100644 --- 
a/press/workflow_engine/doctype/press_workflow_test/test_press_workflow_test.py +++ b/press/workflow_engine/doctype/press_workflow_test/test_press_workflow_test.py @@ -1,20 +1,155 @@ # Copyright (c) 2026, Frappe and Contributors # See license.txt -# import frappe -from frappe.tests import IntegrationTestCase +from unittest.mock import patch -# On IntegrationTestCase, the doctype test records and all -# link-field test record dependencies are recursively loaded -# Use these module variables to add/remove to/from that list -EXTRA_TEST_RECORD_DEPENDENCIES = [] # eg. ["User"] -IGNORE_TEST_RECORD_DEPENDENCIES = [] # eg. ["User"] +import frappe +from frappe.tests.utils import FrappeTestCase +from press.utils.test import foreground_enqueue, foreground_enqueue_doc -class IntegrationTestPressWorkflowTest(IntegrationTestCase): - """ - Integration tests for PressWorkflowTest. - Use this class for testing interactions between multiple components. - """ - pass +@patch("frappe.enqueue_doc", new=foreground_enqueue_doc) +@patch("frappe.enqueue", new=foreground_enqueue) +@patch("frappe.db.commit", new=lambda: None) +class TestPressWorkflowTestDoctype(FrappeTestCase): + def setUp(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + + def tearDown(self): + frappe.db.delete("Press Workflow") + frappe.db.delete("Press Workflow Task") + frappe.db.delete("Press Workflow Object") + + def test_create_workflow_test_doc(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 10, + "input_b": 5, + } + ).insert() + + self.assertEqual(doc.input_a, 10) + self.assertEqual(doc.input_b, 5) + doc.delete() + + def test_workflow_test_sample_task(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 1, + "input_b": 2, + } + ).insert() + + result = doc.sample_task() + self.assertEqual(result, "task done") + doc.delete() + + def 
test_workflow_test_sample_failing_task(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 1, + "input_b": 2, + } + ).insert() + + with self.assertRaises(ValueError) as ctx: + doc.sample_failing_task() + self.assertIn("task failed", str(ctx.exception)) + doc.delete() + + def test_workflow_test_add_task(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 10, + "input_b": 20, + } + ).insert() + + result = doc.add(10, 20) + self.assertEqual(result, 30) + doc.delete() + + def test_workflow_test_multiply_task(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 3, + "input_b": 4, + } + ).insert() + + result = doc.multiply(3, 4) + self.assertEqual(result, 12) + doc.delete() + + def test_workflow_test_power_task(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 2, + "input_b": 3, + } + ).insert() + + result = doc.power(2, 3) + self.assertEqual(result, 8) + doc.delete() + + def test_workflow_test_noisy_task(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 1, + "input_b": 2, + } + ).insert() + + result = doc.noisy_task() + self.assertEqual(result, "done") + doc.delete() + + def test_workflow_test_main_success_flow(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 1, + "input_b": 2, + } + ).insert() + + result = doc.main_success() + self.assertEqual(result, "success output") + doc.delete() + + def test_workflow_test_main_fail_flow(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 1, + "input_b": 2, + } + ).insert() + + with self.assertRaises(ValueError): + doc.main_fail() + doc.delete() + + def test_workflow_test_skipped_steps_flow(self): + doc = frappe.get_doc( + { + "doctype": "Press Workflow Test", + "input_a": 1, + "input_b": 2, + } + ).insert() + + result = doc.skipped_steps_flow() + self.assertEqual(result, "skipped") + doc.delete() diff 
--git a/press/workflow_engine/test_utils.py b/press/workflow_engine/test_utils.py index 33a3d7ce8b6..63ea43eb4df 100644 --- a/press/workflow_engine/test_utils.py +++ b/press/workflow_engine/test_utils.py @@ -12,6 +12,7 @@ called_methods_in_order, deserialize_value, generate_function_signature, + get_type_of_value, is_func_accept_task_id, method_title, serialize_and_store_value, @@ -176,3 +177,143 @@ def test_serialize_deserialize_exception_as_object(self): deserialized = deserialize_value(value_type, serialized_value) self.assertIsInstance(deserialized, ValueError) self.assertEqual(str(deserialized), str(original)) + + def test_get_type_of_value_none(self): + self.assertIsNone(get_type_of_value(None)) + + def test_get_type_of_value_bool(self): + self.assertEqual(get_type_of_value(True), "bool") + self.assertEqual(get_type_of_value(False), "bool") + + def test_get_type_of_value_int(self): + self.assertEqual(get_type_of_value(0), "int") + self.assertEqual(get_type_of_value(-100), "int") + self.assertEqual(get_type_of_value(999999), "int") + + def test_get_type_of_value_float_finite(self): + self.assertEqual(get_type_of_value(1.5), "float") + self.assertEqual(get_type_of_value(0.0), "float") + + def test_get_type_of_value_float_infinite(self): + self.assertEqual(get_type_of_value(float("inf")), "object") + self.assertEqual(get_type_of_value(float("-inf")), "object") + self.assertEqual(get_type_of_value(float("nan")), "object") + + def test_get_type_of_value_string(self): + self.assertEqual(get_type_of_value(""), "string") + self.assertEqual(get_type_of_value("hello"), "string") + + def test_get_type_of_value_tuple_serializable(self): + self.assertEqual(get_type_of_value((1, 2, 3)), "tuple") + self.assertEqual(get_type_of_value(("a", "b")), "tuple") + + def test_get_type_of_value_tuple_non_serializable(self): + self.assertEqual(get_type_of_value((float("inf"),)), "object") + + def test_get_type_of_value_list_serializable(self): + self.assertEqual(get_type_of_value([1, 
2, 3]), "list") + + def test_get_type_of_value_list_non_serializable(self): + self.assertEqual(get_type_of_value([float("inf")]), "object") + + def test_get_type_of_value_dict_serializable(self): + self.assertEqual(get_type_of_value({"a": 1, "b": 2}), "dict") + + def test_get_type_of_value_dict_non_serializable(self): + self.assertEqual(get_type_of_value({"a": float("inf")}), "object") + + def test_get_type_of_value_custom_object(self): + obj = DummyDataclass(a=1, b="test") + self.assertEqual(get_type_of_value(obj), "object") + + def test_serialize_and_store_value_none(self): + value_type, serialized_value = serialize_and_store_value(None) + self.assertIsNone(value_type) + self.assertIsNone(serialized_value) + + def test_serialize_and_store_value_object(self): + obj = DummyDataclass(a=1, b="test") + value_type, serialized_value = serialize_and_store_value(obj) + self.assertEqual(value_type, "object") + self.assertIsNotNone(serialized_value) + + def test_deserialize_value_none_type(self): + self.assertIsNone(deserialize_value(None, None)) + + def test_deserialize_value_invalid_json(self): + with self.assertRaises(ValueError): + deserialize_value("dict", "not valid json") + + def test_deserialize_value_unsupported_type(self): + with self.assertRaises(ValueError): + deserialize_value("unsupported", "value") + + def test_canonicalize_frozenset(self): + result = _canonicalize(frozenset([1, 2, 3])) + self.assertEqual(result["__type__"], "frozenset") + self.assertEqual(sorted(result["values"]), [1, 2, 3]) + + def test_generate_function_signature_with_self(self): + class MyClass: + def my_method(self, a, b): + pass + + sig = generate_function_signature(MyClass.my_method, args=(1, 2), kwargs={}) + self.assertIsInstance(sig, str) + self.assertTrue(len(sig) > 0) + + def test_generate_function_signature_different_args(self): + def my_func(a, b): + pass + + sig1 = generate_function_signature(my_func, args=(1, 2), kwargs={}) + sig2 = generate_function_signature(my_func, args=(3, 
4), kwargs={}) + self.assertNotEqual(sig1, sig2) + + def test_is_func_accept_task_id_with_kwargs(self): + def func_with_kwargs(**kwargs): + pass + + self.assertFalse(is_func_accept_task_id(func_with_kwargs)) + + def test_is_func_accept_task_id_with_variadic(self): + def func_with_variadic(*args, **kwargs): + pass + + self.assertFalse(is_func_accept_task_id(func_with_variadic)) + + def test_method_title_with_multiline_docstring(self): + def func(): + """First line + Second line + Third line + """ + pass + + self.assertEqual(method_title(func), "First line") + + def test_method_title_with_underscores(self): + def my_function_name(): + pass + + self.assertEqual(method_title(my_function_name), "My Function Name") + + def test_called_methods_in_order_with_method_name(self): + calls = called_methods_in_order(DummyClassForCallVisitor, "method_three") + self.assertEqual(len(calls), 2) + self.assertEqual(calls[0][0], "method_one") + self.assertEqual(calls[1][0], "method_two") + + def test_serialize_deserialize_empty_collections(self): + cases = [ + ([], "list"), + ((), "tuple"), + ({}, "dict"), + ] + + for original, expected_type in cases: + with self.subTest(value=original, value_type=expected_type): + value_type, serialized_value = serialize_and_store_value(original) + self.assertEqual(value_type, expected_type) + deserialized_value = deserialize_value(value_type, serialized_value) + self.assertEqual(deserialized_value, original) From 9c61826af9ddbd63604aaf2fe6a712ae43d57835 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 18:49:12 +0530 Subject: [PATCH 18/22] fix(release-pipeline): In on_workflow_failure add 2nd args --- press/press/doctype/release_pipeline/release_pipeline.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/press/press/doctype/release_pipeline/release_pipeline.py b/press/press/doctype/release_pipeline/release_pipeline.py index 1c67e777703..7839f652921 100644 --- 
a/press/press/doctype/release_pipeline/release_pipeline.py +++ b/press/press/doctype/release_pipeline/release_pipeline.py @@ -180,7 +180,6 @@ def update_pipeline_status( "Failure", "Retrying", ], - ignore_permissions: bool = False, ): # If the workflow doc touches this for any reason # Document native methods would raise a `TimeStampMismatch` error @@ -624,9 +623,5 @@ def create_release( # Just in case, make sure that we mark the pipeline as failed and notify the frontend to stop listening for deploy updates self.update_pipeline_status("Failure") - workflow_status = frappe.db.get_value("Press Workflow", self.workflow, "status") - if workflow_status == "Failure": - self.update_pipeline_status("Failure") - - def on_workflow_failure(self): - self.update_pipeline_status("Failure", ignore_permissions=True) + def on_workflow_failure(self, *args, **kwargs): + self.update_pipeline_status("Failure") From d9ee6895d9665843f033e723d2f97d868f320218 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 18:55:37 +0530 Subject: [PATCH 19/22] chore(workflow-engine): Make traceback fields read only --- .../doctype/press_workflow/press_workflow.json | 8 +++++--- .../press_workflow_task/press_workflow_task.json | 11 +++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.json b/press/workflow_engine/doctype/press_workflow/press_workflow.json index 6f564454759..d52bb95423e 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.json +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.json @@ -263,7 +263,8 @@ { "fieldname": "callback_traceback", "fieldtype": "Long Text", - "label": "Callback Traceback" + "label": "Callback Traceback", + "read_only": 1 }, { "default": "Pending", @@ -282,7 +283,8 @@ { "fieldname": "workflow_traceback", "fieldtype": "Long Text", - "label": "Workflow Traceback" + "label": "Workflow 
Traceback", + "read_only": 1 }, { "default": "0", @@ -319,7 +321,7 @@ "link_fieldname": "workflow" } ], - "modified": "2026-04-24 13:36:34.775783", + "modified": "2026-04-24 18:53:20.041521", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow", diff --git a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json index 8a188322945..22789dae75e 100644 --- a/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json +++ b/press/workflow_engine/doctype/press_workflow_task/press_workflow_task.json @@ -190,25 +190,28 @@ "fieldname": "args_type", "fieldtype": "Select", "label": "Args Type", - "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject" + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject", + "read_only": 1 }, { "fieldname": "kwargs_type", "fieldtype": "Data", "label": "Kwargs Type", - "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject" + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject", + "read_only": 1 }, { "fieldname": "output_type", "fieldtype": "Select", "label": "Output Type", "length": 1000, - "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject" + "options": "int\nfloat\nstring\ntuple\nlist\ndict\nobject", + "read_only": 1 } ], "grid_page_length": 50, "links": [], - "modified": "2026-04-24 14:46:19.016442", + "modified": "2026-04-24 18:52:37.880235", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow Task", From 80c4ed210f23b035f19fa77f0f14d1c35b21cbf2 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 18:55:37 +0530 Subject: [PATCH 20/22] chore(workflow-engine): Make traceback fields read only --- .../doctype/press_workflow/press_workflow.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/press/workflow_engine/doctype/press_workflow/press_workflow.json 
b/press/workflow_engine/doctype/press_workflow/press_workflow.json index d52bb95423e..8e3ebb043b3 100644 --- a/press/workflow_engine/doctype/press_workflow/press_workflow.json +++ b/press/workflow_engine/doctype/press_workflow/press_workflow.json @@ -290,7 +290,8 @@ "default": "0", "fieldname": "is_force_failure_requested", "fieldtype": "Check", - "label": "Force Failure Requested" + "label": "Force Failure Requested", + "read_only": 1 }, { "fieldname": "args_type", @@ -321,7 +322,7 @@ "link_fieldname": "workflow" } ], - "modified": "2026-04-24 18:53:20.041521", + "modified": "2026-04-24 19:15:31.340535", "modified_by": "Administrator", "module": "Workflow Engine", "name": "Press Workflow", From 14e4ce71c24d3ab28bf4b24871b785647f429361 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 19:21:24 +0530 Subject: [PATCH 21/22] fix(workflow-engine): Load workflow_doc lazily --- .../doctype/press_workflow/workflow_builder.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/press/workflow_engine/doctype/press_workflow/workflow_builder.py b/press/workflow_engine/doctype/press_workflow/workflow_builder.py index 8ff28f2b850..d3a803de1fe 100644 --- a/press/workflow_engine/doctype/press_workflow/workflow_builder.py +++ b/press/workflow_engine/doctype/press_workflow/workflow_builder.py @@ -47,11 +47,21 @@ def wrapper(self: "WorkflowBuilder", *args, **kwargs): class WorkflowBuilder(Document): workflow_name: str | None = None - workflow_doc = None + _workflow_doc_cache: "PressWorkflow | None" = None kv_store_type: Literal["in_memory", "workflow_store"] = "in_memory" kv_store_reference: KVStoreInterface | None = None current_task_signature: str | None = None + @property + def workflow_doc(self) -> "PressWorkflow | None": + if self._workflow_doc_cache is None and self.workflow_name: + self._workflow_doc_cache = frappe.get_doc("Press Workflow", self.workflow_name) # type: ignore + return 
self._workflow_doc_cache + + @workflow_doc.setter + def workflow_doc(self, value: "PressWorkflow | None") -> None: + self._workflow_doc_cache = value + @ensure_to_resolve_context def run_task( # noqa: C901 self, @@ -184,7 +194,7 @@ def resolve_context(self) -> None: current_workflow = getattr(frappe.flags, "current_press_workflow", None) if current_workflow: self.workflow_name = str(current_workflow) - self.workflow_doc: PressWorkflow = frappe.get_doc("Press Workflow", self.workflow_name) # type: ignore + # workflow_doc will be loaded lazily on first access if self.kv_store_type != "workflow_store": # Store type is changing — discard any cached in-memory store. self.kv_store_type = "workflow_store" From af6aa4631a45fd49476ec9cfe224456db8e526bd Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar <57363826+tanmoysrt@users.noreply.github.com> Date: Fri, 24 Apr 2026 20:01:16 +0530 Subject: [PATCH 22/22] feat(press-job): Expand job step details --- press/press/doctype/press_job/press_job.py | 28 +++++++++++++++++++--- press/press/doctype/server/server.py | 10 ++++---- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/press/press/doctype/press_job/press_job.py b/press/press/doctype/press_job/press_job.py index c5156796de2..4a69ade1b51 100644 --- a/press/press/doctype/press_job/press_job.py +++ b/press/press/doctype/press_job/press_job.py @@ -127,12 +127,34 @@ def virtual_machine_doc(self) -> VirtualMachine | None: @property def steps(self) -> list[dict[str, str]]: try: - workflow = frappe.get_last_doc("Press Workflow", {"linked_docname": self.name}) + workflow: PressWorkflow = frappe.get_last_doc("Press Workflow", {"linked_docname": self.name}) + tasks = frappe.get_all( + "Press Workflow Task", + filters={"workflow": workflow.name}, + fields=[ + "name", + "method_title", + "status", + "stdout", + "creation", + "start", + "end", + "duration", + ], + ) + # Convert to a dict with task name as key for easy lookup + task_dict = {task.name: task for task in tasks} return 
[ { - "method": step.step_method, - "title": step.step_title, + "name": step.name, + "step_name": step.step_title, # backward compatibility + "step_title": step.step_title, "status": step.status, + "result": task_dict.get(step.task, {}).get("stdout", ""), + "traceback": task_dict.get(step.task, {}).get("traceback", ""), + "start": task_dict.get(step.task, {}).get("start"), + "end": task_dict.get(step.task, {}).get("end"), + "duration": task_dict.get(step.task, {}).get("duration"), } for step in workflow.steps ] diff --git a/press/press/doctype/server/server.py b/press/press/doctype/server/server.py index e6a28aca99c..578a26e2723 100644 --- a/press/press/doctype/server/server.py +++ b/press/press/doctype/server/server.py @@ -3,6 +3,7 @@ from __future__ import annotations +import contextlib import datetime import ipaddress import json @@ -3451,12 +3452,9 @@ def generate_on_prem_failover_config(self): running_press_job = next((job for job in press_jobs if job.status in ("Pending", "Running")), None) if press_jobs: for press_job in press_jobs: - press_job["steps"] = frappe.get_all( - "Press Job Step", - filters={"job": press_job.name}, - fields=["name", "step_name", "status", "result", "traceback", "start", "end", "duration"], - order_by="creation asc", - ) + press_job["steps"] = [] + with contextlib.suppress(frappe.DoesNotExistError): + press_job["steps"] = frappe.get_doc("Press Job", press_job.name).steps return { "running_press_job_type": running_press_job.job_type if running_press_job else None,