From 5b25c38f1489f090b31ba660f0765b7feb95b8fc Mon Sep 17 00:00:00 2001 From: agatha197 <28584164+agatha197@users.noreply.github.com> Date: Wed, 22 Apr 2026 03:34:05 +0000 Subject: [PATCH] docs(FR-2603): add health check SVG diagrams for all 4 languages, replace ASCII art (#6839) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves part of FR-2603 ## Summary - Recover and add health check SVG diagrams (health_check_app_proxy.svg, health_check_state_machine.svg) for all 4 languages (en/ko/ja/th) - Replace ASCII art flow diagrams in model_serving.md with the two SVG images across all languages: - **① AppProxy**: Traffic routing control diagram (960×500) - **② Manager**: Health state machine diagram (900×800) - KO keeps original Korean-labeled SVGs; EN/JA/TH use translated versions --- .../src/en/images/health_check_app_proxy.svg | 85 +++++++++++++++++++ .../en/images/health_check_state_machine.svg | 80 +++++++++++++++++ .../src/en/model_serving/model_serving.md | 59 ++----------- .../src/ja/images/health_check_app_proxy.svg | 85 +++++++++++++++++++ .../ja/images/health_check_state_machine.svg | 80 +++++++++++++++++ .../src/ja/model_serving/model_serving.md | 59 ++----------- .../src/ko/images/health_check_app_proxy.svg | 85 +++++++++++++++++++ .../ko/images/health_check_state_machine.svg | 80 +++++++++++++++++ .../src/ko/model_serving/model_serving.md | 59 ++----------- .../src/th/images/health_check_app_proxy.svg | 85 +++++++++++++++++++ .../th/images/health_check_state_machine.svg | 80 +++++++++++++++++ .../src/th/model_serving/model_serving.md | 59 ++----------- 12 files changed, 688 insertions(+), 208 deletions(-) create mode 100644 packages/backend.ai-webui-docs/src/en/images/health_check_app_proxy.svg create mode 100644 packages/backend.ai-webui-docs/src/en/images/health_check_state_machine.svg create mode 100644 packages/backend.ai-webui-docs/src/ja/images/health_check_app_proxy.svg create mode 100644 packages/backend.ai-webui-docs/src/ja/images/health_check_state_machine.svg create mode 100644 packages/backend.ai-webui-docs/src/ko/images/health_check_app_proxy.svg create mode 100644 packages/backend.ai-webui-docs/src/ko/images/health_check_state_machine.svg create mode 100644 packages/backend.ai-webui-docs/src/th/images/health_check_app_proxy.svg create mode 100644 packages/backend.ai-webui-docs/src/th/images/health_check_state_machine.svg diff --git a/packages/backend.ai-webui-docs/src/en/images/health_check_app_proxy.svg b/packages/backend.ai-webui-docs/src/en/images/health_check_app_proxy.svg new file mode 100644 index 0000000000..f99e991851 --- /dev/null +++ b/packages/backend.ai-webui-docs/src/en/images/health_check_app_proxy.svg @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + Manager + Syncs only Active · HEALTHY routes + to AppProxy + + + + + sync + + + + + User + traffic + + + + + request + + + + + AppProxy + + + + + Health Checker + replica probe + + + + Active Pool + healthy only + + + + → routes to healthy replicas only + + + + + + replica A + ✓ in pool + + + + replica B + ✓ in pool + + + + replica C + ✗ excluded from pool + + + + + + + diff --git a/packages/backend.ai-webui-docs/src/en/images/health_check_state_machine.svg b/packages/backend.ai-webui-docs/src/en/images/health_check_state_machine.svg new file mode 100644 index 0000000000..766b97336a --- /dev/null +++ b/packages/backend.ai-webui-docs/src/en/images/health_check_state_machine.svg @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + NOT_CHECKED + initial_delay · pending check + + + + + + HEALTHY + active · routing target + + + + + + UNHEALTHY + confirmed consecutive failures + + + + + + DEGRADED + check unavailable · deferred + + + + + + + TERMINATING + terminal · routing ended + + + + + on first success + (even during initial_delay) + + + + failures ignored + stays NOT_CHECKED + + + + N consecutive failures + + + + recovery on success + + + + check unavailable + + + + check returns + success + + + + consecutive failures after check returns + + + + eviction + diff --git a/packages/backend.ai-webui-docs/src/en/model_serving/model_serving.md b/packages/backend.ai-webui-docs/src/en/model_serving/model_serving.md index 9cab825a36..c31355a0df 100644 --- a/packages/backend.ai-webui-docs/src/en/model_serving/model_serving.md +++ b/packages/backend.ai-webui-docs/src/en/model_serving/model_serving.md @@ -142,58 +142,13 @@ Fields without "(Required)" mark are optional. The health check system monitors individual model service containers and automatically manages traffic routing based on their health status. -``` -Container Created -│ -▼ -┌─────────────────────────────────┐ -│ Wait for initial_delay (60s) │ ← Model loading, GPU init, warmup -│ Status: NOT_CHECKED │ -│ No health checks during this │ -└─────────────────────────────────┘ -│ -▼ -Start Health Check Cycle -│ -▼ -┌─────────────────────────────────┐ -│ Every interval (10s): │ -│ HTTP GET → path ("/health") │ -└─────────────────────────────────┘ -│ -▼ -Wait up to max_wait_time (15s) -│ -┌──────────┴──────────┐ -▼ ▼ -Response Timeout/Error -│ │ -▼ │ -Status == │ -expected? │ -│ │ -┌──┴──┐ │ -▼ ▼ │ -Y N │ -│ │ │ -│ └───────┬───────┘ -│ ▼ -│ Consecutive -│ failures +1 -│ │ -▼ ▼ -HEALTHY Failures > max_retries? -(reset │ -failures) ┌─────┴─────┐ - ▼ ▼ - Yes No - │ │ - ▼ ▼ - UNHEALTHY Keep current - (removed status - from traffic - internally) -``` +**① AppProxy: Traffic Routing Control** + +![](../images/health_check_app_proxy.svg) + +**② Manager: Health State Management and Eviction** + +![](../images/health_check_state_machine.svg) :::note The internal health status (used for traffic routing) may not be immediately diff --git a/packages/backend.ai-webui-docs/src/ja/images/health_check_app_proxy.svg b/packages/backend.ai-webui-docs/src/ja/images/health_check_app_proxy.svg new file mode 100644 index 0000000000..1560e83d40 --- /dev/null +++ b/packages/backend.ai-webui-docs/src/ja/images/health_check_app_proxy.svg @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + Manager + Active · HEALTHY route のみ + AppProxy に sync + + + + + sync + + + + + User + traffic + + + + + リクエスト + + + + + AppProxy + + + + + Health Checker + replica probe + + + + Active Pool + healthy のみ維持 + + + + → healthy replica のみにルーティング + + + + + + replica A + ✓ pool に含む + + + + replica B + ✓ pool に含む + + + + replica C + ✗ pool から除外 + + + + + + + diff --git a/packages/backend.ai-webui-docs/src/ja/images/health_check_state_machine.svg b/packages/backend.ai-webui-docs/src/ja/images/health_check_state_machine.svg new file mode 100644 index 0000000000..3b62e4cae6 --- /dev/null +++ b/packages/backend.ai-webui-docs/src/ja/images/health_check_state_machine.svg @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + NOT_CHECKED + initial_delay · 未判定 + + + + + + HEALTHY + アクティブ · ルーティング対象 + + + + + + UNHEALTHY + 連続失敗確定 + + + + + + DEGRADED + チェック不可 · 判定猶予 + + + + + + + TERMINATING + terminal · ルーティング終了 + + + + + 初回成功時に即時 + (initial_delay 中でも) + + + + 失敗は無視 + NOT_CHECKED 維持 + + + + N回連続失敗 + + + + 成功時に即時復旧 + + + + チェック不可 + + + + チェック復帰 + 成功 + + + + チェック復帰後に連続失敗 + + + + eviction + diff --git a/packages/backend.ai-webui-docs/src/ja/model_serving/model_serving.md b/packages/backend.ai-webui-docs/src/ja/model_serving/model_serving.md index 29da441675..6889f951f9 100644 --- a/packages/backend.ai-webui-docs/src/ja/model_serving/model_serving.md +++ b/packages/backend.ai-webui-docs/src/ja/model_serving/model_serving.md @@ -127,58 +127,13 @@ models: ヘルスチェックシステムは、個々のモデルサービスコンテナを監視し、ヘルスステータスに基づいてトラフィックルーティングを自動的に管理します。 -``` -Container Created -│ -▼ -┌─────────────────────────────────┐ -│ Wait for initial_delay (60s) │ ← Model loading, GPU init, warmup -│ Status: NOT_CHECKED │ -│ No health checks during this │ -└─────────────────────────────────┘ -│ -▼ -Start Health Check Cycle -│ -▼ -┌─────────────────────────────────┐ -│ Every interval (10s): │ -│ HTTP GET → path ("/health") │ -└─────────────────────────────────┘ -│ -▼ -Wait up to max_wait_time (15s) -│ -┌──────────┴──────────┐ -▼ ▼ -Response Timeout/Error -│ │ -▼ │ -Status == │ -expected? │ -│ │ -┌──┴──┐ │ -▼ ▼ │ -Y N │ -│ │ │ -│ └───────┬───────┘ -│ ▼ -│ Consecutive -│ failures +1 -│ │ -▼ ▼ -HEALTHY Failures > max_retries? -(reset │ -failures) ┌─────┴─────┐ - ▼ ▼ - Yes No - │ │ - ▼ ▼ - UNHEALTHY Keep current - (removed status - from traffic - internally) -``` +**① AppProxy: トラフィックルーティング制御** + +![](../images/health_check_app_proxy.svg) + +**② Manager: ヘルス状態管理と eviction** + +![](../images/health_check_state_machine.svg) :::note 内部ヘルスステータス(トラフィックルーティングに使用)は、ユーザーインターフェースに diff --git a/packages/backend.ai-webui-docs/src/ko/images/health_check_app_proxy.svg b/packages/backend.ai-webui-docs/src/ko/images/health_check_app_proxy.svg new file mode 100644 index 0000000000..2d2d4fb8e1 --- /dev/null +++ b/packages/backend.ai-webui-docs/src/ko/images/health_check_app_proxy.svg @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + Manager + Syncs only Active · HEALTHY routes + to AppProxy + + + + + sync + + + + + User + traffic + + + + + request + + + + + AppProxy + + + + + Health Checker + replica probe + + + + Active Pool + healthy only + + + + → routes to healthy replicas only + + + + + + replica A + ✓ in pool + + + + replica B + ✓ in pool + + + + replica C + ✗ excluded from pool + + + + + + + diff --git a/packages/backend.ai-webui-docs/src/ko/images/health_check_state_machine.svg b/packages/backend.ai-webui-docs/src/ko/images/health_check_state_machine.svg new file mode 100644 index 0000000000..570657eb12 --- /dev/null +++ b/packages/backend.ai-webui-docs/src/ko/images/health_check_state_machine.svg @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + NOT_CHECKED + initial_delay · 미판정 + + + + + + HEALTHY + 활성 · 라우팅 대상 + + + + + + UNHEALTHY + 연속 실패 확정 + + + + + + DEGRADED + 체크 불가 · 판정 유예 + + + + + + + TERMINATING + terminal · 라우팅 종료 + + + + + 첫 성공 시 즉시 + (initial_delay 중이어도) + + + + 실패는 무시 + NOT_CHECKED 유지 + + + + N회 연속 실패 + + + + 성공 즉시 복구 + + + + 체크 불가 + + + + 체크 복귀 + 성공 + + + + 체크 복귀 후 연속 실패 + + + + eviction + diff --git a/packages/backend.ai-webui-docs/src/ko/model_serving/model_serving.md b/packages/backend.ai-webui-docs/src/ko/model_serving/model_serving.md index b393cb724f..bb0b034d6c 100644 --- a/packages/backend.ai-webui-docs/src/ko/model_serving/model_serving.md +++ b/packages/backend.ai-webui-docs/src/ko/model_serving/model_serving.md @@ -127,58 +127,13 @@ models: 상태 확인 시스템은 개별 모델 서비스 컨테이너를 모니터링하고, 상태에 따라 트래픽 라우팅을 자동으로 관리합니다. -``` -Container Created -│ -▼ -┌─────────────────────────────────┐ -│ Wait for initial_delay (60s) │ ← Model loading, GPU init, warmup -│ Status: NOT_CHECKED │ -│ No health checks during this │ -└─────────────────────────────────┘ -│ -▼ -Start Health Check Cycle -│ -▼ -┌─────────────────────────────────┐ -│ Every interval (10s): │ -│ HTTP GET → path ("/health") │ -└─────────────────────────────────┘ -│ -▼ -Wait up to max_wait_time (15s) -│ -┌──────────┴──────────┐ -▼ ▼ -Response Timeout/Error -│ │ -▼ │ -Status == │ -expected? │ -│ │ -┌──┴──┐ │ -▼ ▼ │ -Y N │ -│ │ │ -│ └───────┬───────┘ -│ ▼ -│ Consecutive -│ failures +1 -│ │ -▼ ▼ -HEALTHY Failures > max_retries? -(reset │ -failures) ┌─────┴─────┐ - ▼ ▼ - Yes No - │ │ - ▼ ▼ - UNHEALTHY Keep current - (removed status - from traffic - internally) -``` +**① AppProxy: 트래픽 라우팅 제어** + +![](../images/health_check_app_proxy.svg) + +**② Manager: 상태 관리 및 eviction** + +![](../images/health_check_state_machine.svg) :::note 내부 상태 정보(트래픽 라우팅에 사용됨)는 사용자 인터페이스에 표시되는 diff --git a/packages/backend.ai-webui-docs/src/th/images/health_check_app_proxy.svg b/packages/backend.ai-webui-docs/src/th/images/health_check_app_proxy.svg new file mode 100644 index 0000000000..6bc46c67d1 --- /dev/null +++ b/packages/backend.ai-webui-docs/src/th/images/health_check_app_proxy.svg @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + Manager + ซิงค์เฉพาะ route Active · HEALTHY + ไปยัง AppProxy + + + + + sync + + + + + User + traffic + + + + + คำขอ + + + + + AppProxy + + + + + Health Checker + replica probe + + + + Active Pool + เฉพาะ healthy + + + + → กำหนดเส้นทางไปยัง replica healthy เท่านั้น + + + + + + replica A + ✓ อยู่ใน pool + + + + replica B + ✓ อยู่ใน pool + + + + replica C + ✗ ไม่อยู่ใน pool + + + + + + + diff --git a/packages/backend.ai-webui-docs/src/th/images/health_check_state_machine.svg b/packages/backend.ai-webui-docs/src/th/images/health_check_state_machine.svg new file mode 100644 index 0000000000..35a5ac88da --- /dev/null +++ b/packages/backend.ai-webui-docs/src/th/images/health_check_state_machine.svg @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + NOT_CHECKED + initial_delay · ยังไม่ตัดสิน + + + + + + HEALTHY + ทำงาน · เป้าหมายการกำหนดเส้นทาง + + + + + + UNHEALTHY + ล้มเหลวต่อเนื่องยืนยันแล้ว + + + + + + DEGRADED + ตรวจสอบไม่ได้ · ผ่อนผัน + + + + + + + TERMINATING + terminal · สิ้นสุดการกำหนดเส้นทาง + + + + + เมื่อสำเร็จครั้งแรกทันที + (แม้ระหว่าง initial_delay) + + + + ความล้มเหลวถูกละเว้น + คงอยู่ใน NOT_CHECKED + + + + ล้มเหลว N ครั้งติดต่อกัน + + + + กู้คืนทันทีเมื่อสำเร็จ + + + + ตรวจสอบไม่ได้ + + + + ตรวจสอบกลับมา + สำเร็จ + + + + ล้มเหลวต่อเนื่องหลังตรวจสอบกลับมา + + + + eviction + diff --git a/packages/backend.ai-webui-docs/src/th/model_serving/model_serving.md b/packages/backend.ai-webui-docs/src/th/model_serving/model_serving.md index af2d330244..ca579ad50a 100644 --- a/packages/backend.ai-webui-docs/src/th/model_serving/model_serving.md +++ b/packages/backend.ai-webui-docs/src/th/model_serving/model_serving.md @@ -129,58 +129,13 @@ models: ระบบการตรวจสอบสุขภาพจะตรวจสอบคอนเทนเนอร์บริการโมเดลแต่ละตัวและจัดการการเส้นทางการรับส่งข้อมูลโดยอัตโนมัติตามสถานะสุขภาพ -``` -Container Created -│ -▼ -┌─────────────────────────────────┐ -│ Wait for initial_delay (60s) │ ← Model loading, GPU init, warmup -│ Status: NOT_CHECKED │ -│ No health checks during this │ -└─────────────────────────────────┘ -│ -▼ -Start Health Check Cycle -│ -▼ -┌─────────────────────────────────┐ -│ Every interval (10s): │ -│ HTTP GET → path ("/health") │ -└─────────────────────────────────┘ -│ -▼ -Wait up to max_wait_time (15s) -│ -┌──────────┴──────────┐ -▼ ▼ -Response Timeout/Error -│ │ -▼ │ -Status == │ -expected? │ -│ │ -┌──┴──┐ │ -▼ ▼ │ -Y N │ -│ │ │ -│ └───────┬───────┘ -│ ▼ -│ Consecutive -│ failures +1 -│ │ -▼ ▼ -HEALTHY Failures > max_retries? -(reset │ -failures) ┌─────┴─────┐ - ▼ ▼ - Yes No - │ │ - ▼ ▼ - UNHEALTHY Keep current - (removed status - from traffic - internally) -``` +**① AppProxy: การควบคุมการกำหนดเส้นทาง Traffic** + +![](../images/health_check_app_proxy.svg) + +**② Manager: การจัดการสถานะสุขภาพและ Eviction** + +![](../images/health_check_state_machine.svg) :::note สถานะสุขภาพภายใน (ใช้สำหรับการเส้นทางการรับส่งข้อมูล) อาจไม่ถูก