diff --git a/common/common.cpp b/common/common.cpp
index b6a7626f2a1d..055772d711fe 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1312,6 +1312,12 @@ std::vector<llama_adapter_lora_ptr> & common_init_result::lora() {
 }
 
 common_init_result_ptr common_init_from_params(common_params & params, bool model_only) {
+    // report the load phase up front (router progress UI); fit + metadata read emit nothing,
+    // so without this the UI sticks on "download 100%" until the per-tensor callback starts
+    if (params.load_stage_callback) {
+        params.load_stage_callback(COMMON_LOAD_STAGE_LOAD, -1.0f, params.load_stage_callback_user_data);
+    }
+
     common_init_result_ptr res(new common_init_result(params, model_only));
 
     llama_model * model = res->model();
@@ -1387,6 +1393,10 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode
     }
 
     if (params.warmup) {
+        // report the warmup phase (router progress UI)
+        if (params.load_stage_callback) {
+            params.load_stage_callback(COMMON_LOAD_STAGE_WARMUP, -1.0f, params.load_stage_callback_user_data);
+        }
         LOG_INF("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);
 
         std::vector<llama_token> tmp;
diff --git a/common/common.h b/common/common.h
index 13f387271d81..067e443fe01a 100644
--- a/common/common.h
+++ b/common/common.h
@@ -423,6 +423,15 @@ struct lr_opt {
 
 struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);
 
+// load-stage names for common_load_stage_callback; keep the webui switch in sync (getModelLoadPhase in models/utils.ts)
+#define COMMON_LOAD_STAGE_DOWNLOAD "download" // file download (router child only)
+#define COMMON_LOAD_STAGE_LOAD     "load"     // tensor loading
+#define COMMON_LOAD_STAGE_WARMUP   "warmup"   // empty-run warmup
+#define COMMON_LOAD_STAGE_FINALIZE "finalize" // post-warmup setup: chat templates / seq-rm tests (context alloc already happened during load)
+
+// coarse load-stage reporting for router mode: stage is a COMMON_LOAD_STAGE_* name, progress is in [0,1] (or <0 if indeterminate), user_data is the caller's load_stage_callback_user_data
+typedef void (*common_load_stage_callback)(const char * stage, float progress, void * user_data);
+
 struct common_params {
     int32_t n_predict             =    -1; // max. number of new tokens to predict, -1 == no limit
     int32_t n_ctx                 =     0; // context size, 0 == context the model was trained with
@@ -701,6 +710,9 @@ struct common_params {
     // return false from callback to abort model loading or true to continue
     llama_progress_callback load_progress_callback = NULL;
     void *                  load_progress_callback_user_data = NULL;
+    // optional callback for coarse load-stage reporting (used by router mode to drive a progress UI)
+    common_load_stage_callback load_stage_callback = NULL;
+    void *                     load_stage_callback_user_data = NULL;
     bool no_alloc = false; // Don't allocate model buffers
 };
 
diff --git a/common/download.cpp b/common/download.cpp
index 40f6eb780f41..4f5affc1b742 100644
--- a/common/download.cpp
+++ b/common/download.cpp
@@ -475,17 +475,38 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
     return { res->status, std::move(buf) };
 }
 
+static common_download_callback * g_default_download_callback = nullptr; // fallback used when a call's opts.callback is unset; only the pointer is kept
+
+void common_download_set_default_callback(common_download_callback * callback) {
+    g_default_download_callback = callback; // stores the pointer as-is; passing nullptr clears the default
+}
+
 int common_download_file_single(const std::string & url,
                                 const std::string & path,
                                 const common_download_opts & opts,
                                 bool skip_etag) {
-    if (!opts.offline) {
+    // resolve the effective callback: per-call > process-wide default
+    common_download_opts eff = opts;
+    if (!eff.callback) {
+        eff.callback = g_default_download_callback;
+    }
+
+    if (!eff.offline) {
         ProgressBar tty_cb;
-        common_download_opts online_opts = opts;
-        if (!online_opts.callback) {
-            online_opts.callback = &tty_cb;
+        if (!eff.callback) {
+            eff.callback = &tty_cb;
         }
-        return common_download_file_single_online(url, path, online_opts, skip_etag);
+        const int status = common_download_file_single_online(url, path, eff, skip_etag);
+        // the online path returns 304 (cached, not modified) before emitting any callback;
+        // surface a cached start/done pair so aggregators still see every file exactly once
+        if (status == 304 && eff.callback) {
+            common_download_progress p;
+            p.url    = url;
+            p.cached = true;
+            eff.callback->on_start(p);
+            eff.callback->on_done(p, true);
+        }
+        return status;
     }
 
     if (!std::filesystem::exists(path)) {
@@ -496,12 +517,12 @@ int common_download_file_single(const std::string & url,
     LOG_DBG("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
 
     // notify the callback that the file was cached
-    if (opts.callback) {
+    if (eff.callback) {
         common_download_progress p;
         p.url = url;
         p.cached = true;
-        opts.callback->on_start(p);
-        opts.callback->on_done(p, true);
+        eff.callback->on_start(p);
+        eff.callback->on_done(p, true);
     }
 
     return 304; // Not Modified - fake cached response
@@ -814,6 +835,13 @@ common_download_model_result common_download_model(const common_params_model  &
         return result;
     }
 
+    // announce the full file set up front so a progress aggregator can form a stable
+    // denominator (multi-part GGUFs download in parallel below). use the effective callback,
+    // mirroring common_download_file_single's per-call > process-wide resolution.
+    if (common_download_callback * cb = opts.callback ? opts.callback : g_default_download_callback) {
+        cb->on_plan(tasks.size());
+    }
+
     std::vector<std::future<int>> futures;
     for (const auto & task : tasks) {
         futures.push_back(std::async(std::launch::async,
diff --git a/common/download.h b/common/download.h
index ebeedd6058c7..a9c76c1a7634 100644
--- a/common/download.h
+++ b/common/download.h
@@ -18,12 +18,20 @@ struct common_download_progress {
 class common_download_callback {
 public:
     virtual ~common_download_callback() = default;
+    // announces how many files a download pass is about to fetch, before any of them starts, so
+    // aggregators know the full set up front (e.g. multi-part GGUFs) instead of discovering files lazily.
+    // optional: default no-op. NOTE(review): confirm whether callers other than common_download_model announce a plan.
+    virtual void on_plan(size_t total_files) { (void) total_files; }
     virtual void on_start(const common_download_progress & p) = 0;
     virtual void on_update(const common_download_progress & p) = 0;
     virtual void on_done(const common_download_progress & p, bool ok) = 0;
     virtual bool is_cancelled() const { return false; }
 };
 
+// process-wide default download callback, used when common_download_opts::callback is unset (nullptr to clear).
+// borrowed, not owned: must outlive any download that uses it.
+void common_download_set_default_callback(common_download_callback * callback);
+
 struct common_remote_params {
     common_header_list headers;
     long timeout  = 0;           // in seconds, 0 means no timeout
diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index 07759f417084..8d86e226bd48 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -1004,6 +1004,12 @@ struct server_context_impl {
             }
         }
 
+        // init done (weights + warmup); mark finalize so the UI doesn't stick on the warmup phase
+        // while the post-warmup setup below (chat templates / seq-rm tests) runs
+        if (params_base.load_stage_callback) {
+            params_base.load_stage_callback(COMMON_LOAD_STAGE_FINALIZE, -1.0f, params_base.load_stage_callback_user_data);
+        }
+
         if (!llama_memory_can_shift(llama_get_memory(ctx_tgt))) {
             if (params_base.ctx_shift) {
                 params_base.ctx_shift = false;
diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp
index 49b0e423f462..226010d7be92 100644
--- a/tools/server/server-models.cpp
+++ b/tools/server/server-models.cpp
@@ -18,6 +18,8 @@
 #include <atomic>
 #include <chrono>
 #include <queue>
+#include <unordered_map>
+#include <unordered_set>
 #include <filesystem>
 #include <random>
 #include <sstream>
@@ -46,6 +48,9 @@ extern char **environ;
 #define CMD_CHILD_TO_ROUTER_READY "cmd_child_to_router:ready" // also sent when waking up from sleep
 #define CMD_CHILD_TO_ROUTER_SLEEP "cmd_child_to_router:sleep"
 #define CMD_CHILD_TO_ROUTER_INFO  "cmd_child_to_router:info:" // followed by json string
+// load stage report: "<stage>" or "<stage>:<fraction 0..1>" (no fraction => indeterminate stage)
+// stages are the COMMON_LOAD_STAGE_* names (download / load / warmup / finalize)
+#define CMD_CHILD_TO_ROUTER_STAGE "cmd_child_to_router:stage:"
 
 // address for child process, this is needed because router may run on 0.0.0.0
 // ref: https://github.com/ggml-org/llama.cpp/issues/17862
@@ -762,9 +767,11 @@ void server_models::load(const std::string & name) {
     instance_t inst;
     inst.meta             = meta;
     inst.meta.port        = get_free_port();
-    inst.meta.status      = SERVER_MODEL_STATUS_LOADING;
-    inst.meta.loaded_info = json{};
-    inst.meta.last_used   = ggml_time_ms();
+    inst.meta.status        = SERVER_MODEL_STATUS_LOADING;
+    inst.meta.loaded_info   = json{};
+    inst.meta.load_stage    = "";    // reset stale stage/progress from a previous load
+    inst.meta.load_progress = -1.0f;
+    inst.meta.last_used     = ggml_time_ms();
 
     if (inst.meta.port <= 0) {
         throw std::runtime_error("failed to get a port number");
@@ -821,6 +828,16 @@ void server_models::load(const std::string & name) {
                         this->update_loaded_info(name, str);
                     } else if (string_starts_with(buffer, CMD_CHILD_TO_ROUTER_SLEEP)) {
                         this->update_status(name, SERVER_MODEL_STATUS_SLEEPING, 0);
+                    } else if (string_starts_with(buffer, CMD_CHILD_TO_ROUTER_STAGE)) {
+                        std::string payload = string_strip(str.substr(strlen(CMD_CHILD_TO_ROUTER_STAGE)));
+                        std::string stage   = payload;
+                        float       progress = -1.0f;
+                        auto colon = payload.find(':');
+                        if (colon != std::string::npos) {
+                            stage    = payload.substr(0, colon);
+                            progress = strtof(payload.c_str() + colon + 1, nullptr);
+                        }
+                        this->update_stage(name, stage, progress);
                     }
                 }
             } else {
@@ -985,6 +1002,16 @@ void server_models::update_loaded_info(const std::string & name, std::string & r
     cv.notify_all();
 }
 
+void server_models::update_stage(const std::string & name, const std::string & stage, float progress) {
+    // store the latest reported load stage/progress on the named instance; an unknown name is left alone
+    std::unique_lock<std::mutex> lk(mutex);
+    if (const auto entry = mapping.find(name); entry != mapping.end()) {
+        entry->second.meta.load_stage    = stage;
+        entry->second.meta.load_progress = progress;
+    }
+    cv.notify_all(); // waiters on cv are woken whether or not the name was found
+}
+
 void server_models::wait_until_loading_finished(const std::string & name) {
     std::unique_lock<std::mutex> lk(mutex);
     cv.wait(lk, [this, &name]() {
@@ -1106,6 +1133,112 @@ void server_models::notify_router_sleeping_state(bool is_sleeping) {
     common_log_resume(common_log_main());
 }
 
+void server_models::notify_router_stage(const char * stage, float progress) {
+    // one line per report: "<prefix><stage>" or "<prefix><stage>:<fraction>" (progress < 0 omits the fraction), sent with a single fputs so loader logging on the shared stdout cannot interleave
+    char line[96];
+    const int len = progress >= 0.0f ? snprintf(line, sizeof(line), "%s%s:%.4f\n", CMD_CHILD_TO_ROUTER_STAGE, stage, progress)
+                                     : snprintf(line, sizeof(line), "%s%s\n", CMD_CHILD_TO_ROUTER_STAGE, stage);
+    if (len < 0 || (size_t) len >= sizeof(line)) {
+        return; // formatting failed or the line was truncated: it would lose its '\n' and run into the next stdout line, so drop the report
+    }
+    common_log_pause(common_log_main());
+    fflush(stdout);
+    fputs(line, stdout);
+    fflush(stdout);
+    common_log_resume(common_log_main());
+}
+
+// throttles stage reports to the router: progress callbacks fire per tensor/chunk (hundreds of times), so forward only a stage switch, a new whole percent, or completion
+struct stage_emitter {
+    std::string last_stage;
+    int         last_pct = -1;
+
+    void emit(const char * stage, float progress) {
+        const int whole_pct = (int) (progress * 100.0f);
+        if (last_stage == stage && whole_pct == last_pct && !(progress >= 1.0f)) {
+            return; // same stage, same whole percent, not complete: nothing new to report
+        }
+        last_stage = stage;
+        last_pct   = whole_pct;
+        server_models::notify_router_stage(stage, progress);
+    }
+};
+
+// single model per child; emission sources don't overlap in time (download at arg-parse, then
+// single-threaded load/warmup/finalize), and the download callback serializes its part threads.
+static stage_emitter g_stage_emitter;
+
+bool server_models::child_load_progress_callback(float progress, void * /*user_data*/) {
+    g_stage_emitter.emit(COMMON_LOAD_STAGE_LOAD, progress); // tensor-loading progress is reported under the "load" stage
+    return true; // never abort loading (returning false from this callback would abort it)
+}
+
+void server_models::child_load_stage_callback(const char * stage, float progress, void * /*user_data*/) {
+    g_stage_emitter.emit(stage, progress); // coarse phase markers (e.g. warmup / finalize), sent through the same throttled emitter as "load"
+}
+
+// forwards download progress to the router. multi-part GGUFs download parts in parallel, each with
+// its own byte counts, so aggregate across parts for a whole-model 0->1 instead of one racing part.
+//
+// the per-file total arrives lazily (from each file's HEAD), so a naive sum over only-seen files has
+// a growing denominator and the percent regresses (e.g. one part hits 100% before the rest register).
+// to avoid that, stay indeterminate until every expected file is resolved (size known, or finished
+// for a cached/unknown-size file); only then is the denominator stable and the percent monotonic.
+struct child_download_progress_callback : common_download_callback {
+    void on_plan(size_t total_files) override {
+        std::lock_guard<std::mutex> guard(mtx);
+        // start each download pass from a clean slate (defensive: the child loads one model, but a
+        // validation pass could plan twice) so an old denominator never carries over
+        planned = total_files;
+        sizes.clear();
+        settled.clear();
+    }
+    void on_start(const common_download_progress & p) override { record(p, /*done=*/false); }
+    void on_update(const common_download_progress & p) override { record(p, /*done=*/false); }
+    void on_done(const common_download_progress & p, bool /*ok*/) override { record(p, /*done=*/true); }
+
+private:
+    std::mutex mtx;
+    std::unordered_map<std::string, std::pair<size_t, size_t>> sizes;   // url -> {downloaded, total}
+    std::unordered_set<std::string>                            settled; // urls whose size is known, or that finished
+    size_t planned = 0;                                                 // number of files announced by on_plan
+
+    void record(const common_download_progress & p, bool done) {
+        // the lock also keeps the parallel part threads from racing on g_stage_emitter's state
+        std::lock_guard<std::mutex> guard(mtx);
+
+        const bool size_known = p.total > 0;
+        if (size_known) {
+            sizes[p.url] = { p.downloaded, p.total };
+        } else if (done) {
+            sizes.emplace(p.url, std::pair<size_t, size_t>(0, 0)); // cached / unknown-size file: no bytes, keeps any earlier entry
+        }
+        if (size_known || done) {
+            settled.insert(p.url); // a finished file counts as settled too, so we don't wait on it forever
+        }
+
+        // while any planned file is unsettled the denominator can still grow: stay indeterminate so the UI animates instead of showing a percent that jumps backwards
+        if (planned == 0 || settled.size() < planned) {
+            g_stage_emitter.emit(COMMON_LOAD_STAGE_DOWNLOAD, -1.0f);
+            return;
+        }
+
+        size_t bytes_done  = 0;
+        size_t bytes_total = 0;
+        for (const auto & entry : sizes) {
+            bytes_done  += entry.second.first;
+            bytes_total += entry.second.second;
+        }
+        // a zero total means every file was cached/unknown-size: nothing to transfer, so report complete
+        g_stage_emitter.emit(COMMON_LOAD_STAGE_DOWNLOAD, bytes_total == 0 ? 1.0f : (float) bytes_done / (float) bytes_total);
+    }
+};
+
+void server_models::register_child_download_progress() {
+    static child_download_progress_callback cb; // function-local static: the object stays alive after this returns, and only its address is handed over
+    common_download_set_default_callback(&cb); // becomes the default for downloads that pass no callback of their own
+}
+
 
 //
 // server_models_routes
@@ -1238,6 +1371,14 @@ void server_models_routes::init_routes() {
                 {"value",  server_model_status_to_string(meta.status)},
                 {"args",   meta.args},
             };
+            if (meta.status == SERVER_MODEL_STATUS_LOADING) {
+                if (!meta.load_stage.empty()) {
+                    status["stage"] = meta.load_stage;
+                }
+                if (meta.load_progress >= 0.0f) {
+                    status["progress"] = meta.load_progress;
+                }
+            }
             if (!meta.preset.name.empty()) {
                 common_preset preset_copy = meta.preset;
                 unset_reserved_args(preset_copy, false);
diff --git a/tools/server/server-models.h b/tools/server/server-models.h
index 2198589a7aa2..11b0fa80773b 100644
--- a/tools/server/server-models.h
+++ b/tools/server/server-models.h
@@ -68,6 +68,8 @@ struct server_model_meta {
     int stop_timeout = 0; // seconds to wait before force-killing the model instance during shutdown
     mtmd_caps multimodal; // multimodal capabilities
     bool need_download = false; // whether the model needs to be downloaded before loading
+    std::string load_stage = "";  // current load stage ("download"/"load"/"warmup"/"finalize"), valid while status == LOADING
+    float load_progress = -1.0f;  // progress 0..1 within the current stage, or <0 if indeterminate
 
     bool is_ready() const {
         return status == SERVER_MODEL_STATUS_LOADED;
@@ -150,6 +152,7 @@ struct server_models {
     // update the status of a model instance (thread-safe)
     void update_status(const std::string & name, server_model_status status, int exit_code);
     void update_loaded_info(const std::string & name, std::string & raw_info);
+    void update_stage(const std::string & name, const std::string & stage, float progress);
 
     // wait until the model instance is fully loaded (thread-safe)
     // return when the model no longer in "loading" state
@@ -172,6 +175,19 @@ struct server_models {
 
     // notify the router server that the sleeping state has changed
     static void notify_router_sleeping_state(bool sleeping);
+
+    // notify the router server of the current load stage (progress < 0 => indeterminate phase)
+    static void notify_router_stage(const char * stage, float progress);
+
+    // llama_progress_callback for tensor loading: forwards throttled "load" progress to the router
+    static bool child_load_progress_callback(float progress, void * user_data);
+
+    // common_params load_stage_callback: forwards a coarse phase marker (e.g. "finalize") to the router
+    static void child_load_stage_callback(const char * stage, float progress, void * user_data);
+
+    // register a process-wide download progress callback that forwards "download" progress to the router
+    // (must be called before model download happens, i.e. before common_params_parse)
+    static void register_child_download_progress();
 };
 
 struct server_models_routes {
diff --git a/tools/server/server.cpp b/tools/server/server.cpp
index a6ea749d0c3f..9a8e0b5ebcb8 100644
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -82,6 +82,11 @@ int llama_server(int argc, char ** argv) {
 
     common_init();
 
+    // child download happens during arg parsing, so register the forwarding callback beforehand
+    if (server_models::is_child_server()) {
+        server_models::register_child_download_progress();
+    }
+
     if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_SERVER)) {
         return 1;
     }
@@ -298,6 +303,11 @@ int llama_server(int argc, char ** argv) {
             ctx_server.on_sleeping_changed([&](bool sleeping) {
                 server_models::notify_router_sleeping_state(sleeping);
             });
+            // forward load stages to the router: "load" via the progress callback, warmup/finalize via the stage callback
+            params.load_progress_callback           = server_models::child_load_progress_callback;
+            params.load_progress_callback_user_data = nullptr;
+            params.load_stage_callback              = server_models::child_load_stage_callback;
+            params.load_stage_callback_user_data    = nullptr;
         }
 
         if (!ctx_server.load_model(params)) {
diff --git a/tools/ui/src/lib/components/app/models/ModelsSelectorDropdown.svelte b/tools/ui/src/lib/components/app/models/ModelsSelectorDropdown.svelte
index 40006a4c9359..2ca9592f8a2d 100644
--- a/tools/ui/src/lib/components/app/models/ModelsSelectorDropdown.svelte
+++ b/tools/ui/src/lib/components/app/models/ModelsSelectorDropdown.svelte
@@ -1,5 +1,5 @@
 <script lang="ts">
-	import { ChevronDown, Loader2, Package } from '@lucide/svelte';
+	import { Loader2, Package } from '@lucide/svelte';
 	import * as DropdownMenu from '$lib/components/ui/dropdown-menu';
 	import * as Tooltip from '$lib/components/ui/tooltip';
 	import { KeyboardKey } from '$lib/enums';
@@ -9,7 +9,8 @@
 		DropdownMenuSearchable,
 		ModelId,
 		ModelsSelectorList,
-		ModelsSelectorOption
+		ModelsSelectorOption,
+		ModelsSelectorTriggerIndicator
 	} from '$lib/components/app';
 	import type { ModelItem } from './utils';
 
@@ -123,7 +124,7 @@
 							<DropdownMenu.Trigger
 								{...props}
 								class={[
-									`inline-grid cursor-pointer grid-cols-[1fr_auto_1fr] items-center gap-1.5 rounded-sm bg-background px-1.5 py-1 text-xs shadow-sm transition hover:bg-muted-foreground/20 focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-60 dark:bg-muted-foreground/15 dark:text-secondary-foreground`,
+									`inline-flex cursor-pointer items-center gap-1.5 rounded-sm bg-background px-1.5 py-1 text-xs shadow-sm transition hover:bg-muted-foreground/20 focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-60 dark:bg-muted-foreground/15 dark:text-secondary-foreground`,
 									!ms.isCurrentModelInCache
 										? 'bg-red-400/10 !text-red-400 hover:bg-red-400/20 hover:text-red-400'
 										: forceForegroundText
@@ -141,19 +142,18 @@
 								{#if selectedOption}
 									<ModelId
 										modelId={selectedOption.model}
-										class="min-w-0 overflow-hidden"
+										class="min-w-0 flex-1 overflow-hidden"
 										hideOrgName={false}
 										hideQuantization
 									/>
 								{:else}
-									<span class="min-w-0 font-medium">Select model</span>
+									<span class="min-w-0 flex-1 text-left font-medium">Select model</span>
 								{/if}
 
-								{#if ms.updating || ms.isLoadingModel}
-									<Loader2 class="h-3 w-3.5 shrink-0 animate-spin" />
-								{:else}
-									<ChevronDown class="h-3 w-3.5 shrink-0" />
-								{/if}
+								<ModelsSelectorTriggerIndicator
+									loading={ms.updating || ms.isLoadingModel}
+									modelId={selectedOption?.model}
+								/>
 							</DropdownMenu.Trigger>
 						{/snippet}
 					</Tooltip.Trigger>
diff --git a/tools/ui/src/lib/components/app/models/ModelsSelectorOption.svelte b/tools/ui/src/lib/components/app/models/ModelsSelectorOption.svelte
index d103d4b6711e..325aaf1bbad8 100644
--- a/tools/ui/src/lib/components/app/models/ModelsSelectorOption.svelte
+++ b/tools/ui/src/lib/components/app/models/ModelsSelectorOption.svelte
@@ -13,6 +13,7 @@
 	import type { ModelOption } from '$lib/types/models';
 	import { ServerModelStatus } from '$lib/enums';
 	import { modelsStore, routerModels } from '$lib/stores/models.svelte';
+	import { getModelLoadPhase } from './utils';
 
 	interface Props {
 		option: ModelOption;
@@ -39,17 +40,22 @@
 	}: Props = $props();
 
 	let currentRouterModels = $derived(routerModels());
-	let serverStatus = $derived.by(() => {
-		const model = currentRouterModels.find((m) => m.id === option.model);
-		return (model?.status?.value as ServerModelStatus) ?? null;
-	});
+	let model = $derived(currentRouterModels.find((m) => m.id === option.model) ?? null);
+	// store maps crash (failed=true) -> FAILED in one place
+	let serverStatus = $derived(modelsStore.getModelStatus(option.model));
+	let loadStage = $derived(model?.status?.stage ?? null);
+	let loadProgress = $derived(model?.status?.progress ?? null);
+	let loadPhase = $derived(getModelLoadPhase(loadStage));
 	let isOperationInProgress = $derived(modelsStore.isModelOperationInProgress(option.model));
 	let isFailed = $derived(serverStatus === ServerModelStatus.FAILED);
 	let isSleeping = $derived(serverStatus === ServerModelStatus.SLEEPING);
 	let isLoaded = $derived(
 		(serverStatus === ServerModelStatus.LOADED || isSleeping) && !isOperationInProgress
 	);
-	let isLoading = $derived(serverStatus === ServerModelStatus.LOADING || isOperationInProgress);
+	// don't keep spinning on a crashed instance while the operation winds down
+	let isLoading = $derived(
+		!isFailed && (serverStatus === ServerModelStatus.LOADING || isOperationInProgress)
+	);
 </script>
 
 <div
@@ -113,7 +119,32 @@
 		</div>
 
 		{#if isLoading}
-			<Loader2 class="h-4 w-4 animate-spin text-muted-foreground" />
+			{#if loadPhase}
+				{@const Icon = loadPhase.icon}
+				<span
+					class="flex items-center gap-1 text-muted-foreground"
+					title={loadPhase.numeric && loadProgress != null
+						? `${loadPhase.label} ${Math.round(loadProgress * 100)}%`
+						: loadPhase.label}
+				>
+					<!-- a numeric phase with no % yet (e.g. download before sizes are known) is indeterminate: pulse so it doesn't look frozen -->
+					<Icon
+						class={[
+							'h-3.5 w-3.5',
+							loadPhase.anim,
+							!loadPhase.anim && loadProgress == null && 'animate-pulse'
+						]}
+					/>
+					{#if loadPhase.numeric && loadProgress != null}
+						<!-- reserve room for "100%" so the w-max menu doesn't resize as the % grows -->
+						<span class="w-9 shrink-0 text-center font-mono text-xs tabular-nums">
+							{Math.round(loadProgress * 100)}%
+						</span>
+					{/if}
+				</span>
+			{:else}
+				<Loader2 class="h-4 w-4 animate-spin text-muted-foreground" />
+			{/if}
 		{:else if isFailed}
 			<div class="flex w-4 items-center justify-center">
 				<CircleAlert class="h-3.5 w-3.5 text-red-500 group-hover:hidden" />
diff --git a/tools/ui/src/lib/components/app/models/ModelsSelectorSheet.svelte b/tools/ui/src/lib/components/app/models/ModelsSelectorSheet.svelte
index 2ddbf24055b8..599befec4257 100644
--- a/tools/ui/src/lib/components/app/models/ModelsSelectorSheet.svelte
+++ b/tools/ui/src/lib/components/app/models/ModelsSelectorSheet.svelte
@@ -1,11 +1,12 @@
 <script lang="ts">
-	import { ChevronDown, Loader2, Package } from '@lucide/svelte';
+	import { Loader2, Package } from '@lucide/svelte';
 	import * as Sheet from '$lib/components/ui/sheet';
 	import { useModelsSelector } from '$lib/hooks/use-models-selector.svelte';
 	import {
 		DialogModelInformation,
 		ModelId,
 		ModelsSelectorList,
+		ModelsSelectorTriggerIndicator,
 		SearchInput
 	} from '$lib/components/app';
 
@@ -83,10 +84,10 @@
 				<Package class="h-3.5 w-3.5 shrink-0" />
 
 				{#if !selectedOption}
-					<span class="min-w-0 font-medium">Select model</span>
+					<span class="min-w-0 flex-1 text-left font-medium">Select model</span>
 				{:else}
 					<ModelId
-						class="text-xs"
+						class="min-w-0 flex-1 text-xs"
 						modelId={selectedOption?.model || ''}
 						hideQuantization
 						hideTags
@@ -94,11 +95,10 @@
 					/>
 				{/if}
 
-				{#if ms.updating || ms.isLoadingModel}
-					<Loader2 class="h-3 w-3.5 shrink-0 animate-spin" />
-				{:else}
-					<ChevronDown class="h-3 w-3.5 shrink-0" />
-				{/if}
+				<ModelsSelectorTriggerIndicator
+					loading={ms.updating || ms.isLoadingModel}
+					modelId={selectedOption?.model}
+				/>
 			</button>
 
 			<Sheet.Root bind:open={sheetOpen} onOpenChange={handleSheetOpenChange}>
diff --git a/tools/ui/src/lib/components/app/models/ModelsSelectorTriggerIndicator.svelte b/tools/ui/src/lib/components/app/models/ModelsSelectorTriggerIndicator.svelte
new file mode 100644
index 000000000000..1af7274c6b13
--- /dev/null
+++ b/tools/ui/src/lib/components/app/models/ModelsSelectorTriggerIndicator.svelte
@@ -0,0 +1,71 @@
+<script lang="ts">
+	import { ChevronDown, CircleAlert, Loader2 } from '@lucide/svelte';
+	import { ServerModelStatus } from '$lib/enums';
+	import { modelsStore, routerModels } from '$lib/stores/models.svelte';
+	import { getModelLoadPhase } from './utils';
+
+	interface Props {
+		// selection-transition hint from the parent (e.g. ms.updating). The indicator also
+		// derives loading from the model's own status below, so it survives re-selection,
+		// concurrent loads, and out-of-band loads that never set the parent's flag.
+		loading: boolean;
+		modelId?: string | null;
+	}
+
+	let { loading, modelId = null }: Props = $props();
+
+	let model = $derived(modelId ? (routerModels().find((m) => m.id === modelId) ?? null) : null);
+	// router reports a crash as value="unloaded" + failed=true (see store.getModelStatus)
+	let isFailed = $derived(model?.status?.failed === true);
+	// Derive loading from the model's own status, not just the parent's transient flag, which
+	// resets when loadModel() short-circuits on a load that is already in progress.
+	let isModelLoading = $derived(
+		!!modelId &&
+			!isFailed &&
+			(model?.status?.value === ServerModelStatus.LOADING ||
+				modelsStore.isModelOperationInProgress(modelId))
+	);
+	let showLoading = $derived(loading || isModelLoading);
+
+	let load = $derived(
+		showLoading && model
+			? { stage: model.status?.stage ?? null, progress: model.status?.progress ?? null }
+			: null
+	);
+	let loadPhase = $derived(getModelLoadPhase(load?.stage));
+</script>
+
+{#if showLoading}
+	{#if loadPhase}
+		{@const Icon = loadPhase.icon}
+		<span
+			class="flex shrink-0 items-center gap-1"
+			title={loadPhase.numeric && load?.progress != null
+				? `${loadPhase.label} ${Math.round(load.progress * 100)}%`
+				: loadPhase.label}
+		>
+			<!-- a numeric phase with no % yet (e.g. download before sizes are known) is indeterminate: pulse so it doesn't look frozen -->
+			<Icon
+				class={[
+					'h-3 w-3.5 shrink-0',
+					loadPhase.anim,
+					!loadPhase.anim && load?.progress == null && 'animate-pulse'
+				]}
+			/>
+			{#if loadPhase.numeric && load?.progress != null}
+				<!-- reserve room for "100%": the trigger is right-anchored, so a growing % would shift the name -->
+				<span class="w-9 shrink-0 text-center font-mono text-xs tabular-nums">
+					{Math.round(load.progress * 100)}%
+				</span>
+			{/if}
+		</span>
+	{:else}
+		<Loader2 class="h-3 w-3.5 shrink-0 animate-spin" />
+	{/if}
+{:else if isFailed}
+	<span class="flex shrink-0 items-center" title="Failed to load">
+		<CircleAlert class="h-3 w-3.5 shrink-0 text-red-500" />
+	</span>
+{:else}
+	<ChevronDown class="h-3 w-3.5 shrink-0" />
+{/if}
diff --git a/tools/ui/src/lib/components/app/models/index.ts b/tools/ui/src/lib/components/app/models/index.ts
index 3ac6ecb678b2..d00b9318f057 100644
--- a/tools/ui/src/lib/components/app/models/index.ts
+++ b/tools/ui/src/lib/components/app/models/index.ts
@@ -65,6 +65,14 @@ export { default as ModelsSelectorList } from './ModelsSelectorList.svelte';
  */
 export { default as ModelsSelectorOption } from './ModelsSelectorOption.svelte';
 
+/**
+ * **ModelsSelectorTriggerIndicator** - Trailing trigger indicator
+ *
+ * Shared by the desktop dropdown and mobile sheet triggers: renders the load
+ * phase (icon + percent), a spinner, or the chevron for a given model.
+ */
+export { default as ModelsSelectorTriggerIndicator } from './ModelsSelectorTriggerIndicator.svelte';
+
 /**
  * **ModelsSelectorSheet** - Mobile model selection sheet
  *
diff --git a/tools/ui/src/lib/components/app/models/utils.ts b/tools/ui/src/lib/components/app/models/utils.ts
index ae1f511e9f66..eee5d92e7e5a 100644
--- a/tools/ui/src/lib/components/app/models/utils.ts
+++ b/tools/ui/src/lib/components/app/models/utils.ts
@@ -1,3 +1,4 @@
+import { ArrowDownToLine, Flame, Layers, Settings } from '@lucide/svelte';
 import { SvelteMap } from 'svelte/reactivity';
 import type { ModelOption } from '$lib/types/models';
 
@@ -6,6 +7,30 @@ export interface ModelItem {
 	flatIndex: number;
 }
 
+export interface ModelLoadPhase {
+	icon: typeof ArrowDownToLine; // lucide icon component rendered for the phase
+	label: string; // human-readable phase name (also used as the tooltip text)
+	numeric: boolean; // true => phase reports 0..1 progress shown as a %
+	anim: string; // animation class for indeterminate phases; '' for the numeric ones
+}
+
+// Resolve a router load stage (COMMON_LOAD_STAGE_* in common/common.h) to the icon, label and
+// progress style shown for it; the dropdown trigger and the option rows share this mapping.
+// Returns null when the stage is missing or is not one of the known names.
+export function getModelLoadPhase(stage: string | null | undefined): ModelLoadPhase | null {
+	// rebuilt on every call so each caller receives its own descriptor object
+	const phases = new Map<string, ModelLoadPhase>([
+		['download', { icon: ArrowDownToLine, label: 'Downloading', numeric: true, anim: '' }],
+		['load', { icon: Layers, label: 'Loading weights', numeric: true, anim: '' }],
+		['warmup', { icon: Flame, label: 'Warming up', numeric: false, anim: 'animate-pulse' }],
+		['finalize', { icon: Settings, label: 'Finalizing', numeric: false, anim: 'animate-spin' }]
+	]);
+
+	// absent and unknown stages both end up as null
+	if (stage == null) return null;
+	return phases.get(stage) ?? null;
+}
+
 export interface OrgGroup {
 	orgName: string | null;
 	items: ModelItem[];
diff --git a/tools/ui/src/lib/hooks/use-models-selector.svelte.ts b/tools/ui/src/lib/hooks/use-models-selector.svelte.ts
index 098cb2c27aaf..56ab33ce4cf4 100644
--- a/tools/ui/src/lib/hooks/use-models-selector.svelte.ts
+++ b/tools/ui/src/lib/hooks/use-models-selector.svelte.ts
@@ -110,6 +110,8 @@ export function useModelsSelector(opts: UseModelsSelectorOptions): UseModelsSele
 			if (open) {
 				modelsStore.fetchRouterModels().then(() => {
 					modelsStore.fetchModalitiesForLoadedModels();
+					// pick up loads started elsewhere so their progress keeps updating here
+					modelsStore.observeOngoingLoads();
 				});
 			}
 
diff --git a/tools/ui/src/lib/stores/models.svelte.ts b/tools/ui/src/lib/stores/models.svelte.ts
index 1990ba6049de..8504023bd615 100644
--- a/tools/ui/src/lib/stores/models.svelte.ts
+++ b/tools/ui/src/lib/stores/models.svelte.ts
@@ -207,7 +207,10 @@ class ModelsStore {
 	getModelStatus(modelId: string): ServerModelStatus | null {
 		const model = this.routerModels.find((m) => m.id === modelId);
 
-		return model?.status.value ?? null;
+		if (!model) return null; // no router model with this id
+		// the router reports a crash as value="unloaded" + failed=true; map it to FAILED for callers
+		if (model.status.failed === true) return ServerModelStatus.FAILED;
+		return model.status.value ?? null; // otherwise the value as reported, null if absent
 	}
 
 	getModelUsage(modelId: string): SvelteSet<string> {
@@ -384,8 +387,8 @@ class ModelsStore {
 	 * No-op in router mode — fetch() already calls listRouter() internally.
 	 * Kept for API compatibility (e.g. handleOpenChange dropdown open handler).
 	 */
-	async fetchRouterModels(): Promise<void> {
-		if (!isRouterMode()) return;
+	async fetchRouterModels(): Promise<boolean> {
+		if (!isRouterMode()) return true;
 
 		try {
 			const response = await ModelsService.listRouter();
@@ -396,9 +399,11 @@ class ModelsStore {
 			if (visible.length === 1 && this.isModelLoaded(visible[0].model)) {
 				this.selectModelById(visible[0].id);
 			}
+			return true;
 		} catch (error) {
 			console.warn('Failed to fetch router models:', error);
 			this.routerModels = [];
+			return false;
 		}
 	}
 
@@ -637,6 +642,8 @@ class ModelsStore {
 	 */
 
 	private static readonly STATUS_POLL_INTERVAL = 500;
+	// give up after this many consecutive failed fetches (e.g. router died mid-load) instead of spinning forever
+	private static readonly MAX_POLL_FAILURES = 6;
 
 	/**
 	 * Poll for expected model status after load/unload operation.
@@ -647,8 +654,18 @@ class ModelsStore {
 		expectedStatus: ServerModelStatus
 	): Promise<void> {
 		let attempt = 0;
+		let failures = 0;
 		while (true) {
-			await this.fetchRouterModels();
+			const ok = await this.fetchRouterModels();
+			if (!ok) {
+				// tolerate transient blips, but bail if the server stays unreachable
+				if (++failures >= ModelsStore.MAX_POLL_FAILURES) {
+					throw new Error('Lost connection to the server while loading the model');
+				}
+				await new Promise((resolve) => setTimeout(resolve, ModelsStore.STATUS_POLL_INTERVAL));
+				continue;
+			}
+			failures = 0;
 
 			const currentStatus = this.getModelStatus(modelId);
 			if (currentStatus === expectedStatus) return;
@@ -693,6 +710,37 @@ class ModelsStore {
 		}
 	}
 
+	/**
+	 * Follow a load started elsewhere (another client/tab): poll until LOADED, then call
+	 * updateModelModalities(). Same path as loadModel() without the load() call — the
+	 * modelLoadingStates guard keeps it from stacking on a load we already started or observe.
+	 */
+	observeModelLoad(modelId: string): void {
+		if (this.isModelLoaded(modelId)) return; // nothing left to follow
+		if (this.modelLoadingStates.get(modelId)) return; // already started or observed here
+
+		this.modelLoadingStates.set(modelId, true); // released in finally() below
+		this.pollForModelStatus(modelId, ServerModelStatus.LOADED)
+			.then(() => this.updateModelModalities(modelId))
+			// the load aborting/failing on the server is the normal end of observing, not our error
+			.catch((error) => console.warn(`Stopped observing load for ${modelId}:`, error))
+			.finally(() => this.modelLoadingStates.set(modelId, false));
+	}
+
+	/**
+	 * Attach an observer to each model the router lists as LOADING (and not failed), so loads
+	 * started outside this client keep reporting progress here. Router mode only. Repeat calls
+	 * are harmless: observeModelLoad() ignores models that are already loaded or followed.
+	 */
+	observeOngoingLoads(): void {
+		if (!isRouterMode()) return;
+
+		for (const { id, status } of this.routerModels) {
+			const stillLoading = status.value === ServerModelStatus.LOADING && !status.failed;
+			if (stillLoading) this.observeModelLoad(id);
+		}
+	}
+
 	async unloadModel(modelId: string): Promise<void> {
 		if (!this.isModelLoaded(modelId)) return;
 		if (this.modelLoadingStates.get(modelId)) return;
diff --git a/tools/ui/src/lib/types/api.d.ts b/tools/ui/src/lib/types/api.d.ts
index f620d67351ba..6e63ffd05e34 100644
--- a/tools/ui/src/lib/types/api.d.ts
+++ b/tools/ui/src/lib/types/api.d.ts
@@ -65,6 +65,14 @@ export interface ApiModelStatus {
 	value: ServerModelStatus;
 	/** Command line arguments used when loading (only for loaded models) */
 	args?: string[];
+	/** Current load phase while loading: "download" | "load" | "warmup" | "finalize" */
+	stage?: string;
+	/** Progress 0..1 within the current phase; absent for indeterminate phases */
+	progress?: number;
+	/** True when the model instance crashed/exited with a non-zero code (value stays "unloaded") */
+	failed?: boolean;
+	/** Exit code of the crashed instance (present when failed is true) */
+	exit_code?: number;
 }
 
 /**