ravi9 · wine99 · May 27, 2026 · May 27, 2026 · May 21, 2026 · May 26, 2026
@@ -98,16 +98,31 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph, std::map<std::string, std::sh
     }
 }
 
+namespace {  // helpers local to this translation unit
+bool is_inplace_op(const ggml_tensor * node) {  // true for SET_ROWS, CPY, or a SCALE that has a view_src
+    return node->op == GGML_OP_SET_ROWS || node->op == GGML_OP_CPY || (node->op == GGML_OP_SCALE && node->view_src);
+}
+
+bool is_same_shape(const ggml_tensor * a, const ggml_tensor * b) {  // true when ne[] agrees in all GGML_MAX_DIMS dims
+    for (int i = 0; i < GGML_MAX_DIMS; i++) {
+        if (a->ne[i] != b->ne[i]) {
+            return false;  // first differing extent settles it
+        }
+    }
+    return true;
+}
+}  // namespace
+
 void GgmlOvDecoder::set_input_output() {
     for (int node_n = 0; node_n < m_cgraph->n_nodes; node_n++) {
-        auto node = m_cgraph->nodes[node_n];
+        auto * node = m_cgraph->nodes[node_n];
 
         NodeInfo current_node_info;
         auto node_name = std::string(node->name);
         auto node_output_name = node_name;
         auto * node_output = node;
-        if (node->op == GGML_OP_SET_ROWS) {
-            // SET_ROWS updates the tensor in place. For later ov op that uses the
+        if (::is_inplace_op(node)) {
+            // In-place ops update the tensor in place. For later ov ops that use
             // the view_src of SET_ROWS, we need to make sure they get the updated tensor
             // by putting the view_src name in the tensor_map in
             // <openvino>/src/frontends/ggml/src/translate_session.cpp
@@ -167,6 +182,10 @@ int GgmlOvDecoder::compute_op_case(const ggml_tensor * node) const {
     switch (node->op) {
     case GGML_OP_RESHAPE: {
         auto * src = node->src[0];
+        if (is_same_shape(src, node)) {
+            op_case = 7;
+            break;
+        }
         if (src->op == GGML_OP_RESHAPE && src->src[0]->ne[0] == node->ne[0] && src->src[0]->ne[1] == node->ne[1]) {
             op_case = 4;
         } else if (node->ne[0] * node->ne[1] == src->ne[0]) {
@@ -295,6 +314,33 @@ int GgmlOvDecoder::compute_op_case(const ggml_tensor * node) const {
         }
         break;
     }
+    case GGML_OP_RMS_NORM: {
+        if (node->src[0]->op == GGML_OP_VIEW) {
+            if (is_same_shape(node->src[0]->src[0], node->src[0])) {
+                op_case = 1;
+            } else if (node->src[0]->src[0]->op == GGML_OP_GATED_DELTA_NET) {
+                op_case = 2;
+            }
+        }
+        break;
+    }
+    case GGML_OP_CPY: {
+        if (node->src[0]->op == GGML_OP_VIEW) {
+            if (node->src[0]->src[0]->op == GGML_OP_GATED_DELTA_NET) {
+                op_case = 1;
+            } else if (std::string(node->src[0]->name).find("conv_state_last") == 0) {
+                op_case = 2;
+                break;
+            }
+        }
+        break;
+    }
+    case GGML_OP_SCALE: {  // op_case 1: SCALE whose view_src passes is_kvcache
+        if (node->view_src != nullptr && is_kvcache(node->view_src, nullptr)) {  // is_kvcache dereferences its arg
+            op_case = 1;
+        }
+        break;
+    }
     default:
         break;
     }
@@ -476,6 +522,13 @@ std::pair<ModelParams, ComputeParams> GgmlOvDecoder::compute_llm_params(ggml_cgr
                 model_params.mixed_rope_params = true;
             }
         }
+        if (node->op == GGML_OP_GATED_DELTA_NET) {
+            model_params.state_size = node->src[0]->ne[0];
+        }
+        if (node->op == GGML_OP_SCALE && node->view_src != nullptr && is_kvcache(node->view_src, nullptr)) {
+            compute_params.cache_rs_reset_len = ggml_nelements(node) / node->view_src->ne[0];
+            compute_params.cache_rs_reset_idx = node->src[0]->view_offs / node->view_src->ne[0];
+        }  // null check first: is_kvcache dereferences its tensor argument, and a SCALE may have no view_src
     }
     auto * output_tensor = cgraph->nodes[cgraph->n_nodes - 1];
     compute_params.output_len = output_tensor->ne[1];
@@ -595,6 +648,11 @@ void GgmlOvDecoder::add_extra_inputs() {
         create_1d_input("token_len_per_seq", m_compute_params.token_len_per_seq);
     }
     // create_1d_input("token_len", m_compute_params.token_len_per_seq * m_compute_params.n_seq_active);
+
+    if (m_compute_params.cache_rs_reset_idx != -1) {
+        create_1d_input("cache_rs_reset_idx", m_compute_params.cache_rs_reset_idx);
+        create_1d_input("cache_rs_reset_len", m_compute_params.cache_rs_reset_len);
+    }
 }
 
 bool GgmlOvDecoder::node_is_used_as_src(const int node_idx) {
@@ -691,8 +749,8 @@ void GgmlOvDecoder::compute_model_outputs() {
         }
         auto cur_node_use_count = m_cgraph->use_counts[ggml_hash_find(&m_cgraph->visited_hash_set, cur_node)];
         if (cur_node_use_count == 0) {
-            // The output of SET_ROWS is the view_src tensor, which is updated in place. We should use the view_src name as the output name to make sure it can be correctly matched with the later ops that use the view_src.
-            if (cur_node != nullptr && cur_node->op == GGML_OP_SET_ROWS) {
+            // The output of in-place ops is the view_src tensor, which is updated in place. We should use the view_src name as the output name to make sure it can be correctly matched with the later ops that use the view_src.
+            if (cur_node != nullptr && ::is_inplace_op(cur_node)) {
                 cur_node = cur_node->view_src;
             }
         } else {
@@ -712,7 +770,7 @@ void GgmlOvDecoder::compute_model_outputs() {
         if (cur_node != nullptr) {
             std::string node_output_name(cur_node->name);
             m_model_outputs[node_output_name] = cur_node;
-            m_model_output_names.push_back(node_output_name);
+            m_model_output_names.insert(node_output_name);
         }
     }
 }
@@ -1231,6 +1289,26 @@ std::vector<std::string> GgmlOvDecoder::get_output_names(int node_idx) const {
     return {m_node_info_list[node_idx].node_output_name};
 }
 
+bool GgmlOvDecoder::is_inplace_op(int node_idx) const {  // applies the file-local is_inplace_op to node node_idx
+    return ::is_inplace_op(m_node_info_list[node_idx].node);  // node_idx is not range-checked here
+}
+
+std::string GgmlOvDecoder::get_view_src_name(int node_idx) const {  // name of the node's view_src, or "" if none
+    auto * node = m_node_info_list[node_idx].node;
+    if (node->view_src == nullptr) {
+        return "";  // no view_src: report an empty name
+    }
+    return node->view_src->name;
+}
+
+bool GgmlOvDecoder::is_view_like_alias_of(int node_idx, const std::string & view_src_name) const {  // see returns below
+    auto * node = m_node_info_list[node_idx].node;
+    if (node->view_src == nullptr || std::string(node->view_src->name) != view_src_name) {
+        return false;  // no view_src, or its name differs from view_src_name
+    }
+    return node->op == GGML_OP_RESHAPE || node->op == GGML_OP_VIEW;  // only RESHAPE and VIEW nodes qualify
+}
+
 const std::string & GgmlOvDecoder::get_op_name() const {
     static const std::string unknown_name = "UNKNOWN_OP_NAME";
     return unknown_name;
@@ -1404,14 +1482,18 @@ void GgmlOvDecoder::compute_node_dynamic_dims() {
                 }
                 if (m_node_dynamic_dims[node] != -1 && dynamic_dim_value != node->ne[m_node_dynamic_dims[node]]) {
                     m_node_dynamic_dims[node] = -1;
-                    // std::cout << "Warning: Dynamic dim value mismatch for node: " << node->name
-                    //           << " and its src[0]: " << node->src[0]->name << std::endl;
+                    GGML_LOG_WARN("ggml-openvino: dynamic dim value mismatch for VIEW node '%s', src[0]: '%s'\n",
+                                  node->name, node->src[0]->name);
                 }
             }
             break;
         }
         case GGML_OP_TRANSPOSE:
         case GGML_OP_RESHAPE: {
+            if (is_same_shape(node->src[0], node)) {
+                m_node_dynamic_dims[node] = m_node_dynamic_dims[node->src[0]];
+                break;
+            }
             // RESHAPE requires src[0] to be contiguous, so both src and result
             // have standard compact strides: nb[i] = type_size * prod(ne[0..i-1]).
             // Match src->nb[dynamic_dim] against result->nb[i] to find the output
@@ -1429,7 +1511,7 @@ void GgmlOvDecoder::compute_node_dynamic_dims() {
                     }
                 }
                 if (m_node_dynamic_dims[node] == -1) {
-                    // std::cout << "Cannot determine dynamic dim for RESHAPE node: " << node->name << std::endl;
+                    GGML_LOG_WARN("ggml-openvino: cannot determine dynamic dim for RESHAPE node '%s'\n", node->name);
                 }
             }
             break;
@@ -1480,25 +1562,46 @@ void GgmlOvDecoder::compute_node_dynamic_dims() {
                     }
                     if (matched_dim_count != 1) {
                         m_node_dynamic_dims[node] = -1;
-                        // std::cout << "Warning: Cannot determine dynamic dim for CONT node: " << node->name
-                        //           << " and its src[0]: " << node->src[0]->name << std::endl;
+                        GGML_LOG_WARN("ggml-openvino: cannot determine dynamic dim for CONT node '%s', src[0]: '%s'\n",
+                                      node->name, node->src[0]->name);
                     }
                 }
             }
             break;
+        case GGML_OP_CONCAT:
+            for (int i = 0; i < GGML_MAX_DIMS; i++) {
+                if (node->src[0]->ne[i] != node->ne[i]) {
+                    m_node_dynamic_dims[node] = i;
+                    break;
+                }
+            }
+            break;
+        case GGML_OP_SSM_CONV:
+        case GGML_OP_GATED_DELTA_NET:
+            m_node_dynamic_dims[node] = 1;
+            break;
         case GGML_OP_RMS_NORM:
+        case GGML_OP_L2_NORM:
         case GGML_OP_NORM:
         case GGML_OP_ADD:
+        case GGML_OP_SUB:
         case GGML_OP_GLU:
         case GGML_OP_ROPE:
         case GGML_OP_SCALE:
         case GGML_OP_SOFT_MAX:
         case GGML_OP_ARGSORT:
         case GGML_OP_ADD_ID:
         case GGML_OP_UNARY:
+        case GGML_OP_CUMSUM:
+        case GGML_OP_FILL:
+        case GGML_OP_SET:
+        case GGML_OP_DIAG:
+        case GGML_OP_TRI:
+        case GGML_OP_REPEAT:
             m_node_dynamic_dims[node] = m_node_dynamic_dims[node->src[0]];
             break;
         case GGML_OP_MUL_MAT_ID:
+        case GGML_OP_SOLVE_TRI:
             m_node_dynamic_dims[node] = m_node_dynamic_dims[node->src[1]];
             break;
         case GGML_OP_CPY:
@@ -1534,7 +1637,8 @@ void GgmlOvDecoder::compute_node_dynamic_dims() {
             break;
         }
         default:
-            // std::cout << "Doesn't handle node name: " << node->name << " op: " << ggml_op_name(node->op) << std::endl;
+            GGML_LOG_DEBUG("ggml-openvino: compute_node_dynamic_dims: unhandled op %s for node '%s'\n",
+                           ggml_op_name(node->op), node->name);
             break;
         }
     };

@@ -20,6 +20,7 @@ struct ModelParams {
     int n_seq = 1;
     int n_heads_kv = -1;
     int head_size = -1;
+    int state_size = -1;  // for SSM models, e.g. qwen35
     int32_t rope_params[15];
     bool mixed_rope_params = false;
     std::vector<int> swa_layers;
@@ -48,6 +49,16 @@ struct ComputeParams {
     int token_len_per_seq = -1;
     int past_kv_len = -1;
     int output_len = 1;
+
+    int cache_rs_reset_idx = -1;
+    int cache_rs_reset_len = -1;
+    // SSM/DeltaNet models optionally clear cache_r and cache_s of certain slots in the cgraph, e.g.:
+    // 3: [ 18432,     4,     1,     1] RESHAPE              cache_r_l0 (reshaped)
+    //    [ 18432,     4,     1,     1]            0: NONE        cache_r_l0
+    // 4: [ 18432,     1,     1,     1] VIEW                 cache_r_l0 (reshaped) (view)
+    //    [ 18432,     4,     1,     1]            0: RESHAPE     cache_r_l0 (reshaped)
+    // 5: [ 18432,     1,     1,     1] SCALE                cache_r_l0 (reshaped) (view) (view)
+    //    [ 18432,     1,     1,     1]            0: VIEW        cache_r_l0 (reshaped) (view)
 };
 
 class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
@@ -156,6 +167,12 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
 
     virtual std::vector<std::string> get_output_names(int node_idx) const override;
 
+    virtual bool is_inplace_op(int node_idx) const override;
+
+    virtual std::string get_view_src_name(int node_idx) const override;
+
+    virtual bool is_view_like_alias_of(int node_idx, const std::string & view_src_name) const override;
+
     virtual const std::string & get_op_type() const override;
 
     virtual const std::string & get_op_type(int node_idx) const override;
@@ -189,7 +206,7 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
         return m_model_weights;
     }
 
-    virtual std::vector<std::string> get_model_output_names() const override { return m_model_output_names; }
+    virtual std::set<std::string> get_model_output_names() const override { return m_model_output_names; }
 
     const std::map<std::string, ggml_tensor *> & get_model_outputs() const { return m_model_outputs; }
 
@@ -214,6 +231,8 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
 
     virtual bool has_mixed_rope_params() const override { return m_model_params.mixed_rope_params; }
 
+    virtual int get_ssm_state_size() const override { return m_model_params.state_size; }
+
     virtual std::map<std::string, std::string> get_kv_param_res_names() const override;
 
     virtual bool is_static() const override { return m_is_static; }
@@ -287,6 +306,7 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
         return op->op == GGML_OP_ROPE && tensor == op->src[2];
     }
 
+    // also returns true for cache_s and cache_r in SSM/DeltaNet models
     inline static bool is_kvcache(const ggml_tensor * tensor, const ggml_tensor * op) {
         return tensor->buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY ||
                (op != nullptr && op->op == GGML_OP_SET_ROWS && op->src[2] == tensor);
@@ -334,7 +354,7 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
     std::map<std::string, std::shared_ptr<ov::Tensor>> m_model_extra_input_values;
     std::map<std::string, std::shared_ptr<ov::Node>> m_model_weights;
     std::map<std::string, ggml_tensor *> m_model_outputs;
-    std::vector<std::string> m_model_output_names;
+    std::set<std::string> m_model_output_names;
     std::vector<NodeInfo> m_node_info_list;
     std::map<ggml_tensor *, int> m_node_dynamic_dims;