Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,3 @@ bench/**/*.json
tmp/models
/build/exo
/.claude/skills
/.claude
24 changes: 13 additions & 11 deletions .mlx_typings/mlx_lm/models/cache.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -148,18 +148,21 @@ class QuantizedKVCache(_BaseCache):
...

class KVCache(_BaseCache):
step = ...
step: int
keys: mx.array | None
values: mx.array | None
_idx: int
def __init__(self) -> None: ...
def update_and_fetch(self, keys, values): # -> tuple[array | Any, array | Any]:
...
def update_and_fetch(
self, keys: mx.array, values: mx.array
) -> tuple[mx.array, mx.array]: ...
@property
def state(
self,
) -> tuple[mx.array | None, mx.array | None]: ...
@state.setter
def state(self, v) -> None: ...
def is_trimmable(self): # -> Literal[True]:
...
def state(self, v: tuple[mx.array | None, mx.array | None]) -> None: ...
def is_trimmable(self) -> bool: ...
def trim(self, n: int) -> int: ...
def to_quantized(
self, group_size: int = ..., bits: int = ...
Expand All @@ -169,20 +172,19 @@ class KVCache(_BaseCache):
) -> mx.array | Literal["causal"] | None: ...

class RotatingKVCache(_BaseCache):
step = ...
step: int
keys: mx.array | None
values: mx.array | None
keep: int
max_size: int
_idx: int
def __init__(self, max_size, keep=...) -> None: ...
def __init__(self, max_size: int, keep: int = ...) -> None: ...
def _trim(
self, trim_size: int, v: mx.array, append: mx.array | None = ...
) -> mx.array: ...
def update_and_fetch(
self, keys, values
): # -> tuple[array | Any, array | Any] | tuple[array | Any, array | Any | None]:
...
self, keys: mx.array, values: mx.array
) -> tuple[mx.array, mx.array]: ...
@property
def state(
self,
Expand Down
62 changes: 31 additions & 31 deletions .mlx_typings/mlx_lm/models/gemma4_text.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -10,37 +10,37 @@ from .switch_layers import SwitchGLU

@dataclass
class ModelArgs(BaseModelArgs):
model_type: str
hidden_size: int
num_hidden_layers: int
intermediate_size: int
num_attention_heads: int
head_dim: int
global_head_dim: int
global_partial_rotary_factor: float
rms_norm_eps: float
vocab_size: int
vocab_size_per_layer_input: int
num_key_value_heads: int
num_global_key_value_heads: Optional[int]
num_kv_shared_layers: int
pad_token_id: int
hidden_size_per_layer_input: int
rope_traditional: bool
partial_rotary_factor: float
rope_parameters: Optional[Dict[str, Any]]
sliding_window: int
sliding_window_pattern: int
max_position_embeddings: int
attention_k_eq_v: bool
final_logit_softcapping: float
use_double_wide_mlp: bool
enable_moe_block: bool
num_experts: Optional[int]
top_k_experts: Optional[int]
moe_intermediate_size: Optional[int]
layer_types: Optional[List[str]]
tie_word_embeddings: bool
model_type: str = ...
hidden_size: int = ...
num_hidden_layers: int = ...
intermediate_size: int = ...
num_attention_heads: int = ...
head_dim: int = ...
global_head_dim: int = ...
global_partial_rotary_factor: float = ...
rms_norm_eps: float = ...
vocab_size: int = ...
vocab_size_per_layer_input: int = ...
num_key_value_heads: int = ...
num_global_key_value_heads: Optional[int] = ...
num_kv_shared_layers: int = ...
pad_token_id: int = ...
hidden_size_per_layer_input: int = ...
rope_traditional: bool = ...
partial_rotary_factor: float = ...
rope_parameters: Optional[Dict[str, Any]] = ...
sliding_window: int = ...
sliding_window_pattern: int = ...
max_position_embeddings: int = ...
attention_k_eq_v: bool = ...
final_logit_softcapping: float = ...
use_double_wide_mlp: bool = ...
enable_moe_block: bool = ...
num_experts: Optional[int] = ...
top_k_experts: Optional[int] = ...
moe_intermediate_size: Optional[int] = ...
layer_types: Optional[List[str]] = ...
tie_word_embeddings: bool = ...

def __post_init__(self) -> None: ...

Expand Down
42 changes: 3 additions & 39 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions bench/eval_tool_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
from typing import Any, Literal

import httpx
from exo_tools.client import ExoClient, ExoHttpError
from exo_tools.harness import (
from harness import (
ExoClient,
ExoHttpError,
add_common_instance_args,
capture_cluster_snapshot,
instance_id_from_instance,
Expand Down
5 changes: 3 additions & 2 deletions bench/exo_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@
from statistics import mean
from typing import Any

from exo_tools.client import ExoClient, ExoHttpError
from exo_tools.harness import (
from harness import (
ExoClient,
ExoHttpError,
add_common_instance_args,
capture_cluster_snapshot,
find_existing_instance,
Expand Down
5 changes: 3 additions & 2 deletions bench/exo_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@
from typing import Any

import httpx
from exo_tools.client import ExoClient, ExoHttpError
from exo_tools.harness import (
from harness import (
ExoClient,
ExoHttpError,
add_common_instance_args,
capture_cluster_snapshot,
find_existing_instance,
Expand Down
Loading