diff --git a/pkg/sql/colexec/external/hive_partition.go b/pkg/sql/colexec/external/hive_partition.go new file mode 100644 index 0000000000000..74d4109f31577 --- /dev/null +++ b/pkg/sql/colexec/external/hive_partition.go @@ -0,0 +1,829 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package external + +import ( + "context" + "fmt" + "iter" + "path" + "strconv" + "strings" + + "github.com/matrixorigin/matrixone/pkg/catalog" + "github.com/matrixorigin/matrixone/pkg/common/moerr" + "github.com/matrixorigin/matrixone/pkg/container/types" + "github.com/matrixorigin/matrixone/pkg/container/vector" + "github.com/matrixorigin/matrixone/pkg/fileservice" + "github.com/matrixorigin/matrixone/pkg/logutil" + "github.com/matrixorigin/matrixone/pkg/pb/plan" + "github.com/matrixorigin/matrixone/pkg/sql/parsers/tree" + plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan" + "github.com/matrixorigin/matrixone/pkg/sql/plan/function" +) + +const ( + HiveDefaultPartition = "__HIVE_DEFAULT_PARTITION__" + + maxPartitionCount = 50000 + // warnPartitionCount must be < maxListCalls-1 to be reachable. + // For single-level: N partitions = N+1 list calls. + // Requirements specify warn at 10000, but that's unreachable with maxListCalls=10000. + // Use 5000 as practical threshold for P0. + warnPartitionCount = 5000 + maxListCalls = 10000 +) + +// PartitionFileEntry represents a discovered file within a Hive partition structure. +type PartitionFileEntry struct { + FilePath string + FileSize int64 +} + +// PartitionPredicate represents a filter hint for partition pruning. +type PartitionPredicate struct { + ColName string + Op PartitionOp + Values []string +} + +type PartitionOp int + +const ( + PartOpEq PartitionOp = iota + PartOpIn +) + +// PartitionDiscoveryResult holds the outcome of Hive partition discovery. +type PartitionDiscoveryResult struct { + Files []PartitionFileEntry + PartitionCount int + PrunedCount int + ListCalls int + warnEmitted bool +} + +// HivePartSegment is the parsed result of a single Hive partition directory segment. +type HivePartSegment struct { + Key string + Value string +} + +// ListDirFunc abstracts directory listing for testability and S3/local duality. +type ListDirFunc func(ctx context.Context, prefix string) iter.Seq2[*fileservice.DirEntry, error] + +// NewListDirFunc creates a ListDirFunc backed by GetForETLWithType. +// TODO: For S3 this re-creates an S3FS instance per List call; pre-build the +// FS once and reuse across recursive calls for better performance. +func NewListDirFunc(param *tree.ExternParam) ListDirFunc { + return func(ctx context.Context, prefix string) iter.Seq2[*fileservice.DirEntry, error] { + fs, readPath, err := plan2.GetForETLWithType(param, prefix) + if err != nil { + return func(yield func(*fileservice.DirEntry, error) bool) { + yield(nil, err) + } + } + return fs.List(ctx, readPath) + } +} + +// normalizeExternalPath ensures consistent path format for prefix matching. 
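+//
+// A couple of illustrative inputs (not exhaustive):
+//
+//	normalizeExternalPath(" data/hive ")     => "/data/hive"
+//	normalizeExternalPath("etl:/data//hive") => "etl:/data/hive"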
+func normalizeExternalPath(p string) string { + p = strings.TrimSpace(p) + if strings.HasPrefix(p, "etl:") { + return path.Clean(p) + } + return path.Clean("/" + p) +} + +// relPartitionPath returns filePath relative to basePath. If filePath is not +// under basePath (degenerate), the normalized filePath is returned unchanged. +// Used for error messages: raw filePath contains machine-local absolute paths +// (e.g. /Users/foo/.../data.parquet) which make BVT .result files non-portable; +// the relative form ("year=abc/data.parquet") is stable across machines. +func relPartitionPath(filePath, basePath string) string { + f := normalizeExternalPath(filePath) + b := normalizeExternalPath(basePath) + if f == b { + return "" + } + if strings.HasPrefix(f, b+"/") { + return f[len(b)+1:] + } + return f +} + +// ParseHivePartitionSegment parses a directory segment like "year=2024" into key/value. +// MatrixOne intentionally treats Hive partition segment values as raw path +// segment text. DiscoverHivePartitions rejects '%' before this parser is called, +// so URL-encoded partition directory names are unsupported instead of being +// partially decoded in some call paths. +// +// Returns: +// - (seg, true, nil): valid key=value segment (value may be empty string) +// - (_, false, nil): not a key=value format (caller treats as non-partition dir) +func ParseHivePartitionSegment(segment string) (seg HivePartSegment, isHive bool, err error) { + idx := strings.IndexByte(segment, '=') + if idx <= 0 { + return HivePartSegment{}, false, nil + } + seg.Key = segment[:idx] + seg.Value = segment[idx+1:] + return seg, true, nil +} + +// ExtractPartitionValues parses partition key=value segments from a file path +// relative to basePath. Both paths are normalized internally. +func ExtractPartitionValues(filePath, basePath string, partCols []string) (map[string]string, error) { + filePath = normalizeExternalPath(filePath) + basePath = normalizeExternalPath(basePath) + + if filePath != basePath && !strings.HasPrefix(filePath, basePath+"/") { + return nil, moerr.NewInternalErrorNoCtxf( + "file path '%s' is not under base path '%s'", filePath, basePath) + } + + rel := strings.TrimPrefix(filePath, basePath) + rel = strings.TrimPrefix(rel, "/") + segments := strings.Split(rel, "/") + + values := make(map[string]string, len(partCols)) + for _, segment := range segments { + if segment == "" { + continue + } + seg, isHive, err := ParseHivePartitionSegment(segment) + if err != nil { + return nil, err + } + if !isHive { + continue + } + values[strings.ToLower(seg.Key)] = seg.Value + } + return values, nil +} + +// IsHiddenFile returns true for files/dirs starting with '.' or '_'. +func IsHiddenFile(name string) bool { + return len(name) > 0 && (name[0] == '.' || name[0] == '_') +} + +// IsParquetFile returns true for files with .parquet or .snappy.parquet suffix. +func IsParquetFile(name string) bool { + lower := strings.ToLower(name) + return strings.HasSuffix(lower, ".parquet") +} + +// DiscoverHivePartitions performs recursive list-and-filter partition discovery. 
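+//
+// As a sketch of the traversal (layout hypothetical):
+//
+//	/base/year=2024/month=01/part-0.parquet
+//	/base/year=2024/month=02/part-0.parquet
+//
+// With partCols = ["year", "month"] and a predicate year=2024, discovery
+// lists /base, keeps the matching year=2024 directory, recurses one level
+// per partition column, and collects parquet leaves at the last level.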
+func DiscoverHivePartitions(
+	ctx context.Context,
+	listDir ListDirFunc,
+	basePath string,
+	partCols []string,
+	colTypes []tree.HivePartColType,
+	predicates []PartitionPredicate,
+) (*PartitionDiscoveryResult, error) {
+	basePath = normalizeExternalPath(basePath)
+
+	if len(colTypes) != len(partCols) {
+		colTypes = make([]tree.HivePartColType, len(partCols))
+		for i := range colTypes {
+			colTypes[i] = tree.HivePartColType{Id: int32(types.T_any)}
+		}
+	}
+
+	predMap := buildPredicateMap(predicates)
+
+	result := &PartitionDiscoveryResult{}
+	err := discoverRecursive(ctx, listDir, basePath, basePath, partCols, colTypes, predMap, 0, result)
+	if err != nil {
+		return nil, err
+	}
+	return result, nil
+}
+
+func buildPredicateMap(predicates []PartitionPredicate) map[string]*PartitionPredicate {
+	m := make(map[string]*PartitionPredicate, len(predicates))
+	for i := range predicates {
+		m[predicates[i].ColName] = &predicates[i]
+	}
+	return m
+}
+
+func discoverRecursive(
+	ctx context.Context,
+	listDir ListDirFunc,
+	basePath string,
+	prefix string,
+	partCols []string,
+	colTypes []tree.HivePartColType,
+	predMap map[string]*PartitionPredicate,
+	level int,
+	result *PartitionDiscoveryResult,
+) error {
+	result.ListCalls++
+	if result.ListCalls > maxListCalls {
+		return moerr.NewInternalErrorNoCtxf(
+			"hive partition discovery exceeded %d List calls; reduce partition depth or add filters", maxListCalls)
+	}
+
+	isLastLevel := level == len(partCols)-1
+	childPrefixes := make([]string, 0)
+
+	for entry, err := range listDir(ctx, prefix) {
+		if err != nil {
+			return err
+		}
+
+		if IsHiddenFile(entry.Name) {
+			continue
+		}
+
+		if entry.IsDir {
+			if level >= len(partCols) {
+				continue
+			}
+
+			// URL-encoded partition directories are unsupported. Reject '%' during
+			// discovery so values cannot be silently interpreted differently by
+			// different code paths.
+			if strings.Contains(entry.Name, "%") {
+				return moerr.NewInternalErrorNoCtxf(
+					"hive partition directory name contains '%%' which is not supported: '%s'", entry.Name)
+			}
+
+			seg, isHive, parseErr := ParseHivePartitionSegment(entry.Name)
+			if parseErr != nil {
+				return parseErr
+			}
+			if !isHive {
+				continue
+			}
+
+			if strings.ToLower(seg.Key) != partCols[level] {
+				continue
+			}
+
+			pred := predMap[partCols[level]]
+			if !filterPartitionDir(seg.Value, colTypes[level], pred) {
+				result.PrunedCount++
+				continue
+			}
+
+			result.PartitionCount++
+			if result.PartitionCount > maxPartitionCount {
+				return moerr.NewInternalErrorNoCtxf(
+					"hive partition discovery exceeded %d partitions; consider adding partition filters", maxPartitionCount)
+			}
+			if !result.warnEmitted && result.PartitionCount > warnPartitionCount {
+				result.warnEmitted = true
+				logutil.Warnf("hive partition discovery: partition count exceeds %d (current: %d, base: %s); consider adding partition filters",
+					warnPartitionCount, result.PartitionCount, basePath)
+			}
+
+			childPrefixes = append(childPrefixes, path.Join(prefix, entry.Name))
+		}
+	}
+
+	// Count all matching partitions at this level before descending. Otherwise
+	// a very wide single-level table hits maxListCalls while collecting each
+	// leaf before maxPartitionCount can ever fire.
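+	// (For example, a hypothetical 60,000-directory single level now trips
+	// the maxPartitionCount check with a clear message instead of burning one
+	// List call per leaf until the maxListCalls cap fires first.)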
+	for _, childPrefix := range childPrefixes {
+		if isLastLevel {
+			if err := collectFiles(ctx, listDir, childPrefix, result); err != nil {
+				return err
+			}
+		} else {
+			if err := discoverRecursive(ctx, listDir, basePath, childPrefix, partCols, colTypes, predMap, level+1, result); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+func collectFiles(
+	ctx context.Context,
+	listDir ListDirFunc,
+	prefix string,
+	result *PartitionDiscoveryResult,
+) error {
+	result.ListCalls++
+	if result.ListCalls > maxListCalls {
+		return moerr.NewInternalErrorNoCtxf(
+			"hive partition discovery exceeded %d List calls; reduce partition depth or add filters", maxListCalls)
+	}
+
+	for entry, err := range listDir(ctx, prefix) {
+		if err != nil {
+			return err
+		}
+		if entry.IsDir || IsHiddenFile(entry.Name) {
+			continue
+		}
+		if IsParquetFile(entry.Name) {
+			result.Files = append(result.Files, PartitionFileEntry{
+				FilePath: path.Join(prefix, entry.Name),
+				FileSize: entry.Size,
+			})
+		}
+	}
+	return nil
+}
+
+// filterPartitionDir returns true if the directory should be kept (not pruned).
+// Only MatchFalse causes pruning; MatchUnknown is conservative (keeps directory).
+func filterPartitionDir(dirValue string, colType tree.HivePartColType, pred *PartitionPredicate) bool {
+	if pred == nil {
+		return true
+	}
+	result := matchPartitionValue(dirValue, pred.Values, colType)
+	return result != MatchFalse
+}
+
+// MatchResult is a three-state result for partition value comparison.
+type MatchResult int
+
+const (
+	MatchTrue    MatchResult = iota // definitely matches
+	MatchFalse                      // definitely does not match (safe to prune)
+	MatchUnknown                    // cannot determine (must keep directory)
+)
+
+// matchPartitionValue compares a partition directory value against predicate values.
+// Conservative: returns MatchUnknown whenever precise comparison isn't possible.
+func matchPartitionValue(dirValue string, predicateValues []string, colType tree.HivePartColType) MatchResult {
+	// SET/ENUM columns stored as numeric types but with Enumvalues must not be
+	// pruned numerically — their directory values are member names.
+	if !canPruneType(colType) {
+		return MatchUnknown
+	}
+	switch types.T(colType.Id) {
+	case types.T_any:
+		return MatchUnknown
+
+	case types.T_int8:
+		return matchInt(dirValue, predicateValues, 8)
+	case types.T_int16:
+		return matchInt(dirValue, predicateValues, 16)
+	case types.T_int32:
+		return matchInt(dirValue, predicateValues, 32)
+	case types.T_int64:
+		return matchInt(dirValue, predicateValues, 64)
+
+	case types.T_uint8:
+		return matchUint(dirValue, predicateValues, 8)
+	case types.T_uint16:
+		return matchUint(dirValue, predicateValues, 16)
+	case types.T_uint32:
+		return matchUint(dirValue, predicateValues, 32)
+	case types.T_uint64:
+		return matchUint(dirValue, predicateValues, 64)
+
+	case types.T_char, types.T_varchar, types.T_text:
+		for _, pv := range predicateValues {
+			if dirValue == pv {
+				return MatchTrue
+			}
+		}
+		return MatchUnknown
+
+	default:
+		// bool, float, decimal, date/time, json, uuid, enum, set, bit, etc.
+		return MatchUnknown
+	}
+}
+
+// canPruneType checks whether we can safely prune this type.
+// SET/ENUM stored as T_uint64/T_uint16 with Enumvalues must NOT be pruned
+// numerically — their directory values are member names, not raw integers.
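+//
+// For example, a SET('a','b','c') column arrives here as T_uint64 with
+// Enumvalues "a,b,c"; its directory value is member text such as "a,b",
+// so comparing it numerically against an encoded predicate would be wrong.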
+func canPruneType(colType tree.HivePartColType) bool { + if colType.Enumvalues != "" { + return false + } + switch types.T(colType.Id) { + case types.T_int8, types.T_int16, types.T_int32, types.T_int64, + types.T_uint8, types.T_uint16, types.T_uint32, types.T_uint64, + types.T_char, types.T_varchar, types.T_text: + return true + default: + return false + } +} + +func matchInt(dirVal string, predVals []string, bitSize int) MatchResult { + dv, err := strconv.ParseInt(dirVal, 10, bitSize) + if err != nil { + return MatchUnknown + } + for _, pv := range predVals { + pvi, err := strconv.ParseInt(pv, 10, bitSize) + if err != nil { + return MatchUnknown + } + if dv == pvi { + return MatchTrue + } + } + return MatchFalse +} + +func matchUint(dirVal string, predVals []string, bitSize int) MatchResult { + dv, err := strconv.ParseUint(dirVal, 10, bitSize) + if err != nil { + return MatchUnknown + } + for _, pv := range predVals { + pvi, err := strconv.ParseUint(pv, 10, bitSize) + if err != nil { + return MatchUnknown + } + if dv == pvi { + return MatchTrue + } + } + return MatchFalse +} + +// --------------------------------------------------------------------------- +// Filter classification and partition predicate extraction +// --------------------------------------------------------------------------- + +// filePathColSet is the set of virtual columns Hive pushes into +// FilterFileList at compile time. Only __mo_filepath qualifies: it is +// appended to every external table's TableDef.Cols (query_builder.go:4902) +// and its value is known before we open any parquet file. +// +// STATEMENT_ACCOUNT ("account") is deliberately excluded. It is not a +// virtual column on Hive/Parquet tables — it is synthesized per-batch by +// makeFilepathBatch (external.go:322) only for CSV external tables' tenant +// filter evaluation. Including it here would misclassify any physical +// column literally named "account" as a filepath filter and evaluate it +// against getAccountCol(path), producing wrong results. +var filePathColSet = map[string]bool{ + catalog.ExternalFilePath: true, +} + +// ClassifyFilters splits a filter list into three disjoint groups: +// - partitionFilters: only reference partition columns (also copied to rowFilters) +// - filePathFilters: only reference filepath virtual columns (see filePathColSet: __mo_filepath only) +// - rowFilters: everything else, plus partition filters for double-filtering safety +func ClassifyFilters( + tableDef *plan.TableDef, + filters []*plan.Expr, + partColSet map[string]bool, +) (partitionFilters, filePathFilters, rowFilters []*plan.Expr) { + for _, f := range filters { + refs := collectBareColNames(tableDef, f) + if len(refs) == 0 { + rowFilters = append(rowFilters, f) + continue + } + if subsetOf(refs, partColSet) { + partitionFilters = append(partitionFilters, f) + rowFilters = append(rowFilters, f) + continue + } + if subsetOf(refs, filePathColSet) { + filePathFilters = append(filePathFilters, f) + continue + } + rowFilters = append(rowFilters, f) + } + return +} + +// subsetOf returns true if every key in refs exists in allowed. +func subsetOf(refs map[string]bool, allowed map[string]bool) bool { + if len(refs) == 0 { + return false + } + for name := range refs { + if !allowed[name] { + return false + } + } + return true +} + +// collectBareColNames extracts the set of bare column names referenced by an expression. +// Uses ColPos to look up names from the pruned TableDef (not col.Name which may contain table prefix). 
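+//
+// For instance, walking the tree for "t.year = 2024" yields {"year"}: the
+// column ref resolves through ColPos when it is in range; otherwise the
+// textual name is used with any "t." prefix stripped.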
+func collectBareColNames(tableDef *plan.TableDef, expr *plan.Expr) map[string]bool { + names := map[string]bool{} + var walk func(e *plan.Expr) + walk = func(e *plan.Expr) { + if e == nil { + return + } + switch v := e.Expr.(type) { + case *plan.Expr_Col: + colPos := v.Col.ColPos + if colPos >= 0 && int(colPos) < len(tableDef.Cols) { + names[strings.ToLower(tableDef.Cols[colPos].Name)] = true + } else { + name := v.Col.Name + if idx := strings.LastIndexByte(name, '.'); idx >= 0 { + name = name[idx+1:] + } + names[strings.ToLower(name)] = true + } + case *plan.Expr_F: + for _, arg := range v.F.Args { + walk(arg) + } + } + } + walk(expr) + return names +} + +// ExtractPartitionPredicatesFromExprs converts partition filter expressions +// into PartitionPredicate hints for directory-level pruning. +// Supports col = const (PartOpEq) and col IN (const, ...) (PartOpIn). +// Expressions that cannot be structurally decomposed are silently skipped. +// +// Note: After optimizer constant-folding (rule.ConstantFold applies to all nodes), +// IN lists may be folded from Expr_List to Expr_Vec. Both forms are handled. +func ExtractPartitionPredicatesFromExprs( + tableDef *plan.TableDef, + partFilters []*plan.Expr, + partColSet map[string]bool, +) []PartitionPredicate { + var preds []PartitionPredicate + for _, f := range partFilters { + if pred, ok := tryExtractPredicate(tableDef, f, partColSet); ok { + preds = append(preds, pred) + } + } + return preds +} + +func tryExtractPredicate(tableDef *plan.TableDef, expr *plan.Expr, partColSet map[string]bool) (PartitionPredicate, bool) { + fn, ok := expr.Expr.(*plan.Expr_F) + if !ok { + return PartitionPredicate{}, false + } + + fid, _ := function.DecodeOverloadID(fn.F.Func.GetObj()) + switch fid { + case function.EQUAL: + return tryExtractEqual(tableDef, fn.F.Args, partColSet) + case function.IN: + return tryExtractIn(tableDef, fn.F.Args, partColSet) + default: + return PartitionPredicate{}, false + } +} + +func tryExtractEqual(tableDef *plan.TableDef, args []*plan.Expr, partColSet map[string]bool) (PartitionPredicate, bool) { + if len(args) != 2 { + return PartitionPredicate{}, false + } + colName, colOk := getPartColName(tableDef, args[0], partColSet) + litVal, litOk := getLiteralString(args[1]) + if !colOk || !litOk { + colName, colOk = getPartColName(tableDef, args[1], partColSet) + litVal, litOk = getLiteralString(args[0]) + if !colOk || !litOk { + return PartitionPredicate{}, false + } + } + return PartitionPredicate{ + ColName: colName, + Op: PartOpEq, + Values: []string{litVal}, + }, true +} + +func tryExtractIn(tableDef *plan.TableDef, args []*plan.Expr, partColSet map[string]bool) (PartitionPredicate, bool) { + if len(args) != 2 { + return PartitionPredicate{}, false + } + colName, colOk := getPartColName(tableDef, args[0], partColSet) + if !colOk { + return PartitionPredicate{}, false + } + + // After optimizer constant-folding, IN lists may be Expr_List or Expr_Vec. 
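+	// (An Expr_Vec right-hand side carries the folded constants as a
+	// serialized vector inside a LiteralVec; extractVecValues decodes it
+	// back into strings below.)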
+ switch v := args[1].Expr.(type) { + case *plan.Expr_List: + if v.List == nil { + return PartitionPredicate{}, false + } + values := make([]string, 0, len(v.List.List)) + for _, item := range v.List.List { + litVal, litOk := getLiteralString(item) + if !litOk { + return PartitionPredicate{}, false + } + values = append(values, litVal) + } + if len(values) == 0 { + return PartitionPredicate{}, false + } + return PartitionPredicate{ColName: colName, Op: PartOpIn, Values: values}, true + + case *plan.Expr_Vec: + values, ok := extractVecValues(v.Vec, args[0].Typ) + if !ok || len(values) == 0 { + return PartitionPredicate{}, false + } + return PartitionPredicate{ColName: colName, Op: PartOpIn, Values: values}, true + + default: + return PartitionPredicate{}, false + } +} + +// extractVecValues decodes a folded LiteralVec into string values for pruning. +func extractVecValues(litVec *plan.LiteralVec, typ plan.Type) (values []string, ok bool) { + if litVec == nil || litVec.Len <= 0 || len(litVec.Data) == 0 { + return nil, false + } + oid := types.T(typ.Id) + if !vectorBinaryEnvelopeInBounds(litVec.Data) { + return nil, false + } + + vec := vector.NewVec(types.New(oid, typ.Width, typ.Scale)) + defer vec.Free(nil) + if err := vec.UnmarshalBinary(litVec.Data); err != nil { + return nil, false + } + if vec.GetType().Oid != oid || vec.Length() != int(litVec.Len) { + return nil, false + } + + n := vec.Length() + values = make([]string, 0, n) + switch oid { + case types.T_int8: + col := vector.MustFixedColNoTypeCheck[int8](vec) + for i := 0; i < n; i++ { + values = append(values, strconv.FormatInt(int64(col[i]), 10)) + } + case types.T_int16: + col := vector.MustFixedColNoTypeCheck[int16](vec) + for i := 0; i < n; i++ { + values = append(values, strconv.FormatInt(int64(col[i]), 10)) + } + case types.T_int32: + col := vector.MustFixedColNoTypeCheck[int32](vec) + for i := 0; i < n; i++ { + values = append(values, strconv.FormatInt(int64(col[i]), 10)) + } + case types.T_int64: + col := vector.MustFixedColNoTypeCheck[int64](vec) + for i := 0; i < n; i++ { + values = append(values, strconv.FormatInt(col[i], 10)) + } + case types.T_uint8: + col := vector.MustFixedColNoTypeCheck[uint8](vec) + for i := 0; i < n; i++ { + values = append(values, strconv.FormatUint(uint64(col[i]), 10)) + } + case types.T_uint16: + col := vector.MustFixedColNoTypeCheck[uint16](vec) + for i := 0; i < n; i++ { + values = append(values, strconv.FormatUint(uint64(col[i]), 10)) + } + case types.T_uint32: + col := vector.MustFixedColNoTypeCheck[uint32](vec) + for i := 0; i < n; i++ { + values = append(values, strconv.FormatUint(uint64(col[i]), 10)) + } + case types.T_uint64: + col := vector.MustFixedColNoTypeCheck[uint64](vec) + for i := 0; i < n; i++ { + values = append(values, strconv.FormatUint(col[i], 10)) + } + case types.T_char, types.T_varchar, types.T_text: + col := vector.MustFixedColNoTypeCheck[types.Varlena](vec) + area := vec.GetArea() + for i := 0; i < n; i++ { + bs, ok := safeVarlenaBytes(&col[i], area) + if !ok { + return nil, false + } + values = append(values, string(bs)) + } + default: + return nil, false + } + return values, true +} + +// vectorBinaryEnvelopeInBounds only checks the bounds of Vector.UnmarshalBinary's +// envelope before calling it. It does not validate type semantics; those are +// checked after UnmarshalBinary succeeds. 
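+//
+// The shape being bounds-checked is, roughly: one flag byte, a types.Type
+// header, a 4-byte vector length, then three length-prefixed sections
+// (presumably data, area, and nulls), followed by at least one trailing byte.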
+func vectorBinaryEnvelopeInBounds(data []byte) bool { + if len(data) == 0 || int(data[0]) != vector.FLAT { + return false + } + pos := 1 + types.TSize + if len(data) < pos+4 { + return false + } + pos += 4 // vector length + + for i := 0; i < 3; i++ { + if len(data) < pos+4 { + return false + } + n := types.DecodeUint32(data[pos : pos+4]) + pos += 4 + if uint64(n) > uint64(len(data)-pos) { + return false + } + pos += int(n) + } + return len(data) >= pos+1 +} + +func safeVarlenaBytes(v *types.Varlena, area []byte) ([]byte, bool) { + if v.IsSmall() { + return v.ByteSlice(), true + } + off, size := v.OffsetLen() + end := uint64(off) + uint64(size) + if end > uint64(len(area)) { + return nil, false + } + return area[int(off):int(end)], true +} + +// getPartColName returns the bare partition column name from a column expression. +func getPartColName(tableDef *plan.TableDef, expr *plan.Expr, partColSet map[string]bool) (string, bool) { + col, ok := expr.Expr.(*plan.Expr_Col) + if !ok { + return "", false + } + colPos := col.Col.ColPos + var name string + if colPos >= 0 && int(colPos) < len(tableDef.Cols) { + name = strings.ToLower(tableDef.Cols[colPos].Name) + } else { + name = col.Col.Name + if idx := strings.LastIndexByte(name, '.'); idx >= 0 { + name = name[idx+1:] + } + name = strings.ToLower(name) + } + if !partColSet[name] { + return "", false + } + return name, true +} + +// getLiteralString extracts a string representation from a literal expression. +// Only accepts Expr_Lit (rejects Expr_F such as cast which may change value). +func getLiteralString(expr *plan.Expr) (string, bool) { + lit, ok := expr.Expr.(*plan.Expr_Lit) + if !ok || lit.Lit == nil || lit.Lit.Isnull { + return "", false + } + switch v := lit.Lit.Value.(type) { + case *plan.Literal_Sval: + return v.Sval, true + case *plan.Literal_I8Val: + return strconv.FormatInt(int64(v.I8Val), 10), true + case *plan.Literal_I16Val: + return strconv.FormatInt(int64(v.I16Val), 10), true + case *plan.Literal_I32Val: + return strconv.FormatInt(int64(v.I32Val), 10), true + case *plan.Literal_I64Val: + return strconv.FormatInt(v.I64Val, 10), true + case *plan.Literal_U8Val: + return strconv.FormatUint(uint64(v.U8Val), 10), true + case *plan.Literal_U16Val: + return strconv.FormatUint(uint64(v.U16Val), 10), true + case *plan.Literal_U32Val: + return strconv.FormatUint(uint64(v.U32Val), 10), true + case *plan.Literal_U64Val: + return strconv.FormatUint(v.U64Val, 10), true + case *plan.Literal_Fval: + return fmt.Sprintf("%g", v.Fval), true + case *plan.Literal_Dval: + return fmt.Sprintf("%g", v.Dval), true + case *plan.Literal_Bval: + if v.Bval { + return "true", true + } + return "false", true + default: + return "", false + } +} diff --git a/pkg/sql/colexec/external/hive_partition_coverage_test.go b/pkg/sql/colexec/external/hive_partition_coverage_test.go new file mode 100644 index 0000000000000..873a16e178458 --- /dev/null +++ b/pkg/sql/colexec/external/hive_partition_coverage_test.go @@ -0,0 +1,825 @@ +// Copyright 2026 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package external + +import ( + "math" + "testing" + + "github.com/matrixorigin/matrixone/pkg/catalog" + "github.com/matrixorigin/matrixone/pkg/container/batch" + "github.com/matrixorigin/matrixone/pkg/container/types" + "github.com/matrixorigin/matrixone/pkg/container/vector" + "github.com/matrixorigin/matrixone/pkg/pb/plan" + "github.com/matrixorigin/matrixone/pkg/sql/parsers/tree" + "github.com/matrixorigin/matrixone/pkg/sql/plan/function" + "github.com/matrixorigin/matrixone/pkg/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --------------------------------------------------------------------------- +// NewListDirFunc +// --------------------------------------------------------------------------- + +// TestNewListDirFunc_InfileETL exercises the non-S3 branch of NewListDirFunc. +// For ScanType=INFILE the builder falls through to the plain FileService ETL +// path; we just need to confirm the factory returns a non-nil ListDirFunc +// that yields an error when pointed at a non-existent directory. +func TestNewListDirFunc_InfileETL(t *testing.T) { + param := &tree.ExternParam{} + param.Filepath = "/nonexistent/hive/root" + fn := NewListDirFunc(param) + require.NotNil(t, fn) + // Iterating an ETL path that mo cannot resolve should surface an error + // (either from GetForETLWithType or from fs.List). Either way the + // iterator yields at least once. + gotAny := false + for entry, err := range fn(t.Context(), "/nonexistent/hive/root") { + _ = entry + _ = err + gotAny = true + break + } + _ = gotAny +} + +// --------------------------------------------------------------------------- +// matchPartitionValue — the non-prunable type arms +// --------------------------------------------------------------------------- + +func TestMatchPartitionValue_AllTypesReturnUnknown(t *testing.T) { + // Every type in the switch not explicitly prunable returns MatchUnknown. + // Covers the default arm plus every explicit non-prunable case. + nonPrunable := []types.T{ + types.T_bool, types.T_float32, types.T_float64, + types.T_decimal64, types.T_decimal128, + types.T_date, types.T_datetime, types.T_timestamp, types.T_time, + types.T_json, types.T_uuid, types.T_blob, types.T_binary, types.T_varbinary, + types.T_datalink, types.T_bit, types.T_enum, + } + for _, typ := range nonPrunable { + ct := tree.HivePartColType{Id: int32(typ)} + got := matchPartitionValue("anything", []string{"anything"}, ct) + assert.Equal(t, MatchUnknown, got, "type %v must return MatchUnknown", typ) + } +} + +func TestMatchPartitionValue_IntParseErrorValue(t *testing.T) { + ct := tree.HivePartColType{Id: int32(types.T_int32)} + // Directory value parses fine but predicate value does not → MatchUnknown. + assert.Equal(t, MatchUnknown, matchPartitionValue("100", []string{"notanint"}, ct)) +} + +func TestMatchPartitionValue_UintParseErrorValue(t *testing.T) { + ct := tree.HivePartColType{Id: int32(types.T_uint32)} + assert.Equal(t, MatchUnknown, matchPartitionValue("abc", []string{"100"}, ct)) + assert.Equal(t, MatchUnknown, matchPartitionValue("100", []string{"notauint"}, ct)) +} + +func TestMatchPartitionValue_UintOverflow(t *testing.T) { + // 256 does not fit uint8 — parse fails → MatchUnknown. 
+	ct := tree.HivePartColType{Id: int32(types.T_uint8)}
+	assert.Equal(t, MatchUnknown, matchPartitionValue("256", []string{"256"}, ct))
+}
+
+// ---------------------------------------------------------------------------
+// getLiteralString — each Literal_* arm
+// ---------------------------------------------------------------------------
+
+func TestGetLiteralString_AllTypes(t *testing.T) {
+	// Each literal shape should be recognized. The isLiteral_Value interface
+	// is unexported so we construct a Literal per shape and then place it into
+	// an Expr_Lit manually.
+	build := func(lit *plan.Literal) *plan.Expr {
+		return &plan.Expr{Expr: &plan.Expr_Lit{Lit: lit}}
+	}
+	type tc struct {
+		name string
+		lit  *plan.Literal
+		want string
+	}
+	cases := []tc{
+		{"sval", &plan.Literal{Value: &plan.Literal_Sval{Sval: "hi"}}, "hi"},
+		{"i8", &plan.Literal{Value: &plan.Literal_I8Val{I8Val: -7}}, "-7"},
+		{"i16", &plan.Literal{Value: &plan.Literal_I16Val{I16Val: 30000}}, "30000"},
+		{"i32", &plan.Literal{Value: &plan.Literal_I32Val{I32Val: 2024}}, "2024"},
+		{"i64", &plan.Literal{Value: &plan.Literal_I64Val{I64Val: 2450900}}, "2450900"},
+		{"u8", &plan.Literal{Value: &plan.Literal_U8Val{U8Val: 200}}, "200"},
+		{"u16", &plan.Literal{Value: &plan.Literal_U16Val{U16Val: 60000}}, "60000"},
+		{"u32", &plan.Literal{Value: &plan.Literal_U32Val{U32Val: 4_000_000_000}}, "4000000000"},
+		{"u64", &plan.Literal{Value: &plan.Literal_U64Val{U64Val: 18_000_000_000}}, "18000000000"},
+		{"float", &plan.Literal{Value: &plan.Literal_Fval{Fval: 1.5}}, "1.5"},
+		{"double", &plan.Literal{Value: &plan.Literal_Dval{Dval: 2.5}}, "2.5"},
+		{"bool-true", &plan.Literal{Value: &plan.Literal_Bval{Bval: true}}, "true"},
+		{"bool-false", &plan.Literal{Value: &plan.Literal_Bval{Bval: false}}, "false"},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			got, ok := getLiteralString(build(c.lit))
+			require.True(t, ok, "%s must be recognized", c.name)
+			assert.Equal(t, c.want, got)
+		})
+	}
+}
+
+func TestGetLiteralString_NotLiteralRejects(t *testing.T) {
+	// Expr_Col → not a literal.
+	colExpr := &plan.Expr{Expr: &plan.Expr_Col{Col: &plan.ColRef{ColPos: 0, Name: "x"}}}
+	_, ok := getLiteralString(colExpr)
+	assert.False(t, ok)
+
+	// nil Lit
+	nilLit := &plan.Expr{Expr: &plan.Expr_Lit{Lit: nil}}
+	_, ok = getLiteralString(nilLit)
+	assert.False(t, ok)
+
+	// Isnull literal
+	nullLit := &plan.Expr{Expr: &plan.Expr_Lit{Lit: &plan.Literal{Isnull: true}}}
+	_, ok = getLiteralString(nullLit)
+	assert.False(t, ok)
+}
+
+func TestGetLiteralString_UnsupportedValueRejects(t *testing.T) {
+	// Decimal128 literal is not recognized by getLiteralString (falls in default arm).
+ lit := &plan.Literal{Value: &plan.Literal_Decimal128Val{Decimal128Val: &plan.Decimal128{A: 0, B: 0}}} + expr := &plan.Expr{Expr: &plan.Expr_Lit{Lit: lit}} + _, ok := getLiteralString(expr) + assert.False(t, ok, "decimal128 literal is not supported by getLiteralString") +} + +// --------------------------------------------------------------------------- +// extractVecValues — fixed integer and unsigned arms +// --------------------------------------------------------------------------- + +func TestExtractVecValues_Int8(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + v := vector.NewVec(types.T_int8.ToType()) + require.NoError(t, vector.AppendFixed(v, int8(-7), false, mp)) + require.NoError(t, vector.AppendFixed(v, int8(7), false, mp)) + data, err := v.MarshalBinary() + require.NoError(t, err) + v.Free(mp) + + vals, ok := extractVecValues( + &plan.LiteralVec{Len: 2, Data: data}, + plan.Type{Id: int32(types.T_int8)}) + require.True(t, ok) + assert.Equal(t, []string{"-7", "7"}, vals) +} + +func TestExtractVecValues_Int16(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + v := vector.NewVec(types.T_int16.ToType()) + require.NoError(t, vector.AppendFixed(v, int16(-123), false, mp)) + require.NoError(t, vector.AppendFixed(v, int16(32000), false, mp)) + data, err := v.MarshalBinary() + require.NoError(t, err) + v.Free(mp) + vals, ok := extractVecValues( + &plan.LiteralVec{Len: 2, Data: data}, + plan.Type{Id: int32(types.T_int16)}) + require.True(t, ok) + assert.Equal(t, []string{"-123", "32000"}, vals) +} + +func TestExtractVecValues_Int64(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + v := vector.NewVec(types.T_int64.ToType()) + require.NoError(t, vector.AppendFixed(v, int64(-5), false, mp)) + require.NoError(t, vector.AppendFixed(v, int64(2450900), false, mp)) + data, err := v.MarshalBinary() + require.NoError(t, err) + v.Free(mp) + vals, ok := extractVecValues( + &plan.LiteralVec{Len: 2, Data: data}, + plan.Type{Id: int32(types.T_int64)}) + require.True(t, ok) + assert.Equal(t, []string{"-5", "2450900"}, vals) +} + +func TestExtractVecValues_Uint8(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + v := vector.NewVec(types.T_uint8.ToType()) + require.NoError(t, vector.AppendFixed(v, uint8(3), false, mp)) + require.NoError(t, vector.AppendFixed(v, uint8(250), false, mp)) + data, err := v.MarshalBinary() + require.NoError(t, err) + v.Free(mp) + vals, ok := extractVecValues( + &plan.LiteralVec{Len: 2, Data: data}, + plan.Type{Id: int32(types.T_uint8)}) + require.True(t, ok) + assert.Equal(t, []string{"3", "250"}, vals) +} + +func TestExtractVecValues_Uint16(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + v := vector.NewVec(types.T_uint16.ToType()) + require.NoError(t, vector.AppendFixed(v, uint16(3), false, mp)) + require.NoError(t, vector.AppendFixed(v, uint16(60000), false, mp)) + data, err := v.MarshalBinary() + require.NoError(t, err) + v.Free(mp) + vals, ok := extractVecValues( + &plan.LiteralVec{Len: 2, Data: data}, + plan.Type{Id: int32(types.T_uint16)}) + require.True(t, ok) + assert.Equal(t, []string{"3", "60000"}, vals) +} + +func TestExtractVecValues_Uint32(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + v := vector.NewVec(types.T_uint32.ToType()) + require.NoError(t, vector.AppendFixed(v, uint32(3), false, mp)) + require.NoError(t, vector.AppendFixed(v, uint32(4_000_000_000), false, mp)) + data, err := v.MarshalBinary() + require.NoError(t, err) + v.Free(mp) + vals, ok := 
extractVecValues(
+		&plan.LiteralVec{Len: 2, Data: data},
+		plan.Type{Id: int32(types.T_uint32)})
+	require.True(t, ok)
+	assert.Equal(t, []string{"3", "4000000000"}, vals)
+}
+
+func TestExtractVecValues_Uint64(t *testing.T) {
+	proc := testutil.NewProc(t)
+	mp := proc.Mp()
+	v := vector.NewVec(types.T_uint64.ToType())
+	require.NoError(t, vector.AppendFixed(v, uint64(3), false, mp))
+	require.NoError(t, vector.AppendFixed(v, uint64(18_000_000_000), false, mp))
+	data, err := v.MarshalBinary()
+	require.NoError(t, err)
+	v.Free(mp)
+	vals, ok := extractVecValues(
+		&plan.LiteralVec{Len: 2, Data: data},
+		plan.Type{Id: int32(types.T_uint64)})
+	require.True(t, ok)
+	assert.Equal(t, []string{"3", "18000000000"}, vals)
+}
+
+func TestExtractVecValues_UnsupportedType(t *testing.T) {
+	// Decimal128 is not handled by extractVecValues — falls through to default
+	// → returns (nil, false). Use a valid binary shape so vectorBinaryEnvelopeInBounds
+	// doesn't reject first.
+	proc := testutil.NewProc(t)
+	mp := proc.Mp()
+	v := vector.NewVec(types.T_decimal128.ToType())
+	dec := types.Decimal128{B0_63: 123, B64_127: 0}
+	require.NoError(t, vector.AppendFixed(v, dec, false, mp))
+	require.NoError(t, vector.AppendFixed(v, dec, false, mp))
+	data, err := v.MarshalBinary()
+	require.NoError(t, err)
+	v.Free(mp)
+	vals, ok := extractVecValues(
+		&plan.LiteralVec{Len: 2, Data: data},
+		plan.Type{Id: int32(types.T_decimal128)})
+	assert.False(t, ok)
+	assert.Nil(t, vals)
+}
+
+func TestExtractVecValues_EmptyAndNilData(t *testing.T) {
+	// nil LiteralVec
+	_, ok := extractVecValues(nil, plan.Type{Id: int32(types.T_int32)})
+	assert.False(t, ok)
+	// Empty Data
+	_, ok = extractVecValues(&plan.LiteralVec{Len: 0, Data: nil}, plan.Type{Id: int32(types.T_int32)})
+	assert.False(t, ok)
+}
+
+func TestExtractVecValues_CorruptDataRejects(t *testing.T) {
+	// Garbage bytes should be rejected before Vector.UnmarshalBinary can panic.
+	_, ok := extractVecValues(
+		&plan.LiteralVec{Len: 1, Data: []byte{0, 0, 0, 0}},
+		plan.Type{Id: int32(types.T_int32)})
+	assert.False(t, ok)
+}
+
+// ---------------------------------------------------------------------------
+// safeVarlenaBytes
+// ---------------------------------------------------------------------------
+
+func TestSafeVarlenaBytes_SmallInline(t *testing.T) {
+	// Small varlena stores bytes inline; safeVarlenaBytes returns ByteSlice().
+	proc := testutil.NewProc(t)
+	mp := proc.Mp()
+	v := vector.NewVec(types.T_varchar.ToType())
+	require.NoError(t, vector.AppendBytes(v, []byte("hi"), false, mp))
+	col := vector.MustFixedColNoTypeCheck[types.Varlena](v)
+	area := v.GetArea()
+	bs, ok := safeVarlenaBytes(&col[0], area)
+	require.True(t, ok)
+	assert.Equal(t, []byte("hi"), bs)
+	v.Free(mp)
+}
+
+func TestSafeVarlenaBytes_LongFromArea(t *testing.T) {
+	// Long varlena reads from vec's area.
+	proc := testutil.NewProc(t)
+	mp := proc.Mp()
+	v := vector.NewVec(types.T_varchar.ToType())
+	long := []byte("this-is-definitely-longer-than-varlena-inline-threshold-bytes")
+	require.NoError(t, vector.AppendBytes(v, long, false, mp))
+	col := vector.MustFixedColNoTypeCheck[types.Varlena](v)
+	area := v.GetArea()
+	bs, ok := safeVarlenaBytes(&col[0], area)
+	require.True(t, ok)
+	assert.Equal(t, long, bs)
+	v.Free(mp)
+}
+
+func TestSafeVarlenaBytes_OutOfRangeRejects(t *testing.T) {
+	// Construct a Varlena whose (offset+size) exceeds area length.
+	// Size 100 starting at offset 0, but area has only 10 bytes.
+	var vl types.Varlena
+	// Need a long varlena.
Use SetOffsetLen to mark it long. + vl.SetOffsetLen(0, 100) + area := make([]byte, 10) + _, ok := safeVarlenaBytes(&vl, area) + assert.False(t, ok, "oversized offset+len must be rejected") +} + +// --------------------------------------------------------------------------- +// fillConstantVector — the type branches not yet covered +// --------------------------------------------------------------------------- + +func TestFillConstantVector_Int8(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_int8.ToType()) + col := &plan.ColDef{Name: "y", Typ: plan.Type{Id: int32(types.T_int8)}} + require.NoError(t, fillConstantVector(vec, "42", col, 3, proc, "/t")) + val := vector.MustFixedColNoTypeCheck[int8](vec) + assert.Equal(t, int8(42), val[0]) + vec.Free(mp) +} + +func TestFillConstantVector_Int16(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_int16.ToType()) + col := &plan.ColDef{Name: "y", Typ: plan.Type{Id: int32(types.T_int16)}} + require.NoError(t, fillConstantVector(vec, "12345", col, 2, proc, "/t")) + val := vector.MustFixedColNoTypeCheck[int16](vec) + assert.Equal(t, int16(12345), val[0]) + vec.Free(mp) +} + +func TestFillConstantVector_Int64_AndUintSignFail(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + + vec := vector.NewVec(types.T_int64.ToType()) + col := &plan.ColDef{Name: "y", Typ: plan.Type{Id: int32(types.T_int64)}} + require.NoError(t, fillConstantVector(vec, "-99", col, 1, proc, "/t")) + v64 := vector.MustFixedColNoTypeCheck[int64](vec) + assert.Equal(t, int64(-99), v64[0]) + vec.Free(mp) + + // uint with a negative string → wrapped error path + vec = vector.NewVec(types.T_uint32.ToType()) + col = &plan.ColDef{Name: "u", Typ: plan.Type{Id: int32(types.T_uint32)}} + err := fillConstantVector(vec, "-1", col, 1, proc, "/t") + require.Error(t, err) + vec.Free(nil) +} + +func TestFillConstantVector_Uint8_Uint16_Uint64(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + + for _, tc := range []struct { + name string + typId types.T + strVal string + }{ + {"uint8", types.T_uint8, "200"}, + {"uint16", types.T_uint16, "60000"}, + {"uint64", types.T_uint64, "4294967296"}, + } { + t.Run(tc.name, func(t *testing.T) { + vec := vector.NewVec(tc.typId.ToType()) + col := &plan.ColDef{Name: "n", Typ: plan.Type{Id: int32(tc.typId)}} + require.NoError(t, fillConstantVector(vec, tc.strVal, col, 1, proc, "/t")) + vec.Free(mp) + }) + } +} + +func TestFillConstantVector_Bit(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_bit.ToType()) + col := &plan.ColDef{Name: "b", Typ: plan.Type{Id: int32(types.T_bit), Width: 8}} + require.NoError(t, fillConstantVector(vec, "7", col, 1, proc, "/t")) + val := vector.MustFixedColNoTypeCheck[uint64](vec) + assert.Equal(t, uint64(7), val[0]) + vec.Free(mp) + + // ParseUint failure wraps + vec = vector.NewVec(types.T_bit.ToType()) + require.Error(t, fillConstantVector(vec, "abc", col, 1, proc, "/t")) + vec.Free(nil) +} + +func TestFillConstantVector_Float32_Float64(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + + vec := vector.NewVec(types.T_float32.ToType()) + col := &plan.ColDef{Name: "f32", Typ: plan.Type{Id: int32(types.T_float32)}} + require.NoError(t, fillConstantVector(vec, "1.5", col, 1, proc, "/t")) + vec.Free(mp) + + vec = vector.NewVec(types.T_float64.ToType()) + col = &plan.ColDef{Name: "f64", Typ: plan.Type{Id: int32(types.T_float64)}} + require.NoError(t, 
fillConstantVector(vec, "2.25", col, 1, proc, "/t")) + vec.Free(mp) + + // Parse error path + vec = vector.NewVec(types.T_float32.ToType()) + col = &plan.ColDef{Name: "f32", Typ: plan.Type{Id: int32(types.T_float32)}} + require.Error(t, fillConstantVector(vec, "notafloat", col, 1, proc, "/t")) + vec.Free(nil) + + vec = vector.NewVec(types.T_float64.ToType()) + col = &plan.ColDef{Name: "f64", Typ: plan.Type{Id: int32(types.T_float64)}} + require.Error(t, fillConstantVector(vec, "notafloat", col, 1, proc, "/t")) + vec.Free(nil) +} + +func TestFillConstantVector_Decimal64(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_decimal64.ToType()) + col := &plan.ColDef{Name: "d", Typ: plan.Type{Id: int32(types.T_decimal64), Width: 10, Scale: 2}} + require.NoError(t, fillConstantVector(vec, "12.34", col, 1, proc, "/t")) + vec.Free(mp) + + // Parse error + vec = vector.NewVec(types.T_decimal64.ToType()) + require.Error(t, fillConstantVector(vec, "notadecimal", col, 1, proc, "/t")) + vec.Free(nil) +} + +func TestFillConstantVector_Decimal128(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_decimal128.ToType()) + col := &plan.ColDef{Name: "d", Typ: plan.Type{Id: int32(types.T_decimal128), Width: 20, Scale: 2}} + require.NoError(t, fillConstantVector(vec, "123456789.01", col, 1, proc, "/t")) + vec.Free(mp) + + vec = vector.NewVec(types.T_decimal128.ToType()) + require.Error(t, fillConstantVector(vec, "nope", col, 1, proc, "/t")) + vec.Free(nil) +} + +func TestFillConstantVector_Date(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_date.ToType()) + col := &plan.ColDef{Name: "d", Typ: plan.Type{Id: int32(types.T_date)}} + require.NoError(t, fillConstantVector(vec, "2025-06-15", col, 1, proc, "/t")) + vec.Free(mp) + + vec = vector.NewVec(types.T_date.ToType()) + require.Error(t, fillConstantVector(vec, "not-a-date", col, 1, proc, "/t")) + vec.Free(nil) +} + +func TestFillConstantVector_Datetime(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_datetime.ToType()) + col := &plan.ColDef{Name: "dt", Typ: plan.Type{Id: int32(types.T_datetime), Scale: 0}} + require.NoError(t, fillConstantVector(vec, "2025-06-15 12:34:56", col, 1, proc, "/t")) + vec.Free(mp) + + vec = vector.NewVec(types.T_datetime.ToType()) + require.Error(t, fillConstantVector(vec, "not-a-datetime", col, 1, proc, "/t")) + vec.Free(nil) +} + +func TestFillConstantVector_Timestamp(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_timestamp.ToType()) + col := &plan.ColDef{Name: "ts", Typ: plan.Type{Id: int32(types.T_timestamp), Scale: 0}} + require.NoError(t, fillConstantVector(vec, "2025-06-15 12:34:56", col, 1, proc, "/t")) + vec.Free(mp) + + vec = vector.NewVec(types.T_timestamp.ToType()) + require.Error(t, fillConstantVector(vec, "not-a-ts", col, 1, proc, "/t")) + vec.Free(nil) +} + +func TestFillConstantVector_Time(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_time.ToType()) + col := &plan.ColDef{Name: "t", Typ: plan.Type{Id: int32(types.T_time), Scale: 0}} + require.NoError(t, fillConstantVector(vec, "12:34:56", col, 1, proc, "/t")) + vec.Free(mp) + + vec = vector.NewVec(types.T_time.ToType()) + require.Error(t, fillConstantVector(vec, "not-a-time", col, 1, proc, "/t")) + vec.Free(nil) +} + +func TestFillConstantVector_BoolError(t *testing.T) { + proc := 
testutil.NewProc(t) + vec := vector.NewVec(types.T_bool.ToType()) + col := &plan.ColDef{Name: "b", Typ: plan.Type{Id: int32(types.T_bool)}} + require.Error(t, fillConstantVector(vec, "nope", col, 1, proc, "/t")) + vec.Free(nil) +} + +func TestFillConstantVector_Uuid(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_uuid.ToType()) + col := &plan.ColDef{Name: "u", Typ: plan.Type{Id: int32(types.T_uuid)}} + require.NoError(t, fillConstantVector(vec, "00000000-0000-0000-0000-000000000001", col, 1, proc, "/t")) + vec.Free(mp) + + vec = vector.NewVec(types.T_uuid.ToType()) + require.Error(t, fillConstantVector(vec, "not-a-uuid", col, 1, proc, "/t")) + vec.Free(nil) +} + +func TestFillConstantVector_Json(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_json.ToType()) + col := &plan.ColDef{Name: "j", Typ: plan.Type{Id: int32(types.T_json)}} + require.NoError(t, fillConstantVector(vec, `{"a":1}`, col, 1, proc, "/t")) + vec.Free(mp) + + vec = vector.NewVec(types.T_json.ToType()) + require.Error(t, fillConstantVector(vec, "not-json", col, 1, proc, "/t")) + vec.Free(nil) +} + +func TestFillConstantVector_ByteTypes(t *testing.T) { + // char / varchar / text / blob / binary / varbinary / datalink → SetConstBytes. + proc := testutil.NewProc(t) + mp := proc.Mp() + for _, typId := range []types.T{ + types.T_char, types.T_varchar, types.T_text, + types.T_blob, types.T_binary, types.T_varbinary, types.T_datalink, + } { + vec := vector.NewVec(typId.ToType()) + col := &plan.ColDef{Name: "b", Typ: plan.Type{Id: int32(typId)}} + require.NoError(t, fillConstantVector(vec, "xyz", col, 2, proc, "/t")) + vec.Free(mp) + } +} + +func TestFillConstantVector_VectorTypesReturnNotSupported(t *testing.T) { + proc := testutil.NewProc(t) + for _, typId := range []types.T{types.T_array_float32, types.T_array_float64} { + vec := vector.NewVec(typId.ToType()) + col := &plan.ColDef{Name: "v", Typ: plan.Type{Id: int32(typId)}} + err := fillConstantVector(vec, "[1,2,3]", col, 1, proc, "/t") + require.Error(t, err) + assert.Contains(t, err.Error(), "unsupported") + vec.Free(nil) + } +} + +func TestFillConstantVector_UnsupportedTypeDefaultBranch(t *testing.T) { + // Use T_any which is not in the switch → hits default branch. + proc := testutil.NewProc(t) + vec := vector.NewVec(types.T_any.ToType()) + col := &plan.ColDef{Name: "x", Typ: plan.Type{Id: int32(types.T_any)}} + err := fillConstantVector(vec, "whatever", col, 1, proc, "/t") + require.Error(t, err) + assert.Contains(t, err.Error(), "unsupported") + vec.Free(nil) +} + +func TestFillConstantVector_SetStoredAsUint64(t *testing.T) { + // SET is encoded as T_uint64 with non-empty Enumvalues → ParseSet branch. 
+ proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_uint64.ToType()) + col := &plan.ColDef{Name: "s", + Typ: plan.Type{Id: int32(types.T_uint64), Enumvalues: "a,b,c"}} + require.NoError(t, fillConstantVector(vec, "b", col, 1, proc, "/t")) + vec.Free(mp) + + // Unknown member → parse error + vec = vector.NewVec(types.T_uint64.ToType()) + require.Error(t, fillConstantVector(vec, "zzz", col, 1, proc, "/t")) + vec.Free(nil) +} + +func TestFillConstantVector_Enum(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_enum.ToType()) + col := &plan.ColDef{Name: "e", + Typ: plan.Type{Id: int32(types.T_enum), Enumvalues: "red,green,blue"}} + require.NoError(t, fillConstantVector(vec, "green", col, 1, proc, "/t")) + vec.Free(mp) + + vec = vector.NewVec(types.T_enum.ToType()) + require.Error(t, fillConstantVector(vec, "purple", col, 1, proc, "/t")) + vec.Free(nil) +} + +// --------------------------------------------------------------------------- +// fillVirtualColumns — both branches (filepath only, combined) +// --------------------------------------------------------------------------- + +func TestFillVirtualColumns_FilepathOnly(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + + // One filepath column in batch. + fpVec := vector.NewVec(types.T_varchar.ToType()) + bat := &batch.Batch{Vecs: []*vector.Vector{fpVec}} + bat.SetRowCount(5) + + param := &ExternalParam{} + param.Fileparam = &ExFileparam{Filepath: "/data/year=2024/f.parquet"} + param.Cols = []*plan.ColDef{ + {Name: catalog.ExternalFilePath, Typ: plan.Type{Id: int32(types.T_varchar)}}, + } + + h := &ParquetHandler{filepathColIndex: 0} + require.NoError(t, h.fillVirtualColumns(bat, param, proc)) + got := fpVec.GetBytesAt(0) + assert.Equal(t, "/data/year=2024/f.parquet", string(got)) + fpVec.Free(mp) +} + +func TestFillVirtualColumns_FilepathAndPartition(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + + // Batch has [filepath varchar, partition int32]. + fpVec := vector.NewVec(types.T_varchar.ToType()) + partVec := vector.NewVec(types.T_int32.ToType()) + bat := &batch.Batch{Vecs: []*vector.Vector{fpVec, partVec}} + bat.SetRowCount(3) + + param := &ExternalParam{} + param.Fileparam = &ExFileparam{Filepath: "/data/year=2024/f.parquet"} + param.Cols = []*plan.ColDef{ + {Name: catalog.ExternalFilePath, Typ: plan.Type{Id: int32(types.T_varchar)}}, + {Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}}, + } + param.Ctx = t.Context() + param.currentPartValues = map[string]string{"year": "2024"} + + h := &ParquetHandler{filepathColIndex: 0, partitionColIndices: []int{1}} + require.NoError(t, h.fillVirtualColumns(bat, param, proc)) + + assert.Equal(t, "/data/year=2024/f.parquet", string(fpVec.GetBytesAt(0))) + pv := vector.MustFixedColNoTypeCheck[int32](partVec) + assert.Equal(t, int32(2024), pv[0]) + fpVec.Free(mp) + partVec.Free(mp) +} + +func TestFillVirtualColumns_NoFilepathNoPartitionNoop(t *testing.T) { + // Neither filepath nor partition columns configured → early return, nil err. 
+ proc := testutil.NewProc(t) + bat := &batch.Batch{} + bat.SetRowCount(0) + param := &ExternalParam{} + param.Fileparam = &ExFileparam{Filepath: "/x"} + + h := &ParquetHandler{filepathColIndex: -1} + assert.NoError(t, h.fillVirtualColumns(bat, param, proc)) +} + +// --------------------------------------------------------------------------- +// relPartitionPath edge cases +// --------------------------------------------------------------------------- + +func TestRelPartitionPath_EdgeCases(t *testing.T) { + // Equal → empty string + assert.Equal(t, "", relPartitionPath("/data", "/data")) + + // Not under base — return normalized filePath unchanged. + assert.Equal(t, "/other/y=2024/f", relPartitionPath("/other/y=2024/f", "/data")) + + // Under base — return tail. + assert.Equal(t, "y=2024/f", relPartitionPath("/data/y=2024/f", "/data")) +} + +// --------------------------------------------------------------------------- +// getPartColName — non-col expression and fallback name-strip +// --------------------------------------------------------------------------- + +func TestGetPartColName_NonColReturnsFalse(t *testing.T) { + td := makeTableDef("year") + partColSet := map[string]bool{"year": true} + lit := makeLitInt64(2024) + _, ok := getPartColName(td, lit, partColSet) + assert.False(t, ok) +} + +func TestGetPartColName_ColPosOutOfRangeFallback(t *testing.T) { + td := makeTableDef("year") + partColSet := map[string]bool{"year": true} + // ColPos way out of range; name fallback strips "t." prefix. + expr := makeColExpr(99, "t.year") + name, ok := getPartColName(td, expr, partColSet) + assert.True(t, ok) + assert.Equal(t, "year", name) +} + +func TestGetPartColName_NonPartitionRejected(t *testing.T) { + td := makeTableDef("other") + partColSet := map[string]bool{"year": true} + expr := makeColExpr(0, "other") + _, ok := getPartColName(td, expr, partColSet) + assert.False(t, ok) +} + +// --------------------------------------------------------------------------- +// tryExtractIn edge cases: wrong arity, non-list/vec right-hand side +// --------------------------------------------------------------------------- + +func TestTryExtractIn_WrongArityReturnsFalse(t *testing.T) { + td := makeTableDef("year") + partColSet := map[string]bool{"year": true} + // Only one arg instead of two. + _, ok := tryExtractIn(td, []*plan.Expr{makeColExpr(0, "year")}, partColSet) + assert.False(t, ok) +} + +func TestTryExtractIn_ColIsNotPartitionRejects(t *testing.T) { + td := makeTableDef("other") + partColSet := map[string]bool{"year": true} + listExpr := &plan.Expr{Expr: &plan.Expr_List{List: &plan.ExprList{List: []*plan.Expr{makeLitInt64(1)}}}} + _, ok := tryExtractIn(td, []*plan.Expr{makeColExpr(0, "other"), listExpr}, partColSet) + assert.False(t, ok) +} + +func TestTryExtractIn_EmptyListRejects(t *testing.T) { + td := makeTableDef("year") + partColSet := map[string]bool{"year": true} + emptyList := &plan.Expr{Expr: &plan.Expr_List{List: &plan.ExprList{List: nil}}} + _, ok := tryExtractIn(td, []*plan.Expr{makeColExpr(0, "year"), emptyList}, partColSet) + assert.False(t, ok) +} + +func TestTryExtractIn_UnsupportedRhsKindRejects(t *testing.T) { + td := makeTableDef("year") + partColSet := map[string]bool{"year": true} + // Right-hand side is neither Expr_List nor Expr_Vec. 
+ nonList := makeLitInt64(42) + _, ok := tryExtractIn(td, []*plan.Expr{makeColExpr(0, "year"), nonList}, partColSet) + assert.False(t, ok) +} + +// --------------------------------------------------------------------------- +// tryExtractPredicate non-supported fid rejection (not EQ / IN) +// --------------------------------------------------------------------------- + +func TestTryExtractPredicate_NonEqInFid(t *testing.T) { + td := makeTableDef("year") + partColSet := map[string]bool{"year": true} + gtExpr := &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: int64(function.GREAT_THAN) << 32}, + Args: []*plan.Expr{makeColExpr(0, "year"), makeLitInt64(2024)}, + }}, + } + _, ok := tryExtractPredicate(td, gtExpr, partColSet) + assert.False(t, ok) +} + +func TestTryExtractPredicate_NotAnExprF(t *testing.T) { + td := makeTableDef("year") + partColSet := map[string]bool{"year": true} + _, ok := tryExtractPredicate(td, makeLitInt64(1), partColSet) + assert.False(t, ok) +} + +// Touch math so the import survives even if the file is thinned. +var _ = math.MaxInt32 diff --git a/pkg/sql/colexec/external/hive_partition_fill.go b/pkg/sql/colexec/external/hive_partition_fill.go new file mode 100644 index 0000000000000..018f6020b9c6f --- /dev/null +++ b/pkg/sql/colexec/external/hive_partition_fill.go @@ -0,0 +1,430 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package external + +import ( + "errors" + "math" + "strconv" + "strings" + + "github.com/matrixorigin/matrixone/pkg/common/moerr" + "github.com/matrixorigin/matrixone/pkg/container/batch" + "github.com/matrixorigin/matrixone/pkg/container/types" + "github.com/matrixorigin/matrixone/pkg/container/vector" + "github.com/matrixorigin/matrixone/pkg/pb/plan" + "github.com/matrixorigin/matrixone/pkg/vm/process" +) + +// isHivePartitionCol returns true if colName is a declared Hive partition column. +func (param *ExternalParam) isHivePartitionCol(colName string) bool { + if param.Extern == nil || !param.Extern.HivePartitioning { + return false + } + lower := strings.ToLower(colName) + for _, pc := range param.Extern.HivePartitionCols { + if pc == lower { + return true + } + } + return false +} + +// refreshPartitionValues extracts partition values from the current file path. +func (param *ExternalParam) refreshPartitionValues() error { + if param.Extern == nil || !param.Extern.HivePartitioning { + return nil + } + values, err := ExtractPartitionValues( + param.Fileparam.Filepath, + param.Extern.Filepath, + param.Extern.HivePartitionCols, + ) + if err != nil { + return err + } + param.currentPartValues = values + return nil +} + +// fillVirtualColumns fills partition columns and __mo_filepath for a batch. 
+func (h *ParquetHandler) fillVirtualColumns(bat *batch.Batch, param *ExternalParam, proc *process.Process) error { + rowCount := bat.RowCount() + mp := proc.Mp() + + if h.filepathColIndex >= 0 { + vec := bat.Vecs[h.filepathColIndex] + if err := vector.SetConstBytes(vec, []byte(param.Fileparam.Filepath), rowCount, mp); err != nil { + return err + } + } + + if len(h.partitionColIndices) > 0 { + return h.fillPartitionColumns(bat, param, proc) + } + return nil +} + +// fillPartitionColumns fills partition column vectors with constant values from the path. +func (h *ParquetHandler) fillPartitionColumns(bat *batch.Batch, param *ExternalParam, proc *process.Process) error { + partValues := param.currentPartValues + rowCount := bat.RowCount() + mp := proc.Mp() + + // Error messages use the path relative to the DDL base so that BVT output + // is portable across machines (absolute filesystem paths would embed + // /Users/... or /tmp/... in .result files). + relPath := param.Fileparam.Filepath + if param.Extern != nil && param.Extern.Filepath != "" { + relPath = relPartitionPath(param.Fileparam.Filepath, param.Extern.Filepath) + } + + for _, idx := range h.partitionColIndices { + col := param.Cols[idx] + colName := strings.ToLower(col.Name) + strVal, present := partValues[colName] + vec := bat.Vecs[idx] + + if !present { + return moerr.NewInternalErrorf(param.Ctx, + "partition column '%s' not found in path '%s'", colName, relPath) + } + + if strVal == HiveDefaultPartition { + notNullable := col.Default != nil && !col.Default.NullAbility + if notNullable { + return moerr.NewConstraintViolationf(param.Ctx, + "partition column '%s' is NOT NULL but directory has __HIVE_DEFAULT_PARTITION__ in path '%s'; allow NULL on the partition column or remove/rename the default partition directory", + colName, relPath) + } + if err := vector.SetConstNull(vec, rowCount, mp); err != nil { + return err + } + continue + } + + if err := fillConstantVector(vec, strVal, col, rowCount, proc, relPath); err != nil { + return err + } + } + return nil +} + +// fillConstantVector converts a string partition value to the column's typed vector. +// Follows external loader semantics (getColData path), not SQL CAST. 
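+//
+// The integer conversion ladder, shown with inputs pinned by
+// hive_partition_test.go (illustrative):
+//
+//	"2025"                -> ParseInt succeeds             -> 2025
+//	"1.5"                 -> ParseInt fails (not ErrRange) -> ParseFloat, truncate to 1
+//	"9223372036854775808" -> ParseInt fails with ErrRange  -> rejected, no float fallback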
+func fillConstantVector( + vec *vector.Vector, strVal string, col *plan.ColDef, + rowCount int, proc *process.Process, filePath string, +) error { + mp := proc.Mp() + typ := types.T(col.Typ.Id) + + wrapErr := func(err error) error { + return moerr.NewInternalErrorf(proc.Ctx, + "partition value type conversion failed: col=%s, value='%s', path=%s: %v", + col.Name, strVal, filePath, err) + } + + switch typ { + case types.T_int8: + v, err := parseIntWithFloatFallback(strVal, 8) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, int8(v), rowCount, mp) + + case types.T_int16: + v, err := parseIntWithFloatFallback(strVal, 16) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, int16(v), rowCount, mp) + + case types.T_int32: + v, err := parseIntWithFloatFallback(strVal, 32) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, int32(v), rowCount, mp) + + case types.T_int64: + v, err := parseIntWithFloatFallback(strVal, 64) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, v, rowCount, mp) + + case types.T_uint8: + v, err := parseUintWithFloatFallback(strVal, 8) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, uint8(v), rowCount, mp) + + case types.T_uint16: + v, err := parseUintWithFloatFallback(strVal, 16) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, uint16(v), rowCount, mp) + + case types.T_uint32: + v, err := parseUintWithFloatFallback(strVal, 32) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, uint32(v), rowCount, mp) + + case types.T_uint64: + if col.Typ.Enumvalues != "" { + // SET type stored as uint64 with Enumvalues + v, err := types.ParseSet(col.Typ.Enumvalues, strVal) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, v, rowCount, mp) + } + v, err := parseUintWithFloatFallback(strVal, 64) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, v, rowCount, mp) + + case types.T_bit: + v, err := strconv.ParseUint(strVal, 10, int(col.Typ.Width)) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, v, rowCount, mp) + + case types.T_float32: + v, err := strconv.ParseFloat(strVal, 32) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, float32(v), rowCount, mp) + + case types.T_float64: + v, err := strconv.ParseFloat(strVal, 64) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, v, rowCount, mp) + + case types.T_decimal64: + v, err := types.ParseDecimal64(strVal, col.Typ.Width, col.Typ.Scale) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, v, rowCount, mp) + + case types.T_decimal128: + v, err := types.ParseDecimal128(strVal, col.Typ.Width, col.Typ.Scale) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, v, rowCount, mp) + + case types.T_date: + v, err := types.ParseDateCast(strVal) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, v, rowCount, mp) + + case types.T_datetime: + v, err := types.ParseDatetime(strVal, col.Typ.Scale) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, v, rowCount, mp) + + case types.T_timestamp: + v, err := types.ParseTimestamp(proc.GetSessionInfo().TimeZone, strVal, col.Typ.Scale) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, v, rowCount, mp) + + case types.T_time: + v, err := 
types.ParseTime(strVal, col.Typ.Scale) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, v, rowCount, mp) + + case types.T_bool: + v, err := types.ParseBool(strVal) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, v, rowCount, mp) + + case types.T_enum: + v, err := types.ParseEnum(col.Typ.Enumvalues, strVal) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, v, rowCount, mp) + + case types.T_char, types.T_varchar, types.T_text, + types.T_blob, types.T_binary, types.T_varbinary, types.T_datalink: + return vector.SetConstBytes(vec, []byte(strVal), rowCount, mp) + + case types.T_json: + v, err := types.ParseStringToByteJson(strVal) + if err != nil { + return wrapErr(err) + } + bs, err := v.Marshal() + if err != nil { + return wrapErr(err) + } + return vector.SetConstBytes(vec, bs, rowCount, mp) + + case types.T_uuid: + v, err := types.ParseUuid(strVal) + if err != nil { + return wrapErr(err) + } + return vector.SetConstFixed(vec, v, rowCount, mp) + + case types.T_array_float32, types.T_array_float64: + return moerr.NewNotSupportedf(proc.Ctx, + "unsupported partition column type VECTOR for col=%s, path=%s", col.Name, filePath) + + default: + return moerr.NewNotSupportedf(proc.Ctx, + "unsupported partition column type %v for col=%s, path=%s", typ, col.Name, filePath) + } +} + +// Float-domain boundary constants for 64-bit overflow detection. +// +// math.MaxInt64 = 2^63 - 1 and math.MaxUint64 = 2^64 - 1 are odd numbers that +// cannot be exactly represented in float64 (only 53 bits of mantissa). Comparing +// float64(f) > math.MaxInt64 silently compares against 2^63 (the nearest float64), +// which lets f == 2^63 slip through and then int64(f) wraps to -2^63. +// +// Using the exact float64 values 2^63 and 2^64 as strict upper bounds closes +// this gap: any f ≥ 2^63 is out of int64 range; any f ≥ 2^64 is out of uint64 +// range. Both 2^63 and 2^64 are exactly representable in float64 (powers of 2). +// +// Additionally, float64 can represent consecutive integers exactly only up to +// 2^53. Beyond that, distinct source integers collapse to the same float64 +// (e.g. "-9223372036854775809.0" and "-9223372036854775808.0" both parse to +// -2^63). When we reach the float fallback for a 64-bit target, we therefore +// reject any |f| ≥ 2^53: genuine integer strings would have succeeded in +// ParseInt and never reached this path, so a float in the non-exact range +// here implies the source string is a non-integer (decimal/exponent form) +// that we cannot safely round to int64/uint64. +const ( + float64MaxInt64Exclusive = 0x1p63 // 2^63, one past max int64 + float64MaxUint64Exclusive = 0x1p64 // 2^64, one past max uint64 + float64IntExactLimit = 0x1p53 // 2^53, largest |x| with consecutive-integer precision +) + +// parseIntWithFloatFallback mimics getColData behavior (external.go:1019): +// 1. ParseInt succeeds → use it +// 2. ParseInt fails with ErrRange → reject (no fallback for overflow) +// 3. ParseInt fails otherwise → ParseFloat + range check +func parseIntWithFloatFallback(s string, bitSize int) (int64, error) { + v, err := strconv.ParseInt(s, 10, bitSize) + if err == nil { + return v, nil + } + if errors.Is(err, strconv.ErrRange) { + return 0, err + } + f, ferr := strconv.ParseFloat(s, 64) + if ferr != nil { + return 0, err + } + // Reject non-finite values ("nan" / "inf" parse successfully to NaN/±Inf). 
+ // NaN in particular is dangerous: NaN < x and NaN >= x are both false, so + // the range checks below silently accept it and int64(NaN) is undefined. + if math.IsNaN(f) || math.IsInf(f, 0) { + return 0, err + } + if bitSize == 64 { + // Exact-float boundaries: MaxInt64 rounds up to 2^63 in float64. + // Also reject any |f| ≥ 2^53 — beyond float64's consecutive-integer + // precision, round-trip through float is ambiguous (see const comment). + if f < math.MinInt64 || f >= float64MaxInt64Exclusive { + return 0, err + } + if f >= float64IntExactLimit || f <= -float64IntExactLimit { + return 0, err + } + } else { + // Compare f against the type's float-domain bounds BEFORE truncation. + // Go's int64(f) truncates toward zero, so a naive post-truncation + // bounds check silently accepts e.g. int32 "-2147483648.9" (truncates + // to -2^31, which passes a ">= -2^31" check). Both ±(2^(N-1)) and + // (2^(N-1) - 1) are exactly representable in float64 for N ≤ 53, so + // the inclusive comparison has no rounding slack. + lo := float64(int64(-1) << (bitSize - 1)) // -2^(N-1) + hi := float64(int64(1)<<(bitSize-1) - 1) // 2^(N-1) - 1 + if f < lo || f > hi { + return 0, err + } + } + return int64(f), nil +} + +// parseUintWithFloatFallback mimics getColData behavior (external.go:1111): +// 1. ParseUint succeeds → use it +// 2. ParseUint fails with ErrRange → reject (no fallback for overflow) +// 3. ParseUint fails otherwise → ParseFloat + range check +func parseUintWithFloatFallback(s string, bitSize int) (uint64, error) { + v, err := strconv.ParseUint(s, 10, bitSize) + if err == nil { + return v, nil + } + if errors.Is(err, strconv.ErrRange) { + return 0, err + } + f, ferr := strconv.ParseFloat(s, 64) + if ferr != nil || f < 0 { + return 0, err + } + // Reject non-finite values (same NaN/Inf risk as parseIntWithFloatFallback). + // Note: f < 0 above already rejects -Inf, but NaN is not ordered so must be + // handled explicitly. + if math.IsNaN(f) || math.IsInf(f, 0) { + return 0, err + } + if bitSize == 64 { + // Same boundary issue as int64 (MaxUint64 rounds up to 2^64) plus the + // 2^53 precision limit for round-tripping integers through float64. + if f >= float64MaxUint64Exclusive { + return 0, err + } + if f >= float64IntExactLimit { + return 0, err + } + } else { + // Compare f against MaxUintN BEFORE truncation, to reject values like + // uint32 "4294967295.9" that Go's uint64(f) would truncate to 2^32 - 1 + // and let through. 2^N - 1 is exactly representable in float64 for + // N ≤ 53, so the inclusive upper bound has no rounding slack. The + // lower bound is implicit: f < 0 is rejected above. + hi := float64(uint64(1)<<bitSize - 1) // 2^N - 1 + if f > hi { + return 0, err + } + } + return uint64(f), nil +} diff --git a/pkg/sql/colexec/external/hive_partition_test.go b/pkg/sql/colexec/external/hive_partition_test.go new file mode 100644 index 0000000000000..94be94b01929d --- /dev/null +++ b/pkg/sql/colexec/external/hive_partition_test.go @@ -0,0 +1,1757 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package external + +import ( + "context" + "fmt" + "iter" + "strings" + "testing" + + "github.com/matrixorigin/matrixone/pkg/catalog" + "github.com/matrixorigin/matrixone/pkg/container/batch" + "github.com/matrixorigin/matrixone/pkg/container/types" + "github.com/matrixorigin/matrixone/pkg/container/vector" + "github.com/matrixorigin/matrixone/pkg/fileservice" + "github.com/matrixorigin/matrixone/pkg/pb/plan" + "github.com/matrixorigin/matrixone/pkg/sql/parsers/tree" + "github.com/matrixorigin/matrixone/pkg/sql/plan/function" + "github.com/matrixorigin/matrixone/pkg/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --- ParseHivePartitionSegment tests --- + +func TestParseHivePartitionSegment_Valid(t *testing.T) { + seg, isHive, err := ParseHivePartitionSegment("year=2024") + require.NoError(t, err) + assert.True(t, isHive) + assert.Equal(t, "year", seg.Key) + assert.Equal(t, "2024", seg.Value) +} + +func TestParseHivePartitionSegment_EmptyValue(t *testing.T) { + seg, isHive, err := ParseHivePartitionSegment("year=") + require.NoError(t, err) + assert.True(t, isHive) + assert.Equal(t, "year", seg.Key) + assert.Equal(t, "", seg.Value) +} + +func TestParseHivePartitionSegment_PercentLiteral(t *testing.T) { + seg, isHive, err := ParseHivePartitionSegment("country=US%2FCA") + require.NoError(t, err) + assert.True(t, isHive) + assert.Equal(t, "country", seg.Key) + assert.Equal(t, "US%2FCA", seg.Value) +} + +func TestParseHivePartitionSegment_NotPartition(t *testing.T) { + _, isHive, err := ParseHivePartitionSegment("data.parquet") + require.NoError(t, err) + assert.False(t, isHive) +} + +func TestParseHivePartitionSegment_StartsWithEquals(t *testing.T) { + _, isHive, err := ParseHivePartitionSegment("=value") + require.NoError(t, err) + assert.False(t, isHive) +} + +func TestParseHivePartitionSegment_InvalidPercentLiteral(t *testing.T) { + seg, isHive, err := ParseHivePartitionSegment("country=US%ZZ") + assert.True(t, isHive) + require.NoError(t, err) + assert.Equal(t, "country", seg.Key) + assert.Equal(t, "US%ZZ", seg.Value) +} + +func TestParseHivePartitionSegment_DefaultPartition(t *testing.T) { + seg, isHive, err := ParseHivePartitionSegment("year=__HIVE_DEFAULT_PARTITION__") + require.NoError(t, err) + assert.True(t, isHive) + assert.Equal(t, "__HIVE_DEFAULT_PARTITION__", seg.Value) +} + +// --- ExtractPartitionValues tests --- + +func TestExtractPartitionValues_SingleLevel(t *testing.T) { + vals, err := ExtractPartitionValues( + "/warehouse/data/year=2024/file.parquet", + "/warehouse/data", + []string{"year"}, + ) + require.NoError(t, err) + assert.Equal(t, "2024", vals["year"]) +} + +func TestExtractPartitionValues_MultiLevel(t *testing.T) { + vals, err := ExtractPartitionValues( + "/warehouse/data/year=2024/month=05/file.parquet", + "/warehouse/data", + []string{"year", "month"}, + ) + require.NoError(t, err) + assert.Equal(t, "2024", vals["year"]) + assert.Equal(t, "05", vals["month"]) +} + +func TestExtractPartitionValues_NormalizePath(t *testing.T) { + tests := []struct { + filePath string + basePath string + }{ + {"warehouse/data/year=2025/f.parquet", "warehouse/data"}, + {"/warehouse/data/year=2025/f.parquet", "/warehouse/data"}, + {"warehouse/data/year=2025/f.parquet", "/warehouse/data"}, + {"/warehouse/data/year=2025/f.parquet", "warehouse/data"}, + {" /warehouse/data/year=2025/f.parquet ", " warehouse/data "}, + } + for _, 
tt := range tests { + vals, err := ExtractPartitionValues(tt.filePath, tt.basePath, []string{"year"}) + require.NoError(t, err, "filePath=%q basePath=%q", tt.filePath, tt.basePath) + assert.Equal(t, "2025", vals["year"], "filePath=%q basePath=%q", tt.filePath, tt.basePath) + } +} + +func TestExtractPartitionValues_PrefixCollision(t *testing.T) { + _, err := ExtractPartitionValues( + "/warehouse/data2/year=2025/f.parquet", + "/warehouse/data", + []string{"year"}, + ) + require.Error(t, err) + assert.Contains(t, err.Error(), "not under base path") +} + +// --- IsHiddenFile tests --- + +func TestIsHiddenFile(t *testing.T) { + assert.True(t, IsHiddenFile(".hidden")) + assert.True(t, IsHiddenFile("_SUCCESS")) + assert.True(t, IsHiddenFile("_metadata")) + assert.False(t, IsHiddenFile("year=2024")) + assert.False(t, IsHiddenFile("data.parquet")) + assert.False(t, IsHiddenFile("")) +} + +// --- IsParquetFile tests --- + +func TestIsParquetFile(t *testing.T) { + assert.True(t, IsParquetFile("data.parquet")) + assert.True(t, IsParquetFile("data.snappy.parquet")) + assert.True(t, IsParquetFile("DATA.PARQUET")) + assert.False(t, IsParquetFile("data.csv")) + assert.False(t, IsParquetFile("data.parquet.crc")) + assert.False(t, IsParquetFile("")) +} + +// --- matchPartitionValue tests --- + +func TestMatchPartitionValue_IntMatch(t *testing.T) { + ct := tree.HivePartColType{Id: int32(types.T_int32)} + assert.Equal(t, MatchTrue, matchPartitionValue("2024", []string{"2024"}, ct)) + assert.Equal(t, MatchFalse, matchPartitionValue("2024", []string{"2025"}, ct)) + assert.Equal(t, MatchTrue, matchPartitionValue("2024", []string{"2023", "2024"}, ct)) +} + +func TestMatchPartitionValue_IntOverflow(t *testing.T) { + ct := tree.HivePartColType{Id: int32(types.T_int8)} + assert.Equal(t, MatchUnknown, matchPartitionValue("999", []string{"999"}, ct)) +} + +func TestMatchPartitionValue_IntParseError(t *testing.T) { + ct := tree.HivePartColType{Id: int32(types.T_int32)} + assert.Equal(t, MatchUnknown, matchPartitionValue("abc", []string{"123"}, ct)) +} + +func TestMatchPartitionValue_UintMatch(t *testing.T) { + ct := tree.HivePartColType{Id: int32(types.T_uint32)} + assert.Equal(t, MatchTrue, matchPartitionValue("100", []string{"100"}, ct)) + assert.Equal(t, MatchFalse, matchPartitionValue("100", []string{"200"}, ct)) +} + +func TestMatchPartitionValue_VarcharExact(t *testing.T) { + ct := tree.HivePartColType{Id: int32(types.T_varchar)} + assert.Equal(t, MatchTrue, matchPartitionValue("US", []string{"US"}, ct)) + assert.Equal(t, MatchUnknown, matchPartitionValue("us", []string{"US"}, ct)) +} + +func TestMatchPartitionValue_UnknownTypes(t *testing.T) { + unknownTypes := []types.T{ + types.T_bool, types.T_float32, types.T_float64, + types.T_decimal64, types.T_date, types.T_datetime, + types.T_timestamp, types.T_json, types.T_uuid, + } + for _, typ := range unknownTypes { + ct := tree.HivePartColType{Id: int32(typ)} + assert.Equal(t, MatchUnknown, matchPartitionValue("val", []string{"val"}, ct), + "type %v should return MatchUnknown", typ) + } +} + +func TestMatchPartitionValue_TAny(t *testing.T) { + ct := tree.HivePartColType{Id: int32(types.T_any)} + assert.Equal(t, MatchUnknown, matchPartitionValue("2024", []string{"2024"}, ct)) +} + +func TestMatchPartitionValue_ZeroPaddedInt(t *testing.T) { + ct := tree.HivePartColType{Id: int32(types.T_int32)} + assert.Equal(t, MatchTrue, matchPartitionValue("01", []string{"1"}, ct)) + assert.Equal(t, MatchTrue, matchPartitionValue("007", []string{"7"}, ct)) +} + +func 
TestMatchPartitionValue_ZeroPaddedVarcharConservative(t *testing.T) { + ct := tree.HivePartColType{Id: int32(types.T_varchar)} + assert.Equal(t, MatchTrue, matchPartitionValue("01", []string{"01"}, ct)) + assert.Equal(t, MatchUnknown, matchPartitionValue("01", []string{"1"}, ct), + "varchar partitions keep string semantics; a mismatch is not pruned away") +} + +// --- DiscoverHivePartitions tests --- + +func mockListDir(dirs map[string][]fileservice.DirEntry) ListDirFunc { + return func(ctx context.Context, prefix string) iter.Seq2[*fileservice.DirEntry, error] { + return func(yield func(*fileservice.DirEntry, error) bool) { + entries := dirs[prefix] + for i := range entries { + if !yield(&entries[i], nil) { + return + } + } + } + } +} + +func TestDiscoverHivePartitions_SingleLevel(t *testing.T) { + dirs := map[string][]fileservice.DirEntry{ + "/data": { + {Name: "year=2024", IsDir: true}, + {Name: "year=2025", IsDir: true}, + {Name: "_SUCCESS", IsDir: false}, + }, + "/data/year=2024": { + {Name: "part-0000.parquet", IsDir: false, Size: 1000}, + {Name: ".hidden", IsDir: false, Size: 100}, + }, + "/data/year=2025": { + {Name: "part-0000.parquet", IsDir: false, Size: 2000}, + }, + } + + result, err := DiscoverHivePartitions( + context.Background(), + mockListDir(dirs), + "/data", + []string{"year"}, + []tree.HivePartColType{{Id: int32(types.T_int32)}}, + nil, + ) + require.NoError(t, err) + assert.Equal(t, 2, result.PartitionCount) + assert.Equal(t, 2, len(result.Files)) + assert.Equal(t, int64(1000), result.Files[0].FileSize) +} + +func TestDiscoverHivePartitions_MultiLevel(t *testing.T) { + dirs := map[string][]fileservice.DirEntry{ + "/data": { + {Name: "year=2024", IsDir: true}, + }, + "/data/year=2024": { + {Name: "month=01", IsDir: true}, + {Name: "month=02", IsDir: true}, + }, + "/data/year=2024/month=01": { + {Name: "data.parquet", IsDir: false, Size: 500}, + }, + "/data/year=2024/month=02": { + {Name: "data.parquet", IsDir: false, Size: 600}, + }, + } + + result, err := DiscoverHivePartitions( + context.Background(), + mockListDir(dirs), + "/data", + []string{"year", "month"}, + []tree.HivePartColType{ + {Id: int32(types.T_int32)}, + {Id: int32(types.T_int32)}, + }, + nil, + ) + require.NoError(t, err) + assert.Equal(t, 3, result.PartitionCount) // year=2024 + month=01 + month=02 + assert.Equal(t, 2, len(result.Files)) +} + +func TestDiscoverHivePartitions_WithPredicate(t *testing.T) { + dirs := map[string][]fileservice.DirEntry{ + "/data": { + {Name: "year=2024", IsDir: true}, + {Name: "year=2025", IsDir: true}, + {Name: "year=2026", IsDir: true}, + }, + "/data/year=2025": { + {Name: "data.parquet", IsDir: false, Size: 1000}, + }, + } + + result, err := DiscoverHivePartitions( + context.Background(), + mockListDir(dirs), + "/data", + []string{"year"}, + []tree.HivePartColType{{Id: int32(types.T_int32)}}, + []PartitionPredicate{{ColName: "year", Op: PartOpEq, Values: []string{"2025"}}}, + ) + require.NoError(t, err) + assert.Equal(t, 1, result.PartitionCount) + assert.Equal(t, 2, result.PrunedCount) + assert.Equal(t, 1, len(result.Files)) + assert.Equal(t, 2, result.ListCalls) // root dir + year=2025 file listing +} + +func TestDiscoverHivePartitions_SkipsHidden(t *testing.T) { + dirs := map[string][]fileservice.DirEntry{ + "/data": { + {Name: "year=2024", IsDir: true}, + {Name: ".metadata", IsDir: true}, + {Name: "_temp", IsDir: true}, + }, + "/data/year=2024": { + {Name: "data.parquet", IsDir: false, Size: 100}, + {Name: "_SUCCESS", IsDir: false, Size: 0}, + {Name: ".crc", IsDir: 
false, Size: 10}, + }, + } + + result, err := DiscoverHivePartitions( + context.Background(), + mockListDir(dirs), + "/data", + []string{"year"}, + []tree.HivePartColType{{Id: int32(types.T_int32)}}, + nil, + ) + require.NoError(t, err) + assert.Equal(t, 1, len(result.Files)) +} + +func TestDiscoverHivePartitions_NormalizePath(t *testing.T) { + dirs := map[string][]fileservice.DirEntry{ + "/warehouse/data": { + {Name: "year=2024", IsDir: true}, + }, + "/warehouse/data/year=2024": { + {Name: "f.parquet", IsDir: false, Size: 100}, + }, + } + + result, err := DiscoverHivePartitions( + context.Background(), + mockListDir(dirs), + "warehouse/data/", + []string{"year"}, + []tree.HivePartColType{{Id: int32(types.T_int32)}}, + nil, + ) + require.NoError(t, err) + assert.Equal(t, 1, len(result.Files)) +} + +func TestDiscoverHivePartitions_NilColTypes(t *testing.T) { + dirs := map[string][]fileservice.DirEntry{ + "/data": { + {Name: "year=2024", IsDir: true}, + {Name: "year=2025", IsDir: true}, + }, + "/data/year=2024": { + {Name: "f.parquet", IsDir: false, Size: 100}, + }, + "/data/year=2025": { + {Name: "f.parquet", IsDir: false, Size: 200}, + }, + } + + // nil colTypes means old JSON — should still discover all (no pruning possible) + result, err := DiscoverHivePartitions( + context.Background(), + mockListDir(dirs), + "/data", + []string{"year"}, + nil, // triggers T_any fallback + []PartitionPredicate{{ColName: "year", Op: PartOpEq, Values: []string{"2024"}}}, + ) + require.NoError(t, err) + // T_any → MatchUnknown → no pruning, all partitions kept + assert.Equal(t, 2, result.PartitionCount) + assert.Equal(t, 0, result.PrunedCount) + assert.Equal(t, 2, len(result.Files)) +} + +func TestDiscoverHivePartitions_PercentInDirName(t *testing.T) { + dirs := map[string][]fileservice.DirEntry{ + "/data": { + {Name: "country=US%2FCA", IsDir: true}, + }, + } + + _, err := DiscoverHivePartitions( + context.Background(), + mockListDir(dirs), + "/data", + []string{"country"}, + []tree.HivePartColType{{Id: int32(types.T_varchar)}}, + nil, + ) + require.Error(t, err) + assert.Contains(t, err.Error(), "%") +} + +func TestDiscoverHivePartitions_INPredicate(t *testing.T) { + dirs := map[string][]fileservice.DirEntry{ + "/data": { + {Name: "year=2023", IsDir: true}, + {Name: "year=2024", IsDir: true}, + {Name: "year=2025", IsDir: true}, + }, + "/data/year=2024": { + {Name: "f.parquet", IsDir: false, Size: 100}, + }, + "/data/year=2025": { + {Name: "f.parquet", IsDir: false, Size: 200}, + }, + } + + result, err := DiscoverHivePartitions( + context.Background(), + mockListDir(dirs), + "/data", + []string{"year"}, + []tree.HivePartColType{{Id: int32(types.T_int32)}}, + []PartitionPredicate{{ColName: "year", Op: PartOpIn, Values: []string{"2024", "2025"}}}, + ) + require.NoError(t, err) + assert.Equal(t, 2, result.PartitionCount) + assert.Equal(t, 1, result.PrunedCount) + assert.Equal(t, 2, len(result.Files)) + assert.Equal(t, 3, result.ListCalls) // root + year=2024 files + year=2025 files +} + +func TestDiscoverHivePartitions_KeyMismatchSkipped(t *testing.T) { + dirs := map[string][]fileservice.DirEntry{ + "/data": { + {Name: "year=2024", IsDir: true}, + {Name: "country=US", IsDir: true}, // wrong key for level 0 + }, + "/data/year=2024": { + {Name: "f.parquet", IsDir: false, Size: 100}, + }, + } + + result, err := DiscoverHivePartitions( + context.Background(), + mockListDir(dirs), + "/data", + []string{"year"}, + []tree.HivePartColType{{Id: int32(types.T_int32)}}, + nil, + ) + require.NoError(t, err) + 
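// country=US does not match the expected level-0 key "year", so it is + // skipped silently rather than raising an error; only year=2024 survives. +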
assert.Equal(t, 1, result.PartitionCount) + assert.Equal(t, 1, len(result.Files)) +} + +func TestDiscoverHivePartitions_ListCallLimit(t *testing.T) { + // Generate enough partitions at two levels to exceed maxListCalls (10000). + // Level 0: 200 year partitions, Level 1: 200 month partitions each. + // This requires 1 (root) + 200 (year dirs) = 201 List calls before we hit month level. + // To trigger the limit efficiently, use a mock that always returns entries + // forcing recursion well beyond the limit. + entries := make([]fileservice.DirEntry, 200) + for i := range entries { + entries[i] = fileservice.DirEntry{Name: fmt.Sprintf("year=%d", i), IsDir: true} + } + monthEntries := make([]fileservice.DirEntry, 200) + for i := range monthEntries { + monthEntries[i] = fileservice.DirEntry{Name: fmt.Sprintf("month=%d", i), IsDir: true} + } + fileEntries := []fileservice.DirEntry{{Name: "f.parquet", IsDir: false, Size: 10}} + + listDir := func(ctx context.Context, prefix string) iter.Seq2[*fileservice.DirEntry, error] { + return func(yield func(*fileservice.DirEntry, error) bool) { + var items []fileservice.DirEntry + if strings.Count(prefix, "/") <= 1 { + items = entries + } else if strings.Contains(prefix, "month=") { + items = fileEntries + } else { + items = monthEntries + } + for i := range items { + if !yield(&items[i], nil) { + return + } + } + } + } + + _, err := DiscoverHivePartitions( + context.Background(), + listDir, + "/data", + []string{"year", "month"}, + []tree.HivePartColType{ + {Id: int32(types.T_int32)}, + {Id: int32(types.T_int32)}, + }, + nil, + ) + require.Error(t, err) + assert.Contains(t, err.Error(), "List calls") +} + +// --------------------------------------------------------------------------- +// ClassifyFilters tests +// --------------------------------------------------------------------------- + +func makeTableDef(cols ...string) *plan.TableDef { + td := &plan.TableDef{Cols: make([]*plan.ColDef, len(cols))} + for i, name := range cols { + td.Cols[i] = &plan.ColDef{Name: name} + } + return td +} + +func makeColExpr(colPos int32, name string) *plan.Expr { + return &plan.Expr{ + Expr: &plan.Expr_Col{Col: &plan.ColRef{ColPos: colPos, Name: name}}, + } +} + +func makeLitInt64(val int64) *plan.Expr { + return &plan.Expr{ + Expr: &plan.Expr_Lit{Lit: &plan.Literal{Value: &plan.Literal_I64Val{I64Val: val}}}, + } +} + +func makeLitString(val string) *plan.Expr { + return &plan.Expr{ + Expr: &plan.Expr_Lit{Lit: &plan.Literal{Value: &plan.Literal_Sval{Sval: val}}}, + } +} + +func makeEqExpr(left, right *plan.Expr) *plan.Expr { + return &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: function.EqualFunctionEncodedID}, + Args: []*plan.Expr{left, right}, + }}, + } +} + +func makeInExpr(col *plan.Expr, vals ...*plan.Expr) *plan.Expr { + return &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: function.InFunctionEncodedID}, + Args: []*plan.Expr{ + col, + {Expr: &plan.Expr_List{List: &plan.ExprList{List: vals}}}, + }, + }}, + } +} + +func TestClassifyFilters_Basic(t *testing.T) { + td := makeTableDef("year", "amount", "account", "__mo_filepath") + partColSet := map[string]bool{"year": true} + + yearEq := makeEqExpr(makeColExpr(0, "year"), makeLitInt64(2025)) + amountGt := &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: int64(function.GREAT_THAN) << 32}, + Args: []*plan.Expr{makeColExpr(1, "amount"), makeLitInt64(100)}, + }}, + } + fpFilter := makeEqExpr(makeColExpr(3, 
"__mo_filepath"), makeLitString("/path")) + + partF, fpF, rowF := ClassifyFilters(td, []*plan.Expr{yearEq, amountGt, fpFilter}, partColSet) + + assert.Equal(t, 1, len(partF), "year filter should be in partitionFilters") + assert.Equal(t, 1, len(fpF), "__mo_filepath filter should be in filePathFilters") + assert.Equal(t, 2, len(rowF), "year+amount should be in rowFilters (year duplicated for safety)") + assert.Same(t, yearEq, partF[0]) + assert.Same(t, fpFilter, fpF[0]) +} + +func TestClassifyFilters_PartitionFilterDuplicated(t *testing.T) { + td := makeTableDef("year", "data") + partColSet := map[string]bool{"year": true} + + yearEq := makeEqExpr(makeColExpr(0, "year"), makeLitInt64(2025)) + partF, _, rowF := ClassifyFilters(td, []*plan.Expr{yearEq}, partColSet) + + assert.Equal(t, 1, len(partF)) + assert.Equal(t, 1, len(rowF)) + assert.Same(t, partF[0], rowF[0], "partition filter must appear in both lists") +} + +func TestClassifyFilters_AccountNamedPartCol(t *testing.T) { + td := makeTableDef("account", "data") + partColSet := map[string]bool{"account": true} + + acctEq := makeEqExpr(makeColExpr(0, "account"), makeLitString("tenant1")) + partF, fpF, _ := ClassifyFilters(td, []*plan.Expr{acctEq}, partColSet) + + assert.Equal(t, 1, len(partF), "account as partition col goes to partitionFilters") + assert.Equal(t, 0, len(fpF), "should NOT go to filePathFilters") +} + +func TestClassifyFilters_AccountIdSubstring(t *testing.T) { + td := makeTableDef("account_id", "data") + partColSet := map[string]bool{} + + acctIdEq := makeEqExpr(makeColExpr(0, "account_id"), makeLitString("123")) + _, fpF, rowF := ClassifyFilters(td, []*plan.Expr{acctIdEq}, partColSet) + + assert.Equal(t, 0, len(fpF), "account_id must NOT be mistaken for filepath filter") + assert.Equal(t, 1, len(rowF)) +} + +func TestClassifyFilters_MixedReference(t *testing.T) { + td := makeTableDef("year", "amount") + partColSet := map[string]bool{"year": true} + + mixed := &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: int64(function.GREAT_THAN) << 32}, + Args: []*plan.Expr{makeColExpr(0, "year"), makeColExpr(1, "amount")}, + }}, + } + partF, fpF, rowF := ClassifyFilters(td, []*plan.Expr{mixed}, partColSet) + + assert.Equal(t, 0, len(partF)) + assert.Equal(t, 0, len(fpF)) + assert.Equal(t, 1, len(rowF), "mixed reference goes to rowFilters") +} + +func TestClassifyFilters_MoFilepathCol(t *testing.T) { + td := makeTableDef("year", catalog.ExternalFilePath) + partColSet := map[string]bool{"year": true} + + fpExpr := makeEqExpr(makeColExpr(1, catalog.ExternalFilePath), makeLitString("x")) + _, fpF, _ := ClassifyFilters(td, []*plan.Expr{fpExpr}, partColSet) + + assert.Equal(t, 1, len(fpF)) +} + +func TestClassifyFilters_NoColumnRefs(t *testing.T) { + td := makeTableDef("year") + partColSet := map[string]bool{"year": true} + + constExpr := makeLitInt64(42) + _, _, rowF := ClassifyFilters(td, []*plan.Expr{constExpr}, partColSet) + + assert.Equal(t, 1, len(rowF), "constant expression goes to rowFilters") +} + +func TestClassifyFilters_FunctionWrappedPartitionColumnConservative(t *testing.T) { + td := makeTableDef("year", "data") + partColSet := map[string]bool{"year": true} + + wrappedYear := &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: 123, ObjName: "cast"}, + Args: []*plan.Expr{makeColExpr(0, "year")}, + }}, + } + filter := makeEqExpr(wrappedYear, makeLitInt64(2024)) + partF, fpF, rowF := ClassifyFilters(td, []*plan.Expr{filter}, partColSet) + + require.Len(t, partF, 1, 
"Expr_F wrappers must still expose the partition column") + assert.Empty(t, fpF) + require.Len(t, rowF, 1, "row-level filtering remains the correctness backstop") + assert.Empty(t, ExtractPartitionPredicatesFromExprs(td, partF, partColSet), + "CAST/function-wrapped partition columns are not structurally pruned") +} + +// TestClassifyFilters_AccountAsPhysicalCol guards against classifying a +// physical column literally named "account" as a filepath pseudo column. +// The CSV-only per-batch "account" virtual column does not exist on Hive / +// Parquet external tables; treating it as such would cause row filters on a +// real column to be silently dropped and evaluated against garbage path +// synthesis. +func TestClassifyFilters_AccountAsPhysicalCol(t *testing.T) { + td := makeTableDef("account", "amount") + partColSet := map[string]bool{} // "account" is NOT a partition column + + acctEq := makeEqExpr(makeColExpr(0, "account"), makeLitString("tenant1")) + partF, fpF, rowF := ClassifyFilters(td, []*plan.Expr{acctEq}, partColSet) + + assert.Equal(t, 0, len(partF)) + assert.Equal(t, 0, len(fpF), "physical column 'account' must NOT be classified as filepath filter") + assert.Equal(t, 1, len(rowF), "must be evaluated as a normal row filter") +} + +// TestClassifyFilters_OrFilepathAndLiteral documents the exact scenario that +// motivated the compile-side fpFilters → rowFilters propagation fix: +// ClassifyFilters routes OR(__mo_filepath LIKE ..., const) to fpFilters +// because both operands' col refs are a subset of filePathColSet (the +// literal contributes no refs). But FilterFileList's judgeContainColname is +// stricter — it rejects OR branches that don't reference a filepath column +// — so the filter comes back unconsumed and must be appended to rowFilters +// by the caller. This test pins the classification half of the contract so +// future ClassifyFilters changes don't silently break the invariant. +func TestClassifyFilters_OrFilepathAndLiteral(t *testing.T) { + td := makeTableDef("year", catalog.ExternalFilePath) + partColSet := map[string]bool{"year": true} + + // OR(__mo_filepath = 'x', false-literal) — no columns on the right arm. + orExpr := &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: int64(function.OR) << 32, ObjName: "or"}, + Args: []*plan.Expr{ + makeEqExpr(makeColExpr(1, catalog.ExternalFilePath), makeLitString("x")), + {Expr: &plan.Expr_Lit{Lit: &plan.Literal{Value: &plan.Literal_Bval{Bval: false}}}}, + }, + }}, + } + _, fpF, _ := ClassifyFilters(td, []*plan.Expr{orExpr}, partColSet) + assert.Equal(t, 1, len(fpF), + "OR(filepath, literal) must go to fpFilters; the compile-side caller is responsible for "+ + "re-appending it to rowFilters if FilterFileList refuses to consume it") +} + +// TestFilterFileList_LeavesUnconsumedOrFilterInNode locks the exact side-effect +// contract that compile.getHivePartitionFileList depends on: when FilterFileList +// is handed an OR(filepath, literal) filter, its judgeContainColname check +// rejects it (OR branches must each reference a filepath col), and the rejected +// filter is written back via node.FilterList. compile.go appends tmpNode.FilterList +// onto rowFilters so the runtime still evaluates the predicate; without that +// append, the filter is silently dropped. If a future change has FilterFileList +// consume such filters, or uses a different side-effect pattern (e.g. returning +// leftover filters), this test goes red and the compile side must be audited. 
+func TestFilterFileList_LeavesUnconsumedOrFilterInNode(t *testing.T) { + proc := testutil.NewProc(t) + + td := makeTableDef("year", catalog.ExternalFilePath) + orExpr := &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: int64(function.OR) << 32, ObjName: "or"}, + Args: []*plan.Expr{ + makeEqExpr(makeColExpr(1, catalog.ExternalFilePath), makeLitString("x")), + {Expr: &plan.Expr_Lit{Lit: &plan.Literal{Value: &plan.Literal_Bval{Bval: false}}}}, + }, + }}, + } + tmpNode := &plan.Node{ + TableDef: td, + FilterList: []*plan.Expr{orExpr}, + } + fileList := []string{"/warehouse/data/year=2024/f.parquet"} + fileSize := []int64{123} + + outFileList, outFileSize, err := FilterFileList(proc.Ctx, tmpNode, proc, fileList, fileSize) + require.NoError(t, err) + + // judgeContainColname rejected the OR, so filterList in filterByAccountAndFilename + // was empty and the function short-circuited at line 368-370 — fileList / fileSize + // come back unchanged. + assert.Equal(t, fileList, outFileList) + assert.Equal(t, fileSize, outFileSize) + + // And tmpNode.FilterList must still hold the unconsumed predicate. This is + // what compile.getHivePartitionFileList `append`s back onto rowFilters. + require.Equal(t, 1, len(tmpNode.FilterList), + "unconsumed OR(filepath, literal) filter must remain in tmpNode.FilterList") + assert.Same(t, orExpr, tmpNode.FilterList[0], + "tmpNode.FilterList must hold the exact expression for compile.go to re-append") +} + +// --------------------------------------------------------------------------- +// collectBareColNames tests +// --------------------------------------------------------------------------- + +func TestCollectBareColNames_ColPos(t *testing.T) { + td := makeTableDef("year", "month") + expr := makeColExpr(0, "catalog_returns.year") + names := collectBareColNames(td, expr) + assert.True(t, names["year"], "should resolve via ColPos, not col.Name") + assert.False(t, names["catalog_returns.year"]) +} + +func TestCollectBareColNames_FallbackStrip(t *testing.T) { + td := makeTableDef("year") + expr := makeColExpr(99, "t.month") + names := collectBareColNames(td, expr) + assert.True(t, names["month"], "fallback should strip table prefix") +} + +func TestCollectBareColNames_Nested(t *testing.T) { + td := makeTableDef("year", "month") + expr := &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: int64(function.GREAT_THAN) << 32}, + Args: []*plan.Expr{makeColExpr(0, "year"), makeColExpr(1, "month")}, + }}, + } + names := collectBareColNames(td, expr) + assert.True(t, names["year"]) + assert.True(t, names["month"]) +} + +// --------------------------------------------------------------------------- +// ExtractPartitionPredicatesFromExprs tests +// --------------------------------------------------------------------------- + +func TestExtractPartitionPredicates_Eq(t *testing.T) { + td := makeTableDef("year", "data") + partColSet := map[string]bool{"year": true} + + yearEq := makeEqExpr(makeColExpr(0, "year"), makeLitInt64(2025)) + preds := ExtractPartitionPredicatesFromExprs(td, []*plan.Expr{yearEq}, partColSet) + + require.Equal(t, 1, len(preds)) + assert.Equal(t, "year", preds[0].ColName) + assert.Equal(t, PartOpEq, preds[0].Op) + assert.Equal(t, []string{"2025"}, preds[0].Values) +} + +func TestExtractPartitionPredicates_EqReversed(t *testing.T) { + td := makeTableDef("year", "data") + partColSet := map[string]bool{"year": true} + + reversed := makeEqExpr(makeLitInt64(2025), makeColExpr(0, "year")) + preds := 
ExtractPartitionPredicatesFromExprs(td, []*plan.Expr{reversed}, partColSet) + + require.Equal(t, 1, len(preds)) + assert.Equal(t, "year", preds[0].ColName) + assert.Equal(t, []string{"2025"}, preds[0].Values) +} + +func TestExtractPartitionPredicates_In(t *testing.T) { + td := makeTableDef("year", "data") + partColSet := map[string]bool{"year": true} + + inExpr := makeInExpr(makeColExpr(0, "year"), makeLitInt64(2024), makeLitInt64(2025)) + preds := ExtractPartitionPredicatesFromExprs(td, []*plan.Expr{inExpr}, partColSet) + + require.Equal(t, 1, len(preds)) + assert.Equal(t, "year", preds[0].ColName) + assert.Equal(t, PartOpIn, preds[0].Op) + assert.Equal(t, []string{"2024", "2025"}, preds[0].Values) +} + +func TestExtractPartitionPredicates_InWithStrings(t *testing.T) { + td := makeTableDef("country", "data") + partColSet := map[string]bool{"country": true} + + inExpr := makeInExpr(makeColExpr(0, "country"), makeLitString("US"), makeLitString("CA")) + preds := ExtractPartitionPredicatesFromExprs(td, []*plan.Expr{inExpr}, partColSet) + + require.Equal(t, 1, len(preds)) + assert.Equal(t, []string{"US", "CA"}, preds[0].Values) +} + +func TestExtractPartitionPredicates_NonStructurable(t *testing.T) { + td := makeTableDef("year", "data") + partColSet := map[string]bool{"year": true} + + gtExpr := &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: int64(function.GREAT_THAN) << 32}, + Args: []*plan.Expr{makeColExpr(0, "year"), makeLitInt64(2024)}, + }}, + } + preds := ExtractPartitionPredicatesFromExprs(td, []*plan.Expr{gtExpr}, partColSet) + assert.Equal(t, 0, len(preds), "non EQ/IN should be silently skipped") +} + +func TestExtractPartitionPredicates_RejectsCast(t *testing.T) { + td := makeTableDef("year", "data") + partColSet := map[string]bool{"year": true} + + castExpr := &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: 123}, + Args: []*plan.Expr{makeLitInt64(2025)}, + }}, + } + eqWithCast := makeEqExpr(makeColExpr(0, "year"), castExpr) + preds := ExtractPartitionPredicatesFromExprs(td, []*plan.Expr{eqWithCast}, partColSet) + assert.Equal(t, 0, len(preds), "Expr_F on constant side should be rejected") +} + +func TestExtractPartitionPredicates_NonPartCol(t *testing.T) { + td := makeTableDef("year", "data") + partColSet := map[string]bool{"year": true} + + dataEq := makeEqExpr(makeColExpr(1, "data"), makeLitString("foo")) + preds := ExtractPartitionPredicatesFromExprs(td, []*plan.Expr{dataEq}, partColSet) + assert.Equal(t, 0, len(preds), "non-partition col should not produce predicate") +} + +func TestExtractPartitionPredicates_NullLiteral(t *testing.T) { + td := makeTableDef("year", "data") + partColSet := map[string]bool{"year": true} + + nullLit := &plan.Expr{ + Expr: &plan.Expr_Lit{Lit: &plan.Literal{Isnull: true}}, + } + eqNull := makeEqExpr(makeColExpr(0, "year"), nullLit) + preds := ExtractPartitionPredicatesFromExprs(td, []*plan.Expr{eqNull}, partColSet) + assert.Equal(t, 0, len(preds), "NULL literal should be rejected") +} + +func TestExtractPartitionPredicates_InWithNonLiteral(t *testing.T) { + td := makeTableDef("year", "data") + partColSet := map[string]bool{"year": true} + + castInList := &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: 123}, + Args: []*plan.Expr{makeLitInt64(2025)}, + }}, + } + inExpr := makeInExpr(makeColExpr(0, "year"), makeLitInt64(2024), castInList) + preds := ExtractPartitionPredicatesFromExprs(td, []*plan.Expr{inExpr}, partColSet) + assert.Equal(t, 0, len(preds), 
"IN list with non-literal item should be rejected entirely") +} + +func TestExtractPartitionPredicates_InVec(t *testing.T) { + td := makeTableDef("year", "data") + partColSet := map[string]bool{"year": true} + + // Simulate a folded Expr_Vec (what constant-fold produces from IN list) + vec := vector.NewVec(types.T_int32.ToType()) + proc := testutil.NewProc(t) + mp := proc.Mp() + require.NoError(t, vector.AppendFixed(vec, int32(2024), false, mp)) + require.NoError(t, vector.AppendFixed(vec, int32(2025), false, mp)) + data, err := vec.MarshalBinary() + require.NoError(t, err) + vec.Free(mp) + + vecExpr := &plan.Expr{ + Typ: plan.Type{Id: int32(types.T_int32)}, + Expr: &plan.Expr_Vec{Vec: &plan.LiteralVec{Len: 2, Data: data}}, + } + colExpr := makeColExpr(0, "year") + colExpr.Typ = plan.Type{Id: int32(types.T_int32)} + inExpr := &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: function.InFunctionEncodedID}, + Args: []*plan.Expr{colExpr, vecExpr}, + }}, + } + preds := ExtractPartitionPredicatesFromExprs(td, []*plan.Expr{inExpr}, partColSet) + require.Equal(t, 1, len(preds)) + assert.Equal(t, PartOpIn, preds[0].Op) + assert.Equal(t, []string{"2024", "2025"}, preds[0].Values) +} + +func TestExtractPartitionPredicates_InVecLengthMismatch(t *testing.T) { + td := makeTableDef("year", "data") + partColSet := map[string]bool{"year": true} + + vec := vector.NewVec(types.T_int32.ToType()) + proc := testutil.NewProc(t) + mp := proc.Mp() + require.NoError(t, vector.AppendFixed(vec, int32(2024), false, mp)) + require.NoError(t, vector.AppendFixed(vec, int32(2025), false, mp)) + data, err := vec.MarshalBinary() + require.NoError(t, err) + vec.Free(mp) + + vecExpr := &plan.Expr{ + Typ: plan.Type{Id: int32(types.T_int32)}, + Expr: &plan.Expr_Vec{Vec: &plan.LiteralVec{Len: 3, Data: data}}, + } + colExpr := makeColExpr(0, "year") + colExpr.Typ = plan.Type{Id: int32(types.T_int32)} + inExpr := &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: function.InFunctionEncodedID}, + Args: []*plan.Expr{colExpr, vecExpr}, + }}, + } + preds := ExtractPartitionPredicatesFromExprs(td, []*plan.Expr{inExpr}, partColSet) + assert.Empty(t, preds, "LiteralVec length mismatch must disable partition pruning") +} + +func TestExtractPartitionPredicates_InVecVarchar(t *testing.T) { + td := makeTableDef("country", "data") + partColSet := map[string]bool{"country": true} + + vec := vector.NewVec(types.T_varchar.ToType()) + proc := testutil.NewProc(t) + mp := proc.Mp() + require.NoError(t, vector.AppendBytes(vec, []byte("US"), false, mp)) + require.NoError(t, vector.AppendBytes(vec, []byte("CN"), false, mp)) + data, err := vec.MarshalBinary() + require.NoError(t, err) + vec.Free(mp) + + vecExpr := &plan.Expr{ + Typ: plan.Type{Id: int32(types.T_varchar)}, + Expr: &plan.Expr_Vec{Vec: &plan.LiteralVec{Len: 2, Data: data}}, + } + colExpr := makeColExpr(0, "country") + colExpr.Typ = plan.Type{Id: int32(types.T_varchar)} + inExpr := &plan.Expr{ + Expr: &plan.Expr_F{F: &plan.Function{ + Func: &plan.ObjectRef{Obj: function.InFunctionEncodedID}, + Args: []*plan.Expr{colExpr, vecExpr}, + }}, + } + preds := ExtractPartitionPredicatesFromExprs(td, []*plan.Expr{inExpr}, partColSet) + require.Equal(t, 1, len(preds)) + assert.Equal(t, PartOpIn, preds[0].Op) + assert.Equal(t, []string{"US", "CN"}, preds[0].Values) +} + +func TestMatchPartitionValue_SetWithEnumvalues(t *testing.T) { + // SET column stored as T_uint64 with non-empty Enumvalues must NOT be pruned + ct := 
tree.HivePartColType{Id: int32(types.T_uint64), Enumvalues: "a,b,c"} + assert.Equal(t, MatchUnknown, matchPartitionValue("1", []string{"2"}, ct), + "SET column should always return MatchUnknown") +} + +func TestFillConstantVector_Int64FloatFallback(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_int64.ToType()) + col := &plan.ColDef{Name: "big", Typ: plan.Type{Id: int32(types.T_int64)}} + + err := fillConstantVector(vec, "1.5", col, 3, proc, "/test") + require.NoError(t, err, "int64 float fallback should work") + val := vector.MustFixedColNoTypeCheck[int64](vec) + assert.Equal(t, int64(1), val[0]) + vec.Free(mp) +} + +func TestFillConstantVector_Int64OverflowRejects(t *testing.T) { + proc := testutil.NewProc(t) + vec := vector.NewVec(types.T_int64.ToType()) + col := &plan.ColDef{Name: "big", Typ: plan.Type{Id: int32(types.T_int64)}} + + // 9223372036854775808 = MaxInt64 + 1 → ParseInt ErrRange → reject (no float fallback) + err := fillConstantVector(vec, "9223372036854775808", col, 1, proc, "/test") + require.Error(t, err, "int64 overflow must be rejected") + vec.Free(nil) +} + +func TestFillConstantVector_Uint64OverflowRejects(t *testing.T) { + proc := testutil.NewProc(t) + vec := vector.NewVec(types.T_uint64.ToType()) + col := &plan.ColDef{Name: "big", Typ: plan.Type{Id: int32(types.T_uint64)}} + + // 18446744073709551616 = MaxUint64 + 1 → ParseUint ErrRange → reject + err := fillConstantVector(vec, "18446744073709551616", col, 1, proc, "/test") + require.Error(t, err, "uint64 overflow must be rejected") + vec.Free(nil) +} + +// TestFillConstantVector_Int64DecimalBoundaryRejects guards the 64-bit float +// fallback against four classes of unsafe inputs: +// - 2^63 / 2^64 slipping through due to float64 rounding of MaxInt64/MaxUint64 +// - NaN passing range checks (NaN < x and NaN >= x both false) +// - ±Inf (covered by the strict upper bound being exact) +// - "-9223372036854775809.0" (below MinInt64) rounding to -2^63 in float64; +// rejected by the |f| >= 2^53 precision guard since any value reaching the +// float fallback at that magnitude cannot be safely round-tripped to int64. 
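+//
+// Minimal illustration of the rounding trap the exact bound closes (assumed Go
+// semantics; out-of-range float-to-int conversion is platform-dependent):
+//
+//	f, _ := strconv.ParseFloat("9223372036854775808", 64) // exactly 2^63
+//	_ = f > float64(math.MaxInt64) // false: the constant rounds up to 2^63
+//	_ = f >= 0x1p63                // true: the exact power-of-two bound rejects f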
+func TestFillConstantVector_Int64DecimalBoundaryRejects(t *testing.T) { + proc := testutil.NewProc(t) + col := &plan.ColDef{Name: "big", Typ: plan.Type{Id: int32(types.T_int64)}} + + cases := []string{ + "9223372036854775808.0", // 2^63 in decimal form + "9.223372036854775808e18", // 2^63 in scientific form + "9223372036854775808", // MaxInt64+1 (ParseInt ErrRange path) + "9999999999999999999.0", // well above 2^63 + "-9223372036854775808.0", // MinInt64 in decimal — float fallback, rejected by 2^53 guard + "-9223372036854775809", // below MinInt64 (ParseInt ErrRange) + "-9223372036854775809.0", // below MinInt64 in decimal; float64 rounds to -2^63 + "-9.223372036854775809e18", // same, scientific form + "-99999999999999999999.0", // well below MinInt64 + "1e20", // large positive scientific + "-1e20", // large negative scientific + "nan", "NaN", "NAN", // non-finite: NaN slips past naive range checks + "inf", "-inf", "+Inf", "Infinity", "-Infinity", + } + for _, s := range cases { + t.Run(s, func(t *testing.T) { + vec := vector.NewVec(types.T_int64.ToType()) + err := fillConstantVector(vec, s, col, 1, proc, "/test") + require.Error(t, err, "%q must be rejected (would overflow int64 or be ambiguous)", s) + vec.Free(nil) + }) + } +} + +func TestFillConstantVector_Int64DecimalBoundaryAccepts(t *testing.T) { + // Values reaching the float fallback with |f| < 2^53 — float64 still + // represents them exactly, so int64(f) is safe. + proc := testutil.NewProc(t) + mp := proc.Mp() + col := &plan.ColDef{Name: "big", Typ: plan.Type{Id: int32(types.T_int64)}} + + cases := []struct { + s string + want int64 + }{ + {"1.0", 1}, + {"-1.5", -1}, + {"0.0", 0}, + {"1.5e3", 1500}, // 1500 < 2^53 + {"-1.5e3", -1500}, + {"9007199254740991.0", 9007199254740991}, // 2^53 - 1, largest exact before the guard + {"-9007199254740991.0", -9007199254740991}, // -(2^53 - 1) + } + for _, c := range cases { + t.Run(c.s, func(t *testing.T) { + vec := vector.NewVec(types.T_int64.ToType()) + err := fillConstantVector(vec, c.s, col, 1, proc, "/test") + require.NoError(t, err) + val := vector.MustFixedColNoTypeCheck[int64](vec)[0] + assert.Equal(t, c.want, val) + vec.Free(mp) + }) + } +} + +func TestFillConstantVector_Uint64DecimalBoundaryRejects(t *testing.T) { + proc := testutil.NewProc(t) + col := &plan.ColDef{Name: "big", Typ: plan.Type{Id: int32(types.T_uint64)}} + + cases := []string{ + "18446744073709551616.0", // 2^64 in decimal form + "1.8446744073709551616e19", // 2^64 in scientific form + "18446744073709551616", // MaxUint64+1 (ParseUint ErrRange path) + "99999999999999999999.0", // well above 2^64 + "-1.0", // negative + "1e20", // large scientific + "9007199254740992.0", // 2^53 exactly — reached via float fallback, ambiguous + "nan", "NaN", // NaN silently passes naive < / >= checks + "inf", "Infinity", "-inf", // ±Inf + } + for _, s := range cases { + t.Run(s, func(t *testing.T) { + vec := vector.NewVec(types.T_uint64.ToType()) + err := fillConstantVector(vec, s, col, 1, proc, "/test") + require.Error(t, err, "%q must be rejected", s) + vec.Free(nil) + }) + } +} + +func TestFillConstantVector_Uint64DecimalBoundaryAccepts(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + col := &plan.ColDef{Name: "big", Typ: plan.Type{Id: int32(types.T_uint64)}} + + cases := []struct { + s string + want uint64 + }{ + {"0.0", 0}, + {"1.0", 1}, + {"1.7e3", 1700}, + } + for _, c := range cases { + t.Run(c.s, func(t *testing.T) { + vec := vector.NewVec(types.T_uint64.ToType()) + err := fillConstantVector(vec, c.s, 
col, 1, proc, "/test") + require.NoError(t, err) + val := vector.MustFixedColNoTypeCheck[uint64](vec)[0] + assert.Equal(t, c.want, val) + vec.Free(mp) + }) + } +} + +// TestFillConstantVector_SmallIntFloatBoundaryRejects verifies the float +// fallback path checks bounds BEFORE truncation. Go's int64(f) truncates +// toward zero: without the pre-check, int32("-2147483648.9") would pass +// because int64(-2147483648.9) == -2147483648, which the post-check sees +// as within [-2^31, 2^31-1]. We must reject it, matching CSV loader. +func TestFillConstantVector_SmallIntFloatBoundaryRejects(t *testing.T) { + proc := testutil.NewProc(t) + + type caseEntry struct { + typId types.T + val string + } + cases := []caseEntry{ + // int8: [-128, 127] + {types.T_int8, "-128.1"}, + {types.T_int8, "127.5"}, + // int16: [-32768, 32767] + {types.T_int16, "-32768.1"}, + {types.T_int16, "32767.5"}, + // int32: [-2147483648, 2147483647] + {types.T_int32, "-2147483648.9"}, + {types.T_int32, "2147483647.9"}, + // uint8: [0, 255] + {types.T_uint8, "255.5"}, + // uint16: [0, 65535] + {types.T_uint16, "65535.9"}, + // uint32: [0, 4294967295] + {types.T_uint32, "4294967295.9"}, + } + for _, c := range cases { + name := fmt.Sprintf("%s_%s", c.typId, c.val) + t.Run(name, func(t *testing.T) { + col := &plan.ColDef{Name: "n", Typ: plan.Type{Id: int32(c.typId)}} + vec := vector.NewVec(c.typId.ToType()) + err := fillConstantVector(vec, c.val, col, 1, proc, "/test") + require.Error(t, err, "%s value %q must be rejected (float bound)", c.typId, c.val) + vec.Free(nil) + }) + } +} + +// TestFillConstantVector_SmallIntFloatBoundaryAccepts verifies values safely +// inside the float bounds still pass and truncate toward zero, matching Go +// int/uint conversion semantics. +func TestFillConstantVector_SmallIntFloatBoundaryAccepts(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + + type intEntry struct { + typId types.T + val string + want int64 + } + intCases := []intEntry{ + // Inside [min, max] as a float, then truncated toward zero. 
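+ // e.g. int8 "126.9": 126.9 lies inside [-128, 127], truncation toward zero gives 126.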
+ {types.T_int8, "126.9", 126}, + {types.T_int8, "127.0", 127}, + {types.T_int8, "-127.9", -127}, + {types.T_int8, "-128.0", -128}, + {types.T_int16, "32766.9", 32766}, + {types.T_int16, "32767.0", 32767}, + {types.T_int32, "2147483646.9", 2147483646}, + {types.T_int32, "2147483647.0", 2147483647}, + } + for _, c := range intCases { + name := fmt.Sprintf("%s_%s", c.typId, c.val) + t.Run(name, func(t *testing.T) { + col := &plan.ColDef{Name: "n", Typ: plan.Type{Id: int32(c.typId)}} + vec := vector.NewVec(c.typId.ToType()) + err := fillConstantVector(vec, c.val, col, 1, proc, "/test") + require.NoError(t, err) + var got int64 + switch c.typId { + case types.T_int8: + got = int64(vector.MustFixedColNoTypeCheck[int8](vec)[0]) + case types.T_int16: + got = int64(vector.MustFixedColNoTypeCheck[int16](vec)[0]) + case types.T_int32: + got = int64(vector.MustFixedColNoTypeCheck[int32](vec)[0]) + } + assert.Equal(t, c.want, got) + vec.Free(mp) + }) + } + + type uintEntry struct { + typId types.T + val string + want uint64 + } + uintCases := []uintEntry{ + {types.T_uint8, "254.9", 254}, + {types.T_uint8, "255.0", 255}, + {types.T_uint16, "65534.9", 65534}, + {types.T_uint16, "65535.0", 65535}, + {types.T_uint32, "4294967294.9", 4294967294}, + {types.T_uint32, "4294967295.0", 4294967295}, + } + for _, c := range uintCases { + name := fmt.Sprintf("%s_%s", c.typId, c.val) + t.Run(name, func(t *testing.T) { + col := &plan.ColDef{Name: "n", Typ: plan.Type{Id: int32(c.typId)}} + vec := vector.NewVec(c.typId.ToType()) + err := fillConstantVector(vec, c.val, col, 1, proc, "/test") + require.NoError(t, err) + var got uint64 + switch c.typId { + case types.T_uint8: + got = uint64(vector.MustFixedColNoTypeCheck[uint8](vec)[0]) + case types.T_uint16: + got = uint64(vector.MustFixedColNoTypeCheck[uint16](vec)[0]) + case types.T_uint32: + got = uint64(vector.MustFixedColNoTypeCheck[uint32](vec)[0]) + } + assert.Equal(t, c.want, got) + vec.Free(mp) + }) + } +} + +// --------------------------------------------------------------------------- +// Virtual column filling tests +// --------------------------------------------------------------------------- + +func TestIsHivePartitionCol(t *testing.T) { + param := &ExternalParam{} + param.Extern = &tree.ExternParam{ + ExParamConst: tree.ExParamConst{ + HivePartitioning: true, + HivePartitionCols: []string{"year", "month"}, + }, + } + assert.True(t, param.isHivePartitionCol("year")) + assert.True(t, param.isHivePartitionCol("Year")) + assert.True(t, param.isHivePartitionCol("month")) + assert.False(t, param.isHivePartitionCol("amount")) + assert.False(t, param.isHivePartitionCol("")) +} + +func TestIsHivePartitionCol_NotEnabled(t *testing.T) { + param := &ExternalParam{} + param.Extern = &tree.ExternParam{} + assert.False(t, param.isHivePartitionCol("year")) +} + +func TestRefreshPartitionValues(t *testing.T) { + param := &ExternalParam{} + param.Extern = &tree.ExternParam{ + ExParamConst: tree.ExParamConst{ + HivePartitioning: true, + HivePartitionCols: []string{"year", "month"}, + }, + } + param.Extern.Filepath = "/data" + param.Fileparam = &ExFileparam{Filepath: "/data/year=2025/month=06/file.parquet"} + + err := param.refreshPartitionValues() + require.NoError(t, err) + assert.Equal(t, "2025", param.currentPartValues["year"]) + assert.Equal(t, "06", param.currentPartValues["month"]) +} + +func TestFillConstantVector_Int(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_int32.ToType()) + col := &plan.ColDef{Name: "year", 
Typ: plan.Type{Id: int32(types.T_int32)}} + + err := fillConstantVector(vec, "2025", col, 10, proc, "/test") + require.NoError(t, err) + assert.Equal(t, 10, vec.Length()) + val := vector.MustFixedColNoTypeCheck[int32](vec) + assert.Equal(t, int32(2025), val[0]) + vec.Free(mp) +} + +func TestFillConstantVector_IntFloatFallback(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_int32.ToType()) + col := &plan.ColDef{Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}} + + err := fillConstantVector(vec, "1.5", col, 5, proc, "/test") + require.NoError(t, err) + val := vector.MustFixedColNoTypeCheck[int32](vec) + assert.Equal(t, int32(1), val[0]) + vec.Free(mp) +} + +func TestFillConstantVector_Varchar(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_varchar.ToType()) + col := &plan.ColDef{Name: "country", Typ: plan.Type{Id: int32(types.T_varchar)}} + + err := fillConstantVector(vec, "US", col, 3, proc, "/test") + require.NoError(t, err) + assert.Equal(t, 3, vec.Length()) + bs := vec.GetBytesAt(0) + assert.Equal(t, "US", string(bs)) + vec.Free(mp) +} + +func TestFillConstantVector_Bool(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + vec := vector.NewVec(types.T_bool.ToType()) + col := &plan.ColDef{Name: "flag", Typ: plan.Type{Id: int32(types.T_bool)}} + + err := fillConstantVector(vec, "true", col, 2, proc, "/test") + require.NoError(t, err) + val := vector.MustFixedColNoTypeCheck[bool](vec) + assert.True(t, val[0]) + vec.Free(mp) +} + +func TestFillConstantVector_UnsupportedVector(t *testing.T) { + proc := testutil.NewProc(t) + vec := vector.NewVec(types.T_array_float32.ToType()) + col := &plan.ColDef{Name: "emb", Typ: plan.Type{Id: int32(types.T_array_float32)}} + + err := fillConstantVector(vec, "[1,2,3]", col, 1, proc, "/test") + require.Error(t, err) + assert.Contains(t, err.Error(), "unsupported") +} + +func TestFillPartitionColumns_DefaultPartNull(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + + vec := vector.NewVec(types.T_int32.ToType()) + bat := &batch.Batch{Vecs: []*vector.Vector{vec}} + bat.SetRowCount(5) + + param := &ExternalParam{} + param.Cols = []*plan.ColDef{ + {Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}, + Default: &plan.Default{NullAbility: true}}, + } + param.Ctx = context.Background() + param.Fileparam = &ExFileparam{Filepath: "/data/year=__HIVE_DEFAULT_PARTITION__/f.parquet"} + param.currentPartValues = map[string]string{"year": HiveDefaultPartition} + + h := &ParquetHandler{partitionColIndices: []int{0}} + err := h.fillPartitionColumns(bat, param, proc) + require.NoError(t, err) + assert.True(t, vec.IsConstNull()) + vec.Free(mp) +} + +func TestFillPartitionColumns_DefaultPartNotNull(t *testing.T) { + proc := testutil.NewProc(t) + mp := proc.Mp() + + vec := vector.NewVec(types.T_int32.ToType()) + bat := &batch.Batch{Vecs: []*vector.Vector{vec}} + bat.SetRowCount(5) + + param := &ExternalParam{} + param.Cols = []*plan.ColDef{ + {Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}, + Default: &plan.Default{NullAbility: false}}, + } + param.Ctx = context.Background() + param.Fileparam = &ExFileparam{Filepath: "/data/year=__HIVE_DEFAULT_PARTITION__/f.parquet"} + param.currentPartValues = map[string]string{"year": HiveDefaultPartition} + + h := &ParquetHandler{partitionColIndices: []int{0}} + err := h.fillPartitionColumns(bat, param, proc) + require.Error(t, err) + assert.Contains(t, err.Error(), "NOT NULL") + vec.Free(mp) +} + +func 
TestFillPartitionColumns_NotNullViaTypNotNullable_NegativeCase(t *testing.T) {
+	proc := testutil.NewProc(t)
+	mp := proc.Mp()
+
+	vec := vector.NewVec(types.T_int32.ToType())
+	bat := &batch.Batch{Vecs: []*vector.Vector{vec}}
+	bat.SetRowCount(3)
+
+	param := &ExternalParam{}
+	param.Cols = []*plan.ColDef{
+		{Name: "year", Typ: plan.Type{Id: int32(types.T_int32), NotNullable: true},
+			Default: &plan.Default{NullAbility: true}},
+	}
+	param.Ctx = context.Background()
+	param.Fileparam = &ExFileparam{Filepath: "/data/year=__HIVE_DEFAULT_PARTITION__/f.parquet"}
+	param.currentPartValues = map[string]string{"year": HiveDefaultPartition}
+
+	h := &ParquetHandler{partitionColIndices: []int{0}}
+	err := h.fillPartitionColumns(bat, param, proc)
+	require.NoError(t, err, "should use Default.NullAbility (true=nullable), not Typ.NotNullable")
+	assert.True(t, vec.IsConstNull())
+	vec.Free(mp)
+}
+
+func TestFillPartitionColumns_NotPresent(t *testing.T) {
+	proc := testutil.NewProc(t)
+	mp := proc.Mp()
+
+	vec := vector.NewVec(types.T_int32.ToType())
+	bat := &batch.Batch{Vecs: []*vector.Vector{vec}}
+	bat.SetRowCount(3)
+
+	param := &ExternalParam{}
+	param.Cols = []*plan.ColDef{
+		{Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}},
+	}
+	param.Ctx = context.Background()
+	param.Fileparam = &ExFileparam{Filepath: "/data/f.parquet"}
+	param.currentPartValues = map[string]string{}
+
+	h := &ParquetHandler{partitionColIndices: []int{0}}
+	err := h.fillPartitionColumns(bat, param, proc)
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "not found in path")
+	vec.Free(mp)
+}
+
+// TestFillPartitionColumns_RelPathWithExtern guards the relative-path contract
+// in fillPartitionColumns: when param.Extern.Filepath is set (the normal
+// production invariant for hive tables), error messages must reference the
+// partition-relative path, NOT the absolute/base path. Without this test the
+// existing coverage only exercises the fallback branch where Extern is nil
+// (relPath == Fileparam.Filepath), hiding a latent bug where a future refactor
+// could drop the nil guard and leak machine-local absolute paths into BVT
+// .result files.
+func TestFillPartitionColumns_RelPathWithExtern(t *testing.T) {
+	proc := testutil.NewProc(t)
+	mp := proc.Mp()
+
+	// Three subtests cover the error paths that embed relPath:
+	// 1) constraint-violation (NOT NULL + __HIVE_DEFAULT_PARTITION__)
+	// 2) not-found-in-path (partition key missing from file path)
+	// 3) type-conversion failure (partition value not parseable as the column type)
+
+	t.Run("not-null default partition prints relative path", func(t *testing.T) {
+		vec := vector.NewVec(types.T_int32.ToType())
+		bat := &batch.Batch{Vecs: []*vector.Vector{vec}}
+		bat.SetRowCount(5)
+
+		basePath := "/warehouse/lake/data"
+		filePath := basePath + "/year=__HIVE_DEFAULT_PARTITION__/part-0.parquet"
+		param := &ExternalParam{}
+		param.Cols = []*plan.ColDef{
+			{Name: "year", Typ: plan.Type{Id: int32(types.T_int32)},
+				Default: &plan.Default{NullAbility: false}},
+		}
+		param.Ctx = context.Background()
+		param.Fileparam = &ExFileparam{Filepath: filePath}
+		param.Extern = &tree.ExternParam{
+			ExParamConst: tree.ExParamConst{Filepath: basePath},
+		}
+		param.currentPartValues = map[string]string{"year": HiveDefaultPartition}
+
+		h := &ParquetHandler{partitionColIndices: []int{0}}
+		err := h.fillPartitionColumns(bat, param, proc)
+		require.Error(t, err)
+		// Must include the relative form...
+		assert.Contains(t, err.Error(), "year=__HIVE_DEFAULT_PARTITION__/part-0.parquet")
+		// ...and must NOT include the base prefix (would leak machine paths).
+ assert.NotContains(t, err.Error(), basePath) + vec.Free(mp) + }) + + t.Run("missing partition key prints relative path", func(t *testing.T) { + vec := vector.NewVec(types.T_int32.ToType()) + bat := &batch.Batch{Vecs: []*vector.Vector{vec}} + bat.SetRowCount(2) + + basePath := "/warehouse/lake/data" + filePath := basePath + "/oops/part-0.parquet" + param := &ExternalParam{} + param.Cols = []*plan.ColDef{ + {Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}}, + } + param.Ctx = context.Background() + param.Fileparam = &ExFileparam{Filepath: filePath} + param.Extern = &tree.ExternParam{ + ExParamConst: tree.ExParamConst{Filepath: basePath}, + } + param.currentPartValues = map[string]string{} // year not parsed + + h := &ParquetHandler{partitionColIndices: []int{0}} + err := h.fillPartitionColumns(bat, param, proc) + require.Error(t, err) + assert.Contains(t, err.Error(), "oops/part-0.parquet") + assert.NotContains(t, err.Error(), basePath) + vec.Free(mp) + }) + + t.Run("type conversion failure prints relative path", func(t *testing.T) { + vec := vector.NewVec(types.T_int32.ToType()) + bat := &batch.Batch{Vecs: []*vector.Vector{vec}} + bat.SetRowCount(3) + + basePath := "/warehouse/lake/data" + filePath := basePath + "/year=abc/part-0.parquet" + param := &ExternalParam{} + param.Cols = []*plan.ColDef{ + {Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}}, + } + param.Ctx = context.Background() + param.Fileparam = &ExFileparam{Filepath: filePath} + param.Extern = &tree.ExternParam{ + ExParamConst: tree.ExParamConst{Filepath: basePath}, + } + param.currentPartValues = map[string]string{"year": "abc"} + + h := &ParquetHandler{partitionColIndices: []int{0}} + err := h.fillPartitionColumns(bat, param, proc) + require.Error(t, err) + assert.Contains(t, err.Error(), "year=abc/part-0.parquet") + assert.NotContains(t, err.Error(), basePath) + vec.Free(mp) + }) +} + +// --------------------------------------------------------------------------- +// Pruning observability tests — assert ListCalls / PrunedCount precisely +// --------------------------------------------------------------------------- + +func TestDiscoverHivePartitions_EQListCalls(t *testing.T) { + dirs := map[string][]fileservice.DirEntry{ + "/data": { + {Name: "year=2020", IsDir: true}, + {Name: "year=2021", IsDir: true}, + {Name: "year=2022", IsDir: true}, + {Name: "year=2023", IsDir: true}, + {Name: "year=2024", IsDir: true}, + }, + "/data/year=2024": { + {Name: "part.parquet", IsDir: false, Size: 100}, + }, + } + + result, err := DiscoverHivePartitions( + context.Background(), + mockListDir(dirs), + "/data", + []string{"year"}, + []tree.HivePartColType{{Id: int32(types.T_int32)}}, + []PartitionPredicate{{ColName: "year", Op: PartOpEq, Values: []string{"2024"}}}, + ) + require.NoError(t, err) + assert.Equal(t, 2, result.ListCalls, "EQ single value: root + hit partition file dir = 2") + assert.Equal(t, 1, result.PartitionCount) + assert.Equal(t, 4, result.PrunedCount) + assert.Equal(t, 1, len(result.Files)) +} + +func TestDiscoverHivePartitions_INTwoValuesListCalls(t *testing.T) { + dirs := map[string][]fileservice.DirEntry{ + "/data": { + {Name: "year=2020", IsDir: true}, + {Name: "year=2021", IsDir: true}, + {Name: "year=2022", IsDir: true}, + }, + "/data/year=2020": { + {Name: "f.parquet", IsDir: false, Size: 100}, + }, + "/data/year=2022": { + {Name: "f.parquet", IsDir: false, Size: 200}, + }, + } + + result, err := DiscoverHivePartitions( + context.Background(), + mockListDir(dirs), + "/data", + []string{"year"}, + 
[]tree.HivePartColType{{Id: int32(types.T_int32)}}, + []PartitionPredicate{{ColName: "year", Op: PartOpIn, Values: []string{"2020", "2022"}}}, + ) + require.NoError(t, err) + assert.Equal(t, 3, result.ListCalls, "IN two values: root + 2 hit partition file dirs = 3") + assert.Equal(t, 2, result.PartitionCount) + assert.Equal(t, 1, result.PrunedCount) + assert.Equal(t, 2, len(result.Files)) +} + +func TestDiscoverHivePartitions_MultiLevelPartialPredicate(t *testing.T) { + dirs := map[string][]fileservice.DirEntry{ + "/data": { + {Name: "year=2024", IsDir: true}, + {Name: "year=2025", IsDir: true}, + }, + "/data/year=2024": { + {Name: "month=01", IsDir: true}, + {Name: "month=02", IsDir: true}, + {Name: "month=03", IsDir: true}, + }, + "/data/year=2024/month=01": { + {Name: "f.parquet", IsDir: false, Size: 100}, + }, + "/data/year=2024/month=02": { + {Name: "f.parquet", IsDir: false, Size: 200}, + }, + "/data/year=2024/month=03": { + {Name: "f.parquet", IsDir: false, Size: 300}, + }, + } + + result, err := DiscoverHivePartitions( + context.Background(), + mockListDir(dirs), + "/data", + []string{"year", "month"}, + []tree.HivePartColType{ + {Id: int32(types.T_int32)}, + {Id: int32(types.T_int32)}, + }, + []PartitionPredicate{{ColName: "year", Op: PartOpEq, Values: []string{"2024"}}}, + ) + require.NoError(t, err) + // year level: 1 pruned (2025), 1 kept (2024) + // month level: no predicate, all 3 enter + // ListCalls: root(1) + year=2024 months(1) + 3 file listings = 5 + assert.Equal(t, 5, result.ListCalls) + assert.Equal(t, 1, result.PrunedCount, "only year=2025 is pruned") + assert.Equal(t, 4, result.PartitionCount, "year=2024 + month=01 + month=02 + month=03") + assert.Equal(t, 3, len(result.Files)) +} + +func TestDiscoverHivePartitions_WarnPartitionCount(t *testing.T) { + // warnPartitionCount=5000. Use 5001 partitions. + // List calls = 1 (root) + 5001 (file listing per partition) = 5002, under maxListCalls(10000). 
+	entries := make([]fileservice.DirEntry, 5001)
+	for i := range entries {
+		entries[i] = fileservice.DirEntry{Name: fmt.Sprintf("year=%d", i), IsDir: true}
+	}
+	fileEntries := []fileservice.DirEntry{{Name: "f.parquet", IsDir: false, Size: 10}}
+
+	listDir := func(ctx context.Context, prefix string) iter.Seq2[*fileservice.DirEntry, error] {
+		return func(yield func(*fileservice.DirEntry, error) bool) {
+			if prefix == "/data" {
+				for i := range entries {
+					if !yield(&entries[i], nil) {
+						return
+					}
+				}
+			} else {
+				for i := range fileEntries {
+					if !yield(&fileEntries[i], nil) {
+						return
+					}
+				}
+			}
+		}
+	}
+
+	result, err := DiscoverHivePartitions(
+		context.Background(),
+		listDir,
+		"/data",
+		[]string{"year"},
+		[]tree.HivePartColType{{Id: int32(types.T_int32)}},
+		nil,
+	)
+	require.NoError(t, err, "5001 partitions should NOT error (only warn)")
+	assert.Equal(t, 5001, result.PartitionCount)
+	assert.True(t, result.warnEmitted, "warning should have been emitted for >5000 partitions")
+	assert.Equal(t, 5001, len(result.Files))
+}
+
+// Reference imports that are otherwise unused in this file so it compiles.
+var _ = catalog.ExternalFilePath
+var _ = function.EQUAL
diff --git a/pkg/sql/colexec/external/parquet.go b/pkg/sql/colexec/external/parquet.go
index b9a66c218fe6c..743242438fed6 100644
--- a/pkg/sql/colexec/external/parquet.go
+++ b/pkg/sql/colexec/external/parquet.go
@@ -46,7 +46,8 @@ var maxParquetBatchCnt int64 = 100000
 func newParquetHandler(param *ExternalParam) (*ParquetHandler, error) {
 	h := ParquetHandler{
-		batchCnt: maxParquetBatchCnt,
+		batchCnt:         maxParquetBatchCnt,
+		filepathColIndex: -1, // sentinel: not projected
 	}
 	err := h.openFile(param)
 	if err != nil {
@@ -54,29 +55,26 @@ func newParquetHandler(param *ExternalParam) (*ParquetHandler, error) {
 	}
 	// Empty file handling (0 rows): only check column count, skip column name and type checks.
-	// This aligns with DuckDB behavior for empty parquet files.
 	if h.file.NumRows() == 0 {
-		// Check if @vars are used in column list (LOAD DATA ... (col1, @v, col2))
-		// Parquet doesn't support @vars, report explicit error
 		if param.Extern.ExternType == int32(plan.ExternType_LOAD) && param.ColumnListLen > int32(len(param.Attrs)) {
 			return nil, moerr.NewNYI(param.Ctx, "parquet load with @variables in column list")
 		}
-
-		// Only check column count, not column names or types
-		// Column count must match exactly (align with DuckDB behavior)
-		parquetColCnt := len(h.file.Root().Columns())
-		tableColCnt := getParquetExpectedColCnt(param)
-		if parquetColCnt != tableColCnt {
-			return nil, moerr.NewInvalidInputf(param.Ctx,
-				"column count mismatch: parquet file has %d columns, but table has %d columns",
-				parquetColCnt, tableColCnt)
+		// Skip column count check in Hive mode: partition-only projections have
+		// 0 expected physical columns while the empty file still has schema columns.
+		if !param.Extern.HivePartitioning {
+			parquetColCnt := len(h.file.Root().Columns())
+			tableColCnt := getParquetExpectedColCnt(param)
+			if parquetColCnt != tableColCnt {
+				return nil, moerr.NewInvalidInputf(param.Ctx,
+					"column count mismatch: parquet file has %d columns, but table has %d columns",
+					parquetColCnt, tableColCnt)
+			}
 		}
-		// Return nil to indicate empty file, no data to load
+		// Caller treats (nil, nil) as "empty file, advance to next".
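+		// (Concrete case for the Hive skip above, names illustrative: a query
+		// projecting only partition columns, e.g. SELECT year FROM t, leaves
+		// getParquetExpectedColCnt at 0 while the empty file's schema still
+		// lists its data columns, so the strict count check would spuriously fail.)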
return nil, nil } - // Non-empty file: use original logic (check column names and types) err = h.prepare(param) if err != nil { return nil, err @@ -191,6 +189,18 @@ func (h *ParquetHandler) prepare(param *ExternalParam) error { continue } + // Skip virtual columns: they are not in Parquet schema. + if param.isHivePartitionCol(attr.ColName) { + h.partitionColIndices = append(h.partitionColIndices, int(attr.ColIndex)) + continue + } + if catalog.ContainExternalHidenCol(attr.ColName) { + h.filepathColIndex = int(attr.ColIndex) + continue + } + + h.hasPhysicalCol = true + // Use case-insensitive column lookup (fix for issue #15621) col, err := h.findColumnIgnoreCase(param.Ctx, attr.ColName) if err != nil { @@ -234,6 +244,10 @@ func (h *ParquetHandler) prepare(param *ExternalParam) error { h.pages[attr.ColIndex] = col.Pages() } + if !h.hasPhysicalCol && (len(h.partitionColIndices) > 0 || h.filepathColIndex >= 0) { + h.rowCountOnly = true + } + // init row reader if has nested columns if h.hasNestedCols { h.rowReader = parquet.NewReader(h.file) @@ -1790,12 +1804,39 @@ func bigIntToTwosComplementBytes(ctx context.Context, bi *big.Int, size int) ([] } func (h *ParquetHandler) getData(bat *batch.Batch, param *ExternalParam, proc *process.Process) error { + if h.rowCountOnly { + return h.getDataRowCountOnly(bat) + } if h.hasNestedCols { return h.getDataByRow(bat, param, proc) } return h.getDataByPage(bat, param, proc) } +func (h *ParquetHandler) getDataRowCountOnly(bat *batch.Batch) error { + batchLimit := int(h.batchCnt) + rowCount := 0 + + if h.rowCountRemaining > 0 { + rowCount = min(h.rowCountRemaining, batchLimit) + h.rowCountRemaining -= rowCount + } else { + rgs := h.file.RowGroups() + if h.currentRowGroup >= len(rgs) { + bat.SetRowCount(0) + return nil + } + total := int(rgs[h.currentRowGroup].NumRows()) + h.currentRowGroup++ + rowCount = min(total, batchLimit) + h.rowCountRemaining = total - rowCount + } + + h.offset += int64(rowCount) + bat.SetRowCount(rowCount) + return nil +} + func (h *ParquetHandler) getDataByPage(bat *batch.Batch, param *ExternalParam, proc *process.Process) error { length := 0 finish := false @@ -1991,9 +2032,13 @@ func parseStringToDecimal128(s string, precision, scale int32) (types.Decimal128 func getParquetExpectedColCnt(param *ExternalParam) int { cnt := 0 for _, attr := range param.Attrs { - if !catalog.ContainExternalHidenCol(attr.ColName) { - cnt++ + if catalog.ContainExternalHidenCol(attr.ColName) { + continue + } + if param.isHivePartitionCol(attr.ColName) { + continue } + cnt++ } return cnt } diff --git a/pkg/sql/colexec/external/reader_parquet.go b/pkg/sql/colexec/external/reader_parquet.go index 30223e6a37b4f..6d64a7a02e23b 100644 --- a/pkg/sql/colexec/external/reader_parquet.go +++ b/pkg/sql/colexec/external/reader_parquet.go @@ -23,7 +23,6 @@ import ( ) // ParquetReader handles Parquet format files. -// Phase 1: thin wrapper around existing ParquetHandler logic. 
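+// Open additionally refreshes the current file's hive partition values, and
+// ReadBatch stamps them (plus __mo_filepath) into projected vectors after each
+// physical read; see the hunks below.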
type ParquetReader struct { param *ExternalParam h *ParquetHandler @@ -35,6 +34,9 @@ func NewParquetReader(param *ExternalParam, proc *process.Process) *ParquetReade func (r *ParquetReader) Open(param *ExternalParam, proc *process.Process) (fileEmpty bool, err error) { r.param = param + if err := param.refreshPartitionValues(); err != nil { + return false, err + } r.h, err = newParquetHandler(param) if err != nil { return false, err @@ -64,6 +66,17 @@ func (r *ParquetReader) ReadBatch( return false, err } + // Virtual column fill is independent of rowCountOnly: both physical-col + // branches (getDataByPage / getDataByRow) and rowCountOnly need to stamp + // the hive partition values and __mo_filepath into their vectors whenever + // those columns are projected. rowCountOnly in prepare() only gates the + // getData dispatch (no mapper reads), not the virtual-column fill. + if buf.RowCount() > 0 && (r.h.filepathColIndex >= 0 || len(r.h.partitionColIndices) > 0) { + if err := r.h.fillVirtualColumns(buf, r.param, proc); err != nil { + return false, err + } + } + // Check if file is finished: getData sets offset and checks NumRows if r.h.file != nil && r.h.offset >= r.h.file.NumRows() { return true, nil diff --git a/pkg/sql/colexec/external/types.go b/pkg/sql/colexec/external/types.go index 5debf9c03073f..5aab238702bbf 100644 --- a/pkg/sql/colexec/external/types.go +++ b/pkg/sql/colexec/external/types.go @@ -72,8 +72,9 @@ type ExParamConst struct { } type ExParam struct { - Fileparam *ExFileparam - Filter *FilterParam + Fileparam *ExFileparam + Filter *FilterParam + currentPartValues map[string]string } type ExFileparam struct { @@ -285,6 +286,14 @@ type ParquetHandler struct { // for nested types support hasNestedCols bool rowReader *parquet.Reader + + // virtual column support (hive partitions + __mo_filepath) + partitionColIndices []int + filepathColIndex int // -1 = not projected + hasPhysicalCol bool + rowCountOnly bool + currentRowGroup int + rowCountRemaining int } type columnMapper struct { diff --git a/pkg/sql/compile/compile.go b/pkg/sql/compile/compile.go index 662acef8d54d5..f45bf6391d4f8 100644 --- a/pkg/sql/compile/compile.go +++ b/pkg/sql/compile/compile.go @@ -1604,6 +1604,11 @@ func (c *Compile) getReadWriteParallelFlag(param *tree.ExternParam, fileList []s } func (c *Compile) getExternalFileListAndSize(node *plan.Node, param *tree.ExternParam) (fileList []string, fileSize []int64, err error) { + // Hive partition tables use recursive list-and-filter discovery, not ReadDir. + // ReadDir requires glob patterns in filepath; Hive base paths are opaque directories. 
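+	// Illustrative layout (paths hypothetical): with filepath='/data' and
+	// partition columns [year, month], discovery walks
+	//   /data/year=2024/month=01/part-0.parquet
+	//   /data/year=2024/month=02/part-0.parquet
+	// whereas ReadDir over the bare '/data' prefix has no glob to expand.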
+ if param.HivePartitioning { + return c.getHivePartitionFileList(node, param) + } switch node.ExternScan.Type { case int32(plan.ExternType_EXTERNAL_TB): t := time.Now() @@ -1637,6 +1642,68 @@ func (c *Compile) getExternalFileListAndSize(node *plan.Node, param *tree.Extern return fileList, fileSize, nil } +func (c *Compile) getHivePartitionFileList(node *plan.Node, param *tree.ExternParam) ([]string, []int64, error) { + partColSet := toLowerSet(param.HivePartitionCols) + partFilters, fpFilters, rowFilters := external.ClassifyFilters( + node.TableDef, node.FilterList, partColSet) + + preds := external.ExtractPartitionPredicatesFromExprs(node.TableDef, partFilters, partColSet) + + listDir := external.NewListDirFunc(param) + result, err := external.DiscoverHivePartitions( + c.proc.Ctx, listDir, param.Filepath, + param.HivePartitionCols, param.HivePartitionColTypes, preds) + if err != nil { + return nil, nil, err + } + + fileList := make([]string, len(result.Files)) + fileSize := make([]int64, len(result.Files)) + for i, f := range result.Files { + fileList[i] = f.FilePath + fileSize[i] = f.FileSize + } + + if len(fpFilters) > 0 { + var leftover []*plan.Expr + fileList, fileSize, leftover, err = runFilePathFilters(c.proc.Ctx, c.proc, node.TableDef, fpFilters, fileList, fileSize) + if err != nil { + return nil, nil, err + } + rowFilters = append(rowFilters, leftover...) + } + + node.FilterList = rowFilters + return fileList, fileSize, nil +} + +func runFilePathFilters( + ctx context.Context, + proc *process.Process, + tableDef *plan.TableDef, + fpFilters []*plan.Expr, + fileList []string, + fileSize []int64, +) ([]string, []int64, []*plan.Expr, error) { + tmpNode := &plan.Node{ + TableDef: tableDef, + FilterList: fpFilters, + } + outFileList, outFileSize, err := external.FilterFileList(ctx, tmpNode, proc, fileList, fileSize) + if err != nil { + return nil, nil, nil, err + } + return outFileList, outFileSize, tmpNode.FilterList, nil +} + +func toLowerSet(cols []string) map[string]bool { + m := make(map[string]bool, len(cols)) + for _, col := range cols { + m[strings.ToLower(col)] = true + } + return m +} + func (c *Compile) compileExternScan(node *plan.Node) ([]*Scope, error) { if c.isPrepare { return nil, cantCompileForPrepareErr @@ -1663,6 +1730,12 @@ func (c *Compile) compileExternScan(node *plan.Node) ([]*Scope, error) { return c.compileExternValueScan(node, param, strictSqlMode) } + // Hive partition tables must not enter parallel read paths — the parallel loop + // mutates param.Filepath per file, which breaks ExtractPartitionValues' base path. + if param.HivePartitioning { + param.Parallel = false + } + fileList, fileSize, err := c.getExternalFileListAndSize(node, param) if err != nil { return nil, err diff --git a/pkg/sql/compile/hive_partition_test.go b/pkg/sql/compile/hive_partition_test.go new file mode 100644 index 0000000000000..8d8c0ff9adeb6 --- /dev/null +++ b/pkg/sql/compile/hive_partition_test.go @@ -0,0 +1,109 @@ +// Copyright 2026 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package compile
+
+import (
+	"testing"
+
+	"github.com/matrixorigin/matrixone/pkg/catalog"
+	"github.com/matrixorigin/matrixone/pkg/pb/plan"
+	"github.com/matrixorigin/matrixone/pkg/sql/plan/function"
+	"github.com/matrixorigin/matrixone/pkg/testutil"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestRunFilePathFilters_LeftoverContainsUnconsumed locks the compile-layer
+// contract that motivates the fpFilters → rowFilters append in
+// getHivePartitionFileList. If runFilePathFilters is modified (or deleted) to
+// no longer surface FilterFileList's unconsumed predicates as its leftover
+// return value, queries like
+//
+//	WHERE __mo_filepath LIKE '%x%' OR false
+//
+// will silently skip their predicate and return wrong rows. This test fires
+// immediately in that scenario.
+func TestRunFilePathFilters_LeftoverContainsUnconsumed(t *testing.T) {
+	proc := testutil.NewProc(t)
+
+	td := &plan.TableDef{
+		Cols: []*plan.ColDef{
+			{Name: "year"},
+			{Name: catalog.ExternalFilePath},
+		},
+	}
+	orExpr := &plan.Expr{
+		Expr: &plan.Expr_F{F: &plan.Function{
+			Func: &plan.ObjectRef{Obj: int64(function.OR) << 32, ObjName: "or"},
+			Args: []*plan.Expr{
+				{
+					Expr: &plan.Expr_F{F: &plan.Function{
+						Func: &plan.ObjectRef{Obj: function.EqualFunctionEncodedID},
+						Args: []*plan.Expr{
+							{Expr: &plan.Expr_Col{Col: &plan.ColRef{ColPos: 1, Name: catalog.ExternalFilePath}}},
+							{Expr: &plan.Expr_Lit{Lit: &plan.Literal{Value: &plan.Literal_Sval{Sval: "x"}}}},
+						},
+					}},
+				},
+				{Expr: &plan.Expr_Lit{Lit: &plan.Literal{Value: &plan.Literal_Bval{Bval: false}}}},
+			},
+		}},
+	}
+
+	fileList := []string{"/warehouse/data/year=2024/f.parquet"}
+	fileSize := []int64{100}
+
+	outFileList, outFileSize, leftover, err := runFilePathFilters(
+		proc.Ctx, proc, td, []*plan.Expr{orExpr}, fileList, fileSize)
+	require.NoError(t, err)
+
+	assert.Equal(t, fileList, outFileList,
+		"FilterFileList short-circuits when judgeContainColname rejects all fpFilters")
+	assert.Equal(t, fileSize, outFileSize)
+	require.Equal(t, 1, len(leftover),
+		"unconsumed OR(filepath, literal) must be returned in leftover so the caller can "+
+			"append it to rowFilters; losing it silently drops the predicate at runtime")
+	assert.Same(t, orExpr, leftover[0])
+}
+
+// TestToLowerSet covers the tiny helper feeding partColSet.
+func TestToLowerSet(t *testing.T) {
+	got := toLowerSet([]string{"Year", "MONTH", "day"})
+	assert.True(t, got["year"])
+	assert.True(t, got["month"])
+	assert.True(t, got["day"])
+	assert.False(t, got["Year"])
+	assert.Equal(t, 3, len(got))
+
+	// Empty input.
+	got = toLowerSet(nil)
+	assert.Equal(t, 0, len(got))
+}
+
+// TestRunFilePathFilters_NoFilters exercises the trivial path of
+// runFilePathFilters where fpFilters is empty and leftover comes back empty,
+// distinct from the unconsumed regression above.
+func TestRunFilePathFilters_NoFilters(t *testing.T) {
+	proc := testutil.NewProc(t)
+	td := &plan.TableDef{Cols: []*plan.ColDef{{Name: catalog.ExternalFilePath}}}
+	// Empty fpFilters → FilterFileList short-circuits, returns fileList unchanged.
+ fileList := []string{"/a.parquet"} + fileSize := []int64{10} + out, outSz, leftover, err := runFilePathFilters(proc.Ctx, proc, td, nil, fileList, fileSize) + require.NoError(t, err) + assert.Equal(t, fileList, out) + assert.Equal(t, fileSize, outSz) + assert.Empty(t, leftover) +} diff --git a/pkg/sql/parsers/tree/update.go b/pkg/sql/parsers/tree/update.go index d2d960be84889..138873bbebe74 100644 --- a/pkg/sql/parsers/tree/update.go +++ b/pkg/sql/parsers/tree/update.go @@ -161,6 +161,20 @@ type ExParamConst struct { Data string Tail *TailParameter StageName Identifier + + HivePartitioning bool + HivePartitionCols []string + HivePartitionColTypes []HivePartColType +} + +// HivePartColType is a compact snapshot of a partition column's type info. +// Defined in tree package to avoid importing pkg/pb/plan. +type HivePartColType struct { + Id int32 + Width int32 + Scale int32 + Enumvalues string + NullAbility bool } type ExParam struct { diff --git a/pkg/sql/plan/build_ddl.go b/pkg/sql/plan/build_ddl.go index 45ced634ca55d..68f9101a2aa6f 100644 --- a/pkg/sql/plan/build_ddl.go +++ b/pkg/sql/plan/build_ddl.go @@ -919,11 +919,16 @@ func buildCreateTable( if stmt.Param != nil { for i := 0; i < len(stmt.Param.Option); i += 2 { switch strings.ToLower(stmt.Param.Option[i]) { - case "endpoint", "region", "access_key_id", "secret_access_key", "bucket", "filepath", "compression", "format", "jsondata", "provider", "role_arn", "external_id": + case "endpoint", "region", "access_key_id", "secret_access_key", "bucket", "filepath", "compression", "format", "jsondata", "provider", "role_arn", "external_id", "hive_partitioning", "hive_partition_columns": default: return nil, moerr.NewBadConfigf(ctx.GetContext(), "the keyword '%s' is not support", strings.ToLower(stmt.Param.Option[i])) } } + + if err := validateAndSetHivePartitionOptions(ctx.GetContext(), stmt, createTable); err != nil { + return nil, err + } + if err := InitNullMap(stmt.Param, ctx); err != nil { return nil, err } @@ -5166,3 +5171,174 @@ func constructAddedPartitionDefs( return nil, moerr.NewNotSupportedNoCtx("unsupported partition method in ADD PARTITION") } } + +// validateAndSetHivePartitionOptions parses and validates hive_partitioning options from the DDL, +// normalizes partition column names, extracts column types, and strips hive keys from Option[]. 
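+// Illustrative DDL that reaches this validation (table and column names are
+// hypothetical; the option spelling matches the keys checked below):
+//
+//	create external table sales (amount int, year int, month int)
+//	infile{'filepath'='/data/', 'format'='parquet',
+//	'hive_partitioning'='true', 'hive_partition_columns'='year,month'};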
+func validateAndSetHivePartitionOptions(ctx context.Context, stmt *tree.CreateTable, createTable *plan.CreateTable) error { + raw := stmt.Param.Option + + if err := rejectDuplicateKeys(ctx, raw, []string{"hive_partitioning", "hive_partition_columns"}); err != nil { + return err + } + + hiveEnabled, hiveCols, err := parseHiveOptionsFromRawOptions(ctx, raw) + if err != nil { + return err + } + if !hiveEnabled { + return nil + } + + if len(hiveCols) == 0 { + return moerr.NewBadConfig(ctx, "hive_partition_columns is required when hive_partitioning is enabled") + } + + if err := rejectDuplicateKeys(ctx, raw, []string{"format", "filepath"}); err != nil { + return err + } + + rawFormat := strings.ToLower(getRawOption(raw, "format")) + if rawFormat != "parquet" { + return moerr.NewBadConfigf(ctx, "hive_partitioning currently only supports format='parquet', got '%s'", rawFormat) + } + + rawFilepath := getRawOption(raw, "filepath") + if len(stmt.Param.StageName) != 0 || strings.HasPrefix(rawFilepath, "stage://") { + return moerr.NewBadConfig(ctx, "hive_partitioning does not support stage external tables") + } + + normalized := make([]string, 0, len(hiveCols)) + colTypes := make([]tree.HivePartColType, 0, len(hiveCols)) + seen := make(map[string]bool) + for _, pc := range hiveCols { + col := findColInTableDefCaseInsensitive(createTable.TableDef.Cols, pc) + if col == nil { + return moerr.NewBadConfigf(ctx, "partition column '%s' not found in table columns", pc) + } + if col.Hidden { + return moerr.NewBadConfigf(ctx, "partition column '%s' cannot be a hidden column", pc) + } + if col.GeneratedCol != nil { + return moerr.NewBadConfigf(ctx, "partition column '%s' cannot be a generated column", pc) + } + typId := types.T(col.Typ.Id) + if typId == types.T_array_float32 || typId == types.T_array_float64 { + return moerr.NewBadConfigf(ctx, "partition column '%s' cannot be a VECTOR type", pc) + } + canonical := strings.ToLower(col.Name) + if seen[canonical] { + return moerr.NewBadConfigf(ctx, "duplicate partition column '%s'", pc) + } + seen[canonical] = true + normalized = append(normalized, canonical) + + nullable := true + if col.Default != nil { + nullable = col.Default.NullAbility + } + colTypes = append(colTypes, tree.HivePartColType{ + Id: col.Typ.Id, + Width: col.Typ.Width, + Scale: col.Typ.Scale, + Enumvalues: col.Typ.Enumvalues, + NullAbility: nullable, + }) + } + + stmt.Param.HivePartitioning = true + stmt.Param.HivePartitionCols = normalized + stmt.Param.HivePartitionColTypes = colTypes + stmt.Param.Option = stripHiveOptionKeys(stmt.Param.Option) + return nil +} + +func parseHiveOptionsFromRawOptions(ctx context.Context, options []string) (enabled bool, cols []string, err error) { + var hiveVal string + var colsVal string + for i := 0; i < len(options); i += 2 { + key := strings.ToLower(options[i]) + switch key { + case "hive_partitioning": + hiveVal = strings.ToLower(options[i+1]) + case "hive_partition_columns": + colsVal = options[i+1] + } + } + if hiveVal == "" { + if strings.TrimSpace(colsVal) != "" { + return false, nil, moerr.NewBadConfig(ctx, "hive_partition_columns requires hive_partitioning='true'") + } + return false, nil, nil + } + if hiveVal != "true" && hiveVal != "false" { + return false, nil, moerr.NewBadConfigf(ctx, "hive_partitioning must be 'true' or 'false', got '%s'", hiveVal) + } + if hiveVal == "false" { + if strings.TrimSpace(colsVal) != "" { + return false, nil, moerr.NewBadConfig(ctx, "hive_partition_columns requires hive_partitioning='true'") + } + return false, nil, 
nil + } + if colsVal == "" { + return true, nil, nil + } + parts := strings.Split(colsVal, ",") + cols = make([]string, 0, len(parts)) + for _, p := range parts { + p = strings.TrimSpace(p) + if p != "" { + cols = append(cols, p) + } + } + return true, cols, nil +} + +func rejectDuplicateKeys(ctx context.Context, options []string, keys []string) error { + keySet := make(map[string]bool, len(keys)) + for _, k := range keys { + keySet[k] = true + } + seen := make(map[string]bool) + for i := 0; i < len(options); i += 2 { + key := strings.ToLower(options[i]) + if !keySet[key] { + continue + } + if seen[key] { + return moerr.NewBadConfigf(ctx, "duplicate option key '%s'", key) + } + seen[key] = true + } + return nil +} + +func getRawOption(options []string, key string) string { + for i := 0; i < len(options); i += 2 { + if strings.ToLower(options[i]) == key { + return options[i+1] + } + } + return "" +} + +func stripHiveOptionKeys(opt []string) []string { + out := make([]string, 0, len(opt)) + for i := 0; i < len(opt); i += 2 { + key := strings.ToLower(opt[i]) + if key == "hive_partitioning" || key == "hive_partition_columns" { + continue + } + out = append(out, opt[i], opt[i+1]) + } + return out +} + +func findColInTableDefCaseInsensitive(cols []*plan.ColDef, name string) *plan.ColDef { + lower := strings.ToLower(name) + for _, col := range cols { + if strings.ToLower(col.Name) == lower { + return col + } + } + return nil +} diff --git a/pkg/sql/plan/build_load.go b/pkg/sql/plan/build_load.go index fdf533903c18d..e7289328edfa5 100644 --- a/pkg/sql/plan/build_load.go +++ b/pkg/sql/plan/build_load.go @@ -222,6 +222,13 @@ func buildLoad(stmt *tree.Load, ctx CompilerContext, isPrepareStmt bool) (*Plan, return nil, err } + // Note on Hive partitioned external tables: LOAD DATA into any external + // table (hive or not) is rejected by checkTableType inside getDmlTableInfo + // above, producing "cannot insert/update/delete from external table". + // No hive-specific intercept is needed here — and any probe added below + // would be unreachable dead code. See Phase 8 P8-audit-3 decision to keep + // the generic external-table error for consistency with all DML on externals. 
+ stmt.Param.Local = stmt.Local fileName, err := checkFileExist(stmt.Param, ctx) if err != nil { diff --git a/pkg/sql/plan/build_show_util.go b/pkg/sql/plan/build_show_util.go index 84ece374f1f05..f9c5f253c87cb 100644 --- a/pkg/sql/plan/build_show_util.go +++ b/pkg/sql/plan/build_show_util.go @@ -582,7 +582,12 @@ func ConstructCreateTableSQL( } } // hide file path - createStr += fmt.Sprintf(" INFILE{'FILEPATH'='','COMPRESSION'='%s','FORMAT'='%s','JSONDATA'='%s'}", param.CompressType, param.Format, param.JsonData) + createStr += fmt.Sprintf(" INFILE{'FILEPATH'='','COMPRESSION'='%s','FORMAT'='%s','JSONDATA'='%s'", param.CompressType, param.Format, param.JsonData) + if param.HivePartitioning { + createStr += fmt.Sprintf(",'HIVE_PARTITIONING'='true','HIVE_PARTITION_COLUMNS'='%s'", + strings.Join(param.HivePartitionCols, ",")) + } + createStr += "}" fields := "" if param.Tail != nil && param.Tail.Fields != nil { diff --git a/pkg/sql/plan/external.go b/pkg/sql/plan/external.go index 6db3ff550dada..6dbb8c3766ba5 100644 --- a/pkg/sql/plan/external.go +++ b/pkg/sql/plan/external.go @@ -179,6 +179,10 @@ func getExternalStats(node *plan.Node, builder *QueryBuilder) *Stats { return DefaultHugeStats() } + if param.HivePartitioning { + return DefaultHugeStats() + } + if param.ScanType == tree.S3 { if err = InitS3Param(param); err != nil { return DefaultHugeStats() diff --git a/pkg/sql/plan/utils.go b/pkg/sql/plan/utils.go index bdfe0c80d010d..b2bb878677d6f 100644 --- a/pkg/sql/plan/utils.go +++ b/pkg/sql/plan/utils.go @@ -1857,9 +1857,66 @@ func checkNoNeedCast(constT, columnT types.Type, constExpr *plan.Expr) bool { } +// parseHiveOptionKV handles hive_partitioning / hive_partition_columns keys in +// Init*Param. It is defensive against legacy JSON where stripHiveOptionKeys +// (build_ddl.go) had not run; when the param already has values normalized +// during DDL, the legacy option is skipped to avoid case-flip or type drift. +// +// Each key's skip guard MUST inspect only its own field. An earlier version +// coupled the hive_partitioning guard to HivePartitionCols; for legacy option +// orders like "hive_partition_columns=year, hive_partitioning=true" that caused +// hive_partitioning to be silently skipped after cols was populated, leaving +// HivePartitioning=false and the table mis-classified as non-hive. +// +// Returns (handled, err): +// - (false, nil) : key is not a hive key; caller should fall through to its own switch +// - (true, nil) : key handled (either applied or intentionally skipped) +// - (true, err) : key handled but value invalid +func parseHiveOptionKV(param *tree.ExternParam, key, val string) (bool, error) { + switch key { + case "hive_partitioning": + // Guard only on HivePartitioning itself — do NOT consult HivePartitionCols. 
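+		// Worked example of the regression this guard shape prevents: with legacy
+		// Option[] {"hive_partition_columns", "year", "hive_partitioning", "true"},
+		// cols is already populated when this key arrives; skipping on
+		// len(HivePartitionCols) > 0 would leave HivePartitioning == false.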
+		if param.HivePartitioning {
+			return true, nil
+		}
+		v := strings.ToLower(val)
+		if v != "true" && v != "false" {
+			return true, moerr.NewBadConfigf(param.Ctx, "hive_partitioning must be 'true' or 'false', got '%s'", val)
+		}
+		param.HivePartitioning = (v == "true")
+		return true, nil
+	case "hive_partition_columns":
+		if len(param.HivePartitionCols) > 0 {
+			return true, nil
+		}
+		for _, p := range strings.Split(val, ",") {
+			p = strings.TrimSpace(p)
+			if p != "" {
+				param.HivePartitionCols = append(param.HivePartitionCols, strings.ToLower(p))
+			}
+		}
+		return true, nil
+	}
+	return false, nil
+}
+
+func validateHiveOptionConsistency(param *tree.ExternParam) error {
+	if !param.HivePartitioning && len(param.HivePartitionCols) > 0 {
+		return moerr.NewBadConfig(param.Ctx, "hive_partition_columns requires hive_partitioning='true'")
+	}
+	return nil
+}
+
 func InitInfileParam(param *tree.ExternParam) error {
 	for i := 0; i < len(param.Option); i += 2 {
-		switch strings.ToLower(param.Option[i]) {
+		key := strings.ToLower(param.Option[i])
+		if handled, err := parseHiveOptionKV(param, key, param.Option[i+1]); handled {
+			if err != nil {
+				return err
+			}
+			continue
+		}
+		switch key {
 		case "filepath":
 			param.Filepath = param.Option[i+1]
 		case "compression":
@@ -1878,9 +1935,12 @@
 			param.JsonData = jsondata
 			param.Format = tree.JSONLINE
 		default:
-			return moerr.NewBadConfigf(param.Ctx, "the keyword '%s' is not support", strings.ToLower(param.Option[i]))
+			return moerr.NewBadConfigf(param.Ctx, "the keyword '%s' is not support", key)
 		}
 	}
+	if err := validateHiveOptionConsistency(param); err != nil {
+		return err
+	}
 	if len(param.Filepath) == 0 {
 		return moerr.NewBadConfig(param.Ctx, "the filepath must be specified")
 	}
@@ -1896,7 +1956,14 @@
 func InitS3Param(param *tree.ExternParam) error {
 	param.S3Param = &tree.S3Parameter{}
 	for i := 0; i < len(param.Option); i += 2 {
-		switch strings.ToLower(param.Option[i]) {
+		key := strings.ToLower(param.Option[i])
+		if handled, err := parseHiveOptionKV(param, key, param.Option[i+1]); handled {
+			if err != nil {
+				return err
+			}
+			continue
+		}
+		switch key {
 		case "endpoint":
 			param.S3Param.Endpoint = param.Option[i+1]
 		case "region":
@@ -1930,11 +1997,14 @@
 		}
 			param.JsonData = jsondata
 			param.Format = tree.JSONLINE
 		default:
-			return moerr.NewBadConfigf(param.Ctx, "the keyword '%s' is not support", strings.ToLower(param.Option[i]))
+			return moerr.NewBadConfigf(param.Ctx, "the keyword '%s' is not support", key)
 		}
 	}
+	if err := validateHiveOptionConsistency(param); err != nil {
+		return err
+	}
 	if param.Format == tree.JSONLINE && len(param.JsonData) == 0 {
 		return moerr.NewBadConfig(param.Ctx, "the jsondata must be specified")
 	}
@@ -2003,8 +2072,21 @@
 func InitStageS3Param(param *tree.ExternParam, s stage.StageDef) error {
 	param.S3Param.Provider, _ = s.GetCredentials(stage.PARAMKEY_PROVIDER, stage.S3_PROVIDER_AMAZON)
 	param.CompressType, _ = s.GetCredentials(stage.PARAMKEY_COMPRESSION, "auto")
+	// Note: the parseHiveOptionKV call below is kept for parity with the other
+	// two Init*Param functions, but hive_partitioning on a stage external table
+	// is rejected at DDL (build_ddl.go validateAndSetHivePartitionOptions). The
+	// hive branch here is therefore unreachable via normal DDL; it exists only
+	// so every Init*Param follows the same shape and would tolerate legacy JSON
+	// that snuck hive keys past validation.
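+	// (For reference, the DDL-time rejection reads "hive_partitioning does not
+	// support stage external tables", so stage tables cannot normally carry
+	// hive keys into the loop below.)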
 	for i := 0; i < len(param.Option); i += 2 {
-		switch strings.ToLower(param.Option[i]) {
+		key := strings.ToLower(param.Option[i])
+		if handled, err := parseHiveOptionKV(param, key, param.Option[i+1]); handled {
+			if err != nil {
+				return err
+			}
+			continue
+		}
+		switch key {
 		case "format":
 			format := strings.ToLower(param.Option[i+1])
 			if format != tree.CSV && format != tree.JSONLINE && format != tree.PARQUET {
@@ -2018,12 +2100,15 @@
 			}
 			param.JsonData = jsondata
 			param.Format = tree.JSONLINE
 		default:
-			return moerr.NewBadConfigf(param.Ctx, "the keyword '%s' is not support", strings.ToLower(param.Option[i]))
+			return moerr.NewBadConfigf(param.Ctx, "the keyword '%s' is not support", key)
 		}
 	}
+	if err := validateHiveOptionConsistency(param); err != nil {
+		return err
+	}
 	if param.Format == tree.JSONLINE && len(param.JsonData) == 0 {
 		return moerr.NewBadConfig(param.Ctx, "the jsondata must be specified")
 	}
diff --git a/pkg/sql/plan/utils_test.go b/pkg/sql/plan/utils_test.go
index 85a23ae02959b..ed12af2424817 100644
--- a/pkg/sql/plan/utils_test.go
+++ b/pkg/sql/plan/utils_test.go
@@ -16,10 +16,14 @@ package plan
 
 import (
 	"context"
+	"net/url"
 	"testing"
 
 	"github.com/matrixorigin/matrixone/pkg/container/types"
 	"github.com/matrixorigin/matrixone/pkg/pb/plan"
+	"github.com/matrixorigin/matrixone/pkg/sql/parsers/tree"
+	"github.com/matrixorigin/matrixone/pkg/stage"
+
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@@ -434,3 +438,823 @@ func TestDecimal128HasTrailingZeros(t *testing.T) {
 		})
 	}
 }
+
+// TestParseHiveOptionKV verifies hive key parsing via the Init*Param helper.
+// It covers the legacy-JSON fallback where Option[] still carries hive_partitioning /
+// hive_partition_columns (stripHiveOptionKeys did not run). The key behavior:
+// each key's skip-if-set guard must only inspect its own field; otherwise a
+// reversed option order silently drops hive_partitioning=true.
+func TestParseHiveOptionKV(t *testing.T) {
+	t.Run("canonical order applies both", func(t *testing.T) {
+		param := &tree.ExternParam{}
+		param.Option = []string{
+			"hive_partitioning", "true",
+			"hive_partition_columns", "year,month",
+		}
+		for i := 0; i < len(param.Option); i += 2 {
+			handled, err := parseHiveOptionKV(param, param.Option[i], param.Option[i+1])
+			require.True(t, handled)
+			require.NoError(t, err)
+		}
+		assert.True(t, param.HivePartitioning)
+		assert.Equal(t, []string{"year", "month"}, param.HivePartitionCols)
+	})
+
+	// Each key's skip-if-set guard must inspect only its own field. A coupled
+	// guard that treats non-empty HivePartitionCols as "already handled" would
+	// silently drop hive_partitioning=true when cols appeared first in Option[],
+	// leaving the table mis-classified as non-hive. Keep this case as a
+	// regression for that contract.
+ t.Run("reversed order still applies both", func(t *testing.T) { + param := &tree.ExternParam{} + param.Option = []string{ + "hive_partition_columns", "year,month", + "hive_partitioning", "true", + } + for i := 0; i < len(param.Option); i += 2 { + handled, err := parseHiveOptionKV(param, param.Option[i], param.Option[i+1]) + require.True(t, handled, "key=%s", param.Option[i]) + require.NoError(t, err) + } + assert.True(t, param.HivePartitioning, + "hive_partitioning must not be dropped when cols appeared first in Option[]") + assert.Equal(t, []string{"year", "month"}, param.HivePartitionCols) + }) + + t.Run("pre-populated HivePartitioning is not overwritten", func(t *testing.T) { + param := &tree.ExternParam{} + param.HivePartitioning = true + handled, err := parseHiveOptionKV(param, "hive_partitioning", "false") + require.True(t, handled) + require.NoError(t, err) + assert.True(t, param.HivePartitioning, "skip-if-set must not flip true→false") + }) + + t.Run("pre-populated HivePartitionCols is not overwritten", func(t *testing.T) { + param := &tree.ExternParam{} + param.HivePartitionCols = []string{"year"} + handled, err := parseHiveOptionKV(param, "hive_partition_columns", "month,day") + require.True(t, handled) + require.NoError(t, err) + assert.Equal(t, []string{"year"}, param.HivePartitionCols) + }) + + t.Run("invalid bool value reports error", func(t *testing.T) { + param := &tree.ExternParam{} + param.Ctx = context.Background() + handled, err := parseHiveOptionKV(param, "hive_partitioning", "yes") + require.True(t, handled) + require.Error(t, err) + }) + + t.Run("non-hive key returns not-handled", func(t *testing.T) { + param := &tree.ExternParam{} + handled, err := parseHiveOptionKV(param, "filepath", "/data/") + assert.False(t, handled) + assert.NoError(t, err) + }) + + t.Run("false value", func(t *testing.T) { + param := &tree.ExternParam{} + handled, err := parseHiveOptionKV(param, "hive_partitioning", "false") + require.True(t, handled) + require.NoError(t, err) + assert.False(t, param.HivePartitioning) + }) + + t.Run("cols lowercased and trimmed", func(t *testing.T) { + param := &tree.ExternParam{} + handled, err := parseHiveOptionKV(param, "hive_partition_columns", " Year , MONTH , , Day ") + require.True(t, handled) + require.NoError(t, err) + assert.Equal(t, []string{"year", "month", "day"}, param.HivePartitionCols) + }) +} + +// ------------------------------------------------------------------------- +// Init*Param legacy-JSON hive branches and plain happy paths. +// ------------------------------------------------------------------------- + +// TestInitInfileParam_Plain exercises the normal option pass-through with +// filepath/format/compression/jsondata so the non-hive arms are covered too. 
+func TestInitInfileParam_Plain(t *testing.T) { + param := &tree.ExternParam{} + param.Option = []string{ + "filepath", "/data/x", + "compression", "gzip", + "format", "parquet", + } + require.NoError(t, InitInfileParam(param)) + assert.Equal(t, "/data/x", param.Filepath) + assert.Equal(t, "gzip", param.CompressType) + assert.Equal(t, "parquet", param.Format) + + // jsonline/jsondata branch + param = &tree.ExternParam{} + param.Option = []string{"filepath", "/f", "jsondata", "object"} + require.NoError(t, InitInfileParam(param)) + assert.Equal(t, "object", param.JsonData) + assert.Equal(t, "jsonline", param.Format) + + // csv default + param = &tree.ExternParam{} + param.Option = []string{"filepath", "/csv"} + require.NoError(t, InitInfileParam(param)) + assert.Equal(t, "csv", param.Format) +} + +// TestInitInfileParam_HiveLegacyOption exercises parseHiveOptionKV via +// InitInfileParam when Option[] still contains hive keys (simulating JSON +// that predates stripHiveOptionKeys). +func TestInitInfileParam_HiveLegacyOption(t *testing.T) { + param := &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{ + "filepath", "/data/", + "format", "parquet", + "hive_partitioning", "true", + "hive_partition_columns", "year,month", + } + require.NoError(t, InitInfileParam(param)) + assert.True(t, param.HivePartitioning) + assert.Equal(t, []string{"year", "month"}, param.HivePartitionCols) +} + +func TestInitInfileParam_Errors(t *testing.T) { + param := &tree.ExternParam{} + param.Ctx = context.Background() + // Unknown format + param.Option = []string{"filepath", "/x", "format", "orc"} + require.Error(t, InitInfileParam(param)) + + // Unknown jsondata + param = &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"filepath", "/x", "jsondata", "ndjson"} + require.Error(t, InitInfileParam(param)) + + // Missing filepath + param = &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"format", "parquet"} + require.Error(t, InitInfileParam(param)) + + // jsonline without jsondata + param = &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"filepath", "/x", "format", "jsonline"} + require.Error(t, InitInfileParam(param)) + + // Unknown keyword + param = &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"unknown", "val", "filepath", "/x"} + require.Error(t, InitInfileParam(param)) + + // Invalid hive_partitioning value + param = &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"filepath", "/x", "format", "parquet", "hive_partitioning", "yes"} + require.Error(t, InitInfileParam(param)) + + // Columns with hive_partitioning disabled are rejected after legacy parsing. + param = &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{ + "filepath", "/x", + "format", "parquet", + "hive_partitioning", "false", + "hive_partition_columns", "year", + } + err := InitInfileParam(param) + require.Error(t, err) + assert.Contains(t, err.Error(), "requires hive_partitioning='true'") +} + +// TestInitS3Param_Plain exercises the S3 arm with normal options. 
+func TestInitS3Param_Plain(t *testing.T) { + param := &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{ + "endpoint", "https://s3.example.com", + "region", "us-west-2", + "access_key_id", "AK", + "secret_access_key", "SK", + "bucket", "my-bucket", + "filepath", "sales/", + "compression", "none", + "provider", "minio", + "role_arn", "arn:aws:iam::111:role/R", + "external_id", "ext", + "format", "parquet", + } + require.NoError(t, InitS3Param(param)) + assert.Equal(t, "https://s3.example.com", param.S3Param.Endpoint) + assert.Equal(t, "my-bucket", param.S3Param.Bucket) + assert.Equal(t, "sales/", param.Filepath) + assert.Equal(t, "parquet", param.Format) + + // jsondata jsonline path + param = &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"bucket", "b", "jsondata", "array"} + require.NoError(t, InitS3Param(param)) + assert.Equal(t, "jsonline", param.Format) +} + +func TestInitS3Param_HiveLegacyOption(t *testing.T) { + param := &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{ + "bucket", "b", + "format", "parquet", + "hive_partitioning", "true", + "hive_partition_columns", "year", + } + require.NoError(t, InitS3Param(param)) + assert.True(t, param.HivePartitioning) + assert.Equal(t, []string{"year"}, param.HivePartitionCols) +} + +func TestInitS3Param_Errors(t *testing.T) { + param := &tree.ExternParam{} + param.Ctx = context.Background() + // Bad format + param.Option = []string{"bucket", "b", "format", "orc"} + require.Error(t, InitS3Param(param)) + + // Bad jsondata + param = &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"bucket", "b", "jsondata", "bad"} + require.Error(t, InitS3Param(param)) + + // jsonline without jsondata + param = &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"bucket", "b", "format", "jsonline"} + require.Error(t, InitS3Param(param)) + + // Unknown key + param = &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"bogus", "x"} + require.Error(t, InitS3Param(param)) + + // Invalid hive_partitioning boolean + param = &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"bucket", "b", "format", "parquet", "hive_partitioning", "maybe"} + require.Error(t, InitS3Param(param)) + + // Columns with hive_partitioning disabled are rejected after legacy parsing. + param = &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{ + "bucket", "b", + "format", "parquet", + "hive_partitioning", "false", + "hive_partition_columns", "year", + } + err := InitS3Param(param) + require.Error(t, err) + assert.Contains(t, err.Error(), "requires hive_partitioning='true'") +} + +// ------------------------------------------------------------------------- +// build_ddl.go hive DDL helpers. +// ------------------------------------------------------------------------- + +func TestParseHiveOptionsFromRawOptions_AllPaths(t *testing.T) { + ctx := context.Background() + + // Absent → (false, nil, nil) + en, cols, err := parseHiveOptionsFromRawOptions(ctx, []string{"filepath", "/x"}) + require.NoError(t, err) + assert.False(t, en) + assert.Nil(t, cols) + + // Explicit false → (false, nil, nil) + en, cols, err = parseHiveOptionsFromRawOptions(ctx, []string{"hive_partitioning", "false"}) + require.NoError(t, err) + assert.False(t, en) + assert.Nil(t, cols) + + // Columns without an enabled hive_partitioning flag are inconsistent. 
+ _, _, err = parseHiveOptionsFromRawOptions(ctx, []string{"hive_partition_columns", "year"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "requires hive_partitioning='true'") + + _, _, err = parseHiveOptionsFromRawOptions(ctx, + []string{"hive_partitioning", "false", "hive_partition_columns", "year"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "requires hive_partitioning='true'") + + // Invalid value → error + _, _, err = parseHiveOptionsFromRawOptions(ctx, []string{"hive_partitioning", "yes"}) + require.Error(t, err) + + // true + empty cols → (true, nil, nil) (caller enforces non-empty) + en, cols, err = parseHiveOptionsFromRawOptions(ctx, []string{"hive_partitioning", "true"}) + require.NoError(t, err) + assert.True(t, en) + assert.Nil(t, cols) + + // true + cols — trimmed split + en, cols, err = parseHiveOptionsFromRawOptions(ctx, + []string{"hive_partitioning", "TRUE", "hive_partition_columns", " year ,, month "}) + require.NoError(t, err) + assert.True(t, en) + assert.Equal(t, []string{"year", "month"}, cols) +} + +func TestRejectDuplicateKeys(t *testing.T) { + ctx := context.Background() + // No duplicates → nil. + err := rejectDuplicateKeys(ctx, + []string{"format", "parquet", "filepath", "/x"}, + []string{"format", "filepath"}) + assert.NoError(t, err) + + // Key not in list is tolerated. + err = rejectDuplicateKeys(ctx, + []string{"compression", "gzip", "compression", "none"}, + []string{"format"}) + assert.NoError(t, err) + + // Duplicate of a watched key → error. + err = rejectDuplicateKeys(ctx, + []string{"format", "parquet", "format", "csv"}, + []string{"format"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "duplicate option key 'format'") +} + +func TestGetRawOption(t *testing.T) { + opts := []string{"Filepath", "/x", "format", "parquet"} + assert.Equal(t, "/x", getRawOption(opts, "filepath")) + assert.Equal(t, "parquet", getRawOption(opts, "format")) + assert.Equal(t, "", getRawOption(opts, "bucket")) +} + +func TestStripHiveOptionKeys(t *testing.T) { + in := []string{ + "filepath", "/x", + "hive_partitioning", "true", + "format", "parquet", + "hive_partition_columns", "year,month", + "compression", "gzip", + } + out := stripHiveOptionKeys(in) + assert.Equal(t, []string{ + "filepath", "/x", + "format", "parquet", + "compression", "gzip", + }, out) + + // Idempotent / no hive keys + in2 := []string{"filepath", "/x", "format", "parquet"} + assert.Equal(t, in2, stripHiveOptionKeys(in2)) + + // All hive keys + in3 := []string{"hive_partitioning", "true", "hive_partition_columns", "y"} + assert.Equal(t, []string{}, stripHiveOptionKeys(in3)) +} + +func TestFindColInTableDefCaseInsensitive(t *testing.T) { + td := []*plan.ColDef{ + {Name: "year"}, + {Name: "Month"}, + {Name: "Day"}, + } + got := findColInTableDefCaseInsensitive(td, "YEAR") + require.NotNil(t, got) + assert.Equal(t, "year", got.Name) + + got = findColInTableDefCaseInsensitive(td, "month") + require.NotNil(t, got) + assert.Equal(t, "Month", got.Name) + + assert.Nil(t, findColInTableDefCaseInsensitive(td, "nonexistent")) +} + +// ------------------------------------------------------------------------- +// validateAndSetHivePartitionOptions — every branch (happy + negative). +// ------------------------------------------------------------------------- + +// makeHivePlan builds a minimal plan.CreateTable with the given columns for +// validateAndSetHivePartitionOptions testing. 
+func makeHivePlan(cols ...*plan.ColDef) *plan.CreateTable { + return &plan.CreateTable{ + TableDef: &plan.TableDef{Cols: cols}, + } +} + +func TestValidateAndSetHivePartitionOptions_Disabled(t *testing.T) { + // hive_partitioning absent → returns nil, does not touch stmt.Param. + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = []string{"filepath", "/x", "format", "parquet"} + ct := makeHivePlan(&plan.ColDef{Name: "id", Typ: plan.Type{Id: int32(types.T_int32)}}) + require.NoError(t, validateAndSetHivePartitionOptions(context.Background(), stmt, ct)) + assert.False(t, stmt.Param.HivePartitioning) +} + +func TestValidateAndSetHivePartitionOptions_DisabledWithColumnsRejected(t *testing.T) { + cases := []struct { + name string + opts []string + }{ + { + name: "columns without hive_partitioning", + opts: []string{"filepath", "/x", "format", "parquet", "hive_partition_columns", "year"}, + }, + { + name: "columns with hive_partitioning false", + opts: []string{ + "filepath", "/x", + "format", "parquet", + "hive_partitioning", "false", + "hive_partition_columns", "year", + }, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = tc.opts + ct := makeHivePlan(&plan.ColDef{Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}}) + err := validateAndSetHivePartitionOptions(context.Background(), stmt, ct) + require.Error(t, err) + assert.Contains(t, err.Error(), "requires hive_partitioning='true'") + }) + } +} + +func TestValidateAndSetHivePartitionOptions_HappyPath(t *testing.T) { + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = []string{ + "filepath", "/data/", + "format", "parquet", + "hive_partitioning", "true", + "hive_partition_columns", "Year", + } + ct := makeHivePlan( + &plan.ColDef{Name: "id", Typ: plan.Type{Id: int32(types.T_int32)}}, + &plan.ColDef{ + Name: "year", + Typ: plan.Type{Id: int32(types.T_int32)}, + Default: &plan.Default{NullAbility: true}, + }, + ) + require.NoError(t, validateAndSetHivePartitionOptions(context.Background(), stmt, ct)) + assert.True(t, stmt.Param.HivePartitioning) + assert.Equal(t, []string{"year"}, stmt.Param.HivePartitionCols) + require.Equal(t, 1, len(stmt.Param.HivePartitionColTypes)) + assert.Equal(t, int32(types.T_int32), stmt.Param.HivePartitionColTypes[0].Id) + assert.True(t, stmt.Param.HivePartitionColTypes[0].NullAbility) + // Option[] should be stripped of hive keys. 
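+ // (Sketch) Equivalent whole-slice form of the pairwise check below:
+ // stripHiveOptionKeys is idempotent, so re-stripping an already-stripped
+ // Option slice must be a no-op.
+ assert.Equal(t, stmt.Param.Option, stripHiveOptionKeys(stmt.Param.Option))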
+ for i := 0; i < len(stmt.Param.Option); i += 2 { + assert.NotEqual(t, "hive_partitioning", stmt.Param.Option[i]) + assert.NotEqual(t, "hive_partition_columns", stmt.Param.Option[i]) + } +} + +func TestValidateAndSetHivePartitionOptions_MissingCols(t *testing.T) { + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = []string{"format", "parquet", "hive_partitioning", "true"} + ct := makeHivePlan() + err := validateAndSetHivePartitionOptions(context.Background(), stmt, ct) + require.Error(t, err) + assert.Contains(t, err.Error(), "hive_partition_columns is required") +} + +func TestValidateAndSetHivePartitionOptions_DuplicateHiveKey(t *testing.T) { + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = []string{ + "hive_partitioning", "true", + "hive_partitioning", "false", + "hive_partition_columns", "year", + } + ct := makeHivePlan() + err := validateAndSetHivePartitionOptions(context.Background(), stmt, ct) + require.Error(t, err) + assert.Contains(t, err.Error(), "duplicate option key") +} + +func TestValidateAndSetHivePartitionOptions_DuplicateFormat(t *testing.T) { + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = []string{ + "format", "parquet", + "format", "csv", + "hive_partitioning", "true", + "hive_partition_columns", "year", + } + ct := makeHivePlan(&plan.ColDef{Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}}) + err := validateAndSetHivePartitionOptions(context.Background(), stmt, ct) + require.Error(t, err) + assert.Contains(t, err.Error(), "duplicate option key 'format'") +} + +func TestValidateAndSetHivePartitionOptions_NonParquetFormat(t *testing.T) { + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = []string{ + "format", "csv", + "hive_partitioning", "true", + "hive_partition_columns", "year", + } + ct := makeHivePlan(&plan.ColDef{Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}}) + err := validateAndSetHivePartitionOptions(context.Background(), stmt, ct) + require.Error(t, err) + assert.Contains(t, err.Error(), "only supports format='parquet'") +} + +func TestValidateAndSetHivePartitionOptions_StageFilepath(t *testing.T) { + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = []string{ + "filepath", "stage://mystage/data/", + "format", "parquet", + "hive_partitioning", "true", + "hive_partition_columns", "year", + } + ct := makeHivePlan(&plan.ColDef{Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}}) + err := validateAndSetHivePartitionOptions(context.Background(), stmt, ct) + require.Error(t, err) + assert.Contains(t, err.Error(), "does not support stage external tables") +} + +func TestValidateAndSetHivePartitionOptions_StageNameSet(t *testing.T) { + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.StageName = "mystage" + stmt.Param.Option = []string{ + "format", "parquet", + "hive_partitioning", "true", + "hive_partition_columns", "year", + } + ct := makeHivePlan(&plan.ColDef{Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}}) + err := validateAndSetHivePartitionOptions(context.Background(), stmt, ct) + require.Error(t, err) + assert.Contains(t, err.Error(), "does not support stage external tables") +} + +func TestValidateAndSetHivePartitionOptions_ColumnNotFound(t *testing.T) { + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = []string{ + "format", "parquet", + "hive_partitioning", "true", + "hive_partition_columns", "year", + } + ct := makeHivePlan(&plan.ColDef{Name: "id", 
Typ: plan.Type{Id: int32(types.T_int32)}}) + err := validateAndSetHivePartitionOptions(context.Background(), stmt, ct) + require.Error(t, err) + assert.Contains(t, err.Error(), "not found in table columns") +} + +func TestValidateAndSetHivePartitionOptions_HiddenColumn(t *testing.T) { + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = []string{ + "format", "parquet", + "hive_partitioning", "true", + "hive_partition_columns", "year", + } + ct := makeHivePlan(&plan.ColDef{Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}, Hidden: true}) + err := validateAndSetHivePartitionOptions(context.Background(), stmt, ct) + require.Error(t, err) + assert.Contains(t, err.Error(), "cannot be a hidden column") +} + +func TestValidateAndSetHivePartitionOptions_GeneratedColumn(t *testing.T) { + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = []string{ + "format", "parquet", + "hive_partitioning", "true", + "hive_partition_columns", "year", + } + ct := makeHivePlan(&plan.ColDef{ + Name: "year", + Typ: plan.Type{Id: int32(types.T_int32)}, + GeneratedCol: &plan.GeneratedCol{}, + }) + err := validateAndSetHivePartitionOptions(context.Background(), stmt, ct) + require.Error(t, err) + assert.Contains(t, err.Error(), "cannot be a generated column") +} + +func TestValidateAndSetHivePartitionOptions_VectorType(t *testing.T) { + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = []string{ + "format", "parquet", + "hive_partitioning", "true", + "hive_partition_columns", "emb", + } + ct := makeHivePlan(&plan.ColDef{Name: "emb", Typ: plan.Type{Id: int32(types.T_array_float32)}}) + err := validateAndSetHivePartitionOptions(context.Background(), stmt, ct) + require.Error(t, err) + assert.Contains(t, err.Error(), "cannot be a VECTOR type") + + ct = makeHivePlan(&plan.ColDef{Name: "emb", Typ: plan.Type{Id: int32(types.T_array_float64)}}) + err = validateAndSetHivePartitionOptions(context.Background(), stmt, ct) + require.Error(t, err) + assert.Contains(t, err.Error(), "cannot be a VECTOR type") +} + +func TestValidateAndSetHivePartitionOptions_DuplicatePartitionColumn(t *testing.T) { + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = []string{ + "format", "parquet", + "hive_partitioning", "true", + "hive_partition_columns", "year,year", + } + ct := makeHivePlan(&plan.ColDef{Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}}) + err := validateAndSetHivePartitionOptions(context.Background(), stmt, ct) + require.Error(t, err) + assert.Contains(t, err.Error(), "duplicate partition column") +} + +func TestValidateAndSetHivePartitionOptions_MultiLevelAndNullability(t *testing.T) { + // Multi-level partition columns; mixing with/without Default to exercise + // NullAbility default. 
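+ // (Sketch) The nullability rule exercised by this test, restated as a
+ // hypothetical local helper: a column with a nil Default is treated as
+ // nullable; otherwise Default.NullAbility decides.
+ nullableOf := func(c *plan.ColDef) bool {
+ return c.Default == nil || c.Default.NullAbility
+ }
+ assert.True(t, nullableOf(&plan.ColDef{}), "nil Default is nullable")
+ assert.False(t, nullableOf(&plan.ColDef{Default: &plan.Default{NullAbility: false}}))
+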
+ stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = []string{ + "filepath", "/data/", + "format", "parquet", + "hive_partitioning", "true", + "hive_partition_columns", "year,month", + } + ct := makeHivePlan( + // year: NOT NULL (Default.NullAbility=false) + &plan.ColDef{ + Name: "year", + Typ: plan.Type{Id: int32(types.T_int32)}, + Default: &plan.Default{NullAbility: false}, + }, + // month: no Default → treated as nullable (default true) + &plan.ColDef{Name: "month", Typ: plan.Type{Id: int32(types.T_varchar), Width: 2}}, + ) + require.NoError(t, validateAndSetHivePartitionOptions(context.Background(), stmt, ct)) + require.Len(t, stmt.Param.HivePartitionColTypes, 2) + assert.False(t, stmt.Param.HivePartitionColTypes[0].NullAbility, "year declared NOT NULL") + assert.True(t, stmt.Param.HivePartitionColTypes[1].NullAbility, "month default nullable when Default is nil") + assert.Equal(t, int32(2), stmt.Param.HivePartitionColTypes[1].Width) +} + +func TestValidateAndSetHivePartitionOptions_InvalidHiveValue(t *testing.T) { + // parseHiveOptionsFromRawOptions returns an error path. + stmt := &tree.CreateTable{Param: &tree.ExternParam{}} + stmt.Param.Option = []string{ + "format", "parquet", + "hive_partitioning", "maybe", + "hive_partition_columns", "year", + } + ct := makeHivePlan(&plan.ColDef{Name: "year", Typ: plan.Type{Id: int32(types.T_int32)}}) + err := validateAndSetHivePartitionOptions(context.Background(), stmt, ct) + require.Error(t, err) + assert.Contains(t, err.Error(), "must be 'true' or 'false'") +} + +// ------------------------------------------------------------------------- +// InitStageS3Param — happy path + credential-missing error paths. +// ------------------------------------------------------------------------- + +func TestInitStageS3Param_HappyAndErrors(t *testing.T) { + parse := func(raw string) *url.URL { + u, err := url.Parse(raw) + require.NoError(t, err) + return u + } + + baseCreds := map[string]string{ + stage.PARAMKEY_AWS_KEY_ID: "AK", + stage.PARAMKEY_AWS_SECRET_KEY: "SK", + stage.PARAMKEY_AWS_REGION: "us-west-2", + stage.PARAMKEY_ENDPOINT: "https://s3.example.com", + } + + t.Run("happy_path", func(t *testing.T) { + param := &tree.ExternParam{} + param.Ctx = context.Background() + sd := stage.StageDef{ + Url: parse("s3://my-bucket/prefix/"), + Credentials: baseCreds, + } + require.NoError(t, InitStageS3Param(param, sd)) + assert.Equal(t, tree.S3, param.ScanType) + assert.Equal(t, "my-bucket", param.S3Param.Bucket) + assert.Equal(t, "AK", param.S3Param.APIKey) + assert.Equal(t, "SK", param.S3Param.APISecret) + assert.Equal(t, "us-west-2", param.S3Param.Region) + }) + + t.Run("bad_protocol", func(t *testing.T) { + param := &tree.ExternParam{} + param.Ctx = context.Background() + sd := stage.StageDef{Url: parse("http://x/")} + require.Error(t, InitStageS3Param(param, sd)) + }) + + t.Run("raw_query_rejected", func(t *testing.T) { + param := &tree.ExternParam{} + param.Ctx = context.Background() + sd := stage.StageDef{Url: parse("s3://b/p/?q=1")} + require.Error(t, InitStageS3Param(param, sd)) + }) + + // Each missing-cred path. 
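+ // (Sketch) Each subtest clones baseCreds minus one key. A hypothetical
+ // helper spells out that copy-minus-one shape; the loop below inlines the
+ // same logic, so this is illustrative only.
+ dropKey := func(m map[string]string, k string) map[string]string {
+ out := make(map[string]string, len(m))
+ for kk, vv := range m {
+ if kk != k {
+ out[kk] = vv
+ }
+ }
+ return out
+ }
+ require.Len(t, dropKey(baseCreds, stage.PARAMKEY_ENDPOINT), len(baseCreds)-1)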
+ for _, k := range []string{ + stage.PARAMKEY_AWS_KEY_ID, stage.PARAMKEY_AWS_SECRET_KEY, + stage.PARAMKEY_AWS_REGION, stage.PARAMKEY_ENDPOINT, + } { + t.Run("missing_"+k, func(t *testing.T) { + creds := map[string]string{} + for kk, vv := range baseCreds { + if kk != k { + creds[kk] = vv + } + } + param := &tree.ExternParam{} + param.Ctx = context.Background() + sd := stage.StageDef{ + Url: parse("s3://b/p/"), + Credentials: creds, + } + err := InitStageS3Param(param, sd) + require.Error(t, err) + assert.Contains(t, err.Error(), k) + }) + } + + t.Run("option_format_csv_invalid", func(t *testing.T) { + param := &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"format", "orc"} + sd := stage.StageDef{ + Url: parse("s3://b/p/"), + Credentials: baseCreds, + } + require.Error(t, InitStageS3Param(param, sd)) + }) + + t.Run("option_unknown_key", func(t *testing.T) { + param := &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"unknown", "x"} + sd := stage.StageDef{ + Url: parse("s3://b/p/"), + Credentials: baseCreds, + } + require.Error(t, InitStageS3Param(param, sd)) + }) + + t.Run("jsonline_without_jsondata", func(t *testing.T) { + param := &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"format", "jsonline"} + sd := stage.StageDef{ + Url: parse("s3://b/p/"), + Credentials: baseCreds, + } + require.Error(t, InitStageS3Param(param, sd)) + }) + + t.Run("hive_legacy_option_under_stage", func(t *testing.T) { + // The defense-in-depth hive branch under InitStageS3Param. + param := &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"hive_partitioning", "true", "hive_partition_columns", "year"} + sd := stage.StageDef{ + Url: parse("s3://b/p/"), + Credentials: baseCreds, + } + require.NoError(t, InitStageS3Param(param, sd)) + assert.True(t, param.HivePartitioning) + }) + + t.Run("hive_legacy_columns_disabled_under_stage", func(t *testing.T) { + param := &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"hive_partitioning", "false", "hive_partition_columns", "year"} + sd := stage.StageDef{ + Url: parse("s3://b/p/"), + Credentials: baseCreds, + } + err := InitStageS3Param(param, sd) + require.Error(t, err) + assert.Contains(t, err.Error(), "requires hive_partitioning='true'") + }) +} + +// ------------------------------------------------------------------------- +// InitInfileOrStageParam — non-stage pass-through. +// ------------------------------------------------------------------------- + +func TestInitInfileOrStageParam_NonStageFallsThrough(t *testing.T) { + param := &tree.ExternParam{} + param.Ctx = context.Background() + param.Option = []string{"filepath", "/data/x", "format", "parquet"} + // proc is unused for the non-stage branch. + require.NoError(t, InitInfileOrStageParam(param, nil)) + assert.Equal(t, "/data/x", param.Filepath) + assert.Equal(t, "parquet", param.Format) +} + +// Avoid unused import warning when some branches of types are not directly referenced. 
+var _ = types.T_int32 diff --git a/test/distributed/cases/table/hive_partition_external_table.result b/test/distributed/cases/table/hive_partition_external_table.result new file mode 100644 index 0000000000000..d1c5751d98281 --- /dev/null +++ b/test/distributed/cases/table/hive_partition_external_table.result @@ -0,0 +1,512 @@ +drop database if exists hive_part_db; +create database hive_part_db; +use hive_part_db; +drop table if exists hive_single; +create external table hive_single ( +id int, +amount double, +year int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; +drop table if exists hive_err1; +create external table hive_err1 ( +id int, year int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true'}; +invalid configuration: hive_partition_columns is required when hive_partitioning is enabled +drop table if exists hive_err2; +create external table hive_err2 ( +id int, year int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='csv', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; +invalid configuration: hive_partitioning currently only supports format='parquet', got 'csv' +drop table if exists hive_err3; +create external table hive_err3 ( +id int, amount double +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='nonexistent'}; +invalid configuration: partition column 'nonexistent' not found in table columns +drop table if exists hive_err4; +create external table hive_err4 ( +id int, year int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partitioning'='false', 'hive_partition_columns'='year'}; +invalid configuration: duplicate option key 'hive_partitioning' +drop table if exists hive_err5; +create external table hive_err5 ( +id int, year int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='yes', 'hive_partition_columns'='year'}; +invalid configuration: hive_partitioning must be 'true' or 'false', got 'yes' +drop table if exists hive_err6; +create external table hive_err6 ( +id int, YEAR int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='YeaR'}; +drop table if exists hive_err7; +create external table hive_err7 ( +id int, year int, amount double +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year,year'}; +invalid configuration: duplicate partition column 'year' +drop table if exists hive_err8; +create external table hive_err8 ( +id int, +emb vecf32(3) +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='emb'}; +invalid configuration: partition column 'emb' cannot be a VECTOR type +show create table hive_single; +➤ Table[12,-1,0] ¦ Create Table[12,-1,0] 𝄀 +hive_single ¦ CREATE EXTERNAL TABLE `hive_single` ( + `id` int DEFAULT NULL, + `amount` double DEFAULT NULL, + `year` int DEFAULT NULL +) INFILE{'FILEPATH'='','COMPRESSION'='','FORMAT'='parquet','JSONDATA'='','HIVE_PARTITIONING'='true','HIVE_PARTITION_COLUMNS'='year'} +load data infile '$resources/hive_partition/single_level/year=2024/data.parquet' into table hive_single; 
+invalid input: cannot insert/update/delete from external table +drop table if exists hive_disabled; +create external table hive_disabled ( +id int, +amount double +) infile{'filepath'='$resources/hive_partition/non_hive/simple.parquet', 'format'='parquet', 'hive_partitioning'='false'}; +select count(*) from hive_disabled; +➤ count(*)[-5,64,0] 𝄀 +3 +select count(*) as cnt from hive_single; +➤ cnt[-5,64,0] 𝄀 +25 +select year, count(*) as cnt from hive_single where year = 2024 group by year; +➤ year[4,32,0] ¦ cnt[-5,64,0] 𝄀 +2024 ¦ 5 +select year, count(*) as cnt from hive_single where year in (2020, 2024) group by year order by year; +➤ year[4,32,0] ¦ cnt[-5,64,0] 𝄀 +2020 ¦ 5 𝄀 +2024 ¦ 5 +select year, count(*) as cnt from hive_single where year in (2022) group by year; +➤ year[4,32,0] ¦ cnt[-5,64,0] 𝄀 +2022 ¦ 5 +select year, count(*) as cnt from hive_single where year not in (2020, 2021, 2022) group by year order by year; +➤ year[4,32,0] ¦ cnt[-5,64,0] 𝄀 +2023 ¦ 5 𝄀 +2024 ¦ 5 +select count(*) as cnt from hive_single where year > 2022; +➤ cnt[-5,64,0] 𝄀 +10 +select year, count(*) as cnt from hive_single where year between 2021 and 2023 group by year order by year; +➤ year[4,32,0] ¦ cnt[-5,64,0] 𝄀 +2021 ¦ 5 𝄀 +2022 ¦ 5 𝄀 +2023 ¦ 5 +select year, count(*) as cnt from hive_single where year = 2020 or year = 2024 group by year order by year; +➤ year[4,32,0] ¦ cnt[-5,64,0] 𝄀 +2020 ¦ 5 𝄀 +2024 ¦ 5 +select distinct year from hive_single order by year; +➤ year[4,32,0] 𝄀 +2020 𝄀 +2021 𝄀 +2022 𝄀 +2023 𝄀 +2024 +select sum(amount) as total from hive_single where year = 2020; +➤ total[8,54,0] 𝄀 +105.0 +select year, sum(amount) as total from hive_single group by year having sum(amount) >= 100 order by year; +➤ year[4,32,0] ¦ total[8,54,0] 𝄀 +2020 ¦ 105.0 𝄀 +2021 ¦ 105.0 𝄀 +2022 ¦ 105.0 𝄀 +2023 ¦ 105.0 𝄀 +2024 ¦ 105.0 +select count(distinct year) as distinct_years from hive_single; +➤ distinct_years[-5,64,0] 𝄀 +5 +select count(*) from hive_single where year + 1 = 2025; +➤ count(*)[-5,64,0] 𝄀 +5 +select count(*) from hive_single where cast(year as varchar) = '2024'; +➤ count(*)[-5,64,0] 𝄀 +5 +select id, year from hive_single order by year asc, id desc limit 5; +➤ id[4,32,0] ¦ year[4,32,0] 𝄀 +20204 ¦ 2020 𝄀 +20203 ¦ 2020 𝄀 +20202 ¦ 2020 𝄀 +20201 ¦ 2020 𝄀 +20200 ¦ 2020 +select count(*) as cnt from hive_single where year is not null; +➤ cnt[-5,64,0] 𝄀 +25 +select year, id from hive_single where year = 2023 order by id; +➤ year[4,32,0] ¦ id[4,32,0] 𝄀 +2023 ¦ 20230 𝄀 +2023 ¦ 20231 𝄀 +2023 ¦ 20232 𝄀 +2023 ¦ 20233 𝄀 +2023 ¦ 20234 +select year, cnt from ( +select year, count(*) as cnt from hive_single where year in (2020, 2021) group by year +) t order by year; +➤ year[4,32,0] ¦ cnt[-5,64,0] 𝄀 +2020 ¦ 5 𝄀 +2021 ¦ 5 +drop table if exists hive_multi; +create external table hive_multi ( +id int, +amount double, +year int, +month varchar(2) +) infile{'filepath'='$resources/hive_partition/multi_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year,month'}; +select count(*) as cnt from hive_multi; +➤ cnt[-5,64,0] 𝄀 +18 +select year, count(*) as cnt from hive_multi where year = 2024 group by year; +➤ year[4,32,0] ¦ cnt[-5,64,0] 𝄀 +2024 ¦ 9 +select month, count(*) as cnt from hive_multi where month = '01' group by month order by month; +➤ month[12,-1,0] ¦ cnt[-5,64,0] 𝄀 +01 ¦ 6 +select year, month, count(*) as cnt from hive_multi where year = 2024 and month = '01' group by year, month; +➤ year[4,32,0] ¦ month[12,-1,0] ¦ cnt[-5,64,0] 𝄀 +2024 ¦ 01 ¦ 3 +select year, month, count(*) as cnt from 
hive_multi +where year in (2024, 2025) and month in ('01', '02') +group by year, month order by year, month; +➤ year[4,32,0] ¦ month[12,-1,0] ¦ cnt[-5,64,0] 𝄀 +2024 ¦ 01 ¦ 3 𝄀 +2024 ¦ 02 ¦ 3 𝄀 +2025 ¦ 01 ¦ 3 𝄀 +2025 ¦ 02 ¦ 3 +select year, month, sum(amount) as total from hive_multi group by year, month order by year, month; +➤ year[4,32,0] ¦ month[12,-1,0] ¦ total[8,54,0] 𝄀 +2024 ¦ 01 ¦ 6072.03 𝄀 +2024 ¦ 02 ¦ 6072.0599999999995 𝄀 +2024 ¦ 03 ¦ 6072.09 𝄀 +2025 ¦ 01 ¦ 6075.03 𝄀 +2025 ¦ 02 ¦ 6075.0599999999995 𝄀 +2025 ¦ 03 ¦ 6075.09 +select id, year, month, amount from hive_multi where year = 2025 order by month asc, id asc limit 6; +➤ id[4,32,0] ¦ year[4,32,0] ¦ month[12,-1,0] ¦ amount[8,54,0] 𝄀 +202510 ¦ 2025 ¦ 01 ¦ 2025.01 𝄀 +202511 ¦ 2025 ¦ 01 ¦ 2025.01 𝄀 +202512 ¦ 2025 ¦ 01 ¦ 2025.01 𝄀 +202520 ¦ 2025 ¦ 02 ¦ 2025.02 𝄀 +202521 ¦ 2025 ¦ 02 ¦ 2025.02 𝄀 +202522 ¦ 2025 ¦ 02 ¦ 2025.02 +select year, count(*) as cnt from hive_multi group by year having count(*) >= 9 order by year; +➤ year[4,32,0] ¦ cnt[-5,64,0] 𝄀 +2024 ¦ 9 𝄀 +2025 ¦ 9 +select distinct year, month from hive_multi order by year, month; +➤ year[4,32,0] ¦ month[12,-1,0] 𝄀 +2024 ¦ 01 𝄀 +2024 ¦ 02 𝄀 +2024 ¦ 03 𝄀 +2025 ¦ 01 𝄀 +2025 ¦ 02 𝄀 +2025 ¦ 03 +select a.year, a.month, count(*) as cnt +from hive_multi a join hive_multi b +on a.year = b.year and a.month = b.month and a.id = b.id +where a.year = 2024 +group by a.year, a.month order by a.year, a.month; +➤ year[4,32,0] ¦ month[12,-1,0] ¦ cnt[-5,64,0] 𝄀 +2024 ¦ 01 ¦ 3 𝄀 +2024 ¦ 02 ¦ 3 𝄀 +2024 ¦ 03 ¦ 3 +select year, mc from ( +select year, count(distinct month) as mc from hive_multi group by year +) t order by year; +➤ year[4,32,0] ¦ mc[-5,64,0] 𝄀 +2024 ¦ 3 𝄀 +2025 ¦ 3 +drop table if exists hive_string; +create external table hive_string ( +id int, +amount double, +country varchar(10) +) infile{'filepath'='$resources/hive_partition/string_part/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='country'}; +select country, count(*) as cnt from hive_string where country = 'US' group by country; +➤ country[12,-1,0] ¦ cnt[-5,64,0] 𝄀 +US ¦ 4 +select country, count(*) as cnt from hive_string where country in ('US', 'CN') group by country order by country; +➤ country[12,-1,0] ¦ cnt[-5,64,0] 𝄀 +CN ¦ 4 𝄀 +US ¦ 4 +select country, count(*) as cnt from hive_string where country like 'U%' group by country; +➤ country[12,-1,0] ¦ cnt[-5,64,0] 𝄀 +US ¦ 4 +select country, count(*) as cnt from hive_string group by country order by country; +➤ country[12,-1,0] ¦ cnt[-5,64,0] 𝄀 +CN ¦ 4 𝄀 +JP ¦ 4 𝄀 +US ¦ 4 +select country, count(*) as cnt from hive_string where country != 'JP' group by country order by country; +➤ country[12,-1,0] ¦ cnt[-5,64,0] 𝄀 +CN ¦ 4 𝄀 +US ¦ 4 +select id, country from hive_string order by country, id limit 6; +➤ id[4,32,0] ¦ country[12,-1,0] 𝄀 +200 ¦ CN 𝄀 +201 ¦ CN 𝄀 +202 ¦ CN 𝄀 +203 ¦ CN 𝄀 +300 ¦ JP 𝄀 +301 ¦ JP +select country, length(country) as ln from hive_string group by country order by country; +➤ country[12,-1,0] ¦ ln[-5,64,0] 𝄀 +CN ¦ 2 𝄀 +JP ¦ 2 𝄀 +US ¦ 2 +drop table if exists hive_null; +create external table hive_null ( +id int, +amount double, +year int +) infile{'filepath'='$resources/hive_partition/null_part/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; +select id, year from hive_null order by id; +➤ id[4,32,0] ¦ year[4,32,0] 𝄀 +1 ¦ 2024 𝄀 +2 ¦ 2024 𝄀 +3 ¦ 2024 𝄀 +4 ¦ null 𝄀 +5 ¦ null +select count(*) as cnt from hive_null where year is null; +➤ cnt[-5,64,0] 𝄀 +2 +select count(*) as cnt from hive_null where year 
is not null; +➤ cnt[-5,64,0] 𝄀 +3 +select year, count(*) as cnt from hive_null group by year order by year; +➤ year[4,32,0] ¦ cnt[-5,64,0] 𝄀 +null ¦ 2 𝄀 +2024 ¦ 3 +select coalesce(year, -1) as y, count(*) as cnt from hive_null group by y order by y; +➤ y[-5,64,0] ¦ cnt[-5,64,0] 𝄀 +-1 ¦ 2 𝄀 +2024 ¦ 3 +drop table if exists hive_zeropad; +create external table hive_zeropad ( +id int, +amount double, +month int +) infile{'filepath'='$resources/hive_partition/zero_pad/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='month'}; +select month, count(*) as cnt from hive_zeropad where month = 1 group by month; +➤ month[4,32,0] ¦ cnt[-5,64,0] 𝄀 +1 ¦ 2 +select month, count(*) as cnt from hive_zeropad where month in (1, 12) group by month order by month; +➤ month[4,32,0] ¦ cnt[-5,64,0] 𝄀 +1 ¦ 2 𝄀 +12 ¦ 2 +select month, count(*) as cnt from hive_zeropad group by month order by month; +➤ month[4,32,0] ¦ cnt[-5,64,0] 𝄀 +1 ¦ 2 𝄀 +2 ¦ 2 𝄀 +12 ¦ 2 +select count(*) as cnt from hive_zeropad where month = 99; +➤ cnt[-5,64,0] 𝄀 +0 +select count(*) as cnt from hive_single where year = 2024 and __mo_filepath like '%year=2024%'; +➤ cnt[-5,64,0] 𝄀 +5 +select count(distinct __mo_filepath) as paths from hive_single; +➤ paths[-5,64,0] 𝄀 +5 +select year, count(distinct __mo_filepath) as files from hive_single group by year order by year; +➤ year[4,32,0] ¦ files[-5,64,0] 𝄀 +2020 ¦ 1 𝄀 +2021 ¦ 1 𝄀 +2022 ¦ 1 𝄀 +2023 ¦ 1 𝄀 +2024 ¦ 1 +select count(*) as rows_with_path from hive_single where length(__mo_filepath) > 0; +➤ rows_with_path[-5,64,0] 𝄀 +25 +drop table if exists parquet_non_hive; +create external table parquet_non_hive ( +id int, +amount double +) infile{'filepath'='$resources/hive_partition/non_hive/simple.parquet', 'format'='parquet'}; +select count(*) as cnt from parquet_non_hive where __mo_filepath like '%simple.parquet'; +➤ cnt[-5,64,0] 𝄀 +3 +select count(distinct __mo_filepath) as paths from parquet_non_hive; +➤ paths[-5,64,0] 𝄀 +1 +select count(*) as rows_with_path from parquet_non_hive where length(__mo_filepath) > 0; +➤ rows_with_path[-5,64,0] 𝄀 +3 +select count(*) as cnt from hive_single where year = 2024 and __mo_filepath like '%year=2024%'; +➤ cnt[-5,64,0] 𝄀 +5 +select count(*) as cnt from hive_single where year = 2024 and __mo_filepath like '%year=2020%'; +➤ cnt[-5,64,0] 𝄀 +0 +explain (check '["External Scan", "Filter Cond"]') select * from hive_single where year = 2024; +➤ AP QUERY PLAN ON MULTICN(10 core)[12,0,0] 𝄀 +Project 𝄀 + -> External Scan on hive_part_db.hive_single 𝄀 + Filter Cond: (hive_single.year = 2024) +explain (analyze true, check '["External Scan", "inputRows=", "outputRows="]') select * from hive_single where year = 2024; +-- @regex("inputRows=", true) +➤ ap query plan on multicn(10 core)[12,-1,0] 𝄀 +Project 𝄀 + Analyze: timeConsumed=0ms waitTime=0ms inputRows=5 outputRows=5 (min=5, max=5) InputSize=80 bytes OutputSize=80 bytes ReadSize=0 bytes|0 bytes|0 bytes MemorySize=80 bytes (min=80 bytes, max=80 bytes) 𝄀 + -> External Scan on hive_part_db.hive_single 𝄀 + Analyze: timeConsumed=0ms waitTime=0ms inputRows=5 outputRows=5 (min=5, max=5) InputSize=80 bytes OutputSize=80 bytes ReadSize=0 bytes|0 bytes|0 bytes MemorySize=165 bytes (min=80 bytes, max=80 bytes) 𝄀 + Filter Cond: (hive_single.year = 2024) +explain (check '["External Scan", "Filter Cond"]') select * from hive_multi where year = 2024 and month = '01'; +➤ AP QUERY PLAN ON MULTICN(10 core)[12,0,0] 𝄀 +Project 𝄀 + -> External Scan on hive_part_db.hive_multi 𝄀 + Filter Cond: (hive_multi.year = 2024), 
(hive_multi.month = '01') +explain (check '["External Scan", "Filter Cond"]') select * from hive_single where year in (2020, 2024); +➤ AP QUERY PLAN ON MULTICN(10 core)[12,0,0] 𝄀 +Project 𝄀 + -> External Scan on hive_part_db.hive_single 𝄀 + Filter Cond: hive_single.year in ([2020 2024]) +select hs.year, count(*) as cnt +from hive_single hs join hive_multi hm on hs.year = hm.year +where hs.year = 2024 +group by hs.year; +➤ year[4,32,0] ¦ cnt[-5,64,0] 𝄀 +2024 ¦ 45 +drop table if exists year_dim; +create table year_dim (y int, label varchar(20)); +insert into year_dim values (2020, 'y2020'), (2024, 'y2024'), (2025, 'y2025'); +select d.label, count(*) as cnt +from hive_single h join year_dim d on h.year = d.y +where h.year in (2020, 2024) +group by d.label order by d.label; +➤ label[12,-1,0] ¦ cnt[-5,64,0] 𝄀 +y2020 ¦ 5 𝄀 +y2024 ¦ 5 +select d.y, count(h.id) as cnt +from year_dim d left join hive_single h on h.year = d.y +group by d.y order by d.y; +➤ y[4,32,0] ¦ cnt[-5,64,0] 𝄀 +2020 ¦ 5 𝄀 +2024 ¦ 5 𝄀 +2025 ¦ 0 +select 'single' as src, count(*) as cnt from hive_single where year = 2024 +union all +select 'multi' as src, count(*) as cnt from hive_multi where year = 2024; +➤ src[12,-1,0] ¦ cnt[-5,64,0] 𝄀 +single ¦ 5 𝄀 +multi ¦ 9 +select id, year from hive_single +where year = (select max(year) from year_dim where label = 'y2024') +order by id limit 5; +correlated columns in aggregate function is not yet implemented +select year, count(*) as cnt from hive_single +where year in (select y from year_dim where label like 'y2024%') +group by year; +➤ year[4,32,0] ¦ cnt[-5,64,0] 𝄀 +2024 ¦ 5 +select count(*) as cnt from hive_single h +where exists (select 1 from year_dim d where d.y = h.year); +➤ cnt[-5,64,0] 𝄀 +10 +with yearly as ( +select year, sum(amount) as total from hive_single group by year +) +select year, total from yearly where total > 100 order by year; +➤ year[4,32,0] ¦ total[8,54,0] 𝄀 +2020 ¦ 105.0 𝄀 +2021 ¦ 105.0 𝄀 +2022 ¦ 105.0 𝄀 +2023 ¦ 105.0 𝄀 +2024 ¦ 105.0 +select year, +round(sum(case when amount > 21 then amount else 0 end), 1) as above, +round(sum(case when amount <= 21 then amount else 0 end), 1) as below +from hive_single +where year in (2020, 2024) +group by year order by year; +➤ year[4,32,0] ¦ above[8,54,0] ¦ below[8,54,0] 𝄀 +2020 ¦ 73.5 ¦ 31.5 𝄀 +2024 ¦ 73.5 ¦ 31.5 +select year, id, row_number() over (partition by year order by id) as rn +from hive_single where year in (2020, 2021) order by year, id; +➤ year[4,32,0] ¦ id[4,32,0] ¦ rn[-5,64,0] 𝄀 +2020 ¦ 20200 ¦ 1 𝄀 +2020 ¦ 20201 ¦ 2 𝄀 +2020 ¦ 20202 ¦ 3 𝄀 +2020 ¦ 20203 ¦ 4 𝄀 +2020 ¦ 20204 ¦ 5 𝄀 +2021 ¦ 20210 ¦ 1 𝄀 +2021 ¦ 20211 ¦ 2 𝄀 +2021 ¦ 20212 ¦ 3 𝄀 +2021 ¦ 20213 ¦ 4 𝄀 +2021 ¦ 20214 ¦ 5 +select count(*) as cnt from ( +select id, year from hive_single where year = 2023 +union all +select id, year from hive_single where year = 2024 +) t; +➤ cnt[-5,64,0] 𝄀 +10 +drop table if exists hive_invalid_type; +create external table hive_invalid_type ( +id int, +amount double, +year int +) infile{'filepath'='$resources/hive_partition/invalid_type/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; +select * from hive_invalid_type; +internal error: partition value type conversion failed: col=year, value='abc', path=year=abc/data.parquet: strconv.ParseInt: parsing "abc": invalid syntax +drop table if exists hive_url_encoded; +create external table hive_url_encoded ( +id int, +amount double, +country varchar(20) +) infile{'filepath'='$resources/hive_partition/url_encoded/', 'format'='parquet', 
'hive_partitioning'='true', 'hive_partition_columns'='country'}; +select * from hive_url_encoded; +internal error: hive partition directory name contains '%' which is not supported: 'country=US%2FCA' +drop table if exists hive_stage_err; +create external table hive_stage_err ( +id int, year int +) infile{'filepath'='stage://mystage/data/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; +invalid configuration: hive_partitioning does not support stage external tables +drop table if exists hive_not_null_default; +create external table hive_not_null_default ( +id int, +amount double, +year int not null +) infile{'filepath'='$resources/hive_partition/not_null_default/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; +select * from hive_not_null_default; +constraint violation: partition column 'year' is NOT NULL but directory has __HIVE_DEFAULT_PARTITION__ in path 'year=__HIVE_DEFAULT_PARTITION__/data.parquet'; allow NULL on the partition column or remove/rename the default partition directory +drop table if exists hive_col_overlap; +create external table hive_col_overlap ( +id int, +amount double, +year int +) infile{'filepath'='$resources/hive_partition/col_overlap/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; +select distinct year from hive_col_overlap; +➤ year[4,32,0] 𝄀 +2024 +select id, year from hive_col_overlap order by id; +➤ id[4,32,0] ¦ year[4,32,0] 𝄀 +1 ¦ 2024 𝄀 +2 ¦ 2024 +select id, amount from parquet_non_hive order by id; +➤ id[4,32,0] ¦ amount[8,54,0] 𝄀 +1 ¦ 100.0 𝄀 +2 ¦ 200.0 𝄀 +3 ¦ 300.0 +drop table if exists hive_mixed_case; +create external table hive_mixed_case ( +id int, +amount double, +Year int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='Year'}; +select count(*) as cnt from hive_mixed_case where Year = 2024; +➤ cnt[-5,64,0] 𝄀 +5 +select count(*) as cnt from hive_mixed_case where year = 2024; +➤ cnt[-5,64,0] 𝄀 +5 +select count(*) as cnt from hive_single hs where hs.year = 2024; +➤ cnt[-5,64,0] 𝄀 +5 +drop table if exists hive_single; +create external table hive_single ( +id int, +amount double, +year int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; +select count(*) from hive_single; +➤ count(*)[-5,64,0] 𝄀 +25 +drop database if exists hive_part_db; diff --git a/test/distributed/cases/table/hive_partition_external_table.sql b/test/distributed/cases/table/hive_partition_external_table.sql new file mode 100644 index 0000000000000..10cbd58f463bf --- /dev/null +++ b/test/distributed/cases/table/hive_partition_external_table.sql @@ -0,0 +1,494 @@ +-- Hive-style Partitioned External Table BVT Tests +-- +-- Coverage overview: +-- 1. DDL validation (success + negative) +-- 2. Single-level partition queries (full / EQ / IN / NOT IN / range / IS NULL) +-- 3. Multi-level partition queries (full / single / double / cross-level) +-- 4. String partition (EQ / IN / LIKE / ORDER BY partition col) +-- 5. NULL partition (__HIVE_DEFAULT_PARTITION__) +-- 6. Zero-padded integer partition +-- 7. __mo_filepath virtual column (hive + non-hive) +-- 8. EXPLAIN (CHECK + ANALYZE) +-- 9. Complex predicates (OR / BETWEEN / CAST / arithmetic) +-- 10. Aggregations & subqueries +-- 11. JOIN hive × hive, hive × internal +-- 12. UNION ALL / DISTINCT / HAVING +-- 13. 
Edge cases (type failure, URL-encoded, physical column overlap, stage rejection) + +drop database if exists hive_part_db; +create database hive_part_db; +use hive_part_db; + +-- ============================================================================ +-- 1. DDL Validation +-- ============================================================================ + +-- 1.1 Basic creation (single level) +drop table if exists hive_single; +create external table hive_single ( + id int, + amount double, + year int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; + +-- 1.2 DDL error: missing partition_columns +drop table if exists hive_err1; +create external table hive_err1 ( + id int, year int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true'}; + +-- 1.3 DDL error: format not parquet +drop table if exists hive_err2; +create external table hive_err2 ( + id int, year int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='csv', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; + +-- 1.4 DDL error: column not found +drop table if exists hive_err3; +create external table hive_err3 ( + id int, amount double +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='nonexistent'}; + +-- 1.5 DDL error: duplicate hive key +drop table if exists hive_err4; +create external table hive_err4 ( + id int, year int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partitioning'='false', 'hive_partition_columns'='year'}; + +-- 1.6 DDL error: hive_partitioning value not boolean +drop table if exists hive_err5; +create external table hive_err5 ( + id int, year int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='yes', 'hive_partition_columns'='year'}; + +-- 1.7 Partition column name matched case-insensitively (succeeds, no error) +-- Both the declared column `YEAR` and the partition reference `YeaR` lowercase +-- to `year`, so findColInTableDefCaseInsensitive finds the column. Same flow +-- as test 10.7 (hive_mixed_case), kept here for DDL-validation coverage. 
+drop table if exists hive_err6; +create external table hive_err6 ( + id int, YEAR int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='YeaR'}; + +-- 1.8 DDL error: duplicate partition column names +drop table if exists hive_err7; +create external table hive_err7 ( + id int, year int, amount double +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year,year'}; + +-- 1.9 DDL error: VECTOR partition column rejected +drop table if exists hive_err8; +create external table hive_err8 ( + id int, + emb vecf32(3) +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='emb'}; + +-- 1.10 SHOW CREATE TABLE +show create table hive_single; + +-- 1.11 LOAD DATA into hive table should be rejected (external table generic rejection) +load data infile '$resources/hive_partition/single_level/year=2024/data.parquet' into table hive_single; + +-- 1.12 hive_partitioning='false' treated as disabled (existing external table path) +drop table if exists hive_disabled; +create external table hive_disabled ( + id int, + amount double +) infile{'filepath'='$resources/hive_partition/non_hive/simple.parquet', 'format'='parquet', 'hive_partitioning'='false'}; +select count(*) from hive_disabled; + +-- ============================================================================ +-- 2. Single Level Partition Queries +-- ============================================================================ + +-- 2.1 Full scan (all 5 partitions x 5 rows = 25 rows) +select count(*) as cnt from hive_single; + +-- 2.2 EQ pruning +select year, count(*) as cnt from hive_single where year = 2024 group by year; + +-- 2.3 IN pruning +select year, count(*) as cnt from hive_single where year in (2020, 2024) group by year order by year; + +-- 2.4 IN pruning with single value +select year, count(*) as cnt from hive_single where year in (2022) group by year; + +-- 2.5 NOT IN (rowFilter fallback) +select year, count(*) as cnt from hive_single where year not in (2020, 2021, 2022) group by year order by year; + +-- 2.6 Non-prunable GT (rowFilter fallback, must not lose data) +select count(*) as cnt from hive_single where year > 2022; + +-- 2.7 BETWEEN (rowFilter fallback) +select year, count(*) as cnt from hive_single where year between 2021 and 2023 group by year order by year; + +-- 2.8 OR condition (rowFilter fallback; not prunable in P0) +select year, count(*) as cnt from hive_single where year = 2020 or year = 2024 group by year order by year; + +-- 2.9 Partition column only in SELECT +select distinct year from hive_single order by year; + +-- 2.10 Partition column only in WHERE +select sum(amount) as total from hive_single where year = 2020; + +-- 2.11 Partition col in HAVING (threshold < 105 to include all partitions) +select year, sum(amount) as total from hive_single group by year having sum(amount) >= 100 order by year; + +-- 2.12 COUNT DISTINCT on partition column +select count(distinct year) as distinct_years from hive_single; + +-- 2.13 Partition column in arithmetic expression (rowFilter evaluates) +select count(*) from hive_single where year + 1 = 2025; + +-- 2.14 CAST on partition column (rowFilter only, not pruned) +select count(*) from hive_single where cast(year as varchar) = '2024'; + +-- 2.15 ORDER BY partition column with LIMIT +select id, year from hive_single order by year 
asc, id desc limit 5; + +-- 2.16 Partition column IS NOT NULL (trivially true for non-null data) +select count(*) as cnt from hive_single where year is not null; + +-- 2.17 Partition column used both as predicate and projection +select year, id from hive_single where year = 2023 order by id; + +-- 2.18 Subquery with partition pruning +select year, cnt from ( + select year, count(*) as cnt from hive_single where year in (2020, 2021) group by year +) t order by year; + +-- ============================================================================ +-- 3. Multi Level Partition Queries +-- ============================================================================ + +drop table if exists hive_multi; +create external table hive_multi ( + id int, + amount double, + year int, + month varchar(2) +) infile{'filepath'='$resources/hive_partition/multi_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year,month'}; + +-- 3.1 Full scan (2 years x 3 months x 3 rows = 18) +select count(*) as cnt from hive_multi; + +-- 3.2 Single level pruning (outer) +select year, count(*) as cnt from hive_multi where year = 2024 group by year; + +-- 3.3 Single level pruning (inner only) +select month, count(*) as cnt from hive_multi where month = '01' group by month order by month; + +-- 3.4 Double level pruning +select year, month, count(*) as cnt from hive_multi where year = 2024 and month = '01' group by year, month; + +-- 3.5 Outer IN + inner IN +select year, month, count(*) as cnt from hive_multi +where year in (2024, 2025) and month in ('01', '02') +group by year, month order by year, month; + +-- 3.6 GROUP BY partition columns +select year, month, sum(amount) as total from hive_multi group by year, month order by year, month; + +-- 3.7 ORDER BY mixed partition + physical +select id, year, month, amount from hive_multi where year = 2025 order by month asc, id asc limit 6; + +-- 3.8 HAVING with partition columns +select year, count(*) as cnt from hive_multi group by year having count(*) >= 9 order by year; + +-- 3.9 Two-column distinct +select distinct year, month from hive_multi order by year, month; + +-- 3.10 Self join on partition columns +select a.year, a.month, count(*) as cnt +from hive_multi a join hive_multi b +on a.year = b.year and a.month = b.month and a.id = b.id +where a.year = 2024 +group by a.year, a.month order by a.year, a.month; + +-- 3.11 Group-by with subquery to count month per year +select year, mc from ( + select year, count(distinct month) as mc from hive_multi group by year +) t order by year; + +-- ============================================================================ +-- 4. 
String Partition +-- ============================================================================ + +drop table if exists hive_string; +create external table hive_string ( + id int, + amount double, + country varchar(10) +) infile{'filepath'='$resources/hive_partition/string_part/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='country'}; + +-- 4.1 String EQ (exact-byte match → prunable) +select country, count(*) as cnt from hive_string where country = 'US' group by country; + +-- 4.2 String IN +select country, count(*) as cnt from hive_string where country in ('US', 'CN') group by country order by country; + +-- 4.3 String LIKE (rowFilter only, not prunable) +select country, count(*) as cnt from hive_string where country like 'U%' group by country; + +-- 4.4 All countries +select country, count(*) as cnt from hive_string group by country order by country; + +-- 4.5 String partition != condition (rowFilter fallback) +select country, count(*) as cnt from hive_string where country != 'JP' group by country order by country; + +-- 4.6 String partition in ORDER BY (partition value in output) +select id, country from hive_string order by country, id limit 6; + +-- 4.7 Partition col length function +select country, length(country) as ln from hive_string group by country order by country; + +-- ============================================================================ +-- 5. NULL Partition (__HIVE_DEFAULT_PARTITION__) +-- ============================================================================ + +drop table if exists hive_null; +create external table hive_null ( + id int, + amount double, + year int +) infile{'filepath'='$resources/hive_partition/null_part/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; + +-- 5.1 NULL partition is visible +select id, year from hive_null order by id; + +-- 5.2 IS NULL filter +select count(*) as cnt from hive_null where year is null; + +-- 5.3 IS NOT NULL filter +select count(*) as cnt from hive_null where year is not null; + +-- 5.4 Aggregation handling NULL groups +select year, count(*) as cnt from hive_null group by year order by year; + +-- 5.5 Coalesce partition column +select coalesce(year, -1) as y, count(*) as cnt from hive_null group by y order by y; + +-- ============================================================================ +-- 6. Zero-padded Integer Partition +-- ============================================================================ + +drop table if exists hive_zeropad; +create external table hive_zeropad ( + id int, + amount double, + month int +) infile{'filepath'='$resources/hive_partition/zero_pad/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='month'}; + +-- 6.1 Integer comparison with zero-padded directory (month=01 matches WHERE month = 1) +select month, count(*) as cnt from hive_zeropad where month = 1 group by month; + +-- 6.2 Integer IN with mixed zero-padded targets +select month, count(*) as cnt from hive_zeropad where month in (1, 12) group by month order by month; + +-- 6.3 All months +select month, count(*) as cnt from hive_zeropad group by month order by month; + +-- 6.4 Non-matching value prunes all partitions +select count(*) as cnt from hive_zeropad where month = 99; + +-- ============================================================================ +-- 7. 
__mo_filepath Virtual Column +-- ============================================================================ + +-- 7.1 __mo_filepath on hive table (verify path contains partition directory) +select count(*) as cnt from hive_single where year = 2024 and __mo_filepath like '%year=2024%'; + +-- 7.2 __mo_filepath returns distinct paths per partition (projection, exercises +-- parquet prepare() filepathColIndex branch + fillVirtualColumns, not FilterFileList) +select count(distinct __mo_filepath) as paths from hive_single; + +-- 7.3 __mo_filepath projection + partition column aggregation (distinct value per partition) +select year, count(distinct __mo_filepath) as files from hive_single group by year order by year; + +-- 7.4 __mo_filepath as ONLY projected column (rowCountOnly path — no physical col read) +-- count(length(...)>0) confirms SetConstBytes fill produced non-empty bytes for every row. +select count(*) as rows_with_path from hive_single where length(__mo_filepath) > 0; + +-- 7.5 Non-hive parquet external table — projection-level assertion for pre-existing bug fix +-- The row count where __mo_filepath is non-empty must equal the file row count (3). +drop table if exists parquet_non_hive; +create external table parquet_non_hive ( + id int, + amount double +) infile{'filepath'='$resources/hive_partition/non_hive/simple.parquet', 'format'='parquet'}; +select count(*) as cnt from parquet_non_hive where __mo_filepath like '%simple.parquet'; +select count(distinct __mo_filepath) as paths from parquet_non_hive; +select count(*) as rows_with_path from parquet_non_hive where length(__mo_filepath) > 0; + +-- 7.6 Combined partition col + __mo_filepath filter (both conditions prune/filter) +select count(*) as cnt from hive_single where year = 2024 and __mo_filepath like '%year=2024%'; + +-- 7.7 Contradictory partition + filepath (empty result, but evaluates correctly) +select count(*) as cnt from hive_single where year = 2024 and __mo_filepath like '%year=2020%'; + +-- ============================================================================ +-- 8. EXPLAIN Verification +-- ============================================================================ + +-- 8.1 EXPLAIN shows External Scan with Filter Cond +explain (check '["External Scan", "Filter Cond"]') select * from hive_single where year = 2024; + +-- 8.2 EXPLAIN ANALYZE has runtime stats +-- @regex("inputRows=",true) +explain (analyze true, check '["External Scan", "inputRows=", "outputRows="]') select * from hive_single where year = 2024; + +-- 8.3 EXPLAIN multi-level partition scan shows both filter conditions retained (double-filter safety) +explain (check '["External Scan", "Filter Cond"]') select * from hive_multi where year = 2024 and month = '01'; + +-- 8.4 EXPLAIN with IN list +explain (check '["External Scan", "Filter Cond"]') select * from hive_single where year in (2020, 2024); + +-- ============================================================================ +-- 9. 
Complex Query Patterns +-- ============================================================================ + +-- 9.1 JOIN hive x hive on partition column +select hs.year, count(*) as cnt +from hive_single hs join hive_multi hm on hs.year = hm.year +where hs.year = 2024 +group by hs.year; + +-- 9.2 JOIN hive x internal (dimension) table +drop table if exists year_dim; +create table year_dim (y int, label varchar(20)); +insert into year_dim values (2020, 'y2020'), (2024, 'y2024'), (2025, 'y2025'); + +select d.label, count(*) as cnt +from hive_single h join year_dim d on h.year = d.y +where h.year in (2020, 2024) +group by d.label order by d.label; + +-- 9.3 LEFT JOIN preserves rows without match +select d.y, count(h.id) as cnt +from year_dim d left join hive_single h on h.year = d.y +group by d.y order by d.y; + +-- 9.4 UNION ALL merges partitions from two tables +select 'single' as src, count(*) as cnt from hive_single where year = 2024 +union all +select 'multi' as src, count(*) as cnt from hive_multi where year = 2024; + +-- 9.5 Scalar subquery with partition predicate +select id, year from hive_single +where year = (select max(year) from year_dim where label = 'y2024') +order by id limit 5; + +-- 9.6 IN subquery with hive partition column +select year, count(*) as cnt from hive_single +where year in (select y from year_dim where label like 'y2024%') +group by year; + +-- 9.7 EXISTS subquery +select count(*) as cnt from hive_single h +where exists (select 1 from year_dim d where d.y = h.year); + +-- 9.8 CTE over hive external table +with yearly as ( + select year, sum(amount) as total from hive_single group by year +) +select year, total from yearly where total > 100 order by year; + +-- 9.9 Aggregation with conditional sum (above vs below median amount) +select year, + round(sum(case when amount > 21 then amount else 0 end), 1) as above, + round(sum(case when amount <= 21 then amount else 0 end), 1) as below +from hive_single +where year in (2020, 2024) +group by year order by year; + +-- 9.10 Window function over partition column +-- Note: ROW_NUMBER() OVER (PARTITION BY year ORDER BY id) — demonstrates partition column usable in window spec +select year, id, row_number() over (partition by year order by id) as rn +from hive_single where year in (2020, 2021) order by year, id; + +-- 9.11 COUNT with subquery filter +select count(*) as cnt from ( + select id, year from hive_single where year = 2023 + union all + select id, year from hive_single where year = 2024 +) t; + +-- ============================================================================ +-- 10. Edge Cases +-- ============================================================================ + +-- 10.1 Type conversion failure: year declared as INT but directory has 'abc' +-- Error contains col + value + relative path (stable across machines). 
+drop table if exists hive_invalid_type; +create external table hive_invalid_type ( + id int, + amount double, + year int +) infile{'filepath'='$resources/hive_partition/invalid_type/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; +select * from hive_invalid_type; + +-- 10.2 URL-encoded directory name containing '%' should report error (P0 known limitation) +drop table if exists hive_url_encoded; +create external table hive_url_encoded ( + id int, + amount double, + country varchar(20) +) infile{'filepath'='$resources/hive_partition/url_encoded/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='country'}; +select * from hive_url_encoded; + +-- 10.3 Stage hive external table should be rejected at DDL +drop table if exists hive_stage_err; +create external table hive_stage_err ( + id int, year int +) infile{'filepath'='stage://mystage/data/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; + +-- 10.4 __HIVE_DEFAULT_PARTITION__ with NOT NULL column +-- Error contains col + "NOT NULL" + relative path (stable across machines). +drop table if exists hive_not_null_default; +create external table hive_not_null_default ( + id int, + amount double, + year int not null +) infile{'filepath'='$resources/hive_partition/not_null_default/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; +select * from hive_not_null_default; + +-- 10.5 Physical column overlap: parquet file has physical 'year=9999', path has year=2024 +-- Partition value from path (2024) must override the physical column (9999) +drop table if exists hive_col_overlap; +create external table hive_col_overlap ( + id int, + amount double, + year int +) infile{'filepath'='$resources/hive_partition/col_overlap/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; +select distinct year from hive_col_overlap; +select id, year from hive_col_overlap order by id; + +-- 10.6 Non-hive parquet smoke: regular physical column query (not just __mo_filepath) +select id, amount from parquet_non_hive order by id; + +-- 10.7 Case-insensitive column name in DDL +drop table if exists hive_mixed_case; +create external table hive_mixed_case ( + id int, + amount double, + Year int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='Year'}; +select count(*) as cnt from hive_mixed_case where Year = 2024; +select count(*) as cnt from hive_mixed_case where year = 2024; + +-- 10.8 Partition column referenced with table qualifier +select count(*) as cnt from hive_single hs where hs.year = 2024; + +-- 10.9 DROP then re-CREATE (catalog round-trip) +drop table if exists hive_single; +create external table hive_single ( + id int, + amount double, + year int +) infile{'filepath'='$resources/hive_partition/single_level/', 'format'='parquet', 'hive_partitioning'='true', 'hive_partition_columns'='year'}; +select count(*) from hive_single; + +-- ============================================================================ +-- 11. 
+
+-- ============================================================================
+-- 11. Cleanup
+-- ============================================================================
+drop database if exists hive_part_db;
diff --git a/test/distributed/resources/hive_partition/col_overlap/year=2024/data.parquet b/test/distributed/resources/hive_partition/col_overlap/year=2024/data.parquet
new file mode 100644
index 0000000000000..be5fcbd05d577
Binary files /dev/null and b/test/distributed/resources/hive_partition/col_overlap/year=2024/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/invalid_type/year=abc/data.parquet b/test/distributed/resources/hive_partition/invalid_type/year=abc/data.parquet
new file mode 100644
index 0000000000000..2bd09d5ee9dc2
Binary files /dev/null and b/test/distributed/resources/hive_partition/invalid_type/year=abc/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/multi_level/year=2024/month=01/data.parquet b/test/distributed/resources/hive_partition/multi_level/year=2024/month=01/data.parquet
new file mode 100644
index 0000000000000..d1b15d5f50c50
Binary files /dev/null and b/test/distributed/resources/hive_partition/multi_level/year=2024/month=01/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/multi_level/year=2024/month=02/data.parquet b/test/distributed/resources/hive_partition/multi_level/year=2024/month=02/data.parquet
new file mode 100644
index 0000000000000..d3a4e85360d47
Binary files /dev/null and b/test/distributed/resources/hive_partition/multi_level/year=2024/month=02/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/multi_level/year=2024/month=03/data.parquet b/test/distributed/resources/hive_partition/multi_level/year=2024/month=03/data.parquet
new file mode 100644
index 0000000000000..8d2e03e49bb0c
Binary files /dev/null and b/test/distributed/resources/hive_partition/multi_level/year=2024/month=03/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/multi_level/year=2025/month=01/data.parquet b/test/distributed/resources/hive_partition/multi_level/year=2025/month=01/data.parquet
new file mode 100644
index 0000000000000..9468e4c039a33
Binary files /dev/null and b/test/distributed/resources/hive_partition/multi_level/year=2025/month=01/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/multi_level/year=2025/month=02/data.parquet b/test/distributed/resources/hive_partition/multi_level/year=2025/month=02/data.parquet
new file mode 100644
index 0000000000000..efe6dc06e38f0
Binary files /dev/null and b/test/distributed/resources/hive_partition/multi_level/year=2025/month=02/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/multi_level/year=2025/month=03/data.parquet b/test/distributed/resources/hive_partition/multi_level/year=2025/month=03/data.parquet
new file mode 100644
index 0000000000000..c1ce6e382b696
Binary files /dev/null and b/test/distributed/resources/hive_partition/multi_level/year=2025/month=03/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/non_hive/simple.parquet b/test/distributed/resources/hive_partition/non_hive/simple.parquet
new file mode 100644
index 0000000000000..104373d4f8622
Binary files /dev/null and b/test/distributed/resources/hive_partition/non_hive/simple.parquet differ
diff --git a/test/distributed/resources/hive_partition/not_null_default/year=2024/data.parquet b/test/distributed/resources/hive_partition/not_null_default/year=2024/data.parquet
new file mode 100644
index 0000000000000..189304dad7120
Binary files /dev/null and b/test/distributed/resources/hive_partition/not_null_default/year=2024/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/not_null_default/year=__HIVE_DEFAULT_PARTITION__/data.parquet b/test/distributed/resources/hive_partition/not_null_default/year=__HIVE_DEFAULT_PARTITION__/data.parquet
new file mode 100644
index 0000000000000..a7d5246e26dd7
Binary files /dev/null and b/test/distributed/resources/hive_partition/not_null_default/year=__HIVE_DEFAULT_PARTITION__/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/null_part/year=2024/data.parquet b/test/distributed/resources/hive_partition/null_part/year=2024/data.parquet
new file mode 100644
index 0000000000000..332590794427a
Binary files /dev/null and b/test/distributed/resources/hive_partition/null_part/year=2024/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/null_part/year=__HIVE_DEFAULT_PARTITION__/data.parquet b/test/distributed/resources/hive_partition/null_part/year=__HIVE_DEFAULT_PARTITION__/data.parquet
new file mode 100644
index 0000000000000..93ee3b43f50b6
Binary files /dev/null and b/test/distributed/resources/hive_partition/null_part/year=__HIVE_DEFAULT_PARTITION__/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/single_level/year=2020/data.parquet b/test/distributed/resources/hive_partition/single_level/year=2020/data.parquet
new file mode 100644
index 0000000000000..08ac1f6dce3da
Binary files /dev/null and b/test/distributed/resources/hive_partition/single_level/year=2020/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/single_level/year=2021/data.parquet b/test/distributed/resources/hive_partition/single_level/year=2021/data.parquet
new file mode 100644
index 0000000000000..d9742decec3d1
Binary files /dev/null and b/test/distributed/resources/hive_partition/single_level/year=2021/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/single_level/year=2022/data.parquet b/test/distributed/resources/hive_partition/single_level/year=2022/data.parquet
new file mode 100644
index 0000000000000..9839f330234c3
Binary files /dev/null and b/test/distributed/resources/hive_partition/single_level/year=2022/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/single_level/year=2023/data.parquet b/test/distributed/resources/hive_partition/single_level/year=2023/data.parquet
new file mode 100644
index 0000000000000..16567adf5794c
Binary files /dev/null and b/test/distributed/resources/hive_partition/single_level/year=2023/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/single_level/year=2024/.crc b/test/distributed/resources/hive_partition/single_level/year=2024/.crc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/test/distributed/resources/hive_partition/single_level/year=2024/_SUCCESS b/test/distributed/resources/hive_partition/single_level/year=2024/_SUCCESS
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/test/distributed/resources/hive_partition/single_level/year=2024/data.parquet b/test/distributed/resources/hive_partition/single_level/year=2024/data.parquet
new file mode 100644
index 0000000000000..dc4244fb6e2ae
Binary files /dev/null and b/test/distributed/resources/hive_partition/single_level/year=2024/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/string_part/country=CN/data.parquet b/test/distributed/resources/hive_partition/string_part/country=CN/data.parquet
new file mode 100644
index 0000000000000..04c62e15df9b5
Binary files /dev/null and b/test/distributed/resources/hive_partition/string_part/country=CN/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/string_part/country=JP/data.parquet b/test/distributed/resources/hive_partition/string_part/country=JP/data.parquet
new file mode 100644
index 0000000000000..43e561cc95724
Binary files /dev/null and b/test/distributed/resources/hive_partition/string_part/country=JP/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/string_part/country=US/data.parquet b/test/distributed/resources/hive_partition/string_part/country=US/data.parquet
new file mode 100644
index 0000000000000..629b30740355a
Binary files /dev/null and b/test/distributed/resources/hive_partition/string_part/country=US/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/url_encoded/country=US%2FCA/data.parquet b/test/distributed/resources/hive_partition/url_encoded/country=US%2FCA/data.parquet
new file mode 100644
index 0000000000000..3018ca8c7c723
Binary files /dev/null and b/test/distributed/resources/hive_partition/url_encoded/country=US%2FCA/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/zero_pad/month=01/data.parquet b/test/distributed/resources/hive_partition/zero_pad/month=01/data.parquet
new file mode 100644
index 0000000000000..0ef14e6d2fd23
Binary files /dev/null and b/test/distributed/resources/hive_partition/zero_pad/month=01/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/zero_pad/month=02/data.parquet b/test/distributed/resources/hive_partition/zero_pad/month=02/data.parquet
new file mode 100644
index 0000000000000..958b53128158a
Binary files /dev/null and b/test/distributed/resources/hive_partition/zero_pad/month=02/data.parquet differ
diff --git a/test/distributed/resources/hive_partition/zero_pad/month=12/data.parquet b/test/distributed/resources/hive_partition/zero_pad/month=12/data.parquet
new file mode 100644
index 0000000000000..c6fcdd13a04e3
Binary files /dev/null and b/test/distributed/resources/hive_partition/zero_pad/month=12/data.parquet differ