delta-io · PorridgeSwim · Jun 23, 2026 · zikangh · Jun 24, 2026
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSinkSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSinkSuite.scala
@@ -81,21 +81,35 @@ class DeltaSinkSuite
 
   import testImplicits._
 
+  /**
+   * Runs `f(table, checkpointDir)` against a uniquely named catalog table, inside `withTable`.
+   */
+  protected def withSinkTarget(f: (String, File) => Unit): Unit = {
+    withTempDir { checkpointDir =>
+      // Unique table name per invocation: the target is a managed table at a deterministic
+      // warehouse path, so a fixed name leaks state across tests (stale data / DeltaLog cache).
+      val table = "test_delta_sink_" + checkpointDir.getName.replaceAll("[^A-Za-z0-9]", "")
+      withTable(table) {
+        f(table, checkpointDir)
+      }
+    }
+  }
+
   test("append mode") {
     failAfter(streamingTimeout) {
-      withTempDirs { (outputDir, checkpointDir) =>
+      withSinkTarget { (table, checkpointDir) =>
         val inputData = MemoryStream[Int]
         val df = inputData.toDF()
         val query = df.writeStream
           .option("checkpointLocation", checkpointDir.getCanonicalPath)
           .format("delta")
-          .start(outputDir.getCanonicalPath)
-        val log = DeltaLog.forTable(spark, outputDir.getCanonicalPath)
+          .toTable(table)
+        val log = DeltaLog.forTable(spark, TableIdentifier(table))
         try {
           inputData.addData(1)
           query.processAllAvailable()
 
-          val outputDf = spark.read.format("delta").load(outputDir.getCanonicalPath)
+          val outputDf = spark.read.table(table)
           checkDatasetUnorderly(outputDf.as[Int], 1)
           assert(log.update().transactions.head == (query.id.toString -> 0L))
 
@@ -119,7 +133,7 @@ class DeltaSinkSuite
 
   test("complete mode") {
     failAfter(streamingTimeout) {
-      withTempDirs { (outputDir, checkpointDir) =>
+      withSinkTarget { (table, checkpointDir) =>
         val inputData = MemoryStream[Int]
         val df = inputData.toDF()
         val query =
@@ -128,13 +142,13 @@ class DeltaSinkSuite
             .outputMode("complete")
             .option("checkpointLocation", checkpointDir.getCanonicalPath)
             .format("delta")
-            .start(outputDir.getCanonicalPath)
-        val log = DeltaLog.forTable(spark, outputDir.getCanonicalPath)
+            .toTable(table)
+        val log = DeltaLog.forTable(spark, TableIdentifier(table))
         try {
           inputData.addData(1)
           query.processAllAvailable()
 
-          val outputDf = spark.read.format("delta").load(outputDir.getCanonicalPath)
+          val outputDf = spark.read.table(table)
           checkDatasetUnorderly(outputDf.as[Long], 1L)
           assert(log.update().transactions.head == (query.id.toString -> 0L))
 
@@ -158,15 +172,15 @@ class DeltaSinkSuite
 
   test("update mode: not supported") {
     failAfter(streamingTimeout) {
-      withTempDirs { (outputDir, checkpointDir) =>
+      withSinkTarget { (table, checkpointDir) =>
         val inputData = MemoryStream[Int]
         val df = inputData.toDF()
         val e = intercept[AnalysisException] {
           df.writeStream
             .option("checkpointLocation", checkpointDir.getCanonicalPath)
             .outputMode("update")
             .format("delta")
-            .start(outputDir.getCanonicalPath)
+            .toTable(table)
         }
         Seq("update", "not support").foreach { msg =>
           assert(e.getMessage.toLowerCase(Locale.ROOT).contains(msg))
@@ -194,7 +208,7 @@ class DeltaSinkSuite
   }
 
   test("SPARK-21167: encode and decode path correctly") {
-    withTempDirs { (outputDir, checkpointDir) =>
+    withSinkTarget { (table, checkpointDir) =>
       val inputData = MemoryStream[String]
       val query = inputData.toDS()
         .map(s => (s, s.length))
@@ -203,7 +217,7 @@ class DeltaSinkSuite
         .partitionBy("value")
         .option("checkpointLocation", checkpointDir.getCanonicalPath)
         .format("delta")
-        .start(outputDir.getCanonicalPath)
+        .toTable(table)
 
       try {
         // The output is partitioned by "value", so the value will appear in the file path.
@@ -212,7 +226,7 @@ class DeltaSinkSuite
         failAfter(streamingTimeout) {
           query.processAllAvailable()
         }
-        val outputDf = spark.read.format("delta").load(outputDir.getCanonicalPath)
+        val outputDf = spark.read.table(table)
         checkDatasetUnorderly(outputDf.as[(String, Int)], ("hello world", "hello world".length))
       } finally {
         query.stop()
@@ -221,7 +235,7 @@ class DeltaSinkSuite
   }
 
   test("partitioned writing and batch reading") {
-    withTempDirs { (outputDir, checkpointDir) =>
+    withSinkTarget { (table, checkpointDir) =>
       val inputData = MemoryStream[Int]
       val ds = inputData.toDS()
       val query =
@@ -231,15 +245,15 @@ class DeltaSinkSuite
           .partitionBy("id")
           .option("checkpointLocation", checkpointDir.getCanonicalPath)
           .format("delta")
-          .start(outputDir.getCanonicalPath)
+          .toTable(table)
       try {
 
         inputData.addData(1, 2, 3)
         failAfter(streamingTimeout) {
           query.processAllAvailable()
         }
 
-        val outputDf = spark.read.format("delta").load(outputDir.getCanonicalPath)
+        val outputDf = spark.read.table(table)
         val expectedSchema = new StructType()
           .add(StructField("id", IntegerType))
           .add(StructField("value", IntegerType))
@@ -301,7 +315,7 @@ class DeltaSinkSuite
   }
 
   test("work with aggregation + watermark") {
-    withTempDirs { (outputDir, checkpointDir) =>
+    withSinkTarget { (table, checkpointDir) =>
       val inputData = MemoryStream[Long]
       val inputDF = inputData.toDF.toDF("time")
       val outputDf = inputDF
@@ -315,7 +329,7 @@ class DeltaSinkSuite
         outputDf.writeStream
           .option("checkpointLocation", checkpointDir.getCanonicalPath)
           .format("delta")
-          .start(outputDir.getCanonicalPath)
+          .toTable(table)
       try {
         def addTimestamp(timestampInSecs: Int*): Unit = {
           inputData.addData(timestampInSecs.map(_ * 1L): _*)
@@ -325,7 +339,7 @@ class DeltaSinkSuite
         }
 
         def check(expectedResult: ((Long, Long), Long)*): Unit = {
-          val outputDf = spark.read.format("delta").load(outputDir.getCanonicalPath)
+          val outputDf = spark.read.table(table)
             .selectExpr(
               "CAST(start as BIGINT) AS start",
               "CAST(end as BIGINT) AS end",
@@ -350,7 +364,7 @@ class DeltaSinkSuite
   }
 
   test("throw exception when users are trying to write in batch with different partitioning") {
-    withTempDirs { (outputDir, checkpointDir) =>
+    withSinkTarget { (table, checkpointDir) =>
       val inputData = MemoryStream[Int]
       val ds = inputData.toDS()
       val query =
@@ -360,24 +374,26 @@ class DeltaSinkSuite
           .partitionBy("id")
           .option("checkpointLocation", checkpointDir.getCanonicalPath)
           .format("delta")
-          .start(outputDir.getCanonicalPath)
+          .toTable(table)
       try {
 
         inputData.addData(1, 2, 3)
         failAfter(streamingTimeout) {
           query.processAllAvailable()
         }
 
-        val e = intercept[AnalysisException] {
+        val e = intercept[IllegalArgumentException] {
           spark.range(100)
             .select('id.cast("integer"), 'id % 4 as "by4", 'id.cast("integer") * 1000 as "value")
             .write
             .format("delta")
             .partitionBy("id", "by4")
             .mode("append")
-            .save(outputDir.getCanonicalPath)
+            .saveAsTable(table)
         }
-        assert(e.getMessage.contains("Partition columns do not match"))
+        assert(
+          e.getMessage.contains(
+            "The provided partitioning or clustering columns do not match the existing table's"))
 
       } finally {
         query.stop()
@@ -464,32 +480,31 @@ class DeltaSinkSuite
   }
 
   test("can't write out with all columns being partition columns") {
-    withTempDirs { (outputDir, checkpointDir) =>
+    withSinkTarget { (table, checkpointDir) =>
       val inputData = MemoryStream[Int]
       val ds = inputData.toDS()
-      val query =
+      // Name-based: partitioning by every column is rejected up front by `toTable` (at table
+      // creation) with an AnalysisException, not a StreamingQueryException once the stream runs.
+      val e = intercept[AnalysisException] {
         ds.map(i => (i, i * 1000))
           .toDF("id", "value")
           .writeStream
           .partitionBy("id", "value")
           .option("checkpointLocation", checkpointDir.getCanonicalPath)
           .format("delta")
-          .start(outputDir.getCanonicalPath)
-      val e = intercept[StreamingQueryException] {
-        inputData.addData(1)
-        query.awaitTermination(30000)
+          .toTable(table)
       }
-      assert(e.cause.isInstanceOf[AnalysisException])
+      assert(e.getMessage.contains("Cannot use all columns for partition columns"))
     }
   }
 
   test("streaming write correctly sets isBlindAppend in CommitInfo") {
-    withTempDirs { (outputDir, checkpointDir) =>
+    withSinkTarget { (table, checkpointDir) =>
 
       val input = MemoryStream[Int]
       val inputDataStream = input.toDF().toDF("value")
 
-      def tableData: DataFrame = spark.read.format("delta").load(outputDir.toString)
+      def tableData: DataFrame = spark.read.table(table)
 
       def appendToTable(df: DataFrame): Unit = failAfter(streamingTimeout) {
         var q: StreamingQuery = null
@@ -498,7 +513,7 @@ class DeltaSinkSuite
           q = df.writeStream
             .format("delta")
             .option("checkpointLocation", checkpointDir.toString)
-            .start(outputDir.toString)
+            .toTable(table)
           q.processAllAvailable()
         } finally {
           if (q != null) q.stop()
@@ -507,7 +522,7 @@ class DeltaSinkSuite
 
       var lastCheckedVersion = -1L
       def isLastCommitBlindAppend: Boolean = {
-        val log = DeltaLog.forTable(spark, outputDir.toString)
+        val log = DeltaLog.forTable(spark, TableIdentifier(table))
         val lastVersion = log.update().version
         assert(lastVersion > lastCheckedVersion, "no new commit was made")
         lastCheckedVersion = lastVersion
@@ -538,48 +553,46 @@ class DeltaSinkSuite
         .add("i", IntegerType, nullable = false))
       .add("c", IntegerType, nullable = false)
 
-    withTempDir { base =>
-      val sourceDir = new File(base, "source").getCanonicalPath
-      val tableDir = new File(base, "output").getCanonicalPath
-      val chkDir = new File(base, "checkpoint").getCanonicalPath
-
-      FileUtils.write(new File(sourceDir, "a.json"), jsonRec)
+    withSinkTarget { (table, checkpointDir) =>
+      withTempDir { sourceDir =>
+        FileUtils.write(new File(sourceDir, "a.json"), jsonRec)
 
-      val q = spark.readStream
-        .format("json")
-        .schema(schema)
-        .load(sourceDir)
-        .withColumn("file", input_file_name()) // Not sure why needs this to reproduce
-        .writeStream
-        .format("delta")
-        .trigger(org.apache.spark.sql.streaming.Trigger.Once)
-        .option("checkpointLocation", chkDir)
-        .start(tableDir)
+        val q = spark.readStream
+          .format("json")
+          .schema(schema)
+          .load(sourceDir.getCanonicalPath)
+          .withColumn("file", input_file_name()) // Not sure why needs this to reproduce
+          .writeStream
+          .format("delta")
+          .trigger(org.apache.spark.sql.streaming.Trigger.Once)
+          .option("checkpointLocation", checkpointDir.getCanonicalPath)
+          .toTable(table)
 
-      q.awaitTermination()
+        q.awaitTermination()
 
-      checkAnswer(
-        spark.read.format("delta").load(tableDir).drop("file"),
-        Seq(Row("ss", Row("ss", null), null)))
+        checkAnswer(
+          spark.read.table(table).drop("file"),
+          Seq(Row("ss", Row("ss", null), null)))
+      }
     }
   }
 
   test("history includes user-defined metadata for DataFrame.writeStream API") {
     failAfter(streamingTimeout) {
-      withTempDirs { (outputDir, checkpointDir) =>
+      withSinkTarget { (table, checkpointDir) =>
         val inputData = MemoryStream[Int]
         val df = inputData.toDF()
         val query = df.writeStream
           .option("checkpointLocation", checkpointDir.getCanonicalPath)
           .option("userMetadata", "testMeta!")
           .format("delta")
-          .start(outputDir.getCanonicalPath)
-        val log = DeltaLog.forTable(spark, outputDir.getCanonicalPath)
+          .toTable(table)
+        val log = DeltaLog.forTable(spark, TableIdentifier(table))
 
         inputData.addData(1)
         query.processAllAvailable()
 
-        val lastCommitInfo = io.delta.tables.DeltaTable.forPath(spark, outputDir.getCanonicalPath)
+        val lastCommitInfo = io.delta.tables.DeltaTable.forName(spark, table)
             .history(1).as[DeltaHistory].head
 
         assert(lastCommitInfo.userMetadata === Some("testMeta!"))
@@ -625,7 +638,7 @@ class DeltaSinkSuite
 
   test("DeltaSink rejects DataFrame with UDT containing NullType") {
     failAfter(streamingTimeout) {
-      withTempDirs { (outputDir, checkpointDir) =>
+      withSinkTarget { (table, checkpointDir) =>
         val inputData = MemoryStream[Int]
         val ds = inputData.toDS()
         val dsWriter =
@@ -636,7 +649,7 @@ class DeltaSinkSuite
             .format("delta")
 
         val wrapperException = intercept[StreamingQueryException] {
-          val q = dsWriter.start(outputDir.getCanonicalPath)
+          val q = dsWriter.toTable(table)
           inputData.addData(42)
           q.processAllAvailable()
         }