diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R index 663e1b8f085c..dd675b40d09a 100644 --- a/r/R/dataset-write.R +++ b/r/R/dataset-write.R @@ -218,7 +218,12 @@ write_dataset <- function( existing_data_behavior_opts <- c("delete_matching", "overwrite", "error") existing_data_behavior <- match(match.arg(existing_data_behavior), existing_data_behavior_opts) - 1L - if (!missing(max_rows_per_file) && missing(max_rows_per_group) && max_rows_per_group > max_rows_per_file) { + if ( + !missing(max_rows_per_file) && + missing(max_rows_per_group) && + max_rows_per_file > 0 && + max_rows_per_group > max_rows_per_file + ) { max_rows_per_group <- max_rows_per_file } @@ -290,7 +295,12 @@ write_delim_dataset <- function( quote = c("needed", "all", "none"), preserve_order = FALSE ) { - if (!missing(max_rows_per_file) && missing(max_rows_per_group) && max_rows_per_group > max_rows_per_file) { + if ( + !missing(max_rows_per_file) && + missing(max_rows_per_group) && + max_rows_per_file > 0 && + max_rows_per_group > max_rows_per_file + ) { max_rows_per_group <- max_rows_per_file } @@ -343,7 +353,12 @@ write_csv_dataset <- function( quote = c("needed", "all", "none"), preserve_order = FALSE ) { - if (!missing(max_rows_per_file) && missing(max_rows_per_group) && max_rows_per_group > max_rows_per_file) { + if ( + !missing(max_rows_per_file) && + missing(max_rows_per_group) && + max_rows_per_file > 0 && + max_rows_per_group > max_rows_per_file + ) { max_rows_per_group <- max_rows_per_file } @@ -395,7 +410,12 @@ write_tsv_dataset <- function( quote = c("needed", "all", "none"), preserve_order = FALSE ) { - if (!missing(max_rows_per_file) && missing(max_rows_per_group) && max_rows_per_group > max_rows_per_file) { + if ( + !missing(max_rows_per_file) && + missing(max_rows_per_group) && + max_rows_per_file > 0 && + max_rows_per_group > max_rows_per_file + ) { max_rows_per_group <- max_rows_per_file } diff --git a/r/tests/testthat/test-dataset-write.R b/r/tests/testthat/test-dataset-write.R index d62b888163c8..8fed358dc372 100644 --- a/r/tests/testthat/test-dataset-write.R +++ b/r/tests/testthat/test-dataset-write.R @@ -576,6 +576,16 @@ test_that("max_rows_per_group is adjusted if at odds with max_rows_per_file", { ) }) +test_that("max_rows_per_file = 0 does not trigger max_rows_per_group adjustment (ARROW-40742)", { + skip_if_not_available("parquet") + + # max_rows_per_file = 0 means "no limit" and should not error + dst_dir <- make_temp_dir() + expect_no_error( + write_dataset(df1, dst_dir, max_rows_per_file = 0L) + ) +}) + test_that("write_dataset checks for format-specific arguments", { df <- tibble::tibble(