diff --git a/src/module_writer/mod.rs b/src/module_writer/mod.rs index 9cf136fe5..5600df69a 100644 --- a/src/module_writer/mod.rs +++ b/src/module_writer/mod.rs @@ -10,7 +10,7 @@ use ignore::WalkBuilder; use indexmap::IndexMap; use itertools::Itertools as _; use normpath::PathExt as _; -use tracing::{debug, trace}; +use tracing::{debug, warn}; use crate::Metadata24; use crate::PyProjectToml; @@ -19,6 +19,7 @@ use crate::archive_source::FileSourceData; use crate::archive_source::GeneratedSourceData; use crate::project_layout::ProjectLayout; use crate::pyproject_toml::Format; +use crate::util::is_symlink_loop_error; pub(crate) mod glob; #[cfg(test)] @@ -132,48 +133,42 @@ pub fn write_python_part( python_packages.push(package_path); } - for absolute in WalkBuilder::new(&project_layout.project_root) - .hidden(false) - .parents(false) - .git_global(false) - .git_exclude(false) - .build() - { - let absolute = match absolute { - Ok(entry) => entry.into_path(), - Err(err) => { - // Skip errors for paths that don't need to be included, e.g. for directories - // that we don't have permissions for. - if let ignore::Error::WithPath { path, .. } = &err - && !python_packages.iter().any(|pkg| path.starts_with(pkg)) - { - // Log priority logging, we're only looking at the directory at all due to - // a particularity in how we're doing path traversal. - trace!( - "Skipping inaccessible path {} due to read error: {err}", - path.display() - ); + // Walk each python package directly (rather than rooting the walker at + // `project_root` and filtering): this keeps `follow_links(true)` scoped to + // symlinks inside python packages, instead of also descending into unrelated + // symlinked trees under the project (e.g. inside `target/` or `.venv/`). + for package in &python_packages { + for absolute in WalkBuilder::new(package) + .hidden(false) + .parents(false) + .git_global(false) + .git_exclude(false) + .follow_links(true) + .build() + { + let absolute = match absolute { + Ok(entry) => entry.into_path(), + Err(err) => { + if is_symlink_loop_error(&err) { + warn!( + "Skipping symlink loop in Python package source tree while building wheel: {err}" + ); + continue; + } + return Err(err.into()); + } + }; + let relative = absolute.strip_prefix(python_dir).unwrap(); + if !absolute.is_dir() { + if is_develop_build_artifact(relative, &project_layout.extension_name) { + debug!("Ignoring develop build artifact {}", relative.display()); continue; } - return Err(err.into()); - } - }; - if !python_packages - .iter() - .any(|path| absolute.starts_with(path)) - { - continue; - } - let relative = absolute.strip_prefix(python_dir).unwrap(); - if !absolute.is_dir() { - if is_develop_build_artifact(relative, &project_layout.extension_name) { - debug!("Ignoring develop build artifact {}", relative.display()); - continue; + let mode = file_permission_mode(&absolute)?; + writer + .add_file(relative, &absolute, permission_is_executable(mode)) + .context(format!("Failed to add file from {}", absolute.display()))?; } - let mode = file_permission_mode(&absolute)?; - writer - .add_file(relative, &absolute, permission_is_executable(mode)) - .context(format!("Failed to add file from {}", absolute.display()))?; } } diff --git a/src/source_distribution/pyproject.rs b/src/source_distribution/pyproject.rs index 0e0cb6fef..32937b78b 100644 --- a/src/source_distribution/pyproject.rs +++ b/src/source_distribution/pyproject.rs @@ -1,11 +1,12 @@ use crate::pyproject_toml::Format; +use crate::util::is_symlink_loop_error; use crate::{ModuleWriter, PyProjectToml, SDistWriter, VirtualWriter}; use anyhow::{Context, Result}; use path_slash::PathExt as _; use pyproject_toml::check_pep639_glob; use std::collections::HashSet; use std::path::{Path, PathBuf}; -use tracing::{debug, trace}; +use tracing::{debug, trace, warn}; use super::SdistContext; use super::cargo_toml_rewrite::parse_toml_file; @@ -72,8 +73,19 @@ pub(super) fn add_python_sources( } for package in python_packages { - for entry in ignore::Walk::new(package) { - let source = entry?.into_path(); + for entry in ignore::WalkBuilder::new(package).follow_links(true).build() { + let source = match entry { + Ok(entry) => entry.into_path(), + Err(err) => { + if is_symlink_loop_error(&err) { + warn!( + "Skipping symlink loop in Python package source tree while building source distribution: {err}" + ); + continue; + } + return Err(err.into()); + } + }; if is_compiled_artifact(&source) { debug!("Ignoring {}", source.display()); continue; diff --git a/src/util.rs b/src/util.rs index 897e61d20..d8d62cf3f 100644 --- a/src/util.rs +++ b/src/util.rs @@ -5,6 +5,17 @@ use zip::DateTime; use fs_err as fs; +pub(crate) fn is_symlink_loop_error(error: &ignore::Error) -> bool { + match error { + ignore::Error::Loop { .. } => true, + ignore::Error::Partial(errors) => errors.iter().any(is_symlink_loop_error), + ignore::Error::WithLineNumber { err, .. } + | ignore::Error::WithPath { err, .. } + | ignore::Error::WithDepth { err, .. } => is_symlink_loop_error(err), + _ => false, + } +} + pub(crate) fn sha256_hex(hash: &[u8]) -> String { use std::fmt::Write; diff --git a/tests/common/other.rs b/tests/common/other.rs index de7d46fdf..0380b4099 100644 --- a/tests/common/other.rs +++ b/tests/common/other.rs @@ -47,6 +47,34 @@ pub fn copy_dir_recursive(src: &Path, dst: &Path) -> std::io::Result<()> { Ok(()) } +#[cfg(unix)] +pub fn copy_pyo3_mixed_py_subdir_with_symlinks() -> Result<(tempfile::TempDir, PathBuf)> { + use std::os::unix::fs::symlink; + + let temp_dir = tempfile::tempdir()?; + let project_dir = temp_dir.path().join("pyo3-mixed-py-subdir"); + copy_dir_recursive(Path::new("test-crates/pyo3-mixed-py-subdir"), &project_dir)?; + + let external_sources = temp_dir.path().join("external-python"); + let linked_dir = external_sources.join("linked_dir"); + fs_err::create_dir_all(&linked_dir)?; + fs_err::write(external_sources.join("linked_file.py"), "VALUE = 1\n")?; + fs_err::write(linked_dir.join("nested.py"), "VALUE = 2\n")?; + symlink(&linked_dir, linked_dir.join("cycle"))?; + + let package_dir = project_dir + .join("python") + .join("pyo3_mixed_py_subdir") + .join("python_module"); + symlink( + external_sources.join("linked_file.py"), + package_dir.join("linked_file.py"), + )?; + symlink(&linked_dir, package_dir.join("linked_dir"))?; + + Ok((temp_dir, project_dir)) +} + /// Tries to compile a sample crate (pyo3-pure) for musl, /// given that rustup and the the musl target are installed /// diff --git a/tests/run/sdist.rs b/tests/run/sdist.rs index 95189e7f4..0aaaa1bbb 100644 --- a/tests/run/sdist.rs +++ b/tests/run/sdist.rs @@ -3,6 +3,7 @@ use expect_test::expect; use indoc::indoc; use maturin::pyproject_toml::SdistGenerator; use maturin::{BuildOptions, BuildOrchestrator, CargoOptions, OutputOptions, unpack_sdist}; +use std::collections::BTreeSet; use std::path::Path; use std::process::Command; use url::Url; @@ -1112,3 +1113,34 @@ fn workspace_license_files() { Some((Path::new("hello_world-0.1.0/Cargo.toml"), cargo_toml)), ) } + +#[test] +#[cfg(unix)] +fn pyo3_mixed_py_subdir_includes_symlinked_python_files_sdist() { + handle_result((|| -> anyhow::Result<()> { + let (_temp_dir, project_dir) = other::copy_pyo3_mixed_py_subdir_with_symlinks()?; + + let mut archive = other::build_source_distribution( + &project_dir, + SdistGenerator::Cargo, + "sdist-pyo3-mixed-py-subdir-symlinks", + )?; + let mut files = BTreeSet::new(); + for entry in archive.entries()? { + let entry = entry?; + files.insert(format!("{}", entry.path()?.display())); + } + + for expected in [ + "pyo3_mixed_py_subdir-2.1.3/python/pyo3_mixed_py_subdir/python_module/linked_file.py", + "pyo3_mixed_py_subdir-2.1.3/python/pyo3_mixed_py_subdir/python_module/linked_dir/nested.py", + ] { + assert!( + files.contains(expected), + "expected `{expected}` in sdist, got {files:#?}" + ); + } + + Ok(()) + })()); +} diff --git a/tests/run/wheel.rs b/tests/run/wheel.rs index 520b7a089..59c51eac3 100644 --- a/tests/run/wheel.rs +++ b/tests/run/wheel.rs @@ -57,6 +57,37 @@ fn pyo3_mixed_py_subdir_include_wheel_files() { )) } +#[test] +#[cfg(unix)] +fn pyo3_mixed_py_subdir_includes_symlinked_python_files() { + handle_result((|| { + let (_temp_dir, project_dir) = other::copy_pyo3_mixed_py_subdir_with_symlinks()?; + + let mut expected = vec![ + "assets/extra_data.txt", + "pyo3_mixed_py_subdir-2.1.3.dist-info/METADATA", + "pyo3_mixed_py_subdir-2.1.3.dist-info/RECORD", + "pyo3_mixed_py_subdir-2.1.3.dist-info/WHEEL", + "pyo3_mixed_py_subdir-2.1.3.dist-info/entry_points.txt", + "pyo3_mixed_py_subdir/__init__.py", + "pyo3_mixed_py_subdir/python_module/__init__.py", + "pyo3_mixed_py_subdir/python_module/double.py", + "pyo3_mixed_py_subdir/python_module/linked_dir/nested.py", + "pyo3_mixed_py_subdir/python_module/linked_file.py", + ]; + #[cfg(feature = "sbom")] + expected + .push("pyo3_mixed_py_subdir-2.1.3.dist-info/sboms/pyo3-mixed-py-subdir.cyclonedx.json"); + + assert_eq!( + other::wheel_files(&project_dir, "wheel-files-pyo3-mixed-py-subdir-symlinks")?, + expected.into_iter().map(str::to_owned).collect() + ); + + Ok(()) + })()) +} + #[test] fn pyo3_wheel_record_has_normalized_paths() { handle_result(other::check_wheel_paths(