Skip to content

Commit 916ea7e

Browse files
committed
_internal(feat[copy]): Add copytree_reflink for CoW-optimized copying
why: Improve test fixture performance on CoW filesystems (Btrfs, XFS, APFS) by leveraging reflink copying which only copies metadata instead of data. what: - Add copytree_reflink() using cp --reflink=auto with shutil fallback - Add _apply_ignore_patterns() for post-copy cleanup of ignored files
1 parent d4688b9 commit 916ea7e

File tree

1 file changed

+124
-0
lines changed

1 file changed

+124
-0
lines changed

src/libvcs/_internal/copy.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
"""Copy utilities with reflink (copy-on-write) support.
2+
3+
This module provides optimized directory copy operations that leverage
4+
filesystem-level copy-on-write (CoW) when available, with automatic
5+
fallback to standard copying on unsupported filesystems.
6+
7+
On Btrfs, XFS, and APFS filesystems, reflink copies are significantly faster
8+
as they only copy metadata - the actual data blocks are shared until modified.
9+
On ext4 and other filesystems, `cp --reflink=auto` silently falls back to
10+
regular copying with no performance penalty.
11+
"""
12+
13+
from __future__ import annotations
14+
15+
import os
16+
import pathlib
17+
import shutil
18+
import subprocess
19+
import typing as t
20+
21+
22+
def copytree_reflink(
    src: pathlib.Path,
    dst: pathlib.Path,
    ignore: t.Callable[..., t.Any] | None = None,
) -> pathlib.Path:
    """Copy directory tree using reflink (CoW) if available, fallback to copytree.

    The copy is first attempted with ``cp -a --reflink=auto``. On filesystems
    that support reflinks (e.g. Btrfs, XFS) this only copies metadata; on
    others ``cp`` silently performs a regular copy. If ``cp`` is missing,
    rejects the ``--reflink`` option, exits non-zero, or exceeds the 60 second
    timeout, the copy is redone with :func:`shutil.copytree`.

    Parameters
    ----------
    src : pathlib.Path
        Source directory to copy.
    dst : pathlib.Path
        Destination directory (must not exist). Missing parent directories
        are created.
    ignore : callable, optional
        Passed to shutil.copytree for fallback. For cp, patterns are applied
        after copy by deleting ignored files, so the callable is invoked with
        directories of the *destination* tree in that case.

    Returns
    -------
    pathlib.Path
        The destination path.

    Raises
    ------
    OSError
        Propagated from :func:`shutil.copytree` when the fallback copy also
        fails (for example ``FileExistsError`` if ``dst`` already existed).

    Examples
    --------
    >>> import pathlib
    >>> src = tmp_path / "source"
    >>> src.mkdir()
    >>> (src / "file.txt").write_text("hello")
    5
    >>> dst = tmp_path / "dest"
    >>> result = copytree_reflink(src, dst)
    >>> (result / "file.txt").read_text()
    'hello'

    With ignore patterns:

    >>> import shutil
    >>> src2 = tmp_path / "source2"
    >>> src2.mkdir()
    >>> (src2 / "keep.txt").write_text("keep")
    4
    >>> (src2 / "skip.pyc").write_text("skip")
    4
    >>> dst2 = tmp_path / "dest2"
    >>> result2 = copytree_reflink(src2, dst2, ignore=shutil.ignore_patterns("*.pyc"))
    >>> (result2 / "keep.txt").exists()
    True
    >>> (result2 / "skip.pyc").exists()
    False
    """
    dst.parent.mkdir(parents=True, exist_ok=True)

    # Remember whether dst was already there so that a failed cp attempt only
    # cleans up what cp itself created, never a caller-owned directory.
    dst_preexisting = os.path.lexists(dst)

    try:
        # Try cp --reflink=auto (Linux) - silent fallback on unsupported FS
        subprocess.run(
            ["cp", "-a", "--reflink=auto", str(src), str(dst)],
            check=True,
            capture_output=True,
            timeout=60,
        )
    except (subprocess.SubprocessError, OSError):
        # SubprocessError covers both CalledProcessError and TimeoutExpired;
        # OSError covers a missing or non-executable cp binary.
        if not dst_preexisting:
            # cp may have created part of dst before failing; shutil.copytree
            # refuses to copy onto an existing directory, so discard it first.
            if dst.is_dir() and not dst.is_symlink():
                shutil.rmtree(dst, ignore_errors=True)
            else:
                dst.unlink(missing_ok=True)
        # Fallback to shutil.copytree (Windows, cp not found, etc.)
        return pathlib.Path(shutil.copytree(src, dst, ignore=ignore))

    # cp succeeded - apply ignore patterns if needed
    if ignore is not None:
        _apply_ignore_patterns(dst, ignore)
    return dst
94+
95+
96+
def _apply_ignore_patterns(
    dst: pathlib.Path,
    ignore: t.Callable[[str, list[str]], t.Iterable[str]],
) -> None:
    """Remove files matching ignore patterns after cp --reflink copy.

    This function walks the destination directory and removes any files or
    directories that match the ignore patterns. This is necessary because
    `cp` doesn't support ignore patterns directly.

    Symbolic links are removed as links: the link itself is unlinked and its
    target is left untouched, including links that point at directories and
    dangling links.

    Parameters
    ----------
    dst : pathlib.Path
        Destination directory to clean up.
    ignore : callable
        A callable that takes (directory, names) and returns names to ignore.
        Compatible with shutil.ignore_patterns(). It is called once per
        directory visited under ``dst``.
    """
    for root, dirs, files in os.walk(dst, topdown=True):
        root_path = pathlib.Path(root)
        ignored = set(ignore(root, dirs + files))
        for name in ignored:
            target = root_path / name
            if target.is_symlink():
                # Check symlinks first: is_dir() follows links and
                # shutil.rmtree() refuses to operate on a symlink, while
                # exists() is False for a dangling link.
                target.unlink()
            elif target.is_dir():
                shutil.rmtree(target)
            elif target.exists():
                target.unlink()
        # Modify dirs in-place to skip ignored directories during walk
        dirs[:] = [d for d in dirs if d not in ignored]

0 commit comments

Comments
 (0)