Speed up the "checkout" subcommand - git-wad - Manage files via git but not their content

commit 79c1d2a2bee2fc11c9d25c952296e509196f1f1b
parent 3cc05f446024ac5b7f648bb33cfc2d033e27af7b
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Mon, 18 Aug 2025 18:41:41 +0200

Speed up the "checkout" subcommand

Detect unresolved WAD files by looking for the git-wad header only in the
first bytes of each file instead of scanning its entire content, which could
take a long time for large files.

Diffstat:
M git-wad  | 18 +++++++++++++-----

1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/git-wad b/git-wad
@@ -562,15 +562,23 @@ checkout()
 
   #   Lists the files. Ensure verbatim filename (-z option)
   # | Ensure one entry per line (see the output section of git-ls-tree(1))
-  # | Protect all characters in preparation for using xargs
-  # | Retain only unresolved WAD file
   # | Restore content
     git ls-files -z "${working_tree}" \
   | tr__ '\0' '\n' \
-  | sed 's/./\\&/g' \
-  | xargs grep -le "^${GIT_WAD_HEADER} [0-9a-z]\{64\} [0-9]\{1,\}$"  \
   | while read -r i; do
-    restore "${i}"
+    # Search for the git-wad header only in the first few bytes of the
+    # file to avoid unnecessarily processing all of the data; the
+    # git-wad header appears at the very beginning of the file, and once
+    # restored, searching the entire file can take a long time because
+    # it can be very large. Thus, considering only a few bytes is not
+    # only reliable, but also significantly speeds up the "checkout"
+    # command.
+    bytes=$(dd if="${i}" bs=128 count=1 2> /dev/null | tr__ '\0' '0')
+    if printf '%s' "${bytes}" \
+     | grep -qe "^${GIT_WAD_HEADER} [0-9a-z]\{64\} [0-9]\{1,\}$"
+    then
+      restore "${i}"
+    fi
   done
 }

	git-wad Manage files via git but not their content
	git clone git://git.meso-star.fr/git-wad.git
	Log | Files | Refs | README | LICENSE