From 663fc583e90bb955624569ebb3e8cb5d58774add Mon Sep 17 00:00:00 2001
From: Riken Maharjan
Date: Tue, 22 Jul 2025 18:49:45 +0000
Subject: [PATCH 1/7] testing with image

---
Note: the workflow added here hands the changed-file list from one step to
the next through $GITHUB_ENV (multiline "name<<EOF" form) and reads it back
as the shell variable $changed_files.

 .github/workflows/blob-store-extensions.txt  |  93 +++++++++++++
 .github/workflows/check-blob-store-files.yml | 136 +++++++++++++++++++
 test.img                                     |   0
 3 files changed, 229 insertions(+)
 create mode 100644 .github/workflows/blob-store-extensions.txt
 create mode 100644 .github/workflows/check-blob-store-files.yml
 create mode 100644 test.img

diff --git a/.github/workflows/blob-store-extensions.txt b/.github/workflows/blob-store-extensions.txt
new file mode 100644
index 00000000000..a5960de1354
--- /dev/null
+++ b/.github/workflows/blob-store-extensions.txt
@@ -0,0 +1,93 @@
+# File extensions that should be stored in blob store instead of git repository
+# Lines starting with # are comments and will be ignored
+# Extensions should be lowercase without the leading dot
+
+# Source tarballs and archives
+tar
+gz
+tgz
+bz2
+xz
+zip
+rar
+7z
+tar.gz
+tar.xz
+tar.bz2
+
+# Binary executables
+bin
+exe
+dll
+so
+dylib
+a
+lib
+obj
+o
+rpm
+
+# Image files
+jpg
+jpeg
+png
+gif
+bmp
+tiff
+tif
+svg
+ico
+webp
+raw
+
+# Audio/Video files
+mp3
+wav
+avi
+mp4
+mkv
+mov
+wmv
+flv
+ogg
+m4a
+aac
+
+# Document formats (binary)
+pdf
+doc
+docx
+xls
+xlsx
+ppt
+pptx
+
+# Package files
+rpm
+deb
+msi
+pkg
+dmg
+iso
+
+# Compressed source packages
+gem
+whl
+egg
+
+# Database files
+db
+sqlite
+sqlite3
+
+# Fonts
+ttf
+otf
+woff
+woff2
+
+# Other binary formats
+jar
+war
+ear
+class
\ No newline at end of file
diff --git a/.github/workflows/check-blob-store-files.yml b/.github/workflows/check-blob-store-files.yml
new file mode 100644
index 00000000000..dcc283f984f
--- /dev/null
+++ b/.github/workflows/check-blob-store-files.yml
@@ -0,0 +1,136 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+name: Check Blob Store Files
+
+on:
+  push:
+    branches: [main, 2.0*, 3.0*, fasttrack/*]
+  pull_request:
+    branches: [main, 2.0*, 3.0*, fasttrack/*]
+
+jobs:
+
+  build:
+    name: Check Blob Store Files
+    runs-on: ubuntu-latest
+    steps:
+
+    - name: Check out code
+      uses: actions/checkout@v4
+
+    - name: Get base commit for PRs
+      if: ${{ github.event_name == 'pull_request' }}
+      run: |
+        git fetch origin ${{ github.base_ref }}
+        echo "base_sha=$(git rev-parse origin/${{ github.base_ref }})" >> $GITHUB_ENV
+        echo "Merging ${{ github.sha }} into ${{ github.base_ref }}"
+
+    - name: Get base commit for Pushes
+      if: ${{ github.event_name == 'push' }}
+      run: |
+        git fetch origin ${{ github.event.before }}
+        echo "base_sha=${{ github.event.before }}" >> $GITHUB_ENV
+        echo "Merging ${{ github.sha }} into ${{ github.event.before }}"
+
+    - name: Get the changed files
+      run: |
+        echo "Files changed: '$(git diff-tree --no-commit-id --name-only -r ${{ env.base_sha }} ${{ github.sha }})'"
+        changed_files=$(git diff-tree --diff-filter=AM --no-commit-id --name-only -r ${{ env.base_sha }} ${{ github.sha }})
+        echo "Files to validate: '${changed_files}'"
+        echo "changed_files<<EOF" >> $GITHUB_ENV
+        echo "${changed_files}" >> $GITHUB_ENV
+        echo "EOF" >> $GITHUB_ENV
+
+    - name: Check for disallowed file types
+      run: |
+        if [[ -z "${changed_files}" ]]; then # read from the environment so file names are never parsed as shell code
+          echo "No files to validate. Exiting."
+          exit 0
+        fi
+
+        echo "Checking files..."
+        error_found=0
+
+        # Read disallowed extensions from the configuration file
+        if [[ ! -f ".github/workflows/blob-store-extensions.txt" ]]; then
+          echo "Configuration file '.github/workflows/blob-store-extensions.txt' not found. Skipping check."
+          exit 0
+        fi
+
+        # Create array of disallowed extensions
+        mapfile -t disallowed_extensions < .github/workflows/blob-store-extensions.txt
+        
+        # Check each changed file
+        while IFS= read -r file; do
+          if [[ -z "$file" ]]; then
+            continue
+          fi
+
+          echo "Checking file: $file"
+
+          # Get file extension (convert to lowercase for comparison)
+          extension=$(echo "${file##*.}" | tr '[:upper:]' '[:lower:]')
+          filename=$(basename "$file")
+
+          # Check if file should be in blob store
+          should_be_in_blob_store=false
+
+          # Check against disallowed extensions
+          for disallowed_ext in "${disallowed_extensions[@]}"; do
+            # Remove any whitespace and comments
+            clean_ext=$(echo "$disallowed_ext" | sed 's/#.*//' | xargs)
+            if [[ -z "$clean_ext" ]]; then
+              continue
+            fi
+
+            if [[ "$extension" == "$clean_ext" ]]; then
+              should_be_in_blob_store=true
+              break
+            fi
+          done
+
+          # Additional checks for binary files and large files
+          if [[ -f "$file" ]]; then
+            # Binary = non-empty and file(1) reports no text encoding (-b keeps the path out of the output; scripts count as text)
+            if [[ -s "$file" && "$(file -b --mime-encoding -- "$file")" == "binary" ]]; then
+              should_be_in_blob_store=true
+            fi
+
+            # Check file size (files > 1MB should be in blob store)
+            file_size=$(stat -f%z "$file" 2>/dev/null || stat -c%s "$file" 2>/dev/null || echo 0)
+            if [[ $file_size -gt 1048576 ]]; then # 1MB
+              should_be_in_blob_store=true
+            fi
+          fi
+
+          if [[ "$should_be_in_blob_store" == "true" ]]; then
+            1>&2 echo "**** ERROR ****"
+            1>&2 echo "File '$file' should be stored in blob store, not in git repository."
+            1>&2 echo "Reason: Large files, binaries, tarballs, and non-text files slow down git operations"
+            1>&2 echo "and cannot be efficiently diffed. Please upload to blob store instead."
+            1>&2 echo "**** ERROR ****"
+            error_found=1
+          fi
+        done <<< "${changed_files}"
+
+        if [[ $error_found -eq 1 ]]; then
+          echo ""
+          echo "=========================================="
+          echo "FILES THAT SHOULD BE IN BLOB STORE DETECTED"
+          echo "=========================================="
+          echo "The following file types should be stored in blob store:"
+          echo "- Source tarballs (.tar.gz, .tar.xz, .zip, etc.)"
+          echo "- Binary files (.bin, .exe, .so, .dll, etc.)"
+          echo "- Images (.jpg, .png, .gif, .bmp, etc.)"
+          echo "- Archives (.rar, .7z, .tar, etc.)"
+          echo "- Large files (> 1MB)"
+          echo "- Any non-text files that cannot be efficiently diffed"
+          echo ""
+          echo "Please upload these files to the blob store and reference them"
+          echo "in your spec files or configuration instead of checking them into git."
+          echo "=========================================="
+          exit 1
+        fi
+
+        echo "All files are appropriate for git storage."
\ No newline at end of file
diff --git a/test.img b/test.img
new file mode 100644
index 00000000000..e69de29bb2d

From 5bbf7ae8ffa254d99416d6ab7ae4471f6cb7556f Mon Sep 17 00:00:00 2001
From: Riken Maharjan
Date: Tue, 22 Jul 2025 19:48:51 +0000
Subject: [PATCH 2/7] image test

---
 test.img => test.jpg | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename test.img => test.jpg (100%)

diff --git a/test.img b/test.jpg
similarity index 100%
rename from test.img
rename to test.jpg

From 81d939292805c9bbe3c9375074a40d969cc26e57 Mon Sep 17 00:00:00 2001
From: Riken Maharjan
Date: Tue, 22 Jul 2025 20:07:59 +0000
Subject: [PATCH 3/7] remove docs

---
 .github/workflows/blob-store-extensions.txt  | 12 ++----------
 .github/workflows/check-blob-store-files.yml |  8 ++++++--
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/blob-store-extensions.txt b/.github/workflows/blob-store-extensions.txt
index a5960de1354..dcc487de54d 100644
--- a/.github/workflows/blob-store-extensions.txt
+++ b/.github/workflows/blob-store-extensions.txt
@@ -25,7 +25,6 @@ a
 lib
 obj
 o
-rpm
 
 # Image files
 jpg
@@ -39,6 +38,8 @@ svg
 ico
 webp
 raw
+heif
+
 
 # Audio/Video files
 mp3
@@ -53,15 +54,6 @@ ogg
 m4a
 aac
 
-# Document formats (binary)
-pdf
-doc
-docx
-xls
-xlsx
-ppt
-pptx
-
 # Package files
 rpm
 deb
diff --git a/.github/workflows/check-blob-store-files.yml b/.github/workflows/check-blob-store-files.yml
index dcc283f984f..0eaf526c32d 100644
--- a/.github/workflows/check-blob-store-files.yml
+++ b/.github/workflows/check-blob-store-files.yml
@@ -60,7 +60,11 @@ jobs:
 
         # Create array of disallowed extensions
         mapfile -t disallowed_extensions < .github/workflows/blob-store-extensions.txt
-        
+        if [[ $? -ne 0 ]]; then
+          echo "Error occurred while reading disallowed extensions. Exiting."
+          exit 1
+        fi
+
         # Check each changed file
         while IFS= read -r file; do
           if [[ -z "$file" ]]; then
@@ -107,7 +111,7 @@ jobs:
           if [[ "$should_be_in_blob_store" == "true" ]]; then
             1>&2 echo "**** ERROR ****"
             1>&2 echo "File '$file' should be stored in blob store, not in git repository."
-            1>&2 echo "Reason: Large files, binaries, tarballs, and non-text files slow down git operations"
+            1>&2 echo "Reason: Images, Large files, binaries, tarballs, and non-text files slow down git operations"
             1>&2 echo "and cannot be efficiently diffed. Please upload to blob store instead."
             1>&2 echo "**** ERROR ****"
             error_found=1

From 4865f5477a701d344e4f4fb71ba8ac08bc90361c Mon Sep 17 00:00:00 2001
From: Riken Maharjan
Date: Tue, 22 Jul 2025 20:08:59 +0000
Subject: [PATCH 4/7] remove test files

---
 test.jpg | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 test.jpg

diff --git a/test.jpg b/test.jpg
deleted file mode 100644
index e69de29bb2d..00000000000

From 71ddd5efd623674094680ccae98f9bdf6135e2fd Mon Sep 17 00:00:00 2001
From: Riken Maharjan
Date: Tue, 22 Jul 2025 20:14:11 +0000
Subject: [PATCH 5/7] change name

---
 .../{check-blob-store-files.yml => check-files.yml}    | 10 +++++-----
 ...-store-extensions.txt => disallowed-extensions.txt} |  0
 2 files changed, 5 insertions(+), 5 deletions(-)
 rename .github/workflows/{check-blob-store-files.yml => check-files.yml} (94%)
 rename .github/workflows/{blob-store-extensions.txt => disallowed-extensions.txt} (100%)

diff --git a/.github/workflows/check-blob-store-files.yml b/.github/workflows/check-files.yml
similarity index 94%
rename from .github/workflows/check-blob-store-files.yml
rename to .github/workflows/check-files.yml
index 0eaf526c32d..9374cfd40f8 100644
--- a/.github/workflows/check-blob-store-files.yml
+++ b/.github/workflows/check-files.yml
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-name: Check Blob Store Files
+name: Check Disallowed Files
 
 on:
   push:
@@ -12,7 +12,7 @@ on:
 jobs:
 
   build:
-    name: Check Blob Store Files
+    name: Check Disallowed Files
     runs-on: ubuntu-latest
     steps:
 
@@ -53,13 +53,13 @@ jobs:
         error_found=0
 
         # Read disallowed extensions from the configuration file
-        if [[ ! -f ".github/workflows/blob-store-extensions.txt" ]]; then
-          echo "Configuration file '.github/workflows/blob-store-extensions.txt' not found. Skipping check."
+        if [[ ! -f ".github/workflows/disallowed-extensions.txt" ]]; then
+          echo "Configuration file '.github/workflows/disallowed-extensions.txt' not found. Skipping check."
           exit 0
         fi
 
         # Create array of disallowed extensions
-        mapfile -t disallowed_extensions < .github/workflows/blob-store-extensions.txt
+        mapfile -t disallowed_extensions < .github/workflows/disallowed-extensions.txt
         if [[ $? -ne 0 ]]; then
           echo "Error occurred while reading disallowed extensions. Exiting."
           exit 1
diff --git a/.github/workflows/blob-store-extensions.txt b/.github/workflows/disallowed-extensions.txt
similarity index 100%
rename from .github/workflows/blob-store-extensions.txt
rename to .github/workflows/disallowed-extensions.txt

From e158784fcb859054f0611018e698cfb09ca1700a Mon Sep 17 00:00:00 2001
From: Riken Maharjan
Date: Wed, 23 Jul 2025 16:12:09 +0000
Subject: [PATCH 6/7] address PR comment

---
 .github/workflows/disallowed-extensions.txt | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/.github/workflows/disallowed-extensions.txt b/.github/workflows/disallowed-extensions.txt
index dcc487de54d..8867d5d22a4 100644
--- a/.github/workflows/disallowed-extensions.txt
+++ b/.github/workflows/disallowed-extensions.txt
@@ -27,15 +27,10 @@ obj
 o
 
 # Image files
-jpg
-jpeg
-png
 gif
 bmp
 tiff
 tif
-svg
-ico
 webp
 raw
 heif

From 4d9e13f2759689551b80a8fcaf126902316a4de7 Mon Sep 17 00:00:00 2001
From: Riken Maharjan
Date: Wed, 23 Jul 2025 16:17:44 +0000
Subject: [PATCH 7/7] address comment

---
 .github/workflows/check-files.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/check-files.yml b/.github/workflows/check-files.yml
index 9374cfd40f8..8839fc4dada 100644
--- a/.github/workflows/check-files.yml
+++ b/.github/workflows/check-files.yml
@@ -126,7 +126,7 @@ jobs:
           echo "The following file types should be stored in blob store:"
           echo "- Source tarballs (.tar.gz, .tar.xz, .zip, etc.)"
           echo "- Binary files (.bin, .exe, .so, .dll, etc.)"
-          echo "- Images (.jpg, .png, .gif, .bmp, etc.)"
+          echo "- Images (.gif, .bmp, etc.)"
           echo "- Archives (.rar, .7z, .tar, etc.)"
           echo "- Large files (> 1MB)"
           echo "- Any non-text files that cannot be efficiently diffed"