Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions .github/workflows/check-files.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# Workflow definition: one job that inspects the files touched by a push or
# pull request.
name: Check Disallowed Files

# Triggered by pushes to, and pull requests targeting, the branches listed
# below. Glob patterns are quoted so they are always read as plain strings.
on:
  push:
    branches:
      - main
      - "2.0*"
      - "3.0*"
      - "fasttrack/*"
  pull_request:
    branches:
      - main
      - "2.0*"
      - "3.0*"
      - "fasttrack/*"

jobs:

  build:
    name: Check Disallowed Files
    runs-on: ubuntu-latest
    steps:

      # Place the commit under test in the workspace so later steps can run
      # git commands and inspect the files on disk.
      - name: Check out code
        uses: actions/checkout@v4

# Pull requests: the comparison base is the current tip of the target branch.
# The resolved commit is exported as `base_sha` for the following steps.
- name: Get base commit for PRs
  if: ${{ github.event_name == 'pull_request' }}
  env:
    # Context values are handed to the script through the environment rather
    # than expanded inside it, so their contents are never parsed as shell code.
    BASE_REF: ${{ github.base_ref }}
    HEAD_SHA: ${{ github.sha }}
  run: |
    git fetch origin "${BASE_REF}"
    echo "base_sha=$(git rev-parse "origin/${BASE_REF}")" >> "${GITHUB_ENV}"
    echo "Merging ${HEAD_SHA} into ${BASE_REF}"

# Pushes: the comparison base is the commit the branch pointed at before the
# push. The resolved base is exported as `base_sha` for the following steps.
- name: Get base commit for Pushes
  if: ${{ github.event_name == 'push' }}
  env:
    # Context values are handed to the script through the environment rather
    # than expanded inside it, so their contents are never parsed as shell code.
    BEFORE_SHA: ${{ github.event.before }}
    HEAD_SHA: ${{ github.sha }}
  run: |
    if [[ "${BEFORE_SHA}" =~ ^0+$ ]]; then
      # Branch creation: the event carries an all-zero "before" SHA that
      # cannot be fetched. Fall back to the parent of the pushed commit, so
      # only the tip commit of a brand-new branch is validated. If the pushed
      # commit has no parent (root commit), compare against the empty tree.
      git fetch --depth=2 origin "${HEAD_SHA}"
      base_sha=$(git rev-parse --verify --quiet "${HEAD_SHA}^") \
        || base_sha=$(git hash-object -t tree /dev/null)
    else
      git fetch origin "${BEFORE_SHA}"
      base_sha="${BEFORE_SHA}"
    fi
    echo "base_sha=${base_sha}" >> "${GITHUB_ENV}"
    echo "Merging ${HEAD_SHA} into ${base_sha}"

# Lists the files that differ between `base_sha` (exported by one of the
# previous steps) and the commit under test, and exports the added/modified
# ones as the multi-line environment variable `changed-files`.
- name: Get the changed files
  env:
    HEAD_SHA: ${{ github.sha }}
  run: |
    : "${base_sha:?base_sha was not exported by a previous step}"

    # core.quotePath=false makes git print non-ASCII paths verbatim instead of
    # as quoted octal escapes, so the names match the files on disk.
    all_changed=$(git -c core.quotePath=false diff-tree --no-commit-id --name-only -r "${base_sha}" "${HEAD_SHA}")
    echo "Files changed: '${all_changed}'"

    # Only added (A) and modified (M) files still exist and need validation.
    changed_files=$(git -c core.quotePath=false diff-tree --diff-filter=AM --no-commit-id --name-only -r "${base_sha}" "${HEAD_SHA}")
    echo "Files to validate: '${changed_files}'"

    # Use an unpredictable heredoc delimiter: with a fixed one, a changed file
    # whose name equals the delimiter would end the value early and let the
    # remaining file names be parsed as further environment assignments.
    delimiter="EOF_$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"
    {
      echo "changed-files<<${delimiter}"
      echo "${changed_files}"
      echo "${delimiter}"
    } >> "${GITHUB_ENV}"

# Validates every added/modified file from `changed-files` and fails the job
# when a file has a disallowed extension, is a binary/executable/archive, or
# is larger than 1MB.
- name: Check for disallowed file types
  env:
    # The file list is handed over through the environment instead of being
    # expanded inside the script, so file names containing quotes or `$(...)`
    # are treated as data and never parsed as shell code.
    CHANGED_FILES: ${{ env.changed-files }}
  run: |
    if [[ -z "${CHANGED_FILES}" ]]; then
      echo "No files to validate. Exiting."
      exit 0
    fi

    echo "Checking files..."
    error_found=0
    config_file=".github/workflows/disallowed-extensions.txt"
    max_size_bytes=1048576 # 1MB

    # Without the configuration file the check is skipped, not failed.
    if [[ ! -f "${config_file}" ]]; then
      echo "Configuration file '${config_file}' not found. Skipping check."
      exit 0
    fi

    if ! mapfile -t config_lines < "${config_file}"; then
      echo "Error occurred while reading disallowed extensions. Exiting."
      exit 1
    fi

    # Normalise the configured extensions once, before looping over files:
    # drop comments and whitespace (including a trailing CR), drop an optional
    # leading dot, and lowercase the entry.
    disallowed_extensions=()
    for config_line in "${config_lines[@]}"; do
      clean_ext="${config_line%%#*}"
      clean_ext="${clean_ext//[[:space:]]/}"
      clean_ext="${clean_ext#.}"
      clean_ext="${clean_ext,,}"
      if [[ -n "${clean_ext}" ]]; then
        disallowed_extensions+=("${clean_ext}")
      fi
    done

    # Check each changed file
    while IFS= read -r file; do
      if [[ -z "${file}" ]]; then
        continue
      fi

      echo "Checking file: ${file}"

      # Compare against the lowercased base name only, so dots in directory
      # names and files without any extension cannot produce a match.
      filename="${file##*/}"
      filename="${filename,,}"

      should_be_in_blob_store=false

      # Suffix match, so multi-part entries such as "tar.gz" work as well as
      # single-part ones such as "gz".
      for disallowed_ext in "${disallowed_extensions[@]}"; do
        if [[ "${filename}" == *".${disallowed_ext}" ]]; then
          should_be_in_blob_store=true
          break
        fi
      done

      # Additional checks for binary files and large files
      if [[ -f "${file}" ]]; then
        # -b omits the file name from the output so keywords inside the path
        # cannot match. Requiring a binary encoding keeps text scripts, which
        # `file` describes as "... text executable", from being flagged.
        description=$(file -b -- "${file}")
        encoding=$(file -b --mime-encoding -- "${file}")
        if [[ "${encoding}" == "binary" && "${description}" =~ binary|executable|archive|compressed ]]; then
          should_be_in_blob_store=true
        fi

        # Check file size (files > 1MB should be in blob store); an unreadable
        # file counts as size 0.
        file_size=$(wc -c < "${file}" 2>/dev/null) || file_size=0
        if (( file_size > max_size_bytes )); then
          should_be_in_blob_store=true
        fi
      fi

      if [[ "${should_be_in_blob_store}" == "true" ]]; then
        echo "**** ERROR ****" >&2
        echo "File '${file}' should be stored in blob store, not in git repository." >&2
        echo "Reason: Images, Large files, binaries, tarballs, and non-text files slow down git operations" >&2
        echo "and cannot be efficiently diffed. Please upload to blob store instead." >&2
        echo "**** ERROR ****" >&2
        error_found=1
      fi
    done <<< "${CHANGED_FILES}"

    if [[ ${error_found} -eq 1 ]]; then
      echo ""
      echo "=========================================="
      echo "FILES THAT SHOULD BE IN BLOB STORE DETECTED"
      echo "=========================================="
      echo "The following file types should be stored in blob store:"
      echo "- Source tarballs (.tar.gz, .tar.xz, .zip, etc.)"
      echo "- Binary files (.bin, .exe, .so, .dll, etc.)"
      echo "- Images (.gif, .bmp, etc.)"
      echo "- Archives (.rar, .7z, .tar, etc.)"
      echo "- Large files (> 1MB)"
      echo "- Any non-text files that cannot be efficiently diffed"
      echo ""
      echo "Please upload these files to the blob store and reference them"
      echo "in your spec files or configuration instead of checking them into git."
      echo "=========================================="
      exit 1
    fi

    echo "All files are appropriate for git storage."
80 changes: 80 additions & 0 deletions .github/workflows/disallowed-extensions.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# File extensions that should be stored in blob store instead of git repository
# Lines starting with # are comments and will be ignored
# Extensions should be lowercase without the leading dot

# Source tarballs and archives
tar
gz
tgz
bz2
xz
zip
rar
7z
tar.gz
tar.xz
tar.bz2

# Binary executables
bin
exe
dll
so
dylib
a
lib
obj
o

# Image files
gif
bmp
tiff
tif
webp
raw
heif


# Audio/Video files
mp3
wav
avi
mp4
mkv
mov
wmv
flv
ogg
m4a
aac

# Package files
rpm
deb
msi
pkg
dmg
iso

# Compressed source packages
gem
whl
egg

# Database files
db
sqlite
sqlite3

# Fonts
ttf
otf
woff
woff2

# Other binary formats
jar
war
ear
class
Loading