From 047113fae424d989c52e3ae0caff389b8fd55dd3 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Thu, 1 Jan 2026 09:56:39 +0200 Subject: [PATCH] [skip actions] [hsqs] 2026-01-01T09:56:39+02:00 --- .github/workflows/check.yml | 2 +- .mypy.ini | 3 + credsweeper/deep_scanner/deep_scanner.py | 5 + credsweeper/deep_scanner/squashfs_scanner.py | 52 +++++ credsweeper/secret/config.json | 4 +- credsweeper/utils/util.py | 10 + pyproject.toml | 1 + requirements.txt | 1 + tests/__init__.py | 4 +- tests/data/depth_3_pedantic.json | 201 +++++++++++++++++++ tests/samples/sample.hsqs | Bin 0 -> 4096 bytes tests/test_app.py | 2 +- 12 files changed, 279 insertions(+), 6 deletions(-) create mode 100644 credsweeper/deep_scanner/squashfs_scanner.py create mode 100644 tests/samples/sample.hsqs diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index da17b3df7..5d0634ad5 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -92,7 +92,7 @@ jobs: run: | banner="$(python -m credsweeper --banner | head -1)" echo "banner = '${banner}'" - if [ "CredSweeper 1.14.2 crc32:36ab773c" != "${banner}" ]; then + if [ "CredSweeper 1.14.2 crc32:99a7460a" != "${banner}" ]; then echo "Update the check for '${banner}'" exit 1 fi diff --git a/.mypy.ini b/.mypy.ini index a945ec9d3..2b7b40703 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -58,3 +58,6 @@ ignore_missing_imports = True [mypy-striprtf.*] ignore_missing_imports = True + +[mypy-PySquashfsImage.*] +ignore_missing_imports = True diff --git a/credsweeper/deep_scanner/deep_scanner.py b/credsweeper/deep_scanner/deep_scanner.py index 12e74fe3b..5a3b9bbb7 100644 --- a/credsweeper/deep_scanner/deep_scanner.py +++ b/credsweeper/deep_scanner/deep_scanner.py @@ -25,6 +25,7 @@ from .rpm_scanner import RpmScanner from .rtf_scanner import RtfScanner from .sqlite3_scanner import Sqlite3Scanner +from .squashfs_scanner import SquashfsScanner from .strings_scanner import StringsScanner from .tar_scanner import TarScanner from 
.tmx_scanner import TmxScanner @@ -54,6 +55,7 @@ class DeepScanner( PptxScanner, # RtfScanner, # RpmScanner, # + SquashfsScanner, # Sqlite3Scanner, # StringsScanner, # TarScanner, # @@ -136,6 +138,9 @@ def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[ elif Util.is_sqlite3(data): if 0 < depth: deep_scanners.append(Sqlite3Scanner) + elif Util.is_squashfs(data): + if 0 < depth: + deep_scanners.append(SquashfsScanner) elif Util.is_asn1(data): deep_scanners.append(PkcsScanner) elif Util.is_rtf(data): diff --git a/credsweeper/deep_scanner/squashfs_scanner.py b/credsweeper/deep_scanner/squashfs_scanner.py new file mode 100644 index 000000000..42fa61b9f --- /dev/null +++ b/credsweeper/deep_scanner/squashfs_scanner.py @@ -0,0 +1,52 @@ +import logging +from abc import ABC +from typing import List, Optional + +from PySquashfsImage import SquashFsImage + +from credsweeper.credentials.candidate import Candidate +from credsweeper.deep_scanner.abstract_scanner import AbstractScanner +from credsweeper.file_handler.data_content_provider import DataContentProvider +from credsweeper.file_handler.file_path_extractor import FilePathExtractor +from credsweeper.utils.util import Util + +logger = logging.getLogger(__name__) + + +class SquashfsScanner(AbstractScanner, ABC): + """Implements squash file system scanning""" + + def data_scan( + self, # + data_provider: DataContentProvider, # + depth: int, # + recursive_limit_size: int) -> Optional[List[Candidate]]: + """Extracts files one by one from SquashFS image and launches recursive_scan""" + try: + candidates = [] + with SquashFsImage.from_bytes(data_provider.data) as image: + for i in image: + # skip directory + if not i.is_file or i.is_symlink: + continue + logger.warning(f"{i.path}") + if FilePathExtractor.check_exclude_file(self.config, i.path): + continue + if 0 > recursive_limit_size - i.size: + logger.error(f"{i.name}: size {i.size}" + f" is over limit {recursive_limit_size} depth:{depth}") + continue +
logger.warning(f"{i.path} {i.name}") + hsqs_content_provider = DataContentProvider(data=image.read_file(i.inode), + file_path=i.path, + file_type=Util.get_extension(i.path), + info=f"{data_provider.info}|HSQS:{i.path}") + # Nevertheless, use extracted data size + new_limit = recursive_limit_size - len(hsqs_content_provider.data) + logger.info(f"{i.name}: size {len(hsqs_content_provider.data)}") + hsqs_candidates = self.recursive_scan(hsqs_content_provider, depth, new_limit) + candidates.extend(hsqs_candidates) + return candidates + except Exception as hsqs_exc: + logger.error(f"{data_provider.file_path}:{hsqs_exc}") + return None diff --git a/credsweeper/secret/config.json b/credsweeper/secret/config.json index 90ea8f2f0..f8ac6ee3b 100644 --- a/credsweeper/secret/config.json +++ b/credsweeper/secret/config.json @@ -2,12 +2,14 @@ "exclude": { "pattern": [], "containers": [ + ".pak", ".aar", ".apk", ".bz2", ".class", ".gz", ".jar", + ".img", ".lzma", ".rpm", ".tar", @@ -44,7 +46,6 @@ ".gif", ".gmo", ".ico", - ".img", ".info", ".jpeg", ".jpg", @@ -65,7 +66,6 @@ ".ogg", ".ogv", ".ops", - ".pak", ".png", ".psd", ".pyc", diff --git a/credsweeper/utils/util.py b/credsweeper/utils/util.py index 326f0030c..d45c0c1e0 100644 --- a/credsweeper/utils/util.py +++ b/credsweeper/utils/util.py @@ -364,6 +364,16 @@ def is_rtf(data: Union[bytes, bytearray]): return True return False + @staticmethod + def is_squashfs(data): + """According to https://en.wikipedia.org/wiki/List_of_file_signatures - SquashFS file system""" + if isinstance(data, (bytes, bytearray)) and data.startswith(b"hsqs") and b"\x04\x00\x00\x00" == data[28:32]: + # "Must be a power of two between 4096 (4k) and 1048576 (1 MiB)" + block_size = int.from_bytes(data[12:16], byteorder="little", signed=False) + if 0 == 0xFFF & block_size and 4096 <= block_size <= 1048576: + return True + return False + @staticmethod def is_asn1(data: Union[bytes, bytearray]) -> int: """Only sequence type 0x30 and size correctness are checked diff
--git a/pyproject.toml b/pyproject.toml index 25af8b678..764bdbe3b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "python-dateutil", "python-docx", "python-pptx", + "PySquashfsImage", "PyYAML", "rpmfile", "striprtf", diff --git a/requirements.txt b/requirements.txt index 5a97692d4..f0bb2a213 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,6 +22,7 @@ numpy==2.3.3; python_version > '3.10' odfpy==1.4.1 xlrd==2.0.2 striprtf==0.0.29 +PySquashfsImage==0.9.0 # onnxruntime - ML engine onnxruntime==1.23.2 diff --git a/tests/__init__.py b/tests/__init__.py index b399ba1a6..5c6da24b9 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,7 +1,7 @@ from pathlib import Path # total number of files in test samples -SAMPLES_FILES_COUNT = 168 +SAMPLES_FILES_COUNT = 169 # the lowest value of ML threshold is used to display possible lowest values NEGLIGIBLE_ML_THRESHOLD = 0.0001 @@ -19,7 +19,7 @@ SAMPLES_POST_CRED_COUNT = 491 # archived credentials that are not found without --depth -SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 138 +SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 142 SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 5 SAMPLES_IN_DEEP_3 = SAMPLES_IN_DEEP_2 + 4 diff --git a/tests/data/depth_3_pedantic.json b/tests/data/depth_3_pedantic.json index 9ce8d2353..8c8c4a2c6 100644 --- a/tests/data/depth_3_pedantic.json +++ b/tests/data/depth_3_pedantic.json @@ -16627,5 +16627,206 @@ "entropy": 2.52164 } ] + }, + { + "rule": "Github Classic Token", + "severity": "high", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "ghp_00000000000000000000000000000004WZ4EQ", + "line_num": 1, + "path": "/sample", + "info": "FILE:./tests/samples/sample.hsqs|HSQS:/sample|RAW", + "variable": null, + "variable_start": -2, + "variable_end": -2, + "value": "ghp_00000000000000000000000000000004WZ4EQ", + "value_start": 0, + "value_end": 41, + "entropy": 1.56292 + } + ] + }, + { + "rule": "Auth", + "severity": 
"medium", + "confidence": "moderate", + "ml_probability": 0.96, + "line_data_list": [ + { + "line": "AUTH = (\"Basic YWRtaW46cGFyb2w0Mg==\")", + "line_num": 3, + "path": "/sample", + "info": "FILE:./tests/samples/sample.hsqs|HSQS:/sample|RAW", + "variable": "AUTH", + "variable_start": 0, + "variable_end": 4, + "value": "YWRtaW46cGFyb2w0Mg==", + "value_start": 15, + "value_end": 35, + "entropy": 4.12193 + } + ] + }, + { + "rule": "Basic Authorization", + "severity": "medium", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "AUTH = (\"Basic YWRtaW46cGFyb2w0Mg==\")", + "line_num": 3, + "path": "/sample", + "info": "FILE:./tests/samples/sample.hsqs|HSQS:/sample|RAW", + "variable": "Basic", + "variable_start": 9, + "variable_end": 14, + "value": "YWRtaW46cGFyb2w0Mg==", + "value_start": 15, + "value_end": 35, + "entropy": 4.12193 + } + ] + }, + { + "rule": "PEM Private Key", + "severity": "high", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "-----BEGIN PRIVATE KEY-----", + "line_num": 1, + "path": "/sample.pem", + "info": "FILE:./tests/samples/sample.hsqs|HSQS:/sample.pem|RAW", + "variable": null, + "variable_start": -2, + "variable_end": -2, + "value": "-----BEGIN PRIVATE KEY-----", + "value_start": 0, + "value_end": 27, + "entropy": 3.20029 + }, + { + "line": "MIIBVwIBADANBgkqhkiG9w0BAQEFAASCAUEwggE9AgEAAkEAr6IIv1xVwrhgoXp+", + "line_num": 2, + "path": "/sample.pem", + "info": "FILE:./tests/samples/sample.hsqs|HSQS:/sample.pem|RAW", + "variable": null, + "variable_start": -2, + "variable_end": -2, + "value": "MIIBVwIBADANBgkqhkiG9w0BAQEFAASCAUEwggE9AgEAAkEAr6IIv1xVwrhgoXp+", + "value_start": 0, + "value_end": 64, + "entropy": 4.48745 + }, + { + "line": "KOUDhLpzEPg7XaG2vfHaOB0++JzWnpvfii4BojVzrrkJjcHEA0975Ckp1bKp/swY", + "line_num": 3, + "path": "/sample.pem", + "info": "FILE:./tests/samples/sample.hsqs|HSQS:/sample.pem|RAW", + "variable": null, + "variable_start": -2, + "variable_end": 
-2, + "value": "KOUDhLpzEPg7XaG2vfHaOB0++JzWnpvfii4BojVzrrkJjcHEA0975Ckp1bKp/swY", + "value_start": 0, + "value_end": 64, + "entropy": 5.3007 + }, + { + "line": "r8qHZwIDAQABAkEAkFDJdUxO3+iW5jj7z2iW4oo+IJSIW2CVAzDmybMmWitRFIBZ", + "line_num": 4, + "path": "/sample.pem", + "info": "FILE:./tests/samples/sample.hsqs|HSQS:/sample.pem|RAW", + "variable": null, + "variable_start": -2, + "variable_end": -2, + "value": "r8qHZwIDAQABAkEAkFDJdUxO3+iW5jj7z2iW4oo+IJSIW2CVAzDmybMmWitRFIBZ", + "value_start": 0, + "value_end": 64, + "entropy": 5.07626 + }, + { + "line": "tkxP/yUtk8rysA/MmlFKAywnhsqrh/lKWcLPcQIhANjND990LTAtqe0vX6C/lt3p", + "line_num": 5, + "path": "/sample.pem", + "info": "FILE:./tests/samples/sample.hsqs|HSQS:/sample.pem|RAW", + "variable": null, + "variable_start": -2, + "variable_end": -2, + "value": "tkxP/yUtk8rysA/MmlFKAywnhsqrh/lKWcLPcQIhANjND990LTAtqe0vX6C/lt3p", + "value_start": 0, + "value_end": 64, + "entropy": 5.05836 + }, + { + "line": "PQbLinV/Uvv3TTn7ruhZAiEAz2NzQp9juvUz3ZVhgfer0zbWlLxT52dSAFzwAWWe", + "line_num": 6, + "path": "/sample.pem", + "info": "FILE:./tests/samples/sample.hsqs|HSQS:/sample.pem|RAW", + "variable": null, + "variable_start": -2, + "variable_end": -2, + "value": "PQbLinV/Uvv3TTn7ruhZAiEAz2NzQp9juvUz3ZVhgfer0zbWlLxT52dSAFzwAWWe", + "value_start": 0, + "value_end": 64, + "entropy": 5.03321 + }, + { + "line": "9b8CIQDN81ddza5Tsz1WbQdp3DcETdpERz0byz1Y0J/TMi0A2QIhAKlQVyyANorP", + "line_num": 7, + "path": "/sample.pem", + "info": "FILE:./tests/samples/sample.hsqs|HSQS:/sample.pem|RAW", + "variable": null, + "variable_start": -2, + "variable_end": -2, + "value": "9b8CIQDN81ddza5Tsz1WbQdp3DcETdpERz0byz1Y0J/TMi0A2QIhAKlQVyyANorP", + "value_start": 0, + "value_end": 64, + "entropy": 4.99173 + }, + { + "line": "X327VHUoQBbbgYrCynl0x+TV+3gYgBO/AiEAzIj5UNzfhW5pgJwMHz1G/nH6Ea0l", + "line_num": 8, + "path": "/sample.pem", + "info": "FILE:./tests/samples/sample.hsqs|HSQS:/sample.pem|RAW", + "variable": null, + "variable_start": -2, + 
"variable_end": -2, + "value": "X327VHUoQBbbgYrCynl0x+TV+3gYgBO/AiEAzIj5UNzfhW5pgJwMHz1G/nH6Ea0l", + "value_start": 0, + "value_end": 64, + "entropy": 5.28891 + }, + { + "line": "MSl+rkqtUopR0Ik=", + "line_num": 9, + "path": "/sample.pem", + "info": "FILE:./tests/samples/sample.hsqs|HSQS:/sample.pem|RAW", + "variable": null, + "variable_start": -2, + "variable_end": -2, + "value": "MSl+rkqtUopR0Ik=", + "value_start": 0, + "value_end": 16, + "entropy": 3.875 + }, + { + "line": "-----END PRIVATE KEY-----", + "line_num": 10, + "path": "/sample.pem", + "info": "FILE:./tests/samples/sample.hsqs|HSQS:/sample.pem|RAW", + "variable": null, + "variable_start": -2, + "variable_end": -2, + "value": "-----END PRIVATE KEY-----", + "value_start": 0, + "value_end": 25, + "entropy": 3.04489 + } + ] } ] \ No newline at end of file diff --git a/tests/samples/sample.hsqs b/tests/samples/sample.hsqs new file mode 100644 index 0000000000000000000000000000000000000000..303a0eca16295d92ebb328beb11c356d266fb060 GIT binary patch literal 4096 zcmc~OE-YqdU|{&V^G*f>0}}%ykj=;-$Z!BivH;l*Knwz+AVomH3#I=55xRFP>dL%RzwQgLOS?<=!n1O?Be6*!jEHY|_%;T2qzDGUYsbhs}iCU;D$~ zZ1mKhdsyFk_M3A)YF(@4AKX`*Jh><5nRmrbwWF=A8T*~wzo}PvoU;BMA}6!=+$wtw zmgYxKS8CV_9uSVKP~u%_!*JmB+xovBx*OLeZwUVOdUn}9b;fFy-OqfU1a6q|ecug6 z<5_<9_Zi-Azi@#m`b}B8?F)r(mIq%?Gc>%mhc7#zeVSP6b_;K@EOwKO#g;9OLuyAOj?$#rRJjN z&8Po;Mu#zf(BH-1&+<<)*=>2v&u6mjwwm{5H9lpL65bwoR>t@?(d6-Gw^cLNzFj~0(Br$?rphdrxIX_Qry57l#YJUHm(5@6 zy>zm;`*m;L75<;T9u7QG!2f(pb4!fG)%j;Otc?~{`unceeOZ?5e~-O6k+``=B=O<(q#|C82>zjMTjq2g9@ z(v3qW7+8eM@10~Yo^U`|AuTC`f&2ZetQiMe)Q+=@B&7V{p7x*X%6Xot%quUmCy6Pr zG2}~dZ&e2AO=)0o(>QzNz^Q;aGomLj(aN{)uWXb6mkz-S1J YhQMeDjE2By2#kinXb6mkz_1Dd0E$OVw*UYD literal 0 HcmV?d00001 diff --git a/tests/test_app.py b/tests/test_app.py index 6705eff0e..b42a8a57d 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -492,7 +492,7 @@ def test_depth_p(self) -> None: cvs_checksum = hashlib.md5(f.read()).digest() checksum = bytes(a ^ b for a, b in zip(checksum, 
cvs_checksum)) # update the checksum manually and keep line endings in the samples as is (git config core.autocrlf false) - self.assertEqual("4a12fce9d4c17e6b3aaef0f4be070225", binascii.hexlify(checksum).decode()) + self.assertEqual("080e6166edd7cffc5564eb96a2ce7c68", binascii.hexlify(checksum).decode()) normal_report = [] sorted_report = [] with tempfile.TemporaryDirectory() as tmp_dir: