diff --git a/minecode_pipelines/pipelines/mine_apache.py b/minecode_pipelines/pipelines/mine_apache.py
new file mode 100644
index 00000000..8235bc7a
--- /dev/null
+++ b/minecode_pipelines/pipelines/mine_apache.py
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/aboutcode-org/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/aboutcode-org/scancode.io for support and download.
+
+
+from minecode_pipelines.pipes import apache
+from minecode_pipelines.pipelines import MineCodeBasePipeline
+from minecode_pipelines.pipelines import _mine_and_publish_packageurls
+
+
+class MineApache(MineCodeBasePipeline):
+    """Mine PackageURLs from apache.org and publish them to FederatedCode."""
+
+    package_batch_size = 5
+
+    @classmethod
+    def steps(cls):
+        return (
+            cls.check_federatedcode_eligibility,
+            cls.create_federatedcode_working_dir,
+            cls.mine_apache_packages,
+            cls.get_apache_packages_to_sync,
+            cls.fetch_federation_config,
+            cls.mine_and_publish_packageurls,
+            cls.delete_working_dir,
+        )
+
+    def mine_apache_packages(self):
+        """Mine the Apache package archives metadata and get the last mined date."""
+        (self.apache_packages_metadata, self.last_mined_date) = apache.mine_apache_packages(
+            logger=self.log
+        )
+
+    def get_apache_packages_to_sync(self):
+        """Get the Apache packages that need to be synced, using the last mined date."""
+        self.packages = apache.get_apache_packages_to_sync(
+            packages_metadata=self.apache_packages_metadata,
+            last_mined_date=self.last_mined_date,
+            logger=self.log,
+        )
+
+    def packages_count(self):
+        return len(list(self.mine_packageurls()))
+
+    def mine_packageurls(self):
+        """Yield the Apache PackageURLs and package data of the packages to sync."""
+        yield from apache.mine_and_publish_apache_packageurls(
+            packages_to_sync=self.packages,
+            packages_metadata=self.apache_packages_metadata,
+            logger=self.log,
+        )
+
+    def mine_and_publish_packageurls(self):
+        """Mine and publish PackageURLs."""
+        _mine_and_publish_packageurls(
+            packageurls=self.mine_packageurls(),
+            total_package_count=self.packages_count(),
+            data_clusters=self.data_clusters,
+            checked_out_repos=self.checked_out_repos,
+            working_path=self.working_path,
+            append_purls=self.append_purls,
+            commit_msg_func=self.commit_message,
+            logger=self.log,
+            checkpoint_on_commit=True,
+            batch_size=self.package_batch_size,
+        )
diff --git a/minecode_pipelines/pipes/__init__.py b/minecode_pipelines/pipes/__init__.py
index 9e69c180..a5c650bb 100644
--- a/minecode_pipelines/pipes/__init__.py
+++ b/minecode_pipelines/pipes/__init__.py
@@ -92,6 +92,8 @@ def update_checkpoints_file_in_github(checkpoints_file, cloned_repo, path):
     from scanpipe.pipes.federatedcode import commit_and_push_changes
 
     checkpoint_path = os.path.join(cloned_repo.working_dir, path)
+    # Create the directory if does not exist
+    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
     shutil.move(checkpoints_file, checkpoint_path)
     commit_message = """Update federatedcode purl mining checkpoint"""
     commit_and_push_changes(
diff --git a/minecode_pipelines/pipes/apache.py b/minecode_pipelines/pipes/apache.py
new file mode 100644
index 00000000..33087b69
--- /dev/null
+++ b/minecode_pipelines/pipes/apache.py
@@ -0,0 +1,571 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# purldb is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/purldb for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+from datetime import datetime, timezone
+
+from minecode_pipelines.pipes import fetch_checkpoint_from_github
+from minecode_pipelines.pipes import update_checkpoints_in_github
+from minecode_pipelines.pipes import MINECODE_PIPELINES_CONFIG_REPO
+
+from minecode_pipelines.utils import get_temp_dir
+
+from packageurl import PackageURL
+
+from scanpipe.pipes.federatedcode import clone_repository
+from scanpipe.pipes.federatedcode import delete_local_clone
+
+from scanpipe.pipes.fetch import fetch_http
+
+import gzip
+import shutil
+import json
+import os
+import re
+
+import requests
+
+
+TRACE = False
+TRACE_DEEP = False
+# PackageURL type of all the purls built in this module.
+SID_TYPE = "sid"
+
+# Remote listing and projects files, and bases of download URLs and namespaces.
+FIND_LS_URL = "https://archive.apache.org/dist/zzz/find-ls2.txt.gz"
+PROJECT_JSON = "https://projects.apache.org/json/foundation/projects.json"
+BASE_URL = "https://archive.apache.org/dist/"
+BASE_NAMESPACE = "apache.org/"
+
+# Names and paths of packages data and checkpoint files (presumably in the config repo).
+PACKAGE_FILE_NAME = "ApachePackages.json"
+COMPRESSED_PACKAGE_FILE_NAME = "ApachePackages.json.gz"
+COMPRESSED_APACHE_PACKAGES_PATH = "apache/" + COMPRESSED_PACKAGE_FILE_NAME
+APACHE_CHECKPOINT_PATH = "apache/checkpoints.json"
+APACHE_PACKAGES_CHECKPOINT_PATH = "apache/packages_checkpoint.json"
+
+# Extensions of checksum files: these are collected apart from the archives.
+CHECKSUM_EXTS = (
+    ".sha256",
+    ".sha512",
+    ".md5",
+    ".sha",
+    ".sha1",
+)
+
+# only keep downloads with certain extensions for some archives, packages and checksums
+ARCHIVE_EXTS = (
+    # archives
+    ".jar",
+    ".zip",
+    ".tar.gz",
+    ".tgz",
+    ".tar.bz2",
+    ".war",
+    ".tar.xz",
+    ".tar",
+    # packages
+    # '.deb', '.rpm', '.msi', '.exe',
+    ".whl",
+    ".gem",
+    ".nupkg",
+    # '.dmg',
+    # '.nbm',
+)
+
+IGNORED_PATH_CONTAINS = (
+    "META/",  # #
+    # doc
+    "/documentation/",
+    "/doc/",  # #
+    "-doc.",  # #
+    "-doc-",  # #
+    "/docs/",  # #
+    "-docs.",  # #
+    "-docs-",  # #
+    "javadoc",  # #
+    "fulldoc",  # #
+    "apidoc",  # #
+    "-manual.",
+    "-asdocs.",  # #
+    # eclipse p2/update sites are redundant
+    # (presumably the same artifacts are also published as regular archives)
+    "updatesite/",  # #
+    "eclipse-update-site",  # #
+    "update/eclipse",  # #
+    "sling/eclipse",  # #
+    "eclipse.site-",
+    # large multi-origin binary distributions
+    "-distro.",
+    "-bin-withdeps.",
+    "-bin-with-deps",
+    # these are larger distributions with third-parties
+    "apache-airavata-distribution",
+    "apache-airavata-server",
+    "apache-mahout-distribution",
+    "/syncope-standalone-",
+    "binaries/conda",
+    # obscure
+    "perl/contrib",
+    # index data
+    "zzz",
+    # doc
+    "ant/manual",
+    # tmp
+    "/tmp/",  # noqa: S108 safe: used only as ignore pattern
+)
+
+
+# TODO: ignore these globs too:
+
+# openoffice/*/binaries is very large
+# /*/apache-log4j-*-site.zip
+
+
+def determine_purl_elements(path):
+    """
+    Return a (namespace, name, version, qualifier) tuple for an archive ``path``.
+
+    The strict '{name}/{version}/{filename}' layout is tried first and yields a
+    "file_name" qualifier; any other layout is parsed leniently and yields a
+    "download_url" qualifier. Raise ValueError when ``path`` has no directory.
+    """
+    parsed_result = parse_apache_path_common(path)
+    if parsed_result:
+        qualifier = {"file_name": parsed_result["file_name"]}
+    else:
+        parsed_result = parse_apache_path_complex(path)
+        if not parsed_result:
+            raise ValueError(f"Cannot determine purl elements from path: {path!r}")
+        qualifier = {"download_url": BASE_URL + path}
+    namespace = BASE_NAMESPACE + parsed_result["namespace"]
+    return namespace, parsed_result["name"], parsed_result["version"], qualifier
+
+
+def get_archives_and_checksum(txt_path):
+    """
+    Parse the decompressed find-ls listing at ``txt_path`` and return a tuple of:
+    - A list of mappings with the "filepath", "size" and "date" of each archive
+    - A list of the paths of the checksum files
+
+    Only regular files are kept. Paths containing any IGNORED_PATH_CONTAINS
+    fragment are skipped, as are files that are neither checksums nor archives.
+    """
+    packages_data = []
+    packages_checksum = []
+    with open(txt_path, encoding="utf-8") as f:
+        for line in f:
+            # Split on the first eight whitespace runs only, so that a file
+            # path containing spaces is kept whole in the last field.
+            parts = line.strip().split(None, 8)
+            if len(parts) < 9:
+                continue
+
+            permissions, size, filepath = parts[0], parts[4], parts[8]
+
+            # Skip entries that are not regular files
+            if not permissions.startswith("-"):
+                continue
+
+            if any(ignored in filepath for ignored in IGNORED_PATH_CONTAINS):
+                continue
+
+            if filepath.endswith(CHECKSUM_EXTS):
+                packages_checksum.append(filepath)
+            elif filepath.endswith(ARCHIVE_EXTS):
+                packages_data.append(
+                    {
+                        "filepath": filepath,
+                        "size": size,
+                        "date": " ".join(parts[5:8]),
+                    }
+                )
+
+    return packages_data, packages_checksum
+
+
+def update_package_data(packages_data, packages_checksum):
+    """
+    Return a new list of package mappings, each a copy of an entry of
+    ``packages_data`` updated with:
+    - A constructed "download_url".
+    - Project information (homepage, download page, description, mailing
+      list, programming language) when the project is found in
+      https://projects.apache.org/json/foundation/projects.json
+
+    ``packages_checksum`` is currently unused: requesting the checksum of every
+    package would likely lead to rate limiting (HTTP 429 errors), so the
+    checksum collection is disabled for now.
+    """
+    # Map the projects.json keys to the keys of the returned package mappings.
+    project_fields = {
+        "homepage": "repository_homepage_url",
+        "download-page": "repository_download_url",
+        "description": "description",
+        "mailing-list": "mailing_list",
+        "programming-language": "programming_language",
+    }
+    project_json_download = fetch_http(PROJECT_JSON)
+    with open(project_json_download.path, encoding="utf-8") as f:
+        data = json.load(f)
+
+    updated_package_data = []
+    for package in packages_data:
+        package_dict = package.copy()
+        path = package["filepath"]
+        package_dict["download_url"] = BASE_URL + path.lstrip("./")
+        # Shallow paths have no package or subpackage segment to look up.
+        segments = path.split("/")
+        package_metadata = None
+        if data and len(segments) > 1:
+            package_name = segments[1]
+            package_metadata = data.get(package_name)
+            # In some cases, projects.json uses
+            # {package_name}-{subpackage_name} as the key.
+            # For example, "directory-fortress" likely refers to
+            # files under /directory/fortress*
+            if not package_metadata and len(segments) > 2:
+                package_metadata = data.get(f"{package_name}-{segments[2]}")
+        if package_metadata:
+            for key, target in project_fields.items():
+                value = package_metadata.get(key)
+                if value:
+                    package_dict[target] = value
+        # for ext in CHECKSUM_EXTS:
+        #    checksum_path = path + ext
+        #    if checksum_path in packages_checksum:
+        #        checksum = get_checksum(BASE_URL + checksum_path.lstrip("./"))
+        #        checksum_ext = ext.lstrip(".")
+        #        package_dict[checksum_ext] = checksum
+        updated_package_data.append(package_dict)
+    return updated_package_data
+
+
+def get_checksum(url):
+    """
+    Fetch the checksum file from the given URL and return only the hash value,
+    which is the first whitespace-separated token of the file content.
+    Raise requests.HTTPError on an HTTP error status.
+    """
+    # Always set a timeout: requests would otherwise wait indefinitely.
+    response = requests.get(url, timeout=30)
+    response.raise_for_status()
+    checksum = response.text.split()[0]
+    return checksum
+
+
+def extract_archives(archive_path):
+    """
+    Decompress the gzip at ``archive_path`` and return the decompressed file path.
+    """
+    txt_path, _gz_ext = os.path.splitext(archive_path)
+    with gzip.open(archive_path, "rb") as f_in, open(txt_path, "wb") as f_out:
+        shutil.copyfileobj(f_in, f_out)
+    return txt_path
+
+
+def parse_apache_path_common(path):
+    """
+    Parse an Apache path following the standard
+    '{namespace}/{name}/{version}/{filename}' structure, where the namespace
+    is optional and may span several segments.
+
+    Return a mapping with the "namespace", "name", "version" and "file_name"
+    elements, or None when the path has fewer than three segments or when
+    the segment before the filename does not start with a digit.
+
+    >>> parse_apache_path_common("karaf/cellar/4.0.5/cellar-4.0.5.zip")["name"]
+    'cellar'
+    >>> parse_apache_path_common("avalon/logkit/v1.2/LogKit-1.2.zip") is None
+    True
+    """
+    parts = path.strip().split("/")
+    if len(parts) < 3:
+        # Too short to hold {name}/{version}/{filename}
+        return None
+
+    # Unpack from the right: everything before the name is the namespace.
+    *leading, name, version, file_name = parts
+
+    # Only a version segment starting with a digit is accepted.
+    if not version[:1].isdigit():
+        return None
+
+    return {
+        "namespace": "/".join(leading),
+        "name": name,
+        "version": version,
+        "file_name": file_name,
+    }
+
+
+def parse_apache_path_complex(path):
+    """
+    Parse a non-standard Apache path by locating a "marker" directory.
+
+    The directory segments (everything but the filename) are scanned
+    left-to-right. A segment is a marker when it contains a dotted version
+    (e.g. 1.2.0) or a release candidate tag (e.g. rc1, RC1.1), or when it
+    is one of a fixed set of keywords such as 'bin' or 'jars'.
+
+    - "name" is the segment right before the first marker; when there is
+      no marker, or when the marker is the first segment, it is the
+      parent directory of the file.
+    - "namespace" is made of all the segments before the name.
+    - "version" is the first version or release candidate found in any
+      directory segment, or an empty string. A version that is only in
+      the filename is ignored.
+
+    Return a mapping with the "namespace", "name", "version" and "file_name"
+    elements, or None when the path has no directory segment.
+    """
+    *directories, file_name = path.strip().split("/")
+    if not directories:
+        return None
+
+    keywords = {
+        "jars",
+        "binaries",
+        "binary",
+        "sources",
+        "source",
+        "java",
+        "bin",
+        "dist",
+        "old",
+        "obsolete",
+    }
+
+    # Dotted versions (e.g., 1.2.0) or release candidates (e.g., rc1, rc1.1)
+    version_pattern = re.compile(r"(\d+(?:\.\d+)+|rc\d+(?:\.\d+)*)", re.IGNORECASE)
+
+    first_marker = None
+    version = ""
+    for position, directory in enumerate(directories):
+        found = version_pattern.search(directory)
+        # Only the first version found is kept.
+        if found and not version:
+            version = found.group(1)
+        # Only the first marker counts.
+        if first_marker is None and (found or directory.lower() in keywords):
+            first_marker = position
+
+    # A marker in first position has no segment before it to use as a name.
+    name_position = first_marker - 1 if first_marker else len(directories) - 1
+
+    return {
+        "namespace": "/".join(directories[:name_position]),
+        "name": directories[name_position],
+        "version": version,
+        "file_name": file_name,
+    }
+
+
+def mine_apache_packages(logger=None):
+    """
+    Mine Apache package archives from
+    "https://archive.apache.org/dist/zzz/find-ls2.txt.gz" and return a
+    (packages_metadata, last_mined_date) tuple.
+
+    Apache.org does not provide an index of new packages, so the checkpoint
+    only records when packages were last mined: ``last_mined_date`` is the
+    previously recorded date, and the checkpoint is updated to the current date.
+    """
+    config_repo = clone_repository(
+        repo_url=MINECODE_PIPELINES_CONFIG_REPO,
+        clone_path=get_temp_dir(),
+        logger=logger,
+    )
+
+    # Always delete the local clone, even when mining fails.
+    try:
+        packages_metadata = get_find_ls_archive_paths_and_metadata(logger=logger)
+        last_mined_date = get_and_update_apache_checkpoints(
+            cloned_repo=config_repo,
+            checkpoint_path=APACHE_CHECKPOINT_PATH,
+            logger=logger,
+        )
+    finally:
+        delete_local_clone(config_repo)
+
+    return packages_metadata, last_mined_date
+
+
+def get_find_ls_archive_paths_and_metadata(logger=None):
+    """
+    Download and decompress the find-ls listing, then return a list of
+    metadata mappings, one for each package archive file found in it.
+    """
+    listing_archive = fetch_http(FIND_LS_URL)
+    listing_path = extract_archives(listing_archive.path)
+    archives, checksums = get_archives_and_checksum(listing_path)
+    packages_metadata = update_package_data(archives, checksums)
+    if logger:
+        logger(f"Collected: {len(packages_metadata)} package archive files.")
+
+    return packages_metadata
+
+
+def get_and_update_apache_checkpoints(
+    cloned_repo,
+    checkpoint_path,
+    config_repo=MINECODE_PIPELINES_CONFIG_REPO,
+    logger=None,
+):
+    """
+    Record the current UTC time as the "date" of the checkpoint found at
+    ``checkpoint_path``, update that checkpoint using ``cloned_repo``, and
+    return the previously recorded date (an empty string if there was none).
+    """
+    checkpoint = fetch_checkpoint_from_github(
+        config_repo=config_repo,
+        checkpoint_path=checkpoint_path,
+    )
+    last_mined_date = checkpoint.get("date", "")
+    if logger:
+        logger(f"Last mined date from checkpoint: {last_mined_date}")
+    checkpoint["date"] = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
+    update_checkpoints_in_github(
+        checkpoint=checkpoint,
+        cloned_repo=cloned_repo,
+        path=checkpoint_path,
+        logger=logger,
+    )
+    return last_mined_date
+
+
+def get_apache_packages_to_sync(packages_metadata, last_mined_date, logger=None):
+    """
+    Return the list of "filepath" of the package archives that need to be
+    synced: all of them when ``last_mined_date`` is empty, else only those
+    with a "date" more recent than ``last_mined_date``. Return an empty list
+    when ``packages_metadata`` is empty.
+
+    Dates are strings formatted as "%Y-%m-%d %H:%M UTC"; a ValueError is
+    raised for any other format.
+
+    Only a timestamp is checkpointed, not the mined archives: 10 packages
+    alone produced about 62k archive paths (4.2 MB), so recording all
+    ~10,000 Apache packages would make the checkpoint file far too large.
+    """
+    if logger:
+        logger(f"# of package archives found from apache.org: {len(packages_metadata)}")
+
+    if not packages_metadata:
+        return []
+
+    if not last_mined_date:
+        packages_to_sync = [package.get("filepath") for package in packages_metadata]
+    else:
+        fmt = "%Y-%m-%d %H:%M UTC"
+        # Parse the checkpoint date once rather than for every package.
+        last_mined = datetime.strptime(last_mined_date, fmt).replace(tzinfo=timezone.utc)
+        packages_to_sync = []
+        for package in packages_metadata:
+            release_date = package.get("date", "")
+            if not release_date:
+                continue
+            released = datetime.strptime(release_date, fmt).replace(tzinfo=timezone.utc)
+            if released > last_mined:
+                packages_to_sync.append(package.get("filepath"))
+
+    if logger:
+        logger(f"Starting initial package mining for {len(packages_to_sync)} packages archives.")
+
+    return packages_to_sync
+
+
+def mine_and_publish_apache_packageurls(packages_to_sync, packages_metadata, logger=None):
+    """
+    Yield a (base_purl, purls, purls_and_package_data) tuple for each distinct
+    package of ``packages_to_sync``, in order of first appearance, where:
+    - ``base_purl`` is the versionless PackageURL string of the package.
+    - ``purls`` is the list of the versioned PackageURL strings of every
+      archive of this package found in ``packages_metadata``.
+    - ``purls_and_package_data`` is the list of (purl, package data mapping)
+      tuples for these purls, in the same order.
+
+    ``packages_to_sync`` is an iterable of archive paths, and
+    ``packages_metadata`` is a list of mappings with a "filepath" and the
+    optional metadata copied to the package data.
+
+    The archives are grouped by base purl in a single pass over
+    ``packages_metadata``. This does not require the entries of a package to
+    be contiguous, and the purl elements of each entry are computed only once
+    rather than once for each package to sync.
+    """
+    if logger:
+        logger("Starting package mining for a batch of packages")
+
+    # Map each base purl to sync to a {purl: package data} mapping. A dict
+    # keeps the insertion order and makes the duplicate checks cheap.
+    packages_by_base = {}
+    for package_path in packages_to_sync or ():
+        if not package_path:
+            continue
+
+        # fetch packageURLs for package
+        if logger:
+            logger(f"getting packageURLs for package: {package_path}")
+
+        package_path = package_path.lstrip("./")
+        namespace, name, _version, _qualifiers = determine_purl_elements(package_path)
+        current_base = PackageURL(
+            type=SID_TYPE,
+            namespace=namespace,
+            name=name,
+        ).to_string()
+        packages_by_base.setdefault(current_base, {})
+
+    # Collect the purl and package data of every archive of these packages.
+    for package in packages_metadata:
+        path = package.get("filepath").lstrip("./")
+        package_namespace, package_name, package_version, package_qualifiers = (
+            determine_purl_elements(path)
+        )
+
+        base_purl = PackageURL(
+            type=SID_TYPE,
+            namespace=package_namespace,
+            name=package_name,
+        ).to_string()
+
+        package_data_by_purl = packages_by_base.get(base_purl)
+        if package_data_by_purl is None:
+            # Not a package to sync
+            continue
+
+        purl = PackageURL(
+            type=SID_TYPE,
+            namespace=package_namespace,
+            name=package_name,
+            version=package_version,
+            qualifiers=package_qualifiers,
+        ).to_string()
+
+        if purl in package_data_by_purl:
+            continue
+
+        package_data_by_purl[purl] = {
+            "name": package_name,
+            "version": package_version,
+            "repository_homepage_url": package.get("repository_homepage_url", ""),
+            "repository_download_url": package.get("repository_download_url", ""),
+            "description": package.get("description", ""),
+            "download_url": package.get("download_url", ""),
+            "size": package.get("size", ""),
+            "release_date": package.get("date", ""),
+            "mailing_list": package.get("mailing_list", ""),
+            "programming_language": package.get("programming_language", ""),
+        }
+
+    # Yield the packages in the order they were first seen.
+    for base_purl, package_data_by_purl in packages_by_base.items():
+        purls = list(package_data_by_purl)
+        purls_and_package_data = list(package_data_by_purl.items())
+        yield base_purl, purls, purls_and_package_data
diff --git a/minecode_pipelines/tests/pipes/test_apache.py b/minecode_pipelines/tests/pipes/test_apache.py
new file mode 100644
index 00000000..ad440fc9
--- /dev/null
+++ b/minecode_pipelines/tests/pipes/test_apache.py
@@ -0,0 +1,245 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# purldb is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/purldb for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import os
+
+from commoncode.testcase import FileBasedTesting
+
+from minecode_pipelines.pipes import apache
+
+
+class ApacheMiscTest(FileBasedTesting):
+    test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data")
+
+    def test_parse_apache_path_common(self):
+        paths = [
+            "abdera/1.0/apache-abdera-1.0-src.tar.gz",
+            "accumulo/1.10.1/accumulo-1.10.1-src.tar.gz",
+            "answer/1.3.0-incubating/apache-answer-1.3.0-incubating-bin-darwin-amd64.tar.gz",
+            "karaf/cellar/4.0.5/apache-karaf-cellar-4.0.5-src.tar.gz",
+            "cxf/3.1.9/apache-cxf-3.1.9-src.tar.gz",
+            "ws/commons/axiom/1_2_2/axiom-1.2.2-bin.zip",
+            "avalon/framework/jars/avalon-framework-excalibur-test-4.0b1.jar",
+            "avalon/logkit/v1.2/LogKit-1.2-bin.tar.gz",
+            "airflow/providers/2.11/apache_airflow_providers_fab-1.5.4-py3-none-any.whl",
+            "beam/vendor/beam-vendor-calcite-1_40_0/0.1/apache-beam-f6ec9cb0c167815f942cf70a674f92a04819c83b-source-release.zip",
+        ]
+        expected = [
+            {
+                "namespace": "",
+                "name": "abdera",
+                "version": "1.0",
+                "file_name": "apache-abdera-1.0-src.tar.gz",
+            },
+            {
+                "namespace": "",
+                "name": "accumulo",
+                "version": "1.10.1",
+                "file_name": "accumulo-1.10.1-src.tar.gz",
+            },
+            {
+                "namespace": "",
+                "name": "answer",
+                "version": "1.3.0-incubating",
+                "file_name": "apache-answer-1.3.0-incubating-bin-darwin-amd64.tar.gz",
+            },
+            {
+                "namespace": "karaf",
+                "name": "cellar",
+                "version": "4.0.5",
+                "file_name": "apache-karaf-cellar-4.0.5-src.tar.gz",
+            },
+            {
+                "namespace": "",
+                "name": "cxf",
+                "version": "3.1.9",
+                "file_name": "apache-cxf-3.1.9-src.tar.gz",
+            },
+            {
+                "namespace": "ws/commons",
+                "name": "axiom",
+                "version": "1_2_2",
+                "file_name": "axiom-1.2.2-bin.zip",
+            },
+            None,
+            None,
+            {
+                "namespace": "airflow",
+                "name": "providers",
+                "version": "2.11",
+                "file_name": "apache_airflow_providers_fab-1.5.4-py3-none-any.whl",
+            },
+            {
+                "namespace": "beam/vendor",
+                "name": "beam-vendor-calcite-1_40_0",
+                "version": "0.1",
+                "file_name": "apache-beam-f6ec9cb0c167815f942cf70a674f92a04819c83b-source-release.zip",
+            },
+        ]
+
+        for i, p in enumerate(paths):
+            self.assertEqual(apache.parse_apache_path_common(p), expected[i])
+
+    def test_parse_complex_with_special_word_markers(self):
+        """
+        Test paths where parsing boundaries are triggered by keywords like
+        'jars', 'binaries', or 'source'.
+        """
+        cases = [
+            (
+                "avalon/framework/jars/avalon-framework-excalibur-test-4.0b1.jar",
+                {
+                    "namespace": "avalon",
+                    "name": "framework",
+                    "version": "",
+                    "file_name": "avalon-framework-excalibur-test-4.0b1.jar",
+                },
+            ),
+            (
+                "avalon/merlin/binaries/3.0/avalon-merlin-3.0-dist.zip",
+                {
+                    "namespace": "avalon",
+                    "name": "merlin",
+                    "version": "3.0",
+                    "file_name": "avalon-merlin-3.0-dist.zip",
+                },
+            ),
+            (
+                "avalon/merlin/jars/merlin-plugin-1.0.jar",
+                {
+                    "namespace": "avalon",
+                    "name": "merlin",
+                    "version": "",
+                    "file_name": "merlin-plugin-1.0.jar",
+                },
+            ),
+            (
+                "ant/antlibs/antunit/source/apache-ant-antunit-1.5.0-src.tar.bz2",
+                {
+                    "namespace": "ant/antlibs",
+                    "name": "antunit",
+                    "version": "",
+                    "file_name": "apache-ant-antunit-1.5.0-src.tar.bz2",
+                },
+            ),
+            (
+                "ant/antlibs/compress/binaries/apache-ant-compress-1.5-bin.zip",
+                {
+                    "namespace": "ant/antlibs",
+                    "name": "compress",
+                    "version": "",
+                    "file_name": "apache-ant-compress-1.5-bin.zip",
+                },
+            ),
+        ]
+        for path, expected in cases:
+            self.assertEqual(apache.parse_apache_path_complex(path), expected)
+
+    def test_parse_complex_with_version_markers(self):
+        """
+        Test paths where parsing boundaries are explicitly triggered by version strings.
+        """
+        cases = [
+            (
+                "avalon/logkit/v1.2/LogKit-1.2-bin.tar.gz",
+                {
+                    "namespace": "avalon",
+                    "name": "logkit",
+                    "version": "1.2",
+                    "file_name": "LogKit-1.2-bin.tar.gz",
+                },
+            ),
+            (
+                "avro/avro-1.10.0/java/avro-grpc-1.10.0-sources.jar",
+                {
+                    "namespace": "",
+                    "name": "avro",
+                    "version": "1.10.0",
+                    "file_name": "avro-grpc-1.10.0-sources.jar",
+                },
+            ),
+            (
+                "groovy/2.5.23/distribution/apache-groovy-binary-2.5.23.zip",
+                {
+                    "namespace": "",
+                    "name": "groovy",
+                    "version": "2.5.23",
+                    "file_name": "apache-groovy-binary-2.5.23.zip",
+                },
+            ),
+            (
+                "beam/2.73.0/prism/windows/arm64/apache_beam-v2.73.0-prism-windows-arm64.zip",
+                {
+                    "namespace": "",
+                    "name": "beam",
+                    "version": "2.73.0",
+                    "file_name": "apache_beam-v2.73.0-prism-windows-arm64.zip",
+                },
+            ),
+            (
+                "netbeans/netbeans-maven-archetypes/netbeans-platform-app-archetype/netbeans-platform-app-archetype-1.24/netbeans-platform-app-archetype-1.24-source-release.zip",
+                {
+                    "namespace": "netbeans/netbeans-maven-archetypes",
+                    "name": "netbeans-platform-app-archetype",
+                    "version": "1.24",
+                    "file_name": "netbeans-platform-app-archetype-1.24-source-release.zip",
+                },
+            ),
+        ]
+        for path, expected in cases:
+            self.assertEqual(apache.parse_apache_path_complex(path), expected)
+
+    def test_parse_complex_fallback_logic(self):
+        """
+        Test paths without a version in any directory segment.
+        Only a version found in the path directories is the package version.
+        A version found in the filename represents the file's own version,
+        not necessarily the package version.
+        There are cases where a package contains multiple files, each with
+        its own version.
+        For instance:
+        "/namespace/package/1.0.0/john-1.2.3.zip"
+        "/namespace/package/1.0.0/doo-2.3.zip"
+        """
+        cases = [
+            (
+                "httpd/libapreq/libapreq-1.1.tar.gz",
+                {
+                    "namespace": "httpd",
+                    "name": "libapreq",
+                    "version": "",
+                    "file_name": "libapreq-1.1.tar.gz",
+                },
+            ),
+            (
+                "airflow/providers/apache_airflow_providers_cncf_kubernetes-10.18.0.tar.gz",
+                {
+                    "namespace": "airflow",
+                    "name": "providers",
+                    "version": "",
+                    "file_name": "apache_airflow_providers_cncf_kubernetes-10.18.0.tar.gz",
+                },
+            ),
+        ]
+        for path, expected in cases:
+            self.assertEqual(apache.parse_apache_path_complex(path), expected)
+
+    def test_parse_complex_release_candidate_markers(self):
+        """
+        Test handling for release candidate patterns like 'rc1', 'rc2'.
+        """
+        path = "deltacloud/rc1/deltacloud-client-1.1.0.gem"
+        expected = {
+            "namespace": "",
+            "name": "deltacloud",
+            "version": "rc1",
+            "file_name": "deltacloud-client-1.1.0.gem",
+        }
+
+        self.assertEqual(apache.parse_apache_path_complex(path), expected)
diff --git a/pyproject-minecode_pipelines.toml b/pyproject-minecode_pipelines.toml
index 076fe081..05fc3f8f 100644
--- a/pyproject-minecode_pipelines.toml
+++ b/pyproject-minecode_pipelines.toml
@@ -63,6 +63,7 @@ mine_cran = "minecode_pipelines.pipelines.mine_cran:MineCran"
 mine_swift = "minecode_pipelines.pipelines.mine_swift:MineSwift"
 mine_composer = "minecode_pipelines.pipelines.mine_composer:MineComposer"
 mine_crates = "minecode_pipelines.pipelines.mine_crates:MineCrates"
+mine_apache = "minecode_pipelines.pipelines.mine_apache:MineApache"
 
 [tool.bumpversion]
 current_version = "1.0.1"