Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions minecode_pipelines/pipelines/mine_apache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/scancode.io for support and download.


from minecode_pipelines.pipes import apache
from minecode_pipelines.pipelines import MineCodeBasePipeline
from minecode_pipelines.pipelines import _mine_and_publish_packageurls


class MineApache(MineCodeBasePipeline):
"""Mine PackageURLs from apache.org and publish them to FederatedCode."""

package_batch_size = 5

@classmethod
def steps(cls):
return (
cls.check_federatedcode_eligibility,
cls.create_federatedcode_working_dir,
cls.mine_apache_packages,
cls.get_apache_packages_to_sync,
cls.fetch_federation_config,
cls.mine_and_publish_packageurls,
cls.delete_working_dir,
)

def mine_apache_packages(self):
"""Mine apache package archive path from the find_ls file."""
(self.apache_packages_metadata, self.last_mined_date) = apache.mine_apache_packages(
logger=self.log
)

def get_apache_packages_to_sync(self):
"""Get apache packages which needs to be synced using checkpoint."""
self.packages = apache.get_apache_packages_to_sync(
packages_metadata=self.apache_packages_metadata,
last_mined_date=self.last_mined_date,
logger=self.log,
)

def packages_count(self):
return len(list(self.mine_packageurls()))

def mine_packageurls(self):
"""Yield npm packageURLs for all mined npm package names."""
yield from apache.mine_and_publish_apache_packageurls(
packages_to_sync=self.packages,
packages_metadata=self.apache_packages_metadata,
logger=self.log,
)

def mine_and_publish_packageurls(self):
"""Mine and publish PackageURLs."""
_mine_and_publish_packageurls(
packageurls=self.mine_packageurls(),
total_package_count=self.packages_count(),
data_clusters=self.data_clusters,
checked_out_repos=self.checked_out_repos,
working_path=self.working_path,
append_purls=self.append_purls,
commit_msg_func=self.commit_message,
logger=self.log,
checkpoint_on_commit=True,
batch_size=self.package_batch_size,
)
2 changes: 2 additions & 0 deletions minecode_pipelines/pipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ def update_checkpoints_file_in_github(checkpoints_file, cloned_repo, path):
from scanpipe.pipes.federatedcode import commit_and_push_changes

checkpoint_path = os.path.join(cloned_repo.working_dir, path)
# Create the directory if does not exist
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
shutil.move(checkpoints_file, checkpoint_path)
commit_message = """Update federatedcode purl mining checkpoint"""
commit_and_push_changes(
Expand Down
Loading