diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..9290471 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,37 @@ +name: SoundScrape CI + +on: [push, pull_request] + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: ["ubuntu-latest", "macos-latest"] + python-version: [3.7, 3.8] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 black pytest coverage + pip install -r requirements.txt + - name: Black code formatting + run: | + black --check --line-length 127 . + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --ignore=E203,E231 --statistics + - name: Run tests + run: | + PYTHONPATH=$(pwd) coverage run -m --include="soundscrape/*" pytest tests/test.py + - name: Coverage report + run: | + coverage report -m diff --git a/.gitignore b/.gitignore index 05ff1f9..07cec54 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,12 @@ +.vscode/ +build/ +dist/ env/ +htmlcov/ +.coverage *.DS_Store *.pyc *.bak -build/ -dist/ +*.mp3 +*.m4a +*.tmp diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index eebb013..0000000 --- a/.travis.yml +++ /dev/null @@ -1,12 +0,0 @@ -language: python -python: - - "2.7" - - "3.3" - - "3.4" - - "3.5" -# command to install dependencies -install: -# - "pip install -r requirements.txt" - - "pip install setuptools --upgrade; python setup.py install" -# command to run tests -script: nosetests diff --git a/README.md b/README.md index 139d58a..acfe6e3 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ ![SoundScrape!](http://i.imgur.com/nHAt2ow.png) -SoundScrape [![Build Status](https://travis-ci.org/Miserlou/SoundScrape.svg)](https://travis-ci.org/Miserlou/SoundScrape) [![Python 2](https://img.shields.io/badge/Python-2-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![Python 3](https://img.shields.io/badge/Python-3-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![PyPI](https://img.shields.io/pypi/v/soundscrape.svg)](https://pypi.python.org/pypi/SoundScrape) +SoundScrape [![Build Status](https://github.com/Miserlou/SoundScrape/workflows/SoundScrape%20CI/badge.svg)](https://github.com/Miserlou/SoundScrape/actions) [![Python 3](https://img.shields.io/badge/Python-3-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![PyPI](https://img.shields.io/pypi/v/soundscrape.svg)](https://pypi.python.org/pypi/SoundScrape) ============== -**SoundScrape** makes it super easy to download artists from SoundCloud (and Bandcamp and MixCloud) - even those which 
don't have download links! It automatically creates ID3 tags as well (including album art), which is handy. +**SoundScrape** makes it super easy to download artists from SoundCloud (and others) - even those which don't have download links! It automatically creates ID3 tags as well (including album art), which is handy. Usage --------- @@ -91,15 +91,6 @@ By default, SoundScrape will try to rip everything it can. However, if you only soundscrape sly-dogg -d ``` -Keep Preview Tracks --------- - -By default, SoundScrape will skip the 30-second preview tracks that SoundCloud now provides. You can choose to keep these preview snippets with the *-k* argument. - -```bash -soundscrape chromeo -k -``` - Folders -------- @@ -134,19 +125,6 @@ soundscrape -b http://music.monstercat.com/ Note that the full URL must be included. -Mixcloud --------- - -SoundScrape can also grab mixes from Mixcloud. This feature is extremely expermental and is in no way guaranteed to work! - -Finds the original mp3 of a mix and grabs that (with tags and album art) if it can, or else just gets the raw m4a stream. - -Mixcloud currently only takes an invidiual mix. Capacity for a whole artist's profile due shortly. 
- -```bash -soundscrape https://www.mixcloud.com/corenewsuploads/flume-essential-mix-2015-10-03/ -of -``` - Audiomack -------- diff --git a/requirements.txt b/requirements.txt index cd4e07e..bb41b40 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,10 @@ args>=0.1.0 +atoma>=0.0.17 clint>=0.3.2 demjson>=2.2.2 -fudge>=1.0.3 -nose>=1.3.7 requests[security]>=2.9.0 +sanitize-filename>=1.2.0 setuptools>=18.0.0 -simplejson>=3.3.1 soundcloud>=0.4.1 wheel>=0.24.0 mutagen>=1.31.0 diff --git a/setup.py b/setup.py index 6c9851a..674eac5 100644 --- a/setup.py +++ b/setup.py @@ -1,58 +1,46 @@ +"""Package setup for SoundScrape.""" import os -import setuptools -import soundscrape -import sys from setuptools import setup -# To support 2/3 installation -setup_version = int(setuptools.__version__.split('.')[0]) -if setup_version < 18: - print("Please upgrade your setuptools to install SoundScrape: ") - print("pip install -U pip wheel setuptools") - quit() +import soundscrape # Set external files try: from pypandoc import convert - README = convert('README.md', 'rst') + + README = convert("README.md", "rst") except ImportError: - README = open(os.path.join(os.path.dirname(__file__), 'README.md')).read() + README = open(os.path.join(os.path.dirname(__file__), "README.md")).read() -with open(os.path.join(os.path.dirname(__file__), 'requirements.txt')) as f: +with open(os.path.join(os.path.dirname(__file__), "requirements.txt")) as f: required = f.read().splitlines() # allow setup.py to be run from any path os.chdir(os.path.normpath(os.path.join(os.path.abspath(__file__), os.pardir))) setup( - name='soundscrape', + name="soundscrape", version=soundscrape.__version__, - packages=['soundscrape'], + packages=["soundscrape"], install_requires=required, - extras_require={ ':python_version < "3.0"': [ 'wsgiref>=0.1.2', ], }, + extras_requires={"tests": ["pytest", "coverage"],}, include_package_data=True, - license='MIT License', - description='Scrape an artist from SoundCloud', 
+ license="MIT License", + description="Scrape an artist from SoundCloud", long_description=README, - url='https://github.com/Miserlou/SoundScrape', - author='Rich Jones', - author_email='rich@openwatch.net', - entry_points={ - 'console_scripts': [ - 'soundscrape = soundscrape.soundscrape:main', - ] - }, + url="https://github.com/Miserlou/SoundScrape", + author="Rich Jones", + author_email="rich@openwatch.net", + entry_points={"console_scripts": ["soundscrape = soundscrape.soundscrape:main",]}, classifiers=[ - 'Environment :: Console', - 'License :: OSI Approved :: Apache Software License', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Topic :: Internet :: WWW/HTTP', - 'Topic :: Internet :: WWW/HTTP :: Dynamic Content', + "Environment :: Console", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Topic :: Internet :: WWW/HTTP", + "Topic :: Internet :: WWW/HTTP :: Dynamic Content", ], ) diff --git a/soundscrape/.gitignore b/soundscrape/.gitignore deleted file mode 100644 index bf9e782..0000000 --- a/soundscrape/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.mp3 \ No newline at end of file diff --git a/soundscrape/__init__.py b/soundscrape/__init__.py index 8ada23a..18622a7 100644 --- a/soundscrape/__init__.py +++ b/soundscrape/__init__.py @@ -1 +1,5 @@ -__version__ = '0.30.2' +"""SoundScrape initialization.""" + +# Semantic versioning +# (reference: https://packaging.python.org/guides/distributing-packages-using-setuptools/#semantic-versioning-preferred) +__version__ = "1.00" diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 397ca71..f5b5292 100755 --- 
a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -1,119 +1,118 @@ #! /usr/bin/env python +"""Main SoundScrape module.""" from __future__ import unicode_literals import argparse -import demjson import os import re -import requests -import soundcloud import sys import urllib - -from clint.textui import colored, puts, progress from datetime import datetime -from mutagen.mp3 import MP3, EasyMP3 -from mutagen.id3 import APIC, WXXX -from mutagen.id3 import ID3 as OldID3 -from subprocess import Popen, PIPE +from os import W_OK, access, mkdir from os.path import dirname, exists, join -from os import access, mkdir, W_OK +from subprocess import PIPE, Popen + +import atoma +import demjson +import requests +import soundcloud +from clint.textui import colored, progress, puts +from mutagen import MutagenError +from mutagen.id3 import APIC +from mutagen.id3 import ID3 as OldID3 +from mutagen.id3 import WXXX +from mutagen.mp3 import MP3, EasyMP3, HeaderNotFoundError +from sanitize_filename import sanitize #################################################################### # Please be nice with this! -CLIENT_ID = 'a3dd183a357fcff9a6943c0d65664087' -CLIENT_SECRET = '7e10d33e967ad42574124977cf7fa4b7' -MAGIC_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28' +CLIENT_ID = "a3dd183a357fcff9a6943c0d65664087" +CLIENT_SECRET = "7e10d33e967ad42574124977cf7fa4b7" +MAGIC_CLIENT_ID = "b45b1aa10f1ac2941910a7f0d10f8e28" -AGGRESSIVE_CLIENT_ID = 'OmTFHKYSMLFqnu2HHucmclAptedxWXkq' -APP_VERSION = '1481046241' +AGGRESSIVE_CLIENT_ID = "OmTFHKYSMLFqnu2HHucmclAptedxWXkq" +APP_VERSION = "1481046241" #################################################################### def main(): """ - Main function. + Argument parsing main function. Converts arguments to Python and processes accordingly. - """ - # Hack related to #58 if sys.platform == "win32": - os.system("chcp 65001"); - - parser = argparse.ArgumentParser(description='SoundScrape. 
Scrape an artist from SoundCloud.\n') - parser.add_argument('artist_url', metavar='U', type=str, nargs='*', - help='An artist\'s SoundCloud username or URL') - parser.add_argument('-n', '--num-tracks', type=int, default=sys.maxsize, - help='The number of tracks to download') - parser.add_argument('-g', '--group', action='store_true', - help='Use if downloading tracks from a SoundCloud group') - parser.add_argument('-b', '--bandcamp', action='store_true', - help='Use if downloading from Bandcamp rather than SoundCloud') - parser.add_argument('-m', '--mixcloud', action='store_true', - help='Use if downloading from Mixcloud rather than SoundCloud') - parser.add_argument('-a', '--audiomack', action='store_true', - help='Use if downloading from Audiomack rather than SoundCloud') - parser.add_argument('-c', '--hive', action='store_true', - help='Use if downloading from Hive.co rather than SoundCloud') - parser.add_argument('-l', '--likes', action='store_true', - help='Download all of a user\'s Likes.') - parser.add_argument('-L', '--login', type=str, default='soundscrape123@mailinator.com', - help='Set login') - parser.add_argument('-d', '--downloadable', action='store_true', - help='Only fetch tracks with a Downloadable link.') - parser.add_argument('-t', '--track', type=str, default='', - help='The name of a specific track by an artist') - parser.add_argument('-f', '--folders', action='store_true', - help='Organize saved songs in folders by artists') - parser.add_argument('-p', '--path', type=str, default='', - help='Set directory path where downloads should be saved to') - parser.add_argument('-P', '--password', type=str, default='soundscraperocks', - help='Set password') - parser.add_argument('-o', '--open', action='store_true', - help='Open downloaded files after downloading.') - parser.add_argument('-k', '--keep', action='store_true', - help='Keep 30-second preview tracks') - parser.add_argument('-v', '--version', action='store_true', default=False, - help='Display 
the current version of SoundScrape') + os.system("chcp 65001") + + parser = argparse.ArgumentParser(description="SoundScrape. Scrape an artist from SoundCloud.\n") + parser.add_argument( + "artist_url", metavar="U", type=str, nargs="*", help="An artist's SoundCloud username or URL", + ) + parser.add_argument( + "-n", "--num-tracks", type=int, default=sys.maxsize, help="The number of tracks to download", + ) + parser.add_argument( + "-g", "--group", action="store_true", help="Use if downloading tracks from a SoundCloud group", + ) + parser.add_argument( + "-b", "--bandcamp", action="store_true", help="Use if downloading from Bandcamp rather than SoundCloud", + ) + parser.add_argument( + "-a", "--audiomack", action="store_true", help="Use if downloading from Audiomack rather than SoundCloud", + ) + parser.add_argument("-l", "--likes", action="store_true", help="Download all of a user's Likes.") + parser.add_argument( + "-L", "--login", type=str, default="soundscrape123@mailinator.com", help="Set login", + ) + parser.add_argument( + "-d", "--downloadable", action="store_true", help="Only fetch tracks with a downloadable link.", + ) + parser.add_argument( + "-t", "--track", type=str, default="", help="The name of a specific track by an artist", + ) + parser.add_argument( + "-f", "--folders", action="store_true", help="Organize saved songs in folders by artists", + ) + parser.add_argument( + "-p", "--path", type=str, default="", help="Set directory path where downloads should be saved to", + ) + parser.add_argument("-P", "--password", type=str, default="soundscraperocks", help="Set password") + parser.add_argument( + "-o", "--open", action="store_true", help="Open downloaded files after downloading.", + ) + parser.add_argument( + "-v", "--version", action="store_true", default=False, help="Display the current version of SoundScrape", + ) args = parser.parse_args() vargs = vars(args) - if vargs['version']: + if vargs["version"]: import pkg_resources + version = 
pkg_resources.require("soundscrape")[0].version print(version) return - if not vargs['artist_url']: - parser.error('Please supply an artist\'s username or URL!') - - if sys.version_info < (3,0,0): - vargs['artist_url'] = urllib.quote(vargs['artist_url'][0], safe=':/') - else: - vargs['artist_url'] = urllib.parse.quote(vargs['artist_url'][0], safe=':/') + if not vargs["artist_url"]: + parser.error("Please supply an artist's username or URL!") - artist_url = vargs['artist_url'] + vargs["artist_url"] = urllib.parse.quote(vargs["artist_url"][0], safe=":/") - if not exists(vargs['path']): - if not access(dirname(vargs['path']), W_OK): - vargs['path'] = '' + if not exists(vargs["path"]): + if not access(dirname(vargs["path"]), W_OK): + vargs["path"] = "" else: - mkdir(vargs['path']) + mkdir(vargs["path"]) - if 'bandcamp.com' in artist_url or vargs['bandcamp']: + if "bandcamp.com" in vargs["artist_url"] or vargs["bandcamp"]: process_bandcamp(vargs) - elif 'mixcloud.com' in artist_url or vargs['mixcloud']: - process_mixcloud(vargs) - elif 'audiomack.com' in artist_url or vargs['audiomack']: + elif "audiomack.com" in vargs["artist_url"] or vargs["audiomack"]: process_audiomack(vargs) - elif 'hive.co' in artist_url or vargs['hive']: - process_hive(vargs) - elif 'musicbed.com' in artist_url: + elif "musicbed.com" in vargs["artist_url"]: process_musicbed(vargs) else: process_soundcloud(vargs) @@ -126,209 +125,206 @@ def main(): def process_soundcloud(vargs): """ - Main SoundCloud path. - """ + Process SoundCloud download. 
- artist_url = vargs['artist_url'] - track_permalink = vargs['track'] - keep_previews = vargs['keep'] - folders = vargs['folders'] + Args: + vargs (dict): - id3_extras = {} - one_track = False + """ + url = vargs["artist_url"].lower() + track_permalink = vargs["track"].lower() + num_tracks = vargs["num_tracks"] + use_folders = vargs["folders"] + custom_path = vargs["path"] + downloadable_links_only = vargs["downloadable"] + + album = None + artist = None + filenames = [] likes = False - client = get_client() - if 'soundcloud' not in artist_url.lower(): - if vargs['group']: - artist_url = 'https://soundcloud.com/groups/' + artist_url.lower() + client = soundcloud.Client(client_id=CLIENT_ID) + + if "soundcloud" not in url.lower(): + if vargs["group"]: + url = f"https://soundcloud.com/groups/{url}" elif len(track_permalink) > 0: - one_track = True - track_url = 'https://soundcloud.com/' + artist_url.lower() + '/' + track_permalink.lower() + url = f"https://soundcloud.com/{url}/{track_permalink}" else: - artist_url = 'https://soundcloud.com/' + artist_url.lower() - if vargs['likes'] or 'likes' in artist_url.lower(): + url = f"https://soundcloud.com/{url}" + if vargs["likes"] or "likes" in url.lower(): likes = True - if 'likes' in artist_url.lower(): - artist_url = artist_url[0:artist_url.find('/likes')] + if "likes" in url.lower(): + url = url[0 : url.find("/likes")] likes = True - if one_track: - num_tracks = 1 - else: - num_tracks = vargs['num_tracks'] - try: - if one_track: - resolved = client.get('/resolve', url=track_url, limit=200) - - elif likes: - userId = str(client.get('/resolve', url=artist_url).id) + resolved = client.get("/resolve", url=url, limit=200) - resolved = client.get('/users/' + userId + '/favorites', limit=200, linked_partitioning=1) - next_href = False - if(hasattr(resolved, 'next_href')): + if likes: + resolved = client.get(f"/users/{resolved.id}/favorites", limit=200, linked_partitioning=1) + next_href = None + if hasattr(resolved, 
"next_href"): next_href = resolved.next_href - while (next_href): - - resolved2 = requests.get(next_href).json() - if('next_href' in resolved2): - next_href = resolved2['next_href'] - else: - next_href = False - resolved2 = soundcloud.resource.ResourceList(resolved2['collection']) - resolved.collection.extend(resolved2) + while next_href is not None: + next_resolved = requests.get(next_href).json() + next_href = None + if "next_href" in next_resolved: + next_href = next_resolved["next_href"] + next_resolved = soundcloud.resource.ResourceList(next_resolved["collection"]) + resolved.collection.extend(next_resolved) resolved = resolved.collection + except Exception as e: + filename = force_download_track_from_soundcloud(str(e), use_folders, custom_path) + if filename is not None: + filenames.append(filename) + if vargs["open"]: + open_files(filenames) + return + if not hasattr(resolved, "kind"): + # This is either likes or sets. + tracks = resolved + elif resolved.kind == "playlist": + album = resolved.title + if len(resolved.tracks) > 0: + tracks = resolved.tracks else: - resolved = client.get('/resolve', url=artist_url, limit=200) - - except Exception as e: # HTTPError? + tracks = get_soundcloud_playlist_data(resolved.id)["tracks"] + elif resolved.kind == "track": + tracks = [resolved] + elif resolved.kind == "group": + tracks = client.get(f"/groups/{resolved.id}/tracks", limit=200) + else: + tracks = client.get(f"/users/{resolved.id}/tracks", limit=200) + + if hasattr(resolved, "full_name"): + artist = resolved.full_name + if artist == "": + artist = resolved.username + + if len(tracks) == 0 and resolved.track_count > 0: + # SoundCloud has a unfortunate bug where some artists don't have any tracks returned using: + # client.get("/users/" + artist_id + "/tracks", limit=200) + # There are a number of reports of this issue since late 2019 on StackOverflow. 
+ # (reference: https://stackoverflow.com/questions/59204383, https://stackoverflow.com/questions/61807979) + # It seems that the common pattern is that if an artist has any tracks marked as private, + # e.g. the track is not downloadable (only streamable), then SoundCloud won't return any + # tracks for that artist. The way around this is to refer to the artist's RSS feed. This + # is in the form of: + # http://feeds.soundcloud.com/users/soundcloud:users:{user_id}/sounds.rss + + filenames = download_tracks_from_soundcloud_feed(resolved.id, artist, num_tracks, use_folders, custom_path) + else: + filenames = download_tracks_from_soundcloud( + client, tracks, artist, album, num_tracks, downloadable_links_only, use_folders, custom_path, + ) - # SoundScrape is trying to prevent us from downloading this. - # We're going to have to stop trusting the API/client and - # do all our own scraping. Boo. + if vargs["open"]: + open_files(filenames) - if '404 Client Error' in str(e): - puts(colored.red("Problem downloading [404]: ") + colored.white("Item Not Found")) - return None + return - message = str(e) - item_id = message.rsplit('/', 1)[-1].split('.json')[0].split('?client_id')[0] - hard_track_url = get_hard_track_url(item_id) - track_data = get_soundcloud_data(artist_url) - puts_safe(colored.green("Scraping") + colored.white(": " + track_data['title'])) +def force_download_track_from_soundcloud(message, use_folders=False, custom_path=""): + """ + Try to force download a track from SoundCloud despite a client error.
- filenames = [] - filename = sanitize_filename(track_data['artist'] + ' - ' + track_data['title'] + '.mp3') + Args: + message (str): + use_folder (bool): + custom_path (str): - if folders: - name_path = join(vargs['path'], track_data['artist']) - if not exists(name_path): - mkdir(name_path) - filename = join(name_path, filename) - else: - filename = join(vargs['path'], filename) + Returns: + filename of successfully downloaded track or None - if exists(filename): - puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_data['title'])) - return None + """ + if "404" in message.lower(): + puts(colored.red("Problem downloading [404]: ") + colored.white("Item Not Found")) + return None - filename = download_file(hard_track_url, filename) - tagged = tag_file(filename, - artist=track_data['artist'], - title=track_data['title'], - year='2018', - genre='', - album='', - artwork_url='') + item_id = message.rsplit("/", 1)[-1].split(".json")[0].split("?client_id")[0] + url = get_soundcloud_track_url(item_id) + if url is None: + return None + response = requests.get(url) + if response.status_code != 200: + puts(colored.red("Problem downloading: ") + colored.white(url)) + return None - if not tagged: - wav_filename = filename[:-3] + 'wav' - os.rename(filename, wav_filename) - filename = wav_filename + title_tag = response.text.split("")[1].split("</title")[0] - filenames.append(filename) + track_data = { + "url": url, + "artist": title_tag.split(" by ")[1].split("|")[0].strip(), + "title": title_tag.split(" by ")[0].strip(), + } + return download_single_track_from_soundcloud(track_data, use_folders, custom_path) - else: - aggressive = False +def download_tracks_from_soundcloud_feed(track_id, artist, num_tracks=sys.maxsize, use_folders=False, custom_path=""): + """ + Use the artist's RSS feed from SoundCloud to get tracks. - # This is is likely a 'likes' page. 
- if not hasattr(resolved, 'kind'): - tracks = resolved - else: - if resolved.kind == 'artist': - artist = resolved - artist_id = str(artist.id) - tracks = client.get('/users/' + artist_id + '/tracks', limit=200) - elif resolved.kind == 'playlist': - id3_extras['album'] = resolved.title - if resolved.tracks != []: - tracks = resolved.tracks - else: - tracks = get_soundcloud_api_playlist_data(resolved.id)['tracks'] - tracks = tracks[:num_tracks] - aggressive = True - for track in tracks: - download_track(track, resolved.title, keep_previews, folders, custom_path=vargs['path']) - - elif resolved.kind == 'track': - tracks = [resolved] - elif resolved.kind == 'group': - group = resolved - group_id = str(group.id) - tracks = client.get('/groups/' + group_id + '/tracks', limit=200) - else: - artist = resolved - artist_id = str(artist.id) - tracks = client.get('/users/' + artist_id + '/tracks', limit=200) - if tracks == [] and artist.track_count > 0: - aggressive = True - filenames = [] - - # this might be buggy - data = get_soundcloud_api2_data(artist_id) - - for track in data['collection']: - - if len(filenames) >= num_tracks: - break - - if track['type'] == 'playlist': - track['playlist']['tracks'] = track['playlist']['tracks'][:num_tracks] - for playlist_track in track['playlist']['tracks']: - album_name = track['playlist']['title'] - filename = download_track(playlist_track, album_name, keep_previews, folders, filenames, custom_path=vargs['path']) - if filename: - filenames.append(filename) - else: - d_track = track['track'] - filename = download_track(d_track, custom_path=vargs['path']) - if filename: - filenames.append(filename) - - if not aggressive: - filenames = download_tracks(client, tracks, num_tracks, vargs['downloadable'], vargs['folders'], vargs['path'], - id3_extras=id3_extras) - - if vargs['open']: - open_files(filenames) + Args: + track_id (int): + artist (str): + num_tracks (int): + use_folders (bool): + custom_path (str): + Returns: + filenames 
downloaded from the RSS feed -def get_client(): - """ - Return a new SoundCloud Client object. """ - client = soundcloud.Client(client_id=CLIENT_ID) - return client + filenames = [] + url = f"http://feeds.soundcloud.com/users/soundcloud:users:{track_id}/sounds.rss" + response = requests.get(url) + if response.status_code != 200: + puts(colored.red("Problem downloading: ") + colored.white(url)) + return None -def download_track(track, album_name=u'', keep_previews=False, folders=False, filenames=[], custom_path=''): - """ - Given a track, force scrape it. - """ + feed = atoma.parse_rss_bytes(response.content) + for i, feed_item in enumerate(feed.items): + if i > num_tracks - 1: + continue - hard_track_url = get_hard_track_url(track['id']) + for enclosure in feed_item.enclosures: + track_data = { + "url": enclosure.url, + "artist": artist, + "title": feed_item.title, + "date": feed_item.pub_date.year, + "artwork_url": feed.image.url, + } + + filename = download_single_track_from_soundcloud(track_data, use_folders, custom_path) + if filename is not None: + filenames.append(filename) + return filenames - # We have no info on this track whatsoever. - if not 'title' in track: - return None - if not keep_previews: - if (track.get('duration', 0) < track.get('full_duration', 0)): - puts_safe(colored.yellow("Skipping preview track") + colored.white(": " + track['title'])) - return None +def download_single_track_from_soundcloud(track_data, use_folders=False, custom_path=""): + """ + Download a single track from SoundCloud. 
- # May not have a "full name" - name = track['user'].get('full_name', '') - if name == '': - name = track['user']['username'] + Args: + track_data (dict): + use_folders (bool): + custom_path (str): - filename = sanitize_filename(name + ' - ' + track['title'] + '.mp3') + Returns: + filename or None + + """ + required_keys = {"artist", "title", "url"} + if not track_data.keys() >= required_keys: + return None + + filename = sanitize(f"{track_data['artist']} - {track_data['title']}.mp3") - if folders: - name_path = join(custom_path, name) + if use_folders: + name_path = join(custom_path, track_data["artist"]) if not exists(name_path): mkdir(name_path) filename = join(name_path, filename) @@ -336,197 +332,152 @@ def download_track(track, album_name=u'', keep_previews=False, folders=False, fi filename = join(custom_path, filename) if exists(filename): - puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track['title'])) + puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_data["title"])) return None - # Skip already downloaded track. - if filename in filenames: - return None + puts_safe(colored.green("Scraping: ") + colored.white(track_data["title"])) - if hard_track_url: - puts_safe(colored.green("Scraping") + colored.white(": " + track['title'])) - else: - # Region coded? 
- puts_safe(colored.yellow("Unable to download") + colored.white(": " + track['title'])) - return None + filename = download_file(track_data["url"], filename) - filename = download_file(hard_track_url, filename) - tagged = tag_file(filename, - artist=name, - title=track['title'], - year=track['created_at'][:4], - genre=track['genre'], - album=album_name, - artwork_url=track['artwork_url']) + tagged = tag_file( + filename, + artist=track_data["artist"], + title=track_data["title"], + album=track_data.get("album"), + year=track_data.get("date"), + genre=track_data.get("genre"), + artwork_url=track_data.get("artwork_url"), + ) if not tagged: - wav_filename = filename[:-3] + 'wav' + wav_filename = f"{filename[:-3]}wav" os.rename(filename, wav_filename) filename = wav_filename return filename -def download_tracks(client, tracks, num_tracks=sys.maxsize, downloadable=False, folders=False, custom_path='', id3_extras={}): - """ - Given a list of tracks, iteratively download all of them. - - """ +def download_tracks_from_soundcloud( + client, tracks, artist, album, num_tracks=sys.maxsize, downloadable_links_only=False, use_folders=False, custom_path="", +): + """Given a list of tracks, iteratively download all of them.""" filenames = [] for i, track in enumerate(tracks): + track_id = track.id if hasattr(track, "id") else track.get("id", "") + title = track.title if hasattr(track, "title") else track.get("title", "") - # "Track" and "Resource" objects are actually different, - # even though they're the same. 
- if isinstance(track, soundcloud.resource.Resource): - - try: + if hasattr(track, "kind") and track.kind == "playlist": + if len(track.tracks) > 0: + playlist_tracks = track.tracks + else: + playlist_tracks = get_soundcloud_playlist_data(track_id) + playlist_tracks = playlist_tracks["tracks"] if playlist_tracks is not None else None + if playlist_tracks is not None: + download_tracks_from_soundcloud( + client, playlist_tracks, artist, album, num_tracks, downloadable_links_only, use_folders, custom_path, + ) + else: + puts_safe(colored.white(title) + colored.red(" is not downloadable.")) + continue - t_track = {} - t_track['downloadable'] = track.downloadable - t_track['streamable'] = track.streamable - t_track['title'] = track.title - t_track['user'] = {'username': track.user['username']} - t_track['release_year'] = track.release - t_track['genre'] = track.genre - t_track['artwork_url'] = track.artwork_url - if track.downloadable: - t_track['stream_url'] = track.download_url - else: - if downloadable: - puts_safe(colored.red("Skipping") + colored.white(": " + track.title)) - continue - if hasattr(track, 'stream_url'): - t_track['stream_url'] = track.stream_url - else: - t_track['direct'] = True - streams_url = "https://api.soundcloud.com/i1/tracks/%s/streams?client_id=%s&app_version=%s" % ( - str(track.id), AGGRESSIVE_CLIENT_ID, APP_VERSION) - response = requests.get(streams_url).json() - t_track['stream_url'] = response['http_mp3_128_url'] - - track = t_track - except Exception as e: - puts_safe(colored.white(track.title) + colored.red(' is not downloadable.')) + track_data = {} + + user = track.user if hasattr(track, "user") else track.get("user") + track_artist = artist + if user is not None: + track_artist = user["full_name"] if user.get("full_name") is not None else user["username"] + + track_data = { + "direct": False, + "artist": artist if artist is not None else track_artist, + "album": album, + "title": title, + "streamable": track.streamable if 
hasattr(track, "streamable") else track.get("streamable", False), + "date": track.release if hasattr(track, "release") else track.get("release"), + "genre": track.genre if hasattr(track, "genre") else track.get("genre"), + "artwork_url": track.artwork_url if hasattr(track, "artwork_url") else track.get("artwork_url"), + } + if track.downloadable if hasattr(track, "downloadable") else track.get("downloadable", False): + track_data["url"] = track.download_url if hasattr(track, "download_url") else track.get("download_url") + else: + if downloadable_links_only: + puts_safe(colored.red("Skipping: ") + colored.white(title)) continue + if hasattr(track, "stream_url") or isinstance(track, dict): + track_data["url"] = track.stream_url if hasattr(track, "stream_url") else track.get("stream_url") + else: + track_data["url"] = get_soundcloud_track_url(track_id) + track_data["direct"] = True + + if track_data["url"] is None: + puts_safe(colored.white(title) + colored.red(" is not downloadable.")) + continue if i > num_tracks - 1: continue try: - if not track.get('stream_url', False): - puts_safe(colored.white(track['title']) + colored.red(' is not downloadable.')) - continue - else: - track_artist = sanitize_filename(track['user']['username']) - track_title = sanitize_filename(track['title']) - track_filename = track_artist + ' - ' + track_title + '.mp3' - - if folders: - track_artist_path = join(custom_path, track_artist) - if not exists(track_artist_path): - mkdir(track_artist_path) - track_filename = join(track_artist_path, track_filename) + if not track_data["direct"]: + stream = client.get(track_data["url"], allow_redirects=False, limit=200) + if hasattr(stream, "location"): + track_data["url"] = stream.location else: - track_filename = join(custom_path, track_filename) - - if exists(track_filename): - puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_title)) - continue - - puts_safe(colored.green("Downloading") + colored.white(": " + 
track['title'])) - - - if track.get('direct', False): - location = track['stream_url'] - else: - stream = client.get(track['stream_url'], allow_redirects=False, limit=200) - if hasattr(stream, 'location'): - location = stream.location - else: - location = stream.url - - filename = download_file(location, track_filename) - tagged = tag_file(filename, - artist=track['user']['username'], - title=track['title'], - year=track['release_year'], - genre=track['genre'], - album=id3_extras.get('album', None), - artwork_url=track['artwork_url']) - - if not tagged: - wav_filename = filename[:-3] + 'wav' - os.rename(filename, wav_filename) - filename = wav_filename + track_data["url"] = stream.url + filename = download_single_track_from_soundcloud(track_data, use_folders, custom_path) + if filename is not None: filenames.append(filename) except Exception as e: - puts_safe(colored.red("Problem downloading ") + colored.white(track['title'])) - puts_safe(str(e)) + filename = force_download_track_from_soundcloud(str(e), use_folders, custom_path) + if filename is not None: + filenames.append(filename) + else: + puts_safe(colored.red("Problem downloading ") + colored.white(track_data["title"])) return filenames - -def get_soundcloud_data(url): +def get_soundcloud_playlist_data(playlist_id): """ - Scrapes a SoundCloud page for a track's important information. + Get playlist data from SoundCloud. 
+ + Args: + playlist_id (str): id of the playlist Returns: - dict: of audio data + data about the playlist with playlist_id or None """ + url = f"https://api.soundcloud.com/playlists/{playlist_id}?representation=full&client_id=02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea&app_version=1467724310" + response = requests.get(url) + if response.status_code != 200: + puts(colored.red("Problem getting playlist data from: ") + colored.white(url)) + return None - data = {} - - request = requests.get(url) - - title_tag = request.text.split('<title>')[1].split('</title')[0] - data['title'] = title_tag.split(' by ')[0].strip() - data['artist'] = title_tag.split(' by ')[1].split('|')[0].strip() - # XXX Do more.. - - return data + return response.json() -def get_soundcloud_api2_data(artist_id): - """ - Scrape the new API. Returns the parsed JSON response. +def get_soundcloud_track_url(track_id): """ + Get the track url from SoundCloud. - v2_url = "https://api-v2.soundcloud.com/stream/users/%s?limit=500&client_id=%s&app_version=%s" % ( - artist_id, AGGRESSIVE_CLIENT_ID, APP_VERSION) - response = requests.get(v2_url) - parsed = response.json() + Args: + track_id (str): id of the track - return parsed + Returns: + url to the track with track_id or None -def get_soundcloud_api_playlist_data(playlist_id): - """ - Scrape the new API. Returns the parsed JSON response. """ - - url = "https://api.soundcloud.com/playlists/%s?representation=full&client_id=02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea&app_version=1467724310" % ( - playlist_id) + url = ( + f"https://api.soundcloud.com/i1/tracks/{track_id}/streams/?client_id={AGGRESSIVE_CLIENT_ID}&app_version={APP_VERSION}" + ) response = requests.get(url) - parsed = response.json() - - return parsed - -def get_hard_track_url(item_id): - """ - Hard-scrapes a track. 
- """ + if response.status_code != 200: + puts(colored.red("Problem getting track data from: ") + colored.white(url)) + return None - streams_url = "https://api.soundcloud.com/i1/tracks/%s/streams/?client_id=%s&app_version=%s" % ( - item_id, AGGRESSIVE_CLIENT_ID, APP_VERSION) - response = requests.get(streams_url) json_response = response.json() + return json_response["http_mp3_128_url"] - if response.status_code == 200: - hard_track_url = json_response['http_mp3_128_url'] - return hard_track_url - else: - return None #################################################################### # Bandcamp @@ -535,17 +486,21 @@ def get_hard_track_url(item_id): def process_bandcamp(vargs): """ - Main BandCamp path. - """ + Process BandCamp download. - artist_url = vargs['artist_url'] + Args: + vargs (dict): + """ + artist_url = vargs["artist_url"] - if 'bandcamp.com' in artist_url or ('://' in artist_url and vargs['bandcamp']): + if "bandcamp.com" in artist_url or ("://" in artist_url and vargs["bandcamp"]): bc_url = artist_url else: - bc_url = 'https://' + artist_url + '.bandcamp.com/music' + bc_url = f"https://{artist_url}.bandcamp.com/music" - filenames = scrape_bandcamp_url(bc_url, num_tracks=vargs['num_tracks'], folders=vargs['folders'], custom_path=vargs['path']) + filenames = scrape_bandcamp_url( + bc_url, num_tracks=vargs["num_tracks"], use_folders=vargs["folders"], custom_path=vargs["path"], + ) # check if we have lists inside a list, which indicates the # scraping has gone recursive, so we must format the output @@ -558,21 +513,22 @@ def process_bandcamp(vargs): # ( reference: http://stackoverflow.com/a/11264751 ) filenames = [val for sub in filenames for val in sub] - if vargs['open']: + if vargs["open"]: open_files(filenames) return -# Largely borrowed from Ronier's bandcampscrape -def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path=''): +# Largely borrowed from bandcampscrape +# ( reference: https://github.com/ronier/bandcampscrape) 
+def scrape_bandcamp_url(url, num_tracks=sys.maxsize, use_folders=False, custom_path=""): """ Pull out artist and track info from a Bandcamp URL. Returns: list: filenames to open - """ + """ filenames = [] album_data = get_bandcamp_metadata(url) @@ -580,243 +536,130 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path= # so we call the scrape_bandcamp_url() method for each one if type(album_data) is list: for album_url in album_data: - filenames.append(scrape_bandcamp_url(album_url, num_tracks, folders, custom_path)) + filenames.append(scrape_bandcamp_url(album_url, num_tracks, use_folders, custom_path)) return filenames artist = album_data["artist"] album_name = album_data["album_name"] - if folders: + if use_folders: if album_name: - directory = artist + " - " + album_name + directory = f"{artist} - {album_name}" else: directory = artist - directory = sanitize_filename(directory) + directory = sanitize(directory) directory = join(custom_path, directory) if not exists(directory): mkdir(directory) for i, track in enumerate(album_data["trackinfo"]): - if i > num_tracks - 1: continue - try: track_name = track["title"] if track["track_num"]: track_number = str(track["track_num"]).zfill(2) else: track_number = None - if track_number and folders: - track_filename = '%s - %s.mp3' % (track_number, track_name) + if track_number and use_folders: + track_filename = f"{track_number} - {track_name}.mp3" else: - track_filename = '%s.mp3' % (track_name) - track_filename = sanitize_filename(track_filename) + track_filename = f"{track_name}.mp3" + track_filename = sanitize(track_filename) - if folders: + if use_folders: path = join(directory, track_filename) else: - path = join(custom_path, sanitize_filename(artist) + ' - ' + track_filename) + path = join(custom_path, sanitize(artist) + " - " + track_filename) if exists(path): puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_name)) continue - if not track['file']: - 
puts_safe(colored.yellow("Track unavailble for scraping: ") + colored.white(track_name)) + if not track["file"]: + puts_safe(colored.yellow("Track unavailable for scraping: ") + colored.white(track_name)) continue puts_safe(colored.green("Downloading") + colored.white(": " + track_name)) - path = download_file(track['file']['mp3-128'], path) + path = download_file(track["file"]["mp3-128"], path) - album_year = album_data['album_release_date'] + album_year = album_data["album_release_date"] if album_year: album_year = datetime.strptime(album_year, "%d %b %Y %H:%M:%S GMT").year - tag_file(path, - artist, - track_name, - album=album_name, - year=album_year, - genre=album_data['genre'], - artwork_url=album_data['artFullsizeUrl'], - track_number=track_number, - url=album_data['url']) + tag_file( + path, + artist, + track_name, + album=album_name, + year=album_year, + genre=album_data["genre"], + artwork_url=album_data["artFullsizeUrl"], + track_number=track_number, + url=album_data["url"], + ) filenames.append(path) - except Exception as e: + except: puts_safe(colored.red("Problem downloading ") + colored.white(track_name)) - print(e) + return filenames def get_bandcamp_metadata(url): """ Read information from the Bandcamp JavaScript object. + The method may return a list of URLs (indicating this is probably a "main" page which links to one or more albums), or a JSON if we can already parse album/track info from the given url. The JSON is "sloppy". The native python JSON parser often can't deal, so we use the more tolerant demjson instead. 
""" - request = requests.get(url) + response = requests.get(url) try: - sloppy_json = request.text.split("var TralbumData = ") + sloppy_json = response.text.split("var TralbumData = ") sloppy_json = sloppy_json[1].replace('" + "', "") - sloppy_json = sloppy_json.replace("'", "\'") + sloppy_json = sloppy_json.replace("'", "'") sloppy_json = sloppy_json.split("};")[0] + "};" sloppy_json = sloppy_json.replace("};", "}") output = demjson.decode(sloppy_json) # if the JSON parser failed, we should consider it's a "/music" page, # so we generate a list of albums/tracks and return it immediately - except Exception as e: + except: regex_all_albums = r'<a href="(/(?:album|track)/[^>]+)">' - all_albums = re.findall(regex_all_albums, request.text, re.MULTILINE) + all_albums = re.findall(regex_all_albums, response.text, re.MULTILINE) album_url_list = list() for album in all_albums: - album_url = re.sub(r'music/?$', '', url) + album + album_url = re.sub(r"music/?$", "", url) + album album_url_list.append(album_url) return album_url_list # if the JSON parser was successful, use a regex to get all tags # from this album/track, join them and set it as the "genre" regex_tags = r'<a class="tag" href[^>]+>([^<]+)</a>' - tags = re.findall(regex_tags, request.text, re.MULTILINE) + tags = re.findall(regex_tags, response.text, re.MULTILINE) # make sure we treat integers correctly with join() # according to http://stackoverflow.com/a/7323861 # (very unlikely, but better safe than sorry!) - output['genre'] = ' '.join(s for s in tags) + output["genre"] = " ".join(s for s in tags) # make sure we always get the correct album name, even if this is a # track URL (unless this track does not belong to any album, in which # case the album name remains set as None. 
- output['album_name'] = None + output["album_name"] = None regex_album_name = r'album_title\s*:\s*"([^"]+)"\s*,' - match = re.search(regex_album_name, request.text, re.MULTILINE) + match = re.search(regex_album_name, response.text, re.MULTILINE) if match: - output['album_name'] = match.group(1) + output["album_name"] = match.group(1) try: - artUrl = request.text.split("\"tralbumArt\">")[1].split("\">")[0].split("href=\"")[1] - output['artFullsizeUrl'] = artUrl + artUrl = response.text.split('"tralbumArt">')[1].split('">')[0].split('href="')[1] + output["artFullsizeUrl"] = artUrl except: - puts_safe(colored.red("Couldn't get full artwork") + "") - output['artFullsizeUrl'] = None + puts_safe(colored.red("Couldn't get full artwork.")) + output["artFullsizeUrl"] = None return output -#################################################################### -# Mixcloud -#################################################################### - - -def process_mixcloud(vargs): - """ - Main MixCloud path. 
- """ - - artist_url = vargs['artist_url'] - - if 'mixcloud.com' in artist_url: - mc_url = artist_url - else: - mc_url = 'https://mixcloud.com/' + artist_url - - filenames = scrape_mixcloud_url(mc_url, num_tracks=vargs['num_tracks'], folders=vargs['folders'], custom_path=vargs['path']) - - if vargs['open']: - open_files(filenames) - - return - - -def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=''): - """ - Returns: - list: filenames to open - - """ - - try: - data = get_mixcloud_data(mc_url) - except Exception as e: - puts_safe(colored.red("Problem downloading ") + mc_url) - print(e) - return [] - - filenames = [] - - track_artist = sanitize_filename(data['artist']) - track_title = sanitize_filename(data['title']) - track_filename = track_artist + ' - ' + track_title + data['mp3_url'][-4:] - - if folders: - track_artist_path = join(custom_path, track_artist) - if not exists(track_artist_path): - mkdir(track_artist_path) - track_filename = join(track_artist_path, track_filename) - if exists(track_filename): - puts_safe(colored.yellow("Skipping") + colored.white(': ' + data['title'] + " - it already exists!")) - return [] - else: - track_filename = join(custom_path, track_filename) - - puts_safe(colored.green("Downloading") + colored.white( - ': ' + data['artist'] + " - " + data['title'] + " (" + track_filename[-4:] + ")")) - download_file(data['mp3_url'], track_filename) - if track_filename[-4:] == '.mp3': - tag_file(track_filename, - artist=data['artist'], - title=data['title'], - year=data['year'], - genre="Mix", - artwork_url=data['artwork_url']) - filenames.append(track_filename) - - return filenames - - -def get_mixcloud_data(url): - """ - Scrapes a Mixcloud page for a track's important information. 
- - Returns: - dict: containing audio data - - """ - - data = {} - request = requests.get(url) - preview_mp3_url = request.text.split('m-preview="')[1].split('" m-preview-light')[0] - song_uuid = request.text.split('m-preview="')[1].split('" m-preview-light')[0].split('previews/')[1].split('.mp3')[0] - - # Fish for the m4a.. - for server in range(1, 23): - # Ex: https://stream6.mixcloud.com/c/m4a/64/1/2/0/9/30fe-23aa-40da-9bf3-4bee2fba649d.m4a - mp3_url = "https://stream" + str(server) + ".mixcloud.com/c/m4a/64/" + song_uuid + '.m4a' - try: - if requests.head(mp3_url).status_code == 200: - if '?' in mp3_url: - mp3_url = mp3_url.split('?')[0] - break - except Exception as e: - continue - - full_title = request.text.split("<title>")[1].split(" | Mixcloud")[0] - title = full_title.split(' by ')[0].strip() - artist = full_title.split(' by ')[1].strip() - - img_thumbnail_url = request.text.split('m-thumbnail-url="')[1].split(" ng-class")[0] - artwork_url = img_thumbnail_url.replace('60/', '300/').replace('60/', '300/').replace('//', 'https://').replace('"', - '') - - data['mp3_url'] = mp3_url - data['title'] = title - data['artist'] = artist - data['artwork_url'] = artwork_url - data['year'] = None - - return data - - #################################################################### # Audiomack #################################################################### @@ -824,62 +667,65 @@ def get_mixcloud_data(url): def process_audiomack(vargs): """ - Main Audiomack path. - """ + Process Audiomack download. 
- artist_url = vargs['artist_url'] + Args: + vargs (dict): + """ + artist_url = vargs["artist_url"] - if 'audiomack.com' in artist_url: + if "audiomack.com" in artist_url: mc_url = artist_url else: - mc_url = 'https://audiomack.com/' + artist_url + mc_url = f"https://audiomack.com/{artist_url}" - filenames = scrape_audiomack_url(mc_url, num_tracks=vargs['num_tracks'], folders=vargs['folders'], custom_path=vargs['path']) + filenames = scrape_audiomack_url( + mc_url, num_tracks=vargs["num_tracks"], use_folders=vargs["folders"], custom_path=vargs["path"], + ) - if vargs['open']: + if vargs["open"]: open_files(filenames) return -def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=''): +def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, use_folders=False, custom_path=""): """ + Pull out artist and track info from a Audiomack URL. + Returns: list: filenames to open """ + filenames = [] try: data = get_audiomack_data(mc_url) - except Exception as e: + except: puts_safe(colored.red("Problem downloading ") + mc_url) - print(e) - - filenames = [] + return filenames - track_artist = sanitize_filename(data['artist']) - track_title = sanitize_filename(data['title']) - track_filename = track_artist + ' - ' + track_title + '.mp3' + track_artist = sanitize(data["artist"]) + track_title = sanitize(data["title"]) + track_filename = f"{track_artist} - {track_title}.mp3" - if folders: + if use_folders: track_artist_path = join(custom_path, track_artist) if not exists(track_artist_path): mkdir(track_artist_path) track_filename = join(track_artist_path, track_filename) - if exists(track_filename): - puts_safe(colored.yellow("Skipping") + colored.white(': ' + data['title'] + " - it already exists!")) - return [] else: track_filename = join(custom_path, track_filename) - puts_safe(colored.green("Downloading") + colored.white(': ' + data['artist'] + " - " + data['title'])) - download_file(data['mp3_url'], track_filename) - tag_file(track_filename, - 
artist=data['artist'], - title=data['title'], - year=data['year'], - genre=None, - artwork_url=data['artwork_url']) + if exists(track_filename): + puts_safe(colored.yellow("Skipping: ") + colored.white(data["title"]) + colored.yellow(" - already exists!")) + return filenames + + puts_safe(colored.green("Downloading: ") + colored.white(f"{data['artist']} - {data['title']}")) + download_file(data["mp3_url"], track_filename) + tag_file( + track_filename, artist=data["artist"], title=data["title"], year=data["year"], artwork_url=data["artwork_url"], + ) filenames.append(track_filename) return filenames @@ -887,123 +733,25 @@ def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_p def get_audiomack_data(url): """ - Scrapes a Mixcloud page for a track's important information. + Scrapes a Auidomack page for a track's important information. Returns: dict: containing audio data """ + response = requests.get(url) - data = {} - request = requests.get(url) - - mp3_url = request.text.split('class="player-icon download-song" title="Download" href="')[1].split('"')[0] - artist = request.text.split('<span class="artist">')[1].split('</span>')[0].strip() - title = request.text.split('<span class="artist">')[1].split('</span>')[1].split('</h1>')[0].strip() - artwork_url = request.text.split('<a class="lightbox-trigger" href="')[1].split('" data')[0].strip() - - data['mp3_url'] = mp3_url - data['title'] = title - data['artist'] = artist - data['artwork_url'] = artwork_url - data['year'] = None - - return data - - -#################################################################### -# Hive.co -#################################################################### - - -def process_hive(vargs): - """ - Main Hive.co path. 
- """ - - artist_url = vargs['artist_url'] - - if 'hive.co' in artist_url: - mc_url = artist_url - else: - mc_url = 'https://www.hive.co/downloads/download/' + artist_url - - filenames = scrape_hive_url(mc_url, num_tracks=vargs['num_tracks'], folders=vargs['folders'], custom_path=vargs['path']) - - if vargs['open']: - open_files(filenames) - - return - - -def scrape_hive_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=''): - """ - Scrape a Hive.co download page. - - Returns: - list: filenames to open - - """ - - try: - data = get_hive_data(mc_url) - except Exception as e: - puts_safe(colored.red("Problem downloading ") + mc_url) - print(e) - - filenames = [] - - # track_artist = sanitize_filename(data['artist']) - # track_title = sanitize_filename(data['title']) - # track_filename = track_artist + ' - ' + track_title + '.mp3' - - # if folders: - # track_artist_path = join(custom_path, track_artist) - # if not exists(track_artist_path): - # mkdir(track_artist_path) - # track_filename = join(track_artist_path, track_filename) - # if exists(track_filename): - # puts_safe(colored.yellow("Skipping") + colored.white(': ' + data['title'] + " - it already exists!")) - # return [] - - # puts_safe(colored.green("Downloading") + colored.white(': ' + data['artist'] + " - " + data['title'])) - # download_file(data['mp3_url'], track_filename) - # tag_file(track_filename, - # artist=data['artist'], - # title=data['title'], - # year=data['year'], - # genre=None, - # artwork_url=data['artwork_url']) - # filenames.append(track_filename) - - return filenames - - -def get_hive_data(url): - """ - - Scrapes a Mixcloud page for a track's important information. - - Returns a dict of data. 
- - """ - - data = {} - request = requests.get(url) - - # import pdb - # pdb.set_trace() - - # mp3_url = request.text.split('class="player-icon download-song" title="Download" href="')[1].split('"')[0] - # artist = request.text.split('<span class="artist">')[1].split('</span>')[0].strip() - # title = request.text.split('<span class="artist">')[1].split('</span>')[1].split('</h1>')[0].strip() - # artwork_url = request.text.split('<a class="lightbox-trigger" href="')[1].split('" data')[0].strip() + mp3_url = response.text.split('class="player-icon download-song" title="Download" href="')[1].split('"')[0] + artist = response.text.split('<span class="artist">')[1].split("</span>")[0].strip() + title = response.text.split('<span class="artist">')[1].split("</span>")[1].split("</h1>")[0].strip() + artwork_url = response.text.split('<a class="lightbox-trigger" href="')[1].split('" data')[0].strip() - # data['mp3_url'] = mp3_url - # data['title'] = title - # data['artist'] = artist - # data['artwork_url'] = artwork_url - # data['year'] = None + data = { + "mp3_url": mp3_url, + "title": title, + "artist": artist, + "artwork_url": artwork_url, + } return data @@ -1015,34 +763,45 @@ def get_hive_data(url): def process_musicbed(vargs): """ - Main MusicBed path. - """ + Process MusicBed download. 
- # let's validate given MusicBed url + Args: + vargs (dict): + """ validated = False - if vargs['artist_url'].startswith( 'https://www.musicbed.com/' ): - splitted = vargs['artist_url'][len('https://www.musicbed.com/'):].split( '/' ) - if len( splitted ) == 3: - if ( splitted[0] == 'artists' or splitted[0] == 'albums' or splitted[0] == 'songs' ) and splitted[2].isdigit(): + if vargs["artist_url"].startswith("https://www.musicbed.com/"): + splitted = vargs["artist_url"][len("https://www.musicbed.com/") :].split("/") + if len(splitted) == 3: + if (splitted[0] == "artists" or splitted[0] == "albums" or splitted[0] == "songs") and splitted[2].isdigit(): validated = True if not validated: - puts( colored.red( 'process_musicbed: you provided incorrect MusicBed url. Aborting.' ) ) - puts( colored.white( 'Please make sure that url is either artist-url, album-url or song-url.' ) ) - puts( colored.white( 'Example of correct artist-url: https://www.musicbed.com/artists/lights-motion/5188' ) ) - puts( colored.white( 'Example of correct album-url: https://www.musicbed.com/albums/be-still/2828' ) ) - puts( colored.white( 'Example of correct song-url: https://www.musicbed.com/songs/be-still/24540' ) ) + puts(colored.red("process_musicbed: you provided incorrect MusicBed url. 
Aborting.")) + puts(colored.white("Please make sure that url is either artist-url, album-url or song-url.")) + puts(colored.white("Example of correct artist-url: https://www.musicbed.com/artists/lights-motion/5188")) + puts(colored.white("Example of correct album-url: https://www.musicbed.com/albums/be-still/2828")) + puts(colored.white("Example of correct song-url: https://www.musicbed.com/songs/be-still/24540")) return - filenames = scrape_musicbed_url(vargs['artist_url'], vargs['login'], vargs['password'], num_tracks=vargs['num_tracks'], folders=vargs['folders'], custom_path=vargs['path']) + filenames = scrape_musicbed_url( + vargs["artist_url"], + vargs["login"], + vargs["password"], + num_tracks=vargs["num_tracks"], + use_folders=vargs["folders"], + custom_path=vargs["path"], + ) - if vargs['open']: + if vargs["open"]: open_files(filenames) + return + -def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=False, custom_path=''): +def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, use_folders=False, custom_path=""): """ - Scrapes provided MusicBed url. + Scrapes provided MusicBed URL. + Uses requests' Session object in order to store cookies. Requires login and password information. If provided url is of pattern 'https://www.musicbed.com/artists/<string>/<number>' - a number of albums will be downloaded. @@ -1052,13 +811,13 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa Returns: list: filenames to open - """ + """ session = requests.Session() - response = session.get( url ) + response = session.get(url) if response.status_code != 200: - puts( colored.red( 'scrape_musicbed_url: couldn\'t open provided url. Status code: ' + str( response.status_code ) + '. Aborting.' ) ) + puts(colored.red(f"scrape_musicbed_url: couldn't open provided url. Status code: {response.status_code}. 
Aborting.")) session.close() return [] @@ -1067,101 +826,108 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa # '/artists/' - search for and download many albums # '/albums/' - means we're downloading 1 album # '/songs/' - means 1 album as well, but we're forcing num_tracks=1 in order to download only first relevant track - if url.startswith( 'https://www.musicbed.com/artists/' ): + if url.startswith("https://www.musicbed.com/artists/"): # a hackjob code to get a list of available albums main_index = 0 - while response.text.find( 'https://www.musicbed.com/albums/', main_index ) != -1: - start_index = response.text.find( 'https://www.musicbed.com/albums/', main_index ) - end_index = response.text.find( '">', start_index ) - albums.append( response.text[start_index:end_index] ) + while response.text.find("https://www.musicbed.com/albums/", main_index) != -1: + start_index = response.text.find("https://www.musicbed.com/albums/", main_index) + end_index = response.text.find('">', start_index) + albums.append(response.text[start_index:end_index]) main_index = end_index - elif url.startswith( 'https://www.musicbed.com/songs/' ): - albums.append( url ) + elif url.startswith("https://www.musicbed.com/songs/"): + albums.append(url) num_tracks = 1 - else: # url.startswith( 'https://www.musicbed.com/albums/' ) - albums.append( url ) + else: # url.startswith( 'https://www.musicbed.com/albums/' ) + albums.append(url) # let's get our token and try to login (csrf_token seems to be present on every page) - token = response.text.split( 'var csrf_token = "' )[1].split( '";' )[0] - details = { '_token': token, 'login': login, 'password': password } - response = session.post( 'https://www.musicbed.com/ajax/login', data=details ) + token = response.text.split('var csrf_token = "')[1].split('";')[0] + details = {"_token": token, "login": login, "password": password} + response = session.post("https://www.musicbed.com/ajax/login", data=details) if 
response.status_code != 200: - puts( colored.red( 'scrape_musicbed_url: couldn\'t login. Aborting. ' ) + colored.white( 'Couldn\'t access login page.' ) ) + puts(colored.red("scrape_musicbed_url: couldn't login. Aborting. ") + colored.white("Couldn't access login page.")) session.close() return [] - login_response_data = demjson.decode( response.text ) - if not login_response_data['body']['status']: - puts( colored.red( 'scrape_musicbed_url: couldn\'t login. Aborting. ' ) + colored.white( 'Did you provide correct login and password?' ) ) + login_response_data = demjson.decode(response.text) + if not login_response_data["body"]["status"]: + puts(colored.red("Can't login to MusicBed. ") + colored.white("Did you provide correct login and password?")) session.close() return [] # now let's actually scrape collected pages filenames = [] for each_album_url in albums: - response = session.get( each_album_url ) + response = session.get(each_album_url) if response.status_code != 200: - puts_safe( colored.red( 'scrape_musicbed_url: couldn\'t open url: ' + each_album_url + - '. Status code: ' + str( response.status_code ) + '. Skipping.' ) ) + puts_safe( + colored.red( + f"scrape_musicbed_url: couldn't open url: {each_album_url}. Status code: {response.status_code}. Skipping." + ) + ) continue # actually not a JSON, but a JS object, but so far so good - json = response.text.split( 'App.components.SongRows = ' )[1].split( '</script>' )[0] - data = demjson.decode( json ) + json = response.text.split("App.components.SongRows = ")[1].split("</script>")[0] + data = demjson.decode(json) song_count = 1 - for each_song in data['loadedSongs']: + for each_song in data["loadedSongs"]: if song_count > num_tracks: break try: - url, params = each_song['playback_url'].split( '?' 
) + url, params = each_song["playback_url"].split("?") details = dict() - for each_param in params.split( '&' ): - name, value = each_param.split( '=' ) - details.update( { name: value } ) + for each_param in params.split("&"): + name, value = each_param.split("=") + details.update({name: value}) # musicbed warns about it if it's not fixed - details['X-Amz-Credential'] = details['X-Amz-Credential'].replace( '%2F', '/' ) + details["X-Amz-Credential"] = details["X-Amz-Credential"].replace("%2F", "/") directory = custom_path - if folders: - sanitized_artist = sanitize_filename( each_song['album']['data']['artist']['data']['name'] ) - sanitized_album = sanitize_filename( each_song['album']['data']['name'] ) - directory = join( directory, sanitized_artist + ' - ' + sanitized_album ) - if not exists( directory ): - mkdir( directory ) - filename = join( directory, str( song_count ) + ' - ' + sanitize_filename( each_song['name'] ) + '.mp3' ) - - if exists( filename ): - puts_safe( colored.yellow( 'Skipping' ) + colored.white( ': ' + each_song['name'] + ' - it already exists!' 
) ) + if use_folders: + sanitized_artist = sanitize(each_song["album"]["data"]["artist"]["data"]["name"]) + sanitized_album = sanitize(each_song["album"]["data"]["name"]) + directory = join(directory, sanitized_artist + " - " + sanitized_album) + if not exists(directory): + mkdir(directory) + filename = join(directory, str(song_count) + " - " + sanitize(each_song["name"]) + ".mp3",) + + if exists(filename): + puts_safe( + colored.yellow("Skipping: ") + colored.white(each_song["name"]) + colored.yellow(" - already exists!") + ) song_count += 1 continue - puts_safe( colored.green( 'Downloading' ) + colored.white( ': ' + each_song['name'] ) ) - path = download_file( url, filename, session=session, params=details ) + puts_safe(colored.green("Downloading: ") + colored.white(each_song["name"])) + path = download_file(url, filename, session=session, params=details) # example of genre_string: # "<a href=\"https://www.musicbed.com/genres/ambient/2\">Ambient</a> <a href=\"https://www.musicbed.com/genres/cinematic/4\">Cinematic</a>" - genres = '' - for each in each_song['genre_string'].split( '</a>' ): - if ( each != "" ): - genres += each.split( '">' )[1] + '/' - genres = genres[:-1] # removing last '/ - - tag_file(path, - each_song['album']['data']['artist']['data']['name'], - each_song['name'], - album=each_song['album']['data']['name'], - year=int( each_song['album']['data']['released_at'].split( '-' )[0] ), - genre=genres, - artwork_url=each_song['album']['data']['imageObject']['data']['paths']['original'], - track_number=str( song_count ), - url=each_song['song_url']) - - filenames.append( path ) + genres = "" + for each in each_song["genre_string"].split("</a>"): + if each != "": + genres += each.split('">')[1] + "/" + genres = genres[:-1] # removing last '/ + + tag_file( + path, + each_song["album"]["data"]["artist"]["data"]["name"], + each_song["name"], + album=each_song["album"]["data"]["name"], + year=int(each_song["album"]["data"]["released_at"].split("-")[0]), + 
genre=genres, + artwork_url=each_song["album"]["data"]["imageObject"]["data"]["paths"]["original"], + track_number=str(song_count), + url=each_song["song_url"], + ) + + filenames.append(path) song_count += 1 except: - puts_safe( colored.red( 'Problem downloading ' ) + colored.white( each_song['name'] ) + '. Skipping.' ) + puts_safe(colored.red("Problem downloading ") + colored.white(f"{each_song['name']}. Skipping.")) song_count += 1 session.close() @@ -1175,24 +941,21 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa def download_file(url, path, session=None, params=None): - """ - Download an individual file. - """ - - if url[0:2] == '//': - url = 'https://' + url[2:] + """Download an individual file.""" + if url[0:2] == "//": + url = f"https://{url[2:]}" # Use a temporary file so that we don't import incomplete files. - tmp_path = path + '.tmp' + tmp_path = f"{path}.tmp" - if session and params: - r = session.get( url, params=params, stream=True ) - elif session and not params: - r = session.get( url, stream=True ) + if session is not None and params is not None: + r = session.get(url, params=params, stream=True) + elif session is not None and params is None: + r = session.get(url, stream=True) else: r = requests.get(url, stream=True) - with open(tmp_path, 'wb') as f: - total_length = int(r.headers.get('content-length', 0)) + with open(tmp_path, "wb") as f: + total_length = int(r.headers.get("content-length", 0)) for chunk in progress.bar(r.iter_content(chunk_size=1024), expected_size=(total_length / 1024) + 1): if chunk: # filter out keep-alive new chunks f.write(chunk) @@ -1203,7 +966,9 @@ def download_file(url, path, session=None, params=None): return path -def tag_file(filename, artist, title, year=None, genre=None, artwork_url=None, album=None, track_number=None, url=None): +def tag_file( + filename, artist, title, year=None, genre=None, artwork_url=None, album=None, track_number=None, url=None, +): """ Attempt to put ID3 
tags on a file. @@ -1217,104 +982,101 @@ def tag_file(filename, artist, title, year=None, genre=None, artwork_url=None, a track_number (str): filename (str): url (str): + """ + saved_correctly = True try: audio = EasyMP3(filename) - audio.tags = None - audio["artist"] = artist - audio["title"] = title - if year: - audio["date"] = str(year) - if album: - audio["album"] = album - if track_number: - audio["tracknumber"] = track_number - if genre: - audio["genre"] = genre - if url: # saves the tag as WOAR - audio["website"] = url + except HeaderNotFoundError: + puts(colored.red("Problem opening file: ") + colored.white("Is this file a WAV?")) + return False + + audio.tags = None + audio["artist"] = artist + audio["title"] = title + if year is not None: + # TODO: move year to date. + audio["date"] = str(year) + if album is not None: + audio["album"] = album + if track_number is not None: + audio["tracknumber"] = track_number + if genre is not None: + audio["genre"] = genre + if url is not None: + # saves the tag as WOAR + audio["website"] = url + + try: audio.save() + except MutagenError: + puts(colored.red("Problem tagging file: ") + colored.white("Is this file a WAV?")) + saved_correctly = False - if artwork_url: + try: + audio = MP3(filename, ID3=OldID3) + except HeaderNotFoundError: + puts(colored.red("Problem opening file: ") + colored.white("Is this file a WAV?")) + return False - artwork_url = artwork_url.replace('https', 'http') + if artwork_url is not None: + mime = "image/jpeg" + artwork_url = artwork_url.replace("https", "http") + urls = [artwork_url] + if "-large" in artwork_url: + urls.insert(0, artwork_url.replace("-large", "-t500x500")) - mime = 'image/jpeg' - if '.jpg' in artwork_url: - mime = 'image/jpeg' - if '.png' in artwork_url: - mime = 'image/png' + for a_url in urls: + response = requests.get(a_url) + if response.status_code != 200: + continue - if '-large' in artwork_url: - new_artwork_url = artwork_url.replace('-large', '-t500x500') - try: - 
image_data = requests.get(new_artwork_url).content - except Exception as e: - # No very large image available. - image_data = requests.get(artwork_url).content - else: - image_data = requests.get(artwork_url).content - - audio = MP3(filename, ID3=OldID3) - audio.tags.add( - APIC( - encoding=3, # 3 is for utf-8 - mime=mime, - type=3, # 3 is for the cover image - desc='Cover', - data=image_data - ) - ) - audio.save() + if ".png" in a_url: + mime = "image/png" - # because there is software that doesn't seem to use WOAR we save url tag again as WXXX - if url: - audio = MP3(filename, ID3=OldID3) - audio.tags.add( WXXX( encoding=3, url=url ) ) - audio.save() + # encoding=3 means utf-8 + # type=3 means cover image + audio.tags.add(APIC(encoding=3, mime=mime, type=3, desc="Cover", data=response.content)) + break - return True + if url is not None: + # Some software doesn't seem to use WOAR so the url is saved again as WXXX. + audio.tags.add(WXXX(encoding=3, url=url)) - except Exception as e: + try: + audio.save() + except MutagenError: puts(colored.red("Problem tagging file: ") + colored.white("Is this file a WAV?")) - return False + saved_correctly = False + + return saved_correctly + def open_files(filenames): """ Call the system 'open' command on a file. + + Args: + filenames (list): """ - command = ['open'] + filenames + if len(filenames) == 0: + return + + command = ["open"] + filenames process = Popen(command, stdout=PIPE, stderr=PIPE) stdout, stderr = process.communicate() -def sanitize_filename(filename): +def puts_safe(text): """ - Make sure filenames are valid paths. + Safely write to the screen. 
- Returns: - str: + Args: + text (str): """ - sanitized_filename = re.sub(r'[/\\:*?"<>|]', '-', filename) - sanitized_filename = sanitized_filename.replace('&', 'and') - sanitized_filename = sanitized_filename.replace('"', '') - sanitized_filename = sanitized_filename.replace("'", '') - sanitized_filename = sanitized_filename.replace("/", '') - sanitized_filename = sanitized_filename.replace("\\", '') - - # Annoying. - if sanitized_filename[0] == '.': - sanitized_filename = u'dot' + sanitized_filename[1:] - - return sanitized_filename - -def puts_safe(text): if sys.platform == "win32": - if sys.version_info < (3,0,0): - puts(text) - else: - puts(text.encode(sys.stdout.encoding, errors='replace').decode()) + puts(text.encode(sys.stdout.encoding, errors="replace").decode()) else: puts(text) @@ -1323,7 +1085,7 @@ def puts_safe(text): # Main #################################################################### -if __name__ == '__main__': +if __name__ == "__main__": try: sys.exit(main()) except Exception as e: diff --git a/test.sh b/test.sh deleted file mode 100755 index 1edbe24..0000000 --- a/test.sh +++ /dev/null @@ -1,2 +0,0 @@ -#! 
/bin/bash -nosetests diff --git a/tests/test.py b/tests/test.py index 626bf4b..2a93a76 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,169 +1,199 @@ +"""Tests for SoundScrape.""" import glob import os -import re -import string -import sys -import unittest - -import nose -from nose import case -from nose.pyversion import unbound_method -from nose import util - -from soundscrape.soundscrape import get_client -from soundscrape.soundscrape import process_soundcloud -from soundscrape.soundscrape import process_bandcamp -from soundscrape.soundscrape import process_mixcloud -from soundscrape.soundscrape import process_audiomack -from soundscrape.soundscrape import process_musicbed - -class TestSoundscrape(unittest.TestCase): - - ## - # Basic Tests - ## - - def test_test(self): - self.assertTrue(True) - - def test_get_client(self): - client = get_client() - self.assertTrue(bool(client)) - - def test_soundcloud(self): - for f in glob.glob('*.mp3'): - os.unlink(f) - - mp3_count = len(glob.glob1('', "*.mp3")) - vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://soundcloud.com/fzpz/revised', 'keep': True} - process_soundcloud(vargs) - new_mp3_count = len(glob.glob1('', "*.mp3")) - self.assertTrue(new_mp3_count > mp3_count) - - for f in glob.glob('*.mp3'): - os.unlink(f) - - def test_soundcloud_hard(self): - for f in glob.glob('*.mp3'): - os.unlink(f) - - mp3_count = len(glob.glob1('', "*.mp3")) - vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 1, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'puptheband', 'keep': False} - process_soundcloud(vargs) - new_mp3_count = len(glob.glob1('', "*.mp3")) - self.assertTrue(new_mp3_count > mp3_count) - self.assertTrue(new_mp3_count == 1) # This used to be 3, but is now 'Not available in United States.' 
- - for f in glob.glob('*.mp3'): - os.unlink(f) - - def test_soundcloud_hard_2(self): - for f in glob.glob('*.mp3'): - os.unlink(f) - - mp3_count = len(glob.glob1('', "*.mp3")) - vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 1, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://soundcloud.com/lostdogz/snuggles-chapstick', 'keep': False} - process_soundcloud(vargs) - new_mp3_count = len(glob.glob1('', "*.mp3")) - self.assertTrue(new_mp3_count > mp3_count) - self.assertTrue(new_mp3_count == 1) # This used to be 3, but is now 'Not available in United States.' - - for f in glob.glob('*.mp3'): - os.unlink(f) - - # The test URL for this is no longer a WAV. Need a new testcase. - # - # def test_soundcloud_wav(self): - # for f in glob.glob('*.wav'): - # os.unlink(f) - - # wav_count = len(glob.glob1('', "*.wav")) - # vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 1, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://soundcloud.com/coastal/major-lazer-aerosol-can-coastal-flip', 'keep': False} - # process_soundcloud(vargs) - # new_wav_count = len(glob.glob1('', "*.wav")) - # self.assertTrue(new_wav_count > wav_count) - # self.assertTrue(new_wav_count == 1) - - # for f in glob.glob('*.wav'): - # os.unlink(f) - - def test_bandcamp(self): - for f in glob.glob('*.mp3'): - os.unlink(f) - - mp3_count = len(glob.glob1('', "*.mp3")) - vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://atenrays.bandcamp.com/track/who-u-think'} - process_bandcamp(vargs) - new_mp3_count = len(glob.glob1('', "*.mp3")) - self.assertTrue(new_mp3_count > mp3_count) - - for f in glob.glob('*.mp3'): - os.unlink(f) - - def test_bandcamp_slashes(self): - for f in glob.glob('*.mp3'): - os.unlink(f) - - mp3_count 
= len(glob.glob1('', "*.mp3")) - vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://defill.bandcamp.com/track/amnesia-chamber-harvest-skit'} - process_bandcamp(vargs) - new_mp3_count = len(glob.glob1('', "*.mp3")) - self.assertTrue(new_mp3_count > mp3_count) - - for f in glob.glob('*.mp3'): - os.unlink(f) - - # def test_musicbed(self): - # for f in glob.glob('*.mp3'): - # os.unlink(f) - - # mp3_count = len(glob.glob1('', "*.mp3")) - # vargs = {'login':'musicbedtest@gmail.com', 'password':'oo6alY9T', 'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://www.musicbed.com/albums/be-still/2828'} - # process_musicbed(vargs) - # new_mp3_count = len(glob.glob1('', "*.mp3")) - # self.assertTrue(new_mp3_count > mp3_count) - - # for f in glob.glob('*.mp3'): - # os.unlink(f) - - def test_mixcloud(self): - """ - MixCloud is being blocked from Travis, interestingly. 
- """ - - for f in glob.glob('*.mp3'): - os.unlink(f) - - for f in glob.glob('*.m4a'): - os.unlink(f) - - # shortest mix I could find that was still semi tolerable - #mp3_count = len(glob.glob1('', "*.mp3")) - #m4a_count = len(glob.glob1('', "*.m4a")) - #vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://www.mixcloud.com/Bobby_T_FS15/coffee-cigarettes-saturday-morning-hip-hop-fix/'} - #process_mixcloud(vargs) - #new_mp3_count = len(glob.glob1('', "*.mp3")) - #new_m4a_count = len(glob.glob1('', "*.m4a")) - #self.assertTrue((new_mp3_count > mp3_count) or (new_m4a_count > m4a_count)) - - for f in glob.glob('*.mp3'): - os.unlink(f) - - for f in glob.glob('*.m4a'): - os.unlink(f) - - # def test_audiomack(self): - # for f in glob.glob('*.mp3'): - # os.unlink(f) - - # mp3_count = len(glob.glob1('', "*.mp3")) - # vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'audiomack': True, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://www.audiomack.com/song/bottomfeedermusic/power'} - # process_audiomack(vargs) - # new_mp3_count = len(glob.glob1('', "*.mp3")) - # self.assertTrue(new_mp3_count > mp3_count) - - # for f in glob.glob('*.mp3'): - # os.unlink(f) - -if __name__ == '__main__': - unittest.main() + +from soundscrape.soundscrape import process_bandcamp, process_soundcloud + + +def cleanup_files(): + """Cleanup files from tests.""" + for f in glob.glob("*.mp3"): + os.unlink(f) + for f in glob.glob("*.m4a"): + os.unlink(f) + + +def test_soundcloud_full_url(): + """Basic SoundCloud test with full url.""" + cleanup_files() + + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 9223372036854775807, + "bandcamp": False, + "downloadable": False, + 
"likes": False, + "open": False, + "artist_url": "https://soundcloud.com/fzpz/revised", + "keep": True, + } + process_soundcloud(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count > mp3_count + cleanup_files() + + +def test_soundcloud_full_url_2(): + """Basic SoundCloud test with full url.""" + cleanup_files() + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 1, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://soundcloud.com/lostdogz/snuggles-chapstick", + "keep": False, + } + process_soundcloud(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count > mp3_count + assert new_mp3_count == 1 + cleanup_files() + + +def test_soundcloud_artist_only(): + """Basic SoundCloud test with artist name only.""" + cleanup_files() + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 1, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "hd1080pmusic", + "keep": False, + } + process_soundcloud(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count > mp3_count + assert new_mp3_count == 1 + cleanup_files() + + +def test_soundcloud_tracks(): + """Basic SoundCloud test tracks.""" + cleanup_files() + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 1, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://soundcloud.com/alan-seslowsky/tracks", + "keep": False, + } + process_soundcloud(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count > mp3_count + assert new_mp3_count == 1 + cleanup_files() + + +def test_soundcloud_track_not_found(): + """Basic SoundCloud test with a track that can't 
be found.""" + cleanup_files() + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "danny-brown-dip", + "num_tracks": 1, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "foolsgoldrecs", + "keep": False, + } + process_soundcloud(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count == 0 + cleanup_files() + + +def test_soundcloud_feed_download(): + """Basic SoundCloud feed test.""" + cleanup_files() + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 1, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://soundcloud.com/johnocallaghan", + "keep": False, + } + process_soundcloud(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count > mp3_count + assert new_mp3_count == 1 + cleanup_files() + + +def test_bandcamp(): + """Basic BandCamp test.""" + cleanup_files() + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 9223372036854775807, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://atenrays.bandcamp.com/track/who-u-think", + } + process_bandcamp(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count > mp3_count + cleanup_files() + + +def test_bandcamp_slashes(): + """Basic BandCamp test.""" + cleanup_files() + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 9223372036854775807, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://defill.bandcamp.com/track/amnesia-chamber-harvest-skit", + } + process_bandcamp(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count > mp3_count + cleanup_files()