From 858910e32037f42348cdea619e371c5480fa9a41 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk Date: Wed, 13 May 2020 16:11:08 -0700 Subject: [PATCH 01/69] Moving to GitHub Actions. --- .github/workflows/main.yml | 34 ++++++++++++++++++++++++++++++++++ .travis.yml | 12 ------------ 2 files changed, 34 insertions(+), 12 deletions(-) create mode 100644 .github/workflows/main.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..cc72b44 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,34 @@ +name: SoundScape CI + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.5, 3.6, 3.7, 3.8] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index eebb013..0000000 --- a/.travis.yml +++ /dev/null @@ -1,12 +0,0 @@ -language: python -python: - - "2.7" - - "3.3" - - "3.4" - - "3.5" -# command to install dependencies -install: -# - "pip install -r requirements.txt" - - "pip install setuptools --upgrade; python setup.py install" -# command to run tests -script: nosetests From 966bbadb4ff00c5b39aa226d40802d12a8f559b0 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk Date: Wed, 13 May 2020 16:12:25 -0700 Subject: [PATCH 02/69] Fixing typo. --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index cc72b44..4f7127e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,4 +1,4 @@ -name: SoundScape CI +name: SoundScrape CI on: push: From 77b7c8ad444983bfe2de5aab33c6560dc9ff593e Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk Date: Wed, 13 May 2020 20:53:24 -0700 Subject: [PATCH 03/69] Adding Black formatter. --- .github/workflows/main.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4f7127e..ccbb134 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -8,27 +8,27 @@ on: jobs: build: - runs-on: ubuntu-latest strategy: matrix: - python-version: [3.5, 3.6, 3.7, 3.8] - + python-version: ["3{0}5", "3{0}6", "3{0}7", "3{0}8"] steps: - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} + - name: Set up Python ${{ format(matrix.python-version, ".") }} uses: actions/setup-python@v2 with: - python-version: ${{ matrix.python-version }} + python-version: ${{ format(matrix.python-version, ".") }} - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 + pip install flake8 black if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Black code formatting + run: | + black --check --line-length 127 --target-version py${{ format(matrix.python-version, "") }} . - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - From b4c7c4e75ce8e36ff2c7772572ec3b42d392c1b4 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk Date: Wed, 13 May 2020 20:55:04 -0700 Subject: [PATCH 04/69] Moving to single quotes. --- .github/workflows/main.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ccbb134..1ca73c3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,13 +11,13 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3{0}5", "3{0}6", "3{0}7", "3{0}8"] + python-version: ['3{0}5', '3{0}6', '3{0}7', '3{0}8'] steps: - uses: actions/checkout@v2 - - name: Set up Python ${{ format(matrix.python-version, ".") }} + - name: Set up Python ${{ format(matrix.python-version, '.') }} uses: actions/setup-python@v2 with: - python-version: ${{ format(matrix.python-version, ".") }} + python-version: ${{ format(matrix.python-version, '.') }} - name: Install dependencies run: | python -m pip install --upgrade pip @@ -25,7 +25,7 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Black code formatting run: | - black --check --line-length 127 --target-version py${{ format(matrix.python-version, "") }} . + black --check --line-length 127 --target-version py${{ format(matrix.python-version, '') }} . - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names From 89c739540e120da275fecc77255fafa905b5457c Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk Date: Wed, 13 May 2020 20:56:17 -0700 Subject: [PATCH 05/69] Removing Python 3.5. --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1ca73c3..c4accfd 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3{0}5', '3{0}6', '3{0}7', '3{0}8'] + python-version: ['3{0}6', '3{0}7', '3{0}8'] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ format(matrix.python-version, '.') }} From eb524679ed1df21015e96d289ba393a57799739a Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk Date: Wed, 13 May 2020 20:59:10 -0700 Subject: [PATCH 06/69] Reformatted with Black. --- setup.py | 51 ++- soundscrape/__init__.py | 2 +- soundscrape/soundscrape.py | 829 ++++++++++++++++++++----------------- tests/test.py | 176 +++++--- 4 files changed, 593 insertions(+), 465 deletions(-) diff --git a/setup.py b/setup.py index 6c9851a..1c77f2a 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ from setuptools import setup # To support 2/3 installation -setup_version = int(setuptools.__version__.split('.')[0]) +setup_version = int(setuptools.__version__.split(".")[0]) if setup_version < 18: print("Please upgrade your setuptools to install SoundScrape: ") print("pip install -U pip wheel setuptools") @@ -15,44 +15,41 @@ # Set external files try: from pypandoc import convert - README = convert('README.md', 'rst') + + README = convert("README.md", "rst") except ImportError: - README = open(os.path.join(os.path.dirname(__file__), 'README.md')).read() + README = open(os.path.join(os.path.dirname(__file__), "README.md")).read() -with open(os.path.join(os.path.dirname(__file__), 'requirements.txt')) as f: +with open(os.path.join(os.path.dirname(__file__), "requirements.txt")) as f: required = f.read().splitlines() # allow setup.py to be run from any path os.chdir(os.path.normpath(os.path.join(os.path.abspath(__file__), os.pardir))) setup( - name='soundscrape', + name="soundscrape", version=soundscrape.__version__, - packages=['soundscrape'], + packages=["soundscrape"], install_requires=required, - extras_require={ ':python_version < "3.0"': [ 'wsgiref>=0.1.2', ], }, + extras_require={':python_version < "3.0"': ["wsgiref>=0.1.2",],}, include_package_data=True, - license='MIT License', - description='Scrape an artist from SoundCloud', + license="MIT License", + description="Scrape an artist from SoundCloud", long_description=README, - url='https://github.com/Miserlou/SoundScrape', - author='Rich Jones', - author_email='rich@openwatch.net', - entry_points={ - 'console_scripts': [ - 'soundscrape = soundscrape.soundscrape:main', - ] - }, + url="https://github.com/Miserlou/SoundScrape", + author="Rich Jones", + author_email="rich@openwatch.net", + entry_points={"console_scripts": ["soundscrape = soundscrape.soundscrape:main",]}, classifiers=[ - 'Environment :: Console', - 'License :: OSI Approved :: Apache Software License', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Topic :: Internet :: WWW/HTTP', - 'Topic :: Internet :: WWW/HTTP :: Dynamic Content', + "Environment :: Console", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Topic :: Internet :: WWW/HTTP", + "Topic :: Internet :: WWW/HTTP :: Dynamic Content", ], ) diff --git a/soundscrape/__init__.py b/soundscrape/__init__.py index 8ada23a..e89b4cf 100644 --- a/soundscrape/__init__.py +++ b/soundscrape/__init__.py @@ -1 +1 @@ -__version__ = '0.30.2' +__version__ = "0.30.2" diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 397ca71..5333975 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -22,12 +22,12 @@ #################################################################### # Please be nice with this! -CLIENT_ID = 'a3dd183a357fcff9a6943c0d65664087' -CLIENT_SECRET = '7e10d33e967ad42574124977cf7fa4b7' -MAGIC_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28' +CLIENT_ID = "a3dd183a357fcff9a6943c0d65664087" +CLIENT_SECRET = "7e10d33e967ad42574124977cf7fa4b7" +MAGIC_CLIENT_ID = "b45b1aa10f1ac2941910a7f0d10f8e28" -AGGRESSIVE_CLIENT_ID = 'OmTFHKYSMLFqnu2HHucmclAptedxWXkq' -APP_VERSION = '1481046241' +AGGRESSIVE_CLIENT_ID = "OmTFHKYSMLFqnu2HHucmclAptedxWXkq" +APP_VERSION = "1481046241" #################################################################### @@ -42,78 +42,90 @@ def main(): # Hack related to #58 if sys.platform == "win32": - os.system("chcp 65001"); - - parser = argparse.ArgumentParser(description='SoundScrape. Scrape an artist from SoundCloud.\n') - parser.add_argument('artist_url', metavar='U', type=str, nargs='*', - help='An artist\'s SoundCloud username or URL') - parser.add_argument('-n', '--num-tracks', type=int, default=sys.maxsize, - help='The number of tracks to download') - parser.add_argument('-g', '--group', action='store_true', - help='Use if downloading tracks from a SoundCloud group') - parser.add_argument('-b', '--bandcamp', action='store_true', - help='Use if downloading from Bandcamp rather than SoundCloud') - parser.add_argument('-m', '--mixcloud', action='store_true', - help='Use if downloading from Mixcloud rather than SoundCloud') - parser.add_argument('-a', '--audiomack', action='store_true', - help='Use if downloading from Audiomack rather than SoundCloud') - parser.add_argument('-c', '--hive', action='store_true', - help='Use if downloading from Hive.co rather than SoundCloud') - parser.add_argument('-l', '--likes', action='store_true', - help='Download all of a user\'s Likes.') - parser.add_argument('-L', '--login', type=str, default='soundscrape123@mailinator.com', - help='Set login') - parser.add_argument('-d', '--downloadable', action='store_true', - help='Only fetch tracks with a Downloadable link.') - parser.add_argument('-t', '--track', type=str, default='', - help='The name of a specific track by an artist') - parser.add_argument('-f', '--folders', action='store_true', - help='Organize saved songs in folders by artists') - parser.add_argument('-p', '--path', type=str, default='', - help='Set directory path where downloads should be saved to') - parser.add_argument('-P', '--password', type=str, default='soundscraperocks', - help='Set password') - parser.add_argument('-o', '--open', action='store_true', - help='Open downloaded files after downloading.') - parser.add_argument('-k', '--keep', action='store_true', - help='Keep 30-second preview tracks') - parser.add_argument('-v', '--version', action='store_true', default=False, - help='Display the current version of SoundScrape') + os.system("chcp 65001") + + parser = argparse.ArgumentParser(description="SoundScrape. Scrape an artist from SoundCloud.\n") + parser.add_argument( + "artist_url", metavar="U", type=str, nargs="*", help="An artist's SoundCloud username or URL", + ) + parser.add_argument( + "-n", "--num-tracks", type=int, default=sys.maxsize, help="The number of tracks to download", + ) + parser.add_argument( + "-g", "--group", action="store_true", help="Use if downloading tracks from a SoundCloud group", + ) + parser.add_argument( + "-b", "--bandcamp", action="store_true", help="Use if downloading from Bandcamp rather than SoundCloud", + ) + parser.add_argument( + "-m", "--mixcloud", action="store_true", help="Use if downloading from Mixcloud rather than SoundCloud", + ) + parser.add_argument( + "-a", "--audiomack", action="store_true", help="Use if downloading from Audiomack rather than SoundCloud", + ) + parser.add_argument( + "-c", "--hive", action="store_true", help="Use if downloading from Hive.co rather than SoundCloud", + ) + parser.add_argument("-l", "--likes", action="store_true", help="Download all of a user's Likes.") + parser.add_argument( + "-L", "--login", type=str, default="soundscrape123@mailinator.com", help="Set login", + ) + parser.add_argument( + "-d", "--downloadable", action="store_true", help="Only fetch tracks with a Downloadable link.", + ) + parser.add_argument( + "-t", "--track", type=str, default="", help="The name of a specific track by an artist", + ) + parser.add_argument( + "-f", "--folders", action="store_true", help="Organize saved songs in folders by artists", + ) + parser.add_argument( + "-p", "--path", type=str, default="", help="Set directory path where downloads should be saved to", + ) + parser.add_argument("-P", "--password", type=str, default="soundscraperocks", help="Set password") + parser.add_argument( + "-o", "--open", action="store_true", help="Open downloaded files after downloading.", + ) + parser.add_argument("-k", "--keep", action="store_true", help="Keep 30-second preview tracks") + parser.add_argument( + "-v", "--version", action="store_true", default=False, help="Display the current version of SoundScrape", + ) args = parser.parse_args() vargs = vars(args) - if vargs['version']: + if vargs["version"]: import pkg_resources + version = pkg_resources.require("soundscrape")[0].version print(version) return - if not vargs['artist_url']: - parser.error('Please supply an artist\'s username or URL!') + if not vargs["artist_url"]: + parser.error("Please supply an artist's username or URL!") - if sys.version_info < (3,0,0): - vargs['artist_url'] = urllib.quote(vargs['artist_url'][0], safe=':/') + if sys.version_info < (3, 0, 0): + vargs["artist_url"] = urllib.quote(vargs["artist_url"][0], safe=":/") else: - vargs['artist_url'] = urllib.parse.quote(vargs['artist_url'][0], safe=':/') + vargs["artist_url"] = urllib.parse.quote(vargs["artist_url"][0], safe=":/") - artist_url = vargs['artist_url'] + artist_url = vargs["artist_url"] - if not exists(vargs['path']): - if not access(dirname(vargs['path']), W_OK): - vargs['path'] = '' + if not exists(vargs["path"]): + if not access(dirname(vargs["path"]), W_OK): + vargs["path"] = "" else: - mkdir(vargs['path']) + mkdir(vargs["path"]) - if 'bandcamp.com' in artist_url or vargs['bandcamp']: + if "bandcamp.com" in artist_url or vargs["bandcamp"]: process_bandcamp(vargs) - elif 'mixcloud.com' in artist_url or vargs['mixcloud']: + elif "mixcloud.com" in artist_url or vargs["mixcloud"]: process_mixcloud(vargs) - elif 'audiomack.com' in artist_url or vargs['audiomack']: + elif "audiomack.com" in artist_url or vargs["audiomack"]: process_audiomack(vargs) - elif 'hive.co' in artist_url or vargs['hive']: + elif "hive.co" in artist_url or vargs["hive"]: process_hive(vargs) - elif 'musicbed.com' in artist_url: + elif "musicbed.com" in artist_url: process_musicbed(vargs) else: process_soundcloud(vargs) @@ -129,59 +141,59 @@ def process_soundcloud(vargs): Main SoundCloud path. """ - artist_url = vargs['artist_url'] - track_permalink = vargs['track'] - keep_previews = vargs['keep'] - folders = vargs['folders'] + artist_url = vargs["artist_url"] + track_permalink = vargs["track"] + keep_previews = vargs["keep"] + folders = vargs["folders"] id3_extras = {} one_track = False likes = False client = get_client() - if 'soundcloud' not in artist_url.lower(): - if vargs['group']: - artist_url = 'https://soundcloud.com/groups/' + artist_url.lower() + if "soundcloud" not in artist_url.lower(): + if vargs["group"]: + artist_url = "https://soundcloud.com/groups/" + artist_url.lower() elif len(track_permalink) > 0: one_track = True - track_url = 'https://soundcloud.com/' + artist_url.lower() + '/' + track_permalink.lower() + track_url = "https://soundcloud.com/" + artist_url.lower() + "/" + track_permalink.lower() else: - artist_url = 'https://soundcloud.com/' + artist_url.lower() - if vargs['likes'] or 'likes' in artist_url.lower(): + artist_url = "https://soundcloud.com/" + artist_url.lower() + if vargs["likes"] or "likes" in artist_url.lower(): likes = True - if 'likes' in artist_url.lower(): - artist_url = artist_url[0:artist_url.find('/likes')] + if "likes" in artist_url.lower(): + artist_url = artist_url[0 : artist_url.find("/likes")] likes = True if one_track: num_tracks = 1 else: - num_tracks = vargs['num_tracks'] + num_tracks = vargs["num_tracks"] try: if one_track: - resolved = client.get('/resolve', url=track_url, limit=200) + resolved = client.get("/resolve", url=track_url, limit=200) elif likes: - userId = str(client.get('/resolve', url=artist_url).id) + userId = str(client.get("/resolve", url=artist_url).id) - resolved = client.get('/users/' + userId + '/favorites', limit=200, linked_partitioning=1) + resolved = client.get("/users/" + userId + "/favorites", limit=200, linked_partitioning=1) next_href = False - if(hasattr(resolved, 'next_href')): + if hasattr(resolved, "next_href"): next_href = resolved.next_href - while (next_href): + while next_href: resolved2 = requests.get(next_href).json() - if('next_href' in resolved2): - next_href = resolved2['next_href'] + if "next_href" in resolved2: + next_href = resolved2["next_href"] else: next_href = False - resolved2 = soundcloud.resource.ResourceList(resolved2['collection']) + resolved2 = soundcloud.resource.ResourceList(resolved2["collection"]) resolved.collection.extend(resolved2) resolved = resolved.collection else: - resolved = client.get('/resolve', url=artist_url, limit=200) + resolved = client.get("/resolve", url=artist_url, limit=200) except Exception as e: # HTTPError? @@ -189,43 +201,39 @@ def process_soundcloud(vargs): # We're going to have to stop trusting the API/client and # do all our own scraping. Boo. - if '404 Client Error' in str(e): + if "404 Client Error" in str(e): puts(colored.red("Problem downloading [404]: ") + colored.white("Item Not Found")) return None message = str(e) - item_id = message.rsplit('/', 1)[-1].split('.json')[0].split('?client_id')[0] + item_id = message.rsplit("/", 1)[-1].split(".json")[0].split("?client_id")[0] hard_track_url = get_hard_track_url(item_id) track_data = get_soundcloud_data(artist_url) - puts_safe(colored.green("Scraping") + colored.white(": " + track_data['title'])) + puts_safe(colored.green("Scraping") + colored.white(": " + track_data["title"])) filenames = [] - filename = sanitize_filename(track_data['artist'] + ' - ' + track_data['title'] + '.mp3') + filename = sanitize_filename(track_data["artist"] + " - " + track_data["title"] + ".mp3") if folders: - name_path = join(vargs['path'], track_data['artist']) + name_path = join(vargs["path"], track_data["artist"]) if not exists(name_path): mkdir(name_path) filename = join(name_path, filename) else: - filename = join(vargs['path'], filename) + filename = join(vargs["path"], filename) if exists(filename): - puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_data['title'])) + puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_data["title"])) return None filename = download_file(hard_track_url, filename) - tagged = tag_file(filename, - artist=track_data['artist'], - title=track_data['title'], - year='2018', - genre='', - album='', - artwork_url='') + tagged = tag_file( + filename, artist=track_data["artist"], title=track_data["title"], year="2018", genre="", album="", artwork_url="", + ) if not tagged: - wav_filename = filename[:-3] + 'wav' + wav_filename = filename[:-3] + "wav" os.rename(filename, wav_filename) filename = wav_filename @@ -236,34 +244,36 @@ def process_soundcloud(vargs): aggressive = False # This is is likely a 'likes' page. - if not hasattr(resolved, 'kind'): + if not hasattr(resolved, "kind"): tracks = resolved else: - if resolved.kind == 'artist': + if resolved.kind == "artist": artist = resolved artist_id = str(artist.id) - tracks = client.get('/users/' + artist_id + '/tracks', limit=200) - elif resolved.kind == 'playlist': - id3_extras['album'] = resolved.title + tracks = client.get("/users/" + artist_id + "/tracks", limit=200) + elif resolved.kind == "playlist": + id3_extras["album"] = resolved.title if resolved.tracks != []: tracks = resolved.tracks else: - tracks = get_soundcloud_api_playlist_data(resolved.id)['tracks'] + tracks = get_soundcloud_api_playlist_data(resolved.id)["tracks"] tracks = tracks[:num_tracks] aggressive = True for track in tracks: - download_track(track, resolved.title, keep_previews, folders, custom_path=vargs['path']) + download_track( + track, resolved.title, keep_previews, folders, custom_path=vargs["path"], + ) - elif resolved.kind == 'track': + elif resolved.kind == "track": tracks = [resolved] - elif resolved.kind == 'group': + elif resolved.kind == "group": group = resolved group_id = str(group.id) - tracks = client.get('/groups/' + group_id + '/tracks', limit=200) + tracks = client.get("/groups/" + group_id + "/tracks", limit=200) else: artist = resolved artist_id = str(artist.id) - tracks = client.get('/users/' + artist_id + '/tracks', limit=200) + tracks = client.get("/users/" + artist_id + "/tracks", limit=200) if tracks == [] and artist.track_count > 0: aggressive = True filenames = [] @@ -271,29 +281,32 @@ def process_soundcloud(vargs): # this might be buggy data = get_soundcloud_api2_data(artist_id) - for track in data['collection']: + for track in data["collection"]: if len(filenames) >= num_tracks: break - if track['type'] == 'playlist': - track['playlist']['tracks'] = track['playlist']['tracks'][:num_tracks] - for playlist_track in track['playlist']['tracks']: - album_name = track['playlist']['title'] - filename = download_track(playlist_track, album_name, keep_previews, folders, filenames, custom_path=vargs['path']) + if track["type"] == "playlist": + track["playlist"]["tracks"] = track["playlist"]["tracks"][:num_tracks] + for playlist_track in track["playlist"]["tracks"]: + album_name = track["playlist"]["title"] + filename = download_track( + playlist_track, album_name, keep_previews, folders, filenames, custom_path=vargs["path"], + ) if filename: filenames.append(filename) else: - d_track = track['track'] - filename = download_track(d_track, custom_path=vargs['path']) + d_track = track["track"] + filename = download_track(d_track, custom_path=vargs["path"]) if filename: filenames.append(filename) if not aggressive: - filenames = download_tracks(client, tracks, num_tracks, vargs['downloadable'], vargs['folders'], vargs['path'], - id3_extras=id3_extras) + filenames = download_tracks( + client, tracks, num_tracks, vargs["downloadable"], vargs["folders"], vargs["path"], id3_extras=id3_extras, + ) - if vargs['open']: + if vargs["open"]: open_files(filenames) @@ -304,28 +317,31 @@ def get_client(): client = soundcloud.Client(client_id=CLIENT_ID) return client -def download_track(track, album_name=u'', keep_previews=False, folders=False, filenames=[], custom_path=''): + +def download_track( + track, album_name="", keep_previews=False, folders=False, filenames=[], custom_path="", +): """ Given a track, force scrape it. """ - hard_track_url = get_hard_track_url(track['id']) + hard_track_url = get_hard_track_url(track["id"]) # We have no info on this track whatsoever. - if not 'title' in track: + if not "title" in track: return None if not keep_previews: - if (track.get('duration', 0) < track.get('full_duration', 0)): - puts_safe(colored.yellow("Skipping preview track") + colored.white(": " + track['title'])) + if track.get("duration", 0) < track.get("full_duration", 0): + puts_safe(colored.yellow("Skipping preview track") + colored.white(": " + track["title"])) return None # May not have a "full name" - name = track['user'].get('full_name', '') - if name == '': - name = track['user']['username'] + name = track["user"].get("full_name", "") + if name == "": + name = track["user"]["username"] - filename = sanitize_filename(name + ' - ' + track['title'] + '.mp3') + filename = sanitize_filename(name + " - " + track["title"] + ".mp3") if folders: name_path = join(custom_path, name) @@ -336,7 +352,7 @@ def download_track(track, album_name=u'', keep_previews=False, folders=False, fi filename = join(custom_path, filename) if exists(filename): - puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track['title'])) + puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track["title"])) return None # Skip already downloaded track. @@ -344,28 +360,33 @@ def download_track(track, album_name=u'', keep_previews=False, folders=False, fi return None if hard_track_url: - puts_safe(colored.green("Scraping") + colored.white(": " + track['title'])) + puts_safe(colored.green("Scraping") + colored.white(": " + track["title"])) else: # Region coded? - puts_safe(colored.yellow("Unable to download") + colored.white(": " + track['title'])) + puts_safe(colored.yellow("Unable to download") + colored.white(": " + track["title"])) return None filename = download_file(hard_track_url, filename) - tagged = tag_file(filename, - artist=name, - title=track['title'], - year=track['created_at'][:4], - genre=track['genre'], - album=album_name, - artwork_url=track['artwork_url']) + tagged = tag_file( + filename, + artist=name, + title=track["title"], + year=track["created_at"][:4], + genre=track["genre"], + album=album_name, + artwork_url=track["artwork_url"], + ) if not tagged: - wav_filename = filename[:-3] + 'wav' + wav_filename = filename[:-3] + "wav" os.rename(filename, wav_filename) filename = wav_filename return filename -def download_tracks(client, tracks, num_tracks=sys.maxsize, downloadable=False, folders=False, custom_path='', id3_extras={}): + +def download_tracks( + client, tracks, num_tracks=sys.maxsize, downloadable=False, folders=False, custom_path="", id3_extras={}, +): """ Given a list of tracks, iteratively download all of them. @@ -382,43 +403,46 @@ def download_tracks(client, tracks, num_tracks=sys.maxsize, downloadable=False, try: t_track = {} - t_track['downloadable'] = track.downloadable - t_track['streamable'] = track.streamable - t_track['title'] = track.title - t_track['user'] = {'username': track.user['username']} - t_track['release_year'] = track.release - t_track['genre'] = track.genre - t_track['artwork_url'] = track.artwork_url + t_track["downloadable"] = track.downloadable + t_track["streamable"] = track.streamable + t_track["title"] = track.title + t_track["user"] = {"username": track.user["username"]} + t_track["release_year"] = track.release + t_track["genre"] = track.genre + t_track["artwork_url"] = track.artwork_url if track.downloadable: - t_track['stream_url'] = track.download_url + t_track["stream_url"] = track.download_url else: if downloadable: puts_safe(colored.red("Skipping") + colored.white(": " + track.title)) continue - if hasattr(track, 'stream_url'): - t_track['stream_url'] = track.stream_url + if hasattr(track, "stream_url"): + t_track["stream_url"] = track.stream_url else: - t_track['direct'] = True + t_track["direct"] = True streams_url = "https://api.soundcloud.com/i1/tracks/%s/streams?client_id=%s&app_version=%s" % ( - str(track.id), AGGRESSIVE_CLIENT_ID, APP_VERSION) + str(track.id), + AGGRESSIVE_CLIENT_ID, + APP_VERSION, + ) response = requests.get(streams_url).json() - t_track['stream_url'] = response['http_mp3_128_url'] + t_track["stream_url"] = response["http_mp3_128_url"] track = t_track except Exception as e: - puts_safe(colored.white(track.title) + colored.red(' is not downloadable.')) + puts_safe(colored.white(track.title) + colored.red(" is not downloadable.")) continue if i > num_tracks - 1: continue try: - if not track.get('stream_url', False): - puts_safe(colored.white(track['title']) + colored.red(' is not downloadable.')) + if not track.get("stream_url", False): + puts_safe(colored.white(track["title"]) + colored.red(" is not downloadable.")) continue else: - track_artist = sanitize_filename(track['user']['username']) - track_title = sanitize_filename(track['title']) - track_filename = track_artist + ' - ' + track_title + '.mp3' + track_artist = sanitize_filename(track["user"]["username"]) + track_title = sanitize_filename(track["title"]) + track_filename = track_artist + " - " + track_title + ".mp3" if folders: track_artist_path = join(custom_path, track_artist) @@ -432,41 +456,41 @@ def download_tracks(client, tracks, num_tracks=sys.maxsize, downloadable=False, puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_title)) continue - puts_safe(colored.green("Downloading") + colored.white(": " + track['title'])) - + puts_safe(colored.green("Downloading") + colored.white(": " + track["title"])) - if track.get('direct', False): - location = track['stream_url'] + if track.get("direct", False): + location = track["stream_url"] else: - stream = client.get(track['stream_url'], allow_redirects=False, limit=200) - if hasattr(stream, 'location'): + stream = client.get(track["stream_url"], allow_redirects=False, limit=200) + if hasattr(stream, "location"): location = stream.location else: location = stream.url filename = download_file(location, track_filename) - tagged = tag_file(filename, - artist=track['user']['username'], - title=track['title'], - year=track['release_year'], - genre=track['genre'], - album=id3_extras.get('album', None), - artwork_url=track['artwork_url']) + tagged = tag_file( + filename, + artist=track["user"]["username"], + title=track["title"], + year=track["release_year"], + genre=track["genre"], + album=id3_extras.get("album", None), + artwork_url=track["artwork_url"], + ) if not tagged: - wav_filename = filename[:-3] + 'wav' + wav_filename = filename[:-3] + "wav" os.rename(filename, wav_filename) filename = wav_filename filenames.append(filename) except Exception as e: - puts_safe(colored.red("Problem downloading ") + colored.white(track['title'])) + puts_safe(colored.red("Problem downloading ") + colored.white(track["title"])) puts_safe(str(e)) return filenames - def get_soundcloud_data(url): """ Scrapes a SoundCloud page for a track's important information. @@ -480,9 +504,9 @@ def get_soundcloud_data(url): request = requests.get(url) - title_tag = request.text.split('')[1].split('</title')[0] - data['title'] = title_tag.split(' by ')[0].strip() - data['artist'] = title_tag.split(' by ')[1].split('|')[0].strip() + title_tag = request.text.split("<title>")[1].split("</title")[0] + data["title"] = title_tag.split(" by ")[0].strip() + data["artist"] = title_tag.split(" by ")[1].split("|")[0].strip() # XXX Do more.. return data @@ -494,40 +518,51 @@ def get_soundcloud_api2_data(artist_id): """ v2_url = "https://api-v2.soundcloud.com/stream/users/%s?limit=500&client_id=%s&app_version=%s" % ( - artist_id, AGGRESSIVE_CLIENT_ID, APP_VERSION) + artist_id, + AGGRESSIVE_CLIENT_ID, + APP_VERSION, + ) response = requests.get(v2_url) parsed = response.json() return parsed + def get_soundcloud_api_playlist_data(playlist_id): """ Scrape the new API. Returns the parsed JSON response. """ - url = "https://api.soundcloud.com/playlists/%s?representation=full&client_id=02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea&app_version=1467724310" % ( - playlist_id) + url = ( + "https://api.soundcloud.com/playlists/%s?representation=full&client_id=02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea&app_version=1467724310" + % (playlist_id) + ) response = requests.get(url) parsed = response.json() return parsed + def get_hard_track_url(item_id): """ Hard-scrapes a track. """ streams_url = "https://api.soundcloud.com/i1/tracks/%s/streams/?client_id=%s&app_version=%s" % ( - item_id, AGGRESSIVE_CLIENT_ID, APP_VERSION) + item_id, + AGGRESSIVE_CLIENT_ID, + APP_VERSION, + ) response = requests.get(streams_url) json_response = response.json() if response.status_code == 200: - hard_track_url = json_response['http_mp3_128_url'] + hard_track_url = json_response["http_mp3_128_url"] return hard_track_url else: return None + #################################################################### # Bandcamp #################################################################### @@ -538,14 +573,16 @@ def process_bandcamp(vargs): Main BandCamp path. """ - artist_url = vargs['artist_url'] + artist_url = vargs["artist_url"] - if 'bandcamp.com' in artist_url or ('://' in artist_url and vargs['bandcamp']): + if "bandcamp.com" in artist_url or ("://" in artist_url and vargs["bandcamp"]): bc_url = artist_url else: - bc_url = 'https://' + artist_url + '.bandcamp.com/music' + bc_url = "https://" + artist_url + ".bandcamp.com/music" - filenames = scrape_bandcamp_url(bc_url, num_tracks=vargs['num_tracks'], folders=vargs['folders'], custom_path=vargs['path']) + filenames = scrape_bandcamp_url( + bc_url, num_tracks=vargs["num_tracks"], folders=vargs["folders"], custom_path=vargs["path"], + ) # check if we have lists inside a list, which indicates the # scraping has gone recursive, so we must format the output @@ -558,14 +595,14 @@ def process_bandcamp(vargs): # ( reference: http://stackoverflow.com/a/11264751 ) filenames = [val for sub in filenames for val in sub] - if vargs['open']: + if vargs["open"]: open_files(filenames) return # Largely borrowed from Ronier's bandcampscrape -def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path=''): +def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path=""): """ Pull out artist and track info from a Bandcamp URL. @@ -608,40 +645,42 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path= else: track_number = None if track_number and folders: - track_filename = '%s - %s.mp3' % (track_number, track_name) + track_filename = "%s - %s.mp3" % (track_number, track_name) else: - track_filename = '%s.mp3' % (track_name) + track_filename = "%s.mp3" % (track_name) track_filename = sanitize_filename(track_filename) if folders: path = join(directory, track_filename) else: - path = join(custom_path, sanitize_filename(artist) + ' - ' + track_filename) + path = join(custom_path, sanitize_filename(artist) + " - " + track_filename) if exists(path): puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_name)) continue - if not track['file']: + if not track["file"]: puts_safe(colored.yellow("Track unavailble for scraping: ") + colored.white(track_name)) continue puts_safe(colored.green("Downloading") + colored.white(": " + track_name)) - path = download_file(track['file']['mp3-128'], path) + path = download_file(track["file"]["mp3-128"], path) - album_year = album_data['album_release_date'] + album_year = album_data["album_release_date"] if album_year: album_year = datetime.strptime(album_year, "%d %b %Y %H:%M:%S GMT").year - tag_file(path, - artist, - track_name, - album=album_name, - year=album_year, - genre=album_data['genre'], - artwork_url=album_data['artFullsizeUrl'], - track_number=track_number, - url=album_data['url']) + tag_file( + path, + artist, + track_name, + album=album_name, + year=album_year, + genre=album_data["genre"], + artwork_url=album_data["artFullsizeUrl"], + track_number=track_number, + url=album_data["url"], + ) filenames.append(path) @@ -662,7 +701,7 @@ def get_bandcamp_metadata(url): try: sloppy_json = request.text.split("var TralbumData = ") sloppy_json = sloppy_json[1].replace('" + "', "") - sloppy_json = sloppy_json.replace("'", "\'") + sloppy_json = sloppy_json.replace("'", "'") sloppy_json = sloppy_json.split("};")[0] + "};" sloppy_json = sloppy_json.replace("};", "}") output = demjson.decode(sloppy_json) @@ -673,7 +712,7 @@ def get_bandcamp_metadata(url): all_albums = re.findall(regex_all_albums, request.text, re.MULTILINE) album_url_list = list() for album in all_albums: - album_url = re.sub(r'music/?$', '', url) + album + album_url = re.sub(r"music/?$", "", url) + album album_url_list.append(album_url) return album_url_list # if the JSON parser was successful, use a regex to get all tags @@ -683,22 +722,22 @@ def get_bandcamp_metadata(url): # make sure we treat integers correctly with join() # according to http://stackoverflow.com/a/7323861 # (very unlikely, but better safe than sorry!) - output['genre'] = ' '.join(s for s in tags) + output["genre"] = " ".join(s for s in tags) # make sure we always get the correct album name, even if this is a # track URL (unless this track does not belong to any album, in which # case the album name remains set as None. - output['album_name'] = None + output["album_name"] = None regex_album_name = r'album_title\s*:\s*"([^"]+)"\s*,' match = re.search(regex_album_name, request.text, re.MULTILINE) if match: - output['album_name'] = match.group(1) + output["album_name"] = match.group(1) try: - artUrl = request.text.split("\"tralbumArt\">")[1].split("\">")[0].split("href=\"")[1] - output['artFullsizeUrl'] = artUrl + artUrl = request.text.split('"tralbumArt">')[1].split('">')[0].split('href="')[1] + output["artFullsizeUrl"] = artUrl except: puts_safe(colored.red("Couldn't get full artwork") + "") - output['artFullsizeUrl'] = None + output["artFullsizeUrl"] = None return output @@ -713,22 +752,24 @@ def process_mixcloud(vargs): Main MixCloud path. """ - artist_url = vargs['artist_url'] + artist_url = vargs["artist_url"] - if 'mixcloud.com' in artist_url: + if "mixcloud.com" in artist_url: mc_url = artist_url else: - mc_url = 'https://mixcloud.com/' + artist_url + mc_url = "https://mixcloud.com/" + artist_url - filenames = scrape_mixcloud_url(mc_url, num_tracks=vargs['num_tracks'], folders=vargs['folders'], custom_path=vargs['path']) + filenames = scrape_mixcloud_url( + mc_url, num_tracks=vargs["num_tracks"], folders=vargs["folders"], custom_path=vargs["path"], + ) - if vargs['open']: + if vargs["open"]: open_files(filenames) return -def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=''): +def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=""): """ Returns: list: filenames to open @@ -744,9 +785,9 @@ def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_pa filenames = [] - track_artist = sanitize_filename(data['artist']) - track_title = sanitize_filename(data['title']) - track_filename = track_artist + ' - ' + track_title + data['mp3_url'][-4:] + track_artist = sanitize_filename(data["artist"]) + track_title = sanitize_filename(data["title"]) + track_filename = track_artist + " - " + track_title + data["mp3_url"][-4:] if folders: track_artist_path = join(custom_path, track_artist) @@ -754,21 +795,25 @@ def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_pa mkdir(track_artist_path) track_filename = join(track_artist_path, track_filename) if exists(track_filename): - puts_safe(colored.yellow("Skipping") + colored.white(': ' + data['title'] + " - it already exists!")) + puts_safe(colored.yellow("Skipping") + colored.white(": " + data["title"] + " - it already exists!")) return [] else: track_filename = join(custom_path, track_filename) - puts_safe(colored.green("Downloading") + colored.white( - ': ' + data['artist'] + " - " + data['title'] + " (" + track_filename[-4:] + ")")) - download_file(data['mp3_url'], track_filename) - if track_filename[-4:] == '.mp3': - tag_file(track_filename, - artist=data['artist'], - title=data['title'], - year=data['year'], - genre="Mix", - artwork_url=data['artwork_url']) + puts_safe( + colored.green("Downloading") + + colored.white(": " + data["artist"] + " - " + data["title"] + " (" + track_filename[-4:] + ")") + ) + download_file(data["mp3_url"], track_filename) + if track_filename[-4:] == ".mp3": + tag_file( + track_filename, + artist=data["artist"], + title=data["title"], + year=data["year"], + genre="Mix", + artwork_url=data["artwork_url"], + ) filenames.append(track_filename) return filenames @@ -786,33 +831,32 @@ def get_mixcloud_data(url): data = {} request = requests.get(url) preview_mp3_url = request.text.split('m-preview="')[1].split('" m-preview-light')[0] - song_uuid = request.text.split('m-preview="')[1].split('" m-preview-light')[0].split('previews/')[1].split('.mp3')[0] + song_uuid = request.text.split('m-preview="')[1].split('" m-preview-light')[0].split("previews/")[1].split(".mp3")[0] # Fish for the m4a.. for server in range(1, 23): # Ex: https://stream6.mixcloud.com/c/m4a/64/1/2/0/9/30fe-23aa-40da-9bf3-4bee2fba649d.m4a - mp3_url = "https://stream" + str(server) + ".mixcloud.com/c/m4a/64/" + song_uuid + '.m4a' + mp3_url = "https://stream" + str(server) + ".mixcloud.com/c/m4a/64/" + song_uuid + ".m4a" try: if requests.head(mp3_url).status_code == 200: - if '?' in mp3_url: - mp3_url = mp3_url.split('?')[0] + if "?" in mp3_url: + mp3_url = mp3_url.split("?")[0] break except Exception as e: continue full_title = request.text.split("<title>")[1].split(" | Mixcloud")[0] - title = full_title.split(' by ')[0].strip() - artist = full_title.split(' by ')[1].strip() + title = full_title.split(" by ")[0].strip() + artist = full_title.split(" by ")[1].strip() img_thumbnail_url = request.text.split('m-thumbnail-url="')[1].split(" ng-class")[0] - artwork_url = img_thumbnail_url.replace('60/', '300/').replace('60/', '300/').replace('//', 'https://').replace('"', - '') + artwork_url = img_thumbnail_url.replace("60/", "300/").replace("60/", "300/").replace("//", "https://").replace('"', "") - data['mp3_url'] = mp3_url - data['title'] = title - data['artist'] = artist - data['artwork_url'] = artwork_url - data['year'] = None + data["mp3_url"] = mp3_url + data["title"] = title + data["artist"] = artist + data["artwork_url"] = artwork_url + data["year"] = None return data @@ -827,22 +871,24 @@ def process_audiomack(vargs): Main Audiomack path. """ - artist_url = vargs['artist_url'] + artist_url = vargs["artist_url"] - if 'audiomack.com' in artist_url: + if "audiomack.com" in artist_url: mc_url = artist_url else: - mc_url = 'https://audiomack.com/' + artist_url + mc_url = "https://audiomack.com/" + artist_url - filenames = scrape_audiomack_url(mc_url, num_tracks=vargs['num_tracks'], folders=vargs['folders'], custom_path=vargs['path']) + filenames = scrape_audiomack_url( + mc_url, num_tracks=vargs["num_tracks"], folders=vargs["folders"], custom_path=vargs["path"], + ) - if vargs['open']: + if vargs["open"]: open_files(filenames) return -def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=''): +def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=""): """ Returns: list: filenames to open @@ -857,9 +903,9 @@ def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_p filenames = [] - track_artist = sanitize_filename(data['artist']) - track_title = sanitize_filename(data['title']) - track_filename = track_artist + ' - ' + track_title + '.mp3' + track_artist = sanitize_filename(data["artist"]) + track_title = sanitize_filename(data["title"]) + track_filename = track_artist + " - " + track_title + ".mp3" if folders: track_artist_path = join(custom_path, track_artist) @@ -867,19 +913,21 @@ def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_p mkdir(track_artist_path) track_filename = join(track_artist_path, track_filename) if exists(track_filename): - puts_safe(colored.yellow("Skipping") + colored.white(': ' + data['title'] + " - it already exists!")) + puts_safe(colored.yellow("Skipping") + colored.white(": " + data["title"] + " - it already exists!")) return [] else: track_filename = join(custom_path, track_filename) - puts_safe(colored.green("Downloading") + colored.white(': ' + data['artist'] + " - " + data['title'])) - download_file(data['mp3_url'], track_filename) - tag_file(track_filename, - artist=data['artist'], - title=data['title'], - year=data['year'], - genre=None, - artwork_url=data['artwork_url']) + puts_safe(colored.green("Downloading") + colored.white(": " + data["artist"] + " - " + data["title"])) + download_file(data["mp3_url"], track_filename) + tag_file( + track_filename, + artist=data["artist"], + title=data["title"], + year=data["year"], + genre=None, + artwork_url=data["artwork_url"], + ) filenames.append(track_filename) return filenames @@ -898,15 +946,15 @@ def get_audiomack_data(url): request = requests.get(url) mp3_url = request.text.split('class="player-icon download-song" title="Download" href="')[1].split('"')[0] - artist = request.text.split('<span class="artist">')[1].split('</span>')[0].strip() - title = request.text.split('<span class="artist">')[1].split('</span>')[1].split('</h1>')[0].strip() + artist = request.text.split('<span class="artist">')[1].split("</span>")[0].strip() + title = request.text.split('<span class="artist">')[1].split("</span>")[1].split("</h1>")[0].strip() artwork_url = request.text.split('<a class="lightbox-trigger" href="')[1].split('" data')[0].strip() - data['mp3_url'] = mp3_url - data['title'] = title - data['artist'] = artist - data['artwork_url'] = artwork_url - data['year'] = None + data["mp3_url"] = mp3_url + data["title"] = title + data["artist"] = artist + data["artwork_url"] = artwork_url + data["year"] = None return data @@ -921,22 +969,22 @@ def process_hive(vargs): Main Hive.co path. """ - artist_url = vargs['artist_url'] + artist_url = vargs["artist_url"] - if 'hive.co' in artist_url: + if "hive.co" in artist_url: mc_url = artist_url else: - mc_url = 'https://www.hive.co/downloads/download/' + artist_url + mc_url = "https://www.hive.co/downloads/download/" + artist_url - filenames = scrape_hive_url(mc_url, num_tracks=vargs['num_tracks'], folders=vargs['folders'], custom_path=vargs['path']) + filenames = scrape_hive_url(mc_url, num_tracks=vargs["num_tracks"], folders=vargs["folders"], custom_path=vargs["path"],) - if vargs['open']: + if vargs["open"]: open_files(filenames) return -def scrape_hive_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=''): +def scrape_hive_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=""): """ Scrape a Hive.co download page. @@ -1020,27 +1068,34 @@ def process_musicbed(vargs): # let's validate given MusicBed url validated = False - if vargs['artist_url'].startswith( 'https://www.musicbed.com/' ): - splitted = vargs['artist_url'][len('https://www.musicbed.com/'):].split( '/' ) - if len( splitted ) == 3: - if ( splitted[0] == 'artists' or splitted[0] == 'albums' or splitted[0] == 'songs' ) and splitted[2].isdigit(): + if vargs["artist_url"].startswith("https://www.musicbed.com/"): + splitted = vargs["artist_url"][len("https://www.musicbed.com/") :].split("/") + if len(splitted) == 3: + if (splitted[0] == "artists" or splitted[0] == "albums" or splitted[0] == "songs") and splitted[2].isdigit(): validated = True if not validated: - puts( colored.red( 'process_musicbed: you provided incorrect MusicBed url. Aborting.' ) ) - puts( colored.white( 'Please make sure that url is either artist-url, album-url or song-url.' ) ) - puts( colored.white( 'Example of correct artist-url: https://www.musicbed.com/artists/lights-motion/5188' ) ) - puts( colored.white( 'Example of correct album-url: https://www.musicbed.com/albums/be-still/2828' ) ) - puts( colored.white( 'Example of correct song-url: https://www.musicbed.com/songs/be-still/24540' ) ) + puts(colored.red("process_musicbed: you provided incorrect MusicBed url. Aborting.")) + puts(colored.white("Please make sure that url is either artist-url, album-url or song-url.")) + puts(colored.white("Example of correct artist-url: https://www.musicbed.com/artists/lights-motion/5188")) + puts(colored.white("Example of correct album-url: https://www.musicbed.com/albums/be-still/2828")) + puts(colored.white("Example of correct song-url: https://www.musicbed.com/songs/be-still/24540")) return - filenames = scrape_musicbed_url(vargs['artist_url'], vargs['login'], vargs['password'], num_tracks=vargs['num_tracks'], folders=vargs['folders'], custom_path=vargs['path']) + filenames = scrape_musicbed_url( + vargs["artist_url"], + vargs["login"], + vargs["password"], + num_tracks=vargs["num_tracks"], + folders=vargs["folders"], + custom_path=vargs["path"], + ) - if vargs['open']: + if vargs["open"]: open_files(filenames) -def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=False, custom_path=''): +def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=False, custom_path=""): """ Scrapes provided MusicBed url. Uses requests' Session object in order to store cookies. @@ -1056,9 +1111,13 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa session = requests.Session() - response = session.get( url ) + response = session.get(url) if response.status_code != 200: - puts( colored.red( 'scrape_musicbed_url: couldn\'t open provided url. Status code: ' + str( response.status_code ) + '. Aborting.' ) ) + puts( + colored.red( + "scrape_musicbed_url: couldn't open provided url. Status code: " + str(response.status_code) + ". Aborting." + ) + ) session.close() return [] @@ -1067,101 +1126,113 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa # '/artists/' - search for and download many albums # '/albums/' - means we're downloading 1 album # '/songs/' - means 1 album as well, but we're forcing num_tracks=1 in order to download only first relevant track - if url.startswith( 'https://www.musicbed.com/artists/' ): + if url.startswith("https://www.musicbed.com/artists/"): # a hackjob code to get a list of available albums main_index = 0 - while response.text.find( 'https://www.musicbed.com/albums/', main_index ) != -1: - start_index = response.text.find( 'https://www.musicbed.com/albums/', main_index ) - end_index = response.text.find( '">', start_index ) - albums.append( response.text[start_index:end_index] ) + while response.text.find("https://www.musicbed.com/albums/", main_index) != -1: + start_index = response.text.find("https://www.musicbed.com/albums/", main_index) + end_index = response.text.find('">', start_index) + albums.append(response.text[start_index:end_index]) main_index = end_index - elif url.startswith( 'https://www.musicbed.com/songs/' ): - albums.append( url ) + elif url.startswith("https://www.musicbed.com/songs/"): + albums.append(url) num_tracks = 1 - else: # url.startswith( 'https://www.musicbed.com/albums/' ) - albums.append( url ) + else: # url.startswith( 'https://www.musicbed.com/albums/' ) + albums.append(url) # let's get our token and try to login (csrf_token seems to be present on every page) - token = response.text.split( 'var csrf_token = "' )[1].split( '";' )[0] - details = { '_token': token, 'login': login, 'password': password } - response = session.post( 'https://www.musicbed.com/ajax/login', data=details ) + token = response.text.split('var csrf_token = "')[1].split('";')[0] + details = {"_token": token, "login": login, "password": password} + response = session.post("https://www.musicbed.com/ajax/login", data=details) if response.status_code != 200: - puts( colored.red( 'scrape_musicbed_url: couldn\'t login. Aborting. ' ) + colored.white( 'Couldn\'t access login page.' ) ) + puts(colored.red("scrape_musicbed_url: couldn't login. Aborting. ") + colored.white("Couldn't access login page.")) session.close() return [] - login_response_data = demjson.decode( response.text ) - if not login_response_data['body']['status']: - puts( colored.red( 'scrape_musicbed_url: couldn\'t login. Aborting. ' ) + colored.white( 'Did you provide correct login and password?' ) ) + login_response_data = demjson.decode(response.text) + if not login_response_data["body"]["status"]: + puts( + colored.red("scrape_musicbed_url: couldn't login. Aborting. ") + + colored.white("Did you provide correct login and password?") + ) session.close() return [] # now let's actually scrape collected pages filenames = [] for each_album_url in albums: - response = session.get( each_album_url ) + response = session.get(each_album_url) if response.status_code != 200: - puts_safe( colored.red( 'scrape_musicbed_url: couldn\'t open url: ' + each_album_url + - '. Status code: ' + str( response.status_code ) + '. Skipping.' ) ) + puts_safe( + colored.red( + "scrape_musicbed_url: couldn't open url: " + + each_album_url + + ". Status code: " + + str(response.status_code) + + ". Skipping." + ) + ) continue # actually not a JSON, but a JS object, but so far so good - json = response.text.split( 'App.components.SongRows = ' )[1].split( '</script>' )[0] - data = demjson.decode( json ) + json = response.text.split("App.components.SongRows = ")[1].split("</script>")[0] + data = demjson.decode(json) song_count = 1 - for each_song in data['loadedSongs']: + for each_song in data["loadedSongs"]: if song_count > num_tracks: break try: - url, params = each_song['playback_url'].split( '?' ) + url, params = each_song["playback_url"].split("?") details = dict() - for each_param in params.split( '&' ): - name, value = each_param.split( '=' ) - details.update( { name: value } ) + for each_param in params.split("&"): + name, value = each_param.split("=") + details.update({name: value}) # musicbed warns about it if it's not fixed - details['X-Amz-Credential'] = details['X-Amz-Credential'].replace( '%2F', '/' ) + details["X-Amz-Credential"] = details["X-Amz-Credential"].replace("%2F", "/") directory = custom_path if folders: - sanitized_artist = sanitize_filename( each_song['album']['data']['artist']['data']['name'] ) - sanitized_album = sanitize_filename( each_song['album']['data']['name'] ) - directory = join( directory, sanitized_artist + ' - ' + sanitized_album ) - if not exists( directory ): - mkdir( directory ) - filename = join( directory, str( song_count ) + ' - ' + sanitize_filename( each_song['name'] ) + '.mp3' ) - - if exists( filename ): - puts_safe( colored.yellow( 'Skipping' ) + colored.white( ': ' + each_song['name'] + ' - it already exists!' ) ) + sanitized_artist = sanitize_filename(each_song["album"]["data"]["artist"]["data"]["name"]) + sanitized_album = sanitize_filename(each_song["album"]["data"]["name"]) + directory = join(directory, sanitized_artist + " - " + sanitized_album) + if not exists(directory): + mkdir(directory) + filename = join(directory, str(song_count) + " - " + sanitize_filename(each_song["name"]) + ".mp3",) + + if exists(filename): + puts_safe(colored.yellow("Skipping") + colored.white(": " + each_song["name"] + " - it already exists!")) song_count += 1 continue - puts_safe( colored.green( 'Downloading' ) + colored.white( ': ' + each_song['name'] ) ) - path = download_file( url, filename, session=session, params=details ) + puts_safe(colored.green("Downloading") + colored.white(": " + each_song["name"])) + path = download_file(url, filename, session=session, params=details) # example of genre_string: # "<a href=\"https://www.musicbed.com/genres/ambient/2\">Ambient</a> <a href=\"https://www.musicbed.com/genres/cinematic/4\">Cinematic</a>" - genres = '' - for each in each_song['genre_string'].split( '</a>' ): - if ( each != "" ): - genres += each.split( '">' )[1] + '/' - genres = genres[:-1] # removing last '/ - - tag_file(path, - each_song['album']['data']['artist']['data']['name'], - each_song['name'], - album=each_song['album']['data']['name'], - year=int( each_song['album']['data']['released_at'].split( '-' )[0] ), - genre=genres, - artwork_url=each_song['album']['data']['imageObject']['data']['paths']['original'], - track_number=str( song_count ), - url=each_song['song_url']) - - filenames.append( path ) + genres = "" + for each in each_song["genre_string"].split("</a>"): + if each != "": + genres += each.split('">')[1] + "/" + genres = genres[:-1] # removing last '/ + + tag_file( + path, + each_song["album"]["data"]["artist"]["data"]["name"], + each_song["name"], + album=each_song["album"]["data"]["name"], + year=int(each_song["album"]["data"]["released_at"].split("-")[0]), + genre=genres, + artwork_url=each_song["album"]["data"]["imageObject"]["data"]["paths"]["original"], + track_number=str(song_count), + url=each_song["song_url"], + ) + + filenames.append(path) song_count += 1 except: - puts_safe( colored.red( 'Problem downloading ' ) + colored.white( each_song['name'] ) + '. Skipping.' ) + puts_safe(colored.red("Problem downloading ") + colored.white(each_song["name"]) + ". Skipping.") song_count += 1 session.close() @@ -1179,20 +1250,20 @@ def download_file(url, path, session=None, params=None): Download an individual file. """ - if url[0:2] == '//': - url = 'https://' + url[2:] + if url[0:2] == "//": + url = "https://" + url[2:] # Use a temporary file so that we don't import incomplete files. - tmp_path = path + '.tmp' + tmp_path = path + ".tmp" if session and params: - r = session.get( url, params=params, stream=True ) + r = session.get(url, params=params, stream=True) elif session and not params: - r = session.get( url, stream=True ) + r = session.get(url, stream=True) else: r = requests.get(url, stream=True) - with open(tmp_path, 'wb') as f: - total_length = int(r.headers.get('content-length', 0)) + with open(tmp_path, "wb") as f: + total_length = int(r.headers.get("content-length", 0)) for chunk in progress.bar(r.iter_content(chunk_size=1024), expected_size=(total_length / 1024) + 1): if chunk: # filter out keep-alive new chunks f.write(chunk) @@ -1203,7 +1274,9 @@ def download_file(url, path, session=None, params=None): return path -def tag_file(filename, artist, title, year=None, genre=None, artwork_url=None, album=None, track_number=None, url=None): +def tag_file( + filename, artist, title, year=None, genre=None, artwork_url=None, album=None, track_number=None, url=None, +): """ Attempt to put ID3 tags on a file. @@ -1232,22 +1305,22 @@ def tag_file(filename, artist, title, year=None, genre=None, artwork_url=None, a audio["tracknumber"] = track_number if genre: audio["genre"] = genre - if url: # saves the tag as WOAR + if url: # saves the tag as WOAR audio["website"] = url audio.save() if artwork_url: - artwork_url = artwork_url.replace('https', 'http') + artwork_url = artwork_url.replace("https", "http") - mime = 'image/jpeg' - if '.jpg' in artwork_url: - mime = 'image/jpeg' - if '.png' in artwork_url: - mime = 'image/png' + mime = "image/jpeg" + if ".jpg" in artwork_url: + mime = "image/jpeg" + if ".png" in artwork_url: + mime = "image/png" - if '-large' in artwork_url: - new_artwork_url = artwork_url.replace('-large', '-t500x500') + if "-large" in artwork_url: + new_artwork_url = artwork_url.replace("-large", "-t500x500") try: image_data = requests.get(new_artwork_url).content except Exception as e: @@ -1259,11 +1332,7 @@ def tag_file(filename, artist, title, year=None, genre=None, artwork_url=None, a audio = MP3(filename, ID3=OldID3) audio.tags.add( APIC( - encoding=3, # 3 is for utf-8 - mime=mime, - type=3, # 3 is for the cover image - desc='Cover', - data=image_data + encoding=3, mime=mime, type=3, desc="Cover", data=image_data, # 3 is for utf-8 # 3 is for the cover image ) ) audio.save() @@ -1271,7 +1340,7 @@ def tag_file(filename, artist, title, year=None, genre=None, artwork_url=None, a # because there is software that doesn't seem to use WOAR we save url tag again as WXXX if url: audio = MP3(filename, ID3=OldID3) - audio.tags.add( WXXX( encoding=3, url=url ) ) + audio.tags.add(WXXX(encoding=3, url=url)) audio.save() return True @@ -1280,11 +1349,12 @@ def tag_file(filename, artist, title, year=None, genre=None, artwork_url=None, a puts(colored.red("Problem tagging file: ") + colored.white("Is this file a WAV?")) return False + def open_files(filenames): """ Call the system 'open' command on a file. """ - command = ['open'] + filenames + command = ["open"] + filenames process = Popen(command, stdout=PIPE, stderr=PIPE) stdout, stderr = process.communicate() @@ -1296,25 +1366,26 @@ def sanitize_filename(filename): Returns: str: """ - sanitized_filename = re.sub(r'[/\\:*?"<>|]', '-', filename) - sanitized_filename = sanitized_filename.replace('&', 'and') - sanitized_filename = sanitized_filename.replace('"', '') - sanitized_filename = sanitized_filename.replace("'", '') - sanitized_filename = sanitized_filename.replace("/", '') - sanitized_filename = sanitized_filename.replace("\\", '') + sanitized_filename = re.sub(r'[/\\:*?"<>|]', "-", filename) + sanitized_filename = sanitized_filename.replace("&", "and") + sanitized_filename = sanitized_filename.replace('"', "") + sanitized_filename = sanitized_filename.replace("'", "") + sanitized_filename = sanitized_filename.replace("/", "") + sanitized_filename = sanitized_filename.replace("\\", "") # Annoying. - if sanitized_filename[0] == '.': - sanitized_filename = u'dot' + sanitized_filename[1:] + if sanitized_filename[0] == ".": + sanitized_filename = "dot" + sanitized_filename[1:] return sanitized_filename + def puts_safe(text): if sys.platform == "win32": - if sys.version_info < (3,0,0): + if sys.version_info < (3, 0, 0): puts(text) else: - puts(text.encode(sys.stdout.encoding, errors='replace').decode()) + puts(text.encode(sys.stdout.encoding, errors="replace").decode()) else: puts(text) @@ -1323,7 +1394,7 @@ def puts_safe(text): # Main #################################################################### -if __name__ == '__main__': +if __name__ == "__main__": try: sys.exit(main()) except Exception as e: diff --git a/tests/test.py b/tests/test.py index 626bf4b..4542374 100644 --- a/tests/test.py +++ b/tests/test.py @@ -17,6 +17,7 @@ from soundscrape.soundscrape import process_audiomack from soundscrape.soundscrape import process_musicbed + class TestSoundscrape(unittest.TestCase): ## @@ -31,45 +32,81 @@ def test_get_client(self): self.assertTrue(bool(client)) def test_soundcloud(self): - for f in glob.glob('*.mp3'): - os.unlink(f) - - mp3_count = len(glob.glob1('', "*.mp3")) - vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://soundcloud.com/fzpz/revised', 'keep': True} + for f in glob.glob("*.mp3"): + os.unlink(f) + + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 9223372036854775807, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://soundcloud.com/fzpz/revised", + "keep": True, + } process_soundcloud(vargs) - new_mp3_count = len(glob.glob1('', "*.mp3")) + new_mp3_count = len(glob.glob1("", "*.mp3")) self.assertTrue(new_mp3_count > mp3_count) - for f in glob.glob('*.mp3'): - os.unlink(f) + for f in glob.glob("*.mp3"): + os.unlink(f) def test_soundcloud_hard(self): - for f in glob.glob('*.mp3'): - os.unlink(f) - - mp3_count = len(glob.glob1('', "*.mp3")) - vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 1, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'puptheband', 'keep': False} + for f in glob.glob("*.mp3"): + os.unlink(f) + + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 1, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "puptheband", + "keep": False, + } process_soundcloud(vargs) - new_mp3_count = len(glob.glob1('', "*.mp3")) + new_mp3_count = len(glob.glob1("", "*.mp3")) self.assertTrue(new_mp3_count > mp3_count) - self.assertTrue(new_mp3_count == 1) # This used to be 3, but is now 'Not available in United States.' + self.assertTrue(new_mp3_count == 1) # This used to be 3, but is now 'Not available in United States.' - for f in glob.glob('*.mp3'): - os.unlink(f) + for f in glob.glob("*.mp3"): + os.unlink(f) def test_soundcloud_hard_2(self): - for f in glob.glob('*.mp3'): - os.unlink(f) - - mp3_count = len(glob.glob1('', "*.mp3")) - vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 1, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://soundcloud.com/lostdogz/snuggles-chapstick', 'keep': False} + for f in glob.glob("*.mp3"): + os.unlink(f) + + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 1, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://soundcloud.com/lostdogz/snuggles-chapstick", + "keep": False, + } process_soundcloud(vargs) - new_mp3_count = len(glob.glob1('', "*.mp3")) + new_mp3_count = len(glob.glob1("", "*.mp3")) self.assertTrue(new_mp3_count > mp3_count) - self.assertTrue(new_mp3_count == 1) # This used to be 3, but is now 'Not available in United States.' + self.assertTrue(new_mp3_count == 1) # This used to be 3, but is now 'Not available in United States.' - for f in glob.glob('*.mp3'): - os.unlink(f) + for f in glob.glob("*.mp3"): + os.unlink(f) # The test URL for this is no longer a WAV. Need a new testcase. # @@ -88,30 +125,52 @@ def test_soundcloud_hard_2(self): # os.unlink(f) def test_bandcamp(self): - for f in glob.glob('*.mp3'): - os.unlink(f) - - mp3_count = len(glob.glob1('', "*.mp3")) - vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://atenrays.bandcamp.com/track/who-u-think'} + for f in glob.glob("*.mp3"): + os.unlink(f) + + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 9223372036854775807, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://atenrays.bandcamp.com/track/who-u-think", + } process_bandcamp(vargs) - new_mp3_count = len(glob.glob1('', "*.mp3")) + new_mp3_count = len(glob.glob1("", "*.mp3")) self.assertTrue(new_mp3_count > mp3_count) - for f in glob.glob('*.mp3'): - os.unlink(f) + for f in glob.glob("*.mp3"): + os.unlink(f) def test_bandcamp_slashes(self): - for f in glob.glob('*.mp3'): - os.unlink(f) - - mp3_count = len(glob.glob1('', "*.mp3")) - vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://defill.bandcamp.com/track/amnesia-chamber-harvest-skit'} + for f in glob.glob("*.mp3"): + os.unlink(f) + + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 9223372036854775807, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://defill.bandcamp.com/track/amnesia-chamber-harvest-skit", + } process_bandcamp(vargs) - new_mp3_count = len(glob.glob1('', "*.mp3")) + new_mp3_count = len(glob.glob1("", "*.mp3")) self.assertTrue(new_mp3_count > mp3_count) - for f in glob.glob('*.mp3'): - os.unlink(f) + for f in glob.glob("*.mp3"): + os.unlink(f) # def test_musicbed(self): # for f in glob.glob('*.mp3'): @@ -131,26 +190,26 @@ def test_mixcloud(self): MixCloud is being blocked from Travis, interestingly. """ - for f in glob.glob('*.mp3'): - os.unlink(f) + for f in glob.glob("*.mp3"): + os.unlink(f) - for f in glob.glob('*.m4a'): - os.unlink(f) + for f in glob.glob("*.m4a"): + os.unlink(f) # shortest mix I could find that was still semi tolerable - #mp3_count = len(glob.glob1('', "*.mp3")) - #m4a_count = len(glob.glob1('', "*.m4a")) - #vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://www.mixcloud.com/Bobby_T_FS15/coffee-cigarettes-saturday-morning-hip-hop-fix/'} - #process_mixcloud(vargs) - #new_mp3_count = len(glob.glob1('', "*.mp3")) - #new_m4a_count = len(glob.glob1('', "*.m4a")) - #self.assertTrue((new_mp3_count > mp3_count) or (new_m4a_count > m4a_count)) + # mp3_count = len(glob.glob1('', "*.mp3")) + # m4a_count = len(glob.glob1('', "*.m4a")) + # vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://www.mixcloud.com/Bobby_T_FS15/coffee-cigarettes-saturday-morning-hip-hop-fix/'} + # process_mixcloud(vargs) + # new_mp3_count = len(glob.glob1('', "*.mp3")) + # new_m4a_count = len(glob.glob1('', "*.m4a")) + # self.assertTrue((new_mp3_count > mp3_count) or (new_m4a_count > m4a_count)) - for f in glob.glob('*.mp3'): - os.unlink(f) + for f in glob.glob("*.mp3"): + os.unlink(f) - for f in glob.glob('*.m4a'): - os.unlink(f) + for f in glob.glob("*.m4a"): + os.unlink(f) # def test_audiomack(self): # for f in glob.glob('*.mp3'): @@ -165,5 +224,6 @@ def test_mixcloud(self): # for f in glob.glob('*.mp3'): # os.unlink(f) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() From 4e7e2480cc95ee2ef04a46557ce5b199ccff6274 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Wed, 13 May 2020 21:04:57 -0700 Subject: [PATCH 07/69] Fixing build status to move away from Travis-CI. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 139d58a..d15257c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ![SoundScrape!](http://i.imgur.com/nHAt2ow.png) -SoundScrape [![Build Status](https://travis-ci.org/Miserlou/SoundScrape.svg)](https://travis-ci.org/Miserlou/SoundScrape) [![Python 2](https://img.shields.io/badge/Python-2-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![Python 3](https://img.shields.io/badge/Python-3-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![PyPI](https://img.shields.io/pypi/v/soundscrape.svg)](https://pypi.python.org/pypi/SoundScrape) +SoundScrape [![Build Status](https://github.com/SimplicityGuy/SoundScrape/workflows/SoundScrape%20CI/badge.svg)](https://github.com/SimplicityGuy/SoundScrape/actions) [![Python 2](https://img.shields.io/badge/Python-2-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![Python 3](https://img.shields.io/badge/Python-3-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![PyPI](https://img.shields.io/pypi/v/soundscrape.svg)](https://pypi.python.org/pypi/SoundScrape) ============== **SoundScrape** makes it super easy to download artists from SoundCloud (and Bandcamp and MixCloud) - even those which don't have download links! It automatically creates ID3 tags as well (including album art), which is handy. From 59c6bec373b9f8385cdf38205fb0ec6aeeedb6e2 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Wed, 13 May 2020 21:28:28 -0700 Subject: [PATCH 08/69] Removing support for Python 3.5 and earlier. --- setup.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/setup.py b/setup.py index 1c77f2a..49c0fe3 100644 --- a/setup.py +++ b/setup.py @@ -1,17 +1,8 @@ import os -import setuptools import soundscrape -import sys from setuptools import setup -# To support 2/3 installation -setup_version = int(setuptools.__version__.split(".")[0]) -if setup_version < 18: - print("Please upgrade your setuptools to install SoundScrape: ") - print("pip install -U pip wheel setuptools") - quit() - # Set external files try: from pypandoc import convert @@ -31,7 +22,6 @@ version=soundscrape.__version__, packages=["soundscrape"], install_requires=required, - extras_require={':python_version < "3.0"': ["wsgiref>=0.1.2",],}, include_package_data=True, license="MIT License", description="Scrape an artist from SoundCloud", @@ -45,10 +35,9 @@ "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", "Programming Language :: Python", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3.3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "Topic :: Internet :: WWW/HTTP", "Topic :: Internet :: WWW/HTTP :: Dynamic Content", ], From e61db561cc0d1c936799705c97044af3c52fcc3a Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Wed, 13 May 2020 21:28:54 -0700 Subject: [PATCH 09/69] Fixing workflow to not have version specific formatting. --- .github/workflows/main.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c4accfd..e6f122a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,13 +11,13 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3{0}6', '3{0}7', '3{0}8'] + python-version: [3.6, 3.7, 3.8] steps: - uses: actions/checkout@v2 - - name: Set up Python ${{ format(matrix.python-version, '.') }} + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: - python-version: ${{ format(matrix.python-version, '.') }} + python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip @@ -25,10 +25,10 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Black code formatting run: | - black --check --line-length 127 --target-version py${{ format(matrix.python-version, '') }} . + black --check --line-length 127 . - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 . --count --select=E9,F63,F7,F82 --ignore=E203,E231 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --ignore=E203,E231 --statistics From 7b05430da33d652f3594063f676b4997f6d2151d Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Wed, 13 May 2020 21:29:22 -0700 Subject: [PATCH 10/69] Fixing a few flake8 issues. --- soundscrape/soundscrape.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 5333975..a41659b 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -328,7 +328,7 @@ def download_track( hard_track_url = get_hard_track_url(track["id"]) # We have no info on this track whatsoever. - if not "title" in track: + if "title" not in track: return None if not keep_previews: @@ -1323,7 +1323,7 @@ def tag_file( new_artwork_url = artwork_url.replace("-large", "-t500x500") try: image_data = requests.get(new_artwork_url).content - except Exception as e: + except: # No very large image available. image_data = requests.get(artwork_url).content else: @@ -1345,7 +1345,7 @@ def tag_file( return True - except Exception as e: + except: puts(colored.red("Problem tagging file: ") + colored.white("Is this file a WAV?")) return False From 9e39ecc3d4d2cbbfdecd9f1bd554a56665d989cb Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Wed, 13 May 2020 21:29:36 -0700 Subject: [PATCH 11/69] Version bump. --- soundscrape/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/soundscrape/__init__.py b/soundscrape/__init__.py index e89b4cf..98195c0 100644 --- a/soundscrape/__init__.py +++ b/soundscrape/__init__.py @@ -1 +1 @@ -__version__ = "0.30.2" +__version__ = "0.31" From 59ac4ed03a6aeecaf006747f29388d3ff69b19c9 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Wed, 13 May 2020 21:31:28 -0700 Subject: [PATCH 12/69] Remove hive.co support since it was never completed. --- soundscrape/soundscrape.py | 102 ------------------------------------- 1 file changed, 102 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index a41659b..658ac37 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -63,9 +63,6 @@ def main(): parser.add_argument( "-a", "--audiomack", action="store_true", help="Use if downloading from Audiomack rather than SoundCloud", ) - parser.add_argument( - "-c", "--hive", action="store_true", help="Use if downloading from Hive.co rather than SoundCloud", - ) parser.add_argument("-l", "--likes", action="store_true", help="Download all of a user's Likes.") parser.add_argument( "-L", "--login", type=str, default="soundscrape123@mailinator.com", help="Set login", @@ -123,8 +120,6 @@ def main(): process_mixcloud(vargs) elif "audiomack.com" in artist_url or vargs["audiomack"]: process_audiomack(vargs) - elif "hive.co" in artist_url or vargs["hive"]: - process_hive(vargs) elif "musicbed.com" in artist_url: process_musicbed(vargs) else: @@ -959,103 +954,6 @@ def get_audiomack_data(url): return data -#################################################################### -# Hive.co -#################################################################### - - -def process_hive(vargs): - """ - Main Hive.co path. - """ - - artist_url = vargs["artist_url"] - - if "hive.co" in artist_url: - mc_url = artist_url - else: - mc_url = "https://www.hive.co/downloads/download/" + artist_url - - filenames = scrape_hive_url(mc_url, num_tracks=vargs["num_tracks"], folders=vargs["folders"], custom_path=vargs["path"],) - - if vargs["open"]: - open_files(filenames) - - return - - -def scrape_hive_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=""): - """ - Scrape a Hive.co download page. - - Returns: - list: filenames to open - - """ - - try: - data = get_hive_data(mc_url) - except Exception as e: - puts_safe(colored.red("Problem downloading ") + mc_url) - print(e) - - filenames = [] - - # track_artist = sanitize_filename(data['artist']) - # track_title = sanitize_filename(data['title']) - # track_filename = track_artist + ' - ' + track_title + '.mp3' - - # if folders: - # track_artist_path = join(custom_path, track_artist) - # if not exists(track_artist_path): - # mkdir(track_artist_path) - # track_filename = join(track_artist_path, track_filename) - # if exists(track_filename): - # puts_safe(colored.yellow("Skipping") + colored.white(': ' + data['title'] + " - it already exists!")) - # return [] - - # puts_safe(colored.green("Downloading") + colored.white(': ' + data['artist'] + " - " + data['title'])) - # download_file(data['mp3_url'], track_filename) - # tag_file(track_filename, - # artist=data['artist'], - # title=data['title'], - # year=data['year'], - # genre=None, - # artwork_url=data['artwork_url']) - # filenames.append(track_filename) - - return filenames - - -def get_hive_data(url): - """ - - Scrapes a Mixcloud page for a track's important information. - - Returns a dict of data. - - """ - - data = {} - request = requests.get(url) - - # import pdb - # pdb.set_trace() - - # mp3_url = request.text.split('class="player-icon download-song" title="Download" href="')[1].split('"')[0] - # artist = request.text.split('<span class="artist">')[1].split('</span>')[0].strip() - # title = request.text.split('<span class="artist">')[1].split('</span>')[1].split('</h1>')[0].strip() - # artwork_url = request.text.split('<a class="lightbox-trigger" href="')[1].split('" data')[0].strip() - - # data['mp3_url'] = mp3_url - # data['title'] = title - # data['artist'] = artist - # data['artwork_url'] = artwork_url - # data['year'] = None - - return data - - #################################################################### # MusicBed #################################################################### From be2ff5926ec76a10f0bf38008428b618a74e1711 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Wed, 13 May 2020 21:51:26 -0700 Subject: [PATCH 13/69] Fixing up a bunch of flake8 violations. --- soundscrape/soundscrape.py | 99 +++++++++++--------------------------- tests/test.py | 15 ------ 2 files changed, 29 insertions(+), 85 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 658ac37..a7dd539 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -37,9 +37,7 @@ def main(): Main function. Converts arguments to Python and processes accordingly. - """ - # Hack related to #58 if sys.platform == "win32": os.system("chcp 65001") @@ -132,10 +130,7 @@ def main(): def process_soundcloud(vargs): - """ - Main SoundCloud path. - """ - + """Main SoundCloud path.""" artist_url = vargs["artist_url"] track_permalink = vargs["track"] keep_previews = vargs["keep"] @@ -306,9 +301,7 @@ def process_soundcloud(vargs): def get_client(): - """ - Return a new SoundCloud Client object. - """ + """Return a new SoundCloud Client object.""" client = soundcloud.Client(client_id=CLIENT_ID) return client @@ -316,10 +309,7 @@ def get_client(): def download_track( track, album_name="", keep_previews=False, folders=False, filenames=[], custom_path="", ): - """ - Given a track, force scrape it. - """ - + """Given a track, force scrape it.""" hard_track_url = get_hard_track_url(track["id"]) # We have no info on this track whatsoever. @@ -382,11 +372,7 @@ def download_track( def download_tracks( client, tracks, num_tracks=sys.maxsize, downloadable=False, folders=False, custom_path="", id3_extras={}, ): - """ - Given a list of tracks, iteratively download all of them. - - """ - + """Given a list of tracks, iteratively download all of them.""" filenames = [] for i, track in enumerate(tracks): @@ -424,7 +410,7 @@ def download_tracks( t_track["stream_url"] = response["http_mp3_128_url"] track = t_track - except Exception as e: + except: puts_safe(colored.white(track.title) + colored.red(" is not downloadable.")) continue @@ -494,7 +480,6 @@ def get_soundcloud_data(url): dict: of audio data """ - data = {} request = requests.get(url) @@ -508,10 +493,7 @@ def get_soundcloud_data(url): def get_soundcloud_api2_data(artist_id): - """ - Scrape the new API. Returns the parsed JSON response. - """ - + """Scrape the new API. Returns the parsed JSON response.""" v2_url = "https://api-v2.soundcloud.com/stream/users/%s?limit=500&client_id=%s&app_version=%s" % ( artist_id, AGGRESSIVE_CLIENT_ID, @@ -524,10 +506,7 @@ def get_soundcloud_api2_data(artist_id): def get_soundcloud_api_playlist_data(playlist_id): - """ - Scrape the new API. Returns the parsed JSON response. - """ - + """Scrape the new API. Returns the parsed JSON response.""" url = ( "https://api.soundcloud.com/playlists/%s?representation=full&client_id=02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea&app_version=1467724310" % (playlist_id) @@ -539,10 +518,7 @@ def get_soundcloud_api_playlist_data(playlist_id): def get_hard_track_url(item_id): - """ - Hard-scrapes a track. - """ - + """Hard-scrapes a track.""" streams_url = "https://api.soundcloud.com/i1/tracks/%s/streams/?client_id=%s&app_version=%s" % ( item_id, AGGRESSIVE_CLIENT_ID, @@ -564,10 +540,7 @@ def get_hard_track_url(item_id): def process_bandcamp(vargs): - """ - Main BandCamp path. - """ - + """Main BandCamp path.""" artist_url = vargs["artist_url"] if "bandcamp.com" in artist_url or ("://" in artist_url and vargs["bandcamp"]): @@ -603,8 +576,8 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path= Returns: list: filenames to open - """ + """ filenames = [] album_data = get_bandcamp_metadata(url) @@ -629,10 +602,8 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path= mkdir(directory) for i, track in enumerate(album_data["trackinfo"]): - if i > num_tracks - 1: continue - try: track_name = track["title"] if track["track_num"]: @@ -688,6 +659,7 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path= def get_bandcamp_metadata(url): """ Read information from the Bandcamp JavaScript object. + The method may return a list of URLs (indicating this is probably a "main" page which links to one or more albums), or a JSON if we can already parse album/track info from the given url. The JSON is "sloppy". The native python JSON parser often can't deal, so we use the more tolerant demjson instead. @@ -702,7 +674,7 @@ def get_bandcamp_metadata(url): output = demjson.decode(sloppy_json) # if the JSON parser failed, we should consider it's a "/music" page, # so we generate a list of albums/tracks and return it immediately - except Exception as e: + except: regex_all_albums = r'<a href="(/(?:album|track)/[^>]+)">' all_albums = re.findall(regex_all_albums, request.text, re.MULTILINE) album_url_list = list() @@ -743,10 +715,7 @@ def get_bandcamp_metadata(url): def process_mixcloud(vargs): - """ - Main MixCloud path. - """ - + """Main MixCloud path.""" artist_url = vargs["artist_url"] if "mixcloud.com" in artist_url: @@ -766,11 +735,12 @@ def process_mixcloud(vargs): def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=""): """ + Pull out artist and track info from a MixCloud URL. + Returns: list: filenames to open """ - try: data = get_mixcloud_data(mc_url) except Exception as e: @@ -822,11 +792,10 @@ def get_mixcloud_data(url): dict: containing audio data """ - data = {} request = requests.get(url) preview_mp3_url = request.text.split('m-preview="')[1].split('" m-preview-light')[0] - song_uuid = request.text.split('m-preview="')[1].split('" m-preview-light')[0].split("previews/")[1].split(".mp3")[0] + song_uuid = preview_mp3_url.split("previews/")[1].split(".mp3")[0] # Fish for the m4a.. for server in range(1, 23): @@ -837,7 +806,7 @@ def get_mixcloud_data(url): if "?" in mp3_url: mp3_url = mp3_url.split("?")[0] break - except Exception as e: + except: continue full_title = request.text.split("<title>")[1].split(" | Mixcloud")[0] @@ -862,10 +831,7 @@ def get_mixcloud_data(url): def process_audiomack(vargs): - """ - Main Audiomack path. - """ - + """Main Audiomack path.""" artist_url = vargs["artist_url"] if "audiomack.com" in artist_url: @@ -885,11 +851,12 @@ def process_audiomack(vargs): def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=""): """ + Pull out artist and track info from a Audiomack URL. + Returns: list: filenames to open """ - try: data = get_audiomack_data(mc_url) except Exception as e: @@ -930,13 +897,12 @@ def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_p def get_audiomack_data(url): """ - Scrapes a Mixcloud page for a track's important information. + Scrapes a Auidomack page for a track's important information. Returns: dict: containing audio data """ - data = {} request = requests.get(url) @@ -960,11 +926,7 @@ def get_audiomack_data(url): def process_musicbed(vargs): - """ - Main MusicBed path. - """ - - # let's validate given MusicBed url + """Main MusicBed path.""" validated = False if vargs["artist_url"].startswith("https://www.musicbed.com/"): splitted = vargs["artist_url"][len("https://www.musicbed.com/") :].split("/") @@ -995,7 +957,8 @@ def process_musicbed(vargs): def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=False, custom_path=""): """ - Scrapes provided MusicBed url. + Scrapes provided MusicBed URL. + Uses requests' Session object in order to store cookies. Requires login and password information. If provided url is of pattern 'https://www.musicbed.com/artists/<string>/<number>' - a number of albums will be downloaded. @@ -1005,8 +968,8 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa Returns: list: filenames to open - """ + """ session = requests.Session() response = session.get(url) @@ -1144,10 +1107,7 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa def download_file(url, path, session=None, params=None): - """ - Download an individual file. - """ - + """Download an individual file.""" if url[0:2] == "//": url = "https://" + url[2:] @@ -1188,8 +1148,8 @@ def tag_file( track_number (str): filename (str): url (str): - """ + """ try: audio = EasyMP3(filename) audio.tags = None @@ -1249,9 +1209,7 @@ def tag_file( def open_files(filenames): - """ - Call the system 'open' command on a file. - """ + """Call the system 'open' command on a file.""" command = ["open"] + filenames process = Popen(command, stdout=PIPE, stderr=PIPE) stdout, stderr = process.communicate() @@ -1263,6 +1221,7 @@ def sanitize_filename(filename): Returns: str: + """ sanitized_filename = re.sub(r'[/\\:*?"<>|]', "-", filename) sanitized_filename = sanitized_filename.replace("&", "and") diff --git a/tests/test.py b/tests/test.py index 4542374..5726a0d 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,21 +1,10 @@ import glob import os -import re -import string -import sys import unittest -import nose -from nose import case -from nose.pyversion import unbound_method -from nose import util - from soundscrape.soundscrape import get_client from soundscrape.soundscrape import process_soundcloud from soundscrape.soundscrape import process_bandcamp -from soundscrape.soundscrape import process_mixcloud -from soundscrape.soundscrape import process_audiomack -from soundscrape.soundscrape import process_musicbed class TestSoundscrape(unittest.TestCase): @@ -186,10 +175,6 @@ def test_bandcamp_slashes(self): # os.unlink(f) def test_mixcloud(self): - """ - MixCloud is being blocked from Travis, interestingly. - """ - for f in glob.glob("*.mp3"): os.unlink(f) From 0885031484ce2c544ab1b0feaea92e59826ef7eb Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Wed, 13 May 2020 21:52:46 -0700 Subject: [PATCH 14/69] Removing status for Python 2.7. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d15257c..9ea98f2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ![SoundScrape!](http://i.imgur.com/nHAt2ow.png) -SoundScrape [![Build Status](https://github.com/SimplicityGuy/SoundScrape/workflows/SoundScrape%20CI/badge.svg)](https://github.com/SimplicityGuy/SoundScrape/actions) [![Python 2](https://img.shields.io/badge/Python-2-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![Python 3](https://img.shields.io/badge/Python-3-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![PyPI](https://img.shields.io/pypi/v/soundscrape.svg)](https://pypi.python.org/pypi/SoundScrape) +SoundScrape [![Build Status](https://github.com/SimplicityGuy/SoundScrape/workflows/SoundScrape%20CI/badge.svg)](https://github.com/SimplicityGuy/SoundScrape/actions) [![Python 3](https://img.shields.io/badge/Python-3-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![PyPI](https://img.shields.io/pypi/v/soundscrape.svg)](https://pypi.python.org/pypi/SoundScrape) ============== **SoundScrape** makes it super easy to download artists from SoundCloud (and Bandcamp and MixCloud) - even those which don't have download links! It automatically creates ID3 tags as well (including album art), which is handy. From c050b46537f7b5c288fbc7a248679a948644f09f Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Wed, 13 May 2020 21:54:07 -0700 Subject: [PATCH 15/69] Removing an ignore since we're selecting in the rules we want at that step. --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e6f122a..d9ddcac 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -29,6 +29,6 @@ jobs: - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --ignore=E203,E231 --show-source --statistics + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --ignore=E203,E231 --statistics From be6f13e5e661acf52e285ea29c0a67e035d582f5 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Fri, 15 May 2020 18:33:51 -0700 Subject: [PATCH 16/69] Removed broken code and identified a path forward for strange SoundCloud behavior. --- soundscrape/soundscrape.py | 76 +++++++++++++------------------------- 1 file changed, 26 insertions(+), 50 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index a7dd539..5fe0979 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -138,8 +138,10 @@ def process_soundcloud(vargs): id3_extras = {} one_track = False + num_tracks = 1 likes = False - client = get_client() + client = soundcloud.Client(client_id=CLIENT_ID) + if "soundcloud" not in artist_url.lower(): if vargs["group"]: artist_url = "https://soundcloud.com/groups/" + artist_url.lower() @@ -155,9 +157,7 @@ def process_soundcloud(vargs): artist_url = artist_url[0 : artist_url.find("/likes")] likes = True - if one_track: - num_tracks = 1 - else: + if not one_track: num_tracks = vargs["num_tracks"] try: @@ -185,9 +185,8 @@ def process_soundcloud(vargs): else: resolved = client.get("/resolve", url=artist_url, limit=200) - except Exception as e: # HTTPError? - - # SoundScrape is trying to prevent us from downloading this. + except Exception as e: + # SoundCloud is trying to prevent us from downloading this. # We're going to have to stop trusting the API/client and # do all our own scraping. Boo. @@ -230,18 +229,13 @@ def process_soundcloud(vargs): filenames.append(filename) else: - aggressive = False # This is is likely a 'likes' page. if not hasattr(resolved, "kind"): tracks = resolved else: - if resolved.kind == "artist": - artist = resolved - artist_id = str(artist.id) - tracks = client.get("/users/" + artist_id + "/tracks", limit=200) - elif resolved.kind == "playlist": + if resolved.kind == "playlist": id3_extras["album"] = resolved.title if resolved.tracks != []: tracks = resolved.tracks @@ -253,7 +247,6 @@ def process_soundcloud(vargs): download_track( track, resolved.title, keep_previews, folders, custom_path=vargs["path"], ) - elif resolved.kind == "track": tracks = [resolved] elif resolved.kind == "group": @@ -264,12 +257,21 @@ def process_soundcloud(vargs): artist = resolved artist_id = str(artist.id) tracks = client.get("/users/" + artist_id + "/tracks", limit=200) - if tracks == [] and artist.track_count > 0: + + # SoundCloud has a nasty bug where some artists don't have any tracks returned using + # the previous call. There are a number of reports of this since late 2019 on StackOverflow. + # It seems that the common pattern is that if an artist has any tracks marked as private, + # e.g. the track is not downloadable (only streamable), then SoundCloud won't return any + # tracks for that artist. The way around this is to refer to the artist's RSS feed. This + # is in the form of: + # http://feeds.soundcloud.com/users/soundcloud:users:<artist_id>/sounds.rss + + if len(tracks) == 0 and artist.track_count > 0: aggressive = True filenames = [] # this might be buggy - data = get_soundcloud_api2_data(artist_id) + #data = get_soundcloud_api2_data(artist_id) for track in data["collection"]: @@ -300,12 +302,6 @@ def process_soundcloud(vargs): open_files(filenames) -def get_client(): - """Return a new SoundCloud Client object.""" - client = soundcloud.Client(client_id=CLIENT_ID) - return client - - def download_track( track, album_name="", keep_previews=False, folders=False, filenames=[], custom_path="", ): @@ -376,13 +372,9 @@ def download_tracks( filenames = [] for i, track in enumerate(tracks): - - # "Track" and "Resource" objects are actually different, - # even though they're the same. + # "Track" and "Resource" objects are actually different, even though they're the same. if isinstance(track, soundcloud.resource.Resource): - try: - t_track = {} t_track["downloadable"] = track.downloadable t_track["streamable"] = track.streamable @@ -481,9 +473,7 @@ def get_soundcloud_data(url): """ data = {} - request = requests.get(url) - title_tag = request.text.split("<title>")[1].split("</title")[0] data["title"] = title_tag.split(" by ")[0].strip() data["artist"] = title_tag.split(" by ")[1].split("|")[0].strip() @@ -492,19 +482,6 @@ def get_soundcloud_data(url): return data -def get_soundcloud_api2_data(artist_id): - """Scrape the new API. Returns the parsed JSON response.""" - v2_url = "https://api-v2.soundcloud.com/stream/users/%s?limit=500&client_id=%s&app_version=%s" % ( - artist_id, - AGGRESSIVE_CLIENT_ID, - APP_VERSION, - ) - response = requests.get(v2_url) - parsed = response.json() - - return parsed - - def get_soundcloud_api_playlist_data(playlist_id): """Scrape the new API. Returns the parsed JSON response.""" url = ( @@ -512,9 +489,10 @@ def get_soundcloud_api_playlist_data(playlist_id): % (playlist_id) ) response = requests.get(url) - parsed = response.json() + if response.status_code != 200: + return None - return parsed + return response.json() def get_hard_track_url(item_id): @@ -527,12 +505,11 @@ def get_hard_track_url(item_id): response = requests.get(streams_url) json_response = response.json() - if response.status_code == 200: - hard_track_url = json_response["http_mp3_128_url"] - return hard_track_url - else: + if response.status_code != 200: return None + return json_response["http_mp3_128_url"] + #################################################################### # Bandcamp @@ -626,7 +603,7 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path= continue if not track["file"]: - puts_safe(colored.yellow("Track unavailble for scraping: ") + colored.white(track_name)) + puts_safe(colored.yellow("Track unavailable for scraping: ") + colored.white(track_name)) continue puts_safe(colored.green("Downloading") + colored.white(": " + track_name)) @@ -1168,7 +1145,6 @@ def tag_file( audio.save() if artwork_url: - artwork_url = artwork_url.replace("https", "http") mime = "image/jpeg" From a71424841c64e42c5704d27269b44a77b9333f56 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 16 May 2020 16:43:21 -0700 Subject: [PATCH 17/69] Changing how track downloading works in the case where no tracks are returned from the SoundClound API. --- requirements.txt | 1 + soundscrape/soundscrape.py | 155 ++++++++++++++++++++----------------- 2 files changed, 85 insertions(+), 71 deletions(-) diff --git a/requirements.txt b/requirements.txt index cd4e07e..6c4d0de 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ args>=0.1.0 +atoma>=0.0.17 clint>=0.3.2 demjson>=2.2.2 fudge>=1.0.3 diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 5fe0979..0f8dfdc 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import argparse +import atoma import demjson import os import re @@ -163,16 +164,13 @@ def process_soundcloud(vargs): try: if one_track: resolved = client.get("/resolve", url=track_url, limit=200) - elif likes: userId = str(client.get("/resolve", url=artist_url).id) - resolved = client.get("/users/" + userId + "/favorites", limit=200, linked_partitioning=1) next_href = False if hasattr(resolved, "next_href"): next_href = resolved.next_href while next_href: - resolved2 = requests.get(next_href).json() if "next_href" in resolved2: next_href = resolved2["next_href"] @@ -181,14 +179,12 @@ def process_soundcloud(vargs): resolved2 = soundcloud.resource.ResourceList(resolved2["collection"]) resolved.collection.extend(resolved2) resolved = resolved.collection - else: resolved = client.get("/resolve", url=artist_url, limit=200) except Exception as e: # SoundCloud is trying to prevent us from downloading this. - # We're going to have to stop trusting the API/client and - # do all our own scraping. Boo. + # Instead of utilizing the API/client we will do all our own scraping. Boo. if "404 Client Error" in str(e): puts(colored.red("Problem downloading [404]: ") + colored.white("Item Not Found")) @@ -228,80 +224,97 @@ def process_soundcloud(vargs): filenames.append(filename) + # This is is likely a 'likes' page. + if not hasattr(resolved, "kind"): + tracks = resolved else: - aggressive = False - - # This is is likely a 'likes' page. - if not hasattr(resolved, "kind"): - tracks = resolved - else: - if resolved.kind == "playlist": - id3_extras["album"] = resolved.title - if resolved.tracks != []: - tracks = resolved.tracks - else: - tracks = get_soundcloud_api_playlist_data(resolved.id)["tracks"] - tracks = tracks[:num_tracks] - aggressive = True - for track in tracks: - download_track( - track, resolved.title, keep_previews, folders, custom_path=vargs["path"], - ) - elif resolved.kind == "track": - tracks = [resolved] - elif resolved.kind == "group": - group = resolved - group_id = str(group.id) - tracks = client.get("/groups/" + group_id + "/tracks", limit=200) + if resolved.kind == "playlist": + id3_extras["album"] = resolved.title + if resolved.tracks != []: + tracks = resolved.tracks else: - artist = resolved - artist_id = str(artist.id) - tracks = client.get("/users/" + artist_id + "/tracks", limit=200) - - # SoundCloud has a nasty bug where some artists don't have any tracks returned using - # the previous call. There are a number of reports of this since late 2019 on StackOverflow. - # It seems that the common pattern is that if an artist has any tracks marked as private, - # e.g. the track is not downloadable (only streamable), then SoundCloud won't return any - # tracks for that artist. The way around this is to refer to the artist's RSS feed. This - # is in the form of: - # http://feeds.soundcloud.com/users/soundcloud:users:<artist_id>/sounds.rss - - if len(tracks) == 0 and artist.track_count > 0: - aggressive = True - filenames = [] - - # this might be buggy - #data = get_soundcloud_api2_data(artist_id) - - for track in data["collection"]: - - if len(filenames) >= num_tracks: - break - - if track["type"] == "playlist": - track["playlist"]["tracks"] = track["playlist"]["tracks"][:num_tracks] - for playlist_track in track["playlist"]["tracks"]: - album_name = track["playlist"]["title"] - filename = download_track( - playlist_track, album_name, keep_previews, folders, filenames, custom_path=vargs["path"], - ) - if filename: - filenames.append(filename) - else: - d_track = track["track"] - filename = download_track(d_track, custom_path=vargs["path"]) - if filename: - filenames.append(filename) - - if not aggressive: + tracks = get_soundcloud_api_playlist_data(resolved.id)["tracks"] + tracks = tracks[:num_tracks] + for track in tracks: + download_track( + track, resolved.title, keep_previews, folders, custom_path=vargs["path"], + ) + return + elif resolved.kind == "track": + tracks = [resolved] + elif resolved.kind == "group": + group_id = str(resolved.id) + tracks = client.get("/groups/" + group_id + "/tracks", limit=200) + else: + artist_id = str(resolved.id) + tracks = client.get("/users/" + artist_id + "/tracks", limit=200) + + # SoundCloud has a unfortunate bug where some artists don't have any tracks returned using: + # client.get("/users/" + artist_id + "/tracks", limit=200) + # There are a number of reports of this issue since late 2019 on StackOverflow. + # It seems that the common pattern is that if an artist has any tracks marked as private, + # e.g. the track is not downloadable (only streamable), then SoundCloud won't return any + # tracks for that artist. The way around this is to refer to the artist's RSS feed. This + # is in the form of: + # http://feeds.soundcloud.com/users/soundcloud:users:<artist_id>/sounds.rss + + if len(tracks) == 0 and resolved.track_count > 0: + artist = str(resolved.full_name) + artist_id = str(resolved.id) + filenames = [] + + response = requests.get(f"http://feeds.soundcloud.com/users/soundcloud:users:{artist_id}/sounds.rss") + if response.status_code != 200: + # TODO: add error reporting and handling + return + + feed = atoma.parse_rss_bytes(response.content) + for feed_item in feed.items: + filename = download_track_from_feed(artist, feed.image.url, feed_item, folders, vargs["path"]) + if filename is not None: + filenames.append(filename) + else: filenames = download_tracks( - client, tracks, num_tracks, vargs["downloadable"], vargs["folders"], vargs["path"], id3_extras=id3_extras, + client, tracks, num_tracks, vargs["downloadable"], folders, vargs["path"], id3_extras=id3_extras, ) if vargs["open"]: open_files(filenames) +def download_track_from_feed(artist, artwork, track, folders=False, custom_path=""): + """Given an RSS feed item, download the track.""" + if len(track.enclosures) == 0: + return None + url = track.enclosures[0].url + title = track.title + + filename = sanitize_filename(f"{artist} - {title}.mp3") + + if folders: + name_path = join(custom_path, artist) + if not exists(name_path): + mkdir(name_path) + filename = join(name_path, filename) + else: + filename = join(custom_path, filename) + + if exists(filename): + puts_safe(colored.yellow("Track already downloaded: ") + colored.white(title)) + return None + + puts_safe(colored.green("Scraping") + colored.white(": " + title)) + + filename = download_file(url, filename) + tagged = tag_file(filename, artist=artist, title=title, year=track.pub_date.year, artwork_url=artwork,) + if not tagged: + wav_filename = filename[:-3] + "wav" + os.rename(filename, wav_filename) + filename = wav_filename + + return filename + + def download_track( track, album_name="", keep_previews=False, folders=False, filenames=[], custom_path="", ): From 077ffae632b1cb38ece0c30a71fd33915cdb161b Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 16 May 2020 20:31:34 -0700 Subject: [PATCH 18/69] Upgraded strings to Python 3.6 f-strings. --- soundscrape/soundscrape.py | 179 +++++++++++++------------------------ 1 file changed, 64 insertions(+), 115 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 0f8dfdc..21835cb 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -100,10 +100,7 @@ def main(): if not vargs["artist_url"]: parser.error("Please supply an artist's username or URL!") - if sys.version_info < (3, 0, 0): - vargs["artist_url"] = urllib.quote(vargs["artist_url"][0], safe=":/") - else: - vargs["artist_url"] = urllib.parse.quote(vargs["artist_url"][0], safe=":/") + vargs["artist_url"] = urllib.parse.quote(vargs["artist_url"][0], safe=":/") artist_url = vargs["artist_url"] @@ -132,8 +129,8 @@ def main(): def process_soundcloud(vargs): """Main SoundCloud path.""" - artist_url = vargs["artist_url"] - track_permalink = vargs["track"] + artist_url = vargs["artist_url"].lower() + track_permalink = vargs["track"].lower() keep_previews = vargs["keep"] folders = vargs["folders"] @@ -145,12 +142,12 @@ def process_soundcloud(vargs): if "soundcloud" not in artist_url.lower(): if vargs["group"]: - artist_url = "https://soundcloud.com/groups/" + artist_url.lower() + artist_url = f"https://soundcloud.com/groups/{artist_url}" elif len(track_permalink) > 0: one_track = True - track_url = "https://soundcloud.com/" + artist_url.lower() + "/" + track_permalink.lower() + track_url = f"https://soundcloud.com/{artist_url}/{track_permalink}" else: - artist_url = "https://soundcloud.com/" + artist_url.lower() + artist_url = f"https://soundcloud.com/{artist_url}" if vargs["likes"] or "likes" in artist_url.lower(): likes = True @@ -166,7 +163,7 @@ def process_soundcloud(vargs): resolved = client.get("/resolve", url=track_url, limit=200) elif likes: userId = str(client.get("/resolve", url=artist_url).id) - resolved = client.get("/users/" + userId + "/favorites", limit=200, linked_partitioning=1) + resolved = client.get(f"/users/{userId}/favorites", limit=200, linked_partitioning=1) next_href = False if hasattr(resolved, "next_href"): next_href = resolved.next_href @@ -181,7 +178,6 @@ def process_soundcloud(vargs): resolved = resolved.collection else: resolved = client.get("/resolve", url=artist_url, limit=200) - except Exception as e: # SoundCloud is trying to prevent us from downloading this. # Instead of utilizing the API/client we will do all our own scraping. Boo. @@ -194,11 +190,17 @@ def process_soundcloud(vargs): item_id = message.rsplit("/", 1)[-1].split(".json")[0].split("?client_id")[0] hard_track_url = get_hard_track_url(item_id) - track_data = get_soundcloud_data(artist_url) - puts_safe(colored.green("Scraping") + colored.white(": " + track_data["title"])) + track_data = {} + request = requests.get(hard_track_url) + title_tag = request.text.split("<title>")[1].split("</title")[0] + track_data["title"] = title_tag.split(" by ")[0].strip() + track_data["artist"] = title_tag.split(" by ")[1].split("|")[0].strip() + # TODO: Scrape more data? + + puts_safe(colored.green("Scraping") + colored.white(f": {track_data['title']}")) filenames = [] - filename = sanitize_filename(track_data["artist"] + " - " + track_data["title"] + ".mp3") + filename = sanitize_filename(f"{track_data['artist']} - {track_data['title']}.mp3") if folders: name_path = join(vargs["path"], track_data["artist"]) @@ -213,12 +215,10 @@ def process_soundcloud(vargs): return None filename = download_file(hard_track_url, filename) - tagged = tag_file( - filename, artist=track_data["artist"], title=track_data["title"], year="2018", genre="", album="", artwork_url="", - ) + tagged = tag_file(filename, artist=track_data["artist"], title=track_data["title"]) if not tagged: - wav_filename = filename[:-3] + "wav" + wav_filename = f"{filename[:-3]}wav" os.rename(filename, wav_filename) filename = wav_filename @@ -236,18 +236,16 @@ def process_soundcloud(vargs): tracks = get_soundcloud_api_playlist_data(resolved.id)["tracks"] tracks = tracks[:num_tracks] for track in tracks: - download_track( - track, resolved.title, keep_previews, folders, custom_path=vargs["path"], - ) + download_track(track, resolved.title, keep_previews, folders, custom_path=vargs["path"]) return elif resolved.kind == "track": tracks = [resolved] elif resolved.kind == "group": group_id = str(resolved.id) - tracks = client.get("/groups/" + group_id + "/tracks", limit=200) + tracks = client.get(f"/groups/{group_id}/tracks", limit=200) else: artist_id = str(resolved.id) - tracks = client.get("/users/" + artist_id + "/tracks", limit=200) + tracks = client.get(f"/users/{artist_id}/tracks", limit=200) # SoundCloud has a unfortunate bug where some artists don't have any tracks returned using: # client.get("/users/" + artist_id + "/tracks", limit=200) @@ -303,12 +301,12 @@ def download_track_from_feed(artist, artwork, track, folders=False, custom_path= puts_safe(colored.yellow("Track already downloaded: ") + colored.white(title)) return None - puts_safe(colored.green("Scraping") + colored.white(": " + title)) + puts_safe(colored.green("Scraping") + colored.white(f": {title}")) filename = download_file(url, filename) tagged = tag_file(filename, artist=artist, title=title, year=track.pub_date.year, artwork_url=artwork,) if not tagged: - wav_filename = filename[:-3] + "wav" + wav_filename = f"{filename[:-3]}wav" os.rename(filename, wav_filename) filename = wav_filename @@ -327,7 +325,7 @@ def download_track( if not keep_previews: if track.get("duration", 0) < track.get("full_duration", 0): - puts_safe(colored.yellow("Skipping preview track") + colored.white(": " + track["title"])) + puts_safe(colored.yellow("Skipping preview track") + colored.white(f": {track['title']}")) return None # May not have a "full name" @@ -335,7 +333,7 @@ def download_track( if name == "": name = track["user"]["username"] - filename = sanitize_filename(name + " - " + track["title"] + ".mp3") + filename = sanitize_filename(f"{name} - {track['title']}.mp3") if folders: name_path = join(custom_path, name) @@ -354,10 +352,10 @@ def download_track( return None if hard_track_url: - puts_safe(colored.green("Scraping") + colored.white(": " + track["title"])) + puts_safe(colored.green("Scraping") + colored.white(f": {track['title']}")) else: # Region coded? - puts_safe(colored.yellow("Unable to download") + colored.white(": " + track["title"])) + puts_safe(colored.yellow("Unable to download") + colored.white(f": {track['title']}")) return None filename = download_file(hard_track_url, filename) @@ -371,7 +369,7 @@ def download_track( artwork_url=track["artwork_url"], ) if not tagged: - wav_filename = filename[:-3] + "wav" + wav_filename = f"{filename[:-3]}wav" os.rename(filename, wav_filename) filename = wav_filename @@ -400,17 +398,13 @@ def download_tracks( t_track["stream_url"] = track.download_url else: if downloadable: - puts_safe(colored.red("Skipping") + colored.white(": " + track.title)) + puts_safe(colored.red("Skipping") + colored.white(f": {track.title}")) continue if hasattr(track, "stream_url"): t_track["stream_url"] = track.stream_url else: t_track["direct"] = True - streams_url = "https://api.soundcloud.com/i1/tracks/%s/streams?client_id=%s&app_version=%s" % ( - str(track.id), - AGGRESSIVE_CLIENT_ID, - APP_VERSION, - ) + streams_url = f"https://api.soundcloud.com/i1/tracks/{track.id}/streams?client_id={AGGRESSIVE_CLIENT_ID}&app_version={APP_VERSION}" response = requests.get(streams_url).json() t_track["stream_url"] = response["http_mp3_128_url"] @@ -428,7 +422,7 @@ def download_tracks( else: track_artist = sanitize_filename(track["user"]["username"]) track_title = sanitize_filename(track["title"]) - track_filename = track_artist + " - " + track_title + ".mp3" + track_filename = f"{track_artist} - {track_title}.mp3" if folders: track_artist_path = join(custom_path, track_artist) @@ -442,7 +436,7 @@ def download_tracks( puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_title)) continue - puts_safe(colored.green("Downloading") + colored.white(": " + track["title"])) + puts_safe(colored.green("Downloading") + colored.white(f": {track['title']}")) if track.get("direct", False): location = track["stream_url"] @@ -465,7 +459,7 @@ def download_tracks( ) if not tagged: - wav_filename = filename[:-3] + "wav" + wav_filename = f"{filename[:-3]}wav" os.rename(filename, wav_filename) filename = wav_filename @@ -477,30 +471,9 @@ def download_tracks( return filenames -def get_soundcloud_data(url): - """ - Scrapes a SoundCloud page for a track's important information. - - Returns: - dict: of audio data - - """ - data = {} - request = requests.get(url) - title_tag = request.text.split("<title>")[1].split("</title")[0] - data["title"] = title_tag.split(" by ")[0].strip() - data["artist"] = title_tag.split(" by ")[1].split("|")[0].strip() - # XXX Do more.. - - return data - - def get_soundcloud_api_playlist_data(playlist_id): """Scrape the new API. Returns the parsed JSON response.""" - url = ( - "https://api.soundcloud.com/playlists/%s?representation=full&client_id=02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea&app_version=1467724310" - % (playlist_id) - ) + url = f"https://api.soundcloud.com/playlists/{playlist_id}?representation=full&client_id=02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea&app_version=1467724310" response = requests.get(url) if response.status_code != 200: return None @@ -510,17 +483,12 @@ def get_soundcloud_api_playlist_data(playlist_id): def get_hard_track_url(item_id): """Hard-scrapes a track.""" - streams_url = "https://api.soundcloud.com/i1/tracks/%s/streams/?client_id=%s&app_version=%s" % ( - item_id, - AGGRESSIVE_CLIENT_ID, - APP_VERSION, - ) - response = requests.get(streams_url) - json_response = response.json() - + url = f"https://api.soundcloud.com/i1/tracks/{item_id}/streams/?client_id={AGGRESSIVE_CLIENT_ID}&app_version={APP_VERSION}" + response = requests.get(url) if response.status_code != 200: return None + json_response = response.json() return json_response["http_mp3_128_url"] @@ -536,7 +504,7 @@ def process_bandcamp(vargs): if "bandcamp.com" in artist_url or ("://" in artist_url and vargs["bandcamp"]): bc_url = artist_url else: - bc_url = "https://" + artist_url + ".bandcamp.com/music" + bc_url = f"https://{artist_url}.bandcamp.com/music" filenames = scrape_bandcamp_url( bc_url, num_tracks=vargs["num_tracks"], folders=vargs["folders"], custom_path=vargs["path"], @@ -556,8 +524,6 @@ def process_bandcamp(vargs): if vargs["open"]: open_files(filenames) - return - # Largely borrowed from Ronier's bandcampscrape def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path=""): @@ -583,7 +549,7 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path= if folders: if album_name: - directory = artist + " - " + album_name + directory = f"{artist} - {album_name}" else: directory = artist directory = sanitize_filename(directory) @@ -601,9 +567,9 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path= else: track_number = None if track_number and folders: - track_filename = "%s - %s.mp3" % (track_number, track_name) + track_filename = f"{track_number} - {track_name}.mp3" else: - track_filename = "%s.mp3" % (track_name) + track_filename = f"{track_name}.mp3" track_filename = sanitize_filename(track_filename) if folders: @@ -711,7 +677,7 @@ def process_mixcloud(vargs): if "mixcloud.com" in artist_url: mc_url = artist_url else: - mc_url = "https://mixcloud.com/" + artist_url + mc_url = f"https://mixcloud.com/{artist_url}" filenames = scrape_mixcloud_url( mc_url, num_tracks=vargs["num_tracks"], folders=vargs["folders"], custom_path=vargs["path"], @@ -742,7 +708,7 @@ def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_pa track_artist = sanitize_filename(data["artist"]) track_title = sanitize_filename(data["title"]) - track_filename = track_artist + " - " + track_title + data["mp3_url"][-4:] + track_filename = f"{track_artist} - {track_title}{data['mp3_url'][-4:]}" if folders: track_artist_path = join(custom_path, track_artist) @@ -750,15 +716,12 @@ def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_pa mkdir(track_artist_path) track_filename = join(track_artist_path, track_filename) if exists(track_filename): - puts_safe(colored.yellow("Skipping") + colored.white(": " + data["title"] + " - it already exists!")) + puts_safe(colored.yellow("Skipping") + colored.white(f": {data['title']} - already exists!")) return [] else: track_filename = join(custom_path, track_filename) - puts_safe( - colored.green("Downloading") - + colored.white(": " + data["artist"] + " - " + data["title"] + " (" + track_filename[-4:] + ")") - ) + puts_safe(colored.green("Downloading") + colored.white(f": {data['artist']} - {data['title']} ({track_filename[-4:]})")) download_file(data["mp3_url"], track_filename) if track_filename[-4:] == ".mp3": tag_file( @@ -787,10 +750,10 @@ def get_mixcloud_data(url): preview_mp3_url = request.text.split('m-preview="')[1].split('" m-preview-light')[0] song_uuid = preview_mp3_url.split("previews/")[1].split(".mp3")[0] - # Fish for the m4a.. + # Fish for the m4a. for server in range(1, 23): # Ex: https://stream6.mixcloud.com/c/m4a/64/1/2/0/9/30fe-23aa-40da-9bf3-4bee2fba649d.m4a - mp3_url = "https://stream" + str(server) + ".mixcloud.com/c/m4a/64/" + song_uuid + ".m4a" + mp3_url = f"https://stream{server}.mixcloud.com/c/m4a/64/{song_uuid}.m4a" try: if requests.head(mp3_url).status_code == 200: if "?" in mp3_url: @@ -827,7 +790,7 @@ def process_audiomack(vargs): if "audiomack.com" in artist_url: mc_url = artist_url else: - mc_url = "https://audiomack.com/" + artist_url + mc_url = f"https://audiomack.com/{artist_url}" filenames = scrape_audiomack_url( mc_url, num_tracks=vargs["num_tracks"], folders=vargs["folders"], custom_path=vargs["path"], @@ -836,8 +799,6 @@ def process_audiomack(vargs): if vargs["open"]: open_files(filenames) - return - def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=""): """ @@ -857,7 +818,7 @@ def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_p track_artist = sanitize_filename(data["artist"]) track_title = sanitize_filename(data["title"]) - track_filename = track_artist + " - " + track_title + ".mp3" + track_filename = f"{track_artist} - {track_title}.mp3" if folders: track_artist_path = join(custom_path, track_artist) @@ -865,12 +826,12 @@ def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_p mkdir(track_artist_path) track_filename = join(track_artist_path, track_filename) if exists(track_filename): - puts_safe(colored.yellow("Skipping") + colored.white(": " + data["title"] + " - it already exists!")) + puts_safe(colored.yellow("Skipping") + colored.white(f": {data['title']} - already exists!")) return [] else: track_filename = join(custom_path, track_filename) - puts_safe(colored.green("Downloading") + colored.white(": " + data["artist"] + " - " + data["title"])) + puts_safe(colored.green("Downloading") + colored.white(f": {data['artist']} - {data['title']}")) download_file(data["mp3_url"], track_filename) tag_file( track_filename, @@ -964,11 +925,7 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa response = session.get(url) if response.status_code != 200: - puts( - colored.red( - "scrape_musicbed_url: couldn't open provided url. Status code: " + str(response.status_code) + ". Aborting." - ) - ) + puts(colored.red(f"scrape_musicbed_url: couldn't open provided url. Status code: {response.status_code}. Aborting.")) session.close() return [] @@ -1015,11 +972,7 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa if response.status_code != 200: puts_safe( colored.red( - "scrape_musicbed_url: couldn't open url: " - + each_album_url - + ". Status code: " - + str(response.status_code) - + ". Skipping." + f"scrape_musicbed_url: couldn't open url: {each_album_url}. Status code: {response.status_code}. Skipping." ) ) continue @@ -1053,11 +1006,11 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa filename = join(directory, str(song_count) + " - " + sanitize_filename(each_song["name"]) + ".mp3",) if exists(filename): - puts_safe(colored.yellow("Skipping") + colored.white(": " + each_song["name"] + " - it already exists!")) + puts_safe(colored.yellow("Skipping") + colored.white(f": {each_song['name']} - already exists!")) song_count += 1 continue - puts_safe(colored.green("Downloading") + colored.white(": " + each_song["name"])) + puts_safe(colored.green("Downloading") + colored.white(f": {each_song['name']}")) path = download_file(url, filename, session=session, params=details) # example of genre_string: @@ -1083,7 +1036,7 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa filenames.append(path) song_count += 1 except: - puts_safe(colored.red("Problem downloading ") + colored.white(each_song["name"]) + ". Skipping.") + puts_safe(colored.red("Problem downloading ") + colored.white(f"{each_song['name']}. Skipping.")) song_count += 1 session.close() @@ -1099,10 +1052,10 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa def download_file(url, path, session=None, params=None): """Download an individual file.""" if url[0:2] == "//": - url = "https://" + url[2:] + url = f"https://{url[2:]}" # Use a temporary file so that we don't import incomplete files. - tmp_path = path + ".tmp" + tmp_path = f"{path}.tmp" if session and params: r = session.get(url, params=params, stream=True) @@ -1177,11 +1130,10 @@ def tag_file( image_data = requests.get(artwork_url).content audio = MP3(filename, ID3=OldID3) - audio.tags.add( - APIC( - encoding=3, mime=mime, type=3, desc="Cover", data=image_data, # 3 is for utf-8 # 3 is for the cover image - ) - ) + + # encoding=3 means utf-8 + # type=3 means cover image + audio.tags.add(APIC(encoding=3, mime=mime, type=3, desc="Cover", data=image_data)) audio.save() # because there is software that doesn't seem to use WOAR we save url tag again as WXXX @@ -1221,17 +1173,14 @@ def sanitize_filename(filename): # Annoying. if sanitized_filename[0] == ".": - sanitized_filename = "dot" + sanitized_filename[1:] + sanitized_filename = f"dot{sanitized_filename[1:]}" return sanitized_filename def puts_safe(text): if sys.platform == "win32": - if sys.version_info < (3, 0, 0): - puts(text) - else: - puts(text.encode(sys.stdout.encoding, errors="replace").decode()) + puts(text.encode(sys.stdout.encoding, errors="replace").decode()) else: puts(text) From 8c66da8686bd5b8cf7cfc5078389c210006d5398 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 16 May 2020 20:44:48 -0700 Subject: [PATCH 19/69] Adding VSCode to the ignore spec. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 05ff1f9..bb08a0c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.vscode/ env/ *.DS_Store *.pyc From 5e15ba29a9b03f8ad9f0d3c1b56ab391766659ad Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 16 May 2020 21:40:57 -0700 Subject: [PATCH 20/69] Cleaning up some logic and resultant duplicated code. Clarifying comments. Adding TODOs. --- soundscrape/soundscrape.py | 68 ++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 40 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 21835cb..8e663d7 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -102,21 +102,19 @@ def main(): vargs["artist_url"] = urllib.parse.quote(vargs["artist_url"][0], safe=":/") - artist_url = vargs["artist_url"] - if not exists(vargs["path"]): if not access(dirname(vargs["path"]), W_OK): vargs["path"] = "" else: mkdir(vargs["path"]) - if "bandcamp.com" in artist_url or vargs["bandcamp"]: + if "bandcamp.com" in vargs["artist_url"] or vargs["bandcamp"]: process_bandcamp(vargs) - elif "mixcloud.com" in artist_url or vargs["mixcloud"]: + elif "mixcloud.com" in vargs["artist_url"] or vargs["mixcloud"]: process_mixcloud(vargs) - elif "audiomack.com" in artist_url or vargs["audiomack"]: + elif "audiomack.com" in vargs["artist_url"] or vargs["audiomack"]: process_audiomack(vargs) - elif "musicbed.com" in artist_url: + elif "musicbed.com" in vargs["artist_url"]: process_musicbed(vargs) else: process_soundcloud(vargs) @@ -129,55 +127,46 @@ def main(): def process_soundcloud(vargs): """Main SoundCloud path.""" - artist_url = vargs["artist_url"].lower() + url = vargs["artist_url"].lower() track_permalink = vargs["track"].lower() keep_previews = vargs["keep"] folders = vargs["folders"] + num_tracks = vargs["num_tracks"] id3_extras = {} - one_track = False - num_tracks = 1 likes = False client = soundcloud.Client(client_id=CLIENT_ID) - if "soundcloud" not in artist_url.lower(): + if "soundcloud" not in url.lower(): if vargs["group"]: - artist_url = f"https://soundcloud.com/groups/{artist_url}" + url = f"https://soundcloud.com/groups/{url}" elif len(track_permalink) > 0: - one_track = True - track_url = f"https://soundcloud.com/{artist_url}/{track_permalink}" + url = f"https://soundcloud.com/{url}/{track_permalink}" else: - artist_url = f"https://soundcloud.com/{artist_url}" - if vargs["likes"] or "likes" in artist_url.lower(): + url = f"https://soundcloud.com/{url}" + if vargs["likes"] or "likes" in url.lower(): likes = True - if "likes" in artist_url.lower(): - artist_url = artist_url[0 : artist_url.find("/likes")] + if "likes" in url.lower(): + url = url[0 : url.find("/likes")] likes = True - if not one_track: - num_tracks = vargs["num_tracks"] - try: - if one_track: - resolved = client.get("/resolve", url=track_url, limit=200) - elif likes: - userId = str(client.get("/resolve", url=artist_url).id) - resolved = client.get(f"/users/{userId}/favorites", limit=200, linked_partitioning=1) - next_href = False + resolved = client.get("/resolve", url=url, limit=200) + + if likes: + resolved = client.get(f"/users/{resolved.id}/favorites", limit=200, linked_partitioning=1) + next_href = None if hasattr(resolved, "next_href"): next_href = resolved.next_href - while next_href: + while next_href is not None: resolved2 = requests.get(next_href).json() + next_href = None if "next_href" in resolved2: next_href = resolved2["next_href"] - else: - next_href = False resolved2 = soundcloud.resource.ResourceList(resolved2["collection"]) resolved.collection.extend(resolved2) resolved = resolved.collection - else: - resolved = client.get("/resolve", url=artist_url, limit=200) except Exception as e: # SoundCloud is trying to prevent us from downloading this. # Instead of utilizing the API/client we will do all our own scraping. Boo. @@ -186,6 +175,7 @@ def process_soundcloud(vargs): puts(colored.red("Problem downloading [404]: ") + colored.white("Item Not Found")) return None + # TODO: This exception handler can be handled by download_track(...) with a little refactoring. message = str(e) item_id = message.rsplit("/", 1)[-1].split(".json")[0].split("?client_id")[0] hard_track_url = get_hard_track_url(item_id) @@ -237,19 +227,18 @@ def process_soundcloud(vargs): tracks = tracks[:num_tracks] for track in tracks: download_track(track, resolved.title, keep_previews, folders, custom_path=vargs["path"]) - return + return None elif resolved.kind == "track": tracks = [resolved] elif resolved.kind == "group": - group_id = str(resolved.id) - tracks = client.get(f"/groups/{group_id}/tracks", limit=200) + tracks = client.get(f"/groups/{resolved.id}/tracks", limit=200) else: - artist_id = str(resolved.id) - tracks = client.get(f"/users/{artist_id}/tracks", limit=200) + tracks = client.get(f"/users/{resolved.id}/tracks", limit=200) # SoundCloud has a unfortunate bug where some artists don't have any tracks returned using: # client.get("/users/" + artist_id + "/tracks", limit=200) # There are a number of reports of this issue since late 2019 on StackOverflow. + # ( reference: https://stackoverflow.com/questions/59204383, https://stackoverflow.com/questions/61807979) # It seems that the common pattern is that if an artist has any tracks marked as private, # e.g. the track is not downloadable (only streamable), then SoundCloud won't return any # tracks for that artist. The way around this is to refer to the artist's RSS feed. This @@ -257,18 +246,16 @@ def process_soundcloud(vargs): # http://feeds.soundcloud.com/users/soundcloud:users:<artist_id>/sounds.rss if len(tracks) == 0 and resolved.track_count > 0: - artist = str(resolved.full_name) - artist_id = str(resolved.id) filenames = [] - response = requests.get(f"http://feeds.soundcloud.com/users/soundcloud:users:{artist_id}/sounds.rss") + response = requests.get(f"http://feeds.soundcloud.com/users/soundcloud:users:{resolved.id}/sounds.rss") if response.status_code != 200: # TODO: add error reporting and handling return feed = atoma.parse_rss_bytes(response.content) for feed_item in feed.items: - filename = download_track_from_feed(artist, feed.image.url, feed_item, folders, vargs["path"]) + filename = download_track_from_feed(resolved.full_name, feed.image.url, feed_item, folders, vargs["path"]) if filename is not None: filenames.append(filename) else: @@ -282,6 +269,7 @@ def process_soundcloud(vargs): def download_track_from_feed(artist, artwork, track, folders=False, custom_path=""): """Given an RSS feed item, download the track.""" + # TODO: With a little refactoring this method can merge with download_track(...). if len(track.enclosures) == 0: return None url = track.enclosures[0].url From baf0d4e148abd1bc35cbb1d8b76c2a0e4a7c933f Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 16 May 2020 21:49:10 -0700 Subject: [PATCH 21/69] Removed custom filename sanitization code for a library that is way more comprehensive. --- requirements.txt | 1 + soundscrape/soundscrape.py | 53 ++++++++++++-------------------------- 2 files changed, 17 insertions(+), 37 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6c4d0de..42d664c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ demjson>=2.2.2 fudge>=1.0.3 nose>=1.3.7 requests[security]>=2.9.0 +sanitize_filename>=1.2.0 setuptools>=18.0.0 simplejson>=3.3.1 soundcloud>=0.4.1 diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 8e663d7..27cee02 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -19,6 +19,7 @@ from subprocess import Popen, PIPE from os.path import dirname, exists, join from os import access, mkdir, W_OK +from sanitize_filename import sanitize #################################################################### @@ -190,7 +191,7 @@ def process_soundcloud(vargs): puts_safe(colored.green("Scraping") + colored.white(f": {track_data['title']}")) filenames = [] - filename = sanitize_filename(f"{track_data['artist']} - {track_data['title']}.mp3") + filename = sanitize(f"{track_data['artist']} - {track_data['title']}.mp3") if folders: name_path = join(vargs["path"], track_data["artist"]) @@ -275,7 +276,7 @@ def download_track_from_feed(artist, artwork, track, folders=False, custom_path= url = track.enclosures[0].url title = track.title - filename = sanitize_filename(f"{artist} - {title}.mp3") + filename = sanitize(f"{artist} - {title}.mp3") if folders: name_path = join(custom_path, artist) @@ -321,7 +322,7 @@ def download_track( if name == "": name = track["user"]["username"] - filename = sanitize_filename(f"{name} - {track['title']}.mp3") + filename = sanitize(f"{name} - {track['title']}.mp3") if folders: name_path = join(custom_path, name) @@ -408,8 +409,8 @@ def download_tracks( puts_safe(colored.white(track["title"]) + colored.red(" is not downloadable.")) continue else: - track_artist = sanitize_filename(track["user"]["username"]) - track_title = sanitize_filename(track["title"]) + track_artist = sanitize(track["user"]["username"]) + track_title = sanitize(track["title"]) track_filename = f"{track_artist} - {track_title}.mp3" if folders: @@ -540,7 +541,7 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path= directory = f"{artist} - {album_name}" else: directory = artist - directory = sanitize_filename(directory) + directory = sanitize(directory) directory = join(custom_path, directory) if not exists(directory): mkdir(directory) @@ -558,12 +559,12 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path= track_filename = f"{track_number} - {track_name}.mp3" else: track_filename = f"{track_name}.mp3" - track_filename = sanitize_filename(track_filename) + track_filename = sanitize(track_filename) if folders: path = join(directory, track_filename) else: - path = join(custom_path, sanitize_filename(artist) + " - " + track_filename) + path = join(custom_path, sanitize(artist) + " - " + track_filename) if exists(path): puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_name)) @@ -694,8 +695,8 @@ def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_pa filenames = [] - track_artist = sanitize_filename(data["artist"]) - track_title = sanitize_filename(data["title"]) + track_artist = sanitize(data["artist"]) + track_title = sanitize(data["title"]) track_filename = f"{track_artist} - {track_title}{data['mp3_url'][-4:]}" if folders: @@ -804,8 +805,8 @@ def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_p filenames = [] - track_artist = sanitize_filename(data["artist"]) - track_title = sanitize_filename(data["title"]) + track_artist = sanitize(data["artist"]) + track_title = sanitize(data["title"]) track_filename = f"{track_artist} - {track_title}.mp3" if folders: @@ -986,12 +987,12 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa directory = custom_path if folders: - sanitized_artist = sanitize_filename(each_song["album"]["data"]["artist"]["data"]["name"]) - sanitized_album = sanitize_filename(each_song["album"]["data"]["name"]) + sanitized_artist = sanitize(each_song["album"]["data"]["artist"]["data"]["name"]) + sanitized_album = sanitize(each_song["album"]["data"]["name"]) directory = join(directory, sanitized_artist + " - " + sanitized_album) if not exists(directory): mkdir(directory) - filename = join(directory, str(song_count) + " - " + sanitize_filename(each_song["name"]) + ".mp3",) + filename = join(directory, str(song_count) + " - " + sanitize(each_song["name"]) + ".mp3",) if exists(filename): puts_safe(colored.yellow("Skipping") + colored.white(f": {each_song['name']} - already exists!")) @@ -1144,28 +1145,6 @@ def open_files(filenames): stdout, stderr = process.communicate() -def sanitize_filename(filename): - """ - Make sure filenames are valid paths. - - Returns: - str: - - """ - sanitized_filename = re.sub(r'[/\\:*?"<>|]', "-", filename) - sanitized_filename = sanitized_filename.replace("&", "and") - sanitized_filename = sanitized_filename.replace('"', "") - sanitized_filename = sanitized_filename.replace("'", "") - sanitized_filename = sanitized_filename.replace("/", "") - sanitized_filename = sanitized_filename.replace("\\", "") - - # Annoying. - if sanitized_filename[0] == ".": - sanitized_filename = f"dot{sanitized_filename[1:]}" - - return sanitized_filename - - def puts_safe(text): if sys.platform == "win32": puts(text.encode(sys.stdout.encoding, errors="replace").decode()) From 7a986df635da5acb76915ab52f2d147188e02dee Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 16 May 2020 21:49:33 -0700 Subject: [PATCH 22/69] Minor whitespace fixes. --- soundscrape/soundscrape.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 27cee02..22c9457 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -191,7 +191,7 @@ def process_soundcloud(vargs): puts_safe(colored.green("Scraping") + colored.white(f": {track_data['title']}")) filenames = [] - filename = sanitize(f"{track_data['artist']} - {track_data['title']}.mp3") + filename = sanitize(f"{track_data['artist']} - {track_data['title']}.mp3") if folders: name_path = join(vargs["path"], track_data["artist"]) @@ -322,7 +322,7 @@ def download_track( if name == "": name = track["user"]["username"] - filename = sanitize(f"{name} - {track['title']}.mp3") + filename = sanitize(f"{name} - {track['title']}.mp3") if folders: name_path = join(custom_path, name) @@ -411,7 +411,7 @@ def download_tracks( else: track_artist = sanitize(track["user"]["username"]) track_title = sanitize(track["title"]) - track_filename = f"{track_artist} - {track_title}.mp3" + track_filename = f"{track_artist} - {track_title}.mp3" if folders: track_artist_path = join(custom_path, track_artist) From 2d6c47579605b6de7ef4b86c09dedee4646d638d Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 16 May 2020 21:52:22 -0700 Subject: [PATCH 23/69] Correcting name of package. --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 42d664c..c0ae66a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ demjson>=2.2.2 fudge>=1.0.3 nose>=1.3.7 requests[security]>=2.9.0 -sanitize_filename>=1.2.0 +sanitize-filename>=1.2.0 setuptools>=18.0.0 simplejson>=3.3.1 soundcloud>=0.4.1 From b90ca14906328f34188b32ee25ad1ce376731a93 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 16 May 2020 21:56:24 -0700 Subject: [PATCH 24/69] Removing Python 3.6 support due to dependency on sanitize-filename. --- .github/workflows/main.yml | 2 +- setup.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d9ddcac..b51f631 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: [3.7, 3.8] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} diff --git a/setup.py b/setup.py index 49c0fe3..30670d7 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,6 @@ "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", "Programming Language :: Python", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Topic :: Internet :: WWW/HTTP", From aee51dbd617f17253aba83700f99982a4f036723 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 16 May 2020 23:03:57 -0700 Subject: [PATCH 25/69] Safely handle getting the cover art for the id3 tags. --- soundscrape/soundscrape.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 22c9457..448af70 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -514,7 +514,8 @@ def process_bandcamp(vargs): open_files(filenames) -# Largely borrowed from Ronier's bandcampscrape +# Largely borrowed from bandcampscrape +# ( reference: https://github.com/ronier/bandcampscrape) def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path=""): """ Pull out artist and track info from a Bandcamp URL. @@ -1100,26 +1101,24 @@ def tag_file( audio.save() if artwork_url: - artwork_url = artwork_url.replace("https", "http") + url = artwork_url.replace("https", "http") + urls = [url] + if "-large" in url: + urls.insert(0, url.replace("-large", "-t500x500")) + + image_data = b"0" + for url in urls: + response = requests.get(url) + if response.status_code == 200: + image_data = response.content + break + + audio = MP3(filename, ID3=OldID3) mime = "image/jpeg" - if ".jpg" in artwork_url: - mime = "image/jpeg" if ".png" in artwork_url: mime = "image/png" - if "-large" in artwork_url: - new_artwork_url = artwork_url.replace("-large", "-t500x500") - try: - image_data = requests.get(new_artwork_url).content - except: - # No very large image available. - image_data = requests.get(artwork_url).content - else: - image_data = requests.get(artwork_url).content - - audio = MP3(filename, ID3=OldID3) - # encoding=3 means utf-8 # type=3 means cover image audio.tags.add(APIC(encoding=3, mime=mime, type=3, desc="Cover", data=image_data)) From 7cc10687834d4b10986dbea8d15921ce292cb458 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 16 May 2020 23:09:22 -0700 Subject: [PATCH 26/69] Accidentally renamed and reused a local that was a param. Also properly checking for None. --- soundscrape/soundscrape.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 448af70..c4fab81 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -1088,30 +1088,31 @@ def tag_file( audio.tags = None audio["artist"] = artist audio["title"] = title - if year: + if year is not None: audio["date"] = str(year) - if album: + if album is not None: audio["album"] = album - if track_number: + if track_number is not None: audio["tracknumber"] = track_number - if genre: + if genre is not None: audio["genre"] = genre - if url: # saves the tag as WOAR + if url is not None: + # saves the tag as WOAR audio["website"] = url audio.save() - if artwork_url: - url = artwork_url.replace("https", "http") - urls = [url] + if artwork_url is not None: + artwork_url = artwork_url.replace("https", "http") + urls = [artwork_url] if "-large" in url: - urls.insert(0, url.replace("-large", "-t500x500")) + urls.insert(0, artwork_url.replace("-large", "-t500x500")) image_data = b"0" - for url in urls: - response = requests.get(url) - if response.status_code == 200: - image_data = response.content - break + for a_url in urls: + response = requests.get(a_url) + if response.status_code != 200: + continue + image_data = response.content audio = MP3(filename, ID3=OldID3) @@ -1125,7 +1126,7 @@ def tag_file( audio.save() # because there is software that doesn't seem to use WOAR we save url tag again as WXXX - if url: + if url is not None: audio = MP3(filename, ID3=OldID3) audio.tags.add(WXXX(encoding=3, url=url)) audio.save() From 057d9254261cb213dd3440479c7070ae882bc23d Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 16 May 2020 23:18:10 -0700 Subject: [PATCH 27/69] Writing the tags only once, scoping the try/except, and handling the specific error that can happen. --- soundscrape/soundscrape.py | 94 +++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 48 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index c4fab81..641b350 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -13,6 +13,7 @@ from clint.textui import colored, puts, progress from datetime import datetime +from mutagen import MutagenError from mutagen.mp3 import MP3, EasyMP3 from mutagen.id3 import APIC, WXXX from mutagen.id3 import ID3 as OldID3 @@ -1083,59 +1084,56 @@ def tag_file( url (str): """ - try: - audio = EasyMP3(filename) - audio.tags = None - audio["artist"] = artist - audio["title"] = title - if year is not None: - audio["date"] = str(year) - if album is not None: - audio["album"] = album - if track_number is not None: - audio["tracknumber"] = track_number - if genre is not None: - audio["genre"] = genre - if url is not None: - # saves the tag as WOAR - audio["website"] = url - audio.save() - - if artwork_url is not None: - artwork_url = artwork_url.replace("https", "http") - urls = [artwork_url] - if "-large" in url: - urls.insert(0, artwork_url.replace("-large", "-t500x500")) - - image_data = b"0" - for a_url in urls: - response = requests.get(a_url) - if response.status_code != 200: - continue - image_data = response.content - - audio = MP3(filename, ID3=OldID3) - - mime = "image/jpeg" - if ".png" in artwork_url: - mime = "image/png" + saved_correctly = True + audio = EasyMP3(filename) + audio.tags = None + audio["artist"] = artist + audio["title"] = title + if year is not None: + audio["date"] = str(year) + if album is not None: + audio["album"] = album + if track_number is not None: + audio["tracknumber"] = track_number + if genre is not None: + audio["genre"] = genre + if url is not None: + # saves the tag as WOAR + audio["website"] = url + if artwork_url is not None: + artwork_url = artwork_url.replace("https", "http") + urls = [artwork_url] + if "-large" in url: + urls.insert(0, artwork_url.replace("-large", "-t500x500")) + + image_data = b"0" + for a_url in urls: + response = requests.get(a_url) + if response.status_code != 200: + continue + image_data = response.content - # encoding=3 means utf-8 - # type=3 means cover image - audio.tags.add(APIC(encoding=3, mime=mime, type=3, desc="Cover", data=image_data)) - audio.save() + audio = MP3(filename, ID3=OldID3) - # because there is software that doesn't seem to use WOAR we save url tag again as WXXX - if url is not None: - audio = MP3(filename, ID3=OldID3) - audio.tags.add(WXXX(encoding=3, url=url)) - audio.save() + mime = "image/jpeg" + if ".png" in artwork_url: + mime = "image/png" - return True + # encoding=3 means utf-8 + # type=3 means cover image + audio.tags.add(APIC(encoding=3, mime=mime, type=3, desc="Cover", data=image_data)) + if url is not None: + # Some software doesn't seem to use WOAR so the url is saved again as WXXX. + audio = MP3(filename, ID3=OldID3) + audio.tags.add(WXXX(encoding=3, url=url)) - except: + try: + audio.save() + except MutagenError: puts(colored.red("Problem tagging file: ") + colored.white("Is this file a WAV?")) - return False + saved_correctly = False + + return saved_correctly def open_files(filenames): From 8174447e01591a298c0beaad000bb53332de207f Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 16 May 2020 23:34:11 -0700 Subject: [PATCH 28/69] Fixing issue with different mutagen objects used for different id3 tag types. --- soundscrape/soundscrape.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 641b350..ffbc496 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -1085,6 +1085,7 @@ def tag_file( """ saved_correctly = True + audio = EasyMP3(filename) audio.tags = None audio["artist"] = artist @@ -1100,6 +1101,14 @@ def tag_file( if url is not None: # saves the tag as WOAR audio["website"] = url + + try: + audio.save() + except MutagenError: + puts(colored.red("Problem tagging file: ") + colored.white("Is this file a WAV?")) + saved_correctly = False + + audio = MP3(filename, ID3=OldID3) if artwork_url is not None: artwork_url = artwork_url.replace("https", "http") urls = [artwork_url] @@ -1113,8 +1122,6 @@ def tag_file( continue image_data = response.content - audio = MP3(filename, ID3=OldID3) - mime = "image/jpeg" if ".png" in artwork_url: mime = "image/png" @@ -1124,7 +1131,6 @@ def tag_file( audio.tags.add(APIC(encoding=3, mime=mime, type=3, desc="Cover", data=image_data)) if url is not None: # Some software doesn't seem to use WOAR so the url is saved again as WXXX. - audio = MP3(filename, ID3=OldID3) audio.tags.add(WXXX(encoding=3, url=url)) try: From 897e3da3dccb449ddf21fd5bbd35ccdacd57e1fd Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sun, 17 May 2020 09:29:34 -0700 Subject: [PATCH 29/69] Minor refactoring (better checks against None, renaming vars). --- soundscrape/soundscrape.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index ffbc496..1f84290 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -162,12 +162,12 @@ def process_soundcloud(vargs): if hasattr(resolved, "next_href"): next_href = resolved.next_href while next_href is not None: - resolved2 = requests.get(next_href).json() + next_resolved = requests.get(next_href).json() next_href = None - if "next_href" in resolved2: - next_href = resolved2["next_href"] - resolved2 = soundcloud.resource.ResourceList(resolved2["collection"]) - resolved.collection.extend(resolved2) + if "next_href" in next_resolved: + next_href = next_resolved["next_href"] + next_resolved = soundcloud.resource.ResourceList(next_resolved["collection"]) + resolved.collection.extend(next_resolved) resolved = resolved.collection except Exception as e: # SoundCloud is trying to prevent us from downloading this. @@ -438,6 +438,7 @@ def download_tracks( location = stream.url filename = download_file(location, track_filename) + tagged = tag_file( filename, artist=track["user"]["username"], @@ -1048,9 +1049,9 @@ def download_file(url, path, session=None, params=None): # Use a temporary file so that we don't import incomplete files. tmp_path = f"{path}.tmp" - if session and params: + if session is not None and params is not None: r = session.get(url, params=params, stream=True) - elif session and not params: + elif session is not None and params is None: r = session.get(url, stream=True) else: r = requests.get(url, stream=True) From 0425cd53a6763fa605156e06c8631fb39e84e1b9 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sun, 17 May 2020 15:43:08 -0700 Subject: [PATCH 30/69] Refactoring 3 areas with nearly identical code around downloading a single track from SoundCloud. --- soundscrape/soundscrape.py | 200 +++++++++++++++---------------------- 1 file changed, 81 insertions(+), 119 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 1f84290..8a43d58 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -136,6 +136,7 @@ def process_soundcloud(vargs): num_tracks = vargs["num_tracks"] id3_extras = {} + filenames = [] likes = False client = soundcloud.Client(client_id=CLIENT_ID) @@ -177,44 +178,23 @@ def process_soundcloud(vargs): puts(colored.red("Problem downloading [404]: ") + colored.white("Item Not Found")) return None - # TODO: This exception handler can be handled by download_track(...) with a little refactoring. message = str(e) - item_id = message.rsplit("/", 1)[-1].split(".json")[0].split("?client_id")[0] - hard_track_url = get_hard_track_url(item_id) - track_data = {} - request = requests.get(hard_track_url) + item_id = message.rsplit("/", 1)[-1].split(".json")[0].split("?client_id")[0] + url = get_hard_track_url(item_id) + request = requests.get(url) + # TODO: handle != 200. title_tag = request.text.split("<title>")[1].split("</title")[0] - track_data["title"] = title_tag.split(" by ")[0].strip() - track_data["artist"] = title_tag.split(" by ")[1].split("|")[0].strip() - # TODO: Scrape more data? - - puts_safe(colored.green("Scraping") + colored.white(f": {track_data['title']}")) - filenames = [] - filename = sanitize(f"{track_data['artist']} - {track_data['title']}.mp3") + track_data = { + "url": url, + "artist": title_tag.split(" by ")[1].split("|")[0].strip(), + "title": title_tag.split(" by ")[0].strip(), + } - if folders: - name_path = join(vargs["path"], track_data["artist"]) - if not exists(name_path): - mkdir(name_path) - filename = join(name_path, filename) - else: - filename = join(vargs["path"], filename) - - if exists(filename): - puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_data["title"])) - return None - - filename = download_file(hard_track_url, filename) - tagged = tag_file(filename, artist=track_data["artist"], title=track_data["title"]) - - if not tagged: - wav_filename = f"{filename[:-3]}wav" - os.rename(filename, wav_filename) - filename = wav_filename - - filenames.append(filename) + filename = download_single_track(track_data, use_folders=folders, custom_path=vargs["path"]) + if filename is not None: + filenames.append(filename) # This is is likely a 'likes' page. if not hasattr(resolved, "kind"): @@ -228,8 +208,22 @@ def process_soundcloud(vargs): tracks = get_soundcloud_api_playlist_data(resolved.id)["tracks"] tracks = tracks[:num_tracks] for track in tracks: - download_track(track, resolved.title, keep_previews, folders, custom_path=vargs["path"]) - return None + if not keep_previews and (track.get("duration", 0) < track.get("full_duration", 0)): + puts_safe(colored.yellow("Skipping preview track: ") + colored.white(track["title"])) + continue + + artist = track["user"].get("full_name", "") + if artist == "": + artist = track["user"]["username"] + + track_data = { + "url": get_hard_track_url(track["id"]), + "artist": artist, + "title": resolved.title, + } + filename = download_single_track(track_data, use_folders=folders, custom_path=vargs["path"]) + if filename is not None: + filenames.append(filename) elif resolved.kind == "track": tracks = [resolved] elif resolved.kind == "group": @@ -248,8 +242,6 @@ def process_soundcloud(vargs): # http://feeds.soundcloud.com/users/soundcloud:users:<artist_id>/sounds.rss if len(tracks) == 0 and resolved.track_count > 0: - filenames = [] - response = requests.get(f"http://feeds.soundcloud.com/users/soundcloud:users:{resolved.id}/sounds.rss") if response.status_code != 200: # TODO: add error reporting and handling @@ -257,7 +249,22 @@ def process_soundcloud(vargs): feed = atoma.parse_rss_bytes(response.content) for feed_item in feed.items: - filename = download_track_from_feed(resolved.full_name, feed.image.url, feed_item, folders, vargs["path"]) + if len(feed_item.enclosures) == 0: + continue + + artist = resolved.full_name + if artist == "": + artist = resolved.username + + track_data = { + "url": feed_item.enclosures[0].url, + "artist": artist, + "title": feed_item.title, + "date": feed_item.pub_date.year, + "artwork_url": feed.image.url, + } + + filename = download_single_track(track_data, use_folders=folders, custom_path=vargs["path"]) if filename is not None: filenames.append(filename) else: @@ -269,64 +276,27 @@ def process_soundcloud(vargs): open_files(filenames) -def download_track_from_feed(artist, artwork, track, folders=False, custom_path=""): - """Given an RSS feed item, download the track.""" - # TODO: With a little refactoring this method can merge with download_track(...). - if len(track.enclosures) == 0: - return None - url = track.enclosures[0].url - title = track.title - - filename = sanitize(f"{artist} - {title}.mp3") - - if folders: - name_path = join(custom_path, artist) - if not exists(name_path): - mkdir(name_path) - filename = join(name_path, filename) - else: - filename = join(custom_path, filename) - - if exists(filename): - puts_safe(colored.yellow("Track already downloaded: ") + colored.white(title)) - return None - - puts_safe(colored.green("Scraping") + colored.white(f": {title}")) - - filename = download_file(url, filename) - tagged = tag_file(filename, artist=artist, title=title, year=track.pub_date.year, artwork_url=artwork,) - if not tagged: - wav_filename = f"{filename[:-3]}wav" - os.rename(filename, wav_filename) - filename = wav_filename - - return filename +def download_single_track(track_data, use_folders=False, custom_path=""): + """ + Download a single track from SoundCloud. + Args: + track_data (dict): + use_folders (bool): + custom_path (str): -def download_track( - track, album_name="", keep_previews=False, folders=False, filenames=[], custom_path="", -): - """Given a track, force scrape it.""" - hard_track_url = get_hard_track_url(track["id"]) + Returns: + filename or None - # We have no info on this track whatsoever. - if "title" not in track: + """ + required_keys = {"artist", "title", "url"} + if not track_data.keys() >= required_keys: return None - if not keep_previews: - if track.get("duration", 0) < track.get("full_duration", 0): - puts_safe(colored.yellow("Skipping preview track") + colored.white(f": {track['title']}")) - return None - - # May not have a "full name" - name = track["user"].get("full_name", "") - if name == "": - name = track["user"]["username"] - - filename = sanitize(f"{name} - {track['title']}.mp3") + filename = sanitize(f"{track_data['artist']} - {track_data['title']}.mp3") - if folders: - name_path = join(custom_path, name) + if use_folders: + name_path = join(custom_path, track_data["artist"]) if not exists(name_path): mkdir(name_path) filename = join(name_path, filename) @@ -334,29 +304,21 @@ def download_track( filename = join(custom_path, filename) if exists(filename): - puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track["title"])) - return None - - # Skip already downloaded track. - if filename in filenames: + puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_data["title"])) return None - - if hard_track_url: - puts_safe(colored.green("Scraping") + colored.white(f": {track['title']}")) else: - # Region coded? - puts_safe(colored.yellow("Unable to download") + colored.white(f": {track['title']}")) - return None + puts_safe(colored.green("Scraping: ") + colored.white(track_data["title"])) + + filename = download_file(track_data["url"], filename) - filename = download_file(hard_track_url, filename) tagged = tag_file( filename, - artist=name, - title=track["title"], - year=track["created_at"][:4], - genre=track["genre"], - album=album_name, - artwork_url=track["artwork_url"], + artist=track_data["artist"], + title=track_data["title"], + album=track_data.get("album", None), + year=track_data.get("date", None), + genre=track_data.get("genre", None), + artwork_url=track_data.get("artwork_url", None), ) if not tagged: wav_filename = f"{filename[:-3]}wav" @@ -388,15 +350,13 @@ def download_tracks( t_track["stream_url"] = track.download_url else: if downloadable: - puts_safe(colored.red("Skipping") + colored.white(f": {track.title}")) + puts_safe(colored.red("Skipping: ") + colored.white(track.title)) continue if hasattr(track, "stream_url"): t_track["stream_url"] = track.stream_url else: t_track["direct"] = True - streams_url = f"https://api.soundcloud.com/i1/tracks/{track.id}/streams?client_id={AGGRESSIVE_CLIENT_ID}&app_version={APP_VERSION}" - response = requests.get(streams_url).json() - t_track["stream_url"] = response["http_mp3_128_url"] + t_track["stream_url"] = get_hard_track_url(track.id) track = t_track except: @@ -426,7 +386,7 @@ def download_tracks( puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_title)) continue - puts_safe(colored.green("Downloading") + colored.white(f": {track['title']}")) + puts_safe(colored.green("Downloading: ") + colored.white(track["title"])) if track.get("direct", False): location = track["stream_url"] @@ -708,12 +668,12 @@ def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_pa mkdir(track_artist_path) track_filename = join(track_artist_path, track_filename) if exists(track_filename): - puts_safe(colored.yellow("Skipping") + colored.white(f": {data['title']} - already exists!")) + puts_safe(colored.yellow("Skipping: ") + colored.white(data["title"]) + colored.yellow("- already exists!")) return [] else: track_filename = join(custom_path, track_filename) - puts_safe(colored.green("Downloading") + colored.white(f": {data['artist']} - {data['title']} ({track_filename[-4:]})")) + puts_safe(colored.green("Downloading: ") + colored.white(f"{data['artist']} - {data['title']} ({track_filename[-4:]})")) download_file(data["mp3_url"], track_filename) if track_filename[-4:] == ".mp3": tag_file( @@ -818,12 +778,12 @@ def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_p mkdir(track_artist_path) track_filename = join(track_artist_path, track_filename) if exists(track_filename): - puts_safe(colored.yellow("Skipping") + colored.white(f": {data['title']} - already exists!")) + puts_safe(colored.yellow("Skipping: ") + colored.white(data["title"]) + colored.yellow(" - already exists!")) return [] else: track_filename = join(custom_path, track_filename) - puts_safe(colored.green("Downloading") + colored.white(f": {data['artist']} - {data['title']}")) + puts_safe(colored.green("Downloading: ") + colored.white(f"{data['artist']} - {data['title']}")) download_file(data["mp3_url"], track_filename) tag_file( track_filename, @@ -998,11 +958,13 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa filename = join(directory, str(song_count) + " - " + sanitize(each_song["name"]) + ".mp3",) if exists(filename): - puts_safe(colored.yellow("Skipping") + colored.white(f": {each_song['name']} - already exists!")) + puts_safe( + colored.yellow("Skipping: ") + colored.white(each_song["name"]) + colored.yellow(" - already exists!") + ) song_count += 1 continue - puts_safe(colored.green("Downloading") + colored.white(f": {each_song['name']}")) + puts_safe(colored.green("Downloading: ") + colored.white(each_song["name"])) path = download_file(url, filename, session=session, params=details) # example of genre_string: From 57715e0579f4e84578caac27bf3b9b134ec4a2ec Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sun, 17 May 2020 15:52:52 -0700 Subject: [PATCH 31/69] Minor renaming of methods. --- soundscrape/soundscrape.py | 39 +++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 8a43d58..d38393b 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -181,7 +181,7 @@ def process_soundcloud(vargs): message = str(e) item_id = message.rsplit("/", 1)[-1].split(".json")[0].split("?client_id")[0] - url = get_hard_track_url(item_id) + url = get_soundcloud_track_url(item_id) request = requests.get(url) # TODO: handle != 200. title_tag = request.text.split("<title>")[1].split("</title")[0] @@ -205,7 +205,7 @@ def process_soundcloud(vargs): if resolved.tracks != []: tracks = resolved.tracks else: - tracks = get_soundcloud_api_playlist_data(resolved.id)["tracks"] + tracks = get_soundcloud_playlist_data(resolved.id)["tracks"] tracks = tracks[:num_tracks] for track in tracks: if not keep_previews and (track.get("duration", 0) < track.get("full_duration", 0)): @@ -217,7 +217,7 @@ def process_soundcloud(vargs): artist = track["user"]["username"] track_data = { - "url": get_hard_track_url(track["id"]), + "url": get_soundcloud_track_url(track["id"]), "artist": artist, "title": resolved.title, } @@ -256,6 +256,7 @@ def process_soundcloud(vargs): if artist == "": artist = resolved.username + # TODO: There could be more than one enclosure, perhaps a playlist? Handle those cases. track_data = { "url": feed_item.enclosures[0].url, "artist": artist, @@ -356,7 +357,7 @@ def download_tracks( t_track["stream_url"] = track.stream_url else: t_track["direct"] = True - t_track["stream_url"] = get_hard_track_url(track.id) + t_track["stream_url"] = get_soundcloud_track_url(track.id) track = t_track except: @@ -422,8 +423,17 @@ def download_tracks( return filenames -def get_soundcloud_api_playlist_data(playlist_id): - """Scrape the new API. Returns the parsed JSON response.""" +def get_soundcloud_playlist_data(playlist_id): + """ + Get playlist data from SoundCloud. + + Args: + playlist_id (str): id of the playlist + + Returns: + data about the playlist with playlist_id or None + + """ url = f"https://api.soundcloud.com/playlists/{playlist_id}?representation=full&client_id=02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea&app_version=1467724310" response = requests.get(url) if response.status_code != 200: @@ -432,9 +442,20 @@ def get_soundcloud_api_playlist_data(playlist_id): return response.json() -def get_hard_track_url(item_id): - """Hard-scrapes a track.""" - url = f"https://api.soundcloud.com/i1/tracks/{item_id}/streams/?client_id={AGGRESSIVE_CLIENT_ID}&app_version={APP_VERSION}" +def get_soundcloud_track_url(track_id): + """ + Get the track url from SoundCloud. + + Args: + track_id (str): id of the track + + Returns: + url to the track with track_id or None + + """ + url = ( + f"https://api.soundcloud.com/i1/tracks/{track_id}/streams/?client_id={AGGRESSIVE_CLIENT_ID}&app_version={APP_VERSION}" + ) response = requests.get(url) if response.status_code != 200: return None From 8fe1d7d0af0a77db96411d11d3715186d1a23bde Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Fri, 22 May 2020 18:28:42 -0700 Subject: [PATCH 32/69] Fixing error handling. --- soundscrape/soundscrape.py | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index d38393b..00baf18 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -174,16 +174,19 @@ def process_soundcloud(vargs): # SoundCloud is trying to prevent us from downloading this. # Instead of utilizing the API/client we will do all our own scraping. Boo. - if "404 Client Error" in str(e): + message = str(e) + + if "404 client error" in message.lower(): puts(colored.red("Problem downloading [404]: ") + colored.white("Item Not Found")) return None - message = str(e) - item_id = message.rsplit("/", 1)[-1].split(".json")[0].split("?client_id")[0] url = get_soundcloud_track_url(item_id) request = requests.get(url) - # TODO: handle != 200. + if request.status_code != 200: + puts(colored.red("Problem downloading: ") + colored.white(url)) + return None + title_tag = request.text.split("<title>")[1].split("</title")[0] track_data = { @@ -191,7 +194,6 @@ def process_soundcloud(vargs): "artist": title_tag.split(" by ")[1].split("|")[0].strip(), "title": title_tag.split(" by ")[0].strip(), } - filename = download_single_track(track_data, use_folders=folders, custom_path=vargs["path"]) if filename is not None: filenames.append(filename) @@ -242,10 +244,11 @@ def process_soundcloud(vargs): # http://feeds.soundcloud.com/users/soundcloud:users:<artist_id>/sounds.rss if len(tracks) == 0 and resolved.track_count > 0: - response = requests.get(f"http://feeds.soundcloud.com/users/soundcloud:users:{resolved.id}/sounds.rss") + url = f"http://feeds.soundcloud.com/users/soundcloud:users:{resolved.id}/sounds.rss" + response = requests.get(url) if response.status_code != 200: - # TODO: add error reporting and handling - return + puts(colored.red("Problem downloading: ") + colored.white(url)) + return None feed = atoma.parse_rss_bytes(response.content) for feed_item in feed.items: @@ -437,6 +440,7 @@ def get_soundcloud_playlist_data(playlist_id): url = f"https://api.soundcloud.com/playlists/{playlist_id}?representation=full&client_id=02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea&app_version=1467724310" response = requests.get(url) if response.status_code != 200: + puts(colored.red("Problem downloading: ") + colored.white(url)) return None return response.json() @@ -458,6 +462,7 @@ def get_soundcloud_track_url(track_id): ) response = requests.get(url) if response.status_code != 200: + puts(colored.red("Problem downloading: ") + colored.white(url)) return None json_response = response.json() @@ -579,9 +584,9 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path= filenames.append(path) - except Exception as e: + except: puts_safe(colored.red("Problem downloading ") + colored.white(track_name)) - print(e) + return filenames @@ -781,13 +786,13 @@ def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_p list: filenames to open """ + filenames = [] + try: data = get_audiomack_data(mc_url) - except Exception as e: + except: puts_safe(colored.red("Problem downloading ") + mc_url) - print(e) - - filenames = [] + return filenames track_artist = sanitize(data["artist"]) track_title = sanitize(data["title"]) @@ -800,7 +805,7 @@ def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_p track_filename = join(track_artist_path, track_filename) if exists(track_filename): puts_safe(colored.yellow("Skipping: ") + colored.white(data["title"]) + colored.yellow(" - already exists!")) - return [] + return filenames else: track_filename = join(custom_path, track_filename) @@ -1068,6 +1073,7 @@ def tag_file( url (str): """ + # TODO: move year to date. saved_correctly = True audio = EasyMP3(filename) From 690438528b6aa970cf71b838d6ea8973fa98975d Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Fri, 22 May 2020 18:46:33 -0700 Subject: [PATCH 33/69] Making the parameter consistent as . --- soundscrape/soundscrape.py | 61 +++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 00baf18..dc2c5d2 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -310,8 +310,8 @@ def download_single_track(track_data, use_folders=False, custom_path=""): if exists(filename): puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_data["title"])) return None - else: - puts_safe(colored.green("Scraping: ") + colored.white(track_data["title"])) + + puts_safe(colored.green("Scraping: ") + colored.white(track_data["title"])) filename = download_file(track_data["url"], filename) @@ -333,7 +333,7 @@ def download_single_track(track_data, use_folders=False, custom_path=""): def download_tracks( - client, tracks, num_tracks=sys.maxsize, downloadable=False, folders=False, custom_path="", id3_extras={}, + client, tracks, num_tracks=sys.maxsize, downloadable=False, use_folders=False, custom_path="", id3_extras={}, ): """Given a list of tracks, iteratively download all of them.""" filenames = [] @@ -378,7 +378,7 @@ def download_tracks( track_title = sanitize(track["title"]) track_filename = f"{track_artist} - {track_title}.mp3" - if folders: + if use_folders: track_artist_path = join(custom_path, track_artist) if not exists(track_artist_path): mkdir(track_artist_path) @@ -484,7 +484,7 @@ def process_bandcamp(vargs): bc_url = f"https://{artist_url}.bandcamp.com/music" filenames = scrape_bandcamp_url( - bc_url, num_tracks=vargs["num_tracks"], folders=vargs["folders"], custom_path=vargs["path"], + bc_url, num_tracks=vargs["num_tracks"], use_folders=vargs["folders"], custom_path=vargs["path"], ) # check if we have lists inside a list, which indicates the @@ -504,7 +504,7 @@ def process_bandcamp(vargs): # Largely borrowed from bandcampscrape # ( reference: https://github.com/ronier/bandcampscrape) -def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path=""): +def scrape_bandcamp_url(url, num_tracks=sys.maxsize, use_folders=False, custom_path=""): """ Pull out artist and track info from a Bandcamp URL. @@ -519,13 +519,13 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path= # so we call the scrape_bandcamp_url() method for each one if type(album_data) is list: for album_url in album_data: - filenames.append(scrape_bandcamp_url(album_url, num_tracks, folders, custom_path)) + filenames.append(scrape_bandcamp_url(album_url, num_tracks, use_folders, custom_path)) return filenames artist = album_data["artist"] album_name = album_data["album_name"] - if folders: + if use_folders: if album_name: directory = f"{artist} - {album_name}" else: @@ -544,13 +544,13 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path= track_number = str(track["track_num"]).zfill(2) else: track_number = None - if track_number and folders: + if track_number and use_folders: track_filename = f"{track_number} - {track_name}.mp3" else: track_filename = f"{track_name}.mp3" track_filename = sanitize(track_filename) - if folders: + if use_folders: path = join(directory, track_filename) else: path = join(custom_path, sanitize(artist) + " - " + track_filename) @@ -658,7 +658,7 @@ def process_mixcloud(vargs): mc_url = f"https://mixcloud.com/{artist_url}" filenames = scrape_mixcloud_url( - mc_url, num_tracks=vargs["num_tracks"], folders=vargs["folders"], custom_path=vargs["path"], + mc_url, num_tracks=vargs["num_tracks"], use_folders=vargs["folders"], custom_path=vargs["path"], ) if vargs["open"]: @@ -667,7 +667,7 @@ def process_mixcloud(vargs): return -def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=""): +def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, use_folders=False, custom_path=""): """ Pull out artist and track info from a MixCloud URL. @@ -675,30 +675,30 @@ def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_pa list: filenames to open """ + filenames = [] + try: data = get_mixcloud_data(mc_url) - except Exception as e: + except: puts_safe(colored.red("Problem downloading ") + mc_url) - print(e) - return [] - - filenames = [] + return filenames track_artist = sanitize(data["artist"]) track_title = sanitize(data["title"]) track_filename = f"{track_artist} - {track_title}{data['mp3_url'][-4:]}" - if folders: + if use_folders: track_artist_path = join(custom_path, track_artist) if not exists(track_artist_path): mkdir(track_artist_path) track_filename = join(track_artist_path, track_filename) - if exists(track_filename): - puts_safe(colored.yellow("Skipping: ") + colored.white(data["title"]) + colored.yellow("- already exists!")) - return [] else: track_filename = join(custom_path, track_filename) + if exists(track_filename): + puts_safe(colored.yellow("Skipping: ") + colored.white(data["title"]) + colored.yellow("- already exists!")) + return filenames + puts_safe(colored.green("Downloading: ") + colored.white(f"{data['artist']} - {data['title']} ({track_filename[-4:]})")) download_file(data["mp3_url"], track_filename) if track_filename[-4:] == ".mp3": @@ -771,14 +771,14 @@ def process_audiomack(vargs): mc_url = f"https://audiomack.com/{artist_url}" filenames = scrape_audiomack_url( - mc_url, num_tracks=vargs["num_tracks"], folders=vargs["folders"], custom_path=vargs["path"], + mc_url, num_tracks=vargs["num_tracks"], use_folders=vargs["folders"], custom_path=vargs["path"], ) if vargs["open"]: open_files(filenames) -def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_path=""): +def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, use_folders=False, custom_path=""): """ Pull out artist and track info from a Audiomack URL. @@ -798,17 +798,18 @@ def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, folders=False, custom_p track_title = sanitize(data["title"]) track_filename = f"{track_artist} - {track_title}.mp3" - if folders: + if use_folders: track_artist_path = join(custom_path, track_artist) if not exists(track_artist_path): mkdir(track_artist_path) track_filename = join(track_artist_path, track_filename) - if exists(track_filename): - puts_safe(colored.yellow("Skipping: ") + colored.white(data["title"]) + colored.yellow(" - already exists!")) - return filenames else: track_filename = join(custom_path, track_filename) + if exists(track_filename): + puts_safe(colored.yellow("Skipping: ") + colored.white(data["title"]) + colored.yellow(" - already exists!")) + return filenames + puts_safe(colored.green("Downloading: ") + colored.white(f"{data['artist']} - {data['title']}")) download_file(data["mp3_url"], track_filename) tag_file( @@ -876,7 +877,7 @@ def process_musicbed(vargs): vargs["login"], vargs["password"], num_tracks=vargs["num_tracks"], - folders=vargs["folders"], + use_folders=vargs["folders"], custom_path=vargs["path"], ) @@ -884,7 +885,7 @@ def process_musicbed(vargs): open_files(filenames) -def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=False, custom_path=""): +def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, use_folders=False, custom_path=""): """ Scrapes provided MusicBed URL. @@ -975,7 +976,7 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, folders=Fa details["X-Amz-Credential"] = details["X-Amz-Credential"].replace("%2F", "/") directory = custom_path - if folders: + if use_folders: sanitized_artist = sanitize(each_song["album"]["data"]["artist"]["data"]["name"]) sanitized_album = sanitize(each_song["album"]["data"]["name"]) directory = join(directory, sanitized_artist + " - " + sanitized_album) From b1031baf28e5c52f5f535594425939b3b696c893 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Fri, 22 May 2020 18:47:06 -0700 Subject: [PATCH 34/69] Adding support for multiple enclosures in the feeds path from SoundCloud. --- soundscrape/soundscrape.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index dc2c5d2..b118680 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -259,18 +259,18 @@ def process_soundcloud(vargs): if artist == "": artist = resolved.username - # TODO: There could be more than one enclosure, perhaps a playlist? Handle those cases. - track_data = { - "url": feed_item.enclosures[0].url, - "artist": artist, - "title": feed_item.title, - "date": feed_item.pub_date.year, - "artwork_url": feed.image.url, - } - - filename = download_single_track(track_data, use_folders=folders, custom_path=vargs["path"]) - if filename is not None: - filenames.append(filename) + for enclosure in feed_item.enclosures: + track_data = { + "url": enclosure.url, + "artist": artist, + "title": feed_item.title, + "date": feed_item.pub_date.year, + "artwork_url": feed.image.url, + } + + filename = download_single_track(track_data, use_folders=folders, custom_path=vargs["path"]) + if filename is not None: + filenames.append(filename) else: filenames = download_tracks( client, tracks, num_tracks, vargs["downloadable"], folders, vargs["path"], id3_extras=id3_extras, From f7dc908e13bde81894646ffd5861c5b197c17022 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Fri, 22 May 2020 20:52:45 -0700 Subject: [PATCH 35/69] Simplifying the multiple track download path to reuse more code. Bonus fixes for mp3 tagging and removing . --- soundscrape/soundscrape.py | 159 ++++++++++++++++--------------------- 1 file changed, 68 insertions(+), 91 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index b118680..27a47d2 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -14,7 +14,7 @@ from clint.textui import colored, puts, progress from datetime import datetime from mutagen import MutagenError -from mutagen.mp3 import MP3, EasyMP3 +from mutagen.mp3 import MP3, EasyMP3, HeaderNotFoundError from mutagen.id3 import APIC, WXXX from mutagen.id3 import ID3 as OldID3 from subprocess import Popen, PIPE @@ -69,7 +69,7 @@ def main(): "-L", "--login", type=str, default="soundscrape123@mailinator.com", help="Set login", ) parser.add_argument( - "-d", "--downloadable", action="store_true", help="Only fetch tracks with a Downloadable link.", + "-d", "--downloadable", action="store_true", help="Only fetch tracks with a downloadable link.", ) parser.add_argument( "-t", "--track", type=str, default="", help="The name of a specific track by an artist", @@ -135,7 +135,8 @@ def process_soundcloud(vargs): folders = vargs["folders"] num_tracks = vargs["num_tracks"] - id3_extras = {} + album = None + artist = None filenames = [] likes = False client = soundcloud.Client(client_id=CLIENT_ID) @@ -203,8 +204,8 @@ def process_soundcloud(vargs): tracks = resolved else: if resolved.kind == "playlist": - id3_extras["album"] = resolved.title - if resolved.tracks != []: + album = resolved.title + if len(resolved.tracks) > 0: tracks = resolved.tracks else: tracks = get_soundcloud_playlist_data(resolved.id)["tracks"] @@ -222,6 +223,7 @@ def process_soundcloud(vargs): "url": get_soundcloud_track_url(track["id"]), "artist": artist, "title": resolved.title, + "album": album, } filename = download_single_track(track_data, use_folders=folders, custom_path=vargs["path"]) if filename is not None: @@ -233,6 +235,10 @@ def process_soundcloud(vargs): else: tracks = client.get(f"/users/{resolved.id}/tracks", limit=200) + artist = resolved.full_name + if artist == "": + artist = resolved.username + # SoundCloud has a unfortunate bug where some artists don't have any tracks returned using: # client.get("/users/" + artist_id + "/tracks", limit=200) # There are a number of reports of this issue since late 2019 on StackOverflow. @@ -255,10 +261,6 @@ def process_soundcloud(vargs): if len(feed_item.enclosures) == 0: continue - artist = resolved.full_name - if artist == "": - artist = resolved.username - for enclosure in feed_item.enclosures: track_data = { "url": enclosure.url, @@ -273,7 +275,7 @@ def process_soundcloud(vargs): filenames.append(filename) else: filenames = download_tracks( - client, tracks, num_tracks, vargs["downloadable"], folders, vargs["path"], id3_extras=id3_extras, + client, tracks, artist, album, num_tracks, vargs["downloadable"], folders, vargs["path"], ) if vargs["open"]: @@ -333,94 +335,58 @@ def download_single_track(track_data, use_folders=False, custom_path=""): def download_tracks( - client, tracks, num_tracks=sys.maxsize, downloadable=False, use_folders=False, custom_path="", id3_extras={}, + client, tracks, artist, album, num_tracks=sys.maxsize, downloadable_links_only=False, use_folders=False, custom_path="", ): """Given a list of tracks, iteratively download all of them.""" filenames = [] for i, track in enumerate(tracks): # "Track" and "Resource" objects are actually different, even though they're the same. - if isinstance(track, soundcloud.resource.Resource): - try: - t_track = {} - t_track["downloadable"] = track.downloadable - t_track["streamable"] = track.streamable - t_track["title"] = track.title - t_track["user"] = {"username": track.user["username"]} - t_track["release_year"] = track.release - t_track["genre"] = track.genre - t_track["artwork_url"] = track.artwork_url - if track.downloadable: - t_track["stream_url"] = track.download_url - else: - if downloadable: - puts_safe(colored.red("Skipping: ") + colored.white(track.title)) - continue - if hasattr(track, "stream_url"): - t_track["stream_url"] = track.stream_url - else: - t_track["direct"] = True - t_track["stream_url"] = get_soundcloud_track_url(track.id) - - track = t_track - except: - puts_safe(colored.white(track.title) + colored.red(" is not downloadable.")) - continue - - if i > num_tracks - 1: + if not isinstance(track, soundcloud.resource.Resource): continue + + track_data = {} try: - if not track.get("stream_url", False): - puts_safe(colored.white(track["title"]) + colored.red(" is not downloadable.")) - continue + track_data = { + "streamable": track.streamable, + "title": track.title, + "artist": artist, + "release_year": track.release, + "genre": track.genre, + "album": album, + "artwork_url": track.artwork_url, + "direct": False, + } + if track.downloadable: + track_data["url"] = track.download_url else: - track_artist = sanitize(track["user"]["username"]) - track_title = sanitize(track["title"]) - track_filename = f"{track_artist} - {track_title}.mp3" - - if use_folders: - track_artist_path = join(custom_path, track_artist) - if not exists(track_artist_path): - mkdir(track_artist_path) - track_filename = join(track_artist_path, track_filename) - else: - track_filename = join(custom_path, track_filename) - - if exists(track_filename): - puts_safe(colored.yellow("Track already downloaded: ") + colored.white(track_title)) + if downloadable_links_only: + puts_safe(colored.red("Skipping: ") + colored.white(track.title)) continue - - puts_safe(colored.green("Downloading: ") + colored.white(track["title"])) - - if track.get("direct", False): - location = track["stream_url"] + if hasattr(track, "stream_url"): + track_data["url"] = track.stream_url else: - stream = client.get(track["stream_url"], allow_redirects=False, limit=200) - if hasattr(stream, "location"): - location = stream.location - else: - location = stream.url - - filename = download_file(location, track_filename) - - tagged = tag_file( - filename, - artist=track["user"]["username"], - title=track["title"], - year=track["release_year"], - genre=track["genre"], - album=id3_extras.get("album", None), - artwork_url=track["artwork_url"], - ) + track_data["url"] = get_soundcloud_track_url(track.id) + track_data["direct"] = True + except: + puts_safe(colored.white(track.title) + colored.red(" is not downloadable.")) + continue - if not tagged: - wav_filename = f"{filename[:-3]}wav" - os.rename(filename, wav_filename) - filename = wav_filename + if i > num_tracks - 1: + continue + try: + if not track_data["direct"]: + stream = client.get(track_data["url"], allow_redirects=False, limit=200) + if hasattr(stream, "location"): + track_data["url"] = stream.location + else: + track_data["url"] = stream.url + filename = download_single_track(track_data, use_folders, custom_path) + if filename is not None: filenames.append(filename) except Exception as e: - puts_safe(colored.red("Problem downloading ") + colored.white(track["title"])) + puts_safe(colored.red("Problem downloading ") + colored.white(track_data["title"])) puts_safe(str(e)) return filenames @@ -1074,14 +1040,19 @@ def tag_file( url (str): """ - # TODO: move year to date. saved_correctly = True - audio = EasyMP3(filename) + try: + audio = EasyMP3(filename) + except HeaderNotFoundError: + puts(colored.red("Problem opening file: ") + colored.white("Is this file a WAV?")) + return False + audio.tags = None audio["artist"] = artist audio["title"] = title if year is not None: + # TODO: move year to date. audio["date"] = str(year) if album is not None: audio["album"] = album @@ -1099,11 +1070,17 @@ def tag_file( puts(colored.red("Problem tagging file: ") + colored.white("Is this file a WAV?")) saved_correctly = False - audio = MP3(filename, ID3=OldID3) + try: + audio = MP3(filename, ID3=OldID3) + except HeaderNotFoundError: + puts(colored.red("Problem opening file: ") + colored.white("Is this file a WAV?")) + return False + if artwork_url is not None: + mime = "image/jpeg" artwork_url = artwork_url.replace("https", "http") urls = [artwork_url] - if "-large" in url: + if "-large" in artwork_url: urls.insert(0, artwork_url.replace("-large", "-t500x500")) image_data = b"0" @@ -1111,11 +1088,11 @@ def tag_file( response = requests.get(a_url) if response.status_code != 200: continue - image_data = response.content - mime = "image/jpeg" - if ".png" in artwork_url: - mime = "image/png" + if ".png" in a_url: + mime = "image/png" + image_data = response.content + break # encoding=3 means utf-8 # type=3 means cover image From 2d42cabef04701d4c994d3f0e5fe60f59e41c125 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 12:34:15 -0700 Subject: [PATCH 36/69] Better handling for playlists and sets, including reusing a modified version of existing code. --- soundscrape/soundscrape.py | 271 ++++++++++++++++++++----------------- 1 file changed, 145 insertions(+), 126 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 27a47d2..b8aa8b5 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -132,8 +132,10 @@ def process_soundcloud(vargs): url = vargs["artist_url"].lower() track_permalink = vargs["track"].lower() keep_previews = vargs["keep"] - folders = vargs["folders"] num_tracks = vargs["num_tracks"] + use_folders = vargs["folders"] + custom_path = vargs["path"] + downloadable_links_only = vargs["downloadable"] album = None artist = None @@ -172,116 +174,108 @@ def process_soundcloud(vargs): resolved.collection.extend(next_resolved) resolved = resolved.collection except Exception as e: - # SoundCloud is trying to prevent us from downloading this. - # Instead of utilizing the API/client we will do all our own scraping. Boo. - - message = str(e) - - if "404 client error" in message.lower(): - puts(colored.red("Problem downloading [404]: ") + colored.white("Item Not Found")) - return None - - item_id = message.rsplit("/", 1)[-1].split(".json")[0].split("?client_id")[0] - url = get_soundcloud_track_url(item_id) - request = requests.get(url) - if request.status_code != 200: - puts(colored.red("Problem downloading: ") + colored.white(url)) - return None - - title_tag = request.text.split("<title>")[1].split("</title")[0] - - track_data = { - "url": url, - "artist": title_tag.split(" by ")[1].split("|")[0].strip(), - "title": title_tag.split(" by ")[0].strip(), - } - filename = download_single_track(track_data, use_folders=folders, custom_path=vargs["path"]) + filename = force_download_from_soundcloud(str(e), use_folders, custom_path) if filename is not None: filenames.append(filename) - # This is is likely a 'likes' page. if not hasattr(resolved, "kind"): + # This is either likes or sets. tracks = resolved - else: - if resolved.kind == "playlist": - album = resolved.title - if len(resolved.tracks) > 0: - tracks = resolved.tracks - else: - tracks = get_soundcloud_playlist_data(resolved.id)["tracks"] - tracks = tracks[:num_tracks] - for track in tracks: - if not keep_previews and (track.get("duration", 0) < track.get("full_duration", 0)): - puts_safe(colored.yellow("Skipping preview track: ") + colored.white(track["title"])) - continue - - artist = track["user"].get("full_name", "") - if artist == "": - artist = track["user"]["username"] - - track_data = { - "url": get_soundcloud_track_url(track["id"]), - "artist": artist, - "title": resolved.title, - "album": album, - } - filename = download_single_track(track_data, use_folders=folders, custom_path=vargs["path"]) - if filename is not None: - filenames.append(filename) - elif resolved.kind == "track": - tracks = [resolved] - elif resolved.kind == "group": - tracks = client.get(f"/groups/{resolved.id}/tracks", limit=200) + elif resolved.kind == "playlist": + album = resolved.title + if len(resolved.tracks) > 0: + tracks = resolved.tracks else: - tracks = client.get(f"/users/{resolved.id}/tracks", limit=200) + tracks = get_soundcloud_playlist_data(resolved.id)["tracks"] + elif resolved.kind == "track": + tracks = [resolved] + elif resolved.kind == "group": + tracks = client.get(f"/groups/{resolved.id}/tracks", limit=200) + else: + tracks = client.get(f"/users/{resolved.id}/tracks", limit=200) + if hasattr(resolved, "full_name"): artist = resolved.full_name if artist == "": artist = resolved.username - # SoundCloud has a unfortunate bug where some artists don't have any tracks returned using: - # client.get("/users/" + artist_id + "/tracks", limit=200) - # There are a number of reports of this issue since late 2019 on StackOverflow. - # ( reference: https://stackoverflow.com/questions/59204383, https://stackoverflow.com/questions/61807979) - # It seems that the common pattern is that if an artist has any tracks marked as private, - # e.g. the track is not downloadable (only streamable), then SoundCloud won't return any - # tracks for that artist. The way around this is to refer to the artist's RSS feed. This - # is in the form of: - # http://feeds.soundcloud.com/users/soundcloud:users:<artist_id>/sounds.rss - - if len(tracks) == 0 and resolved.track_count > 0: - url = f"http://feeds.soundcloud.com/users/soundcloud:users:{resolved.id}/sounds.rss" - response = requests.get(url) - if response.status_code != 200: - puts(colored.red("Problem downloading: ") + colored.white(url)) - return None + # SoundCloud has a unfortunate bug where some artists don't have any tracks returned using: + # client.get("/users/" + artist_id + "/tracks", limit=200) + # There are a number of reports of this issue since late 2019 on StackOverflow. + # ( reference: https://stackoverflow.com/questions/59204383, https://stackoverflow.com/questions/61807979) + # It seems that the common pattern is that if an artist has any tracks marked as private, + # e.g. the track is not downloadable (only streamable), then SoundCloud won't return any + # tracks for that artist. The way around this is to refer to the artist's RSS feed. This + # is in the form of: + # http://feeds.soundcloud.com/users/soundcloud:users:<artist_id>/sounds.rss + + if len(tracks) == 0 and resolved.track_count > 0: + url = f"http://feeds.soundcloud.com/users/soundcloud:users:{resolved.id}/sounds.rss" + response = requests.get(url) + if response.status_code != 200: + puts(colored.red("Problem downloading: ") + colored.white(url)) + return None - feed = atoma.parse_rss_bytes(response.content) - for feed_item in feed.items: - if len(feed_item.enclosures) == 0: - continue + feed = atoma.parse_rss_bytes(response.content) + for feed_item in feed.items: + if len(feed_item.enclosures) == 0: + continue - for enclosure in feed_item.enclosures: - track_data = { - "url": enclosure.url, - "artist": artist, - "title": feed_item.title, - "date": feed_item.pub_date.year, - "artwork_url": feed.image.url, - } - - filename = download_single_track(track_data, use_folders=folders, custom_path=vargs["path"]) - if filename is not None: - filenames.append(filename) - else: - filenames = download_tracks( - client, tracks, artist, album, num_tracks, vargs["downloadable"], folders, vargs["path"], - ) + for enclosure in feed_item.enclosures: + track_data = { + "url": enclosure.url, + "artist": artist, + "title": feed_item.title, + "date": feed_item.pub_date.year, + "artwork_url": feed.image.url, + } + + filename = download_single_track(track_data, use_folders, custom_path) + if filename is not None: + filenames.append(filename) + else: + filenames = download_tracks( + client, tracks, artist, album, num_tracks, downloadable_links_only, use_folders, custom_path, + ) if vargs["open"]: open_files(filenames) +def force_download_from_soundcloud(message, use_folders, custom_path): + """ + Try to force download a track from SoundCloud despite a client error. + + Args: + message (str): + use_folder (bool): + custom_path (str): + + Returns: + filename of successfully downloaded track or None + + """ + if "404" in message.lower(): + puts(colored.red("Problem downloading [404]: ") + colored.white("Item Not Found")) + return None + + item_id = message.rsplit("/", 1)[-1].split(".json")[0].split("?client_id")[0] + url = get_soundcloud_track_url(item_id) + request = requests.get(url) + if request.status_code != 200: + puts(colored.red("Problem downloading: ") + colored.white(url)) + return None + + title_tag = request.text.split("<title>")[1].split("</title")[0] + + track_data = { + "url": url, + "artist": title_tag.split(" by ")[1].split("|")[0].strip(), + "title": title_tag.split(" by ")[0].strip(), + } + return download_single_track(track_data, use_folders, custom_path) + + def download_single_track(track_data, use_folders=False, custom_path=""): """ Download a single track from SoundCloud. @@ -321,10 +315,10 @@ def download_single_track(track_data, use_folders=False, custom_path=""): filename, artist=track_data["artist"], title=track_data["title"], - album=track_data.get("album", None), - year=track_data.get("date", None), - genre=track_data.get("genre", None), - artwork_url=track_data.get("artwork_url", None), + album=track_data.get("album"), + year=track_data.get("date"), + genre=track_data.get("genre"), + artwork_url=track_data.get("artwork_url"), ) if not tagged: wav_filename = f"{filename[:-3]}wav" @@ -341,35 +335,54 @@ def download_tracks( filenames = [] for i, track in enumerate(tracks): - # "Track" and "Resource" objects are actually different, even though they're the same. - if not isinstance(track, soundcloud.resource.Resource): + track_id = track.id if hasattr(track, "id") else track.get("id", "") + title = track.title if hasattr(track, "title") else track.get("title", "") + + if hasattr(track, "kind") and track.kind == "playlist": + if len(track.tracks) > 0: + playlist_tracks = track.tracks + else: + playlist_tracks = get_soundcloud_playlist_data(track_id) + playlist_tracks = playlist_tracks["tracks"] if playlist_tracks is not None else None + if playlist_tracks is not None: + download_tracks( + client, playlist_tracks, artist, album, num_tracks, downloadable_links_only, use_folders, custom_path, + ) + else: + puts_safe(colored.white(title) + colored.red(" is not downloadable.")) continue track_data = {} - try: - track_data = { - "streamable": track.streamable, - "title": track.title, - "artist": artist, - "release_year": track.release, - "genre": track.genre, - "album": album, - "artwork_url": track.artwork_url, - "direct": False, - } - if track.downloadable: - track_data["url"] = track.download_url + + user = track.user if hasattr(track, "user") else track.get("user") + track_artist = artist + if user is not None: + track_artist = user["full_name"] if user.get("full_name") is not None else user["username"] + + track_data = { + "direct": False, + "artist": artist if artist is not None else track_artist, + "album": album, + "title": title, + "streamable": track.streamable if hasattr(track, "streamable") else track.get("streamable", False), + "date": track.release if hasattr(track, "release") else track.get("release"), + "genre": track.genre if hasattr(track, "genre") else track.get("genre"), + "artwork_url": track.artwork_url if hasattr(track, "artwork_url") else track.get("artwork_url"), + } + if track.downloadable if hasattr(track, "downloadable") else track.get("downloadable", False): + track_data["url"] = track.download_url if hasattr(track, "download_url") else track.get("download_url") + else: + if downloadable_links_only: + puts_safe(colored.red("Skipping: ") + colored.white(title)) + continue + if hasattr(track, "stream_url") or isinstance(track, dict): + track_data["url"] = track.stream_url if hasattr(track, "stream_url") else track.get("stream_url") else: - if downloadable_links_only: - puts_safe(colored.red("Skipping: ") + colored.white(track.title)) - continue - if hasattr(track, "stream_url"): - track_data["url"] = track.stream_url - else: - track_data["url"] = get_soundcloud_track_url(track.id) - track_data["direct"] = True - except: - puts_safe(colored.white(track.title) + colored.red(" is not downloadable.")) + track_data["url"] = get_soundcloud_track_url(track_id) + track_data["direct"] = True + + if track_data["url"] is None: + puts_safe(colored.white(title) + colored.red(" is not downloadable.")) continue if i > num_tracks - 1: @@ -386,8 +399,11 @@ def download_tracks( if filename is not None: filenames.append(filename) except Exception as e: - puts_safe(colored.red("Problem downloading ") + colored.white(track_data["title"])) - puts_safe(str(e)) + filename = force_download_from_soundcloud(str(e), use_folders, custom_path) + if filename is not None: + filenames.append(filename) + else: + puts_safe(colored.red("Problem downloading ") + colored.white(track_data["title"])) return filenames @@ -406,7 +422,7 @@ def get_soundcloud_playlist_data(playlist_id): url = f"https://api.soundcloud.com/playlists/{playlist_id}?representation=full&client_id=02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea&app_version=1467724310" response = requests.get(url) if response.status_code != 200: - puts(colored.red("Problem downloading: ") + colored.white(url)) + puts(colored.red("Problem getting playlist data from: ") + colored.white(url)) return None return response.json() @@ -428,7 +444,7 @@ def get_soundcloud_track_url(track_id): ) response = requests.get(url) if response.status_code != 200: - puts(colored.red("Problem downloading: ") + colored.white(url)) + puts(colored.red("Problem getting track data from: ") + colored.white(url)) return None json_response = response.json() @@ -1112,6 +1128,9 @@ def tag_file( def open_files(filenames): """Call the system 'open' command on a file.""" + if len(filenames) == 0: + return + command = ["open"] + filenames process = Popen(command, stdout=PIPE, stderr=PIPE) stdout, stderr = process.communicate() From 29789a61fafa540caf18b6ff3fc26510493283e3 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 12:35:44 -0700 Subject: [PATCH 37/69] Removing the option around keeping previews as the property doesn't seem to be removed from SoundCloud anymore. --- soundscrape/soundscrape.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index b8aa8b5..a069b24 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -84,7 +84,6 @@ def main(): parser.add_argument( "-o", "--open", action="store_true", help="Open downloaded files after downloading.", ) - parser.add_argument("-k", "--keep", action="store_true", help="Keep 30-second preview tracks") parser.add_argument( "-v", "--version", action="store_true", default=False, help="Display the current version of SoundScrape", ) @@ -131,7 +130,6 @@ def process_soundcloud(vargs): """Main SoundCloud path.""" url = vargs["artist_url"].lower() track_permalink = vargs["track"].lower() - keep_previews = vargs["keep"] num_tracks = vargs["num_tracks"] use_folders = vargs["folders"] custom_path = vargs["path"] From f2bcfaca7fdcd8914dea07f13d9fa381238df769 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 12:45:27 -0700 Subject: [PATCH 38/69] Extracting function for downloading from a feed. --- soundscrape/soundscrape.py | 87 +++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index a069b24..c47003a 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -197,40 +197,18 @@ def process_soundcloud(vargs): if artist == "": artist = resolved.username - # SoundCloud has a unfortunate bug where some artists don't have any tracks returned using: - # client.get("/users/" + artist_id + "/tracks", limit=200) - # There are a number of reports of this issue since late 2019 on StackOverflow. - # ( reference: https://stackoverflow.com/questions/59204383, https://stackoverflow.com/questions/61807979) - # It seems that the common pattern is that if an artist has any tracks marked as private, - # e.g. the track is not downloadable (only streamable), then SoundCloud won't return any - # tracks for that artist. The way around this is to refer to the artist's RSS feed. This - # is in the form of: - # http://feeds.soundcloud.com/users/soundcloud:users:<artist_id>/sounds.rss - if len(tracks) == 0 and resolved.track_count > 0: - url = f"http://feeds.soundcloud.com/users/soundcloud:users:{resolved.id}/sounds.rss" - response = requests.get(url) - if response.status_code != 200: - puts(colored.red("Problem downloading: ") + colored.white(url)) - return None - - feed = atoma.parse_rss_bytes(response.content) - for feed_item in feed.items: - if len(feed_item.enclosures) == 0: - continue - - for enclosure in feed_item.enclosures: - track_data = { - "url": enclosure.url, - "artist": artist, - "title": feed_item.title, - "date": feed_item.pub_date.year, - "artwork_url": feed.image.url, - } - - filename = download_single_track(track_data, use_folders, custom_path) - if filename is not None: - filenames.append(filename) + # SoundCloud has a unfortunate bug where some artists don't have any tracks returned using: + # client.get("/users/" + artist_id + "/tracks", limit=200) + # There are a number of reports of this issue since late 2019 on StackOverflow. + # ( reference: https://stackoverflow.com/questions/59204383, https://stackoverflow.com/questions/61807979) + # It seems that the common pattern is that if an artist has any tracks marked as private, + # e.g. the track is not downloadable (only streamable), then SoundCloud won't return any + # tracks for that artist. The way around this is to refer to the artist's RSS feed. This + # is in the form of: + # http://feeds.soundcloud.com/users/soundcloud:users:<artist_id>/sounds.rss + + filenames = download_from_soundcloud_feed(resolved.id, artist, use_folders, custom_path) else: filenames = download_tracks( client, tracks, artist, album, num_tracks, downloadable_links_only, use_folders, custom_path, @@ -240,7 +218,7 @@ def process_soundcloud(vargs): open_files(filenames) -def force_download_from_soundcloud(message, use_folders, custom_path): +def force_download_from_soundcloud(message, use_folders=False, custom_path=""): """ Try to force download a track from SoundCloud despite a client error. @@ -274,6 +252,47 @@ def force_download_from_soundcloud(message, use_folders, custom_path): return download_single_track(track_data, use_folders, custom_path) +def download_from_soundcloud_feed(track_id, artist, use_folders=False, custom_path=""): + """ + Use the artist's RSS feed from SoundCloud to get tracks. + + Args: + track_id (int): + artist (str): + use_folders (bool): + custom_path (str): + + Returns: + filenames downloaded from the RSS feed + + """ + filenames = [] + url = f"http://feeds.soundcloud.com/users/soundcloud:users:{track_id}/sounds.rss" + response = requests.get(url) + if response.status_code != 200: + puts(colored.red("Problem downloading: ") + colored.white(url)) + return None + + feed = atoma.parse_rss_bytes(response.content) + for feed_item in feed.items: + if len(feed_item.enclosures) == 0: + continue + + for enclosure in feed_item.enclosures: + track_data = { + "url": enclosure.url, + "artist": artist, + "title": feed_item.title, + "date": feed_item.pub_date.year, + "artwork_url": feed.image.url, + } + + filename = download_single_track(track_data, use_folders, custom_path) + if filename is not None: + filenames.append(filename) + return filenames + + def download_single_track(track_data, use_folders=False, custom_path=""): """ Download a single track from SoundCloud. From 587f899a554dea91494921d77e563c4206ad5818 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 12:48:01 -0700 Subject: [PATCH 39/69] Renaming some functions to better reflect what they do. --- soundscrape/soundscrape.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index c47003a..2b67c62 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -172,7 +172,7 @@ def process_soundcloud(vargs): resolved.collection.extend(next_resolved) resolved = resolved.collection except Exception as e: - filename = force_download_from_soundcloud(str(e), use_folders, custom_path) + filename = force_download_track_from_soundcloud(str(e), use_folders, custom_path) if filename is not None: filenames.append(filename) @@ -208,9 +208,9 @@ def process_soundcloud(vargs): # is in the form of: # http://feeds.soundcloud.com/users/soundcloud:users:<artist_id>/sounds.rss - filenames = download_from_soundcloud_feed(resolved.id, artist, use_folders, custom_path) + filenames = download_tracks_from_soundcloud_feed(resolved.id, artist, use_folders, custom_path) else: - filenames = download_tracks( + filenames = download_tracks_from_soundcloud( client, tracks, artist, album, num_tracks, downloadable_links_only, use_folders, custom_path, ) @@ -218,7 +218,7 @@ def process_soundcloud(vargs): open_files(filenames) -def force_download_from_soundcloud(message, use_folders=False, custom_path=""): +def force_download_track_from_soundcloud(message, use_folders=False, custom_path=""): """ Try to force download a track from SoundCloud despite a client error. @@ -249,10 +249,10 @@ def force_download_from_soundcloud(message, use_folders=False, custom_path=""): "artist": title_tag.split(" by ")[1].split("|")[0].strip(), "title": title_tag.split(" by ")[0].strip(), } - return download_single_track(track_data, use_folders, custom_path) + return download_single_track_from_soundcloud(track_data, use_folders, custom_path) -def download_from_soundcloud_feed(track_id, artist, use_folders=False, custom_path=""): +def download_tracks_from_soundcloud_feed(track_id, artist, use_folders=False, custom_path=""): """ Use the artist's RSS feed from SoundCloud to get tracks. @@ -287,13 +287,13 @@ def download_from_soundcloud_feed(track_id, artist, use_folders=False, custom_pa "artwork_url": feed.image.url, } - filename = download_single_track(track_data, use_folders, custom_path) + filename = download_single_track_from_soundcloud(track_data, use_folders, custom_path) if filename is not None: filenames.append(filename) return filenames -def download_single_track(track_data, use_folders=False, custom_path=""): +def download_single_track_from_soundcloud(track_data, use_folders=False, custom_path=""): """ Download a single track from SoundCloud. @@ -345,7 +345,7 @@ def download_single_track(track_data, use_folders=False, custom_path=""): return filename -def download_tracks( +def download_tracks_from_soundcloud( client, tracks, artist, album, num_tracks=sys.maxsize, downloadable_links_only=False, use_folders=False, custom_path="", ): """Given a list of tracks, iteratively download all of them.""" @@ -362,7 +362,7 @@ def download_tracks( playlist_tracks = get_soundcloud_playlist_data(track_id) playlist_tracks = playlist_tracks["tracks"] if playlist_tracks is not None else None if playlist_tracks is not None: - download_tracks( + download_tracks_from_soundcloud( client, playlist_tracks, artist, album, num_tracks, downloadable_links_only, use_folders, custom_path, ) else: @@ -412,11 +412,11 @@ def download_tracks( else: track_data["url"] = stream.url - filename = download_single_track(track_data, use_folders, custom_path) + filename = download_single_track_from_soundcloud(track_data, use_folders, custom_path) if filename is not None: filenames.append(filename) except Exception as e: - filename = force_download_from_soundcloud(str(e), use_folders, custom_path) + filename = force_download_track_from_soundcloud(str(e), use_folders, custom_path) if filename is not None: filenames.append(filename) else: From 9fad322b9ee07bdab30faf30e493dde9fc6ea0d4 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 12:59:14 -0700 Subject: [PATCH 40/69] Fixing flake8 violations. --- soundscrape/soundscrape.py | 54 +++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 2b67c62..e4fba8a 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -37,7 +37,7 @@ def main(): """ - Main function. + Argument parsing main function. Converts arguments to Python and processes accordingly. """ @@ -127,7 +127,13 @@ def main(): def process_soundcloud(vargs): - """Main SoundCloud path.""" + """ + Process SoundCloud download. + + Args: + vargs (dict): + + """ url = vargs["artist_url"].lower() track_permalink = vargs["track"].lower() num_tracks = vargs["num_tracks"] @@ -474,7 +480,12 @@ def get_soundcloud_track_url(track_id): def process_bandcamp(vargs): - """Main BandCamp path.""" + """ + Process BandCamp download. + + Args: + vargs (dict): + """ artist_url = vargs["artist_url"] if "bandcamp.com" in artist_url or ("://" in artist_url and vargs["bandcamp"]): @@ -648,7 +659,12 @@ def get_bandcamp_metadata(url): def process_mixcloud(vargs): - """Main MixCloud path.""" + """ + Process MixCloud download. + + Args: + vargs (dict): + """ artist_url = vargs["artist_url"] if "mixcloud.com" in artist_url: @@ -761,7 +777,12 @@ def get_mixcloud_data(url): def process_audiomack(vargs): - """Main Audiomack path.""" + """ + Process Audiomack download. + + Args: + vargs (dict): + """ artist_url = vargs["artist_url"] if "audiomack.com" in artist_url: @@ -855,7 +876,12 @@ def get_audiomack_data(url): def process_musicbed(vargs): - """Main MusicBed path.""" + """ + Process MusicBed download. + + Args: + vargs (dict): + """ validated = False if vargs["artist_url"].startswith("https://www.musicbed.com/"): splitted = vargs["artist_url"][len("https://www.musicbed.com/") :].split("/") @@ -937,8 +963,7 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, use_folder login_response_data = demjson.decode(response.text) if not login_response_data["body"]["status"]: puts( - colored.red("scrape_musicbed_url: couldn't login. Aborting. ") - + colored.white("Did you provide correct login and password?") + colored.red("Can't login to MusicBed. ") + colored.white("Did you provide correct login and password?") ) session.close() return [] @@ -1144,7 +1169,12 @@ def tag_file( def open_files(filenames): - """Call the system 'open' command on a file.""" + """ + Call the system 'open' command on a file. + + Args: + filenames (list): + """ if len(filenames) == 0: return @@ -1154,6 +1184,12 @@ def open_files(filenames): def puts_safe(text): + """ + Safely write to the screen. + + Args: + text (str): + """ if sys.platform == "win32": puts(text.encode(sys.stdout.encoding, errors="replace").decode()) else: From 672eb72a3a8b982cb3e6f398e2787e9185d13f55 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 12:59:47 -0700 Subject: [PATCH 41/69] Reformatting. --- soundscrape/soundscrape.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index e4fba8a..8f497f7 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -962,9 +962,7 @@ def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, use_folder return [] login_response_data = demjson.decode(response.text) if not login_response_data["body"]["status"]: - puts( - colored.red("Can't login to MusicBed. ") + colored.white("Did you provide correct login and password?") - ) + puts(colored.red("Can't login to MusicBed. ") + colored.white("Did you provide correct login and password?")) session.close() return [] From 570a0e7a9ef340d5206f4c318cc588933f71e2ea Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 14:32:56 -0700 Subject: [PATCH 42/69] Moving to pytest. --- .github/workflows/main.yml | 11 +- setup.py | 3 + test.sh | 2 - tests/test.py | 314 +++++++++++++------------------------ 4 files changed, 114 insertions(+), 216 deletions(-) delete mode 100755 test.sh diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b51f631..d33654e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,10 +1,6 @@ name: SoundScrape CI -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] +on: [push, pull_request] jobs: build: @@ -21,7 +17,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 black + pip install flake8 black pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Black code formatting run: | @@ -32,3 +28,6 @@ jobs: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --ignore=E203,E231 --statistics + - name: Run tests + run: | + PYTHONPATH=$(pwd) pytest tests/test.py diff --git a/setup.py b/setup.py index 30670d7..1f567b6 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,9 @@ version=soundscrape.__version__, packages=["soundscrape"], install_requires=required, + extras_requires={ + "tests": ["pytest"], + }, include_package_data=True, license="MIT License", description="Scrape an artist from SoundCloud", diff --git a/test.sh b/test.sh deleted file mode 100755 index 1edbe24..0000000 --- a/test.sh +++ /dev/null @@ -1,2 +0,0 @@ -#! /bin/bash -nosetests diff --git a/tests/test.py b/tests/test.py index 5726a0d..556824c 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,214 +1,112 @@ +import pytest import glob import os -import unittest -from soundscrape.soundscrape import get_client from soundscrape.soundscrape import process_soundcloud from soundscrape.soundscrape import process_bandcamp - -class TestSoundscrape(unittest.TestCase): - - ## - # Basic Tests - ## - - def test_test(self): - self.assertTrue(True) - - def test_get_client(self): - client = get_client() - self.assertTrue(bool(client)) - - def test_soundcloud(self): - for f in glob.glob("*.mp3"): - os.unlink(f) - - mp3_count = len(glob.glob1("", "*.mp3")) - vargs = { - "path": "", - "folders": False, - "group": False, - "track": "", - "num_tracks": 9223372036854775807, - "bandcamp": False, - "downloadable": False, - "likes": False, - "open": False, - "artist_url": "https://soundcloud.com/fzpz/revised", - "keep": True, - } - process_soundcloud(vargs) - new_mp3_count = len(glob.glob1("", "*.mp3")) - self.assertTrue(new_mp3_count > mp3_count) - - for f in glob.glob("*.mp3"): - os.unlink(f) - - def test_soundcloud_hard(self): - for f in glob.glob("*.mp3"): - os.unlink(f) - - mp3_count = len(glob.glob1("", "*.mp3")) - vargs = { - "path": "", - "folders": False, - "group": False, - "track": "", - "num_tracks": 1, - "bandcamp": False, - "downloadable": False, - "likes": False, - "open": False, - "artist_url": "puptheband", - "keep": False, - } - process_soundcloud(vargs) - new_mp3_count = len(glob.glob1("", "*.mp3")) - self.assertTrue(new_mp3_count > mp3_count) - self.assertTrue(new_mp3_count == 1) # This used to be 3, but is now 'Not available in United States.' - - for f in glob.glob("*.mp3"): - os.unlink(f) - - def test_soundcloud_hard_2(self): - for f in glob.glob("*.mp3"): - os.unlink(f) - - mp3_count = len(glob.glob1("", "*.mp3")) - vargs = { - "path": "", - "folders": False, - "group": False, - "track": "", - "num_tracks": 1, - "bandcamp": False, - "downloadable": False, - "likes": False, - "open": False, - "artist_url": "https://soundcloud.com/lostdogz/snuggles-chapstick", - "keep": False, - } - process_soundcloud(vargs) - new_mp3_count = len(glob.glob1("", "*.mp3")) - self.assertTrue(new_mp3_count > mp3_count) - self.assertTrue(new_mp3_count == 1) # This used to be 3, but is now 'Not available in United States.' - - for f in glob.glob("*.mp3"): - os.unlink(f) - - # The test URL for this is no longer a WAV. Need a new testcase. - # - # def test_soundcloud_wav(self): - # for f in glob.glob('*.wav'): - # os.unlink(f) - - # wav_count = len(glob.glob1('', "*.wav")) - # vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 1, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://soundcloud.com/coastal/major-lazer-aerosol-can-coastal-flip', 'keep': False} - # process_soundcloud(vargs) - # new_wav_count = len(glob.glob1('', "*.wav")) - # self.assertTrue(new_wav_count > wav_count) - # self.assertTrue(new_wav_count == 1) - - # for f in glob.glob('*.wav'): - # os.unlink(f) - - def test_bandcamp(self): - for f in glob.glob("*.mp3"): - os.unlink(f) - - mp3_count = len(glob.glob1("", "*.mp3")) - vargs = { - "path": "", - "folders": False, - "group": False, - "track": "", - "num_tracks": 9223372036854775807, - "bandcamp": False, - "downloadable": False, - "likes": False, - "open": False, - "artist_url": "https://atenrays.bandcamp.com/track/who-u-think", - } - process_bandcamp(vargs) - new_mp3_count = len(glob.glob1("", "*.mp3")) - self.assertTrue(new_mp3_count > mp3_count) - - for f in glob.glob("*.mp3"): - os.unlink(f) - - def test_bandcamp_slashes(self): - for f in glob.glob("*.mp3"): - os.unlink(f) - - mp3_count = len(glob.glob1("", "*.mp3")) - vargs = { - "path": "", - "folders": False, - "group": False, - "track": "", - "num_tracks": 9223372036854775807, - "bandcamp": False, - "downloadable": False, - "likes": False, - "open": False, - "artist_url": "https://defill.bandcamp.com/track/amnesia-chamber-harvest-skit", - } - process_bandcamp(vargs) - new_mp3_count = len(glob.glob1("", "*.mp3")) - self.assertTrue(new_mp3_count > mp3_count) - - for f in glob.glob("*.mp3"): - os.unlink(f) - - # def test_musicbed(self): - # for f in glob.glob('*.mp3'): - # os.unlink(f) - - # mp3_count = len(glob.glob1('', "*.mp3")) - # vargs = {'login':'musicbedtest@gmail.com', 'password':'oo6alY9T', 'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://www.musicbed.com/albums/be-still/2828'} - # process_musicbed(vargs) - # new_mp3_count = len(glob.glob1('', "*.mp3")) - # self.assertTrue(new_mp3_count > mp3_count) - - # for f in glob.glob('*.mp3'): - # os.unlink(f) - - def test_mixcloud(self): - for f in glob.glob("*.mp3"): - os.unlink(f) - - for f in glob.glob("*.m4a"): - os.unlink(f) - - # shortest mix I could find that was still semi tolerable - # mp3_count = len(glob.glob1('', "*.mp3")) - # m4a_count = len(glob.glob1('', "*.m4a")) - # vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://www.mixcloud.com/Bobby_T_FS15/coffee-cigarettes-saturday-morning-hip-hop-fix/'} - # process_mixcloud(vargs) - # new_mp3_count = len(glob.glob1('', "*.mp3")) - # new_m4a_count = len(glob.glob1('', "*.m4a")) - # self.assertTrue((new_mp3_count > mp3_count) or (new_m4a_count > m4a_count)) - - for f in glob.glob("*.mp3"): - os.unlink(f) - - for f in glob.glob("*.m4a"): - os.unlink(f) - - # def test_audiomack(self): - # for f in glob.glob('*.mp3'): - # os.unlink(f) - - # mp3_count = len(glob.glob1('', "*.mp3")) - # vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'audiomack': True, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://www.audiomack.com/song/bottomfeedermusic/power'} - # process_audiomack(vargs) - # new_mp3_count = len(glob.glob1('', "*.mp3")) - # self.assertTrue(new_mp3_count > mp3_count) - - # for f in glob.glob('*.mp3'): - # os.unlink(f) - - -if __name__ == "__main__": - unittest.main() +def cleanup_files(): + for f in glob.glob("*.mp3"): + os.unlink(f) + for f in glob.glob("*.m4a"): + os.unlink(f) + + +def test_soundcloud(): + cleanup_files() + + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 9223372036854775807, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://soundcloud.com/fzpz/revised", + "keep": True, + } + process_soundcloud(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count > mp3_count + cleanup_files() + +def test_soundcloud_hard_2(): + cleanup_files() + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 1, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://soundcloud.com/lostdogz/snuggles-chapstick", + "keep": False, + } + process_soundcloud(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count > mp3_count + assert new_mp3_count == 1 # This used to be 3, but is now 'Not available in United States.' + cleanup_files() + + +def test_bandcamp(): + cleanup_files() + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 9223372036854775807, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://atenrays.bandcamp.com/track/who-u-think", + } + process_bandcamp(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count > mp3_count + cleanup_files() + + +def test_bandcamp_slashes(): + cleanup_files() + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 9223372036854775807, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://defill.bandcamp.com/track/amnesia-chamber-harvest-skit", + } + process_bandcamp(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count > mp3_count + cleanup_files() + + +def test_mixcloud(): + cleanup_files() + # shortest mix I could find that was still semi tolerable + # mp3_count = len(glob.glob1('', "*.mp3")) + # m4a_count = len(glob.glob1('', "*.m4a")) + # vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://www.mixcloud.com/Bobby_T_FS15/coffee-cigarettes-saturday-morning-hip-hop-fix/'} + # process_mixcloud(vargs) + # new_mp3_count = len(glob.glob1('', "*.mp3")) + # new_m4a_count = len(glob.glob1('', "*.m4a")) + # self.assertTrue((new_mp3_count > mp3_count) or (new_m4a_count > m4a_count)) + cleanup_files() From 41b845e2824a97d5469d811c16ebcc05dd0e0975 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 14:33:25 -0700 Subject: [PATCH 43/69] Removing unnecessary requirements. --- requirements.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index c0ae66a..bb41b40 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,12 +2,9 @@ args>=0.1.0 atoma>=0.0.17 clint>=0.3.2 demjson>=2.2.2 -fudge>=1.0.3 -nose>=1.3.7 requests[security]>=2.9.0 sanitize-filename>=1.2.0 setuptools>=18.0.0 -simplejson>=3.3.1 soundcloud>=0.4.1 wheel>=0.24.0 mutagen>=1.31.0 From 62dbb5150ce74a8143fec98d762d4a9875aee425 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 14:34:34 -0700 Subject: [PATCH 44/69] Fixing syntax error in Github Actions setup. --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d33654e..627858f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -30,4 +30,4 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --ignore=E203,E231 --statistics - name: Run tests run: | - PYTHONPATH=$(pwd) pytest tests/test.py + pytest tests/test.py From a94300d29c1c3fda0ad01336ebd4cc4204cf4075 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 14:35:11 -0700 Subject: [PATCH 45/69] Fixing syntax error in Github Actions setup. --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 627858f..d01f4a8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -29,5 +29,5 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --ignore=E203,E231 --statistics - name: Run tests - run: | - pytest tests/test.py + run: | + PYTHONPATH=$(pwd) pytest tests/test.py From 9e3b734254a3a11a32671173b9d12e831a5ace32 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 14:37:23 -0700 Subject: [PATCH 46/69] Fixing linting and formatting errors. --- setup.py | 4 +--- tests/test.py | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 1f567b6..437f5e8 100644 --- a/setup.py +++ b/setup.py @@ -22,9 +22,7 @@ version=soundscrape.__version__, packages=["soundscrape"], install_requires=required, - extras_requires={ - "tests": ["pytest"], - }, + extras_requires={"tests": ["pytest"],}, include_package_data=True, license="MIT License", description="Scrape an artist from SoundCloud", diff --git a/tests/test.py b/tests/test.py index 556824c..4443d54 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,10 +1,10 @@ -import pytest import glob import os from soundscrape.soundscrape import process_soundcloud from soundscrape.soundscrape import process_bandcamp + def cleanup_files(): for f in glob.glob("*.mp3"): os.unlink(f) @@ -34,6 +34,7 @@ def test_soundcloud(): assert new_mp3_count > mp3_count cleanup_files() + def test_soundcloud_hard_2(): cleanup_files() mp3_count = len(glob.glob1("", "*.mp3")) From e000ed776dd392ccb12e86543173f1bd15c9df42 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 15:01:53 -0700 Subject: [PATCH 47/69] Adding coverage report. --- .github/workflows/main.yml | 7 +++++-- .gitignore | 8 ++++++-- setup.py | 2 +- soundscrape/.gitignore | 1 - 4 files changed, 12 insertions(+), 6 deletions(-) delete mode 100644 soundscrape/.gitignore diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d01f4a8..f444fbc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,7 +17,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 black pytest + pip install flake8 black pytest coverage if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Black code formatting run: | @@ -30,4 +30,7 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --ignore=E203,E231 --statistics - name: Run tests run: | - PYTHONPATH=$(pwd) pytest tests/test.py + PYTHONPATH=$(pwd) coverage run -m --include="soundscrape/*" pytest tests/test.py + - name: Coverage report + run: | + coverage report -m diff --git a/.gitignore b/.gitignore index bb08a0c..bcf4487 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,11 @@ +.coverage/ .vscode/ +build/ +dist/ env/ *.DS_Store *.pyc *.bak -build/ -dist/ +*.mp3 +*.m4a +*.tmp diff --git a/setup.py b/setup.py index 437f5e8..7c80853 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ version=soundscrape.__version__, packages=["soundscrape"], install_requires=required, - extras_requires={"tests": ["pytest"],}, + extras_requires={"tests": ["pytest", "coverage"],}, include_package_data=True, license="MIT License", description="Scrape an artist from SoundCloud", diff --git a/soundscrape/.gitignore b/soundscrape/.gitignore deleted file mode 100644 index bf9e782..0000000 --- a/soundscrape/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.mp3 \ No newline at end of file From ca0f91a595f4b4fee51b6d22e4166fa092b2217c Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 15:02:34 -0700 Subject: [PATCH 48/69] Fixing gitignore. --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index bcf4487..0226924 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,8 @@ -.coverage/ .vscode/ build/ dist/ env/ +.coverage *.DS_Store *.pyc *.bak From c39946cd71a8d79d2ccd9dd899673682ab8f80de Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 15:02:48 -0700 Subject: [PATCH 49/69] Minor test cleanup. --- tests/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test.py b/tests/test.py index 4443d54..0ae318a 100644 --- a/tests/test.py +++ b/tests/test.py @@ -35,7 +35,7 @@ def test_soundcloud(): cleanup_files() -def test_soundcloud_hard_2(): +def test_soundcloud_hard(): cleanup_files() mp3_count = len(glob.glob1("", "*.mp3")) vargs = { @@ -54,7 +54,7 @@ def test_soundcloud_hard_2(): process_soundcloud(vargs) new_mp3_count = len(glob.glob1("", "*.mp3")) assert new_mp3_count > mp3_count - assert new_mp3_count == 1 # This used to be 3, but is now 'Not available in United States.' + assert new_mp3_count == 1 cleanup_files() From faa16dc9cff431e113db23eef6c9a63cb6a06e9c Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 15:24:14 -0700 Subject: [PATCH 50/69] Flake8 fixes. --- setup.py | 4 +++- soundscrape/__init__.py | 1 + soundscrape/soundscrape.py | 24 +++++++++++++----------- tests/test.py | 23 ++++++++++++++++++++--- 4 files changed, 37 insertions(+), 15 deletions(-) diff --git a/setup.py b/setup.py index 7c80853..674eac5 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,10 @@ +"""Package setup for SoundScrape.""" import os -import soundscrape from setuptools import setup +import soundscrape + # Set external files try: from pypandoc import convert diff --git a/soundscrape/__init__.py b/soundscrape/__init__.py index 98195c0..b97dc18 100644 --- a/soundscrape/__init__.py +++ b/soundscrape/__init__.py @@ -1 +1,2 @@ +"""SoundScrape initialization.""" __version__ = "0.31" diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 8f497f7..a5d6005 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -1,25 +1,27 @@ #! /usr/bin/env python +"""Main SoundScrape module.""" from __future__ import unicode_literals import argparse -import atoma -import demjson import os import re -import requests -import soundcloud import sys import urllib - -from clint.textui import colored, puts, progress from datetime import datetime +from os import W_OK, access, mkdir +from os.path import dirname, exists, join +from subprocess import PIPE, Popen + +import atoma +import demjson +import requests +import soundcloud +from clint.textui import colored, progress, puts from mutagen import MutagenError -from mutagen.mp3 import MP3, EasyMP3, HeaderNotFoundError -from mutagen.id3 import APIC, WXXX +from mutagen.id3 import APIC from mutagen.id3 import ID3 as OldID3 -from subprocess import Popen, PIPE -from os.path import dirname, exists, join -from os import access, mkdir, W_OK +from mutagen.id3 import WXXX +from mutagen.mp3 import MP3, EasyMP3, HeaderNotFoundError from sanitize_filename import sanitize #################################################################### diff --git a/tests/test.py b/tests/test.py index 0ae318a..7766b32 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,11 +1,12 @@ +"""Tests for SoundScrape.""" import glob import os -from soundscrape.soundscrape import process_soundcloud -from soundscrape.soundscrape import process_bandcamp +from soundscrape.soundscrape import process_bandcamp, process_soundcloud def cleanup_files(): + """Cleanup files from tests.""" for f in glob.glob("*.mp3"): os.unlink(f) for f in glob.glob("*.m4a"): @@ -13,6 +14,7 @@ def cleanup_files(): def test_soundcloud(): + """Basic SoundCloud test.""" cleanup_files() mp3_count = len(glob.glob1("", "*.mp3")) @@ -36,6 +38,7 @@ def test_soundcloud(): def test_soundcloud_hard(): + """Basic SoundCloud test.""" cleanup_files() mp3_count = len(glob.glob1("", "*.mp3")) vargs = { @@ -59,6 +62,7 @@ def test_soundcloud_hard(): def test_bandcamp(): + """Basic BandCamp test.""" cleanup_files() mp3_count = len(glob.glob1("", "*.mp3")) vargs = { @@ -80,6 +84,7 @@ def test_bandcamp(): def test_bandcamp_slashes(): + """Basic BandCamp test.""" cleanup_files() mp3_count = len(glob.glob1("", "*.mp3")) vargs = { @@ -101,11 +106,23 @@ def test_bandcamp_slashes(): def test_mixcloud(): + """Basic MixCloud test.""" cleanup_files() # shortest mix I could find that was still semi tolerable # mp3_count = len(glob.glob1('', "*.mp3")) # m4a_count = len(glob.glob1('', "*.m4a")) - # vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://www.mixcloud.com/Bobby_T_FS15/coffee-cigarettes-saturday-morning-hip-hop-fix/'} + # vargs = { + # "path": "", + # "folders": False, + # "group": False, + # "track": "", + # "num_tracks": 9223372036854775807, + # "bandcamp": False, + # "downloadable": False, + # "likes": False, + # "open": False, + # "artist_url": "https://www.mixcloud.com/Bobby_T_FS15/coffee-cigarettes-saturday-morning-hip-hop-fix/", + # } # process_mixcloud(vargs) # new_mp3_count = len(glob.glob1('', "*.mp3")) # new_m4a_count = len(glob.glob1('', "*.m4a")) From 03345550a3e8c4dd2898cf99576a429ef5335d25 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 15:39:01 -0700 Subject: [PATCH 51/69] Cleaing up what's a request and a response. --- soundscrape/soundscrape.py | 59 ++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index a5d6005..796fa29 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -225,6 +225,8 @@ def process_soundcloud(vargs): if vargs["open"]: open_files(filenames) + return + def force_download_track_from_soundcloud(message, use_folders=False, custom_path=""): """ @@ -245,12 +247,12 @@ def force_download_track_from_soundcloud(message, use_folders=False, custom_path item_id = message.rsplit("/", 1)[-1].split(".json")[0].split("?client_id")[0] url = get_soundcloud_track_url(item_id) - request = requests.get(url) - if request.status_code != 200: + response = requests.get(url) + if response.status_code != 200: puts(colored.red("Problem downloading: ") + colored.white(url)) return None - title_tag = request.text.split("<title>")[1].split("</title")[0] + title_tag = response.text.split("<title>")[1].split("</title")[0] track_data = { "url": url, @@ -513,6 +515,8 @@ def process_bandcamp(vargs): if vargs["open"]: open_files(filenames) + return + # Largely borrowed from bandcampscrape # ( reference: https://github.com/ronier/bandcampscrape) @@ -610,9 +614,9 @@ def get_bandcamp_metadata(url): or a JSON if we can already parse album/track info from the given url. The JSON is "sloppy". The native python JSON parser often can't deal, so we use the more tolerant demjson instead. """ - request = requests.get(url) + response = requests.get(url) try: - sloppy_json = request.text.split("var TralbumData = ") + sloppy_json = response.text.split("var TralbumData = ") sloppy_json = sloppy_json[1].replace('" + "', "") sloppy_json = sloppy_json.replace("'", "'") sloppy_json = sloppy_json.split("};")[0] + "};" @@ -622,7 +626,7 @@ def get_bandcamp_metadata(url): # so we generate a list of albums/tracks and return it immediately except: regex_all_albums = r'<a href="(/(?:album|track)/[^>]+)">' - all_albums = re.findall(regex_all_albums, request.text, re.MULTILINE) + all_albums = re.findall(regex_all_albums, response.text, re.MULTILINE) album_url_list = list() for album in all_albums: album_url = re.sub(r"music/?$", "", url) + album @@ -631,7 +635,7 @@ def get_bandcamp_metadata(url): # if the JSON parser was successful, use a regex to get all tags # from this album/track, join them and set it as the "genre" regex_tags = r'<a class="tag" href[^>]+>([^<]+)</a>' - tags = re.findall(regex_tags, request.text, re.MULTILINE) + tags = re.findall(regex_tags, response.text, re.MULTILINE) # make sure we treat integers correctly with join() # according to http://stackoverflow.com/a/7323861 # (very unlikely, but better safe than sorry!) @@ -641,12 +645,12 @@ def get_bandcamp_metadata(url): # case the album name remains set as None. output["album_name"] = None regex_album_name = r'album_title\s*:\s*"([^"]+)"\s*,' - match = re.search(regex_album_name, request.text, re.MULTILINE) + match = re.search(regex_album_name, response.text, re.MULTILINE) if match: output["album_name"] = match.group(1) try: - artUrl = request.text.split('"tralbumArt">')[1].split('">')[0].split('href="')[1] + artUrl = response.text.split('"tralbumArt">')[1].split('">')[0].split('href="')[1] output["artFullsizeUrl"] = artUrl except: puts_safe(colored.red("Couldn't get full artwork") + "") @@ -667,16 +671,17 @@ def process_mixcloud(vargs): Args: vargs (dict): """ - artist_url = vargs["artist_url"] + url = vargs["artist_url"].lower() + num_tracks = vargs["num_tracks"] + use_folders = vargs["folders"] + custom_path = vargs["path"] - if "mixcloud.com" in artist_url: - mc_url = artist_url + if "mixcloud.com" in url: + mc_url = url else: - mc_url = f"https://mixcloud.com/{artist_url}" + mc_url = f"https://mixcloud.com/{url}" - filenames = scrape_mixcloud_url( - mc_url, num_tracks=vargs["num_tracks"], use_folders=vargs["folders"], custom_path=vargs["path"], - ) + filenames = scrape_mixcloud_url(mc_url, num_tracks, use_folders, custom_path) if vargs["open"]: open_files(filenames) @@ -741,8 +746,8 @@ def get_mixcloud_data(url): """ data = {} - request = requests.get(url) - preview_mp3_url = request.text.split('m-preview="')[1].split('" m-preview-light')[0] + response = requests.get(url) + preview_mp3_url = response.text.split('m-preview="')[1].split('" m-preview-light')[0] song_uuid = preview_mp3_url.split("previews/")[1].split(".mp3")[0] # Fish for the m4a. @@ -757,11 +762,11 @@ def get_mixcloud_data(url): except: continue - full_title = request.text.split("<title>")[1].split(" | Mixcloud")[0] + full_title = response.text.split("<title>")[1].split(" | Mixcloud")[0] title = full_title.split(" by ")[0].strip() artist = full_title.split(" by ")[1].strip() - img_thumbnail_url = request.text.split('m-thumbnail-url="')[1].split(" ng-class")[0] + img_thumbnail_url = response.text.split('m-thumbnail-url="')[1].split(" ng-class")[0] artwork_url = img_thumbnail_url.replace("60/", "300/").replace("60/", "300/").replace("//", "https://").replace('"', "") data["mp3_url"] = mp3_url @@ -799,6 +804,8 @@ def process_audiomack(vargs): if vargs["open"]: open_files(filenames) + return + def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, use_folders=False, custom_path=""): """ @@ -856,12 +863,12 @@ def get_audiomack_data(url): """ data = {} - request = requests.get(url) + response = requests.get(url) - mp3_url = request.text.split('class="player-icon download-song" title="Download" href="')[1].split('"')[0] - artist = request.text.split('<span class="artist">')[1].split("</span>")[0].strip() - title = request.text.split('<span class="artist">')[1].split("</span>")[1].split("</h1>")[0].strip() - artwork_url = request.text.split('<a class="lightbox-trigger" href="')[1].split('" data')[0].strip() + mp3_url = response.text.split('class="player-icon download-song" title="Download" href="')[1].split('"')[0] + artist = response.text.split('<span class="artist">')[1].split("</span>")[0].strip() + title = response.text.split('<span class="artist">')[1].split("</span>")[1].split("</h1>")[0].strip() + artwork_url = response.text.split('<a class="lightbox-trigger" href="')[1].split('" data')[0].strip() data["mp3_url"] = mp3_url data["title"] = title @@ -911,6 +918,8 @@ def process_musicbed(vargs): if vargs["open"]: open_files(filenames) + return + def scrape_musicbed_url(url, login, password, num_tracks=sys.maxsize, use_folders=False, custom_path=""): """ From 75700a54c7a8f66ced7be858748cd611665936f4 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 15:57:15 -0700 Subject: [PATCH 52/69] MixCloud has completely broken the scraping methodology here. After some exploring, the urls for tracks are obfuscated and likely returned from some Javascript. It's unclear where the track information is coming from. MixCloud's API also prevents working with streams. Instead of leaving in broken options, opting to remove this until additional work can be done to make this work. --- soundscrape/soundscrape.py | 124 ------------------------------------- tests/test.py | 25 -------- 2 files changed, 149 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 796fa29..d025c0d 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -60,9 +60,6 @@ def main(): parser.add_argument( "-b", "--bandcamp", action="store_true", help="Use if downloading from Bandcamp rather than SoundCloud", ) - parser.add_argument( - "-m", "--mixcloud", action="store_true", help="Use if downloading from Mixcloud rather than SoundCloud", - ) parser.add_argument( "-a", "--audiomack", action="store_true", help="Use if downloading from Audiomack rather than SoundCloud", ) @@ -113,8 +110,6 @@ def main(): if "bandcamp.com" in vargs["artist_url"] or vargs["bandcamp"]: process_bandcamp(vargs) - elif "mixcloud.com" in vargs["artist_url"] or vargs["mixcloud"]: - process_mixcloud(vargs) elif "audiomack.com" in vargs["artist_url"] or vargs["audiomack"]: process_audiomack(vargs) elif "musicbed.com" in vargs["artist_url"]: @@ -659,125 +654,6 @@ def get_bandcamp_metadata(url): return output -#################################################################### -# Mixcloud -#################################################################### - - -def process_mixcloud(vargs): - """ - Process MixCloud download. - - Args: - vargs (dict): - """ - url = vargs["artist_url"].lower() - num_tracks = vargs["num_tracks"] - use_folders = vargs["folders"] - custom_path = vargs["path"] - - if "mixcloud.com" in url: - mc_url = url - else: - mc_url = f"https://mixcloud.com/{url}" - - filenames = scrape_mixcloud_url(mc_url, num_tracks, use_folders, custom_path) - - if vargs["open"]: - open_files(filenames) - - return - - -def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, use_folders=False, custom_path=""): - """ - Pull out artist and track info from a MixCloud URL. - - Returns: - list: filenames to open - - """ - filenames = [] - - try: - data = get_mixcloud_data(mc_url) - except: - puts_safe(colored.red("Problem downloading ") + mc_url) - return filenames - - track_artist = sanitize(data["artist"]) - track_title = sanitize(data["title"]) - track_filename = f"{track_artist} - {track_title}{data['mp3_url'][-4:]}" - - if use_folders: - track_artist_path = join(custom_path, track_artist) - if not exists(track_artist_path): - mkdir(track_artist_path) - track_filename = join(track_artist_path, track_filename) - else: - track_filename = join(custom_path, track_filename) - - if exists(track_filename): - puts_safe(colored.yellow("Skipping: ") + colored.white(data["title"]) + colored.yellow("- already exists!")) - return filenames - - puts_safe(colored.green("Downloading: ") + colored.white(f"{data['artist']} - {data['title']} ({track_filename[-4:]})")) - download_file(data["mp3_url"], track_filename) - if track_filename[-4:] == ".mp3": - tag_file( - track_filename, - artist=data["artist"], - title=data["title"], - year=data["year"], - genre="Mix", - artwork_url=data["artwork_url"], - ) - filenames.append(track_filename) - - return filenames - - -def get_mixcloud_data(url): - """ - Scrapes a Mixcloud page for a track's important information. - - Returns: - dict: containing audio data - - """ - data = {} - response = requests.get(url) - preview_mp3_url = response.text.split('m-preview="')[1].split('" m-preview-light')[0] - song_uuid = preview_mp3_url.split("previews/")[1].split(".mp3")[0] - - # Fish for the m4a. - for server in range(1, 23): - # Ex: https://stream6.mixcloud.com/c/m4a/64/1/2/0/9/30fe-23aa-40da-9bf3-4bee2fba649d.m4a - mp3_url = f"https://stream{server}.mixcloud.com/c/m4a/64/{song_uuid}.m4a" - try: - if requests.head(mp3_url).status_code == 200: - if "?" in mp3_url: - mp3_url = mp3_url.split("?")[0] - break - except: - continue - - full_title = response.text.split("<title>")[1].split(" | Mixcloud")[0] - title = full_title.split(" by ")[0].strip() - artist = full_title.split(" by ")[1].strip() - - img_thumbnail_url = response.text.split('m-thumbnail-url="')[1].split(" ng-class")[0] - artwork_url = img_thumbnail_url.replace("60/", "300/").replace("60/", "300/").replace("//", "https://").replace('"', "") - - data["mp3_url"] = mp3_url - data["title"] = title - data["artist"] = artist - data["artwork_url"] = artwork_url - data["year"] = None - - return data - - #################################################################### # Audiomack #################################################################### diff --git a/tests/test.py b/tests/test.py index 7766b32..062f2da 100644 --- a/tests/test.py +++ b/tests/test.py @@ -103,28 +103,3 @@ def test_bandcamp_slashes(): new_mp3_count = len(glob.glob1("", "*.mp3")) assert new_mp3_count > mp3_count cleanup_files() - - -def test_mixcloud(): - """Basic MixCloud test.""" - cleanup_files() - # shortest mix I could find that was still semi tolerable - # mp3_count = len(glob.glob1('', "*.mp3")) - # m4a_count = len(glob.glob1('', "*.m4a")) - # vargs = { - # "path": "", - # "folders": False, - # "group": False, - # "track": "", - # "num_tracks": 9223372036854775807, - # "bandcamp": False, - # "downloadable": False, - # "likes": False, - # "open": False, - # "artist_url": "https://www.mixcloud.com/Bobby_T_FS15/coffee-cigarettes-saturday-morning-hip-hop-fix/", - # } - # process_mixcloud(vargs) - # new_mp3_count = len(glob.glob1('', "*.mp3")) - # new_m4a_count = len(glob.glob1('', "*.m4a")) - # self.assertTrue((new_mp3_count > mp3_count) or (new_m4a_count > m4a_count)) - cleanup_files() From 13093f80a41c1156d51e5ba1f8abeb67be175ab3 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 16:33:41 -0700 Subject: [PATCH 53/69] Revert "MixCloud has completely broken the scraping methodology here. After some exploring, the urls for tracks are obfuscated and likely returned from some Javascript. It's unclear where the track information is coming from. MixCloud's API also prevents working with streams. Instead of leaving in broken options, opting to remove this until additional work can be done to make this work." This reverts commit 75700a54c7a8f66ced7be858748cd611665936f4. --- soundscrape/soundscrape.py | 124 +++++++++++++++++++++++++++++++++++++ tests/test.py | 25 ++++++++ 2 files changed, 149 insertions(+) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index d025c0d..796fa29 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -60,6 +60,9 @@ def main(): parser.add_argument( "-b", "--bandcamp", action="store_true", help="Use if downloading from Bandcamp rather than SoundCloud", ) + parser.add_argument( + "-m", "--mixcloud", action="store_true", help="Use if downloading from Mixcloud rather than SoundCloud", + ) parser.add_argument( "-a", "--audiomack", action="store_true", help="Use if downloading from Audiomack rather than SoundCloud", ) @@ -110,6 +113,8 @@ def main(): if "bandcamp.com" in vargs["artist_url"] or vargs["bandcamp"]: process_bandcamp(vargs) + elif "mixcloud.com" in vargs["artist_url"] or vargs["mixcloud"]: + process_mixcloud(vargs) elif "audiomack.com" in vargs["artist_url"] or vargs["audiomack"]: process_audiomack(vargs) elif "musicbed.com" in vargs["artist_url"]: @@ -654,6 +659,125 @@ def get_bandcamp_metadata(url): return output +#################################################################### +# Mixcloud +#################################################################### + + +def process_mixcloud(vargs): + """ + Process MixCloud download. + + Args: + vargs (dict): + """ + url = vargs["artist_url"].lower() + num_tracks = vargs["num_tracks"] + use_folders = vargs["folders"] + custom_path = vargs["path"] + + if "mixcloud.com" in url: + mc_url = url + else: + mc_url = f"https://mixcloud.com/{url}" + + filenames = scrape_mixcloud_url(mc_url, num_tracks, use_folders, custom_path) + + if vargs["open"]: + open_files(filenames) + + return + + +def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, use_folders=False, custom_path=""): + """ + Pull out artist and track info from a MixCloud URL. + + Returns: + list: filenames to open + + """ + filenames = [] + + try: + data = get_mixcloud_data(mc_url) + except: + puts_safe(colored.red("Problem downloading ") + mc_url) + return filenames + + track_artist = sanitize(data["artist"]) + track_title = sanitize(data["title"]) + track_filename = f"{track_artist} - {track_title}{data['mp3_url'][-4:]}" + + if use_folders: + track_artist_path = join(custom_path, track_artist) + if not exists(track_artist_path): + mkdir(track_artist_path) + track_filename = join(track_artist_path, track_filename) + else: + track_filename = join(custom_path, track_filename) + + if exists(track_filename): + puts_safe(colored.yellow("Skipping: ") + colored.white(data["title"]) + colored.yellow("- already exists!")) + return filenames + + puts_safe(colored.green("Downloading: ") + colored.white(f"{data['artist']} - {data['title']} ({track_filename[-4:]})")) + download_file(data["mp3_url"], track_filename) + if track_filename[-4:] == ".mp3": + tag_file( + track_filename, + artist=data["artist"], + title=data["title"], + year=data["year"], + genre="Mix", + artwork_url=data["artwork_url"], + ) + filenames.append(track_filename) + + return filenames + + +def get_mixcloud_data(url): + """ + Scrapes a Mixcloud page for a track's important information. + + Returns: + dict: containing audio data + + """ + data = {} + response = requests.get(url) + preview_mp3_url = response.text.split('m-preview="')[1].split('" m-preview-light')[0] + song_uuid = preview_mp3_url.split("previews/")[1].split(".mp3")[0] + + # Fish for the m4a. + for server in range(1, 23): + # Ex: https://stream6.mixcloud.com/c/m4a/64/1/2/0/9/30fe-23aa-40da-9bf3-4bee2fba649d.m4a + mp3_url = f"https://stream{server}.mixcloud.com/c/m4a/64/{song_uuid}.m4a" + try: + if requests.head(mp3_url).status_code == 200: + if "?" in mp3_url: + mp3_url = mp3_url.split("?")[0] + break + except: + continue + + full_title = response.text.split("<title>")[1].split(" | Mixcloud")[0] + title = full_title.split(" by ")[0].strip() + artist = full_title.split(" by ")[1].strip() + + img_thumbnail_url = response.text.split('m-thumbnail-url="')[1].split(" ng-class")[0] + artwork_url = img_thumbnail_url.replace("60/", "300/").replace("60/", "300/").replace("//", "https://").replace('"', "") + + data["mp3_url"] = mp3_url + data["title"] = title + data["artist"] = artist + data["artwork_url"] = artwork_url + data["year"] = None + + return data + + #################################################################### # Audiomack #################################################################### diff --git a/tests/test.py b/tests/test.py index 062f2da..7766b32 100644 --- a/tests/test.py +++ b/tests/test.py @@ -103,3 +103,28 @@ def test_bandcamp_slashes(): new_mp3_count = len(glob.glob1("", "*.mp3")) assert new_mp3_count > mp3_count cleanup_files() + + +def test_mixcloud(): + """Basic MixCloud test.""" + cleanup_files() + # shortest mix I could find that was still semi tolerable + # mp3_count = len(glob.glob1('', "*.mp3")) + # m4a_count = len(glob.glob1('', "*.m4a")) + # vargs = { + # "path": "", + # "folders": False, + # "group": False, + # "track": "", + # "num_tracks": 9223372036854775807, + # "bandcamp": False, + # "downloadable": False, + # "likes": False, + # "open": False, + # "artist_url": "https://www.mixcloud.com/Bobby_T_FS15/coffee-cigarettes-saturday-morning-hip-hop-fix/", + # } + # process_mixcloud(vargs) + # new_mp3_count = len(glob.glob1('', "*.mp3")) + # new_m4a_count = len(glob.glob1('', "*.m4a")) + # self.assertTrue((new_mp3_count > mp3_count) or (new_m4a_count > m4a_count)) + cleanup_files() From b5c40e5b2fcef946fb291970c6d2460b5edb7263 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 16:37:54 -0700 Subject: [PATCH 54/69] Revert "Revert "MixCloud has completely broken the scraping methodology here. After some exploring, the urls for tracks are obfuscated and likely returned from some Javascript. It's unclear where the track information is coming from. MixCloud's API also prevents working with streams. Instead of leaving in broken options, opting to remove this until additional work can be done to make this work."" This reverts commit 13093f80a41c1156d51e5ba1f8abeb67be175ab3. --- soundscrape/soundscrape.py | 124 ------------------------------------- tests/test.py | 25 -------- 2 files changed, 149 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 796fa29..d025c0d 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -60,9 +60,6 @@ def main(): parser.add_argument( "-b", "--bandcamp", action="store_true", help="Use if downloading from Bandcamp rather than SoundCloud", ) - parser.add_argument( - "-m", "--mixcloud", action="store_true", help="Use if downloading from Mixcloud rather than SoundCloud", - ) parser.add_argument( "-a", "--audiomack", action="store_true", help="Use if downloading from Audiomack rather than SoundCloud", ) @@ -113,8 +110,6 @@ def main(): if "bandcamp.com" in vargs["artist_url"] or vargs["bandcamp"]: process_bandcamp(vargs) - elif "mixcloud.com" in vargs["artist_url"] or vargs["mixcloud"]: - process_mixcloud(vargs) elif "audiomack.com" in vargs["artist_url"] or vargs["audiomack"]: process_audiomack(vargs) elif "musicbed.com" in vargs["artist_url"]: @@ -659,125 +654,6 @@ def get_bandcamp_metadata(url): return output -#################################################################### -# Mixcloud -#################################################################### - - -def process_mixcloud(vargs): - """ - Process MixCloud download. - - Args: - vargs (dict): - """ - url = vargs["artist_url"].lower() - num_tracks = vargs["num_tracks"] - use_folders = vargs["folders"] - custom_path = vargs["path"] - - if "mixcloud.com" in url: - mc_url = url - else: - mc_url = f"https://mixcloud.com/{url}" - - filenames = scrape_mixcloud_url(mc_url, num_tracks, use_folders, custom_path) - - if vargs["open"]: - open_files(filenames) - - return - - -def scrape_mixcloud_url(mc_url, num_tracks=sys.maxsize, use_folders=False, custom_path=""): - """ - Pull out artist and track info from a MixCloud URL. - - Returns: - list: filenames to open - - """ - filenames = [] - - try: - data = get_mixcloud_data(mc_url) - except: - puts_safe(colored.red("Problem downloading ") + mc_url) - return filenames - - track_artist = sanitize(data["artist"]) - track_title = sanitize(data["title"]) - track_filename = f"{track_artist} - {track_title}{data['mp3_url'][-4:]}" - - if use_folders: - track_artist_path = join(custom_path, track_artist) - if not exists(track_artist_path): - mkdir(track_artist_path) - track_filename = join(track_artist_path, track_filename) - else: - track_filename = join(custom_path, track_filename) - - if exists(track_filename): - puts_safe(colored.yellow("Skipping: ") + colored.white(data["title"]) + colored.yellow("- already exists!")) - return filenames - - puts_safe(colored.green("Downloading: ") + colored.white(f"{data['artist']} - {data['title']} ({track_filename[-4:]})")) - download_file(data["mp3_url"], track_filename) - if track_filename[-4:] == ".mp3": - tag_file( - track_filename, - artist=data["artist"], - title=data["title"], - year=data["year"], - genre="Mix", - artwork_url=data["artwork_url"], - ) - filenames.append(track_filename) - - return filenames - - -def get_mixcloud_data(url): - """ - Scrapes a Mixcloud page for a track's important information. - - Returns: - dict: containing audio data - - """ - data = {} - response = requests.get(url) - preview_mp3_url = response.text.split('m-preview="')[1].split('" m-preview-light')[0] - song_uuid = preview_mp3_url.split("previews/")[1].split(".mp3")[0] - - # Fish for the m4a. - for server in range(1, 23): - # Ex: https://stream6.mixcloud.com/c/m4a/64/1/2/0/9/30fe-23aa-40da-9bf3-4bee2fba649d.m4a - mp3_url = f"https://stream{server}.mixcloud.com/c/m4a/64/{song_uuid}.m4a" - try: - if requests.head(mp3_url).status_code == 200: - if "?" in mp3_url: - mp3_url = mp3_url.split("?")[0] - break - except: - continue - - full_title = response.text.split("<title>")[1].split(" | Mixcloud")[0] - title = full_title.split(" by ")[0].strip() - artist = full_title.split(" by ")[1].strip() - - img_thumbnail_url = response.text.split('m-thumbnail-url="')[1].split(" ng-class")[0] - artwork_url = img_thumbnail_url.replace("60/", "300/").replace("60/", "300/").replace("//", "https://").replace('"', "") - - data["mp3_url"] = mp3_url - data["title"] = title - data["artist"] = artist - data["artwork_url"] = artwork_url - data["year"] = None - - return data - - #################################################################### # Audiomack #################################################################### diff --git a/tests/test.py b/tests/test.py index 7766b32..062f2da 100644 --- a/tests/test.py +++ b/tests/test.py @@ -103,28 +103,3 @@ def test_bandcamp_slashes(): new_mp3_count = len(glob.glob1("", "*.mp3")) assert new_mp3_count > mp3_count cleanup_files() - - -def test_mixcloud(): - """Basic MixCloud test.""" - cleanup_files() - # shortest mix I could find that was still semi tolerable - # mp3_count = len(glob.glob1('', "*.mp3")) - # m4a_count = len(glob.glob1('', "*.m4a")) - # vargs = { - # "path": "", - # "folders": False, - # "group": False, - # "track": "", - # "num_tracks": 9223372036854775807, - # "bandcamp": False, - # "downloadable": False, - # "likes": False, - # "open": False, - # "artist_url": "https://www.mixcloud.com/Bobby_T_FS15/coffee-cigarettes-saturday-morning-hip-hop-fix/", - # } - # process_mixcloud(vargs) - # new_mp3_count = len(glob.glob1('', "*.mp3")) - # new_m4a_count = len(glob.glob1('', "*.m4a")) - # self.assertTrue((new_mp3_count > mp3_count) or (new_m4a_count > m4a_count)) - cleanup_files() From 1d5f3a3f997b01dd9a340d7df43cc1f313039db7 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 16:39:06 -0700 Subject: [PATCH 55/69] Fixing README. --- README.md | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/README.md b/README.md index 9ea98f2..ed1f780 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ SoundScrape [![Build Status](https://github.com/SimplicityGuy/SoundScrape/workflows/SoundScrape%20CI/badge.svg)](https://github.com/SimplicityGuy/SoundScrape/actions) [![Python 3](https://img.shields.io/badge/Python-3-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![PyPI](https://img.shields.io/pypi/v/soundscrape.svg)](https://pypi.python.org/pypi/SoundScrape) ============== -**SoundScrape** makes it super easy to download artists from SoundCloud (and Bandcamp and MixCloud) - even those which don't have download links! It automatically creates ID3 tags as well (including album art), which is handy. +**SoundScrape** makes it super easy to download artists from SoundCloud (and others) - even those which don't have download links! It automatically creates ID3 tags as well (including album art), which is handy. Usage --------- @@ -134,19 +134,6 @@ soundscrape -b http://music.monstercat.com/ Note that the full URL must be included. -Mixcloud --------- - -SoundScrape can also grab mixes from Mixcloud. This feature is extremely expermental and is in no way guaranteed to work! - -Finds the original mp3 of a mix and grabs that (with tags and album art) if it can, or else just gets the raw m4a stream. - -Mixcloud currently only takes an invidiual mix. Capacity for a whole artist's profile due shortly. - -```bash -soundscrape https://www.mixcloud.com/corenewsuploads/flume-essential-mix-2015-10-03/ -of -``` - Audiomack -------- From 0c99efb3a31917e712317f1a3826b2f4c55e5402 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 17:35:37 -0700 Subject: [PATCH 56/69] Cleaning up README. --- README.md | 9 --------- 1 file changed, 9 deletions(-) diff --git a/README.md b/README.md index ed1f780..30688c2 100644 --- a/README.md +++ b/README.md @@ -91,15 +91,6 @@ By default, SoundScrape will try to rip everything it can. However, if you only soundscrape sly-dogg -d ``` -Keep Preview Tracks --------- - -By default, SoundScrape will skip the 30-second preview tracks that SoundCloud now provides. You can choose to keep these preview snippets with the *-k* argument. - -```bash -soundscrape chromeo -k -``` - Folders -------- From 6bf8aad31a329b15ee4d90dfed07e0b906a1147e Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 17:36:10 -0700 Subject: [PATCH 57/69] Ignore htmlcov folder. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0226924..07cec54 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ build/ dist/ env/ +htmlcov/ .coverage *.DS_Store *.pyc From 3ab2c39a2e403d88a775d86159a6091f2d33a10a Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 17:36:53 -0700 Subject: [PATCH 58/69] Minor cleanup including adding specifying the number of tracks to download in SoundCloud feeds path. --- soundscrape/soundscrape.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index d025c0d..cd8f616 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -211,7 +211,7 @@ def process_soundcloud(vargs): # is in the form of: # http://feeds.soundcloud.com/users/soundcloud:users:<artist_id>/sounds.rss - filenames = download_tracks_from_soundcloud_feed(resolved.id, artist, use_folders, custom_path) + filenames = download_tracks_from_soundcloud_feed(resolved.id, artist, num_tracks, use_folders, custom_path) else: filenames = download_tracks_from_soundcloud( client, tracks, artist, album, num_tracks, downloadable_links_only, use_folders, custom_path, @@ -257,13 +257,14 @@ def force_download_track_from_soundcloud(message, use_folders=False, custom_path return download_single_track_from_soundcloud(track_data, use_folders, custom_path) -def download_tracks_from_soundcloud_feed(track_id, artist, use_folders=False, custom_path=""): +def download_tracks_from_soundcloud_feed(track_id, artist, num_tracks=sys.maxsize, use_folders=False, custom_path=""): """ Use the artist's RSS feed from SoundCloud to get tracks. Args: track_id (int): artist (str): + num_tracks (int): use_folders (bool): custom_path (str): @@ -279,8 +280,8 @@ def download_tracks_from_soundcloud_feed(track_id, artist, use_folders=False, cu return None feed = atoma.parse_rss_bytes(response.content) - for feed_item in feed.items: - if len(feed_item.enclosures) == 0: + for i, feed_item in enumerate(feed.items): + if i > num_tracks - 1: continue for enclosure in feed_item.enclosures: @@ -648,7 +649,7 @@ def get_bandcamp_metadata(url): artUrl = response.text.split('"tralbumArt">')[1].split('">')[0].split('href="')[1] output["artFullsizeUrl"] = artUrl except: - puts_safe(colored.red("Couldn't get full artwork") + "") + puts_safe(colored.red("Couldn't get full artwork.")) output["artFullsizeUrl"] = None return output @@ -722,7 +723,6 @@ def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, use_folders=False, cust artist=data["artist"], title=data["title"], year=data["year"], - genre=None, artwork_url=data["artwork_url"], ) filenames.append(track_filename) @@ -738,7 +738,6 @@ def get_audiomack_data(url): dict: containing audio data """ - data = {} response = requests.get(url) mp3_url = response.text.split('class="player-icon download-song" title="Download" href="')[1].split('"')[0] @@ -746,11 +745,12 @@ def get_audiomack_data(url): title = response.text.split('<span class="artist">')[1].split("</span>")[1].split("</h1>")[0].strip() artwork_url = response.text.split('<a class="lightbox-trigger" href="')[1].split('" data')[0].strip() - data["mp3_url"] = mp3_url - data["title"] = title - data["artist"] = artist - data["artwork_url"] = artwork_url - data["year"] = None + data = { + "mp3_url": mp3_url, + "title": title, + "artist": artist, + "artwork_url": artwork_url, + } return data @@ -1026,7 +1026,6 @@ def tag_file( if "-large" in artwork_url: urls.insert(0, artwork_url.replace("-large", "-t500x500")) - image_data = b"0" for a_url in urls: response = requests.get(a_url) if response.status_code != 200: @@ -1034,12 +1033,12 @@ def tag_file( if ".png" in a_url: mime = "image/png" - image_data = response.content + + # encoding=3 means utf-8 + # type=3 means cover image + audio.tags.add(APIC(encoding=3, mime=mime, type=3, desc="Cover", data=response.content)) break - # encoding=3 means utf-8 - # type=3 means cover image - audio.tags.add(APIC(encoding=3, mime=mime, type=3, desc="Cover", data=image_data)) if url is not None: # Some software doesn't seem to use WOAR so the url is saved again as WXXX. audio.tags.add(WXXX(encoding=3, url=url)) From 5a2279055a56dd67ebd8b04a2ed66782c8c9d2a8 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 17:37:25 -0700 Subject: [PATCH 59/69] Adding test for SoundCloud feeds path. --- tests/test.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test.py b/tests/test.py index 062f2da..d3d2199 100644 --- a/tests/test.py +++ b/tests/test.py @@ -61,6 +61,30 @@ def test_soundcloud_hard(): cleanup_files() +def test_soundcloud_feed_download(): + """Basic SoundCloud feed test.""" + cleanup_files() + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 1, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://soundcloud.com/johnocallaghan", + "keep": False, + } + process_soundcloud(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count > mp3_count + assert new_mp3_count == 1 + cleanup_files() + + def test_bandcamp(): """Basic BandCamp test.""" cleanup_files() From 47eca8911feccb9e7b0cd8c0c014dbabfa6f0bec Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 17:38:22 -0700 Subject: [PATCH 60/69] Reformatting. --- soundscrape/soundscrape.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index cd8f616..7a96118 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -719,11 +719,7 @@ def scrape_audiomack_url(mc_url, num_tracks=sys.maxsize, use_folders=False, cust puts_safe(colored.green("Downloading: ") + colored.white(f"{data['artist']} - {data['title']}")) download_file(data["mp3_url"], track_filename) tag_file( - track_filename, - artist=data["artist"], - title=data["title"], - year=data["year"], - artwork_url=data["artwork_url"], + track_filename, artist=data["artist"], title=data["title"], year=data["year"], artwork_url=data["artwork_url"], ) filenames.append(track_filename) @@ -746,10 +742,10 @@ def get_audiomack_data(url): artwork_url = response.text.split('<a class="lightbox-trigger" href="')[1].split('" data')[0].strip() data = { - "mp3_url": mp3_url, - "title": title, - "artist": artist, - "artwork_url": artwork_url, + "mp3_url": mp3_url, + "title": title, + "artist": artist, + "artwork_url": artwork_url, } return data From 21f35f61be9e0b5dcc9abba951b0c3685355b21d Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 17:48:23 -0700 Subject: [PATCH 61/69] Bumping version to 1.00 as there are a number of incompatible changes since the last release. --- soundscrape/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/soundscrape/__init__.py b/soundscrape/__init__.py index b97dc18..18622a7 100644 --- a/soundscrape/__init__.py +++ b/soundscrape/__init__.py @@ -1,2 +1,5 @@ """SoundScrape initialization.""" -__version__ = "0.31" + +# Semantic versioning +# (reference: https://packaging.python.org/guides/distributing-packages-using-setuptools/#semantic-versioning-preferred) +__version__ = "1.00" From 93a2d8ef508c7bb060bf2198e0a91c67cbdd8300 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 20:22:46 -0700 Subject: [PATCH 62/69] Adding Mac and Windows builders. --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f444fbc..5662a63 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -4,7 +4,7 @@ on: [push, pull_request] jobs: build: - runs-on: ubuntu-latest + runs-on: ["ubuntu-latest", "macos-latest", "windows-latest"] strategy: matrix: python-version: [3.7, 3.8] From a615d35468d4035ec4714aee4a01d5890602e864 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 20:31:29 -0700 Subject: [PATCH 63/69] Adding Mac and Windows builders. --- .github/workflows/main.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5662a63..5f702b2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -4,9 +4,10 @@ on: [push, pull_request] jobs: build: - runs-on: ["ubuntu-latest", "macos-latest", "windows-latest"] + runs-on: ${{ matrix.os }} strategy: matrix: + os: ["ubuntu-latest", "macos-latest", "windows-latest"] python-version: [3.7, 3.8] steps: - uses: actions/checkout@v2 From 9bdb23763018b1eb9ff1347f945ed5890c197c4f Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 20:35:43 -0700 Subject: [PATCH 64/69] Removing Windows for now since bash is not supported there. :/ --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5f702b2..9290471 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -7,7 +7,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: ["ubuntu-latest", "macos-latest", "windows-latest"] + os: ["ubuntu-latest", "macos-latest"] python-version: [3.7, 3.8] steps: - uses: actions/checkout@v2 @@ -19,7 +19,7 @@ jobs: run: | python -m pip install --upgrade pip pip install flake8 black pytest coverage - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install -r requirements.txt - name: Black code formatting run: | black --check --line-length 127 . From 833ca015f1cced7c34b16a4ca3e0fcdeec9e6c6a Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 21:09:05 -0700 Subject: [PATCH 65/69] Bug fixes for issues found adding new tests. --- soundscrape/soundscrape.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py index 7a96118..f5b5292 100755 --- a/soundscrape/soundscrape.py +++ b/soundscrape/soundscrape.py @@ -178,6 +178,9 @@ def process_soundcloud(vargs): filename = force_download_track_from_soundcloud(str(e), use_folders, custom_path) if filename is not None: filenames.append(filename) + if vargs["open"]: + open_files(filenames) + return if not hasattr(resolved, "kind"): # This is either likes or sets. @@ -242,6 +245,8 @@ def force_download_track_from_soundcloud(message, use_folders=False, custom_path item_id = message.rsplit("/", 1)[-1].split(".json")[0].split("?client_id")[0] url = get_soundcloud_track_url(item_id) + if url is None: + return None response = requests.get(url) if response.status_code != 200: puts(colored.red("Problem downloading: ") + colored.white(url)) From d9c06b1d01454f3ff0ea7320a625b2328e548c39 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 21:09:21 -0700 Subject: [PATCH 66/69] Adding 2 more test cases. --- tests/test.py | 55 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/tests/test.py b/tests/test.py index d3d2199..b97e0cd 100644 --- a/tests/test.py +++ b/tests/test.py @@ -13,8 +13,8 @@ def cleanup_files(): os.unlink(f) -def test_soundcloud(): - """Basic SoundCloud test.""" +def test_soundcloud_full_url(): + """Basic SoundCloud test with full url.""" cleanup_files() mp3_count = len(glob.glob1("", "*.mp3")) @@ -37,8 +37,8 @@ def test_soundcloud(): cleanup_files() -def test_soundcloud_hard(): - """Basic SoundCloud test.""" +def test_soundcloud_full_url_2(): + """Basic SoundCloud test with full url.""" cleanup_files() mp3_count = len(glob.glob1("", "*.mp3")) vargs = { @@ -61,6 +61,53 @@ def test_soundcloud_hard(): cleanup_files() +def test_soundcloud_artist_only(): + """Basic SoundCloud test with artist name only.""" + cleanup_files() + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 1, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "hd1080pmusic", + "keep": False, + } + process_soundcloud(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count > mp3_count + assert new_mp3_count == 1 + cleanup_files() + + +def test_soundcloud_track_not_found(): + """Basic SoundCloud test with a track that can't be found.""" + cleanup_files() + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "danny-brown-dip", + "num_tracks": 1, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "foolsgoldrecs", + "keep": False, + } + process_soundcloud(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count == 0 + cleanup_files() + + def test_soundcloud_feed_download(): """Basic SoundCloud feed test.""" cleanup_files() From 5787b1b9ff69db1913b80b5edd27549e50a7c058 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 21:28:45 -0700 Subject: [PATCH 67/69] Adding test for one more path for downloading a track. --- tests/test.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test.py b/tests/test.py index b97e0cd..bd9dc0f 100644 --- a/tests/test.py +++ b/tests/test.py @@ -85,6 +85,30 @@ def test_soundcloud_artist_only(): cleanup_files() +def test_soundcloud_tracks(): + """Basic SoundCloud test tracks.""" + cleanup_files() + mp3_count = len(glob.glob1("", "*.mp3")) + vargs = { + "path": "", + "folders": False, + "group": False, + "track": "", + "num_tracks": 1, + "bandcamp": False, + "downloadable": False, + "likes": False, + "open": False, + "artist_url": "https://soundcloud.com/alan-seslowsky/tracks", + "keep": False, + } + process_soundcloud(vargs) + new_mp3_count = len(glob.glob1("", "*.mp3")) + assert new_mp3_count > mp3_count + assert new_mp3_count == 1 + cleanup_files() + + def test_soundcloud_track_not_found(): """Basic SoundCloud test with a track that can't be found.""" cleanup_files() From 408cd96cc2cd8d350f40f7c32109a5aafa40fd66 Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 23 May 2020 21:39:51 -0700 Subject: [PATCH 68/69] Minor fix for unused var. --- tests/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test.py b/tests/test.py index bd9dc0f..2a93a76 100644 --- a/tests/test.py +++ b/tests/test.py @@ -112,7 +112,6 @@ def test_soundcloud_tracks(): def test_soundcloud_track_not_found(): """Basic SoundCloud test with a track that can't be found.""" cleanup_files() - mp3_count = len(glob.glob1("", "*.mp3")) vargs = { "path": "", "folders": False, From 618e11ffa76e9c6467cacb44a8310cb55553b23f Mon Sep 17 00:00:00 2001 From: Robert Wlodarczyk <robert@simplicityguy.com> Date: Sat, 1 Aug 2020 20:18:15 -0700 Subject: [PATCH 69/69] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 30688c2..acfe6e3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ![SoundScrape!](http://i.imgur.com/nHAt2ow.png) -SoundScrape [![Build Status](https://github.com/SimplicityGuy/SoundScrape/workflows/SoundScrape%20CI/badge.svg)](https://github.com/SimplicityGuy/SoundScrape/actions) [![Python 3](https://img.shields.io/badge/Python-3-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![PyPI](https://img.shields.io/pypi/v/soundscrape.svg)](https://pypi.python.org/pypi/SoundScrape) +SoundScrape [![Build Status](https://github.com/Miserlou/SoundScrape/workflows/SoundScrape%20CI/badge.svg)](https://github.com/Miserlou/SoundScrape/actions) [![Python 3](https://img.shields.io/badge/Python-3-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![PyPI](https://img.shields.io/pypi/v/soundscrape.svg)](https://pypi.python.org/pypi/SoundScrape) ============== **SoundScrape** makes it super easy to download artists from SoundCloud (and others) - even those which don't have download links! It automatically creates ID3 tags as well (including album art), which is handy.