From 2d1c7d47f3b4c217a334ba8bc9d5956c8c412413 Mon Sep 17 00:00:00 2001
From: Someon <someon@openmailbox.org>
Date: Mon, 10 Aug 2015 22:47:44 +0300
Subject: [PATCH 1/2] Escape forward slashes in downloaded image filenames

---
 dumpgenerator.py | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/dumpgenerator.py b/dumpgenerator.py
index d2933ea6..ba68a42a 100755
--- a/dumpgenerator.py
+++ b/dumpgenerator.py
@@ -68,10 +68,16 @@ def getVersion():
     return(__VERSION__)
 
 
-def truncateFilename(other={}, filename=''):
-    """ Truncate filenames when downloading images with large filenames """
-    return filename[:other['filenamelimit']] + \
-        md5(filename.encode('utf-8')).hexdigest() + '.' + filename.split('.')[-1]
+def sanitizeFilename(other={}, filename=''):
+    """ Strip forward slashes and truncate overlong filenames. """
+    """ Also insert a hash when the filename was modified to try to """
+    """ avoid name collisions. """
+    if '/' in filename or len(filename) > other['filenamelimit']:
+        filename = filename.replace('/', '%2F')
+        hash = md5(filename.encode('utf-8')).hexdigest()
+        extension = '.' + filename.split('.')[-1]
+        filename = filename[:other['filenamelimit']] + hash + extension
+    return filename
 
 
 def delay(config={}, session=None):
@@ -1060,13 +1066,10 @@ def generateImageDump(config={}, other={}, images=[], start='', session=None):
         delay(config=config, session=session)
 
         # saving file
-        # truncate filename if length > 100 (100 + 32 (md5) = 132 < 143 (crash
-        # limit). Later .desc is added to filename, so better 100 as max)
-        filename2 = urllib.unquote(filename)
-        if len(filename2) > other['filenamelimit']:
-            # split last . (extension) and then merge
-            filename2 = truncateFilename(other=other, filename=filename2)
-            print 'Filename is too long, truncating. Now it is:', filename2
+        # quote forward slashes and truncate filename if length > 100
+        # (100 + 32 (md5) = 132 < 143 (crash limit). Later .desc is
+        # added to filename, so better 100 as max)
+        filename2 = sanitizeFilename(other=other, filename=urllib.unquote(filename))
         filename3 = u'%s/%s' % (imagepath, filename2)
         imagefile = open(filename3, 'wb')
         r = requests.get(url=url)
@@ -1723,9 +1726,7 @@ def resumePreviousDump(config={}, other={}):
             lastfilename2 = lastfilename
             # return always the complete filename, not the truncated
             lastfilename = filename
-            filename2 = filename
-            if len(filename2) > other['filenamelimit']:
-                filename2 = truncateFilename(other=other, filename=filename2)
+            filename2 = sanitizeFilename(other=other, filename=filename)
             if filename2 not in listdir:
                 complete = False
                 break

From b7023a8bb0b587d95294387c2c9396d6187e004e Mon Sep 17 00:00:00 2001
From: Someon <someon@openmailbox.org>
Date: Tue, 11 Aug 2015 00:46:11 +0300
Subject: [PATCH 2/2] .decode('utf-8') raises an error about ASCII codec

---
 dumpgenerator.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/dumpgenerator.py b/dumpgenerator.py
index ba68a42a..6fbe4ed2 100755
--- a/dumpgenerator.py
+++ b/dumpgenerator.py
@@ -1714,8 +1714,12 @@ def resumePreviousDump(config={}, other={}):
         # checking images directory
         listdir = []
         try:
-            listdir = [n.decode('utf-8') for n in os.listdir('%s/images' % (config['path']))]
-        except:
+            files = os.listdir('%s/images' % (config['path']))
+            try:
+                listdir = [n.decode('utf-8') for n in files]
+            except:
+                listdir = files
+        except OSError:
             pass  # probably directory does not exist
         listdir.sort()
         complete = True