Skip to content

Commit 27fd49d

Browse files
committed
Fixed an issue where OpenGraph didn't return a real image. Fixed tests.
1 parent a4460aa commit 27fd49d

5 files changed

Lines changed: 50 additions & 54 deletions

File tree

imageresolver/__main__.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,6 @@
4444
if options.load_images:
4545
kw_options['load_images'] = False
4646

47-
if options.get_info:
48-
kw_options['get_info'] = True
49-
5047
kw_options['use_adblock_filters'] = options.use_adblock_filters
5148
kw_options['debug'] = options.debug
5249

imageresolver/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def next(self, filters, url, callback):
120120
def resolve(self, url):
121121
logger.debug('Attempting to resolve ' + str(url))
122122
for f in self.filters:
123-
resp = f.resolve(url, debug=self.debug)
123+
resp = f.resolve(url)
124124

125125
# returns the first filter that gives us something
126126
if resp:

imageresolver/plugins/imgur.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,33 +3,33 @@
33
import logging
44

55
from future.standard_library import install_aliases
6+
67
install_aliases()
78
from urllib.parse import urlparse
89

910

1011
class Plugin(object):
11-
def get_image(self, url, soup):
12-
if re.search('http(s*):\/\/(i\.|m\.)*imgur.com\/(gallery\/){0,1}(.*)', url):
13-
logger = logging.getLogger('ImageResolver')
14-
logger.debug('Resolving using plugin {} {}'.format(os.path.basename(__file__), url))
15-
parsed = urlparse(url)
12+
def get_image(self, url, soup):
13+
if re.search('http(s*):\/\/(i\.|m\.)*imgur.com\/(gallery\/){0,1}(.*)', url):
14+
logger = logging.getLogger('ImageResolver')
15+
logger.debug('Resolving using plugin {} {}'.format(os.path.basename(__file__), url))
16+
parsed = urlparse(url)
1617

17-
if parsed.path[1:8] == 'gallery':
18-
logger.debug('Detected imgur gallery.')
19-
tag = soup.find('div', {'id': '1', 'class': 'album-image'})
20-
image = re.findall('i\.imgur.com\/.*\.\w+', str(tag))
21-
if len(image) >= 1:
22-
return 'https://' + image[0]
23-
24-
elif parsed.path[0:3] == '/a/':
25-
logger.debug('Detected imgur album.')
26-
tag = soup.find('meta', {'name': 'twitter:image0:src'})
27-
if tag:
28-
return tag['content']
18+
if parsed.path[1:8] == 'gallery':
19+
logger.debug('Detected imgur gallery.')
20+
tag = soup.find('div', {'id': '1', 'class': 'album-image'})
21+
image = re.findall('i\.imgur.com\/.*\.\w+', str(tag))
22+
if len(image) >= 1:
23+
return 'https://' + image[0]
2924

30-
else:
31-
parsed = urlparse(url)
32-
if re.search('imgur.com(:80)*', parsed.netloc) and os.path.basename(parsed.path):
33-
return 'https://i.imgur.com/' + os.path.basename(parsed.path) + '.jpg'
34-
return None
25+
elif parsed.path[0:3] == '/a/':
26+
logger.debug('Detected imgur album.')
27+
tag = soup.find('meta', {'name': 'twitter:image0:src'})
28+
if tag:
29+
return tag['content']
3530

31+
else:
32+
parsed = urlparse(url)
33+
if re.search('imgur.com(:80)*', parsed.netloc) and os.path.basename(parsed.path):
34+
return 'https://i.imgur.com/' + os.path.basename(parsed.path) + '.jpg'
35+
return None

imageresolver/plugins/opengraph.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import os
33
import logging
44
from operator import itemgetter
5-
5+
from imageresolver import FileExtensionResolver
66

77
class Plugin(object):
88
def get_image(self, url, soup):
@@ -20,8 +20,10 @@ def get_image(self, url, soup):
2020
tags = soup.find_all('meta', {ogtag['attribute']: ogtag['name']})
2121
if tags:
2222
try:
23-
ogimages.extend([{'url': image[ogtag['value']], 'type': ogtag['type'], 'score': 0}
24-
for image in tags])
23+
for image in tags:
24+
url = FileExtensionResolver().resolve(image['url'])
25+
if url:
26+
ogimages.append({'url': url, 'type': ogtag['type'], 'score': 0})
2527
except KeyError as e:
2628
pass
2729

@@ -35,10 +37,15 @@ def get_image(self, url, soup):
3537
resolved_image = ogimages[0]['url']
3638
else:
3739
for image in ogimages:
38-
if re.search('(large|big)', image['url'], re.IGNORECASE):
39-
image['score'] += 1
40-
if image['type'] == 'twitter':
41-
image['score'] += 1
40+
# Sometimes OpenGraph tags don't point to an actual image, so check that the URL resolves before scoring it.
41+
url = FileExtensionResolver().resolve(image['url'])
42+
if not url:
43+
image['score'] = -1
44+
else:
45+
if re.search('(large|big)', image['url'], re.IGNORECASE):
46+
image['score'] += 1
47+
if image['type'] == 'twitter':
48+
image['score'] += 1
4249

4350
ogimages.sort(key=itemgetter('score'), reverse=True)
4451
resolved_image = ogimages[0]['url']

tests/test_imageresolver.py

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,41 @@
1-
import sys
1+
from __future__ import absolute_import
22
import unittest
3-
import requests
4-
from os.path import dirname,abspath
5-
sys.path.append( dirname( dirname( dirname( abspath(__file__)) ) ) )
6-
from imageresolver import ImageResolver, FileExtensionResolver, PluginResolver, WebpageResolver
3+
from imageresolver import ImageResolver, FileExtensionResolver, WebpageResolver
4+
75

86
class TestImageResolver(unittest.TestCase):
97
def setUp(self):
108
# set to an imgur page
11-
self.imgur_page = 'http://imgur.com/adtBv9Y'
9+
self.imgur_page = 'https://imgur.com/adtBv9Y'
1210

1311
# set to the expected result of the imgur page
1412
# also checks the file extension
15-
self.imgur_result = 'http://i.imgur.com/adtBv9Y.jpg'
13+
self.imgur_result = 'https://i.imgur.com/adtBv9Y.jpg'
1614

1715
# set to a web url
18-
self.web_url = 'http://xkcd.com/353/'
16+
self.web_url = 'https://xkcd.com/353/'
1917

2018
# set to the expected return image from the web url
21-
self.web_img = 'http://imgs.xkcd.com/comics/python.png'
19+
self.web_img = 'https://imgs.xkcd.com/comics/python.png'
2220

2321
def test_fetch_image_info(self):
2422
i = ImageResolver()
25-
(ext,width,height) = i.fetch_image_info(self.web_img)
23+
(ext, width, height) = i.fetch_image_info(self.web_img)
2624

27-
self.assertEquals(ext,'.png')
28-
self.assertEquals(width,518)
29-
self.assertEquals(height,588)
25+
self.assertEquals(ext, '.png')
26+
self.assertEquals(width, 518)
27+
self.assertEquals(height, 588)
3028

31-
def test_resolve_plugin(self):
32-
i = ImageResolver()
33-
i.register(PluginResolver())
34-
src = i.resolve(self.imgur_page)
35-
self.assertEquals(src,self.imgur_result)
36-
3729
def test_resolve_fileext(self):
3830
i = ImageResolver()
3931
i.register(FileExtensionResolver())
4032
src = i.resolve(self.web_img)
4133

42-
self.assertEquals(src,self.web_img)
34+
self.assertEquals(src, self.web_img)
4335

4436
def test_resolve_webpage(self):
4537
i = ImageResolver()
4638
i.register(WebpageResolver(load_images=True))
4739
src = i.resolve(self.web_url)
48-
self.assertEquals(src, self.web_img )
40+
self.assertEquals(src, self.web_img)
4941

0 commit comments

Comments
 (0)