|
2 | 2 | import os |
3 | 3 | import logging |
4 | 4 | from bs4 import BeautifulSoup |
| 5 | +from operator import itemgetter |
5 | 6 |
|
class Plugin:
    """Image-resolver plugin that reads image URLs from a page's ``<meta>`` tags."""

    def get_image(self, url, soup):
        """Return the best image URL advertised by the page's meta tags.

        Candidates are collected from Open Graph / ``image_src``,
        ``twitter:image`` and ``itemprop="image"`` meta tags. Each candidate
        starts with a score of 0 and gains one point when its URL contains
        "large" or "big" (case-insensitive) and one point when it comes from
        a twitter rule. The highest-scoring candidate wins; ties go to the
        candidate found first.

        Args:
            url: Address of the page being resolved; only used in the debug
                log message.
            soup: Parsed document exposing ``find_all(name, attrs)`` whose
                results support ``tag[attribute]`` lookup (presumably a
                ``BeautifulSoup`` object; verify against the caller).

        Returns:
            The selected image URL, or ``None`` when no meta tag yields one.
        """
        # Function-scope import: the module header does not import ``re``,
        # which the scoring step below needs.
        import re

        # Each rule: which <meta> attribute/name pair to look up, which
        # attribute of the matching tag holds the URL, and the family it
        # belongs to (used for scoring).
        ogtags = [
            {'type': 'facebook', 'attribute': 'property', 'name': 'og:image', 'value': 'content'},
            {'type': 'facebook', 'attribute': 'rel', 'name': 'image_src', 'value': 'href'},
            {'type': 'twitter', 'attribute': 'name', 'name': 'twitter:image', 'value': 'value'},
            {'type': 'twitter', 'attribute': 'name', 'name': 'twitter:image', 'value': 'content'},
            {'type': 'twitter', 'attribute': 'property', 'name': 'twitter:image', 'value': 'content'},
            {'type': 'image', 'attribute': 'itemprop', 'name': 'image', 'value': 'content'},
        ]

        ogimages = []
        for ogtag in ogtags:
            for tag in soup.find_all('meta', {ogtag['attribute']: ogtag['name']}):
                # Guard each tag on its own so that one tag missing the value
                # attribute does not throw away its siblings, and so every
                # tag contributes exactly one candidate.
                try:
                    image_url = tag[ogtag['value']]
                except KeyError:
                    continue
                ogimages.append({'url': image_url, 'type': ogtag['type'], 'score': 0})

        if not ogimages:
            return None

        logger = logging.getLogger('ImageResolver')
        logger.debug('Resolving using plugin ' + str(os.path.basename(__file__)) + ' ' + str(url))

        if len(ogimages) == 1:
            return ogimages[0]['url']

        # Several candidates: score them and keep the best one.
        size_hint = re.compile('(large|big)', re.IGNORECASE)
        for image in ogimages:
            if size_hint.search(image['url']):
                image['score'] += 1
            if image['type'] == 'twitter':
                image['score'] += 1

        # max() returns the first maximal element, matching what a stable
        # descending sort followed by [0] would pick.
        return max(ogimages, key=lambda image: image['score'])['url']
30 | 49 |
|
0 commit comments