|
1 | 1 | import re |
2 | 2 | import os |
3 | 3 | import logging |
4 | | -from bs4 import BeautifulSoup |
5 | 4 | from operator import itemgetter |
6 | 5 |
|
class Plugin:
    """Image-resolver plugin that reads Open Graph / Twitter / itemprop meta tags."""

    def get_image(self, url, soup):
        """Return the best image URL advertised by the page's ``<meta>`` tags.

        Args:
            url: Address of the page being resolved; only used in the debug
                log message.
            soup: Parsed document exposing ``find_all(name, attrs)``. The
                returned tags must support ``tag[attribute]`` and raise
                ``KeyError`` when the attribute is absent.

        Returns:
            The chosen image URL as a string, or ``None`` when no usable
            candidate exists. Protocol-relative URLs (``//host/path``) are
            returned with an ``http:`` prefix.
        """
        # Each rule says which <meta> attribute/value pair identifies a tag
        # and which attribute of that tag carries the image URL.
        ogtags = [
            {'type': 'facebook', 'attribute': 'property', 'name': 'og:image', 'value': 'content'},
            {'type': 'facebook', 'attribute': 'rel', 'name': 'image_src', 'value': 'href'},
            {'type': 'twitter', 'attribute': 'name', 'name': 'twitter:image', 'value': 'value'},
            {'type': 'twitter', 'attribute': 'name', 'name': 'twitter:image', 'value': 'content'},
            {'type': 'twitter', 'attribute': 'property', 'name': 'twitter:image', 'value': 'content'},
            {'type': 'image', 'attribute': 'itemprop', 'name': 'image', 'value': 'content'},
        ]

        ogimages = []
        for ogtag in ogtags:
            for tag in soup.find_all('meta', {ogtag['attribute']: ogtag['name']}):
                # Handle each tag on its own: one tag lacking the attribute
                # must not discard its well-formed siblings, and every match
                # is recorded exactly once.
                try:
                    image_url = tag[ogtag['value']]
                except KeyError:
                    continue
                if not image_url:
                    # An empty attribute value is not a usable candidate.
                    continue
                ogimages.append({'url': image_url, 'type': ogtag['type'], 'score': 0})

        if not ogimages:
            return None

        logger = logging.getLogger('ImageResolver')
        logger.debug('Resolving using plugin %s %s', os.path.basename(__file__), url)

        # Prefer URLs that hint at a large rendition and Twitter card images.
        for image in ogimages:
            if re.search('(large|big)', image['url'], re.IGNORECASE):
                image['score'] += 1
            if image['type'] == 'twitter':
                image['score'] += 1

        # The sort is stable, so equally scored candidates keep rule order.
        ogimages.sort(key=itemgetter('score'), reverse=True)
        resolved_image = ogimages[0]['url']

        if re.search('^https?:', resolved_image):
            return resolved_image
        if resolved_image.startswith('//'):
            return 'http:' + resolved_image

        # NOTE(review): the original nesting of the final if/else was lost in
        # this view; a URL that is neither absolute nor protocol-relative is
        # treated as unresolved here - confirm against callers.
        return None
54 | 6 |
|
class Plugin(object):
    """Resolve a page's representative image from its social-media meta tags."""

    # Lookup rules, in priority order for ties:
    # (source type, identifying attribute, identifying value, URL attribute).
    _TAG_RULES = (
        ('facebook', 'property', 'og:image', 'content'),
        ('facebook', 'rel', 'image_src', 'href'),
        ('twitter', 'name', 'twitter:image', 'value'),
        ('twitter', 'name', 'twitter:image', 'content'),
        ('twitter', 'property', 'twitter:image', 'content'),
        ('image', 'itemprop', 'image', 'content'),
    )

    def get_image(self, url, soup):
        """Return the highest-scoring image URL declared in ``<meta>`` tags.

        Args:
            url: Address of the page being resolved; only used in the debug
                log message.
            soup: Parsed document exposing ``find_all(name, attrs)``. The
                returned tags must support ``tag[attribute]`` and raise
                ``KeyError`` when the attribute is absent.

        Returns:
            The chosen image URL as a string, or ``None`` when no usable
            candidate exists. Protocol-relative URLs (``//host/path``) are
            returned with an ``https:`` prefix.
        """
        candidates = []
        for source, attribute, name, url_attribute in self._TAG_RULES:
            for tag in soup.find_all('meta', {attribute: name}):
                # Look each tag up individually so that one tag missing the
                # attribute does not throw away the other matches of the rule.
                try:
                    image_url = tag[url_attribute]
                except KeyError:
                    continue
                if image_url:  # skip empty attribute values
                    candidates.append({'url': image_url, 'type': source, 'score': 0})

        if not candidates:
            return None

        logging.getLogger('ImageResolver').debug(
            'Resolving using plugin %s %s', os.path.basename(__file__), url)

        # Prefer URLs that hint at a large rendition and Twitter card images.
        for candidate in candidates:
            if re.search('(large|big)', candidate['url'], re.IGNORECASE):
                candidate['score'] += 1
            if candidate['type'] == 'twitter':
                candidate['score'] += 1

        # max() keeps the first of equally scored candidates, i.e. rule order.
        resolved_image = max(candidates, key=itemgetter('score'))['url']

        if re.search('^https?:', resolved_image):
            return resolved_image
        if resolved_image.startswith('//'):
            return 'https:' + resolved_image

        # NOTE(review): the original nesting of the final if/else was lost in
        # this view; a URL that is neither absolute nor protocol-relative is
        # treated as unresolved here - confirm against callers.
        return None
0 commit comments