Skip to content

Commit 1741e14

Browse files
committed
Changed WebpageResolver default arguments, added more robust sysargs, added transparent to ruleset
1 parent 1fb399c commit 1741e14

2 files changed

Lines changed: 27 additions & 3 deletions

File tree

bin/resolveimg.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import sys
44
import imageresolver
55
import logging
6+
import time
67
from optparse import OptionParser
78

89
logger = logging.getLogger('ImageResolver')
@@ -14,6 +15,11 @@
1415
opts.add_option("-r","--max-read", dest="max_read",help="Set the max read size")
1516
opts.add_option("-c","--chunk-size",dest="chunk_size",help="Chunk size to read on each pass")
1617
opts.add_option("-a","--read-all",dest="read_all",help="Read the entire image before checking size. Useful for some JPGs. Overrides --max-read")
18+
opts.add_option("--no-adblock", action="store_true",dest="use_adblock_filters",help="Do not use whitelist.txt or blacklist.txt adblock filters")
19+
opts.add_option("--no-ruleset", action="store_true",dest="use_js_ruleset",help="Do not use a custom ruleset for scoring.")
20+
opts.add_option("--benchmark", action="store_true",dest="benchmark",help="Benchmark the total time it takes for the script to return an image")
21+
opts.add_option("-l","--load-images", action="store_true",dest="load_images",help="Load images")
22+
opts.add_option("-p","--parser", dest="parser",help="Choose a parser to use")
1723

1824
(options,args) = opts.parse_args()
1925

@@ -26,7 +32,17 @@
2632
if options.chunk_size:
2733
kw_options['chunk_size'] = int(options.chunk_size)
2834

35+
if options.use_adblock_filters:
36+
kw_options['use_adblock_filters'] = False
37+
38+
if options.use_js_ruleset:
39+
kw_options['use_js_ruleset'] = False
40+
41+
if options.parser:
42+
kw_options['parser'] = options.parser
43+
2944
kw_options['debug'] = options.debug
45+
kw_options['load_images'] = options.load_images
3046

3147
try:
3248
url = args[0]
@@ -39,10 +55,17 @@
3955
print "URL required. Please use the url option or pass a url as the first argument"
4056
sys.exit(-1)
4157

58+
59+
if options.benchmark:
60+
t1 = time.time()
61+
4262
i = imageresolver.ImageResolver(**kw_options)
4363
i.register(imageresolver.FileExtensionResolver())
4464
i.register(imageresolver.PluginResolver())
45-
i.register(imageresolver.WebpageResolver(load_images=True, parser='lxml'))
65+
i.register(imageresolver.WebpageResolver(**kw_options))
4666

4767
print i.resolve(url)
4868

69+
if options.benchmark:
70+
print 'TOTAL TIME', time.time() - t1
71+

imageresolver/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,8 +169,8 @@ def resolve(self,url,**kwargs):
169169

170170
class WebpageResolver(object):
171171
def __init__(self,**kwargs):
172-
self.load_images = kwargs.get('load_images',False)
173-
self.use_js_ruleset = kwargs.get('use_js_ruleset',False)
172+
self.load_images = kwargs.get('load_images',True)
173+
self.use_js_ruleset = kwargs.get('use_js_ruleset',True)
174174
self.use_adblock_filters = kwargs.get('use_adblock_filters',True)
175175
self.significant_surface = kwargs.get('significant_surface', 100*100)
176176

@@ -224,6 +224,7 @@ def _score(self,image):
224224
{'pattern':'1x1','score':-1},
225225
{'pattern':'pixel','score':-1},
226226
{'pattern':'ads','score':-1},
227+
{'pattern':'transparent','score':-1}
227228
]
228229

229230
for r in rules:

0 commit comments

Comments
 (0)