diff --git a/wikiteam3/dumpgenerator/cli/cli.py b/wikiteam3/dumpgenerator/cli/cli.py index 217f2ad5..0d9dfb32 100644 --- a/wikiteam3/dumpgenerator/cli/cli.py +++ b/wikiteam3/dumpgenerator/cli/cli.py @@ -48,9 +48,18 @@ def getArgumentParser(): help="resumes previous incomplete dump (requires --path)", ) parser.add_argument("--force", action="store_true", help="") - parser.add_argument("--user", help="Username if authentication is required.") + parser.add_argument("--user", help="Username if MediaWiki authentication is required.") parser.add_argument( - "--pass", dest="password", help="Password if authentication is required." + "--pass", dest="password", help="Password if MediaWiki authentication is required." + ) + parser.add_argument( + "--http-user", dest="http_user", help="Username if HTTP authentication is required." + ) + parser.add_argument( + "--http-pass", dest="http_password", help="Password if HTTP authentication is required." + ) + parser.add_argument( + '--insecure', action='store_true', help='Disable SSL certificate verification' ) parser.add_argument( @@ -127,42 +136,75 @@ def getArgumentParser(): ) return parser -def getParameters(params=None) -> Tuple[Config, Dict]: - # if not params: - # params = sys.argv - parser = getArgumentParser() - args = parser.parse_args(params) - # print (args) +def checkParameters(args=argparse.Namespace()) -> bool: + + passed = True # Don't mix download params and meta info params if (args.xml or args.images) and (args.get_wiki_engine): print("ERROR: Don't mix download params and meta info params") - parser.print_help() - sys.exit(1) + passed = False # No download params and no meta info params? 
Exit if (not args.xml and not args.images) and (not args.get_wiki_engine): print("ERROR: Use at least one download param or meta info param") - parser.print_help() - sys.exit(1) + passed = False + + # Check user and pass (one requires both) + if (args.user and not args.password) or (args.password and not args.user): + print("ERROR: Both --user and --pass are required for authentication.") + passed = False + + # Check http-user and http-pass (one requires both) + if (args.http_user and not args.http_password) or (args.http_password and not args.http_user): + print("ERROR: Both --http-user and --http-pass are required for authentication.") + passed = False + # --curonly requires --xml + if args.curonly and not args.xml: + print("ERROR: --curonly requires --xml") + passed = False + # --xmlrevisions not supported with --curonly if args.xmlrevisions and args.curonly: print("ERROR: --xmlrevisions not supported with --curonly") + passed = False + + # Check URLs + for url in [args.api, args.index, args.wiki]: + if url and (not url.startswith("http://") and not url.startswith("https://")): + print(url) + print("ERROR: URLs must start with http:// or https://") + passed = False + + return passed + +def getParameters(params=None) -> Tuple[Config, Dict]: + # if not params: + # params = sys.argv + + parser = getArgumentParser() + args = parser.parse_args(params) + if checkParameters(args) is not True: + print("\n\n") parser.print_help() sys.exit(1) + # print (args) ######################################## # Create session - cj = http.cookiejar.MozillaCookieJar() - if args.cookies: - cj.load(args.cookies) - print("Using cookies from %s" % args.cookies) - mod_requests_text(requests) + mod_requests_text(requests) # monkey patch session = requests.Session() + # Disable SSL verification + if args.insecure: + session.verify = False + requests.packages.urllib3.disable_warnings() + print("WARNING: SSL certificate verification disabled") + + # Custom session retry try: from 
requests.adapters import HTTPAdapter from urllib3.util.retry import Retry @@ -214,27 +256,27 @@ def sleep(self, response=None): except: # Our urllib3/requests is too old pass + + # Set cookies + cj = http.cookiejar.MozillaCookieJar() + if args.cookies: + cj.load(args.cookies) + print("Using cookies from %s" % args.cookies) session.cookies = cj + + # Setup user agent session.headers.update({"User-Agent": getUserAgent()}) - setupUserAgent(session) + setupUserAgent(session) # monkey patch - # set HTTPBasicAuth - if args.user and args.password: + # Set HTTP Basic Auth + if args.http_user and args.http_password: - session.auth = (args.user, args.password) + session.auth = (args.http_user, args.http_password) # Execute meta info params if args.wiki: if args.get_wiki_engine: print(getWikiEngine(url=args.wiki, session=session)) - sys.exit() - - # check URLs - for url in [args.api, args.index, args.wiki]: - if url and (not url.startswith("http://") and not url.startswith("https://")): - print(url) - print("ERROR: URLs must start with http:// or https://\n") - parser.print_help() - sys.exit(1) + sys.exit(0) # Get API and index and verify api = args.api if args.api else "" @@ -282,7 +324,7 @@ def sleep(self, response=None): sys.exit(1) # login if needed - # TODO: Re-login after session regeneration. 
+ # TODO: Re-login after session expires if args.user and args.password: _session = uniLogin(api=api, index=index, session=session, username=args.user, password=args.password) if _session: @@ -315,11 +357,6 @@ def sleep(self, response=None): ) sys.exit(1) - # check user and pass (one requires both) - if (args.user and not args.password) or (args.password and not args.user): - print("ERROR: Both --user and --pass are required for authentication.") - parser.print_help() - sys.exit(1) namespaces = ["all"] exnamespaces = [] @@ -356,11 +393,6 @@ def sleep(self, response=None): else: exnamespaces = [int(i) for i in ns.split(",")] - # --curonly requires --xml - if args.curonly and not args.xml: - print("--curonly requires --xml\n") - parser.print_help() - sys.exit(1) config = newConfig({ "curonly": args.curonly, diff --git a/wikiteam3/utils/monkey_patch.py b/wikiteam3/utils/monkey_patch.py index a92463ec..c380efa6 100644 --- a/wikiteam3/utils/monkey_patch.py +++ b/wikiteam3/utils/monkey_patch.py @@ -1,6 +1,7 @@ import requests def mod_requests_text(requests: requests): + """ Monkey patch `requests.Response.text` to remove BOM """ def new_text(self): return self.content.lstrip(b'\xef\xbb\xbf').decode(self.encoding) requests.Response.text = property(new_text) \ No newline at end of file