Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions conf/openlibrary.yml
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,18 @@ sentry_cron_jobs:

# Observations cache settings:
observation_cache_duration: 86400

# Proxy configuration.
# http_proxy sets the global default proxy (no auth) via the HTTP_PROXY/HTTPS_PROXY env vars.
# http_proxies overrides that default per service and can carry credentials;
# each entry has url/user/password.
# Dev/local: leave both unset — no proxy needed.
# http_proxy: http://squid.example.com:3128
# http_proxies:
#   recaptcha:
#     url: http://squid.example.com:3128
#     user: ''
#     password: ''
#   amazon:
#     url: http://squid.example.com:3128
#     user: ''
#     password: ''
87 changes: 87 additions & 0 deletions openlibrary/core/vendors.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,93 @@ def __init__(
# AmazonCreatorsApi; replace its rest_client to route all
# outbound HTTP through the proxy.
self.api._api_client.rest_client = rest_client

# Also inject a proxy-aware OAuth2 token manager. The SDK's
# OAuth2TokenManager.refresh_token() calls bare requests.post(),
# which reads HTTP_PROXY from the environment. HTTP_PROXY is
# expected to be a bare (unauthenticated) squid URL, whereas
# Amazon's token endpoint requires authenticated proxy access, so
# the bare URL would produce a 403. We override refresh_token()
# here to use a requests.Session with the authenticated proxy URL
# embedded directly, rather than relying on the env-var lookup.
if proxy_creds:
from urllib.parse import (
quote as _urlquote,
)
from urllib.parse import (
urlparse as _urlparse,
)
from urllib.parse import (
urlunparse as _urlunparse,
)

from creatorsapi_python_sdk.auth.oauth2_config import (
OAuth2Config as _OAuth2Config,
)
from creatorsapi_python_sdk.auth.oauth2_token_manager import (
OAuth2TokenManager as _OAuth2TokenManager,
)

_user, _, _password = proxy_creds.partition(":")
_parsed = _urlparse(proxy_url)
_netloc = f"{_urlquote(_user, safe='')}:{_urlquote(_password, safe='')}@{_parsed.hostname}"
if _parsed.port:
_netloc += f":{_parsed.port}"
_auth_proxy_url = _urlunparse(_parsed._replace(netloc=_netloc))
_proxies = {"http": _auth_proxy_url, "https": _auth_proxy_url}

class _ProxyAwareTokenManager(_OAuth2TokenManager):
    """Routes OAuth2 token refresh through authenticated proxy.

    Overrides ``refresh_token()`` so the token request is sent with the
    credential-bearing proxy URLs held in the enclosing scope's
    ``_proxies`` mapping.
    """

    # Upper bound, in seconds, for the token request so that a stalled
    # proxy or endpoint cannot block the caller indefinitely.
    _REQUEST_TIMEOUT_SECONDS = 10

    def refresh_token(self):
        """Request a new access token and cache it on this manager.

        Returns:
            The new access token string.

        Raises:
            Exception: if the endpoint answers with a non-200 status or the
                response carries no ``access_token``. Any error raised while
                building or sending the request (including
                ``requests`` exceptions such as timeouts) propagates after
                ``clear_token()`` has been called.
        """
        import requests as _req

        try:
            payload = {
                "grant_type": self.config.get_grant_type(),
                "client_id": self.config.get_credential_id(),
                "client_secret": self.config.get_credential_secret(),
                "scope": self.config.get_scope(),
            }
            # The two flavours differ only in how the same payload is
            # encoded: JSON body vs. form-encoded body.
            if self.config.is_lwa():
                body_kwargs = {
                    "json": payload,
                    "headers": {"Content-Type": "application/json"},
                }
            else:
                body_kwargs = {
                    "data": payload,
                    "headers": {"Content-Type": "application/x-www-form-urlencoded"},
                }

            # The context manager closes the session's connection pool
            # even when the request raises.
            with _req.Session() as session:
                # Pass proxies per request: requests lets environment
                # proxies (HTTP_PROXY/HTTPS_PROXY) override values that
                # are only set on ``session.proxies``, which would send
                # this call through the unauthenticated default proxy.
                resp = session.post(
                    self.config.get_cognito_endpoint(),
                    proxies=_proxies,
                    timeout=self._REQUEST_TIMEOUT_SECONDS,
                    **body_kwargs,
                )
                if resp.status_code != 200:
                    raise Exception(f"OAuth2 token request failed with status {resp.status_code}: {resp.text}")
                data = resp.json()

            if "access_token" not in data:
                raise Exception("No access token received from OAuth2 endpoint")
            self.access_token = data["access_token"]
            # Treat the token as expired 30 seconds early; default to a
            # one-hour lifetime when the response omits ``expires_in``.
            self.expires_at = time.time() + data.get("expires_in", 3600) - 30
            return self.access_token
        except Exception:
            # Never leave a stale or partial token behind on failure.
            self.clear_token()
            raise

api_client = self.api._api_client
_oauth_config = _OAuth2Config(
api_client.credential_id,
api_client.credential_secret,
api_client.version,
api_client.auth_endpoint,
)
api_client._token_manager = _ProxyAwareTokenManager(_oauth_config)

except (ImportError, AttributeError):
logger.warning(
"AmazonCreatorsAPI: could not inject proxy — falling back to environment-level proxy (HTTPS_PROXY)",
Expand Down
3 changes: 2 additions & 1 deletion openlibrary/plugins/recaptcha/recaptcha.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import web

from infogami import config
from openlibrary.plugins.upstream.utils import get_proxy_params

logger = logging.getLogger("openlibrary")

Expand Down Expand Up @@ -44,7 +45,7 @@ def accept_error(error_codes: list[str]) -> bool:
}

try:
r = requests.get(url, params=params, timeout=3)
r = requests.get(url, params=params, timeout=3, proxies=get_proxy_params("recaptcha"))
except requests.exceptions.RequestException:
logger.exception("Recaptcha call failed: letting user through")
return True
Expand Down
48 changes: 48 additions & 0 deletions openlibrary/plugins/upstream/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,3 +383,51 @@ def test_get_language_name(add_languages): # noqa: F811
assert utils.get_language_name("/languages/ger", "en") == "German"
# Falls back to name when translation missing for requested language
assert utils.get_language_name("/languages/ger", "fr") == "Deutsch"


class TestGetProxyParams:
    """Checks ``utils.get_proxy_params`` against a patched ``config`` object."""

    def _lookup(self, http_proxies, service_tag="recaptcha"):
        """Call get_proxy_params while ``config.get`` returns *http_proxies*."""
        with patch("openlibrary.plugins.upstream.utils.config") as fake_config:
            fake_config.get.return_value = http_proxies
            return utils.get_proxy_params(service_tag)

    def test_no_http_proxies_config(self):
        # An empty http_proxies section yields no per-service proxies.
        assert self._lookup({}) is None

    def test_unknown_service_tag(self):
        # Only "amazon" is configured, so "recaptcha" has nothing to use.
        configured = {"amazon": {"url": "http://proxy:3128"}}
        assert self._lookup(configured) is None

    def test_url_only_no_auth(self):
        # Without a user the URL is passed through unchanged.
        configured = {"recaptcha": {"url": "http://proxy:3128"}}
        expected = dict.fromkeys(("http", "https"), "http://proxy:3128")
        assert self._lookup(configured) == expected

    def test_url_with_auth(self):
        # Credentials are embedded as user:password@ in front of the host.
        entry = {"url": "http://proxy:3128", "user": "myuser", "password": "mypass"}
        expected = dict.fromkeys(("http", "https"), "http://myuser:mypass@proxy:3128")
        assert self._lookup({"recaptcha": entry}) == expected

    def test_special_chars_in_credentials_are_encoded(self):
        # "@" and ":" inside credentials must be percent-encoded.
        entry = {"url": "http://proxy:3128", "user": "u@ser", "password": "p@ss:word"}
        expected = dict.fromkeys(("http", "https"), "http://u%40ser:p%40ss%3Aword@proxy:3128")
        assert self._lookup({"recaptcha": entry}) == expected
31 changes: 31 additions & 0 deletions openlibrary/plugins/upstream/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1623,6 +1623,37 @@ def setup_requests(config=config) -> None:
logger.info("Requests set up")


def get_proxy_params(service_tag: str) -> dict[str, str] | None:
"""Return a requests-compatible proxies dict for a service requiring proxy auth.

Reads from the ``http_proxies`` config section. Each entry may have:
url: proxy base URL
user: proxy username
password: proxy password

Returns None when no service-specific config exists so that callers can
pass the result directly as ``proxies=`` to requests — None means requests
will fall back to the global HTTP_PROXY/HTTPS_PROXY env vars set by
setup_requests().
"""
service = config.get("http_proxies", {}).get(service_tag)
if not service:
return None

proxy_url = service.get("url", "")
user = service.get("user", "")
password = service.get("password", "")

if user and proxy_url:
parsed = urlparse(proxy_url)
netloc = f"{quote(user, safe='')}:{quote(password, safe='')}@{parsed.hostname}"
if parsed.port:
netloc += f":{parsed.port}"
proxy_url = urlunparse(parsed._replace(netloc=netloc))

return {"http": proxy_url, "https": proxy_url} if proxy_url else None


def setup() -> None:
"""Do required initialization"""
# monkey-patch get_markdown to use OL Flavored Markdown
Expand Down
11 changes: 9 additions & 2 deletions scripts/affiliate_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,8 +638,15 @@ def GET(self, identifier: str) -> str:
def load_config(configfile):
# This loads openlibrary.yml + infobase.yml
openlibrary_load_config(configfile)
http_proxy_url = config.get("http_proxy")
http_proxy_creds = config.get("http_proxy_creds")

# Prefer per-service proxy config under http_proxies.amazon; fall back to the
# legacy flat keys http_proxy / http_proxy_creds for backward compatibility.
amazon_proxy_cfg = config.get("http_proxies", {}).get("amazon", {})
http_proxy_url = amazon_proxy_cfg.get("url") or config.get("http_proxy")
if amazon_proxy_cfg.get("user"):
http_proxy_creds = f"{amazon_proxy_cfg['user']}:{amazon_proxy_cfg.get('password', '')}"
else:
http_proxy_creds = config.get("http_proxy_creds", "")

stats.client = stats.create_stats_client(cfg=config)

Expand Down