Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/btrixcloud/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ class UserOrgInfoOut(BaseModel):

# ============================================================================
TYPE_RUNNING_STATES = Literal[
"running", "pending-wait", "generate-wacz", "uploading-wacz"
"running", "pending-wait", "generate-wacz", "uploading-wacz", "rate-limited"
]
RUNNING_STATES = get_args(TYPE_RUNNING_STATES)

Expand Down
5 changes: 5 additions & 0 deletions backend/btrixcloud/operator/crawls.py
Original file line number Diff line number Diff line change
Expand Up @@ -1688,6 +1688,7 @@ async def get_redis_crawl_stats(
pipe.hgetall(f"{crawl_id}:size")
pipe.get(f"{crawl_id}:profileUploaded")
pipe.smembers(f"{crawl_id}:reqCrawls")
pipe.get(f"{crawl_id}:rateLimited")

results = await pipe.execute()

Expand All @@ -1707,13 +1708,15 @@ async def get_redis_crawl_stats(

profile_update = results[5]
req_crawls = results[6]
rate_limited = results[7] == "1"

stats = OpCrawlStats(
found=pages_found,
done=pages_done,
size=archive_size,
profile_update=profile_update,
req_crawls=req_crawls,
rate_limited=rate_limited,
)
return stats, sizes

Expand Down Expand Up @@ -1902,6 +1905,8 @@ async def update_crawl_state(
else:
new_status: TYPE_RUNNING_STATES = "running"

if stats.rate_limited:
new_status = "rate-limited"
if status_count.get("generate-wacz"):
new_status = "generate-wacz"
elif status_count.get("uploading-wacz"):
Expand Down
1 change: 1 addition & 0 deletions backend/btrixcloud/operator/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ class OpCrawlStats(CrawlStats):
"""crawl stats + internal profile update"""

profile_update: Optional[str] = ""
rate_limited: Optional[bool] = False


# ============================================================================
Expand Down
10 changes: 10 additions & 0 deletions frontend/src/features/archived-items/crawl-status.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,16 @@ export class CrawlStatus extends TailwindElement {
label = msg("Running");
break;

case "rate-limited":
color = "var(--warning)";
icon = html`<sl-icon
name="exclamation-triangle-fill"
slot="prefix"
style="color: ${color}"
></sl-icon>`;
label = msg("Rate Limited");
break;

case "stopping":
color = "var(--sl-color-violet-600)";
icon = html`<sl-icon
Expand Down
35 changes: 33 additions & 2 deletions frontend/src/pages/org/workflow-detail.ts
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,8 @@ export class WorkflowDetail extends BtrixElement {
return (
this.workflow?.isCrawlRunning &&
!this.workflow.lastCrawlStopping &&
this.workflow.lastCrawlState === "running"
this.workflow.lastCrawlState &&
["running", "rate-limited"].includes(this.workflow.lastCrawlState)
);
}

Expand Down Expand Up @@ -706,7 +707,8 @@ export class WorkflowDetail extends BtrixElement {
${this.renderCrawls()}
</btrix-tab-group-panel>
<btrix-tab-group-panel name=${WorkflowTab.LatestCrawl}>
${this.renderPausedNotice()} ${this.renderLatestCrawl()}
${this.renderRateLimitedNotice()} ${this.renderPausedNotice()}
${this.renderLatestCrawl()}
</btrix-tab-group-panel>
<btrix-tab-group-panel name=${WorkflowTab.Settings}>
${this.renderSettings()}
Expand Down Expand Up @@ -1490,6 +1492,35 @@ export class WorkflowDetail extends BtrixElement {
`;
};

/**
 * Render a sticky alert telling the user that the latest crawl is being
 * blocked or rate limited by the target site.
 *
 * Returns an empty template unless the workflow's `lastCrawlState` is
 * exactly `"rate-limited"`.
 *
 * The alert uses its own element id (`rateLimitedNotice`) rather than
 * `pausedNotice`: this notice is rendered next to `renderPausedNotice()` in
 * the same tab panel, and element ids must be unique within a tree so that
 * id-based lookups and styling resolve to the intended element.
 */
private renderRateLimitedNotice() {
  if (this.workflow?.lastCrawlState !== "rate-limited") {
    return html``;
  }

  return html`
    <btrix-alert
      id="rateLimitedNotice"
      class="sticky top-2 z-50 part-[base]:mb-5"
      variant="info"
    >
      <div class="mb-2 flex justify-between">
        <span class="inline-flex items-center gap-1.5">
          <sl-icon class="text-base" name="exclamation-triangle"></sl-icon>
          <strong class="font-medium">
            ${msg("The site is blocking or rate limiting our crawling")}
          </strong>
        </span>
      </div>
      <div class="text-pretty text-neutral-600">
        <p class="mb-2">
          ${msg(
            "The crawl has encountered error or CAPTCHA pages and is skipping them. See our guide for more info",
          )}
        </p>
      </div>
    </btrix-alert>
  `;
}

private renderLatestCrawlAction() {
if (!this.workflow || !this.lastCrawlId) return;

Expand Down
1 change: 1 addition & 0 deletions frontend/src/types/crawlState.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export const RUNNING_STATES = [
"pending-wait",
"generate-wacz",
"uploading-wacz",
"rate-limited",
] as const;

// Match backend TYPE_WAITING_NOT_PAUSED_STATES in models.py
Expand Down
Loading