Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 23 additions & 6 deletions ReportEngine/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,13 +234,20 @@ def __init__(self, config: Optional[Settings] = None):

# 初始化节点
self._initialize_nodes()

# 初始化文件数量基准
self._initialize_file_baseline()


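# Initialize the report compressor. NOTE(review): the LLM client is handed over only when SUMMARY_STRATEGY == "llm"; confirm whether the "hybrid" strategy also needs it.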
from .utils.report_compressor import ReportCompressor
self.report_compressor = ReportCompressor(
config=self.config,
llm_client=self.llm_client if self.config.SUMMARY_STRATEGY == "llm" else None
)

# 状态
self.state = ReportState()

# GraphRAG 状态数据(每次 load_input_files 时重置)
self._loaded_states = {}

Expand Down Expand Up @@ -476,6 +483,14 @@ def generate_report(

normalized_reports = self._normalize_reports(reports)

# 根据配置决定是否启用报告压缩
if self.config.ENABLE_REPORT_COMPRESSION:
# 为阶段1-2生成摘要版本
summarized_reports = self.report_compressor.summarize_reports(normalized_reports)
logger.info("已生成报告摘要版本用于文档设计和篇幅规划")
else:
summarized_reports = normalized_reports

def emit(event_type: str, payload: Dict[str, Any]):
"""面向Report Engine流通道的事件分发器,保证错误不外泄。"""
if not stream_handler:
Expand Down Expand Up @@ -516,7 +531,7 @@ def emit(event_type: str, payload: Dict[str, Any]):
lambda: self.document_layout_node.run(
sections,
template_text,
normalized_reports,
summarized_reports, # 使用摘要版本
forum_logs,
query,
template_overview,
Expand All @@ -536,7 +551,7 @@ def emit(event_type: str, payload: Dict[str, Any]):
lambda: self.word_budget_node.run(
sections,
layout_design,
normalized_reports,
summarized_reports, # 使用摘要版本
forum_logs,
query,
template_overview,
Expand All @@ -558,7 +573,7 @@ def emit(event_type: str, payload: Dict[str, Any]):

generation_context = self._build_generation_context(
query,
normalized_reports,
normalized_reports, # 传递原始报告,在章节生成时动态提取
forum_logs,
template_result,
layout_design,
Expand Down Expand Up @@ -1071,6 +1086,8 @@ def _build_generation_context(
"query": query,
"template_name": template_result.get("template_name"),
"reports": reports,
"reports_original": reports, # 保留原始报告用于章节生成时的动态提取
"report_compressor": self.report_compressor, # 传递压缩器实例
"forum_logs": self._stringify(forum_logs),
"theme_tokens": theme_tokens,
"style_directives": {
Expand Down
27 changes: 25 additions & 2 deletions ReportEngine/nodes/chapter_generation_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,30 @@ def _build_payload(self, section: TemplateSection, context: Dict[str, Any]) -> D
返回:
dict: 可以直接序列化进提示词的payload,兼顾章节信息与全局约束。
"""
# 获取报告内容
reports_original = context.get("reports_original", {})
reports = context.get("reports", {})

# 如果启用压缩且有原始报告,则提取相关内容
compressor = context.get("report_compressor")
if compressor and reports_original:
try:
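# Read the compression switch from the module-level settings object. NOTE(review): this is the global settings, not necessarily the config the compressor was built with; confirm the two cannot diverge.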
from ..utils.config import settings
if settings.ENABLE_REPORT_COMPRESSION:
# 动态提取与章节相关的内容
reports = compressor.extract_relevant_content(
reports=reports_original,
chapter_title=section.title,
chapter_outline=section.outline
)
logger.info(
f"章节 '{section.title}' 已提取相关内容"
)
except Exception as e:
logger.warning(f"章节内容提取失败: {e},使用原始报告")
reports = reports_original

# 章节篇幅规划(来自WordBudgetNode),用于指导字数与强调点
chapter_plan_map = context.get("chapter_directives", {})
chapter_plan = chapter_plan_map.get(section.chapter_id) if chapter_plan_map else {}
Expand Down Expand Up @@ -355,7 +378,7 @@ def _build_payload(self, section: TemplateSection, context: Dict[str, Any]) -> D
"chapterPlan": chapter_plan,
"wordPlan": context.get("word_plan"),
}

# GraphRAG 增强:如果上下文中包含图谱查询结果,添加到payload
graph_results = context.get("graph_results")
if graph_results:
Expand All @@ -370,7 +393,7 @@ def _build_payload(self, section: TemplateSection, context: Dict[str, Any]) -> D
graph_enhancement = context.get("graph_enhancement_prompt")
if graph_enhancement:
payload["graphEnhancementPrompt"] = graph_enhancement

if chapter_plan:
constraints = payload["constraints"]
if chapter_plan.get("targetWords"):
Expand Down
31 changes: 31 additions & 0 deletions ReportEngine/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,32 @@ class Settings(BaseSettings):
default=3, description="GraphRAG每章节查询次数上限"
)

# 报告压缩配置
ENABLE_REPORT_COMPRESSION: bool = Field(
default=True, description="是否启用报告压缩以避免上下文超限"
)
SUMMARY_STRATEGY: str = Field(
default="rule", description="摘要策略:rule=规则提取, llm=LLM摘要, hybrid=混合"
)
SUMMARY_COMPRESSION_RATIO: float = Field(
default=0.35, description="摘要目标压缩率(0.3-0.4推荐)"
)
EXTRACTION_STRATEGY: str = Field(
default="keyword", description="提取策略:keyword=关键词匹配, embedding=语义相似度"
)
EXTRACTION_MAX_RATIO: float = Field(
default=0.5, description="章节提取内容最大比例(相对原文)"
)
KEYWORD_MATCH_THRESHOLD: int = Field(
default=2, description="段落至少匹配的关键词数量"
)
KEEP_CONTEXT_PARAGRAPHS: bool = Field(
default=True, description="提取时是否保留匹配段落的上下文"
)
CONTEXT_PARAGRAPHS_COUNT: int = Field(
default=1, description="保留的上下文段落数量(前后各N段)"
)

class Config:
"""Pydantic配置:允许从.env读取并兼容大小写"""
env_file = ".env"
Expand Down Expand Up @@ -109,5 +135,10 @@ def print_config(config: Settings):
message += f"PDF 导出: {config.ENABLE_PDF_EXPORT}\n"
message += f"图表样式: {config.CHART_STYLE}\n"
message += f"LLM API Key: {'已配置' if config.REPORT_ENGINE_API_KEY else '未配置'}\n"
message += f"报告压缩: {config.ENABLE_REPORT_COMPRESSION}\n"
if config.ENABLE_REPORT_COMPRESSION:
message += f" 摘要策略: {config.SUMMARY_STRATEGY}\n"
message += f" 压缩率: {config.SUMMARY_COMPRESSION_RATIO:.0%}\n"
message += f" 提取策略: {config.EXTRACTION_STRATEGY}\n"
message += "=========================\n"
logger.info(message)
Loading