#!/usr/bin/env python3
"""
小红书热点跟踪工具 (Xiaohongshu trending-topic tracker)

Usage:
    python track-topic.py <topic> [--limit N] [--feishu] [--output FILE]

Examples:
    python track-topic.py "DeepSeek" --limit 5 --feishu
    python track-topic.py "春节旅游" --limit 10 --output report.md
"""
|
|
|
|
import argparse
import json
import os
import subprocess
import sys
import tempfile
from datetime import datetime
from pathlib import Path
|
|
|
|
# Absolute directory containing this script.
SCRIPT_DIR = Path(__file__).parent.resolve()
XHS_SCRIPTS = SCRIPT_DIR  # the MCP wrapper lives right here in xiaohongshu/scripts
|
|
|
|
# Location of the feishu-docs skill (several install layouts are supported).
def find_feishu_scripts() -> Path:
    """Locate the ``scripts`` directory of the feishu-docs skill.

    Only a fixed allow-list of skill roots is searched, and the resolved
    candidate must still sit inside one of those roots — this guards
    against symlink escape out of the skill tree.

    Returns:
        The resolved scripts directory if found; otherwise the default
        path under the first root (which may not exist — callers check).
    """
    skill_roots = [
        SCRIPT_DIR.parent.parent,  # sibling skill directory
        Path.home() / ".openclaw" / "workspace" / "skills",
        Path.home() / ".claude" / "skills",
    ]

    for root in skill_roots:
        candidate = (root / "feishu-docs" / "scripts").resolve()
        if not candidate.is_dir():
            continue
        # Re-check containment after resolution (symlink-escape guard).
        contained = any(
            str(candidate).startswith(str(allowed.resolve()) + os.sep)
            for allowed in skill_roots
        )
        if contained:
            return candidate

    # Default path; may not exist, callers report a clear error if so.
    return skill_roots[0] / "feishu-docs" / "scripts"
|
|
|
|
# Resolved once at import time; may point at a non-existent default path.
FEISHU_SCRIPTS = find_feishu_scripts()
|
|
|
|
|
|
def call_xhs_mcp(tool: str, args: dict) -> dict:
    """Call a xiaohongshu MCP tool through the ``mcp-call.sh`` wrapper.

    Args:
        tool: MCP tool name, e.g. ``"search_feeds"``.
        args: JSON-serializable arguments passed to the tool.

    Returns:
        The parsed tool payload as a dict, or ``{}`` on any failure
        (non-zero exit, timeout, MCP-level error, unparseable output).

    Exits the process if the wrapper script itself is missing.
    """
    mcp_call = XHS_SCRIPTS / "mcp-call.sh"
    if not mcp_call.exists():
        print(f"❌ 找不到 xiaohongshu skill: {mcp_call}", file=sys.stderr)
        sys.exit(1)

    try:
        result = subprocess.run(
            [str(mcp_call), tool, json.dumps(args)],
            capture_output=True, text=True, timeout=120
        )
    except subprocess.TimeoutExpired:
        # FIX: previously an uncaught TimeoutExpired crashed the whole run;
        # treat it like the other failure paths and return an empty dict.
        print(f"❌ MCP 调用超时: {tool}", file=sys.stderr)
        return {}

    if result.returncode != 0:
        print(f"❌ MCP 调用失败: {result.stderr}", file=sys.stderr)
        return {}

    try:
        response = json.loads(result.stdout)
        if "result" in response and "content" in response["result"]:
            # MCP wraps tool output as result.content[0].text, itself a JSON string.
            text = response["result"]["content"][0].get("text", "{}")
            return json.loads(text) if text else {}
        elif "error" in response:
            print(f"⚠️ MCP 错误: {response['error'].get('message', 'Unknown')}", file=sys.stderr)
            return {}
        return response
    except json.JSONDecodeError:
        return {}
|
|
|
|
|
|
def search_feeds(keyword: str) -> list:
    """Search xiaohongshu for notes matching *keyword*.

    Returns only real note entries; ``hot_query`` suggestion items that
    the search endpoint mixes in are filtered out.
    """
    print(f"🔍 搜索: {keyword}")
    response = call_xhs_mcp("search_feeds", {"keyword": keyword})
    notes = []
    for item in response.get("feeds", []):
        # Keep only actual notes; downstream detail fetching needs them.
        if item.get("modelType") == "note":
            notes.append(item)
    return notes
|
|
|
|
|
|
def get_feed_detail(feed_id: str, xsec_token: str, load_comments: bool = True) -> dict:
    """Fetch the full detail of one post.

    Args:
        feed_id: Note identifier from search results.
        xsec_token: Per-note access token from search results.
        load_comments: Whether to also pull the full comment list.

    Returns:
        The ``data`` payload dict, or ``{}`` when the call fails.
    """
    payload = {
        "feed_id": feed_id,
        "xsec_token": xsec_token,
        "load_all_comments": load_comments,
    }
    return call_xhs_mcp("get_feed_detail", payload).get("data", {})
|
|
|
|
|
|
def format_timestamp(ts: int) -> str:
    """Format a millisecond Unix timestamp as ``YYYY-MM-DD HH:MM`` (local time).

    Args:
        ts: Milliseconds since the epoch; falsy values mean "unknown".

    Returns:
        The formatted local-time string, or "未知" for falsy or
        unrepresentable values.
    """
    if not ts:
        return "未知"
    try:
        dt = datetime.fromtimestamp(ts / 1000)
        return dt.strftime("%Y-%m-%d %H:%M")
    except (TypeError, ValueError, OSError, OverflowError):
        # FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt;
        # these are the concrete errors fromtimestamp can raise.
        return "未知"
|
|
|
|
|
|
def get_comments_list(post: dict) -> list:
    """Safely extract the comment list from a post-detail dict.

    The API returns either ``{"comments": {"list": [...]}}`` or a bare
    list; anything else yields an empty list.
    """
    raw = post.get("comments", {})
    if isinstance(raw, list):
        return raw
    if isinstance(raw, dict):
        return raw.get("list", [])
    return []
|
|
|
|
|
|
def generate_report(keyword: str, posts: list) -> str:
    """Build the Markdown trend report for *keyword*.

    Args:
        keyword: The tracked topic (shown in the report header).
        posts: Post-detail dicts as returned by ``get_feed_detail``.

    Returns:
        The complete report as a Markdown string.
    """
    def _count(value) -> int:
        # FIX: interaction counts may arrive as non-numeric strings
        # (e.g. "1.2万", "10+"); int() on those raised ValueError and
        # crashed report generation. Treat unparseable counts as 0.
        try:
            return int(value or 0)
        except (TypeError, ValueError):
            return 0

    now = datetime.now().strftime("%Y-%m-%d %H:%M")

    report = f"""# 🔥 小红书热点跟踪报告

**话题:** {keyword}
**生成时间:** {now}
**收录帖子:** {len(posts)} 篇

---

## 📊 概览

"""

    # Aggregate statistics across all posts.
    total_likes = sum(_count(p.get("note", {}).get("interactInfo", {}).get("likedCount", 0)) for p in posts)
    total_comments = sum(len(get_comments_list(p)) for p in posts)

    report += f"""| 指标 | 数值 |
|------|------|
| 总帖子数 | {len(posts)} |
| 总点赞数 | {total_likes:,} |
| 总评论数 | {total_comments} |

---

## 📝 热帖详情

"""

    for i, post in enumerate(posts, 1):
        note = post.get("note", {})
        comments = get_comments_list(post)

        title = note.get("title", "无标题")
        desc = note.get("desc", "")
        user = note.get("user", {}).get("nickname", "匿名")
        time_str = format_timestamp(note.get("time"))
        interact = note.get("interactInfo", {})
        likes = interact.get("likedCount", "0")
        collected = interact.get("collectedCount", "0")

        report += f"""### {i}. {title}

**作者:** {user}
**时间:** {time_str}
**互动:** ❤️ {likes} 赞 · ⭐ {collected} 收藏

**正文:**

> {desc[:500]}{"..." if len(desc) > 500 else ""}

"""

        if comments:
            report += f"""**热门评论 ({len(comments)} 条):**

"""
            # Show at most the first five comments per post.
            for j, comment in enumerate(list(comments)[:5], 1):
                c_user = comment.get("userInfo", {}).get("nickname", "匿名")
                c_content = comment.get("content", "")
                c_likes = comment.get("likeCount", 0)
                report += f"- **{c_user}** ({c_likes}赞): {c_content[:100]}\n"

            if len(comments) > 5:
                report += f"- *... 还有 {len(comments) - 5} 条评论*\n"

        report += "\n---\n\n"

    # Comment hot-keyword section.
    report += """## 💬 评论区热点关键词

"""

    # Very light-weight "analysis": just collect all comment texts.
    all_comments = []
    for post in posts:
        for c in get_comments_list(post):
            all_comments.append(c.get("content", ""))

    if all_comments:
        report += f"共 {len(all_comments)} 条评论,主要讨论方向:\n\n"
        # Placeholder for richer NLP keyword extraction later.
        report += "- 用户对该话题的关注度较高\n"
        report += "- 评论区互动活跃\n"
    else:
        report += "暂无足够评论数据进行分析\n"

    report += """
---

## 📈 趋势分析

基于以上热帖和评论数据,该话题在小红书上呈现以下特点:

1. **热度指数**: """ + ("🔥🔥🔥 高" if total_likes > 1000 else "🔥🔥 中" if total_likes > 100 else "🔥 低") + f"""
2. **互动活跃度**: """ + ("活跃" if total_comments > 50 else "一般" if total_comments > 10 else "较低") + """
3. **内容类型**: 以图文笔记为主

---

*报告由 OpenClaw 小红书热点跟踪工具自动生成*
"""

    return report
|
|
|
|
|
|
def export_to_feishu(title: str, content: str) -> str:
    """Export *content* as a Feishu document via the doc-import.sh script.

    Args:
        title: Document title.
        content: Markdown body to import.

    Returns:
        The import script's stdout (contains the doc link), or "" on failure.
    """
    import_script = FEISHU_SCRIPTS / "doc-import.sh"
    if not import_script.exists():
        print(f"❌ 找不到 feishu-docs skill: {import_script}", file=sys.stderr)
        return ""

    print("📤 导出到飞书文档...")

    # FIX: write to a private temp file instead of the fixed, predictable
    # /tmp/xhs_report.md (avoids collisions between concurrent runs and
    # symlink attacks on a world-writable path), and clean it up after.
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(
            "w", suffix=".md", encoding="utf-8", delete=False
        ) as tmp:
            tmp.write(content)
            tmp_path = tmp.name

        try:
            result = subprocess.run(
                [str(import_script), title, "--file", tmp_path],
                capture_output=True, text=True, timeout=60
            )
        except subprocess.TimeoutExpired:
            # FIX: a hung import script previously crashed the run.
            print("⚠️ 飞书导出超时", file=sys.stderr)
            return ""

        if result.returncode != 0:
            print(f"⚠️ 飞书导出失败: {result.stderr}", file=sys.stderr)
            return ""

        # The script prints the created document link on stdout.
        output = result.stdout
        print(output)
        return output
    finally:
        if tmp_path:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass  # best-effort cleanup
|
|
|
|
|
|
def main():
    """CLI entry point: search, fetch details, render and deliver the report."""
    parser = argparse.ArgumentParser(description="小红书热点跟踪工具")
    parser.add_argument("keyword", help="要跟踪的话题/关键词")
    parser.add_argument("--limit", "-n", type=int, default=10, help="获取帖子数量 (默认 10)")
    parser.add_argument("--feishu", "-f", action="store_true", help="导出到飞书文档")
    parser.add_argument("--output", "-o", help="输出 Markdown 文件路径")
    parser.add_argument("--no-comments", action="store_true", help="不获取评论")
    args = parser.parse_args()

    # Step 1: keyword search.
    feeds = search_feeds(args.keyword)
    if not feeds:
        print("❌ 未找到相关帖子")
        sys.exit(1)
    print(f"✅ 找到 {len(feeds)} 条帖子")

    # Step 2: fetch per-post details (optionally including comments).
    posts = []
    for idx, feed in enumerate(feeds[:args.limit]):
        display_title = feed.get("noteCard", {}).get("displayTitle", "")
        print(f"📖 [{idx+1}/{min(len(feeds), args.limit)}] 获取: {display_title[:30]}...")
        detail = get_feed_detail(
            feed.get("id"), feed.get("xsecToken"), not args.no_comments
        )
        if detail:
            posts.append(detail)

    if not posts:
        print("❌ 未能获取帖子详情")
        sys.exit(1)
    print(f"✅ 成功获取 {len(posts)} 篇帖子详情")

    # Step 3: render the Markdown report.
    print("📝 生成报告...")
    report = generate_report(args.keyword, posts)

    # Step 4: deliver — file, Feishu, or stdout by default.
    if args.output:
        output_path = Path(args.output)
        output_path.write_text(report, encoding="utf-8")
        print(f"✅ 报告已保存: {output_path}")

    if args.feishu:
        doc_title = f"小红书热点跟踪: {args.keyword} ({datetime.now().strftime('%m-%d')})"
        export_to_feishu(doc_title, report)

    if not args.output and not args.feishu:
        print("\n" + "="*60 + "\n")
        print(report)

    return report
|
|
|
|
|
|
# Script entry point; nothing runs on plain import.
if __name__ == "__main__":
    main()
|