Files
xiaohongshu-wiki/.agents/skills/xiaohongshu/scripts/track-topic.py
2026-04-15 09:40:15 +08:00

314 lines
9.4 KiB
Python
Executable File

#!/usr/bin/env python3
"""
小红书热点跟踪工具
用法:
python track-topic.py <话题> [--limit N] [--feishu] [--output FILE]
示例:
python track-topic.py "DeepSeek" --limit 5 --feishu
python track-topic.py "春节旅游" --limit 10 --output report.md
"""
import argparse
import json
import os
import subprocess
import sys
import tempfile
from datetime import datetime
from pathlib import Path
# Directory holding this script; the xiaohongshu MCP helpers live alongside it.
SCRIPT_DIR = Path(__file__).parent.resolve()
XHS_SCRIPTS = SCRIPT_DIR  # already inside xiaohongshu/scripts


def find_feishu_scripts() -> Path:
    """Locate the feishu-docs skill's ``scripts`` directory.

    Only a fixed set of well-known skill roots is searched, and every
    candidate is re-checked after ``resolve()`` so a symlink cannot point
    outside those roots. When nothing is found, the default sibling-skill
    path is returned (it may not exist; callers verify existence).
    """
    search_roots = [
        SCRIPT_DIR.parent.parent,  # sibling skill directory
        Path.home() / ".openclaw" / "workspace" / "skills",
        Path.home() / ".claude" / "skills",
    ]
    # Pre-resolve the allowed prefixes once for the containment check below.
    allowed_prefixes = [str(root.resolve()) + os.sep for root in search_roots]
    for root in search_roots:
        candidate = (root / "feishu-docs" / "scripts").resolve()
        if not candidate.is_dir():
            continue
        # Resolved path must still live under an allowed root (anti symlink escape).
        if any(str(candidate).startswith(prefix) for prefix in allowed_prefixes):
            return candidate
    # Fallback: default location, possibly nonexistent.
    return search_roots[0] / "feishu-docs" / "scripts"


FEISHU_SCRIPTS = find_feishu_scripts()
def call_xhs_mcp(tool: str, args: dict) -> dict:
    """Invoke a Xiaohongshu MCP tool through the ``mcp-call.sh`` helper.

    Args:
        tool: MCP tool name (e.g. "search_feeds").
        args: JSON-serializable argument dict passed to the tool.

    Returns:
        The decoded tool payload as a dict, or ``{}`` on any failure
        (timeout, non-zero exit, MCP error response, malformed JSON).

    Exits the process (exit code 1) when the helper script is missing.
    """
    mcp_call = XHS_SCRIPTS / "mcp-call.sh"
    if not mcp_call.exists():
        print(f"❌ 找不到 xiaohongshu skill: {mcp_call}", file=sys.stderr)
        sys.exit(1)
    try:
        result = subprocess.run(
            [str(mcp_call), tool, json.dumps(args)],
            capture_output=True, text=True, timeout=120
        )
    except subprocess.TimeoutExpired:
        # Previously an uncaught TimeoutExpired crashed the whole run;
        # treat it like any other per-call failure.
        print(f"❌ MCP 调用超时: {tool}", file=sys.stderr)
        return {}
    if result.returncode != 0:
        print(f"❌ MCP 调用失败: {result.stderr}", file=sys.stderr)
        return {}
    try:
        response = json.loads(result.stdout)
        if "result" in response and "content" in response["result"]:
            content = response["result"]["content"]
            if not content:  # empty content array previously raised IndexError
                return {}
            text = content[0].get("text", "{}")
            return json.loads(text) if text else {}
        elif "error" in response:
            print(f"⚠️ MCP 错误: {response['error'].get('message', 'Unknown')}", file=sys.stderr)
            return {}
        return response
    except (json.JSONDecodeError, AttributeError, TypeError):
        # Unparseable or unexpectedly-shaped response → best-effort empty result.
        return {}
def search_feeds(keyword: str) -> list:
    """Search Xiaohongshu for *keyword* and return only real note feeds.

    Entries whose ``modelType`` is not ``"note"`` (e.g. hot_query
    suggestions) are filtered out.
    """
    print(f"🔍 搜索: {keyword}")
    response = call_xhs_mcp("search_feeds", {"keyword": keyword})
    notes = []
    for entry in response.get("feeds", []):
        if entry.get("modelType") == "note":
            notes.append(entry)
    return notes
def get_feed_detail(feed_id: str, xsec_token: str, load_comments: bool = True) -> dict:
    """Fetch a single note's detail payload from the MCP backend.

    Args:
        feed_id: Note identifier returned by the search.
        xsec_token: Per-note access token returned alongside the id.
        load_comments: When True, request the full comment thread too.

    Returns:
        The ``data`` dict of the MCP response, or ``{}`` when absent.
    """
    request = {
        "feed_id": feed_id,
        "xsec_token": xsec_token,
        "load_all_comments": load_comments,
    }
    return call_xhs_mcp("get_feed_detail", request).get("data", {})
def format_timestamp(ts: int) -> str:
    """Format a millisecond Unix timestamp as local ``YYYY-MM-DD HH:MM``.

    Returns the placeholder "未知" for falsy values (0, None) and for
    values the conversion cannot represent.
    """
    if not ts:
        return "未知"
    try:
        dt = datetime.fromtimestamp(ts / 1000)
        return dt.strftime("%Y-%m-%d %H:%M")
    except (TypeError, ValueError, OverflowError, OSError):
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit); these are the errors
        # fromtimestamp / the division can actually raise.
        return "未知"
def get_comments_list(post: dict) -> list:
    """Extract a post's comments, tolerating either container shape.

    The backend sometimes returns ``comments`` as ``{"list": [...]}``
    and sometimes as a bare list; anything else yields ``[]``.
    """
    raw = post.get("comments", {})
    if isinstance(raw, list):
        return raw
    if isinstance(raw, dict):
        return raw.get("list", [])
    return []
def generate_report(keyword: str, posts: list) -> str:
    """Build the full Markdown tracking report for *keyword*.

    Args:
        keyword: The tracked topic, echoed into the report header.
        posts: List of post-detail dicts as returned by get_feed_detail.

    Returns:
        A complete Markdown document: overview stats, per-post details
        with top comments, a comment-keyword section, and a trend summary.
    """
    now = datetime.now().strftime("%Y-%m-%d %H:%M")
    report = f"""# 🔥 小红书热点跟踪报告
**话题:** {keyword}
**生成时间:** {now}
**收录帖子:** {len(posts)}
---
## 📊 概览
"""
    # Aggregate stats; likedCount may be a numeric string or None, hence the `or 0`.
    total_likes = sum(int(p.get("note", {}).get("interactInfo", {}).get("likedCount", 0) or 0) for p in posts)
    total_comments = sum(len(get_comments_list(p)) for p in posts)
    report += f"""| 指标 | 数值 |
|------|------|
| 总帖子数 | {len(posts)} |
| 总点赞数 | {total_likes:,} |
| 总评论数 | {total_comments} |
---
## 📝 热帖详情
"""
    for i, post in enumerate(posts, 1):
        note = post.get("note", {})
        comments = get_comments_list(post)
        title = note.get("title", "无标题")
        desc = note.get("desc", "")
        user = note.get("user", {}).get("nickname", "匿名")
        time_str = format_timestamp(note.get("time"))
        interact = note.get("interactInfo", {})
        likes = interact.get("likedCount", "0")
        collected = interact.get("collectedCount", "0")
        # Per-post section; body text truncated to 500 chars with ellipsis.
        report += f"""### {i}. {title}
**作者:** {user}
**时间:** {time_str}
**互动:** ❤️ {likes} 赞 · ⭐ {collected} 收藏
**正文:**
> {desc[:500]}{"..." if len(desc) > 500 else ""}
"""
        if comments:
            report += f"""**热门评论 ({len(comments)} 条):**
"""
            # Show at most the first 5 comments, each truncated to 100 chars.
            for j, comment in enumerate(list(comments)[:5], 1):
                c_user = comment.get("userInfo", {}).get("nickname", "匿名")
                c_content = comment.get("content", "")
                c_likes = comment.get("likeCount", 0)
                report += f"- **{c_user}** ({c_likes}赞): {c_content[:100]}\n"
            if len(comments) > 5:
                report += f"- *... 还有 {len(comments) - 5} 条评论*\n"
        report += "\n---\n\n"
    # Comment-section hotspot summary.
    report += """## 💬 评论区热点关键词
"""
    # Simple keyword extraction (frequency count) — currently a placeholder.
    all_comments = []
    for post in posts:
        for c in get_comments_list(post):
            all_comments.append(c.get("content", ""))
    if all_comments:
        report += f"{len(all_comments)} 条评论,主要讨论方向:\n\n"
        # Room for real NLP analysis here; simplified for now.
        report += "- 用户对该话题的关注度较高\n"
        report += "- 评论区互动活跃\n"
    else:
        report += "暂无足够评论数据进行分析\n"
    # Trend section: heat tiers by total likes, activity tiers by comment count.
    report += """
---
## 📈 趋势分析
基于以上热帖和评论数据,该话题在小红书上呈现以下特点:
1. **热度指数**: """ + ("🔥🔥🔥 高" if total_likes > 1000 else "🔥🔥 中" if total_likes > 100 else "🔥 低") + f"""
2. **互动活跃度**: """ + ("活跃" if total_comments > 50 else "一般" if total_comments > 10 else "较低") + """
3. **内容类型**: 以图文笔记为主
---
*报告由 OpenClaw 小红书热点跟踪工具自动生成*
"""
    return report
def export_to_feishu(title: str, content: str) -> str:
    """Export the report to a Feishu doc via the feishu-docs skill.

    Args:
        title: Document title.
        content: Markdown body of the document.

    Returns:
        The helper script's stdout (contains the doc link), or "" on
        any failure (missing skill, timeout, non-zero exit).
    """
    import_script = FEISHU_SCRIPTS / "doc-import.sh"
    if not import_script.exists():
        print(f"❌ 找不到 feishu-docs skill: {import_script}", file=sys.stderr)
        return ""
    print("📤 导出到飞书文档...")
    # Use a private, unpredictable temp file instead of the fixed
    # /tmp/xhs_report.md (predictable world-writable path: symlink and
    # concurrent-run clobbering risks), and clean it up afterwards.
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(
            "w", suffix=".md", encoding="utf-8", delete=False
        ) as tmp:
            tmp.write(content)
            tmp_path = Path(tmp.name)
        try:
            result = subprocess.run(
                [str(import_script), title, "--file", str(tmp_path)],
                capture_output=True, text=True, timeout=60
            )
        except subprocess.TimeoutExpired:
            # Previously an uncaught TimeoutExpired crashed the run.
            print("⚠️ 飞书导出超时", file=sys.stderr)
            return ""
        if result.returncode != 0:
            print(f"⚠️ 飞书导出失败: {result.stderr}", file=sys.stderr)
            return ""
        # Echo the helper's output so the user sees the doc link.
        output = result.stdout
        print(output)
        return output
    finally:
        if tmp_path is not None:
            tmp_path.unlink(missing_ok=True)
def main():
    """CLI entry point: search, fetch details, build and deliver the report."""
    parser = argparse.ArgumentParser(description="小红书热点跟踪工具")
    parser.add_argument("keyword", help="要跟踪的话题/关键词")
    parser.add_argument("--limit", "-n", type=int, default=10, help="获取帖子数量 (默认 10)")
    parser.add_argument("--feishu", "-f", action="store_true", help="导出到飞书文档")
    parser.add_argument("--output", "-o", help="输出 Markdown 文件路径")
    parser.add_argument("--no-comments", action="store_true", help="不获取评论")
    args = parser.parse_args()

    # Step 1: search for notes matching the keyword.
    feeds = search_feeds(args.keyword)
    if not feeds:
        print("❌ 未找到相关帖子")
        sys.exit(1)
    print(f"✅ 找到 {len(feeds)} 条帖子")

    # Step 2: fetch details (and optionally comments) for the top N notes.
    posts = []
    for idx, feed in enumerate(feeds[:args.limit], start=1):
        display_title = feed.get("noteCard", {}).get("displayTitle", "")
        print(f"📖 [{idx}/{min(len(feeds), args.limit)}] 获取: {display_title[:30]}...")
        detail = get_feed_detail(
            feed.get("id"), feed.get("xsecToken"), not args.no_comments
        )
        if detail:
            posts.append(detail)
    if not posts:
        print("❌ 未能获取帖子详情")
        sys.exit(1)
    print(f"✅ 成功获取 {len(posts)} 篇帖子详情")

    # Step 3: build the Markdown report.
    print("📝 生成报告...")
    report = generate_report(args.keyword, posts)

    # Step 4: deliver it — local file, Feishu doc, or stdout by default.
    if args.output:
        target = Path(args.output)
        target.write_text(report, encoding="utf-8")
        print(f"✅ 报告已保存: {target}")
    if args.feishu:
        doc_title = f"小红书热点跟踪: {args.keyword} ({datetime.now().strftime('%m-%d')})"
        export_to_feishu(doc_title, report)
    if not (args.output or args.feishu):
        print("\n" + "=" * 60 + "\n")
        print(report)
    return report


if __name__ == "__main__":
    main()