feat: 更新一堆 ai 初始化以及 skill
This commit is contained in:
98
.agents/skills/xiaohongshu/tools/xhs-downloader/README.md
Normal file
98
.agents/skills/xiaohongshu/tools/xhs-downloader/README.md
Normal file
@@ -0,0 +1,98 @@
|
||||
# XHS-Downloader 辅助工具
|
||||
|
||||
配合 [XHS-Downloader](https://github.com/JoeanAmier/XHS-Downloader) 使用的工具脚本,用于下载小红书收藏/点赞笔记并导出为 OpenClaw 记忆库格式。
|
||||
|
||||
## 依赖
|
||||
|
||||
需要先安装 XHS-Downloader:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/JoeanAmier/XHS-Downloader.git
|
||||
cd XHS-Downloader
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## 使用流程
|
||||
|
||||
### 1. 获取收藏/点赞链接(使用油猴脚本)
|
||||
|
||||
手动复制链接效率低,推荐使用 XHS-Downloader 提供的油猴脚本批量提取:
|
||||
|
||||
**安装脚本:**
|
||||
|
||||
1. 安装 [Tampermonkey](https://www.tampermonkey.net/) 浏览器扩展
|
||||
2. 安装用户脚本:[XHS-Downloader.js](https://raw.githubusercontent.com/JoeanAmier/XHS-Downloader/refs/heads/master/static/XHS-Downloader.js)
|
||||
|
||||
**提取链接:**
|
||||
|
||||
1. 打开 [小红书网页版](https://www.xiaohongshu.com) 并登录
|
||||
2. 进入个人主页 → **收藏** 或 **点赞** 页面
|
||||
3. 点击 Tampermonkey 图标,选择:
|
||||
- `提取收藏作品链接`
|
||||
- `提取点赞作品链接`
|
||||
4. 脚本会自动滚动页面加载全部内容(需先在脚本设置中开启自动滚动,见下方"注意")
|
||||
5. 提取完成后链接自动复制到剪贴板
|
||||
|
||||
**保存链接:**
|
||||
|
||||
将剪贴板内容粘贴到 `links.md` 文件:
|
||||
|
||||
```
|
||||
https://www.xiaohongshu.com/explore/xxx?xsec_token=...
|
||||
https://www.xiaohongshu.com/explore/yyy?xsec_token=...
|
||||
```
|
||||
|
||||
> **注意**:自动滚动功能默认关闭,需在脚本设置中手动开启。开启后可能触发风控,建议适度使用。
|
||||
|
||||
### 2. 批量下载
|
||||
|
||||
```bash
|
||||
# 先将本目录下的脚本复制到 XHS-Downloader 目录,然后在该目录下运行
|
||||
python batch_download.py links.md
|
||||
```
|
||||
|
||||
下载的数据会保存到 `Volume/Download/ExploreData.db`。
|
||||
|
||||
### 3. 导出记忆库
|
||||
|
||||
**方式 A:导出为单文件**
|
||||
|
||||
```bash
|
||||
python export_memory.py
|
||||
# 生成 xhs_memory.md
|
||||
```
|
||||
|
||||
**方式 B:导出为多文件(推荐用于 OpenClaw)**
|
||||
|
||||
```bash
|
||||
python export_to_workspace.py
|
||||
# 生成到 ~/.openclaw/workspace/xhs-memory/
|
||||
```
|
||||
|
||||
### 4. 配置 OpenClaw 记忆搜索
|
||||
|
||||
编辑 `~/.openclaw/openclaw.json`,添加:
|
||||
|
||||
```json
|
||||
{
|
||||
"memorySearch": {
|
||||
"extraPaths": [
|
||||
"~/.openclaw/workspace/xhs-memory"
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
或者如果使用单文件导出,将 `xhs_memory.md` 放到 workspace 目录下。
|
||||
|
||||
## 脚本说明
|
||||
|
||||
| 脚本 | 功能 |
|
||||
|------|------|
|
||||
| `batch_download.py` | 批量下载笔记并记录到数据库 |
|
||||
| `export_memory.py` | 导出为单个 Markdown 文件 |
|
||||
| `export_to_workspace.py` | 导出为多个独立文件(按日期+标题命名) |
|
||||
|
||||
## 致谢
|
||||
|
||||
- [XHS-Downloader](https://github.com/JoeanAmier/XHS-Downloader) - GPL-3.0 License
|
||||
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
批量下载小红书笔记
|
||||
|
||||
用法:
|
||||
python batch_download.py [links_file]
|
||||
|
||||
默认读取当前目录的 links.md 文件
|
||||
"""
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
    from source import XHS
except ImportError:
    # `source` is the XHS-Downloader package; it can only be imported when
    # this script runs from that project's directory or the project is
    # installed. Tell the user how to fix it and stop with a failure status.
    for hint in (
        "错误: 请在 XHS-Downloader 项目目录下运行此脚本",
        "或安装依赖: pip install -e /path/to/XHS-Downloader",
    ):
        print(hint)
    raise SystemExit(1)
|
||||
|
||||
|
||||
async def main():
    """Download every note listed in a links file through XHS-Downloader.

    The links file path is taken from the first command-line argument and
    falls back to ``links.md`` in the current directory. The process exits
    with status 1 when that file does not exist.
    """
    links_file = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("links.md")

    if not links_file.exists():
        print(f"错误: 链接文件不存在: {links_file}")
        print("用法: python batch_download.py [links_file]")
        sys.exit(1)

    # Decode explicitly instead of relying on the platform locale encoding.
    # "utf-8-sig" also drops a leading BOM, so the first link in a file saved
    # by a Windows editor still starts with "http".
    links = links_file.read_text(encoding="utf-8-sig").strip()
    link_count = sum(1 for token in links.split() if token.startswith("http"))

    print(f"开始下载,共 {link_count} 个链接...")

    async with XHS(
        work_path="./Volume",
        folder_name="Download",
        record_data=True,  # record note data to the database
        download_record=True,  # skip notes that were already downloaded
        author_archive=True,  # one folder per author
    ) as xhs:
        # The whole text is handed over as-is; presumably extract() locates
        # the URLs itself -- confirm against the XHS-Downloader API.
        result = await xhs.extract(links, download=True)
        print(f"完成!处理了 {len(result)} 个作品")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the async download coroutine to completion.
    asyncio.run(main())
|
||||
@@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
从 XHS-Downloader 数据库导出笔记到单个 Markdown 文件
|
||||
|
||||
用法:
|
||||
python export_memory.py [db_path] [output_file]
|
||||
|
||||
默认:
|
||||
db_path: Volume/Download/ExploreData.db
|
||||
output_file: xhs_memory.md
|
||||
"""
|
||||
import sqlite3
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def export_memory(db_path: Path = None, output_file: Path = None):
    """Export every note in the XHS-Downloader database to one Markdown file.

    Args:
        db_path: SQLite database to read. Defaults to
            ``Volume/Download/ExploreData.db`` relative to the current
            directory.
        output_file: Markdown file to write (UTF-8). Defaults to
            ``xhs_memory.md`` in the current directory.

    Returns:
        True when the file was written; False when the database file does
        not exist or the ``explore_data`` table contains no rows.

    Raises:
        sqlite3.Error: if the query fails (for example the table is missing).
    """
    db_path = db_path or Path("Volume/Download/ExploreData.db")
    output_file = output_file or Path("xhs_memory.md")

    if not db_path.exists():
        print(f"错误: 数据库不存在: {db_path}")
        return False

    conn = sqlite3.connect(db_path)
    try:
        # Newest notes first.
        rows = conn.execute("""
            SELECT 作品标题, 发布时间, 作品链接, 作品描述, 作者昵称, 作品标签
            FROM explore_data
            ORDER BY 发布时间 DESC
        """).fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

    if not rows:
        print("数据库为空")
        return False

    # Collect the pieces and join once instead of growing a string with +=.
    parts = [
        "# 小红书收藏/点赞笔记 Memory\n\n",
        f"> 导出时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
        f"> 共 {len(rows)} 条笔记\n\n---\n\n",
    ]

    for index, (title, published, link, desc, author, tags) in enumerate(rows, 1):
        parts.append(f"## {index}. {title or '无标题'}\n\n")
        parts.append(f"- **作者**: {author or '未知'}\n")
        parts.append(f"- **时间**: {published or '未知'}\n")
        parts.append(f"- **链接**: {link or '无'}\n")
        if tags:
            # The tag line is omitted entirely for notes without tags.
            parts.append(f"- **标签**: {tags}\n")
        parts.append(f"\n### 内容\n\n{desc or '无内容'}\n\n---\n\n")

    output_file.write_text("".join(parts), encoding="utf-8")
    print(f"导出完成: {output_file.absolute()}")
    print(f"共 {len(rows)} 条笔记")
    return True
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Optional CLI arguments: [db_path] [output_file]; None selects the
    # defaults inside export_memory().
    db_path = Path(sys.argv[1]) if len(sys.argv) > 1 else None
    output_file = Path(sys.argv[2]) if len(sys.argv) > 2 else None
    # Exit non-zero when nothing was exported so shell scripts and `&&`
    # chains can detect the failure.
    sys.exit(0 if export_memory(db_path, output_file) else 1)
|
||||
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
将小红书笔记按单独文件导出到 OpenClaw workspace
|
||||
|
||||
用法:
|
||||
python export_to_workspace.py [db_path] [output_dir]
|
||||
|
||||
默认:
|
||||
db_path: Volume/Download/ExploreData.db
|
||||
output_dir: ~/.openclaw/workspace/xhs-memory
|
||||
|
||||
导出格式类似 gpt-history,每条笔记一个文件,文件名格式: YYYY-MM-标题.md
|
||||
"""
|
||||
import sqlite3
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def sanitize_filename(name: str, max_len: int = 50) -> str:
    """Turn an arbitrary title into a string that is safe as a file name.

    Characters that are reserved in file names (``<>:"/\\|?*``), line
    breaks, tabs and non-printable control characters are removed. Runs of
    the remaining whitespace become a single ``-``, and leading/trailing
    dashes are dropped.

    Args:
        name: Raw title.
        max_len: Maximum length of the result in characters (not bytes).

    Returns:
        The cleaned name, or ``"无标题"`` when nothing usable is left.
    """
    # \x00-\x08, \x0e-\x1b and \x7f are control characters that are not
    # whitespace: they are invalid in Windows file names, and NUL makes file
    # creation fail on every platform. Whitespace control characters
    # (\x0b, \x0c, \x1c-\x1f) are left for the next step to turn into dashes.
    name = re.sub(r'[<>:"/\\|?*\n\r\t\x00-\x08\x0e-\x1b\x7f]', '', name)
    name = re.sub(r'\s+', '-', name.strip())
    name = re.sub(r'-+', '-', name)
    name = name.strip('-')
    if len(name) > max_len:
        # Truncation may cut right after a separator; do not end on a dash.
        name = name[:max_len].rstrip('-')
    return name or "无标题"
|
||||
|
||||
|
||||
def export_to_workspace(db_path: Path = None, output_dir: Path = None):
    """Export each note in the XHS-Downloader database to its own Markdown file.

    Files are named ``<YYYY-MM>-<sanitized title>.md``. When that name is
    already taken, ``-1``, ``-2``, ... is appended until a free name is found.

    Args:
        db_path: SQLite database to read. Defaults to
            ``Volume/Download/ExploreData.db`` relative to the current
            directory.
        output_dir: Directory that receives the files. Defaults to
            ``~/.openclaw/workspace/xhs-memory``. It is created only when
            there is at least one note to write.

    Returns:
        True when the notes were written; False when the database file does
        not exist or the ``explore_data`` table contains no rows.

    Raises:
        sqlite3.Error: if the query fails (for example the table is missing).
    """
    db_path = db_path or Path("Volume/Download/ExploreData.db")
    output_dir = output_dir or Path.home() / ".openclaw/workspace/xhs-memory"

    if not db_path.exists():
        print(f"错误: 数据库不存在: {db_path}")
        return False

    conn = sqlite3.connect(db_path)
    try:
        # Newest notes first.
        rows = conn.execute("""
            SELECT 作品标题, 发布时间, 作品链接, 作品描述, 作者昵称, 作品标签
            FROM explore_data
            ORDER BY 发布时间 DESC
        """).fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

    if not rows:
        print("数据库为空")
        return False

    # Create the target directory only after validation succeeded, so a
    # failed run does not leave an empty directory behind.
    output_dir.mkdir(parents=True, exist_ok=True)

    count = 0
    for title, published, link, desc, author, tags in rows:
        # Timestamps look like 2026-01-25_18:17:43: the first seven
        # characters give the YYYY-MM file-name prefix.
        if published:
            date_prefix = published[:7]
            full_date = published.replace('_', ' ')
        else:
            date_prefix = "unknown"
            full_date = "未知"

        safe_title = sanitize_filename(title or "无标题")
        filepath = output_dir / f"{date_prefix}-{safe_title}.md"

        # Never overwrite an existing file. NOTE: files left by an earlier
        # run count as existing too, so exporting twice into the same
        # directory yields suffixed copies rather than replacing them.
        counter = 1
        while filepath.exists():
            filepath = output_dir / f"{date_prefix}-{safe_title}-{counter}.md"
            counter += 1

        parts = [
            f"# {title or '无标题'}\n\n",
            "**来源**: 小红书收藏/点赞\n\n",
            f"**日期**: {full_date}\n\n",
            f"**作者**: {author or '未知'}\n\n",
            f"**链接**: {link or '无'}\n\n",
        ]
        if tags:
            # The tag line is omitted entirely for notes without tags.
            parts.append(f"**标签**: {tags}\n\n")
        parts.append("---\n\n")
        parts.append("## 内容\n\n")
        parts.append(f"{desc or '无内容'}\n")

        filepath.write_text("".join(parts), encoding="utf-8")
        count += 1

    print(f"导出完成: {output_dir}")
    print(f"共生成 {count} 个文件")
    return True
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Optional CLI arguments: [db_path] [output_dir]; None selects the
    # defaults inside export_to_workspace().
    db_path = Path(sys.argv[1]) if len(sys.argv) > 1 else None
    output_dir = Path(sys.argv[2]) if len(sys.argv) > 2 else None
    # Exit non-zero when nothing was exported so shell scripts and `&&`
    # chains can detect the failure.
    sys.exit(0 if export_to_workspace(db_path, output_dir) else 1)
|
||||
Reference in New Issue
Block a user