feat: 更新一堆 ai 初始化以及 skill

This commit is contained in:
richarjiang
2026-04-15 09:40:15 +08:00
commit 67b2f7f2ac
37 changed files with 3121 additions and 0 deletions

View File

@@ -0,0 +1,98 @@
# XHS-Downloader 辅助工具
配合 [XHS-Downloader](https://github.com/JoeanAmier/XHS-Downloader) 使用的工具脚本,用于下载小红书收藏/点赞笔记并导出为 OpenClaw 记忆库格式。
## 依赖
需要先安装 XHS-Downloader
```bash
git clone https://github.com/JoeanAmier/XHS-Downloader.git
cd XHS-Downloader
pip install -r requirements.txt
```
## 使用流程
### 1. 获取收藏/点赞链接(使用油猴脚本)
手动复制链接效率低,推荐使用 XHS-Downloader 提供的油猴脚本批量提取:
**安装脚本:**
1. 安装 [Tampermonkey](https://www.tampermonkey.net/) 浏览器扩展
2. 安装用户脚本:[XHS-Downloader.js](https://raw.githubusercontent.com/JoeanAmier/XHS-Downloader/refs/heads/master/static/XHS-Downloader.js)
**提取链接:**
1. 打开 [小红书网页版](https://www.xiaohongshu.com) 并登录
2. 进入个人主页 → **收藏** 或 **点赞** 页面
3. 点击 Tampermonkey 图标,选择:
- `提取收藏作品链接`
- `提取点赞作品链接`
4. 脚本会自动滚动页面加载全部内容
5. 提取完成后链接自动复制到剪贴板
**保存链接:**
将剪贴板内容粘贴到 `links.md` 文件:
```
https://www.xiaohongshu.com/explore/xxx?xsec_token=...
https://www.xiaohongshu.com/explore/yyy?xsec_token=...
```
> **注意**:自动滚动功能默认关闭,需在脚本设置中手动开启。开启后可能触发风控,建议适度使用。
### 2. 批量下载
```bash
# 在 XHS-Downloader 目录下运行
python batch_download.py links.md
```
下载的数据会保存到 `Volume/Download/ExploreData.db`
### 3. 导出记忆库
**方式 A:导出为单文件**
```bash
python export_memory.py
# 生成 xhs_memory.md
```
**方式 B:导出为多文件(推荐用于 OpenClaw)**
```bash
python export_to_workspace.py
# 生成到 ~/.openclaw/workspace/xhs-memory/
```
### 4. 配置 OpenClaw 记忆搜索
编辑 `~/.openclaw/openclaw.json`,添加:
```json
{
"memorySearch": {
"extraPaths": [
"~/.openclaw/workspace/xhs-memory"
]
}
}
```
或者如果使用单文件导出,将 `xhs_memory.md` 放到 workspace 目录下。
## 脚本说明
| 脚本 | 功能 |
|------|------|
| `batch_download.py` | 批量下载笔记并记录到数据库 |
| `export_memory.py` | 导出为单个 Markdown 文件 |
| `export_to_workspace.py` | 导出为多个独立文件(按日期+标题命名) |
## 致谢
- [XHS-Downloader](https://github.com/JoeanAmier/XHS-Downloader) - GPL-3.0 License

View File

@@ -0,0 +1,48 @@
#!/usr/bin/env python
"""
Batch-download Xiaohongshu (RED) notes via XHS-Downloader.

Usage:
    python batch_download.py [links_file]

Reads links from ./links.md by default.
"""
import asyncio
import sys
from pathlib import Path

# XHS-Downloader is not on PyPI: the `source` package must be importable,
# either by running this script inside the project directory or after
# `pip install -e` of a local checkout.
try:
    from source import XHS
except ImportError:
    print("错误: 请在 XHS-Downloader 项目目录下运行此脚本")
    print("或安装依赖: pip install -e /path/to/XHS-Downloader")
    sys.exit(1)
async def main():
    """Download every note listed in the links file.

    Reads the links file (``argv[1]``, default ``links.md``), counts the
    http(s) links it contains, then hands the full text to
    ``XHS.extract`` for batch downloading.

    Exits with status 1 when the links file is missing or contains no links.
    """
    # Resolve the links file from the command line, defaulting to links.md.
    links_file = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("links.md")
    if not links_file.exists():
        print(f"错误: 链接文件不存在: {links_file}")
        print("用法: python batch_download.py [links_file]")
        sys.exit(1)
    # Read as UTF-8 explicitly: the platform default encoding (e.g. GBK on
    # Chinese Windows) can fail on non-ASCII content in the file.
    links = links_file.read_text(encoding="utf-8").strip()
    link_count = len([token for token in links.split() if token.startswith("http")])
    # Bail out early instead of handing an empty job to the downloader.
    if link_count == 0:
        print(f"错误: 文件中没有有效链接: {links_file}")
        sys.exit(1)
    print(f"开始下载,共 {link_count} 个链接...")
    async with XHS(
        work_path="./Volume",    # data root used by XHS-Downloader
        folder_name="Download",  # subfolder holding media + ExploreData.db
        record_data=True,        # 记录作品数据到数据库
        download_record=True,    # 跳过已下载
        author_archive=True,     # 按作者分文件夹
    ) as xhs:
        result = await xhs.extract(links, download=True)
        print(f"完成!处理了 {len(result)} 个作品")
if __name__ == "__main__":
    # Entry point: drive the async downloader to completion.
    asyncio.run(main())

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env python
"""
Export notes from the XHS-Downloader database into one Markdown file.

Usage:
    python export_memory.py [db_path] [output_file]

Defaults:
    db_path: Volume/Download/ExploreData.db
    output_file: xhs_memory.md
"""
import sqlite3
import sys
from pathlib import Path
from datetime import datetime
def export_memory(db_path: Path = None, output_file: Path = None):
    """Dump every note in the XHS-Downloader database to a single Markdown file.

    Args:
        db_path: SQLite database written by XHS-Downloader
            (default: ``Volume/Download/ExploreData.db``).
        output_file: Destination Markdown file (default: ``xhs_memory.md``).

    Returns:
        True when at least one note was exported; False when the database
        is missing or empty.
    """
    db_path = db_path or Path("Volume/Download/ExploreData.db")
    output_file = output_file or Path("xhs_memory.md")
    if not db_path.exists():
        print(f"错误: 数据库不存在: {db_path}")
        return False
    # try/finally guarantees the connection is released even when the
    # schema is unexpected and the query raises.
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        # Column names are the Chinese identifiers XHS-Downloader writes.
        cursor.execute("""
            SELECT 作品标题, 发布时间, 作品链接, 作品描述, 作者昵称, 作品标签
            FROM explore_data
            ORDER BY 发布时间 DESC
        """)
        rows = cursor.fetchall()
    finally:
        conn.close()
    if not rows:
        print("数据库为空")
        return False
    # Collect fragments and join once: avoids quadratic += concatenation.
    parts = [
        "# 小红书收藏/点赞笔记 Memory\n\n",
        f"> 导出时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
        f"> 共 {len(rows)} 条笔记\n\n---\n\n",
    ]
    for i, (title, pub_time, link, desc, author, tags) in enumerate(rows, 1):
        parts.append(f"## {i}. {title or '无标题'}\n\n")
        parts.append(f"- **作者**: {author or '未知'}\n")
        parts.append(f"- **时间**: {pub_time or '未知'}\n")
        parts.append(f"- **链接**: {link or ''}\n")
        if tags:
            parts.append(f"- **标签**: {tags}\n")
        parts.append(f"\n### 内容\n\n{desc or '无内容'}\n\n---\n\n")
    output_file.write_text("".join(parts), encoding="utf-8")
    print(f"导出完成: {output_file.absolute()}")
    print(f"共 {len(rows)} 条笔记")
    return True
if __name__ == "__main__":
    # Optional CLI overrides: [db_path] [output_file]; None falls back
    # to the defaults inside export_memory().
    db_path = Path(sys.argv[1]) if len(sys.argv) > 1 else None
    output_file = Path(sys.argv[2]) if len(sys.argv) > 2 else None
    export_memory(db_path, output_file)

View File

@@ -0,0 +1,101 @@
#!/usr/bin/env python
"""
Export Xiaohongshu notes as individual files into the OpenClaw workspace.

Usage:
    python export_to_workspace.py [db_path] [output_dir]

Defaults:
    db_path: Volume/Download/ExploreData.db
    output_dir: ~/.openclaw/workspace/xhs-memory

Output format mirrors gpt-history: one file per note, named
``YYYY-MM-<title>.md``.
"""
import sqlite3
import re
import sys
from pathlib import Path
def sanitize_filename(name: str, max_len: int = 50) -> str:
    """Clean *name* for use as a filename.

    Removes characters illegal on common filesystems, turns whitespace runs
    into single hyphens, collapses repeated hyphens, and truncates to
    *max_len*. Returns "无标题" when nothing printable remains.
    """
    name = re.sub(r'[<>:"/\\|?*\n\r\t]', '', name)
    name = re.sub(r'\s+', '-', name.strip())
    name = re.sub(r'-+', '-', name)
    name = name.strip('-')
    if len(name) > max_len:
        name = name[:max_len].rstrip('-')
    return name or "无标题"


def export_to_workspace(db_path: Path = None, output_dir: Path = None):
    """Export each note in the database to its own Markdown file.

    Args:
        db_path: SQLite database written by XHS-Downloader
            (default: ``Volume/Download/ExploreData.db``).
        output_dir: Destination directory
            (default: ``~/.openclaw/workspace/xhs-memory``).

    Returns:
        True when files were written; False when the database is missing
        or empty.
    """
    db_path = db_path or Path("Volume/Download/ExploreData.db")
    output_dir = output_dir or Path.home() / ".openclaw/workspace/xhs-memory"
    # Validate the database *before* creating the output directory so a
    # mistyped path does not leave an empty directory behind.
    if not db_path.exists():
        print(f"错误: 数据库不存在: {db_path}")
        return False
    # try/finally guarantees the connection is released even if the query raises.
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("""
            SELECT 作品标题, 发布时间, 作品链接, 作品描述, 作者昵称, 作品标签
            FROM explore_data
            ORDER BY 发布时间 DESC
        """)
        rows = cursor.fetchall()
    finally:
        conn.close()
    if not rows:
        print("数据库为空")
        return False
    output_dir.mkdir(parents=True, exist_ok=True)
    count = 0
    for title, pub_time, link, desc, author, tags in rows:
        # Timestamps look like 2026-01-25_18:17:43: keep YYYY-MM for the
        # filename and swap the underscore for a space when displaying.
        if pub_time:
            date_prefix = pub_time[:7]  # YYYY-MM
            full_date = pub_time.replace('_', ' ')
        else:
            date_prefix = "unknown"
            full_date = "未知"
        safe_title = sanitize_filename(title or "无标题")
        filepath = output_dir / f"{date_prefix}-{safe_title}.md"
        # Append -1, -2, ... until the filename is unique.
        suffix = 1
        while filepath.exists():
            filepath = output_dir / f"{date_prefix}-{safe_title}-{suffix}.md"
            suffix += 1
        # Collect fragments and join once: avoids quadratic += concatenation.
        parts = [
            f"# {title or '无标题'}\n\n",
            "**来源**: 小红书收藏/点赞\n\n",
            f"**日期**: {full_date}\n\n",
            f"**作者**: {author or '未知'}\n\n",
            f"**链接**: {link or ''}\n\n",
        ]
        if tags:
            parts.append(f"**标签**: {tags}\n\n")
        parts.append("---\n\n")
        parts.append("## 内容\n\n")
        parts.append(f"{desc or '无内容'}\n")
        filepath.write_text("".join(parts), encoding="utf-8")
        count += 1
    print(f"导出完成: {output_dir}")
    print(f"共生成 {count} 个文件")
    return True
if __name__ == "__main__":
    # Optional CLI overrides: [db_path] [output_dir]; None falls back
    # to the defaults inside export_to_workspace().
    db_path = Path(sys.argv[1]) if len(sys.argv) > 1 else None
    output_dir = Path(sys.argv[2]) if len(sys.argv) > 2 else None
    export_to_workspace(db_path, output_dir)