Data_Source/api.py


								"""

								抖音搜索 FastAPI 接口

								提供搜索抖音视频的 HTTP API

								"""


								from fastapi import FastAPI, HTTPException

								from pydantic import BaseModel, Field

								from typing import Optional, List

								import asyncio

								import sys

								import os

								from pathlib import Path

								import json

								from datetime import datetime

								from playwright.async_api import async_playwright

								from dotenv import load_dotenv


								# 加载环境变量

								load_dotenv()


								# 添加 douyin_data_soupce 到路径

								sys.path.append(str(Path(__file__).parent / "douyin_data_soupce"))


								from douyin_data_soupce.douyin_search_crawler import DouyinSearchCrawler

								from ai_analyzer import AIAnalyzer

								from ai_agent import create_agent


								# Application object; every route in this module is registered on it.
								app = FastAPI(
								    title="抖音数据API",
								    description="提供抖音视频搜索、创作指导、AI分析和智能代理功能",
								)


								class SearchRequest(BaseModel):
								    # Request body accepted by POST /api/search.

								    # Search keyword; required, at least one character.
								    keyword: str = Field(..., description="搜索关键词", min_length=1)

								    # Maximum number of scroll passes forwarded to the crawler (1-20, default 5).
								    max_scroll: int = Field(default=5, description="最大滚动次数", ge=1, le=20)

								    # Whether the browser runs headless (default True).
								    headless: bool = Field(default=True, description="是否使用无头模式")

								    # Path of the cookie JSON file the crawler loads before searching.
								    cookie_file: str = Field(default="douyin_data_soupce/douyin_cookie.json", description="Cookie文件路径")


								class VideoInfo(BaseModel):
								    # One video entry of SearchResponse.videos. Every field is required and,
								    # apart from `tags`, declared as a plain string. The values come from
								    # DouyinSearchCrawler.search_videos, which is not defined in this file.

								    url: str

								    title: str

								    author: str

								    publishTime: str

								    duration: str

								    playCount: str

								    tags: List[str]


								class SearchResponse(BaseModel):
								    # Response body of POST /api/search.

								    # False when the crawler returned no videos, True otherwise.
								    success: bool

								    # Echo of the requested keyword.
								    keyword: str

								    # Number of entries in `videos`.
								    total_count: int

								    videos: List[VideoInfo]

								    # Optional human-readable status text.
								    message: Optional[str] = None


								class CreativeGuidanceRequest(BaseModel):
								    # Request body accepted by POST /api/creative-guidance.

								    # Category label to select on the page; the default "全部" ("all")
								    # skips the category click entirely.
								    category: str = Field(default="全部", description="分类标签（全部/美食/旅行/泛生活/汽车/科技/游戏/二次元）")

								    # Whether the browser runs headless (default True).
								    headless: bool = Field(default=True, description="是否使用无头模式")

								    # Path of the cookie JSON file; the endpoint answers 400 if it is missing.
								    cookie_file: str = Field(default="douyin_data_soupce/douyin_cookie.json", description="Cookie文件路径")

								    # Directory that receives the JSON dump of the crawl result.
								    output_dir: str = Field(default="douyin_data_soupce/douyin_data", description="输出目录")


								class CreativeVideoInfo(BaseModel):
								    # One video entry of CreativeGuidanceResponse.videos. The field names
								    # match the keys of the objects built by the in-page script in
								    # extract_creative_video_data.

								    # 1-based position of the author link the entry was derived from.
								    index: int

								    author: str

								    description: str

								    # href of the author's profile link.
								    authorLink: str

								    duration: Optional[str] = None

								    # Engagement figures, kept as the raw text scraped from the page.
								    hot: Optional[str] = None

								    plays: Optional[str] = None

								    likes: Optional[str] = None

								    comments: Optional[str] = None

								    # Hot-word labels scraped next to the video.
								    hotWords: List[str] = []

								    # "#tag" tokens found in `description`.
								    hashTags: List[str] = []


								class CreativeGuidanceResponse(BaseModel):
								    # Response body of POST /api/creative-guidance.

								    # False when no video could be extracted, True otherwise.
								    success: bool

								    # Echo of the requested category.
								    category: str

								    # Number of extracted videos.
								    total_count: int

								    videos: List[CreativeVideoInfo]

								    # URL of the crawled page.
								    page_url: str

								    # ISO-formatted timestamp of the crawl.
								    crawl_time: str

								    # Optional human-readable status text.
								    message: Optional[str] = None


								class AnalyzeRequest(BaseModel):
								    # Request body accepted by POST /api/analyze.

								    # Video records to analyze; required. Passed to the analyzer unchanged.
								    videos: List[dict] = Field(..., description="视频数据列表")

								    # Path of the prompt file; the endpoint answers 400 if it is missing.
								    prompt_file: str = Field(default="prompts/analyze_prompt.md", description="提示词文件路径")

								    # Optional extra instruction forwarded to the analyzer.
								    custom_instruction: Optional[str] = Field(None, description="自定义分析指令")

								    # Model name handed to AIAnalyzer (default "qwen-plus").
								    model: str = Field(default="qwen-plus", description="使用的模型名称")

								    # Optional API key handed to AIAnalyzer; per the field description it
								    # falls back to an environment variable when omitted.
								    api_key: Optional[str] = Field(None, description="阿里云百炼API Key（可选，默认从环境变量读取）")


								class AnalyzeFileRequest(BaseModel):
								    # Request body accepted by POST /api/analyze-file.

								    # Path of the JSON data file; required, the endpoint answers 400 if it is missing.
								    json_file: str = Field(..., description="JSON数据文件路径")

								    # Path of the prompt file; the endpoint answers 400 if it is missing.
								    prompt_file: str = Field(default="prompts/analyze_prompt.md", description="提示词文件路径")

								    # Optional extra instruction forwarded to the analyzer.
								    custom_instruction: Optional[str] = Field(None, description="自定义分析指令")

								    # Model name handed to AIAnalyzer (default "qwen-plus").
								    model: str = Field(default="qwen-plus", description="使用的模型名称")

								    # Optional API key handed to AIAnalyzer; per the field description it
								    # falls back to an environment variable when omitted.
								    api_key: Optional[str] = Field(None, description="阿里云百炼API Key（可选，默认从环境变量读取）")


								class AnalyzeResponse(BaseModel):
								    # Response body of POST /api/analyze and POST /api/analyze-file. Both
								    # endpoints build it by unpacking the dict returned by the analyzer, so
								    # that dict must supply at least `success` and `video_count`.

								    success: bool

								    analysis: Optional[str] = None

								    model: Optional[str] = None

								    # Required; has no default.
								    video_count: int

								    usage: Optional[dict] = None

								    error: Optional[str] = None


								class AgentRequest(BaseModel):
								    # Request body accepted by POST /api/agent.

								    # User query; required, at least one character.
								    query: str = Field(..., description="用户查询", min_length=1)

								    # Path of the system prompt file; the endpoint answers 400 if it is missing.
								    system_prompt_file: str = Field(default="prompts/agent_prompt.md", description="系统提示词文件路径")

								    # Upper bound on agent iterations (1-20, default 10).
								    max_iterations: int = Field(default=10, description="最大迭代次数", ge=1, le=20)

								    # Model name assigned to the agent (default "qwen-plus").
								    model: str = Field(default="qwen-plus", description="使用的模型名称")

								    # Optional API key; when given it overrides the agent's key.
								    api_key: Optional[str] = Field(None, description="阿里云百炼API Key（可选）")


								class AgentResponse(BaseModel):
								    # Response body of POST /api/agent, filled from the dict returned by
								    # the agent's run() call.

								    success: bool

								    final_answer: Optional[str] = None

								    # Iteration count reported by the agent; required.
								    iteration: int

								    # Tool invocations reported by the agent; empty when none are reported.
								    tool_calls: List[dict] = []

								    error: Optional[str] = None


								@app.get("/")
								async def root():
								    """Return the service name, the docs location and the endpoint paths."""
								    endpoint_paths = {
								        "search": "/api/search",
								        "creative_guidance": "/api/creative-guidance",
								        "analyze": "/api/analyze",
								        "analyze_file": "/api/analyze-file",
								        "agent": "/api/agent"
								    }
								    payload = {"message": "抖音数据API", "docs": "/docs"}
								    payload["endpoints"] = endpoint_paths
								    return payload


								async def load_cookies_for_creative(cookie_file: str):
								    """Load cookies from a JSON file.

								    Args:
								        cookie_file: Path of a UTF-8 encoded JSON file.

								    Returns:
								        The decoded JSON content, or ``None`` when the file cannot be
								        opened, read or parsed. Callers treat ``None`` as "no cookies".
								    """
								    # Imported locally so this block does not depend on a module-level import.
								    import logging

								    try:
								        with open(cookie_file, 'r', encoding='utf-8') as f:
								            return json.load(f)
								    except (OSError, ValueError) as exc:
								        # OSError covers a missing or unreadable file; ValueError covers
								        # invalid JSON (JSONDecodeError) and undecodable bytes
								        # (UnicodeDecodeError). Loading stays best-effort, but the reason
								        # is now logged instead of being dropped silently.
								        logging.getLogger(__name__).warning(
								            "Failed to load cookie file %s: %s", cookie_file, exc
								        )
								        return None


								async def extract_creative_video_data(page) -> dict:

								    """Extract video data from the creative-guidance page.

								    Runs a single script in the page via ``page.evaluate``. For every anchor
								    whose href contains ``iesdouyin.com/share/user/`` the script:

								    * takes the anchor text as the author name;
								    * walks up at most 10 ancestors, stopping at the first one that
								      contains a ``.contain-info-LpWGHS`` element, and uses it as the card
								      container (the anchor is skipped if the walk runs out of ancestors);
								    * picks as description the first ``<p>`` whose text is longer than 5
								      characters, is not ``|`` and contains neither ``万`` nor ``:``;
								    * reads hot / plays / likes / comments from the ``.each-info-TpmTI0``
								      items, classified by substrings of their icon ``src``;
								    * collects hot words from ``.other-text-XeleRf`` elements, skipping
								      texts that contain ``热词``;
								    * derives hash tags from the description with a ``#...`` regex;
								    * reads the duration from the first ``.time-text-mask-WmpK85 p``.

								    A video is recorded only when both author and description are non-empty.
								    Errors raised while processing one anchor are swallowed by an empty
								    ``catch`` inside the script.

								    Args:
								        page: Page object exposing an awaitable ``evaluate`` method
								            (a Playwright page in the caller visible in this file).

								    Returns:
								        The value produced by the script: a mapping with the keys
								        ``total``, ``videos``, ``crawlTime``, ``pageTitle`` and ``pageUrl``.
								    """

								    # NOTE: the literal below is a regular (non-raw) Python string, so "\\s"
								    # in the hash-tag regex reaches the browser as "\s".
								    data = await page.evaluate("""

								        () => {

								            const videos = [];

								            const authorLinks = Array.from(document.querySelectorAll('a[href*="iesdouyin.com/share/user/"]'));


								            authorLinks.forEach((authorLink, index) => {

								                try {

								                    const author = authorLink.textContent.trim();

								                    let container = authorLink.parentElement;

								                    let maxLevels = 10;


								                    while (container && maxLevels > 0) {

								                        if (container.querySelector('.contain-info-LpWGHS')) {

								                            break;

								                        }

								                        container = container.parentElement;

								                        maxLevels--;

								                    }


								                    if (!container) return;


								                    const paragraphs = Array.from(container.querySelectorAll('p'));

								                    let description = '';

								                    for (let p of paragraphs) {

								                        const text = p.textContent.trim();

								                        if (text && text !== '|' && text.length > 5 && !text.includes('万') && !text.includes(':')) {

								                            description = text;

								                            break;

								                        }

								                    }


								                    let hot = '', plays = '', likes = '', comments = '';

								                    const infoContainer = container.querySelector('.contain-info-LpWGHS');

								                    if (infoContainer) {

								                        const infoItems = infoContainer.querySelectorAll('.each-info-TpmTI0');

								                        infoItems.forEach(item => {

								                            const img = item.querySelector('img');

								                            const text = item.textContent.trim();

								                            if (img && img.src) {

								                                if (img.src.includes('hot_')) {

								                                    hot = text;

								                                } else if (img.src.includes('play')) {

								                                    plays = text;

								                                } else if (img.src.includes('digg')) {

								                                    likes = text;

								                                } else if (img.src.includes('comment')) {

								                                    comments = text;

								                                }

								                            }

								                        });

								                    }


								                    const hotWords = [];

								                    const hotWordElements = container.querySelectorAll('.other-text-XeleRf');

								                    hotWordElements.forEach((el, i) => {

								                        const text = el.textContent.trim();

								                        if (i === 0 && text.includes('热词')) {

								                        } else if (text && !text.includes('热词')) {

								                            hotWords.push(text);

								                        }

								                    });


								                    const hashTags = description.match(/#[^\\s#]+/g) || [];


								                    let duration = '';

								                    const timeElements = container.querySelectorAll('.time-text-mask-WmpK85 p');

								                    if (timeElements.length > 0) {

								                        duration = timeElements[0].textContent.trim();

								                    }


								                    if (author && description) {

								                        videos.push({

								                            index: index + 1,

								                            author: author,

								                            description: description,

								                            authorLink: authorLink.href,

								                            duration: duration,

								                            hot: hot,

								                            plays: plays,

								                            likes: likes,

								                            comments: comments,

								                            hotWords: hotWords,

								                            hashTags: hashTags

								                        });

								                    }

								                } catch (e) {

								                }

								            });


								            return {

								                total: videos.length,

								                videos: videos,

								                crawlTime: new Date().toISOString(),

								                pageTitle: document.title,

								                pageUrl: window.location.href

								            };

								        }

								    """)

								    return data


								async def crawl_creative_guidance_api(category: str, headless: bool, cookie_file: str, output_dir: str):
								    """Crawl the Douyin creative-guidance page (API variant).

								    Launches Chromium, optionally seeds the browser context with cookies
								    from ``cookie_file``, opens the creative-guidance page, optionally
								    clicks the requested category, extracts the video data and, when at
								    least one video was found, dumps the result as JSON into ``output_dir``.

								    Args:
								        category: Category label to click; falsy or "全部" skips the click.
								        headless: Whether Chromium runs headless.
								        cookie_file: Path of the cookie JSON file; ignored when it does
								            not exist or cannot be loaded.
								        output_dir: Directory for the JSON dump (created if missing).

								    Returns:
								        The mapping returned by ``extract_creative_video_data``.
								    """
								    url = "https://creator.douyin.com/creator-micro/creative-guidance"

								    async with async_playwright() as p:
								        browser = await p.chromium.launch(headless=headless)

								        context_options = {}
								        if Path(cookie_file).exists():
								            cookies = await load_cookies_for_creative(cookie_file)
								            if cookies:
								                context_options['storage_state'] = {'cookies': cookies}

								        context = await browser.new_context(**context_options)
								        page = await context.new_page()

								        try:
								            await page.goto(url, wait_until="domcontentloaded", timeout=60000)
								            # Fixed wait for the page to render its content after DOM load.
								            await asyncio.sleep(10)

								            if category and category != "全部":
								                # Expand every collapsed category list first.
								                await page.evaluate("""

								                    () => {

								                        const showButtons = document.querySelectorAll('.show-button-sDo51G');

								                        showButtons.forEach(btn => {

								                            const text = btn.textContent.trim();

								                            // 如果按钮不是"收起"，说明需要展开

								                            if (!text.includes('收起')) {

								                                btn.click();

								                            }

								                        });

								                    }

								                """)
								                await asyncio.sleep(1)

								                # Find and click the category label. The label is passed as an
								                # evaluate() argument rather than interpolated into the script
								                # source: it comes from the request body, and a value containing
								                # a quote would otherwise break the script or inject code.
								                category_clicked = await page.evaluate("""

								                    (category) => {

								                        const categoryDivs = Array.from(document.querySelectorAll('.each-kind-MR__DN'));

								                        const targetDiv = categoryDivs.find(div =>

								                            div.textContent.trim() === category

								                        );

								                        if (targetDiv) {

								                            targetDiv.click();

								                            return true;

								                        }

								                        return false;

								                    }

								                """, category)

								                if category_clicked:
								                    # Give the page time to load the selected category.
								                    await asyncio.sleep(8)

								            data = await extract_creative_video_data(page)

								            if data['total'] > 0:
								                # Persist the result to a timestamped JSON file.
								                output_path = Path(output_dir)
								                output_path.mkdir(parents=True, exist_ok=True)

								                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
								                # Keep only letters, digits, "-" and "_" in the filename part
								                # so a category containing path separators cannot redirect
								                # the write. The documented labels pass through unchanged.
								                safe_category = "".join(
								                    ch if ch.isalnum() or ch in "-_" else "_"
								                    for ch in (category or "")
								                )
								                category_suffix = f"_{safe_category}" if safe_category else ""
								                json_file = output_path / f"douyin_creative_guidance{category_suffix}_{timestamp}.json"

								                result_data = {
								                    'page_url': url,
								                    'page_title': '抖音创作指导 - 热门视频',
								                    'category': category or '全部',
								                    'platform': 'douyin',
								                    'crawl_time': datetime.now().isoformat(),
								                    'total_videos': len(data['videos']),
								                    'videos': data['videos']
								                }

								                with open(json_file, "w", encoding="utf-8") as f:
								                    json.dump(result_data, f, ensure_ascii=False, indent=2)

								            return data

								        finally:
								            # Always release the browser, even when navigation or extraction fails.
								            await browser.close()


								@app.post("/api/creative-guidance", response_model=CreativeGuidanceResponse)
								async def get_creative_guidance(request: CreativeGuidanceRequest):
								    """
								    Fetch Douyin creative-guidance data.

								    - **category**: category label (全部/美食/旅行/泛生活/汽车/科技/游戏/二次元), default "全部"
								    - **headless**: whether to run the browser headless, default True
								    - **cookie_file**: path of the cookie file
								    - **output_dir**: output directory

								    Responds with 400 when the cookie file does not exist and with 500
								    when crawling fails.
								    """
								    try:
								        # Reject the request early when the cookie file is missing.
								        if not Path(request.cookie_file).exists():
								            raise HTTPException(
								                status_code=400,
								                detail=f"Cookie文件不存在: {request.cookie_file}"
								            )

								        # Crawl the page.
								        data = await crawl_creative_guidance_api(
								            category=request.category,
								            headless=request.headless,
								            cookie_file=request.cookie_file,
								            output_dir=request.output_dir
								        )

								        if not data or data['total'] == 0:
								            return CreativeGuidanceResponse(
								                success=False,
								                category=request.category,
								                total_count=0,
								                videos=[],
								                page_url="https://creator.douyin.com/creator-micro/creative-guidance",
								                crawl_time=datetime.now().isoformat(),
								                message="未获取到任何视频数据，请确保已登录并且页面加载完成"
								            )

								        return CreativeGuidanceResponse(
								            success=True,
								            category=request.category,
								            total_count=data['total'],
								            videos=data['videos'],
								            page_url=data['pageUrl'],
								            crawl_time=data['crawlTime'],
								            message=f"成功获取 {data['total']} 个视频"
								        )

								    except HTTPException:
								        # Preserve the intended status code (the 400 above); without this
								        # clause the generic handler below rewraps it as a 500.
								        raise
								    except Exception as e:
								        raise HTTPException(status_code=500, detail=f"获取创作指导数据时出错: {str(e)}") from e


								@app.post("/api/search", response_model=SearchResponse)
								async def search_douyin(request: SearchRequest):
								    """
								    Search Douyin videos.

								    - **keyword**: search keyword (required)
								    - **max_scroll**: maximum number of scrolls, default 5
								    - **headless**: whether to run the browser headless, default True
								    - **cookie_file**: path of the cookie file

								    Responds with 400 when the cookie file cannot be loaded and with 500
								    when the search itself fails.
								    """
								    crawler = DouyinSearchCrawler(headless=request.headless)

								    try:
								        # Start the browser.
								        await crawler.init_browser()

								        # Load cookies; a falsy result is reported as a client error.
								        cookie_loaded = await crawler.load_cookies(request.cookie_file)
								        if not cookie_loaded:
								            raise HTTPException(
								                status_code=400,
								                detail=f"无法加载Cookie文件: {request.cookie_file}"
								            )

								        # Run the search.
								        videos = await crawler.search_videos(request.keyword, max_scroll=request.max_scroll)

								        if not videos:
								            return SearchResponse(
								                success=False,
								                keyword=request.keyword,
								                total_count=0,
								                videos=[],
								                message="未获取到任何视频数据"
								            )

								        # Persist the results through the crawler.
								        await crawler.save_results(request.keyword, videos)

								        return SearchResponse(
								            success=True,
								            keyword=request.keyword,
								            total_count=len(videos),
								            videos=videos,
								            message="搜索成功"
								        )

								    except HTTPException:
								        # Preserve the intended status code (the 400 above); without this
								        # clause the generic handler below rewraps it as a 500.
								        raise
								    except Exception as e:
								        raise HTTPException(status_code=500, detail=f"搜索过程中出错: {str(e)}") from e

								    finally:
								        # Release the crawler on every path.
								        await crawler.close()


								@app.post("/api/analyze", response_model=AnalyzeResponse)
								async def analyze_videos(request: AnalyzeRequest):
								    """
								    Analyze video data with AI.

								    - **videos**: list of video records (required)
								    - **prompt_file**: path of the prompt file, default "prompts/analyze_prompt.md"
								    - **custom_instruction**: custom analysis instruction (optional)
								    - **model**: model name, default "qwen-plus"
								    - **api_key**: Alibaba Cloud Bailian API key (optional, defaults to the DASHSCOPE_API_KEY environment variable)

								    Responds with 400 when the prompt file is missing or the analyzer
								    rejects its configuration, and with 500 when the analysis fails.
								    """
								    try:
								        # Check the prompt file.
								        if not Path(request.prompt_file).exists():
								            raise HTTPException(
								                status_code=400,
								                detail=f"提示词文件不存在: {request.prompt_file}"
								            )

								        # Build the analyzer; a ValueError from it is reported as a client error.
								        try:
								            analyzer = AIAnalyzer(api_key=request.api_key, model=request.model)
								        except ValueError as e:
								            raise HTTPException(
								                status_code=400,
								                detail=str(e)
								            )

								        # analyze() is a synchronous call; running it directly inside this
								        # coroutine would stall the event loop (and every other request)
								        # until it returns, so it is handed to the default executor.
								        loop = asyncio.get_running_loop()
								        result = await loop.run_in_executor(
								            None,
								            lambda: analyzer.analyze(
								                videos=request.videos,
								                prompt_file=request.prompt_file,
								                custom_instruction=request.custom_instruction
								            )
								        )

								        return AnalyzeResponse(**result)

								    except HTTPException:
								        raise
								    except Exception as e:
								        raise HTTPException(status_code=500, detail=f"分析过程中出错: {str(e)}") from e


								@app.post("/api/analyze-file", response_model=AnalyzeResponse)
								async def analyze_from_file(request: AnalyzeFileRequest):
								    """
								    Read data from a JSON file and analyze it with AI.

								    - **json_file**: path of the JSON data file (required)
								    - **prompt_file**: path of the prompt file, default "prompts/analyze_prompt.md"
								    - **custom_instruction**: custom analysis instruction (optional)
								    - **model**: model name, default "qwen-plus"
								    - **api_key**: Alibaba Cloud Bailian API key (optional, defaults to the DASHSCOPE_API_KEY environment variable)

								    Responds with 400 when either file is missing or the analyzer rejects
								    its configuration, and with 500 when the analysis fails.
								    """
								    try:
								        # Check both input files.
								        if not Path(request.json_file).exists():
								            raise HTTPException(
								                status_code=400,
								                detail=f"JSON文件不存在: {request.json_file}"
								            )

								        if not Path(request.prompt_file).exists():
								            raise HTTPException(
								                status_code=400,
								                detail=f"提示词文件不存在: {request.prompt_file}"
								            )

								        # Build the analyzer; a ValueError from it is reported as a client error.
								        try:
								            analyzer = AIAnalyzer(api_key=request.api_key, model=request.model)
								        except ValueError as e:
								            raise HTTPException(
								                status_code=400,
								                detail=str(e)
								            )

								        # analyze_from_file() is a synchronous call; running it directly
								        # inside this coroutine would stall the event loop until it
								        # returns, so it is handed to the default executor.
								        loop = asyncio.get_running_loop()
								        result = await loop.run_in_executor(
								            None,
								            lambda: analyzer.analyze_from_file(
								                json_file=request.json_file,
								                prompt_file=request.prompt_file,
								                custom_instruction=request.custom_instruction
								            )
								        )

								        return AnalyzeResponse(**result)

								    except HTTPException:
								        raise
								    except Exception as e:
								        raise HTTPException(status_code=500, detail=f"分析过程中出错: {str(e)}") from e


								@app.post("/api/agent", response_model=AgentResponse)
								async def run_agent(request: AgentRequest):
								    """
								    Run the AI agent - creative inspiration generation.

								    Based on the user's description the agent automatically:
								    1. Understands the request and extracts the content category
								    2. Fetches related trending video data
								    3. Generates 9 concrete, actionable creative ideas
								    4. Can fetch more ideas if the user is not satisfied

								    **Supported content categories**:
								    - 美食、旅行、泛生活、汽车、科技、游戏、二次元
								    - 娱乐、明星、体育、文化教育、校园、政务
								    - 时尚、才艺、随拍、动植物、图文控
								    - 剧情、亲子、三农、创意、户外、公益

								    **Parameters**:
								    - **query**: user description (required), for example:
								      - "我想做一些美食相关的短视频，主要是家常菜的制作教程"
								      - "我想拍一些关于大学生活的有趣视频，记录校园日常"
								      - "我想做一些关于健身的短视频，分享简单的居家锻炼方法"
								    - **system_prompt_file**: system prompt file, default "prompts/agent_prompt.md"
								    - **max_iterations**: maximum number of iterations, default 10 (15 is suggested for the full flow)
								    - **model**: model name, default "qwen-plus"
								    - **api_key**: Alibaba Cloud Bailian API key (optional)

								    **Returns**:
								    - The recognized content category
								    - 9 creative ideas (title, core idea, execution advice, trending tags)
								    - Creation tips and suggestions

								    **Example**:
								    ```json
								    {
								      "query": "我想做一些美食相关的短视频，主要是家常菜的制作教程",
								      "max_iterations": 15
								    }
								    ```
								    """
								    try:
								        # The system prompt must exist before any agent work starts.
								        prompt_path = Path(request.system_prompt_file)
								        if not prompt_path.exists():
								            raise HTTPException(
								                status_code=400,
								                detail=f"系统提示词文件不存在: {request.system_prompt_file}"
								            )

								        # Build and configure the agent; a ValueError is a client error.
								        try:
								            agent = create_agent()
								            override_key = request.api_key
								            if override_key:
								                agent.api_key = override_key
								                # NOTE(review): this also sets the key on the dashscope
								                # module itself, i.e. process-wide - confirm that is intended.
								                import dashscope
								                dashscope.api_key = override_key
								            agent.model = request.model
								        except ValueError as config_error:
								            raise HTTPException(
								                status_code=400,
								                detail=str(config_error)
								            )

								        # Execute the agent loop.
								        outcome = await agent.run(
								            user_input=request.query,
								            system_prompt_file=request.system_prompt_file,
								            max_iterations=request.max_iterations
								        )

								        # Map the agent's result dict onto the response model.
								        response_fields = {
								            "success": outcome["success"],
								            "final_answer": outcome.get("final_answer"),
								            "iteration": outcome["iteration"],
								            "tool_calls": outcome.get("tool_calls", []),
								            "error": outcome.get("error"),
								        }
								        return AgentResponse(**response_fields)

								    except HTTPException:
								        # Pass deliberate HTTP errors through untouched.
								        raise
								    except Exception as exc:
								        raise HTTPException(status_code=500, detail=f"Agent执行过程中出错: {exc!s}")


								if __name__ == "__main__":
								    # Script entry point: serve the app with uvicorn on all interfaces, port 8001.
								    import uvicorn

								    server_options = {"host": "0.0.0.0", "port": 8001}
								    uvicorn.run(app, **server_options)