"""
抖音搜索 FastAPI 接口
提供搜索抖音视频的 HTTP API
"""

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from typing import Optional, List
import asyncio
import sys
import os
from pathlib import Path
import json
from datetime import datetime
from playwright.async_api import async_playwright
from dotenv import load_dotenv

# Load environment variables (python-dotenv) before the project modules
# below are imported.
load_dotenv()

# Add the douyin_data_soupce directory that sits next to this file to the
# module search path.
sys.path.append(str(Path(__file__).parent / "douyin_data_soupce"))

from douyin_data_soupce.douyin_search_crawler import DouyinSearchCrawler
from ai_analyzer import AIAnalyzer
from ai_agent import create_agent

# The ASGI application; every route in this module is registered on it.
app = FastAPI(title="抖音数据API", description="提供抖音视频搜索、创作指导、AI分析和智能代理功能")


class SearchRequest(BaseModel):
    """Request body accepted by ``POST /api/search``."""

    # Required, non-empty search keyword.
    keyword: str = Field(
        ...,
        min_length=1,
        description="搜索关键词",
    )
    # How many times the result page may be scrolled (1-20, default 5).
    max_scroll: int = Field(
        default=5,
        ge=1,
        le=20,
        description="最大滚动次数",
    )
    # Run the browser headless unless explicitly disabled.
    headless: bool = Field(
        default=True,
        description="是否使用无头模式",
    )
    # Path of the cookie file handed to the crawler.
    cookie_file: str = Field(
        default="douyin_data_soupce/douyin_cookie.json",
        description="Cookie文件路径",
    )


class VideoInfo(BaseModel):
    """One video entry in the search response.

    Every field is required. All values except ``tags`` are plain text; this
    module passes them through without parsing them.
    """

    url: str  # presumably the link to the video -- confirm against the crawler
    title: str
    author: str
    publishTime: str  # kept as text; the format is not defined in this module
    duration: str  # kept as text; the format is not defined in this module
    playCount: str  # kept as text; the format is not defined in this module
    tags: List[str]


class SearchResponse(BaseModel):
    """Response body of ``POST /api/search``."""

    success: bool  # False when the crawler returned no videos
    keyword: str  # the keyword from the request, echoed back
    total_count: int  # number of entries in ``videos``
    videos: List[VideoInfo]
    message: Optional[str] = None  # human-readable status text


class CreativeGuidanceRequest(BaseModel):
    """Request body accepted by ``POST /api/creative-guidance``."""

    # Category tab to select on the page; "全部" keeps the default view.
    category: str = Field(
        default="全部",
        description="分类标签（全部/美食/旅行/泛生活/汽车/科技/游戏/二次元）",
    )
    # Run the browser headless unless explicitly disabled.
    headless: bool = Field(
        default=True,
        description="是否使用无头模式",
    )
    # Path of the JSON cookie file loaded into the browser context.
    cookie_file: str = Field(
        default="douyin_data_soupce/douyin_cookie.json",
        description="Cookie文件路径",
    )
    # Directory that receives the JSON dump of the crawl result.
    output_dir: str = Field(
        default="douyin_data_soupce/douyin_data",
        description="输出目录",
    )


class CreativeVideoInfo(BaseModel):
    """One video card scraped from the creative-guidance page.

    The fields mirror the objects built by the in-page script in
    ``extract_creative_video_data``.
    """

    index: int  # 1-based position of the author link on the page
    author: str  # text of the author link
    description: str  # first paragraph of the card accepted by the scraper's filter
    authorLink: str  # href of the author link
    duration: Optional[str] = None  # text of the time overlay, if present
    hot: Optional[str] = None  # statistic text of the item whose icon URL contains "hot_"
    plays: Optional[str] = None  # statistic text of the item whose icon URL contains "play"
    likes: Optional[str] = None  # statistic text of the item whose icon URL contains "digg"
    comments: Optional[str] = None  # statistic text of the item whose icon URL contains "comment"
    hotWords: List[str] = []  # hot-word labels found on the card
    hashTags: List[str] = []  # "#..." tokens matched inside the description


class CreativeGuidanceResponse(BaseModel):
    """Response body of ``POST /api/creative-guidance``."""

    success: bool  # False when no video could be extracted
    category: str  # the category from the request, echoed back
    total_count: int  # number of extracted videos
    videos: List[CreativeVideoInfo]
    page_url: str  # URL of the crawled page
    crawl_time: str  # ISO-formatted timestamp of the crawl
    message: Optional[str] = None  # human-readable status text


class AnalyzeRequest(BaseModel):
    """Request body accepted by ``POST /api/analyze``."""

    # Required list of video records to analyze.
    videos: List[dict] = Field(
        ...,
        description="视频数据列表",
    )
    # Prompt file handed to the analyzer.
    prompt_file: str = Field(
        default="prompts/analyze_prompt.md",
        description="提示词文件路径",
    )
    # Optional extra instruction forwarded to the analyzer.
    custom_instruction: Optional[str] = Field(
        default=None,
        description="自定义分析指令",
    )
    # Name of the model the analyzer is created with.
    model: str = Field(
        default="qwen-plus",
        description="使用的模型名称",
    )
    # Optional API key forwarded to the analyzer constructor.
    api_key: Optional[str] = Field(
        default=None,
        description="阿里云百炼API Key（可选，默认从环境变量读取）",
    )


class AnalyzeFileRequest(BaseModel):
    """Request body accepted by ``POST /api/analyze-file``."""

    # Required path of the JSON data file to analyze.
    json_file: str = Field(
        ...,
        description="JSON数据文件路径",
    )
    # Prompt file handed to the analyzer.
    prompt_file: str = Field(
        default="prompts/analyze_prompt.md",
        description="提示词文件路径",
    )
    # Optional extra instruction forwarded to the analyzer.
    custom_instruction: Optional[str] = Field(
        default=None,
        description="自定义分析指令",
    )
    # Name of the model the analyzer is created with.
    model: str = Field(
        default="qwen-plus",
        description="使用的模型名称",
    )
    # Optional API key forwarded to the analyzer constructor.
    api_key: Optional[str] = Field(
        default=None,
        description="阿里云百炼API Key（可选，默认从环境变量读取）",
    )


class AnalyzeResponse(BaseModel):
    """Response body of the two analysis endpoints.

    The endpoints build it directly from the dict returned by ``AIAnalyzer``
    (``AnalyzeResponse(**result)``), so the exact meaning of each value is
    defined by that class, which is not visible in this module.
    """

    success: bool
    analysis: Optional[str] = None  # presumably the generated analysis text -- confirm in AIAnalyzer
    model: Optional[str] = None
    video_count: int  # required: the analyzer result must always provide it
    usage: Optional[dict] = None  # presumably token-usage details -- confirm in AIAnalyzer
    error: Optional[str] = None


class AgentRequest(BaseModel):
    """Request body accepted by ``POST /api/agent``."""

    # Required, non-empty user query passed to the agent.
    query: str = Field(
        ...,
        min_length=1,
        description="用户查询",
    )
    # System prompt file handed to the agent run.
    system_prompt_file: str = Field(
        default="prompts/agent_prompt.md",
        description="系统提示词文件路径",
    )
    # Iteration limit for the agent run (1-20, default 10).
    max_iterations: int = Field(
        default=10,
        ge=1,
        le=20,
        description="最大迭代次数",
    )
    # Model name assigned to the agent.
    model: str = Field(
        default="qwen-plus",
        description="使用的模型名称",
    )
    # Optional API key that overrides the agent's own key.
    api_key: Optional[str] = Field(
        default=None,
        description="阿里云百炼API Key（可选）",
    )


class AgentResponse(BaseModel):
    """Response body of ``POST /api/agent``.

    Each field is copied from the key of the same name in the dict returned
    by the agent run.
    """

    success: bool
    final_answer: Optional[str] = None
    iteration: int  # required key of the agent result
    tool_calls: List[dict] = []  # empty when the agent result has no "tool_calls" key
    error: Optional[str] = None


class ExtractKeywordsRequest(BaseModel):
    """Request body accepted by ``POST /api/extract-keywords``."""

    # Required, non-empty free text entered by the user.
    query: str = Field(
        ...,
        min_length=1,
        description="用户输入的文字",
    )


class ExtractKeywordsResponse(BaseModel):
    """Response body of ``POST /api/extract-keywords``."""

    success: bool
    categories: List[str] = []  # extracted category keywords; may be empty
    primary_category: Optional[str] = None  # main keyword, if any
    original_query: str  # the user's input, echoed back
    method: Optional[str] = None  # extraction method; the endpoint sets "error" when extraction raised
    error: Optional[str] = None  # error text when the extraction raised


@app.get("/")
async def root():
    """根路径"""
    return {
        "message": "抖音数据API",
        "docs": "/docs",
        "endpoints": {
            "search": "/api/search",
            "creative_guidance": "/api/creative-guidance",
            "analyze": "/api/analyze",
            "analyze_file": "/api/analyze-file",
            "agent": "/api/agent"
        }
    }


async def load_cookies_for_creative(cookie_file: str):
    """Load cookies from a JSON file.

    Args:
        cookie_file: Path of a UTF-8 encoded JSON file holding the cookies.

    Returns:
        The decoded JSON content, or ``None`` when the file cannot be opened
        or does not contain valid UTF-8 encoded JSON.
    """
    import logging  # local import: this module has no file-level logger

    try:
        with open(cookie_file, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError) as exc:
        # OSError covers missing or unreadable files; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError. Loading stays
        # best-effort (callers continue without cookies), but the reason is
        # logged instead of being discarded silently.
        logging.getLogger(__name__).warning(
            "Could not load cookie file %s: %s", cookie_file, exc
        )
        return None


async def extract_creative_video_data(page) -> dict:
    """Extract video entries from the creative-guidance page.

    Runs a JavaScript snippet inside the page. For every author link (anchors
    whose href contains ``iesdouyin.com/share/user/``) the script walks up at
    most 10 ancestors looking for an element that contains the statistics
    container, then reads description, statistics, hot words, hashtags and
    duration from that element. Entries without an author or a description
    are dropped, and an error while processing one entry is swallowed by the
    script so the remaining entries are still returned.

    Args:
        page: The page to inspect; it must provide an awaitable
            ``evaluate(script)`` method (the caller in this module passes a
            Playwright page).

    Returns:
        The object built by the script: ``total`` (number of videos),
        ``videos`` (list of dicts with ``index``, ``author``,
        ``description``, ``authorLink``, ``duration``, ``hot``, ``plays``,
        ``likes``, ``comments``, ``hotWords``, ``hashTags``), ``crawlTime``
        (ISO timestamp taken in the browser), ``pageTitle`` and ``pageUrl``.
    """
    # NOTE(review): the CSS class names used below carry what look like
    # build-generated suffixes (e.g. ``contain-info-LpWGHS``), so they
    # presumably change when the site is redeployed -- verify when the
    # extraction starts returning nothing.
    # NOTE(review): if the ancestor walk runs out of levels without finding
    # the statistics container, the script continues with the last ancestor
    # it reached instead of skipping the entry -- confirm this is intended.
    # The script is a regular (non-raw) Python string, so ``\\s`` in the
    # hashtag pattern reaches the browser as ``\s``.
    data = await page.evaluate("""
        () => {
            const videos = [];
            const authorLinks = Array.from(document.querySelectorAll('a[href*="iesdouyin.com/share/user/"]'));
            
            authorLinks.forEach((authorLink, index) => {
                try {
                    const author = authorLink.textContent.trim();
                    let container = authorLink.parentElement;
                    let maxLevels = 10;
                    
                    while (container && maxLevels > 0) {
                        if (container.querySelector('.contain-info-LpWGHS')) {
                            break;
                        }
                        container = container.parentElement;
                        maxLevels--;
                    }
                    
                    if (!container) return;
                    
                    const paragraphs = Array.from(container.querySelectorAll('p'));
                    let description = '';
                    for (let p of paragraphs) {
                        const text = p.textContent.trim();
                        if (text && text !== '|' && text.length > 5 && !text.includes('万') && !text.includes(':')) {
                            description = text;
                            break;
                        }
                    }
                    
                    let hot = '', plays = '', likes = '', comments = '';
                    const infoContainer = container.querySelector('.contain-info-LpWGHS');
                    if (infoContainer) {
                        const infoItems = infoContainer.querySelectorAll('.each-info-TpmTI0');
                        infoItems.forEach(item => {
                            const img = item.querySelector('img');
                            const text = item.textContent.trim();
                            if (img && img.src) {
                                if (img.src.includes('hot_')) {
                                    hot = text;
                                } else if (img.src.includes('play')) {
                                    plays = text;
                                } else if (img.src.includes('digg')) {
                                    likes = text;
                                } else if (img.src.includes('comment')) {
                                    comments = text;
                                }
                            }
                        });
                    }
                    
                    const hotWords = [];
                    const hotWordElements = container.querySelectorAll('.other-text-XeleRf');
                    hotWordElements.forEach((el, i) => {
                        const text = el.textContent.trim();
                        if (i === 0 && text.includes('热词')) {
                        } else if (text && !text.includes('热词')) {
                            hotWords.push(text);
                        }
                    });
                    
                    const hashTags = description.match(/#[^\\s#]+/g) || [];
                    
                    let duration = '';
                    const timeElements = container.querySelectorAll('.time-text-mask-WmpK85 p');
                    if (timeElements.length > 0) {
                        duration = timeElements[0].textContent.trim();
                    }
                    
                    if (author && description) {
                        videos.push({
                            index: index + 1,
                            author: author,
                            description: description,
                            authorLink: authorLink.href,
                            duration: duration,
                            hot: hot,
                            plays: plays,
                            likes: likes,
                            comments: comments,
                            hotWords: hotWords,
                            hashTags: hashTags
                        });
                    }
                } catch (e) {
                }
            });
            
            return {
                total: videos.length,
                videos: videos,
                crawlTime: new Date().toISOString(),
                pageTitle: document.title,
                pageUrl: window.location.href
            };
        }
    """)
    return data


async def crawl_creative_guidance_api(category: str, headless: bool, cookie_file: str, output_dir: str):
    """Crawl the Douyin creative-guidance page (API variant).

    Opens the page in Chromium, optionally selects a category tab, extracts
    the video cards and, when at least one video was found, writes the result
    to a timestamped JSON file inside ``output_dir``.

    Args:
        category: Category tab to select. Empty or "全部" keeps the default
            view.
        headless: Whether to launch the browser without a visible window.
        cookie_file: Path of a JSON cookie file. When it exists and loads
            successfully, its content is used as the cookies of the browser
            context.
        output_dir: Directory that receives the JSON dump; created on demand.

    Returns:
        The dict produced by ``extract_creative_video_data``.
    """
    url = "https://creator.douyin.com/creator-micro/creative-guidance"

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=headless)

        # Everything after the launch runs inside the try block so the
        # browser is closed even when creating the context or page fails.
        try:
            context_options = {}
            if Path(cookie_file).exists():
                cookies = await load_cookies_for_creative(cookie_file)
                if cookies:
                    context_options['storage_state'] = {'cookies': cookies}

            context = await browser.new_context(**context_options)
            page = await context.new_page()

            await page.goto(url, wait_until="domcontentloaded", timeout=60000)
            # Fixed wait for the client-side rendered content to appear.
            await asyncio.sleep(10)

            if category and category != "全部":
                # Expand the category list first so every tab is clickable.
                await page.evaluate("""
                    () => {
                        const showButtons = document.querySelectorAll('.show-button-sDo51G');
                        showButtons.forEach(btn => {
                            const text = btn.textContent.trim();
                            // 如果按钮不是"收起"，说明需要展开
                            if (!text.includes('收起')) {
                                btn.click();
                            }
                        });
                    }
                """)
                await asyncio.sleep(1)

                # Find and click the category tab. The category is passed as
                # an argument instead of being pasted into the script text:
                # it comes from the request, and interpolating it would let a
                # quote break the script or inject arbitrary JavaScript.
                category_clicked = await page.evaluate(
                    """
                    (targetCategory) => {
                        const categoryDivs = Array.from(document.querySelectorAll('.each-kind-MR__DN'));
                        const targetDiv = categoryDivs.find(div =>
                            div.textContent.trim() === targetCategory
                        );
                        if (targetDiv) {
                            targetDiv.click();
                            return true;
                        }
                        return false;
                    }
                    """,
                    category,
                )

                if category_clicked:
                    # Fixed wait for the category content to load.
                    await asyncio.sleep(8)

            data = await extract_creative_video_data(page)

            if data['total'] > 0:
                # Persist the result to a file.
                output_path = Path(output_dir)
                output_path.mkdir(parents=True, exist_ok=True)

                # Replace path separators, characters that are invalid in
                # file names and control characters so a request-supplied
                # category cannot move the file outside output_dir.
                unsafe_chars = '\\/:*?"<>|'
                safe_category = "".join(
                    "_" if ch in unsafe_chars or ord(ch) < 32 else ch
                    for ch in (category or "")
                )

                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                category_suffix = f"_{safe_category}" if safe_category else ""
                json_file = output_path / f"douyin_creative_guidance{category_suffix}_{timestamp}.json"

                result_data = {
                    'page_url': url,
                    'page_title': '抖音创作指导 - 热门视频',
                    'category': category or '全部',
                    'platform': 'douyin',
                    'crawl_time': datetime.now().isoformat(),
                    'total_videos': len(data['videos']),
                    'videos': data['videos']
                }

                with open(json_file, "w", encoding="utf-8") as f:
                    json.dump(result_data, f, ensure_ascii=False, indent=2)

            return data

        finally:
            await browser.close()


@app.post("/api/creative-guidance", response_model=CreativeGuidanceResponse)
async def get_creative_guidance(request: CreativeGuidanceRequest):
    """
    获取抖音创作指导数据
    
    - **category**: 分类标签（全部/美食/旅行/泛生活/汽车/科技/游戏/二次元），默认"全部"
    - **headless**: 是否使用无头模式，默认True
    - **cookie_file**: Cookie文件路径
    - **output_dir**: 输出目录
    """
    try:
        # 检查Cookie文件
        if not Path(request.cookie_file).exists():
            raise HTTPException(
                status_code=400,
                detail=f"Cookie文件不存在: {request.cookie_file}"
            )
        
        # 抓取数据
        data = await crawl_creative_guidance_api(
            category=request.category,
            headless=request.headless,
            cookie_file=request.cookie_file,
            output_dir=request.output_dir
        )
        
        if not data or data['total'] == 0:
            return CreativeGuidanceResponse(
                success=False,
                category=request.category,
                total_count=0,
                videos=[],
                page_url="https://creator.douyin.com/creator-micro/creative-guidance",
                crawl_time=datetime.now().isoformat(),
                message="未获取到任何视频数据，请确保已登录并且页面加载完成"
            )
        
        return CreativeGuidanceResponse(
            success=True,
            category=request.category,
            total_count=data['total'],
            videos=data['videos'],
            page_url=data['pageUrl'],
            crawl_time=data['crawlTime'],
            message=f"成功获取 {data['total']} 个视频"
        )
        
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"获取创作指导数据时出错: {str(e)}")


@app.post("/api/search", response_model=SearchResponse)
async def search_douyin(request: SearchRequest):
    """
    搜索抖音视频
    
    - **keyword**: 搜索关键词（必填）
    - **max_scroll**: 最大滚动次数，默认5次
    - **headless**: 是否使用无头模式，默认True
    - **cookie_file**: Cookie文件路径
    """
    crawler = DouyinSearchCrawler(headless=request.headless)
    
    try:
        # 初始化浏览器
        await crawler.init_browser()
        
        # 加载cookies
        cookie_loaded = await crawler.load_cookies(request.cookie_file)
        if not cookie_loaded:
            raise HTTPException(
                status_code=400,
                detail=f"无法加载Cookie文件: {request.cookie_file}"
            )
        
        # 搜索视频
        videos = await crawler.search_videos(request.keyword, max_scroll=request.max_scroll)
        
        if not videos:
            return SearchResponse(
                success=False,
                keyword=request.keyword,
                total_count=0,
                videos=[],
                message="未获取到任何视频数据"
            )
        
        # 保存结果
        await crawler.save_results(request.keyword, videos)
        
        return SearchResponse(
            success=True,
            keyword=request.keyword,
            total_count=len(videos),
            videos=videos,
            message="搜索成功"
        )
        
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"搜索过程中出错: {str(e)}")
    
    finally:
        await crawler.close()


@app.post("/api/analyze", response_model=AnalyzeResponse)
async def analyze_videos(request: AnalyzeRequest):
    """
    使用AI分析视频数据
    
    - **videos**: 视频数据列表（必填）
    - **prompt_file**: 提示词文件路径，默认"prompts/analyze_prompt.md"
    - **custom_instruction**: 自定义分析指令（可选）
    - **model**: 使用的模型名称，默认"qwen-plus"
    - **api_key**: 阿里云百炼API Key（可选，默认从环境变量DASHSCOPE_API_KEY读取）
    """
    try:
        # 检查提示词文件
        if not Path(request.prompt_file).exists():
            raise HTTPException(
                status_code=400,
                detail=f"提示词文件不存在: {request.prompt_file}"
            )
        
        # 创建分析器
        try:
            analyzer = AIAnalyzer(api_key=request.api_key, model=request.model)
        except ValueError as e:
            raise HTTPException(
                status_code=400,
                detail=str(e)
            )
        
        # 分析数据
        result = analyzer.analyze(
            videos=request.videos,
            prompt_file=request.prompt_file,
            custom_instruction=request.custom_instruction
        )
        
        return AnalyzeResponse(**result)
        
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"分析过程中出错: {str(e)}")


@app.post("/api/analyze-file", response_model=AnalyzeResponse)
async def analyze_from_file(request: AnalyzeFileRequest):
    """
    从JSON文件读取数据并使用AI分析
    
    - **json_file**: JSON数据文件路径（必填）
    - **prompt_file**: 提示词文件路径，默认"prompts/analyze_prompt.md"
    - **custom_instruction**: 自定义分析指令（可选）
    - **model**: 使用的模型名称，默认"qwen-plus"
    - **api_key**: 阿里云百炼API Key（可选，默认从环境变量DASHSCOPE_API_KEY读取）
    """
    try:
        # 检查文件
        if not Path(request.json_file).exists():
            raise HTTPException(
                status_code=400,
                detail=f"JSON文件不存在: {request.json_file}"
            )
        
        if not Path(request.prompt_file).exists():
            raise HTTPException(
                status_code=400,
                detail=f"提示词文件不存在: {request.prompt_file}"
            )
        
        # 创建分析器
        try:
            analyzer = AIAnalyzer(api_key=request.api_key, model=request.model)
        except ValueError as e:
            raise HTTPException(
                status_code=400,
                detail=str(e)
            )
        
        # 分析数据
        result = analyzer.analyze_from_file(
            json_file=request.json_file,
            prompt_file=request.prompt_file,
            custom_instruction=request.custom_instruction
        )
        
        return AnalyzeResponse(**result)
        
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"分析过程中出错: {str(e)}")


@app.post("/api/agent", response_model=AgentResponse)
async def run_agent(request: AgentRequest):
    """
    运行AI智能代理 - 创作灵感生成
    
    AI代理会根据用户描述自动：
    1. 理解用户需求并提取内容分类
    2. 获取相关的热门视频数据
    3. 生成9个具体可执行的创作灵感
    4. 如果用户不满意，可以获取更多灵感
    
    **支持的内容分类**（共26个）：
    - 美食、旅行、泛生活、汽车、科技、游戏、二次元
    - 娱乐、明星、体育、文化教育、校园、政务
    - 时尚、才艺、随拍、动植物、图文控
    - 剧情、亲子、三农、创意、户外、公益
    
    **参数**:
    - **query**: 用户描述（必填），例如：
      - "我想做一些美食相关的短视频，主要是家常菜的制作教程"
      - "我想拍一些关于大学生活的有趣视频，记录校园日常"
      - "我想做一些关于健身的短视频，分享简单的居家锻炼方法"
    - **system_prompt_file**: 系统提示词文件，默认"prompts/agent_prompt.md"
    - **max_iterations**: 最大迭代次数，默认10（建议设置为15以支持完整流程）
    - **model**: 使用的模型名称，默认"qwen-plus"
    - **api_key**: 阿里云百炼API Key（可选）
    
    **返回**:
    - 识别的内容分类
    - 9个创作灵感（包含标题、核心创意、执行建议、热门标签）
    - 创作提示和建议
    
    **示例**:
    ```json
    {
      "query": "我想做一些美食相关的短视频，主要是家常菜的制作教程",
      "max_iterations": 15
    }
    ```
    """
    try:
        # 检查提示词文件
        if not Path(request.system_prompt_file).exists():
            raise HTTPException(
                status_code=400,
                detail=f"系统提示词文件不存在: {request.system_prompt_file}"
            )
        
        # 创建代理
        try:
            agent = create_agent()
            if request.api_key:
                agent.api_key = request.api_key
                import dashscope
                dashscope.api_key = request.api_key
            agent.model = request.model
        except ValueError as e:
            raise HTTPException(
                status_code=400,
                detail=str(e)
            )
        
        # 运行代理
        result = await agent.run(
            user_input=request.query,
            system_prompt_file=request.system_prompt_file,
            max_iterations=request.max_iterations
        )
        
        return AgentResponse(
            success=result["success"],
            final_answer=result.get("final_answer"),
            iteration=result["iteration"],
            tool_calls=result.get("tool_calls", []),
            error=result.get("error")
        )
        
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Agent执行过程中出错: {str(e)}")


@app.post("/api/extract-keywords", response_model=ExtractKeywordsResponse)
async def extract_keywords(request: ExtractKeywordsRequest):
    """
    提取关键词 - 从用户输入中提取1-3个内容分类关键词
    
    这个接口用于在用户输入后，先提取关键词展示给用户确认，
    然后用户可以选择关键词后再调用 /api/agent 接口生成灵感。
    
    **工作流程**:
    1. 用户输入一段描述
    2. 调用此接口提取关键词
    3. 前端展示关键词给用户
    4. 用户确认后，前端调用 /api/agent 接口生成灵感
    
    **支持的内容分类**（共26个）：
    - 美食、旅行、泛生活、汽车、科技、游戏、二次元
    - 娱乐、明星、体育、文化教育、校园、政务
    - 时尚、才艺、随拍、动植物、图文控
    - 剧情、亲子、三农、创意、户外、公益
    
    **参数**:
    - **query**: 用户输入的文字（必填）
    
    **返回**:
    - **success**: 是否成功
    - **categories**: 提取的关键词列表（1-3个，可能为空）
    - **primary_category**: 主要关键词（第一个，可能为null）
    - **original_query**: 用户原始输入
    - **method**: 提取方法
    - **error**: 错误信息（如果有）
    
    **示例请求**:
    ```json
    {
      "query": "我想做一些关于大学生活的有趣视频，记录校园日常"
    }
    ```
    
    **示例返回**:
    ```json
    {
      "success": true,
      "categories": ["校园", "青春"],
      "primary_category": "校园",
      "original_query": "我想做一些关于大学生活的有趣视频，记录校园日常",
      "method": "ai_classification"
    }
    ```
    """
    try:
        from ai_agent import extract_search_keywords
        
        # 调用关键词提取函数
        result = extract_search_keywords(request.query)
        
        return ExtractKeywordsResponse(
            success=result.get("success", True),
            categories=result.get("categories", []),
            primary_category=result.get("primary_category"),
            original_query=result.get("original_query", request.query),
            method=result.get("method"),
            error=None
        )
        
    except Exception as e:
        return ExtractKeywordsResponse(
            success=False,
            categories=[],
            primary_category=None,
            original_query=request.query,
            method="error",
            error=str(e)
        )


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8002)