"""
scholar_api.py  — Free Google Scholar Plagiarism Check API + HTML Report Server
================================================================================
Endpoints:
  POST /check_title        → checks Arabic + English titles against Google Scholar
  POST /save_report        → saves a full HTML report to disk, returns a public URL
  GET  /report/<uuid>      → serves a saved HTML report page
  GET  /health             → health check
"""

from flask import Flask, request, jsonify, send_from_directory, abort, Response
from scholarly import scholarly
import re
import difflib
import sys
import os
import json
import base64
import datetime

# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)

# ── Config ────────────────────────────────────────────────────────────────────
# Directory where report HTML files are saved (must be writable by this process).
# save_report() writes into it, serve_report() reads from it, health() lists it.
REPORTS_DIR  = "/home/portal/public_html/reports"
# Public base URL of the reports directory (no trailing slash); save_report()
# appends "/<filename>" to it to build the URL it returns.
PUBLIC_BASE  = "https://eportal.damanhour.edu.eg/reports"

# Runs at import time: create the reports directory (and any missing parents);
# exist_ok=True makes this a no-op when the directory is already there.
os.makedirs(REPORTS_DIR, exist_ok=True)

# ── Title utilities ───────────────────────────────────────────────────────────
# Arabic harakat (tanwin, short vowels, shadda, sukun) plus tatweel/kashida:
# purely typographic marks that never change which title is meant.
_ARABIC_MARKS_RE = re.compile(r'[\u0640\u064B-\u0652]')

# Punctuation replaced by a single space before comparison: ASCII marks,
# guillemets (U+00AB/U+00BB), en/em dashes (U+2013/U+2014), typographic quotes
# (U+2018/U+2019/U+201C/U+201D) and the Arabic comma, semicolon and question
# mark (U+060C/U+061B/U+061F).
_TITLE_PUNCT_RE = re.compile(
    r"[\"'`()\[\]\\/\-_.:,;!?|~"
    r"\u00AB\u00BB\u2013\u2014\u2018\u2019\u201C\u201D\u060C\u061B\u061F]"
)

_WHITESPACE_RUN_RE = re.compile(r'\s+')


def normalize_title(title):
    """Return a canonical form of ``title`` for fuzzy comparison.

    The title is lower-cased, Arabic diacritics and tatweel are removed,
    punctuation (ASCII, typographic quotes, dashes, guillemets and Arabic
    punctuation) is replaced by spaces, and whitespace runs are collapsed.

    Args:
        title: The raw title; any falsy value (``None``, ``''``) is accepted.

    Returns:
        The normalised title, or ``''`` when ``title`` is falsy.
    """
    if not title:
        return ''
    title = title.lower()
    # Marks are deleted (not spaced) so a vocalised word equals its bare form.
    title = _ARABIC_MARKS_RE.sub('', title)
    # Punctuation becomes a space so adjacent words are not glued together.
    title = _TITLE_PUNCT_RE.sub(' ', title)
    return _WHITESPACE_RUN_RE.sub(' ', title).strip()

def calculate_similarity(t1, t2):
    """Return the similarity of two titles as a percentage in [0, 100].

    Both titles are passed through ``normalize_title`` first; if either one
    normalises to an empty string the result is ``0.0``. Otherwise the score
    is ``difflib.SequenceMatcher.ratio()`` scaled by 100.
    """
    left, right = normalize_title(t1), normalize_title(t2)
    if left and right:
        matcher = difflib.SequenceMatcher(None, left, right)
        return matcher.ratio() * 100
    return 0.0

def check_title(query_title, max_results=5):
    """Look ``query_title`` up on Google Scholar and return its closest match.

    Args:
        query_title: Title to search for. Falsy values and titles shorter
            than three characters (after stripping) are not searched.
        max_results: Maximum number of search results to compare against
            (defaults to 5, the previously hard-coded limit).

    Returns:
        A ``(ratio, title, link)`` tuple: the best similarity percentage as
        computed by ``calculate_similarity``, the matching title and its
        ``pub_url``. The placeholder "لا يوجد" is used where no value is
        available. If the lookup raises, the error is logged to stderr and
        ``(0.0, "خطأ في الاتصال بقاعدة البيانات", "لا يوجد")`` is returned.
    """
    if not query_title or len(query_title.strip()) < 3:
        return 0.0, "لا يوجد", "لا يوجد"

    best_ratio = 0.0
    best_title = "لا يوجد"
    best_link = "لا يوجد"
    try:
        results = scholarly.search_pubs(query_title)
        # zip() against a bounded range stops after max_results items or as
        # soon as the result iterator is exhausted, whichever comes first,
        # and never pulls an extra result beyond the limit.
        for _, pub in zip(range(max_results), results):
            # 'bib' may be absent or None; treat both as "no metadata".
            found_title = (pub.get('bib') or {}).get('title', '')
            ratio = calculate_similarity(query_title, found_title)
            if ratio > best_ratio:
                best_ratio = ratio
                best_title = found_title
                # A present-but-empty pub_url must not leak None into the report.
                best_link = pub.get('pub_url') or "لا يوجد"
    except Exception as e:
        # Boundary with the third-party client: any failure (network, parsing,
        # blocking) is logged and reported as a lookup error, discarding
        # partial results exactly as before.
        print(f"Error checking Scholar: {e}", file=sys.stderr)
        return 0.0, "خطأ في الاتصال بقاعدة البيانات", "لا يوجد"
    return best_ratio, best_title, best_link

# ── /check_title ──────────────────────────────────────────────────────────────
@app.route('/check_title', methods=['POST'])
def handle_check():
    """Check the Arabic and English titles in the JSON body against Scholar.

    Expected JSON body (both keys optional)::

        {"document_title": "<Arabic title>", "data_english_title": "<English title>"}

    Returns:
        ``{"text_report": "<report>"}`` on success. The title is flagged as a
        duplicate when either similarity is at least 85%. A JSON body that is
        not an object yields ``{"error": ...}`` with status 400.
    """
    data = request.json or {}
    if not isinstance(data, dict):
        return jsonify({"error": "JSON body must be an object"}), 400

    # JSON null or non-string values are treated as "no title supplied"
    # instead of crashing on .strip().
    raw_ar = data.get('document_title')
    raw_en = data.get('data_english_title')
    ar_title = raw_ar.strip() if isinstance(raw_ar, str) else ''
    en_title = raw_en.strip() if isinstance(raw_en, str) else ''

    ar_ratio, ar_match, ar_link = check_title(ar_title)
    en_ratio, en_match, en_link = check_title(en_title)

    is_duplicate = (ar_ratio >= 85.0) or (en_ratio >= 85.0)
    decision     = "لا" if is_duplicate else "نعم"

    # NOTE(review): check_title() reports a failed lookup as 0.0 similarity,
    # so a Scholar outage is presented below as "no match found / unique".
    # Confirm whether the caller should receive a distinct outcome instead.
    if is_duplicate:
        eval_comment = (
            f"تم العثور على دراسة سابقة مطابقة بنسبة عالية للعنوان المقترح "
            f"({max(ar_ratio, en_ratio):.1f}%). يوصى بتعديل صياغة الفكرة أو تغيير العنوان لتجنب التكرار."
        )
    else:
        eval_comment = (
            "لم يتم العثور على أي نتائج مطابقة أو مشابهة للعناوين المقدمة في قواعد البيانات العلمية. "
            "العنوان فريد وأصيل."
        )

    report = (
        f"1. فحص العنوان العربي:\n"
        f"- نسبة التشابه: {ar_ratio:.1f}%\n"
        f"- المطابقة: {ar_match}\n"
        f"- رابط المصدر: {ar_link}\n\n"
        f"2. فحص العنوان الإنجليزي:\n"
        f"- نسبة التشابه: {en_ratio:.1f}%\n"
        f"- المطابقة: {en_match}\n"
        f"- رابط المصدر: {en_link}\n\n"
        f"3. القرار للتسجيل الجديد (نعم/لا): {decision}\n\n"
        f"4. تقييم سريع (أصالة الفكرة وملاحظة الصياغة في سطرين فقط).\n"
        f"{eval_comment}"
    )

    return jsonify({"text_report": report})

# ── /save_report ──────────────────────────────────────────────────────────────
@app.route('/save_report', methods=['POST'])
def save_report():
    """
    Receive a full HTML string + uuid from n8n, save it as a file, return URL.

    Expected JSON body:
      {
        "uuid": "7c1e32ec-...",
        "html": "<!DOCTYPE html>..."
      }

    Returns:
      200 {"url": ..., "filename": ...} once the file has been written,
      400 {"error": ...} when uuid/html are missing, not strings, or the uuid
          contains no filename-safe characters,
      500 {"error": ...} when the file cannot be written.
    """
    # NOTE(review): no authentication is visible in this handler, and the HTML
    # is stored verbatim under a directory described as publicly served —
    # confirm access is restricted upstream (reverse proxy / firewall).
    data = request.json or {}
    raw_uuid = data.get('uuid') if isinstance(data, dict) else None
    raw_html = data.get('html') if isinstance(data, dict) else None

    # null / non-string fields are rejected here instead of crashing on .strip().
    report_id = raw_uuid.strip() if isinstance(raw_uuid, str) else ''
    html      = raw_html.strip() if isinstance(raw_html, str) else ''

    if not report_id or not html:
        return jsonify({"error": "Missing uuid or html"}), 400

    # Sanitise uuid to only allow safe filename characters
    safe_uuid = re.sub(r'[^a-zA-Z0-9\-]', '', report_id)[:64]
    if not safe_uuid:
        # e.g. uuid "../" would otherwise be written as "report-.html".
        return jsonify({"error": "Invalid uuid"}), 400

    filename  = f"report-{safe_uuid}.html"
    filepath  = os.path.join(REPORTS_DIR, filename)

    try:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(html)
    except (OSError, UnicodeError) as e:
        # OSError: disk/permission problems; UnicodeError: text that cannot be
        # encoded as UTF-8 (e.g. lone surrogates decoded from JSON escapes).
        print(f"[save_report] ERROR: {e}", file=sys.stderr)
        return jsonify({"error": str(e)}), 500

    url = f"{PUBLIC_BASE}/{filename}"
    print(f"[save_report] Saved: {filepath}  →  {url}", file=sys.stderr)
    return jsonify({"url": url, "filename": filename})

# ── /report/<uuid> ────────────────────────────────────────────────────────────
@app.route('/report/<string:safe_uuid>')
def serve_report(safe_uuid):
    """Serve the saved HTML report for a given uuid.

    The uuid from the URL is reduced to ``[a-zA-Z0-9-]`` (max 64 characters),
    the same sanitisation ``save_report`` applies, and the matching
    ``report-<uuid>.html`` file under ``REPORTS_DIR`` is returned as HTML.
    Responds with 404 when no such file exists.
    """
    safe_uuid = re.sub(r'[^a-zA-Z0-9\-]', '', safe_uuid)[:64]
    filename  = f"report-{safe_uuid}.html"
    filepath  = os.path.join(REPORTS_DIR, filename)
    try:
        # Open directly rather than checking isfile() first, so a file removed
        # between the check and the read still yields 404 instead of a 500.
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
        abort(404)
    # Pass the bare mimetype: Werkzeug appends "; charset=utf-8" to text/*
    # types itself, so spelling the charset out here would duplicate it in
    # the Content-Type header.
    return Response(content, mimetype='text/html')

# ── /health ───────────────────────────────────────────────────────────────────
@app.route('/health')
def health():
    """Report service liveness plus the number of ``.html`` files in REPORTS_DIR."""
    html_count = sum(
        1 for entry in os.listdir(REPORTS_DIR) if entry.endswith('.html')
    )
    payload = {
        "status": "ok",
        "reports_dir": REPORTS_DIR,
        "reports_count": html_count,
    }
    return jsonify(payload)

# ── Main ──────────────────────────────────────────────────────────────────────
if __name__ == '__main__':
    # Startup banner, then Flask's built-in server listening on every
    # interface (0.0.0.0) at port 5000.
    for banner_line in (
        "Starting Scholar API + HTML Report Server on port 5000 ...",
        f"Reports directory : {REPORTS_DIR}",
        f"Public base URL   : {PUBLIC_BASE}",
    ):
        print(banner_line)
    app.run(host='0.0.0.0', port=5000)
