#!/usr/bin/env python3
"""Publish Codex working-folder documents to the 1Desk document registry."""
from __future__ import annotations

import argparse
import base64
import hashlib
import json
import mimetypes
import os
import re
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
from email.message import Message
from pathlib import Path
from typing import Any

# Switch the standard streams to UTF-8 when the stream object offers
# reconfigure(); the script prints JSON with ensure_ascii=False, so output
# may contain non-ASCII text.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8")
if hasattr(sys.stderr, "reconfigure"):
    sys.stderr.reconfigure(encoding="utf-8")


# Base URL used when ONEDESK_API_BASE is not set (see api_base()).
DEFAULT_API_BASE = "https://za.a-rial.ru"
# Source code sent with every request when ONEDESK_SOURCE_CODE is unset or blank.
DEFAULT_SOURCE_CODE = "codex_docs"
# Lower-cased file suffixes that should_skip() lets through; anything else is skipped.
FINAL_EXTENSIONS = {".pdf", ".docx", ".xlsx", ".xls", ".tsv", ".csv", ".txt", ".md", ".json", ".yml", ".yaml"}
# Metadata keys copied into the patch by `patch-metadata --from-metadata`.
CONTACT_METADATA_KEYS = {
    "contact_name",
    "contact_person",
    "contact_company",
    "contact_email",
    "contact_phone",
    "contact_source",
    "client_contact",
    "client_email",
    "client_phone",
    "customer_contact",
    "customer_email",
    "customer_phone",
    "email",
    "phone",
}
# Directory names whose contents should_skip() excludes from collection.
SKIP_DIRS = {
    ".git",
    ".agents",
    ".build",
    ".cache",
    ".playwright-cli",
    ".playwright-mcp",
    ".secrets",
    "__pycache__",
    "_docx_render",
    "_pdf_render",
    "_preview",
    "_render",
    "_render_word",
    "cache",
    "logs",
    "node_modules",
    "source",
    "tmp",
}


def queue_dir() -> Path:
    """Return the directory that holds queued publish payloads.

    ``ONEDESK_DOCUMENTS_QUEUE`` wins when it is set and non-empty. Otherwise
    the queue is the ``1desk-codex-document-queue`` folder under
    ``LOCALAPPDATA`` or, when that variable is unset, the home directory.
    """
    override = os.environ.get("ONEDESK_DOCUMENTS_QUEUE")
    if not override:
        base = os.environ.get("LOCALAPPDATA", str(Path.home()))
        return Path(base) / "1desk-codex-document-queue"
    return Path(override)


def api_base() -> str:
    """Return the API base URL without trailing slashes.

    Reads ``ONEDESK_API_BASE`` and falls back to ``DEFAULT_API_BASE``.
    """
    configured = os.environ.get("ONEDESK_API_BASE", DEFAULT_API_BASE)
    return configured.rstrip("/")


def api_key() -> str:
    """Resolve the API key from the environment or a key file.

    Lookup order:
      1. ``ONEDESK_API_KEY`` (stripped), when non-empty.
      2. The file named by ``ONEDESK_API_KEY_FILE``, when set.
      3. ``secrets/codex-documents-api-key.txt`` one level above this
         script's directory.

    A key file may hold an ``api_key=<value>`` line (the first such line is
    used) or, when it consists of a single line, the bare key itself.

    Raises:
        SystemExit: When no source yields a non-empty key.
    """
    from_env = os.environ.get("ONEDESK_API_KEY", "").strip()
    if from_env:
        return from_env

    configured_file = os.environ.get("ONEDESK_API_KEY_FILE", "").strip()
    bundled = Path(__file__).resolve().parents[1] / "secrets" / "codex-documents-api-key.txt"
    search_paths = ([Path(configured_file)] if configured_file else []) + [bundled]

    for candidate in search_paths:
        if not candidate.is_file():
            continue
        content = candidate.read_text(encoding="utf-8-sig").strip()
        # Value of the first "api_key=..." line, or "" when there is none.
        found = next(
            (
                rhs.strip()
                for lhs, eq, rhs in (row.partition("=") for row in content.splitlines())
                if eq and lhs.strip() == "api_key"
            ),
            "",
        )
        # A single-line file is treated as the raw key.
        if not found and "\n" not in content:
            found = content
        if found:
            return found
    raise SystemExit("ONEDESK_API_KEY is required")


def source_code() -> str:
    """Return the source code sent with API requests.

    Uses ``ONEDESK_SOURCE_CODE`` (stripped); an unset or blank value yields
    ``DEFAULT_SOURCE_CODE``.
    """
    configured = os.environ.get("ONEDESK_SOURCE_CODE", DEFAULT_SOURCE_CODE).strip()
    if configured:
        return configured
    return DEFAULT_SOURCE_CODE


def request_json(path: str, payload: dict[str, Any], timeout: int = 60) -> dict[str, Any]:
    """POST *payload* as JSON to the API and return the decoded JSON response.

    Args:
        path: URL path appended to the value of ``api_base()``.
        payload: Request body; values ``json`` cannot encode natively are
            stringified (``default=str``).
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The decoded response body, or ``{}`` when the body is empty.

    Raises:
        RuntimeError: On an HTTP error status, or when a successful response
            is not valid JSON. The message is always a JSON object string;
            for HTTP errors it carries ``http_status``.
        ConnectionError: When ``urlopen`` raises ``URLError``.
    """
    data = json.dumps(payload, ensure_ascii=False, separators=(",", ":"), default=str).encode("utf-8")
    req = urllib.request.Request(
        api_base() + path,
        data=data,
        headers={
            "Authorization": f"Bearer {api_key()}",
            "Content-Type": "application/json; charset=UTF-8",
            "Accept": "application/json",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as response:
            raw = response.read().decode("utf-8")
            if not raw:
                return {}
            try:
                return json.loads(raw)
            except json.JSONDecodeError as exc:
                preview = raw[:300].replace("\r", " ").replace("\n", " ")
                raise RuntimeError(json.dumps({"ok": False, "error": "invalid_json_response", "preview": preview}, ensure_ascii=False)) from exc
    except urllib.error.HTTPError as exc:
        raw = exc.read().decode("utf-8", errors="replace")
        try:
            body = json.loads(raw)
        except json.JSONDecodeError:
            body = None
        # The error body may be valid JSON without being an object (e.g. a
        # list or a bare string); wrap it so callers always get a dict.
        if not isinstance(body, dict):
            body = {"ok": False, "error": raw or str(exc.reason)}
        body.setdefault("http_status", exc.code)
        raise RuntimeError(json.dumps(body, ensure_ascii=False)) from exc
    except urllib.error.URLError as exc:
        raise ConnectionError(str(exc)) from exc


def request_bytes(path: str, payload: dict[str, Any], timeout: int = 60) -> tuple[dict[str, str], bytes]:
    """POST *payload* as JSON and return the raw response headers and body.

    Args:
        path: URL path appended to the value of ``api_base()``.
        payload: Request body; values ``json`` cannot encode natively are
            stringified (``default=str``).
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        ``(headers, content)`` where *headers* is a plain dict built from the
        response headers and *content* is the undecoded body.

    Raises:
        RuntimeError: On an HTTP error status. The message is a JSON object
            string carrying ``http_status``.
        ConnectionError: When ``urlopen`` raises ``URLError``.
    """
    data = json.dumps(payload, ensure_ascii=False, separators=(",", ":"), default=str).encode("utf-8")
    req = urllib.request.Request(
        api_base() + path,
        data=data,
        headers={
            "Authorization": f"Bearer {api_key()}",
            "Content-Type": "application/json; charset=UTF-8",
            "Accept": "application/octet-stream, application/json",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as response:
            return dict(response.headers.items()), response.read()
    except urllib.error.HTTPError as exc:
        raw = exc.read().decode("utf-8", errors="replace")
        try:
            body = json.loads(raw)
        except json.JSONDecodeError:
            body = None
        # The error body may be valid JSON without being an object (e.g. a
        # list or a bare string); wrap it so callers always get a dict.
        if not isinstance(body, dict):
            body = {"ok": False, "error": raw or str(exc.reason)}
        body.setdefault("http_status", exc.code)
        raise RuntimeError(json.dumps(body, ensure_ascii=False)) from exc
    except urllib.error.URLError as exc:
        raise ConnectionError(str(exc)) from exc


def parse_simple_yaml(path: Path) -> dict[str, Any]:
    """Read a flat YAML mapping from *path*.

    PyYAML's ``safe_load`` is used when it is importable and the file parses.
    Otherwise a minimal line parser handles ``key: value`` pairs and
    ``  - item`` list entries under a key with an empty value.

    Args:
        path: YAML file to read.

    Returns:
        The parsed mapping; ``{}`` when the file is missing or the top-level
        YAML value is not a mapping.
    """
    if not path.is_file():
        return {}
    try:
        import yaml  # type: ignore

        data = yaml.safe_load(path.read_text(encoding="utf-8-sig"))
        return data if isinstance(data, dict) else {}
    except Exception:
        # Deliberate best-effort: PyYAML missing, undecodable bytes, or
        # invalid YAML all fall through to the line parser below.
        pass
    result: dict[str, Any] = {}
    current_key = ""
    for raw_line in path.read_text(encoding="utf-8-sig", errors="replace").splitlines():
        line = raw_line.rstrip()
        if not line.strip() or line.lstrip().startswith("#"):
            continue
        if line.startswith("  - ") and current_key:
            items = result.get(current_key)
            # Only keys declared with an empty value hold a list. A stray
            # list item under a scalar key is ignored instead of crashing
            # on str.append.
            if isinstance(items, list):
                items.append(line[4:].strip().strip('"'))
            continue
        match = re.match(r"^([A-Za-z0-9_-]+):\s*(.*)$", line)
        if match:
            current_key = match.group(1)
            value = match.group(2).strip()
            if value == "":
                result[current_key] = []
            else:
                result[current_key] = value.strip('"')
    return result


def infer_document_type(root: Path, explicit: str | None) -> str:
    if explicit:
        return explicit
    text = str(root).lower()
    if "10_codex_kp" in text or "kp" in root.name.lower():
        return "kp"
    if "11_chet" in text or "invoices" in text:
        return "invoice"
    if "12_closet" in text or "closings" in text:
        return "closing"
    raise SystemExit("Cannot infer document type. Pass --type kp|invoice|closing")


def infer_local_id(root: Path, document_type: str, metadata: dict[str, Any], explicit: str | None) -> str:
    if explicit:
        return explicit
    for key in ("kp_id", "invoice_id", "closing_id", "document_id", "local_id"):
        value = str(metadata.get(key, "")).strip()
        if value:
            return value
    return root.name


def infer_document_no(root: Path, document_type: str, metadata: dict[str, Any], local_id: str, explicit: str | None) -> str:
    if explicit:
        return explicit
    for key in ("document_no", "number", "kp_number", "invoice_no", "closing_no"):
        value = str(metadata.get(key, "")).strip()
        if value:
            return value
    if document_type == "kp":
        for item in root.rglob("*"):
            if item.is_file():
                match = re.search(r"КП-\d{8}-\d{2}", item.name)
                if match:
                    return match.group(0)
    return local_id


def read_first_existing(root: Path, names: list[str], limit: int = 3000) -> str:
    """Return the start of the first file in *names* that exists under *root*.

    Args:
        root: Folder the names are resolved against.
        names: Candidate file names, checked in order.
        limit: Maximum number of characters kept before stripping.

    Returns:
        Up to *limit* characters of the first existing file, stripped of
        surrounding whitespace; ``""`` when none of the files exists.
    """
    candidates = (root / name for name in names)
    hit = next((candidate for candidate in candidates if candidate.is_file()), None)
    if hit is None:
        return ""
    text = hit.read_text(encoding="utf-8-sig", errors="replace")
    return text[:limit].strip()


def infer_document(root: Path, args: argparse.Namespace) -> dict[str, Any]:
    """Build the document description sent to the API.

    Command-line values on *args* take precedence over ``metadata.yml`` in
    *root*; remaining gaps are filled from the folder name and note files.

    Args:
        root: Document folder.
        args: Parsed arguments providing ``type``, ``local_id``,
            ``document_no``, ``title``, ``client``, ``object``, ``amount``,
            ``status`` and ``summary``.

    Returns:
        A dict with the document fields plus the raw ``metadata`` mapping.

    Raises:
        SystemExit: When the document type cannot be inferred.
    """
    metadata = parse_simple_yaml(root / "metadata.yml")

    def meta_text(*keys: str) -> str:
        """Return the first non-empty scalar among *keys* as stripped text."""
        for key in keys:
            raw = metadata.get(key)
            # An empty YAML value arrives as None (PyYAML) or [] (fallback
            # parser); str() would turn those into "None" / "[]".
            if raw is None or isinstance(raw, (list, tuple, dict, set)):
                continue
            text = str(raw).strip()
            if text:
                return text
        return ""

    document_type = infer_document_type(root, args.type)
    local_id = infer_local_id(root, document_type, metadata, args.local_id)
    document_no = infer_document_no(root, document_type, metadata, local_id, args.document_no)
    title = args.title or meta_text("title") or root.name.replace("_", " ")
    # The second key is also consulted when the first one is present but empty.
    client = args.client or meta_text("client", "client_name")
    object_title = args.object or meta_text("object", "object_title")
    amount = args.amount or meta_text("amount", "total_with_vat")
    status = args.status or meta_text("status") or "published"
    summary = args.summary or read_first_existing(root, ["README.md", "notes.md", "input.md"])
    return {
        "document_type": document_type,
        "local_id": local_id,
        "document_no": document_no,
        "title": title,
        "client_name": client,
        "object_title": object_title,
        "amount_text": amount,
        "status": status,
        "workspace_path": str(root),
        "summary_text": summary,
        "metadata": metadata,
    }


def should_skip(path: Path, root: Path, max_bytes: int) -> bool:
    """Tell whether *path* must be left out of the upload.

    A file is skipped when any parent folder (relative to *root*) is listed
    in ``SKIP_DIRS`` or starts with ``_render`` / ``_preview``, when its
    suffix is not in ``FINAL_EXTENSIONS``, when its name ends in ``.tmp`` or
    ``.bak``, or when it is larger than *max_bytes*.
    """
    parent_folders = path.relative_to(root).parts[:-1]
    for folder in parent_folders:
        if folder in SKIP_DIRS or folder.startswith(("_render", "_preview")):
            return True
    lowered_name = path.name.lower()
    if path.suffix.lower() not in FINAL_EXTENSIONS or lowered_name.endswith((".tmp", ".bak")):
        return True
    return path.stat().st_size > max_bytes


def collect_files(root: Path, max_bytes: int) -> list[dict[str, Any]]:
    """Gather every uploadable file under *root* as an API file entry.

    Files are visited in sorted path order; those rejected by
    ``should_skip`` are left out. Each entry carries the POSIX-style
    relative path, guessed MIME type, size, SHA-256 and base64 content.
    """
    collected: list[dict[str, Any]] = []
    for candidate in sorted(root.rglob("*")):
        if not candidate.is_file():
            continue
        if should_skip(candidate, root, max_bytes):
            continue
        relative = candidate.relative_to(root).as_posix()
        blob = candidate.read_bytes()
        guessed_type, _encoding = mimetypes.guess_type(candidate.name)
        entry = {
            "group_code": "final",
            "label": relative,
            "relative_path": relative,
            "original_name": candidate.name,
            "mime": guessed_type or "application/octet-stream",
            "size_bytes": len(blob),
            "sha256": hashlib.sha256(blob).hexdigest(),
            "content_base64": base64.b64encode(blob).decode("ascii"),
        }
        collected.append(entry)
    return collected


def reserve(document: dict[str, Any]) -> dict[str, Any]:
    """Ask the API to reserve the document number for one hour.

    Sends the document's type, number and local id with ``ttl_seconds`` of
    3600 to ``/api/v1/documents/reserve-number`` and returns the response.
    """
    request_body: dict[str, Any] = {"source_code": source_code()}
    for field in ("document_type", "document_no", "local_id"):
        request_body[field] = document[field]
    request_body["ttl_seconds"] = 3600
    return request_json("/api/v1/documents/reserve-number", request_body)


def ping(timeout: int = 30) -> dict[str, Any]:
    """Call ``/api/v1/documents/ping`` with the source code and return the reply."""
    body = {"source_code": source_code()}
    return request_json("/api/v1/documents/ping", body, timeout=timeout)


def list_documents(args: argparse.Namespace) -> dict[str, Any]:
    """Query ``/api/v1/documents/list`` using the filters on *args*.

    Paging and the include flags are always sent; the type, status, search
    text and date bounds are added only when they are truthy.
    """
    payload: dict[str, Any] = {
        "source_code": source_code(),
        "page": args.page,
        "per_page": args.per_page,
        "include_files": args.include_files,
        "include_events": args.include_events,
    }
    optional_filters = (
        ("document_type", args.type),
        ("status", args.status),
        ("q", args.q),
        ("date_from", args.date_from),
        ("date_to", args.date_to),
    )
    payload.update({name: value for name, value in optional_filters if value})
    return request_json("/api/v1/documents/list", payload, timeout=args.timeout)


def document_lookup_payload(root: Path | None, args: argparse.Namespace) -> dict[str, Any]:
    """Build the payload fields that identify one document.

    A non-blank ``args.document_uid`` identifies the document on its own.
    Otherwise the type, local id and number are inferred from *root*.

    Raises:
        SystemExit: When no uid is given and *root* is not an existing folder.
    """
    source = source_code()
    uid = str(getattr(args, "document_uid", "") or "").strip()
    if uid:
        return {"source_code": source, "document_uid": uid}
    if root is None or not root.is_dir():
        raise SystemExit("Pass --document-uid or an existing document folder")
    document = infer_document(root, args)
    lookup: dict[str, Any] = {"source_code": source}
    for field in ("document_type", "local_id", "document_no"):
        lookup[field] = document[field]
    return lookup


def get_document(root: Path | None, args: argparse.Namespace) -> dict[str, Any]:
    """Fetch one document card from ``/api/v1/documents/get``.

    The file list is requested unless ``args.no_files`` is set; events are
    requested when ``args.include_events`` is set.
    """
    request_body = document_lookup_payload(root, args)
    request_body.update(
        include_files=not args.no_files,
        include_events=args.include_events,
    )
    return request_json("/api/v1/documents/get", request_body, timeout=args.timeout)


def filename_from_content_disposition(value: str) -> str:
    """Extract a bare file name from a ``Content-Disposition`` header value.

    An extended ``filename*=charset'lang'percent-encoded`` parameter is
    decoded first (falling back to UTF-8 for an unknown charset). Otherwise
    the header is parsed with ``email.message.Message``. Any directory part
    is dropped from the result.

    Returns:
        The file name, or ``""`` when the header is empty or names no file.
    """
    if not value:
        return ""

    def leaf(name: str) -> str:
        # Keep only the last component, whichever separator was used.
        return name.replace("\\", "/").split("/")[-1]

    for segment in value.split(";")[1:]:
        param, eq, extended = segment.strip().partition("=")
        if not eq or param.strip().lower() != "filename*":
            continue
        pieces = extended.strip().strip('"').split("'", 2)
        if len(pieces) != 3:
            # Not in charset'language'value form; try the next parameter.
            continue
        charset, _language, encoded = pieces
        octets = urllib.parse.unquote_to_bytes(encoded)
        try:
            decoded = octets.decode(charset or "utf-8", errors="replace")
        except LookupError:
            decoded = octets.decode("utf-8", errors="replace")
        return leaf(decoded)

    header = Message()
    header["content-disposition"] = value
    parsed = header.get_filename() or ""
    return leaf(parsed) if parsed else ""


def download_file(root: Path | None, args: argparse.Namespace) -> dict[str, Any]:
    """Download one document file and write it to ``args.output``.

    ``args.output`` is treated as a directory when it already is one or when
    it is written with a trailing ``/`` or ``\\`` (the directory is then
    created). In that case the file name comes from the response's
    ``Content-Disposition`` header, falling back to ``onedesk-file-<id>``.
    Any other value is used as the target file path.

    Args:
        root: Document folder used for lookup when no uid is given.
        args: Parsed arguments providing ``file_id``, ``output``, ``timeout``
            and the lookup fields.

    Returns:
        A summary dict with the saved path, size and the SHA-256 / request id
        reported in the response headers.
    """
    payload = document_lookup_payload(root, args)
    payload["file_id"] = args.file_id
    headers, content = request_bytes("/api/v1/documents/download-file", payload, timeout=args.timeout)
    raw_output = args.output or "."
    output = Path(raw_output)
    # Path() drops a trailing "/", so the "this is a directory" hint has to
    # be read from the raw argument rather than from str(output).
    wants_directory = str(raw_output).endswith(("/", "\\"))
    if output.is_dir() or wants_directory:
        if not output.is_dir():
            output.mkdir(parents=True, exist_ok=True)
        filename = filename_from_content_disposition(headers.get("Content-Disposition", "")) or f"onedesk-file-{args.file_id}"
        output = output / filename
    else:
        output.parent.mkdir(parents=True, exist_ok=True)
    output.write_bytes(content)
    return {
        "ok": True,
        "file_id": args.file_id,
        "saved_to": str(output.resolve()),
        "size_bytes": len(content),
        "sha256": headers.get("X-OneDesk-SHA256", ""),
        "request_id": headers.get("X-Request-ID", ""),
    }


def read_json_object(path: Path) -> dict[str, Any]:
    """Load *path* as JSON and require the top-level value to be an object.

    Raises:
        SystemExit: When the file holds valid JSON that is not an object.
    """
    parsed = json.loads(path.read_text(encoding="utf-8-sig"))
    if isinstance(parsed, dict):
        return parsed
    raise SystemExit(f"JSON object expected: {path}")


def metadata_patch_from_args(root: Path, args: argparse.Namespace, document: dict[str, Any]) -> dict[str, Any]:
    """Assemble the metadata patch from the command-line options.

    Sources are merged in this order, later ones overriding earlier ones:
    contact keys from the document's metadata (``--from-metadata``), the
    JSON file (``--patch-file``), then the inline JSON (``--patch-json``).

    Raises:
        SystemExit: When the patch file or inline JSON is not a JSON object.
    """
    merged: dict[str, Any] = {}
    local_metadata = document.get("metadata", {}) if args.from_metadata else None
    if isinstance(local_metadata, dict):
        merged.update(
            {
                key: value
                for key, value in local_metadata.items()
                if key in CONTACT_METADATA_KEYS and str(value).strip()
            }
        )
    if args.patch_file:
        merged.update(read_json_object(Path(args.patch_file)))
    if args.patch_json:
        inline = json.loads(args.patch_json)
        if not isinstance(inline, dict):
            raise SystemExit("--patch-json must be a JSON object")
        merged.update(inline)
    return merged


def patch_metadata(root: Path, args: argparse.Namespace) -> dict[str, Any]:
    """Send a metadata patch for the document in *root*.

    The request combines keys to merge, keys to remove (``--remove`` values,
    repeatable and comma-separated) and card fields (``--field-json``).

    Raises:
        SystemExit: When ``--field-json`` is not a JSON object or there is
            nothing to patch.
    """
    document = infer_document(root, args)
    metadata_patch = metadata_patch_from_args(root, args, document)

    fields: dict[str, Any] = {}
    if args.field_json:
        parsed_fields = json.loads(args.field_json)
        if not isinstance(parsed_fields, dict):
            raise SystemExit("--field-json must be a JSON object")
        fields = parsed_fields

    metadata_remove: list[str] = []
    for entry in args.remove:
        for name in entry.split(","):
            name = name.strip()
            if name:
                metadata_remove.append(name)

    if not (metadata_patch or metadata_remove or fields):
        raise SystemExit("Nothing to patch. Pass --from-metadata, --patch-json, --patch-file, --remove, or --field-json.")

    event = {
        "event_type": "metadata_patched",
        "actor_name": "Codex",
        "message": f"Metadata patch from {root}",
    }
    payload = {
        "source_code": source_code(),
        "document_type": document["document_type"],
        "local_id": document["local_id"],
        "document_no": document["document_no"],
        "metadata_patch": metadata_patch,
        "metadata_remove": metadata_remove,
        "fields": fields,
        "preserve_dates": not args.touch_updated_at,
        "dry_run": args.dry_run,
        "event": event,
    }
    return request_json("/api/v1/documents/patch-metadata", payload, timeout=args.timeout)


def save_queue(payload: dict[str, Any]) -> Path:
    """Write *payload* to the queue directory and return the new file's path.

    The file name is ``<YYYYmmdd-HHMMSS>-<local id>.json``; characters in the
    local id outside ``A-Za-z0-9._-`` are collapsed to ``-``.
    """
    folder = queue_dir()
    folder.mkdir(parents=True, exist_ok=True)
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    local_id = str(payload.get("document", {}).get("local_id", "document"))
    slug = re.sub(r"[^A-Za-z0-9._-]+", "-", local_id).strip("-")
    destination = folder / f"{timestamp}-{slug or 'document'}.json"
    serialized = json.dumps(payload, ensure_ascii=False, indent=2, default=str)
    destination.write_text(serialized, encoding="utf-8")
    return destination


def publish(root: Path, args: argparse.Namespace) -> dict[str, Any]:
    """Publish the document folder *root*, queueing it when the server is down.

    Unless a reservation token is supplied or ``args.reserve`` is false, the
    document number is reserved first. The payload is saved to the local
    queue when the server is unreachable (during reserve or publish) or when
    publish answers with an HTTP status of 500 or above.

    Returns:
        The API response, or ``{"ok": False, "queued": <path>, ...}`` when the
        payload was queued.

    Raises:
        RuntimeError: For publish failures with a status below 500.
    """
    document = infer_document(root, args)
    token = args.reservation_token
    files = collect_files(root, args.max_file_mb * 1024 * 1024)

    def build_payload(reservation_token: str, message: str) -> dict[str, Any]:
        # Both the queued and the live request share this shape.
        return {
            "source_code": source_code(),
            "reservation_token": reservation_token,
            "document": document,
            "files": files,
            "events": [
                {
                    "event_type": "codex_publish",
                    "actor_name": "Codex",
                    "message": message,
                }
            ],
        }

    if args.reserve and not token:
        try:
            reservation = reserve(document)
        except ConnectionError:
            offline_payload = build_payload("", f"Queued from {root}; reserve was offline")
            queued = save_queue(offline_payload)
            print(f"server unavailable; queued {queued}")
            return {"ok": False, "queued": str(queued)}
        else:
            token = str(reservation.get("reservation_token", ""))
            print(f"reserved {document['document_no']} until {reservation.get('expires_at', '')}")

    payload = build_payload(token, f"Published from {root}")
    try:
        return request_json("/api/v1/documents/publish", payload, timeout=args.timeout)
    except ConnectionError:
        queued = save_queue(payload)
        print(f"server unavailable; queued {queued}")
        return {"ok": False, "queued": str(queued)}
    except RuntimeError as exc:
        try:
            error_body = json.loads(str(exc))
        except json.JSONDecodeError:
            error_body = {}
        status_code = int(error_body.get("http_status", 0) or 0)
        if status_code < 500:
            raise
        queued = save_queue(payload)
        print(f"server error; queued {queued}")
        return {"ok": False, "queued": str(queued), "error": error_body}


def publish_payload_with_reservation_refresh(payload: dict[str, Any], timeout: int) -> dict[str, Any]:
    """Publish *payload*, renewing the reservation once if it was rejected.

    When the first attempt fails with the API error ``reservation_invalid``,
    a new number reservation is requested for the payload's document, the
    token in *payload* is replaced, and the publish is retried once.

    Args:
        payload: Publish request body; its ``reservation_token`` is updated
            in place when a new reservation is made.
        timeout: Timeout in seconds for each publish request.

    Raises:
        RuntimeError: The original publish error when it is not a
            ``reservation_invalid`` error, or any error from the retry.
        ConnectionError: When the server cannot be reached.
    """
    try:
        return request_json("/api/v1/documents/publish", payload, timeout=timeout)
    except RuntimeError as exc:
        try:
            error_body = json.loads(str(exc))
        except json.JSONDecodeError:
            # Re-raising from inside this handler would propagate the
            # JSONDecodeError instead of the RuntimeError callers catch.
            error_body = None
        if not isinstance(error_body, dict) or error_body.get("error") != "reservation_invalid":
            raise
        document = payload.get("document", {})
        if not isinstance(document, dict):
            raise
        reservation = reserve(document)
        payload["reservation_token"] = str(reservation.get("reservation_token", ""))
        return request_json("/api/v1/documents/publish", payload, timeout=timeout)


def retry_queue(args: argparse.Namespace) -> int:
    """Re-send every queued payload and delete the ones that were accepted.

    Queue files are processed in sorted name order. A file that cannot be
    read or parsed is reported and left in place, as is one whose publish
    fails.

    Args:
        args: Parsed arguments providing ``timeout``.

    Returns:
        The number of payloads sent successfully.
    """
    qdir = queue_dir()
    if not qdir.is_dir():
        print("queue is empty")
        return 0
    sent = 0
    for path in sorted(qdir.glob("*.json")):
        try:
            payload = json.loads(path.read_text(encoding="utf-8-sig"))
        except (OSError, ValueError) as exc:
            # ValueError covers both invalid JSON and undecodable bytes. One
            # bad file must not block the rest of the queue.
            print(f"skipped unreadable {path}: {exc}")
            continue
        if not isinstance(payload, dict):
            print(f"skipped unreadable {path}: JSON object expected")
            continue
        try:
            result = publish_payload_with_reservation_refresh(payload, args.timeout)
        except ConnectionError:
            print(f"still offline: {path}")
            continue
        except RuntimeError as exc:
            print(f"failed {path}: {exc}")
            continue
        path.unlink()
        sent += 1
        print(f"sent {path.name}: {result.get('card_url', '')}")
    return sent


def main() -> int:
    """Parse the command line, run the selected sub-command, print its result.

    Sub-commands: ``reserve``, ``ping``, ``list``, ``get``, ``download-file``,
    ``publish``, ``patch-metadata`` and ``retry``. Results are printed as
    indented JSON (``retry`` prints progress lines instead).

    Returns:
        ``0`` on completion; failures surface as exceptions or ``SystemExit``.
    """
    parser = argparse.ArgumentParser(description="Publish a Codex document folder to OneDesk.")
    sub = parser.add_subparsers(dest="command", required=True)

    # Options shared by every sub-command that works on a document folder.
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument("folder", nargs="?", default=".", help="Document working folder")
    common.add_argument("--type", choices=["kp", "invoice", "closing"])
    common.add_argument("--document-uid")
    common.add_argument("--local-id")
    common.add_argument("--document-no")
    common.add_argument("--title")
    common.add_argument("--client")
    common.add_argument("--object")
    common.add_argument("--amount")
    common.add_argument("--status")
    common.add_argument("--summary")
    common.add_argument("--timeout", type=int, default=90)

    sub.add_parser("reserve", parents=[common])
    ping_parser = sub.add_parser("ping")
    ping_parser.add_argument("--timeout", type=int, default=30)
    list_parser = sub.add_parser("list")
    list_parser.add_argument("--type", choices=["kp", "invoice", "closing"])
    list_parser.add_argument("--status", default="")
    list_parser.add_argument("--q", default="", help="Search in uid, number, client, title, object, summary")
    list_parser.add_argument("--date-from", default="")
    list_parser.add_argument("--date-to", default="")
    list_parser.add_argument("--page", type=int, default=1)
    list_parser.add_argument("--per-page", type=int, default=25)
    list_parser.add_argument("--include-files", action="store_true", help="Include file lists in each document")
    list_parser.add_argument("--include-events", action="store_true", help="Include event lists in each document")
    list_parser.add_argument("--timeout", type=int, default=60)
    get_parser = sub.add_parser("get", parents=[common])
    get_parser.add_argument("--no-files", action="store_true", help="Do not include file list")
    get_parser.add_argument("--include-events", action="store_true", help="Include document events")
    download_parser = sub.add_parser("download-file", parents=[common])
    download_parser.add_argument("--file-id", type=int, required=True)
    download_parser.add_argument("--output", default=".", help="Output file or directory")
    publish_parser = sub.add_parser("publish", parents=[common])
    publish_parser.add_argument("--no-reserve", action="store_false", dest="reserve")
    publish_parser.add_argument("--reservation-token", default="")
    publish_parser.add_argument("--max-file-mb", type=int, default=50)
    patch_parser = sub.add_parser("patch-metadata", parents=[common])
    patch_parser.add_argument("--from-metadata", action="store_true", help="Patch contact fields from metadata.yml")
    patch_parser.add_argument("--patch-json", default="", help="JSON object with metadata keys to merge")
    patch_parser.add_argument("--patch-file", default="", help="JSON file with metadata keys to merge")
    patch_parser.add_argument("--remove", action="append", default=[], help="Metadata key to remove; can be repeated or comma-separated")
    patch_parser.add_argument("--field-json", default="", help="JSON object with allowed card fields to update")
    patch_parser.add_argument("--dry-run", action="store_true", help="Validate lookup and payload without writing")
    patch_parser.add_argument("--touch-updated-at", action="store_true", help="Also update document updated_at")
    retry_parser = sub.add_parser("retry")
    retry_parser.add_argument("--timeout", type=int, default=90)

    args = parser.parse_args()

    # Commands that do not take a document folder.
    if args.command == "ping":
        print(json.dumps(ping(args.timeout), ensure_ascii=False, indent=2))
        return 0
    if args.command == "list":
        print(json.dumps(list_documents(args), ensure_ascii=False, indent=2))
        return 0
    if args.command == "retry":
        retry_queue(args)
        return 0

    root = Path(args.folder).resolve()
    if args.command in {"get", "download-file"}:
        # These may be addressed by --document-uid alone, so a missing
        # folder is passed on as None instead of being rejected here.
        root_or_none = root if root.is_dir() else None
        if args.command == "get":
            result = get_document(root_or_none, args)
        else:
            result = download_file(root_or_none, args)
        print(json.dumps(result, ensure_ascii=False, indent=2))
        return 0
    if not root.is_dir():
        raise SystemExit(f"Folder not found: {root}")
    if args.command == "reserve":
        result = reserve(infer_document(root, args))
    elif args.command == "patch-metadata":
        # patch_metadata() and publish() infer the document themselves, so
        # it is not computed here a second time.
        result = patch_metadata(root, args)
    else:
        result = publish(root, args)
    print(json.dumps(result, ensure_ascii=False, indent=2))
    return 0


# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
