import os
import sys
import gzip
import zlib
import platform
import urllib.request
import urllib.error
import http.cookiejar
import re
import ssl
import ctypes
import hashlib
import threading
import time
import shutil
import subprocess
from pathlib import Path
from urllib.parse import urljoin, urlparse, urldefrag, unquote
from concurrent.futures import ThreadPoolExecutor, as_completed
from collections import deque

# Check for optional packages initially
try:
    import cloudscraper
    HAS_CLOUDSCRAPER = True
except ImportError:
    HAS_CLOUDSCRAPER = False

try:
    import brotli
    HAS_BROTLI = True
except ImportError:
    HAS_BROTLI = False

try:
    from playwright.sync_api import sync_playwright
    HAS_PLAYWRIGHT = True
except ImportError:
    HAS_PLAYWRIGHT = False

LOGO = r"""
  █████▒██▀███  ▓█████  ██▓ ██▓   
▓██   ▒▓██ ▒ ██▒▓█   ▀ ▓██▒▓██▒   
▒████ ░▓██ ░▄█ ▒▒███   ▒██▒▒██▒   
░▓█▒  ░▒██▀▀█▄  ▒▓█  ▄ ░██░░██░   
░▒█░   ░██▓ ▒██▒░▒████▒░██░░██░   
 ▒ ░   ░ ▒▓ ░▒▓░░░ ▒░ ░░▓  ░▓     
 ░       ░▒ ░ ▒░ ░ ░  ░ ▒ ░ ▒ ░   
 ░ ░     ░░   ░    ░    ▒ ░ ▒ ░   
          ░        ░  ░ ░   ░     
"""

# Browser-like User-Agent sent with urllib requests and used for the Playwright context.
UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36"
TIMEOUT = 15  # seconds; per-request timeout for urllib and cloudscraper
MAX_WORKERS = 16  # thread-pool size used by dump_assets
MAX_RETRIES = 2  # fetch() makes MAX_RETRIES + 1 urllib attempts per URL
MAX_BYTES = 100 * 1024 * 1024  # 100MB per file cap

# TLS context with hostname checking and certificate verification disabled,
# so hosts with invalid certificates can still be fetched.
# NOTE: this also means responses are not authenticated against tampering.
SSL_CTX = ssl.create_default_context()
SSL_CTX.check_hostname = False
SSL_CTX.verify_mode = ssl.CERT_NONE

# Shared urllib opener: every request goes through the relaxed TLS context
# and reads/writes cookies in COOKIE_JAR.
COOKIE_JAR = http.cookiejar.CookieJar()
OPENER = urllib.request.build_opener(
    urllib.request.HTTPSHandler(context=SSL_CTX),
    urllib.request.HTTPCookieProcessor(COOKIE_JAR),
)

# Optional cloudscraper session; stays None when the package is missing or
# when creating the scraper fails.
SCRAPER = None
if HAS_CLOUDSCRAPER:
    try:
        SCRAPER = cloudscraper.create_scraper(browser={'browser': 'chrome', 'platform': 'windows', 'mobile': False})
    except Exception:
        SCRAPER = None

# Mutable flag (dict so it can be flipped without `global`) that limits the
# "Cloudflare protection detected" warning to once per session.
CF_WARNED = {"flag": False}

# Shared state guarded by `lock` (also used to serialize console output in log()).
lock = threading.Lock()
stats = {"ok": 0, "fail": 0, "skipped": 0, "bytes": 0}
seen_urls = set()   # asset URLs already claimed by save_asset
seen_pages = set()  # page URLs already dequeued by crawl
url_to_local_map = {}  # Normalized URL -> Absolute Local Path

# Playwright session
class PWSession:
    """Headless browser session that intercepts and caches network resources.

    Every successful response observed by the page is stored in ``captured``
    (keyed by fragment-less URL) so later asset downloads can be served from
    memory instead of re-requesting them.
    """

    def __init__(self):
        # Playwright handles; all stay None until start() is called.
        self.pw = None
        self.browser = None
        self.context = None
        self.page = None
        self.captured = {}   # url -> (bytes, content_type)
        self.capture_lock = threading.Lock()

    def start(self, headless=True):
        """Launch Chromium, create a context/page and hook the response listener.

        Args:
            headless: Passed through to ``chromium.launch``.
        """
        self.pw = sync_playwright().start()
        self.browser = self.pw.chromium.launch(
            headless=headless,
            args=['--disable-blink-features=AutomationControlled', '--no-sandbox'],
        )
        self.context = self.browser.new_context(
            user_agent=UA,
            viewport={'width': 1920, 'height': 1080},
            locale='en-US',
            ignore_https_errors=True,
        )
        # Stealth injections
        self.context.add_init_script(
            "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
            "Object.defineProperty(navigator,'languages',{get:()=>['en-US','en']});"
            "Object.defineProperty(navigator,'plugins',{get:()=>[1,2,3,4,5]});"
        )
        self.page = self.context.new_page()
        self.page.on("response", self._on_response)

    def _on_response(self, resp):
        """Cache the body of a successful, non-data/blob response (best effort)."""
        try:
            url, _ = urldefrag(resp.url)
            if resp.status >= 400:
                return
            if url.startswith('data:') or url.startswith('blob:'):
                return
            body = resp.body()
            if not body or len(body) > MAX_BYTES:
                return
            ct = resp.headers.get('content-type', '')
            with self.capture_lock:
                # First response for a URL wins; later duplicates are ignored.
                if url not in self.captured:
                    self.captured[url] = (body, ct)
        except Exception:
            # Capturing is opportunistic; a failed body read must not break navigation.
            pass

    def goto(self, url):
        """Navigate to ``url`` and return the rendered DOM.

        Returns:
            ``(html_bytes, content_type, None)`` on success, or
            ``(None, None, error_message)`` when navigation fails.
        """
        try:
            self.page.goto(url, wait_until='domcontentloaded', timeout=30000)
            # Wait out basic Cloudflare/security checks if seen
            for _ in range(15):
                title = (self.page.title() or '').lower()
                if 'just a moment' not in title and 'attention required' not in title:
                    break
                self.page.wait_for_timeout(1000)
            try:
                self.page.wait_for_load_state('networkidle', timeout=5000)
            except Exception:
                # Pages that never go idle are still usable; keep what has loaded.
                pass
            html = self.page.content()
            return html.encode('utf-8'), 'text/html; charset=utf-8', None
        except Exception as e:
            return None, None, f"playwright: {type(e).__name__}: {e}"

    def pop_captured(self, url):
        """Remove and return the cached ``(bytes, content_type)`` for ``url``, or None."""
        url, _ = urldefrag(url)
        with self.capture_lock:
            return self.captured.pop(url, None)

    def get_cookies(self):
        """Return the context's cookies, or an empty list if they cannot be read."""
        try:
            return self.context.cookies()
        except Exception:
            return []

    def close(self):
        """Tear down page, context, browser and driver, in that order.

        Each step runs in its own try block so a failure while closing one
        handle (e.g. an already-crashed page) cannot skip the remaining
        steps and leave the browser process or driver running.
        """
        teardown = (
            (self.page, 'close'),
            (self.context, 'close'),
            (self.browser, 'close'),
            (self.pw, 'stop'),
        )
        for handle, method in teardown:
            if not handle:
                continue
            try:
                getattr(handle, method)()
            except Exception:
                # Best-effort shutdown: keep going with the remaining handles.
                pass
        # Drop the references so a second close() is a no-op.
        self.page = self.context = self.browser = self.pw = None

PW = None  # Global PW session when active

def _sync_pw_cookies_to_urllib():
    """Copy the active Playwright session's cookies into the urllib cookie jar.

    Does nothing when no Playwright session is active. A cookie that cannot
    be converted is skipped without affecting the others.
    """
    session = PW
    if session is None:
        return

    for entry in session.get_cookies():
        try:
            name = entry['name']
            value = entry['value']
            domain = entry.get('domain', '')
            expiry = entry.get('expires', -1)
            # Non-positive expiry values are stored as "no expiry".
            expires = int(expiry) if expiry > 0 else None
            rest = {'HttpOnly': None} if entry.get('httpOnly') else {}

            cookie = http.cookiejar.Cookie(
                version=0,
                name=name,
                value=value,
                port=None,
                port_specified=False,
                domain=domain,
                domain_specified=bool(domain),
                domain_initial_dot=domain.startswith('.'),
                path=entry.get('path', '/'),
                path_specified=True,
                secure=entry.get('secure', False),
                expires=expires,
                discard=False,
                comment=None,
                comment_url=None,
                rest=rest,
                rfc2109=False,
            )
            COOKIE_JAR.set_cookie(cookie)
        except Exception:
            # Malformed cookie entries are ignored on purpose.
            continue

# Asset configurations
# File extensions treated as static assets; extract_page_links uses this
# tuple to keep such URLs out of the page crawl queue.
ASSET_EXTS = (
    '.js', '.mjs', '.css', '.map',
    '.png', '.jpg', '.jpeg', '.gif', '.webp', '.svg', '.ico', '.bmp', '.avif',
    '.mp4', '.webm', '.ogg', '.mp3', '.wav', '.m4a',
    '.woff', '.woff2', '.ttf', '.otf', '.eot',
    '.json', '.xml', '.txt', '.pdf',
)

# Relative paths that probe_common requests against the site root.
PROBES = [
    ".env", ".env.local", ".env.production", "robots.txt", "sitemap.xml",
    "humans.txt", "security.txt", ".well-known/security.txt", "package.json",
    "composer.json", "yarn.lock", "package-lock.json", "config.js",
    "config.json", "settings.json", ".git/config", ".git/HEAD", ".git/index",
    ".DS_Store", "crossdomain.xml", "manifest.json", "wp-login.php",
    "wp-config.php.bak", "admin/", "phpinfo.php",
]

# Regex parsers
# Each pattern captures the referenced value in group 1.
# RE_SRC: quoted src / href / data-src / data-href / poster attribute values.
RE_SRC     = re.compile(r'''(?:src|href|data-src|data-href|poster)\s*=\s*["']([^"']+)["']''', re.I)
# RE_SRCSET: the whole srcset attribute value (comma-separated candidates).
RE_SRCSET  = re.compile(r'''srcset\s*=\s*["']([^"']+)["']''', re.I)
# RE_STYLE: the whole inline style attribute value.
RE_STYLE   = re.compile(r'''style\s*=\s*["']([^"']+)["']''', re.I)
# RE_CSSURL: the argument of a CSS url(...), with or without quotes.
RE_CSSURL  = re.compile(r'''url\(\s*["']?([^"')]+)["']?\s*\)''', re.I)
# RE_CSSIMP: the quoted target of @import "..." or @import url("...").
RE_CSSIMP  = re.compile(r'''@import\s+(?:url\()?["']([^"']+)["']\)?''', re.I)
# RE_INLINE_JS_FETCH: quoted absolute or root-relative paths ending in js/mjs/json/css.
RE_INLINE_JS_FETCH = re.compile(r'''["']((?:/|https?://)[^"']+?\.(?:js|mjs|json|css))["']''', re.I)

# Rewrite target extensions
# Only files with these extensions are opened by rewrite_links_in_files.
REWRITE_EXTS = ('.html', '.htm', '.css', '.js', '.json', '.svg')

def clear():
    """Clear the terminal using the platform's native command."""
    command = 'clear'
    if platform.system() == 'Windows':
        command = 'cls'
    os.system(command)

def get_desktop_dumps():
    """Return the dump directory path under the user's Desktop as a string."""
    dumps_dir = Path.home().joinpath("Desktop", "Freii Dumps")
    return str(dumps_dir)

def ensure_dependencies():
    """Install any missing optional packages and load them into the module.

    Checks for cloudscraper, brotli and playwright. Missing ones are
    installed with pip (plus the Chromium engine for Playwright), then
    imported so the module-level feature flags and names become usable in
    the current process. On failure a manual install hint is printed and
    the program continues without the optional features.
    """
    # `global` must come before any binding of these names in this function.
    # The module objects themselves are declared global as well: other
    # functions (e.g. the brotli decoder) look them up at module scope, so a
    # function-local import would leave the feature flag True while the
    # module name is still undefined there.
    global HAS_CLOUDSCRAPER, HAS_BROTLI, HAS_PLAYWRIGHT, SCRAPER
    global cloudscraper, brotli, sync_playwright
    import importlib

    missing = []

    # Check cloudscraper
    try:
        import cloudscraper
    except ImportError:
        missing.append("cloudscraper")

    # Check brotli
    try:
        import brotli
    except ImportError:
        missing.append("brotli")

    # Check playwright
    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        missing.append("playwright")

    if not missing:
        return

    clear()
    print(f"Missing modules: {', '.join(missing)}")
    print("Installing dependencies automatically, please wait...")
    try:
        # Install packages via pip
        subprocess.check_call([sys.executable, "-m", "pip", "install"] + missing)

        # Install playwright browser if needed
        if "playwright" in missing:
            print("Configuring Playwright engines...")
            subprocess.check_call([sys.executable, "-m", "playwright", "install", "chromium"])

        print("Installation complete. Initializing Grabber...")
        time.sleep(1.5)

        # The import system caches directory listings; packages installed
        # after interpreter start may be invisible until the caches are reset.
        importlib.invalidate_caches()

        # Dynamic imports to load installed packages into memory
        if "cloudscraper" in missing:
            import cloudscraper
            HAS_CLOUDSCRAPER = True
            try:
                SCRAPER = cloudscraper.create_scraper(browser={'browser': 'chrome', 'platform': 'windows', 'mobile': False})
            except Exception:
                SCRAPER = None
        if "brotli" in missing:
            import brotli
            HAS_BROTLI = True
        if "playwright" in missing:
            from playwright.sync_api import sync_playwright
            HAS_PLAYWRIGHT = True

    except Exception as e:
        # Auto-install is a convenience; fall back to running without extras.
        print(f"\n [!] Dependency auto-install failed: {e}")
        print(" Please run manually: pip install " + " ".join(missing))
        time.sleep(4)

def print_banner():
    """Print the ASCII logo followed by a horizontal separator line."""
    print(
        LOGO,
        "------------------------------------------------------------------------",
        sep="\n",
    )

def log(msg, kind="GET"):
    """Print one tagged status line; the shared lock keeps lines from interleaving."""
    line = " [{:<5}] {}".format(kind, msg)
    with lock:
        print(line)

def safe_filename(name):
    """Turn one URL path segment into a name that is safe to use on disk.

    The segment is percent-decoded, stripped of any query/fragment part,
    and characters that are illegal in file names are replaced with ``_``.
    Names longer than 180 characters are shortened and tagged with a short
    hash so distinct long names stay distinct.

    Segments that are empty or consist only of dots (``.``, ``..``, also
    when percent-encoded as ``%2e%2e``) are replaced with underscores:
    joined into a path they would otherwise refer to the current or parent
    directory and let a crawled URL escape the output folder.

    Args:
        name: Raw path segment (may be percent-encoded).

    Returns:
        A non-empty, sanitized file name.
    """
    # Strip URL query and hash params
    name = unquote(name).split('?')[0].split('#')[0]
    # Replace illegal filesystem chars
    name = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', name)
    # Neutralize "", "." and ".." (and longer dot-only runs).
    if not name.strip('.'):
        name = '_' * max(1, len(name))
    if len(name) > 180:
        h = hashlib.md5(name.encode()).hexdigest()[:8]
        base, ext = os.path.splitext(name)
        name = base[:150] + '_' + h + ext
    return name

def url_to_local(url, root_domain, root_folder):
    """Deterministically map any URL to a structured, clean local file path.

    Only the host and path of the URL are used; query and fragment are
    ignored. Directory-style paths get ``index.html`` and extensionless
    routes get ``.html`` appended. Hosts other than ``root_domain`` are
    placed under ``<root_folder>/_cdn/<host>/``.

    Args:
        url: Absolute URL to map.
        root_domain: Netloc of the site being mirrored.
        root_folder: Output directory for this session.

    Returns:
        The local file path for ``url``.
    """
    parsed = urlparse(url)
    host = parsed.netloc
    path = parsed.path

    # Standardize directory routes
    if not path or path.endswith('/'):
        path = path + "index.html"

    # Auto-append .html to clean routes. os.path.splitext reports an empty
    # extension for dotfiles such as ".env" or ".DS_Store" because leading
    # periods belong to the root; those are real file names, not routes, so
    # they keep their original name.
    leaf = path.rsplit('/', 1)[-1]
    _, ext = os.path.splitext(leaf)
    is_dotfile = leaf.startswith('.') and leaf.strip('.') != ''
    if not ext and not is_dotfile:
        path = path + ".html"

    parts = path.lstrip('/').split('/')
    sanitized_parts = [safe_filename(p) for p in parts if p]
    if not sanitized_parts:
        sanitized_parts = ["index.html"]

    # Save CDNs/External Assets separately in a cdn folder
    if host != root_domain:
        return os.path.join(root_folder, "_cdn", safe_filename(host), *sanitized_parts)
    return os.path.join(root_folder, *sanitized_parts)

def _decode_body(data, encoding):
    """Decompress a response body according to its Content-Encoding value.

    Handles gzip, deflate (zlib-wrapped first, then raw) and, when the
    brotli module is available, br. If the encoding is not recognised or
    decompression fails, the input bytes are returned unchanged.
    """
    label = encoding.lower() if encoding else ''

    def _inflate(blob):
        # Some servers send raw deflate streams without the zlib header.
        try:
            return zlib.decompress(blob)
        except zlib.error:
            return zlib.decompress(blob, -zlib.MAX_WBITS)

    try:
        if 'gzip' in label:
            decoded = gzip.decompress(data)
        elif 'deflate' in label:
            decoded = _inflate(data)
        elif 'br' in label and HAS_BROTLI:
            decoded = brotli.decompress(data)
        else:
            decoded = data
    except Exception:
        decoded = data
    return decoded

def _browser_headers(url):
    """Build browser-like request headers for ``url``.

    The Referer is the URL's own origin, and ``br`` is only advertised in
    Accept-Encoding when brotli decoding is available.
    """
    target = urlparse(url)
    origin = f"{target.scheme}://{target.netloc}/"

    accepted_encodings = 'gzip, deflate'
    if HAS_BROTLI:
        accepted_encodings += ', br'

    headers = {}
    headers['User-Agent'] = UA
    headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8'
    headers['Accept-Language'] = 'en-US,en;q=0.9'
    headers['Accept-Encoding'] = accepted_encodings
    headers['Referer'] = origin
    headers['Sec-Fetch-Dest'] = 'document'
    headers['Sec-Fetch-Mode'] = 'navigate'
    headers['Sec-Fetch-Site'] = 'none'
    headers['Sec-Fetch-User'] = '?1'
    headers['Upgrade-Insecure-Requests'] = '1'
    headers['Connection'] = 'keep-alive'
    return headers

def _try_cloudscraper(url):
    """Fetch ``url`` through the cloudscraper session, if one exists.

    Returns:
        ``(content, content_type, None)`` for a status below 400,
        ``(None, None, message)`` for an HTTP error or exception, and
        ``(None, None, None)`` when no scraper session is configured.
    """
    scraper = SCRAPER
    if scraper is None:
        return None, None, None

    try:
        response = scraper.get(url, timeout=TIMEOUT, allow_redirects=True)
        status = response.status_code
        if status >= 400:
            return None, None, f"HTTP Status {status}"
        content_type = response.headers.get('Content-Type', '')
        return response.content, content_type, None
    except Exception as exc:
        return None, None, str(exc)

def fetch(url, allow_pw_page=False):
    """Retrieve content via Playwright cache, browser engine, or HTTP fallback.

    Order of attempts:
      1. A response already captured by the active Playwright session.
      2. A live Playwright navigation (only when ``allow_pw_page`` is true).
      3. urllib, up to ``MAX_RETRIES + 1`` attempts.
      4. cloudscraper, only when the last urllib error mentions Cloudflare,
         403 or 503.

    Args:
        url: Absolute URL to download.
        allow_pw_page: Permit a full browser navigation for this URL.

    Returns:
        ``(data, content_type, None)`` on success, or
        ``(None, None, error_message)`` on failure.
    """
    if PW is not None:
        # Popping means each captured response is served from memory at most once.
        cached = PW.pop_captured(url)
        if cached:
            return cached[0], cached[1], None
        if allow_pw_page:
            data, ct, err = PW.goto(url)
            if data is not None:
                return data, ct, None
            # A failed navigation falls through to the urllib path below.

    last_err = "Unknown"
    for attempt in range(MAX_RETRIES + 1):
        try:
            req = urllib.request.Request(url, headers=_browser_headers(url))
            with OPENER.open(req, timeout=TIMEOUT) as resp:
                ct = resp.headers.get('Content-Type', '')
                enc = resp.headers.get('Content-Encoding', '')
                cf_ray = resp.headers.get('cf-ray')
                # Read one byte past the cap so oversize bodies can be detected.
                data = resp.read(MAX_BYTES + 1)
                if len(data) > MAX_BYTES:
                    return None, None, "File exceeds size threshold"
                data = _decode_body(data, enc)

                # Check for Cloudflare challenge in body
                if cf_ray and b'Just a moment' in data[:4096]:
                    last_err = "Cloudflare challenge detected"
                    # Retrying with urllib will not pass the challenge; go to the fallback.
                    break
                return data, ct, None
        except urllib.error.HTTPError as e:
            body_head = b''
            cf_ray = None
            try:
                cf_ray = e.headers.get('cf-ray') if e.headers else None
                body_head = e.read(2048) or b''
            except Exception:
                pass
            if cf_ray or b'Cloudflare' in body_head or b'cf-browser-verification' in body_head:
                last_err = f"HTTP {e.code} (Cloudflare)"
                # Warn only once per session.
                if not CF_WARNED["flag"]:
                    CF_WARNED["flag"] = True
                    log("Cloudflare protection detected.", "WARN")
                break
            last_err = f"HTTP {e.code}"
            # These statuses are not retried; other HTTP errors use the remaining attempts.
            if e.code in (401, 403, 404, 410):
                break
        except urllib.error.URLError as e:
            last_err = f"Network issue: {e.reason}"
        except TimeoutError:
            last_err = "Timeout"
        except Exception as e:
            last_err = f"{type(e).__name__}: {e}"

    # Fallback to Cloudscraper
    # The decision is made by substring match on the last error message.
    if "Cloudflare" in last_err or "403" in last_err or "503" in last_err:
        data, ct, err = _try_cloudscraper(url)
        if data is not None:
            return data, ct, None
        if err:
            last_err = f"{last_err} | cs: {err}"
    return None, None, last_err

def save_asset(url, root_domain, root_folder):
    """Download one asset to disk and register its URL -> local path mapping.

    Each fragment-less URL is processed at most once per session. A file
    that already exists on disk (non-empty) is reused without a download.
    When the download or the write fails, the mapping registered for this
    URL is rolled back so the link rewriter does not point references at a
    file that was never saved.

    Args:
        url: Absolute asset URL (a fragment, if any, is dropped).
        root_domain: Netloc of the site being mirrored.
        root_folder: Output directory for this session.

    Returns:
        ``(local_path, (data, content_type))`` after a fresh download,
        ``(local_path, None)`` when the file was already on disk, and
        ``(None, None)`` when the URL was seen before or could not be saved.
    """
    url_normalized, _ = urldefrag(url)
    with lock:
        if url_normalized in seen_urls:
            stats["skipped"] += 1
            return None, None
        seen_urls.add(url_normalized)

    local_path = url_to_local(url_normalized, root_domain, root_folder)

    # Save to global mappings, remembering any earlier entry for rollback.
    with lock:
        previous_mapping = url_to_local_map.get(url_normalized)
        url_to_local_map[url_normalized] = os.path.abspath(local_path)

    def _rollback_and_count_failure():
        # Restore the earlier mapping (or remove ours) and count the failure.
        with lock:
            if previous_mapping is None:
                url_to_local_map.pop(url_normalized, None)
            else:
                url_to_local_map[url_normalized] = previous_mapping
            stats["fail"] += 1

    # Check if already present on disk
    if os.path.exists(local_path) and os.path.getsize(local_path) > 0:
        with lock:
            stats["skipped"] += 1
        return local_path, None

    data, ct, err = fetch(url_normalized)
    if data is None:
        _rollback_and_count_failure()
        log(f"{url_normalized} ({err})", "FAIL")
        return None, None

    try:
        # Directory creation can fail too (e.g. a file already occupies the
        # directory's name), so it shares the error handling of the write.
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        with open(local_path, 'wb') as f:
            f.write(data)
    except OSError as e:
        _rollback_and_count_failure()
        log(f"{url_normalized} (write failed: {e})", "FAIL")
        return None, None

    with lock:
        stats["ok"] += 1
        stats["bytes"] += len(data)

    name = os.path.relpath(local_path, root_folder).replace('\\', '/')
    log(f"{name} ({len(data)//1024} KB)", "ASSET")
    return local_path, (data, ct)

def extract_asset_links(html, base_url):
    """Collect absolute http(s) URLs of resources referenced by an HTML document.

    Sources scanned: src/href-style attributes, srcset candidates, url(...)
    occurrences (inside style attributes and anywhere in the text) and
    quoted js/json/css paths. References are resolved against ``base_url``.
    """
    candidates = set(RE_SRC.findall(html))

    for srcset_value in RE_SRCSET.findall(html):
        for entry in srcset_value.split(','):
            first_token = entry.strip().split(' ')[0]
            if first_token:
                candidates.add(first_token)

    for style_value in RE_STYLE.findall(html):
        candidates.update(RE_CSSURL.findall(style_value))

    candidates.update(RE_CSSURL.findall(html))
    candidates.update(RE_INLINE_JS_FETCH.findall(html))

    ignored_prefixes = ('data:', 'javascript:', 'mailto:', 'tel:', '#')
    absolute = set()
    for ref in candidates:
        if ref.startswith(ignored_prefixes):
            continue
        try:
            joined = urljoin(base_url, ref)
        except Exception:
            # Unparseable references are dropped.
            continue
        if joined.startswith(('http://', 'https://')):
            absolute.add(joined)
    return absolute

def extract_page_links(html, base_url, root_domain):
    """Return same-domain page URLs linked from ``html``.

    Every quoted ``href`` value is resolved against ``base_url`` and
    stripped of its fragment. Links to other hosts, non-http(s) schemes and
    paths ending in a known asset extension are excluded. A malformed href
    that the URL parser rejects is skipped instead of aborting the crawl.

    Args:
        html: Decoded HTML text.
        base_url: URL of the page the HTML came from.
        root_domain: Netloc that links must match to be followed.

    Returns:
        A set of absolute, fragment-less URLs.
    """
    out = set()
    for m in re.findall(r'''href\s*=\s*["']([^"']+)["']''', html, re.I):
        if m.startswith(('javascript:', 'mailto:', 'tel:', '#', 'data:')):
            continue
        try:
            full, _ = urldefrag(urljoin(base_url, m))
            if not full.startswith(('http://', 'https://')):
                continue
            p = urlparse(full)
        except ValueError:
            # urllib raises ValueError for e.g. an unbalanced "[" in the host.
            continue
        if p.netloc != root_domain:
            continue
        if p.path.split('?')[0].lower().endswith(ASSET_EXTS):
            continue
        out.add(full)
    return out

def extract_css_links(css_text, css_url):
    """Return absolute http(s) URLs referenced by a stylesheet.

    Covers ``url(...)`` references and ``@import`` targets, resolved
    against ``css_url``. Inline ``data:`` URIs, fragment-only references
    and anything that does not resolve to http(s) are skipped, as are
    references the URL parser rejects.

    Args:
        css_text: Decoded stylesheet text.
        css_url: URL the stylesheet was loaded from.

    Returns:
        A set of absolute URLs.
    """
    urls = set()
    references = RE_CSSURL.findall(css_text) + RE_CSSIMP.findall(css_text)
    for ref in references:
        # Unquoted url( ... ) values can carry surrounding whitespace.
        ref = ref.strip()
        if not ref or ref.startswith(('data:', '#')):
            continue
        try:
            full = urljoin(css_url, ref)
        except ValueError:
            continue
        if full.startswith(('http://', 'https://')):
            urls.add(full)
    return urls

def rewrite_links_in_files(root_folder):
    """Rewrite links in downloaded text files so they point at local copies.

    Walks ``root_folder`` and, for every file with a rewritable extension
    whose original URL is known, replaces each reference that resolves to a
    downloaded URL with a path relative to the referencing file. Fragments
    are preserved. Files are only written back when something changed.

    Args:
        root_folder: Output directory for this session.
    """
    log("Rewriting files for offline viewing...", "INFO")
    count = 0
    skip_prefixes = ('data:', 'javascript:', 'mailto:', 'tel:', '#')

    # We copy map to avoid concurrency modifications
    with lock:
        mappings = dict(url_to_local_map)

    # Reverse index built once, instead of scanning every mapping per file.
    # setdefault keeps the first URL registered for a given local path.
    local_to_url = {}
    for url, local_fp in mappings.items():
        local_to_url.setdefault(local_fp, url)

    for dirpath, _dirnames, files in os.walk(root_folder):
        for file in files:
            if not file.lower().endswith(REWRITE_EXTS):
                continue
            file_path = os.path.join(dirpath, file)

            # Find the original URL for this local file
            file_url = local_to_url.get(os.path.abspath(file_path))
            if not file_url:
                continue

            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
            except OSError:
                continue

            base_dir = os.path.dirname(file_path)
            modified = False

            def to_local(ref):
                """Return the relative local path for ``ref``, or None if not downloaded."""
                try:
                    target_url, frag = urldefrag(urljoin(file_url, ref))
                    target_local = mappings.get(target_url)
                    if target_local is None:
                        return None
                    rel_path = os.path.relpath(target_local, start=base_dir).replace('\\', '/')
                except (ValueError, OSError):
                    # Unparseable reference, or no relative path exists
                    # (e.g. different drives on Windows).
                    return None
                return f"{rel_path}#{frag}" if frag else rel_path

            def replacer(match):
                nonlocal modified
                full_match = match.group(0)
                raw_ref = match.group(1)
                ref = raw_ref.strip()
                if not ref or ref.startswith(skip_prefixes):
                    return full_match
                rel_path = to_local(ref)
                if rel_path is None or rel_path == ref:
                    return full_match
                # Splice by position inside the captured group. A plain
                # str.replace could hit the attribute name when the value
                # repeats it (e.g. src="src").
                start = match.start(1) - match.start(0) + (len(raw_ref) - len(raw_ref.lstrip()))
                modified = True
                return full_match[:start] + rel_path + full_match[start + len(ref):]

            # Replace HTML/style source mappings. url(...) inside inline
            # style attributes is covered by the RE_CSSURL pass; a whole
            # style value is never itself a URL, so it gets no pass of its own.
            content = RE_SRC.sub(replacer, content)
            content = RE_CSSURL.sub(replacer, content)
            content = RE_CSSIMP.sub(replacer, content)

            # Handle srcset mappings
            def srcset_replacer(match):
                nonlocal modified
                full_match = match.group(0)
                new_parts = []
                changed = False
                for part in match.group(1).split(','):
                    tokens = part.split()
                    # The first token of a candidate is its URL; the rest is
                    # the width/density descriptor and is left untouched.
                    rel_path = to_local(tokens[0]) if tokens else None
                    if rel_path is not None and rel_path != tokens[0]:
                        part = part.replace(tokens[0], rel_path, 1)
                        changed = True
                    new_parts.append(part)
                if not changed:
                    return full_match
                modified = True
                start = match.start(1) - match.start(0)
                end = match.end(1) - match.start(0)
                return full_match[:start] + ','.join(new_parts) + full_match[end:]

            content = RE_SRCSET.sub(srcset_replacer, content)

            if modified:
                try:
                    with open(file_path, 'w', encoding='utf-8') as f:
                        f.write(content)
                    count += 1
                except OSError:
                    # Leave the file as downloaded if it cannot be rewritten.
                    pass

    log(f"Successfully optimized {count} files for offline browsing.", "OK")

def crawl(start_url, root_folder, max_pages, max_depth):
    """Breadth-first crawl of same-domain pages starting at ``start_url``.

    Each fetched page is written under ``root_folder`` and scanned for
    asset references and further internal links. A page that cannot be
    fetched or written is logged and skipped; it does not stop the crawl.

    Args:
        start_url: Absolute URL to start from; its netloc is the root domain.
        root_folder: Output directory for this session.
        max_pages: Stop after this many pages have been saved.
        max_depth: Links are followed only from pages shallower than this.

    Returns:
        ``(asset_urls, root_domain)`` where ``asset_urls`` is the set of
        resource URLs referenced by the saved HTML pages.
    """
    root_domain = urlparse(start_url).netloc
    queue = deque([(start_url, 0)])
    asset_queue = set()
    pages_done = 0

    log(f"Initiating crawlers on root domain: {root_domain}", "INFO")

    while queue and pages_done < max_pages:
        url, depth = queue.popleft()
        url, _ = urldefrag(url)

        if url in seen_pages:
            continue
        seen_pages.add(url)

        data, ct, err = fetch(url, allow_pw_page=True)
        if data is None:
            log(f"{url} ({err})", "FAIL")
            continue

        # After the first browser navigation, hand the browser's cookies to urllib.
        if PW is not None and pages_done == 0:
            _sync_pw_cookies_to_urllib()

        # Check content-type to verify if it's text/html or generic media
        ct_lower = (ct or '').lower()
        is_html = 'html' in ct_lower or url.endswith('/') or '.' not in os.path.basename(urlparse(url).path)

        local_path = url_to_local(url, root_domain, root_folder)
        if is_html and not local_path.lower().endswith(('.html', '.htm')):
            local_path = os.path.join(local_path, 'index.html') if os.path.isdir(local_path) else local_path + '.html'

        try:
            # Directory creation can fail as well (a file may already occupy
            # the directory's name); handled here so one bad path does not
            # abort the whole crawl.
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
            with open(local_path, 'wb') as f:
                f.write(data)
        except OSError as e:
            log(f"{url} (write failed: {e})", "FAIL")
            continue

        # Store to mapping dictionary only once the file exists, so the
        # link rewriter never targets a page that was not saved.
        with lock:
            url_to_local_map[url] = os.path.abspath(local_path)

        pages_done += 1
        with lock:
            stats["ok"] += 1
            stats["bytes"] += len(data)

        rel = os.path.relpath(local_path, root_folder).replace('\\', '/')
        log(f"{rel} [Depth: {depth}]", "PAGE")

        if not is_html:
            continue

        html = data.decode('utf-8', errors='ignore')

        # Extract files referenced by this page
        asset_queue |= extract_asset_links(html, url)

        # Queue internal links for crawling
        if depth < max_depth:
            for nxt in extract_page_links(html, url, root_domain):
                if nxt not in seen_pages:
                    queue.append((nxt, depth + 1))

    return asset_queue, root_domain

def dump_assets(asset_urls, root_domain, root_folder):
    """Downloads all discovered assets concurrently.

    Runs up to three rounds: the first downloads ``asset_urls``; each
    following round downloads URLs found inside stylesheets fetched in the
    previous one (fonts, background images, nested imports).

    Args:
        asset_urls: Iterable of absolute asset URLs.
        root_domain: Netloc of the site being mirrored.
        root_folder: Output directory for this session.
    """
    pending = set(asset_urls)
    round_num = 0
    while pending and round_num < 3:
        round_num += 1
        batch = list(pending)
        pending = set()

        log(f"Batch {round_num}: Fetching {len(batch)} assets...", "INFO")
        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
            futures = {ex.submit(save_asset, u, root_domain, root_folder): u for u in batch}
            for fut in as_completed(futures):
                url = futures[fut]
                try:
                    _local, payload = fut.result()
                except Exception as e:
                    # A crashed worker is a failed download: report and count
                    # it instead of dropping it silently.
                    with lock:
                        stats["fail"] += 1
                    log(f"{url} ({type(e).__name__}: {e})", "FAIL")
                    continue
                if not payload:
                    continue
                data, ct = payload

                # Check if asset is a CSS file to parse nested URLs (e.g. background images, fonts)
                is_css = url.lower().split('?')[0].endswith('.css') or 'css' in (ct or '').lower()
                if not is_css:
                    continue
                css_text = data.decode('utf-8', errors='ignore')
                try:
                    nested = extract_css_links(css_text, url)
                except ValueError:
                    # A malformed reference must not abort the whole batch.
                    continue
                with lock:
                    pending.update(nxt for nxt in nested if nxt not in seen_urls)

def probe_common(base_url, root_folder):
    """Request every path in PROBES against ``base_url`` and save what exists.

    Runs the downloads on a small thread pool and returns once all of them
    have finished; individual results are not inspected here.
    """
    root_domain = urlparse(base_url).netloc
    log("Scanning directory structure for sensitive assets...", "INFO")
    with ThreadPoolExecutor(max_workers=8) as pool:
        submitted = []
        for probe_path in PROBES:
            probe_url = urljoin(base_url, probe_path)
            submitted.append(pool.submit(save_asset, probe_url, root_domain, root_folder))
        for _finished in as_completed(submitted):
            continue

def parse_robots(base_url, root_folder):
    """Collect same-domain URLs listed in the saved robots.txt / sitemap.xml.

    Reads ``robots.txt`` and ``sitemap.xml`` from ``root_folder`` (if they
    were downloaded) and extracts:

    * ``Allow`` / ``Disallow`` / ``Sitemap`` directive values, resolved
      against ``base_url``. Path patterns containing ``*`` or ``$`` are
      skipped because they are match patterns, not fetchable URLs.
    * ``<loc>`` entries.

    Args:
        base_url: Scheme and host of the site, e.g. ``https://example.com``.
        root_folder: Output directory for this session.

    Returns:
        The set of extracted URLs whose netloc equals that of ``base_url``.
    """
    extras = set()
    root_domain = urlparse(base_url).netloc
    # The value must be on the directive's own line: only spaces/tabs are
    # allowed after the colon. Matching any whitespace there would let an
    # empty "Disallow:" swallow the following line's directive.
    directive_re = re.compile(
        r'^[ \t]*(Disallow|Allow|Sitemap)[ \t]*:[ \t]*(\S+)', re.I | re.M
    )
    for name in ("robots.txt", "sitemap.xml"):
        local = os.path.join(root_folder, name)
        if not os.path.exists(local):
            continue
        try:
            with open(local, 'rb') as f:
                text = f.read().decode('utf-8', errors='ignore')
        except OSError:
            continue
        for directive, value in directive_re.findall(text):
            if directive.lower() != 'sitemap' and ('*' in value or '$' in value):
                continue
            try:
                extras.add(urljoin(base_url, value))
            except ValueError:
                continue
        for m in re.findall(r'<loc>\s*([^<]+)\s*</loc>', text, re.I):
            extras.add(m.strip())

    same_domain = set()
    for u in extras:
        try:
            if urlparse(u).netloc == root_domain:
                same_domain.add(u)
        except ValueError:
            # Unparseable entries are dropped.
            continue
    return same_domain

def main():
    """Interactive entry point: prompt for a target, mirror it, print a summary.

    Loops until the user enters ``exit`` or ``quit``. Each iteration resets
    the shared session state, optionally starts a Playwright session, then
    runs crawl -> probe -> robots/sitemap parsing -> asset download ->
    link rewriting.
    """
    if platform.system() == 'Windows':
        try:
            ctypes.windll.kernel32.SetConsoleTitleW('Freii Grabber')
        except Exception:
            # Setting the window title is cosmetic only.
            pass

    ensure_dependencies()

    while True:
        clear()
        print_banner()

        # Clean, minimalist target prompt
        target_url = input(" Target URL > ").strip()
        if not target_url:
            continue
        if target_url.lower() in ('exit', 'quit'):
            break
        # A bare host name is treated as https.
        if not target_url.startswith(('http://', 'https://')):
            target_url = 'https://' + target_url

        depth_in = input(" Crawl depth (0-3) [1] > ").strip() or "1"
        pages_in = input(" Max pages [50] > ").strip() or "50"

        # The Playwright option is only offered when the package is available.
        pw_in = "n"
        if HAS_PLAYWRIGHT:
            pw_in = input(" Bypass Cloudflare (y/N) > ").strip().lower() or "n"

        try:
            # Depth is clamped to 0..3; page count to at least 1.
            max_depth = max(0, min(3, int(depth_in)))
            max_pages = max(1, int(pages_in))
        except ValueError:
            # Any non-numeric answer resets both values to their defaults.
            max_depth, max_pages = 1, 50
        use_pw = HAS_PLAYWRIGHT and pw_in.startswith("y")

        # One output folder per target host, with dots replaced by underscores.
        domain = urlparse(target_url).netloc
        desktop_dumps = get_desktop_dumps()
        root_folder = os.path.join(desktop_dumps, domain.replace('.', '_'))
        os.makedirs(root_folder, exist_ok=True)

        # Clear state
        seen_urls.clear()
        seen_pages.clear()
        url_to_local_map.clear()
        CF_WARNED["flag"] = False
        stats.update({"ok": 0, "fail": 0, "skipped": 0, "bytes": 0})

        clear()
        print_banner()

        print(" Session Info:")
        print(f" - Target:    {target_url}")
        print(f" - Depth:     {max_depth}")
        print(f" - Max Pages: {max_pages}")
        print(f" - Output:    {root_folder}")
        print(f" - Engine:    {'Playwright (Chromium)' if use_pw else 'Urllib HTTPS'}")
        print()

        # fetch() and the cookie sync read the module-level PW session.
        global PW
        PW = None
        start_time = time.time()
        try:
            if use_pw:
                log("Spawning headless Chromium instance...", "INFO")
                PW = PWSession()
                PW.start(headless=True)

            # Start crawl
            asset_urls, root_domain = crawl(target_url, root_folder, max_pages, max_depth)

            # Sensitivity probing & robots parse
            # Probing runs first because parse_robots reads the robots.txt /
            # sitemap.xml files from root_folder.
            base = f"{urlparse(target_url).scheme}://{domain}"
            probe_common(base, root_folder)
            asset_urls |= parse_robots(base, root_folder)

            # Download asset files
            dump_assets(asset_urls, root_domain, root_folder)

            # Rewriting asset links for offline browseability
            rewrite_links_in_files(root_folder)

            # Finished
            duration = time.time() - start_time
            mb = stats["bytes"] / (1024 * 1024)

            print()
            print("---------------------------------------------")
            print(" Session Completed:")
            # seen_pages holds every dequeued page URL, including ones whose fetch failed.
            print(f" - Pages Crawled: {len(seen_pages)}")
            print(f" - Saved Files:   {stats['ok']}")
            print(f" - Skipped Files: {stats['skipped']}")
            print(f" - Failed Files:  {stats['fail']}")
            print(f" - Data Size:     {mb:.2f} MB")
            print(f" - Time Taken:    {duration:.1f}s")
            print(f" - Destination:   {root_folder}")
            print("---------------------------------------------")

        except KeyboardInterrupt:
            print("\n [!] Operation aborted by user.")
        except Exception as e:
            print(f"\n [!] Execution error: {e}")
        finally:
            # Always shut the browser down, even after an error or Ctrl+C.
            if PW is not None:
                PW.close()
                PW = None

        print()
        input(" Press Enter to return to main menu...")

if __name__ == "__main__":
    main()
