padelclub_backend/api/utils.py

import logging
import requests
import re
from playwright.sync_api import sync_playwright
from datetime import datetime, timedelta
import json
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed
from django.conf import settings

logger = logging.getLogger(__name__)


def check_version_smaller_than_1_1_12(version_str):
    # Remove the parentheses part if it exists, example of version: 1.1.12 (2)
    version_str = version_str.split()[0]
    if version_str:
        # Split version into components
        version_parts = [int(x) for x in version_str.split(".")]
        target_parts = [1, 1, 12]
        # Compare version components
        return version_parts < target_parts
    else:
        return False


def scrape_fft_club_tournaments(
    club_code, club_name, start_date=None, end_date=None, page=0
):
    """
    Scrapes FFT tournaments using Playwright with detailed debugging
    """
    # logger.info(f"Starting Playwright scraping for {club_name}")
    try:
        with sync_playwright() as p:
            browser = get_browser_for_environment(p)
            page_obj = browser.new_page()

            page_obj.set_extra_http_headers(
                {
                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
                }
            )

            # Navigate to FFT
            target_url = "https://tenup.fft.fr/recherche/tournois"
            # # logger.info(f"Navigating to: {target_url}")

            page_obj.goto(target_url)
            page_obj.wait_for_timeout(2000)

            current_url = page_obj.url
            # logger.info(f"Current URL: {current_url}")

            if "queue-it.net" in current_url.lower():
                logger.warning("Still in Queue-It")
                browser.close()
                return None

            # Extract form_build_id
            form_input = page_obj.query_selector('input[name="form_build_id"]')
            if not form_input:
                logger.error("Could not find form_build_id")
                browser.close()
                return None

            form_build_id = form_input.get_attribute("value")
            # # logger.info(f"Extracted form_build_id: {form_build_id}")

            # Build parameters
            date_component = ""
            if start_date and end_date:
                date_component = f"&date[start]={start_date}&date[end]={end_date}"
            elif start_date:
                try:
                    start_dt = datetime.strptime(start_date, "%d/%m/%y")
                    end_dt = start_dt + timedelta(days=90)
                    date_component = f"&date[start]={start_date}&date[end]={end_dt.strftime('%d/%m/%y')}"
                except ValueError:
                    logger.warning(f"Invalid date format: {start_date}")

            club_name_encoded = club_name.replace(" ", "+")
            club_code_clean = club_code.replace(" ", "")

            params = f"recherche_type=club&club[autocomplete][value_container][value_field]={club_code_clean}&club[autocomplete][value_container][label_field]={club_name_encoded}&pratique=PADEL{date_component}&page={page}&sort=dateDebut+asc&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page"

            # # logger.info(f"AJAX Parameters: {params}")

            # Make AJAX request and capture the full response
            ajax_script = f"""
            async () => {{
                try {{
                    const response = await fetch('https://tenup.fft.fr/system/ajax', {{
                        method: 'POST',
                        headers: {{
                            'Accept': 'application/json, text/javascript, */*; q=0.01',
                            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                            'X-Requested-With': 'XMLHttpRequest',
                            'Origin': 'https://tenup.fft.fr',
                            'Referer': 'https://tenup.fft.fr/recherche/tournois'
                        }},
                        body: `{params}`
                    }});

                    const status = response.status;
                    const responseText = await response.text();

                    return {{
                        success: response.ok,
                        status: status,
                        responseText: responseText
                    }};
                }} catch (error) {{
                    return {{
                        success: false,
                        error: error.message
                    }};
                }}
            }}
            """

            # # logger.info("Making AJAX request...")
            result = page_obj.evaluate(ajax_script)

            browser.close()

            # Print the full response for debugging
            # # logger.info(f"AJAX Response Status: {result.get('status')}")
            # # logger.info(f"AJAX Response Success: {result.get('success')}")

            if result.get("success"):
                response_text = result.get("responseText", "")
                # # logger.info(f"Raw Response Length: {len(response_text)}")
                # # logger.info(f"Raw Response (first 500 chars): {response_text[:500]}")

                try:
                    # Try to parse as JSON
                    json_data = json.loads(response_text)
                    # # logger.info(f"JSON Response Type: {type(json_data)}")
                    # # logger.info(f"JSON Response: {json.dumps(json_data, indent=2, default=str)}")

                    # Now try to parse it
                    parsed_result = _parse_ajax_response(json_data)
                    # # logger.info(f"Parsed Result: {parsed_result}")

                    return parsed_result

                except json.JSONDecodeError as json_error:
                    logger.error(f"JSON Parse Error: {json_error}")
                    logger.error(f"Response text: {response_text}")
                    return None
            else:
                logger.error(f"AJAX request failed: {result.get('error')}")
                return None

    except Exception as e:
        logger.error(f"Error in Playwright scraping: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        return None


def scrape_fft_club_tournaments_all_pages(
    club_code, club_name, start_date=None, end_date=None
):
    """
    Scrapes all pages of FFT tournaments for a specific club
    """
    # # logger.info(f"Starting complete tournament scraping for {club_name}")

    all_tournaments = []
    page = 0

    while True:
        try:
            # Call the working single-page function
            result = scrape_fft_club_tournaments(
                club_code=club_code,
                club_name=club_name,
                start_date=start_date,
                end_date=end_date,
                page=page,
            )

            # Debug: Log what we got
            # logger.info(f"Page {page} result: {result}")

            if not result:
                logger.warning(f"No result for page {page}")
                break

            tournaments = result.get("tournaments", [])
            # logger.info(f"Page {page} returned {len(tournaments)} tournaments")

            if not tournaments:
                # logger.info(f"No tournaments on page {page}, stopping")
                break

            all_tournaments.extend(tournaments)

            # Check if we have all results
            total_results = result.get("total_results", 0)
            # logger.info(
            #     f"Total so far: {len(all_tournaments)}, Target: {total_results}"
            # )

            if len(all_tournaments) >= total_results:
                # logger.info("Got all tournaments, stopping")
                break

            page += 1
            # logger.info(f"Moving to page {page}")
            # time.sleep(1)  # Rate limiting

        except Exception as e:
            logger.error(f"Error on page {page}: {e}")
            break

    # logger.info(
    #     f"Completed scraping: {len(all_tournaments)} tournaments across {page + 1} pages"
    # )

    return {
        "tournaments": all_tournaments,
        "total_results": len(all_tournaments),
        "current_count": len(all_tournaments),
        "pages_scraped": page + 1,
    }


def _parse_ajax_response(commands):
    """
    Parse the AJAX response commands to extract tournament data
    Returns data in the exact format expected by Swift FederalTournament struct
    """
    tournaments = []

    try:
        # Check for alert commands (maintenance mode)
        for command in commands:
            if command.get("command") == "alert":
                # logger.warning("Maintenance mode detected")
                return None

        # Find the command with results
        result_command = None
        for command in commands:
            if command.get("command") == "recherche_tournois_update":
                result_command = command
                # # logger.info("Found recherche_tournois_update command!")
                break

        if result_command and result_command.get("results"):
            results = result_command["results"]
            items = results.get("items", [])
            total_results = results.get("nb_results", 0)

            # # logger.info(f"Processing {len(items)} tournaments from results")

            for item in items:
                # Parse dates - they're already in the correct format
                date_debut = item.get("dateDebut")
                date_fin = item.get("dateFin")
                date_validation = item.get("dateValidation")

                # Build the tournament object to match Swift FederalTournament structure
                tournament = {
                    "id": str(item.get("id", "")),
                    "millesime": item.get("millesime"),
                    "libelle": item.get("libelle"),
                    "tmc": item.get("tmc"),
                    "tarifAdulteChampionnat": item.get("tarifAdulteChampionnat"),
                    "type": item.get("type"),
                    "ageReel": item.get("ageReel"),
                    "naturesTerrains": item.get("naturesTerrains", []),
                    "idsArbitres": item.get("idsArbitres", []),
                    "tarifJeuneChampionnat": item.get("tarifJeuneChampionnat"),
                    "international": item.get("international"),
                    "inscriptionEnLigne": item.get("inscriptionEnLigne"),
                    "categorieTournoi": item.get("categorieTournoi"),
                    "prixLot": item.get("prixLot"),
                    "paiementEnLigne": item.get("paiementEnLigne"),
                    "reductionAdherentJeune": item.get("reductionAdherentJeune"),
                    "reductionAdherentAdulte": item.get("reductionAdherentAdulte"),
                    "paiementEnLigneObligatoire": item.get(
                        "paiementEnLigneObligatoire"
                    ),
                    "villeEngagement": item.get("villeEngagement"),
                    "senior": item.get("senior"),
                    "veteran": item.get("veteran"),
                    "inscriptionEnLigneEnCours": item.get("inscriptionEnLigneEnCours"),
                    "avecResultatPublie": item.get("avecResultatPublie"),
                    "code": item.get("code"),
                    "categorieAge": item.get("categorieAge"),
                    "codeComite": item.get("codeComite"),
                    "installations": item.get("installations", []),
                    "reductionEpreuveSupplementaireJeune": item.get(
                        "reductionEpreuveSupplementaireJeune"
                    ),
                    "reductionEpreuveSupplementaireAdulte": item.get(
                        "reductionEpreuveSupplementaireAdulte"
                    ),
                    "nomComite": item.get("nomComite"),
                    "naturesEpreuves": item.get("naturesEpreuves"),
                    "jeune": item.get("jeune"),
                    "courrielEngagement": item.get("courrielEngagement"),
                    "nomClub": item.get("nomClub"),
                    "installation": item.get("installation"),
                    "categorieAgeMax": item.get("categorieAgeMax"),
                    "tournoiInterne": item.get("tournoiInterne"),
                    "nomLigue": item.get("nomLigue"),
                    "nomEngagement": item.get("nomEngagement"),
                    "codeLigue": item.get("codeLigue"),
                    "modeleDeBalle": item.get("modeleDeBalle"),
                    "jugeArbitre": item.get("jugeArbitre"),
                    "adresse2Engagement": item.get("adresse2Engagement"),
                    "epreuves": item.get("epreuves"),
                    "dateDebut": date_debut,
                    "serie": item.get("serie"),
                    "dateFin": date_fin,
                    "dateValidation": date_validation,
                    "codePostalEngagement": item.get("codePostalEngagement"),
                    "codeClub": item.get("codeClub"),
                    "prixEspece": item.get("prixEspece"),
                    "japPhoneNumber": None,  # Will be populated by separate umpire call
                    # Additional fields from the response
                    "adresse1Engagement": item.get("adresse1Engagement"),
                    "originalId": item.get("originalId"),
                    "familleTournoi": item.get("familleTournoi", []),
                    "isTournoi": item.get("isTournoi"),
                    "natureWithCatAge": item.get("natureWithCatAge"),
                }

                tournaments.append(tournament)

            # # logger.info(
            #     f"Successfully parsed {len(tournaments)} tournaments from response"
            # )
            return {
                "tournaments": tournaments,
                "total_results": total_results,
                "current_count": len(tournaments),
            }
        else:
            logger.error("No recherche_tournois_update command found in AJAX response")
            return {"tournaments": [], "total_results": 0, "current_count": 0}

    except Exception as e:
        logger.error(f"Error parsing AJAX response: {e}")
        return None


def scrape_fft_all_tournaments(
    sorting_option=None,
    page=0,
    start_date=None,
    end_date=None,
    city="",
    distance=15,
    categories=None,
    levels=None,
    lat=None,
    lng=None,
    ages=None,
    tournament_types=None,
    national_cup=False,
):
    """
    Scrapes FFT tournaments using Playwright with detailed debugging
    Based exactly on the working scrape_fft_club_tournaments function
    """
    # logger.info(f"Starting Playwright scraping for city: {city}")

    try:
        with sync_playwright() as p:
            browser = get_browser_for_environment(p)
            page_obj = browser.new_page()

            page_obj.set_extra_http_headers(
                {
                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
                }
            )

            # Navigate to FFT
            target_url = "https://tenup.fft.fr/recherche/tournois"
            # # logger.info(f"Navigating to: {target_url}")

            page_obj.goto(target_url)
            page_obj.wait_for_timeout(500)

            current_url = page_obj.url
            # # logger.info(f"Current URL: {current_url}")

            if "queue-it.net" in current_url.lower():
                # logger.warning("Still in Queue-It")
                browser.close()
                return None

            # Extract form_build_id
            form_input = page_obj.query_selector('input[name="form_build_id"]')
            if not form_input:
                # logger.error("Could not find form_build_id")
                browser.close()
                return None

            form_build_id = form_input.get_attribute("value")
            # # logger.info(f"Extracted form_build_id: {form_build_id}")

            # Build parameters - EXACT same pattern as club function
            date_component = ""
            if start_date and end_date:
                date_component = f"&date[start]={start_date}&date[end]={end_date}"
            elif start_date:
                try:
                    start_dt = datetime.strptime(start_date, "%d/%m/%y")
                    end_dt = start_dt + timedelta(days=90)
                    date_component = f"&date[start]={start_date}&date[end]={end_dt.strftime('%d/%m/%y')}"
                except ValueError:
                    logger.warning(f"Invalid date format: {start_date}")

            # Build filter parameters
            filter_params = ""

            # Add categories filter
            if categories:
                # # logger.info(f"Adding categories filter: {categories}")
                for category in categories:
                    filter_params += f"&epreuve[{category}]={category}"

            # Add levels filter
            if levels:
                # # logger.info(f"Adding levels filter: {levels}")
                for level in levels:
                    filter_params += f"&categorie_tournoi[{level}]={level}"

            # Add ages filter
            if ages:
                # # logger.info(f"Adding ages filter: {ages}")
                for age in ages:
                    filter_params += f"&categorie_age[{age}]={age}"

            # Add types filter
            if tournament_types:
                # # logger.info(f"Adding types filter: {tournament_types}")
                for t_type in tournament_types:
                    capitalized_type = t_type.capitalize()
                    filter_params += f"&type[{capitalized_type}]={capitalized_type}"

            # Add national cup filter
            if national_cup:
                # # logger.info("Adding national cup filter")
                filter_params += "&tournoi_npc=1"

            # Fix the sorting parameter
            if sorting_option:
                sort_param = f"&sort={sorting_option}"
            else:
                sort_param = "&sort=dateDebut+asc"

            # Build city parameters with distance and location
            if city and city.strip():
                city_name_encoded = city.strip().replace(" ", "+")

                # Start with the working base parameters
                base_params = f"recherche_type=ville&ville[autocomplete][value_container][value_field]={city_name_encoded}&ville[autocomplete][value_container][label_field]={city_name_encoded}"

                # Add distance parameter
                distance_param = f"&ville[distance][value_field]={int(distance)}"

                # Add lat/lng if provided
                location_params = ""
                if lat and lng:
                    location_params = f"&ville[autocomplete][value_container][lat_field]={lat}&ville[autocomplete][value_container][lng_field]={lng}"

                # Combine all parameters including filters
                params = f"{base_params}{location_params}{distance_param}&pratique=PADEL{date_component}&page={page}{sort_param}&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page"
            else:
                # Default to ligue search if no city provided
                params = f"recherche_type=ligue&pratique=PADEL{date_component}{filter_params}&page={page}&sort={sorting_option or 'dateDebut+asc'}&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page"

            # # logger.info(f"AJAX Parameters: {params}")

            # Make AJAX request and capture the full response - EXACT same as club function
            ajax_script = f"""
            async () => {{
                try {{
                    const response = await fetch('https://tenup.fft.fr/system/ajax', {{
                        method: 'POST',
                        headers: {{
                            'Accept': 'application/json, text/javascript, */*; q=0.01',
                            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                            'X-Requested-With': 'XMLHttpRequest',
                            'Origin': 'https://tenup.fft.fr',
                            'Referer': 'https://tenup.fft.fr/recherche/tournois'
                        }},
                        body: `{params}`
                    }});

                    const status = response.status;
                    const responseText = await response.text();

                    return {{
                        success: response.ok,
                        status: status,
                        responseText: responseText
                    }};
                }} catch (error) {{
                    return {{
                        success: false,
                        error: error.message
                    }};
                }}
            }}
            """

            # # logger.info("Making AJAX request...")
            result = page_obj.evaluate(ajax_script)

            browser.close()

            # Print the full response for debugging - EXACT same as club function
            # # logger.info(f"AJAX Response Status: {result.get('status')}")
            # # logger.info(f"AJAX Response Success: {result.get('success')}")

            if result.get("success"):
                response_text = result.get("responseText", "")
                # # logger.info(f"Raw Response Length: {len(response_text)}")

                try:
                    # Try to parse as JSON
                    json_data = json.loads(response_text)
                    # # logger.info(f"JSON Response Type: {type(json_data)}")

                    # Now try to parse it - EXACT same as club function
                    parsed_result = _parse_ajax_response(json_data)

                    return parsed_result

                except json.JSONDecodeError as json_error:
                    # logger.error(f"JSON Parse Error: {json_error}")
                    # logger.error(f"Response text: {response_text}")
                    return None
            else:
                # logger.error(f"AJAX request failed: {result.get('error')}")
                return None

    except Exception as e:
        logger.error(f"Error in Playwright scraping: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        return None


def get_umpire_data(tournament_id):
    """
    Umpire data extraction with improved Queue-It handling
    """
    # # logger.info(f"Getting umpire data for tournament {tournament_id}")

    try:
        with sync_playwright() as p:
            browser = get_browser_for_environment(p)
            page = browser.new_page()

            # Navigate to tournament page
            url = f"https://tenup.fft.fr/tournoi/{tournament_id}"
            # # logger.info(f"Navigating to tournament page: {url}")

            try:
                # Navigate with reasonable timeout
                page.goto(url, timeout=30000, wait_until="domcontentloaded")

                # Enhanced Queue-It handling (similar to tournament search)
                if "queue-it.net" in page.url.lower():
                    # logger.warning(f"Queue-It detected for tournament {tournament_id}")

                    # Wait strategy based on environment
                    max_queue_wait = (
                        120000 if not settings.DEBUG else 30000
                    )  # 2 min prod, 30s dev
                    check_interval = 1000  # 10 seconds
                    elapsed_time = 0

                    # # logger.info(
                    #     f"Waiting in queue for umpire data (max {max_queue_wait / 1000}s)..."
                    # )

                    while (
                        elapsed_time < max_queue_wait
                        and "queue-it.net" in page.url.lower()
                    ):
                        # # logger.info(
                        #     f"Umpire queue wait: {elapsed_time / 1000}s elapsed"
                        # )
                        page.wait_for_timeout(check_interval)
                        elapsed_time += check_interval

                        # Check if we've been redirected
                        if "queue-it.net" not in page.url.lower():
                            # # logger.info(
                            #     "Successfully passed through queue for umpire data!"
                            # )
                            break

                    # If still in queue, try fallback or return None
                    if "queue-it.net" in page.url.lower():
                        # logger.warning(
                        #     "Umpire queue timeout - trying requests fallback"
                        # )
                        browser.close()
                        return _get_umpire_data_requests_fallback(tournament_id)

                # Wait for page to load properly
                page.wait_for_load_state("networkidle", timeout=20000)

                # Extract data using multiple strategies
                html_content = page.content()

                # Strategy 1: Try existing regex patterns
                name, email, phone = _extract_umpire_with_regex(html_content)

                # Strategy 2: If regex fails, try DOM selectors
                if not name and not email and not phone:
                    name, email, phone = _extract_umpire_with_selectors(page)

                browser.close()

                if name or email or phone:
                    # # logger.info(
                    #     f"Successfully extracted umpire data: name={name}, email={email}, phone={phone}"
                    # )
                    return name, email, phone
                else:
                    logger.warning(
                        f"No umpire data found for tournament {tournament_id}"
                    )
                    return None, None, None

            except Exception as page_error:
                logger.error(
                    f"Error loading tournament page {tournament_id}: {page_error}"
                )
                browser.close()

                # Try requests fallback
                # logger.info("Trying requests fallback after Playwright error")
                return _get_umpire_data_requests_fallback(tournament_id)

    except Exception as e:
        logger.error(f"Error in umpire data extraction for {tournament_id}: {e}")
        return None, None, None


def _extract_umpire_with_regex(html_content):
    """
    Extract umpire data using regex patterns
    """
    # Extract name
    name_pattern = r'tournoi-detail-page-inscription-responsable-title">\s*([^<]+)\s*<'
    name_match = re.search(name_pattern, html_content)
    name = name_match.group(1).strip() if name_match else None

    # Extract email
    email_pattern = r'mailto:([^"]+)"'
    email_match = re.search(email_pattern, html_content)
    email = email_match.group(1) if email_match else None

    # Extract phone - try multiple patterns
    phone_patterns = [
        r'<div class="details-bloc">\s*(\d{2}\s+\d{2}\s+\d{2}\s+\d{2}\s+\d{2})\s*</div>',
        r"(\d{2}\.\d{2}\.\d{2}\.\d{2}\.\d{2})",
        r"(\d{10})",
        r"(\+33\s?\d{1}\s?\d{2}\s?\d{2}\s?\d{2}\s?\d{2})",
    ]

    phone = None
    for pattern in phone_patterns:
        phone_match = re.search(pattern, html_content)
        if phone_match:
            phone = phone_match.group(1).strip()
            break

    return name, email, phone


def _extract_umpire_with_selectors(page):
    """
    Extract umpire data using DOM selectors as fallback
    """
    name = None
    email = None
    phone = None

    try:
        # Try different selectors for name
        name_selectors = [
            ".tournoi-detail-page-inscription-responsable-title",
            '[class*="responsable-title"]',
            '[class*="umpire-name"]',
            'h3:has-text("Responsable")',
        ]

        for selector in name_selectors:
            try:
                element = page.query_selector(selector)
                if element:
                    name = element.inner_text().strip()
                    if name:
                        break
            except:
                continue

        # Try different selectors for email
        email_selectors = [
            'a[href^="mailto:"]',
            '[class*="email"]',
        ]

        for selector in email_selectors:
            try:
                element = page.query_selector(selector)
                if element:
                    href = element.get_attribute("href")
                    if href and href.startswith("mailto:"):
                        email = href.replace("mailto:", "")
                        break
                    text = element.inner_text().strip()
                    if "@" in text:
                        email = text
                        break
            except:
                continue

        # Try different selectors for phone
        phone_selectors = [
            ".details-bloc",
            '[class*="phone"]',
            '[class*="telephone"]',
        ]

        for selector in phone_selectors:
            try:
                element = page.query_selector(selector)
                if element:
                    text = element.inner_text().strip()
                    # Check if text looks like a phone number

                    if re.match(r"[\d\s\.\+\-\(\)]{8,}", text):
                        phone = text
                        break
            except:
                continue

    except Exception as e:
        logger.warning(f"Error in selector-based extraction: {e}")

    return name, email, phone


def _get_umpire_data_requests_fallback(tournament_id):
    """
    Enhanced fallback method using requests
    """
    # logger.info(f"Using requests fallback for tournament {tournament_id}")

    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "fr-FR,fr;q=0.9,en;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        url = f"https://tenup.fft.fr/tournoi/{tournament_id}"
        response = requests.get(url, headers=headers, timeout=15)

        if "queue-it.net" in response.url:
            # logger.warning("Requests fallback also hit Queue-It")
            return None, None, None

        if response.status_code != 200:
            # logger.error(f"Requests fallback failed: {response.status_code}")
            return None, None, None

        # Try regex extraction on requests response
        name, email, phone = _extract_umpire_with_regex(response.text)

        # If regex fails, try BeautifulSoup
        if not name and not email and not phone:
            try:
                soup = BeautifulSoup(response.text, "html.parser")

                # Try to find name
                name_elements = soup.find_all(
                    class_=lambda x: x and "responsable" in x.lower()
                )
                if name_elements:
                    name = name_elements[0].get_text().strip()

                # Try to find email
                email_links = soup.find_all(
                    "a", href=lambda x: x and x.startswith("mailto:")
                )
                if email_links:
                    email = email_links[0]["href"].replace("mailto:", "")

                # Try to find phone in various elements
                for element in soup.find_all(text=True):
                    if re.search(
                        r"\d{2}[\s\.]\d{2}[\s\.]\d{2}[\s\.]\d{2}[\s\.]\d{2}",
                        str(element),
                    ):
                        phone = str(element).strip()
                        break

            except Exception as soup_error:
                logger.warning(f"BeautifulSoup parsing failed: {soup_error}")

        # logger.info(
        #     f"Requests fallback result: name={name}, email={email}, phone={phone}"
        # )
        return name, email, phone

    except Exception as e:
        logger.error(f"Requests fallback error: {e}")
        return None, None, None


def _get_umpire_data_requests_fallback(tournament_id):
    """
    Fallback method using requests (may hit Queue-It)
    """
    # logger.info(f"Using requests fallback for tournament {tournament_id}")

    try:
        url = f"https://tenup.fft.fr/tournoi/{tournament_id}"

        headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
        }

        response = requests.get(url, headers=headers, timeout=30)

        if response.status_code != 200:
            logger.error(f"Failed to fetch tournament page: {response.status_code}")
            return None, None, None

        html_content = response.text

        # Extract using regex (original method)
        name_pattern = (
            r'tournoi-detail-page-inscription-responsable-title">\s*([^<]+)\s*<'
        )
        name_match = re.search(name_pattern, html_content)
        name = name_match.group(1).strip() if name_match else None

        email_pattern = r'mailto:([^"]+)"'
        email_match = re.search(email_pattern, html_content)
        email = email_match.group(1) if email_match else None

        phone_pattern = r'<div class="details-bloc">\s*(\d{2}\s+\d{2}\s+\d{2}\s+\d{2}\s+\d{2})\s*</div>'
        phone_match = re.search(phone_pattern, html_content)
        phone = phone_match.group(1).strip() if phone_match else None

        # logger.info(
        #     f"Extracted umpire data (requests): name={name}, email={email}, phone={phone}"
        # )
        return name, email, phone

    except Exception as e:
        logger.error(f"Error getting umpire data with requests: {e}")
        return None, None, None


def _scrape_single_page(
    sorting_option,
    page,
    start_date,
    end_date,
    city,
    distance,
    categories,
    levels,
    lat,
    lng,
    ages,
    tournament_types,
    national_cup,
):
    """
    Helper function to scrape a single page of tournaments
    """
    return scrape_fft_all_tournaments(
        sorting_option=sorting_option,
        page=page,
        start_date=start_date,
        end_date=end_date,
        city=city,
        distance=distance,
        categories=categories,
        levels=levels,
        lat=lat,
        lng=lng,
        ages=ages,
        tournament_types=tournament_types,
        national_cup=national_cup,
    )


def scrape_fft_all_tournaments_concurrent(
    sorting_option=None,
    start_date=None,
    end_date=None,
    city="",
    distance=15,
    categories=None,
    levels=None,
    lat=None,
    lng=None,
    ages=None,
    tournament_types=None,
    national_cup=False,
    max_workers=10,
):
    """
    Scrapes all remaining pages of FFT tournaments concurrently (pages 1 to end)
    This assumes page 0 was already fetched by the client
    """
    # logger.info(f"Starting concurrent scraping for remaining tournament pages")

    # First, get the first page to determine total results and pages
    first_page_result = scrape_fft_all_tournaments(
        sorting_option=sorting_option,
        page=0,
        start_date=start_date,
        end_date=end_date,
        city=city,
        distance=distance,
        categories=categories,
        levels=levels,
        lat=lat,
        lng=lng,
        ages=ages,
        tournament_types=tournament_types,
        national_cup=national_cup,
    )

    if not first_page_result:
        logger.error("Failed to get first page results for pagination info")
        return None

    total_results = first_page_result.get("total_results", 0)
    first_page_tournaments = first_page_result.get("tournaments", [])
    results_per_page = len(first_page_tournaments)

    # logger.info(f"Total results: {total_results}, Results per page: {results_per_page}")

    if total_results == 0:
        return {
            "tournaments": [],
            "total_results": 0,
            "current_count": 0,
            "pages_scraped": 0,
        }

    # Calculate number of pages needed
    if results_per_page > 0:
        total_pages = (total_results + results_per_page - 1) // results_per_page
    else:
        total_pages = 1

    # logger.info(f"Total pages: {total_pages}")

    # If only one page total, return empty since page 0 was already handled
    if total_pages <= 1:
        return {
            "tournaments": [],
            "total_results": total_results,
            "current_count": 0,
            "pages_scraped": 0,
        }

    # Scrape all remaining pages concurrently (pages 1 to total_pages-1)
    all_tournaments = []
    max_concurrent = min(total_pages, 20)
    with ThreadPoolExecutor(max_workers=max_concurrent) as executor:
        futures = []

        for page in range(1, total_pages):
            future = executor.submit(
                _scrape_single_page,
                sorting_option,
                page,
                start_date,
                end_date,
                city,
                distance,
                categories,
                levels,
                lat,
                lng,
                ages,
                tournament_types,
                national_cup,
            )
            futures.append((page, future))

        # Collect results as they complete
        for page, future in futures:
            try:
                result = future.result(timeout=60)  # 60 second timeout per page
                if result and result.get("tournaments"):
                    tournaments = result.get("tournaments", [])
                    all_tournaments.extend(tournaments)
                    # logger.info(
                    #     f"Page {page} completed: {len(tournaments)} tournaments"
                    # )
                else:
                    logger.warning(f"Page {page} returned no results")
            except Exception as e:
                logger.error(f"Error processing page {page}: {e}")

    # logger.info(
    #     f"Concurrent scraping completed: {len(all_tournaments)} tournaments from {total_pages - 1} remaining pages"
    # )

    return {
        "tournaments": all_tournaments,
        "total_results": total_results,
        "current_count": len(all_tournaments),
        "pages_scraped": total_pages
        - 1,  # Excluding page 0 which was handled separately
    }


def _parse_clubs_ajax_response(json_data):
    """
    Parse the clubs AJAX response to match Swift FederalClubResponse structure
    """
    try:
        # Log the raw response structure to understand what we're getting
        # # logger.info(f"Raw clubs response structure: {json_data}")

        club_markers = []
        total_results = 0

        # Try to extract clubs data from different possible response structures
        if isinstance(json_data, dict):
            # Pattern 1: Direct club_markers array
            if "club_markers" in json_data:
                clubs_data = json_data["club_markers"]
                total_results = json_data.get("nombreResultat", len(clubs_data))

            # Pattern 2: Results wrapper
            elif "results" in json_data:
                results = json_data["results"]
                clubs_data = results.get(
                    "clubs", results.get("items", results.get("club_markers", []))
                )
                total_results = results.get(
                    "nombreResultat",
                    results.get("total", results.get("nb_results", len(clubs_data))),
                )

            # Pattern 3: Direct array in response
            elif "data" in json_data:
                clubs_data = json_data["data"]
                total_results = len(clubs_data)

            # Pattern 4: Response is the clubs array directly
            else:
                clubs_data = json_data if isinstance(json_data, list) else []
                total_results = len(clubs_data)

        elif isinstance(json_data, list):
            clubs_data = json_data
            total_results = len(clubs_data)

        else:
            logger.error(f"Unexpected response format: {type(json_data)}")
            clubs_data = []
            total_results = 0

        # Parse each club to match ClubMarker structure
        for item in clubs_data:
            if isinstance(item, dict):
                # Extract pratiques array
                pratiques = []
                if "pratiques" in item:
                    pratiques = item["pratiques"]
                elif "practices" in item:
                    pratiques = item["practices"]
                else:
                    # Default to PADEL if not specified
                    pratiques = ["PADEL"]

                # Ensure pratiques are uppercase strings
                pratiques = [
                    p.upper() if isinstance(p, str) else str(p).upper()
                    for p in pratiques
                ]

                club_marker = {
                    "nom": item.get("nom", item.get("name", "")),
                    "clubId": str(
                        item.get("clubId", item.get("id", item.get("code", "")))
                    ),
                    "ville": item.get("ville", item.get("city", "")),
                    "distance": str(item.get("distance", "0")),
                    "terrainPratiqueLibelle": item.get(
                        "terrainPratiqueLibelle", item.get("courtsInfo", "")
                    ),
                    "pratiques": pratiques,
                    "lat": float(item.get("lat", item.get("latitude", 0.0))),
                    "lng": float(item.get("lng", item.get("longitude", 0.0))),
                }
                club_markers.append(club_marker)

        # logger.info(
        #     f"Successfully parsed {len(club_markers)} club markers from response"
        # )

        # Return the response in the format expected by Swift FederalClubResponse
        return {
            "typeRecherche": "clubs",
            "nombreResultat": total_results,
            "club_markers": club_markers,
        }

    except Exception as e:
        logger.error(f"Error parsing clubs AJAX response: {e}")
        return {"typeRecherche": "clubs", "nombreResultat": 0, "club_markers": []}


def scrape_federal_clubs(
    country=None, city="", latitude=None, longitude=None, radius=15, max_workers=5
):
    """
    Scrapes FFT federal clubs by extracting data from the HTML response
    """
    # logger.info(f"Starting federal clubs scraping for city: {city}, country: {country}")

    try:
        with sync_playwright() as p:
            browser = get_browser_for_environment(p)
            page_obj = browser.new_page()

            page_obj.set_extra_http_headers(
                {
                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
                }
            )

            # Clean up city name - remove zip code and extra info
            clean_city = city
            if city:
                clean_city = re.sub(r"[,\s]*\d{5}.*$", "", city).strip()
                clean_city = clean_city.rstrip(",").strip()

            # logger.info(f"Cleaned city name: '{city}' -> '{clean_city}'")

            # Build the results URL directly
            params = f"ville={clean_city}&pratique=PADEL&distance={int(radius)}&country={country or 'fr'}"
            results_url = f"https://tenup.fft.fr/recherche/clubs/resultats?{params}"

            # logger.info(f"Requesting results URL: {results_url}")

            # Navigate to the results page
            page_obj.goto(results_url)

            # Wait for the page to load
            page_obj.wait_for_timeout(2000)

            # Check if we're in queue
            if "queue-it.net" in page_obj.url.lower():
                logger.warning("Hit Queue-It on results page")
                browser.close()
                return {
                    "typeRecherche": "clubs",
                    "nombreResultat": 0,
                    "club_markers": [],
                }

            # Use JavaScript to extract the data directly from the page
            extraction_script = """
            () => {
                try {
                    // Check if Drupal.settings exists and has the data
                    if (typeof Drupal !== 'undefined' &&
                        Drupal.settings &&
                        Drupal.settings.fft_recherche_club) {

                        const data = Drupal.settings.fft_recherche_club;

                        return {
                            success: true,
                            typeRecherche: data.typeRecherche || 'club',
                            total: data.total || 0,
                            resultat: data.resultat || []
                        };
                    }

                    return {
                        success: false,
                        error: 'Drupal.settings.fft_recherche_club not found'
                    };
                } catch (error) {
                    return {
                        success: false,
                        error: error.message
                    };
                }
            }
            """

            result = page_obj.evaluate(extraction_script)

            browser.close()

            if result.get("success"):
                type_recherche = result.get("typeRecherche", "club")
                total = result.get("total", 0)
                resultat = result.get("resultat", [])

                # logger.info(f"Successfully extracted {total} clubs")

                # Convert resultat to club_markers format
                club_markers = []
                for club in resultat:
                    club_markers.append(
                        {
                            "nom": club.get("nom", ""),
                            "clubId": club.get("clubId", ""),
                            "ville": club.get("ville", ""),
                            "distance": club.get("distance", ""),
                            "terrainPratiqueLibelle": club.get(
                                "terrainPratiqueLibelle", ""
                            ),
                            "pratiques": club.get("pratiques", []),
                            "lat": club.get("lat", 0.0),
                            "lng": club.get("lng", 0.0),
                        }
                    )

                return {
                    "typeRecherche": type_recherche,
                    "nombreResultat": total,
                    "club_markers": club_markers,
                }
            else:
                logger.error(f"Failed to extract data: {result.get('error')}")
                return {
                    "typeRecherche": "clubs",
                    "nombreResultat": 0,
                    "club_markers": [],
                }

    except Exception as e:
        logger.error(f"Error in federal clubs scraping: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        return {"typeRecherche": "clubs", "nombreResultat": 0, "club_markers": []}


def get_browser_for_environment(playwright_instance):
    """
    Get appropriate browser based on environment
    - Development (DEBUG=True): Use Firefox (works better on macOS 15+)
    - Production (DEBUG=False): Use Chromium (as it was working)
    """
    is_development = getattr(settings, "DEBUG", False)

    if is_development:
        # # logger.info("Development environment detected - using Firefox")
        try:
            return playwright_instance.firefox.launch(
                headless=True, args=["--no-sandbox"]
            )
        except Exception as firefox_error:
            logger.warning(
                f"Firefox failed in dev, falling back to Chromium: {firefox_error}"
            )
            return playwright_instance.chromium.launch(
                headless=True,
                args=["--no-sandbox", "--disable-dev-shm-usage", "--single-process"],
            )
    else:
        # logger.info("Production environment detected - using Chromium")
        return playwright_instance.chromium.launch(
            headless=True, args=["--no-sandbox", "--disable-dev-shm-usage"]
        )