From 1269b9776531da75bccb713d8ab7e43940f35909 Mon Sep 17 00:00:00 2001
From: Razmig Sarkissian <razmig@padelclub.app>
Date: Thu, 25 Sep 2025 10:37:12 +0200
Subject: [PATCH 1/4] Add wait after page load in FFT scraping

---
 api/utils.py | 490 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 300 insertions(+), 190 deletions(-)

diff --git a/api/utils.py b/api/utils.py
index 14fe6d2..ad78e5c 100644
--- a/api/utils.py
+++ b/api/utils.py
@@ -9,19 +9,23 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 
 logger = logging.getLogger(__name__)
 
+
 def check_version_smaller_than_1_1_12(version_str):
     # Remove the parentheses part if it exists, example of version: 1.1.12 (2)
     version_str = version_str.split()[0]
     if version_str:
         # Split version into components
-        version_parts = [int(x) for x in version_str.split('.')]
+        version_parts = [int(x) for x in version_str.split(".")]
         target_parts = [1, 1, 12]
         # Compare version components
         return version_parts < target_parts
     else:
         return False
 
-def scrape_fft_club_tournaments(club_code, club_name, start_date=None, end_date=None, page=0):
+
+def scrape_fft_club_tournaments(
+    club_code, club_name, start_date=None, end_date=None, page=0
+):
     """
     Scrapes FFT tournaments using Playwright with detailed debugging
     """
@@ -31,16 +35,18 @@ def scrape_fft_club_tournaments(club_code, club_name, start_date=None, end_date=
             browser = p.chromium.launch(headless=True)
             page_obj = browser.new_page()
 
-            page_obj.set_extra_http_headers({
-                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
-            })
+            page_obj.set_extra_http_headers(
+                {
+                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
+                }
+            )
 
             # Navigate to FFT
             target_url = "https://tenup.fft.fr/recherche/tournois"
             logger.info(f"Navigating to: {target_url}")
 
             page_obj.goto(target_url)
-            # page_obj.wait_for_timeout(7000)
+            page_obj.wait_for_timeout(7000)
 
             current_url = page_obj.url
             logger.info(f"Current URL: {current_url}")
@@ -57,7 +63,7 @@ def scrape_fft_club_tournaments(club_code, club_name, start_date=None, end_date=
                 browser.close()
                 return None
 
-            form_build_id = form_input.get_attribute('value')
+            form_build_id = form_input.get_attribute("value")
             logger.info(f"Extracted form_build_id: {form_build_id}")
 
             # Build parameters
@@ -121,8 +127,8 @@ def scrape_fft_club_tournaments(club_code, club_name, start_date=None, end_date=
             logger.info(f"AJAX Response Status: {result.get('status')}")
             logger.info(f"AJAX Response Success: {result.get('success')}")
 
-            if result.get('success'):
-                response_text = result.get('responseText', '')
+            if result.get("success"):
+                response_text = result.get("responseText", "")
                 logger.info(f"Raw Response Length: {len(response_text)}")
                 # logger.info(f"Raw Response (first 500 chars): {response_text[:500]}")
 
@@ -151,7 +157,10 @@ def scrape_fft_club_tournaments(club_code, club_name, start_date=None, end_date=
         logger.error(f"Traceback: {traceback.format_exc()}")
         return None
 
-def scrape_fft_club_tournaments_all_pages(club_code, club_name, start_date=None, end_date=None):
+
+def scrape_fft_club_tournaments_all_pages(
+    club_code, club_name, start_date=None, end_date=None
+):
     """
     Scrapes all pages of FFT tournaments for a specific club
     """
@@ -168,7 +177,7 @@ def scrape_fft_club_tournaments_all_pages(club_code, club_name, start_date=None,
                 club_name=club_name,
                 start_date=start_date,
                 end_date=end_date,
-                page=page
+                page=page,
             )
 
             # Debug: Log what we got
@@ -178,7 +187,7 @@ def scrape_fft_club_tournaments_all_pages(club_code, club_name, start_date=None,
                 logger.warning(f"No result for page {page}")
                 break
 
-            tournaments = result.get('tournaments', [])
+            tournaments = result.get("tournaments", [])
             logger.info(f"Page {page} returned {len(tournaments)} tournaments")
 
             if not tournaments:
@@ -188,8 +197,10 @@ def scrape_fft_club_tournaments_all_pages(club_code, club_name, start_date=None,
             all_tournaments.extend(tournaments)
 
             # Check if we have all results
-            total_results = result.get('total_results', 0)
-            logger.info(f"Total so far: {len(all_tournaments)}, Target: {total_results}")
+            total_results = result.get("total_results", 0)
+            logger.info(
+                f"Total so far: {len(all_tournaments)}, Target: {total_results}"
+            )
 
             if len(all_tournaments) >= total_results:
                 logger.info("Got all tournaments, stopping")
@@ -203,15 +214,18 @@ def scrape_fft_club_tournaments_all_pages(club_code, club_name, start_date=None,
             logger.error(f"Error on page {page}: {e}")
             break
 
-    logger.info(f"Completed scraping: {len(all_tournaments)} tournaments across {page + 1} pages")
+    logger.info(
+        f"Completed scraping: {len(all_tournaments)} tournaments across {page + 1} pages"
+    )
 
     return {
-        'tournaments': all_tournaments,
-        'total_results': len(all_tournaments),
-        'current_count': len(all_tournaments),
-        'pages_scraped': page + 1
+        "tournaments": all_tournaments,
+        "total_results": len(all_tournaments),
+        "current_count": len(all_tournaments),
+        "pages_scraped": page + 1,
     }
 
+
 def _parse_ajax_response(commands):
     """
     Parse the AJAX response commands to extract tournament data
@@ -222,114 +236,133 @@ def _parse_ajax_response(commands):
     try:
         # Check for alert commands (maintenance mode)
         for command in commands:
-            if command.get('command') == 'alert':
+            if command.get("command") == "alert":
                 logger.warning("Maintenance mode detected")
                 return None
 
         # Find the command with results
         result_command = None
         for command in commands:
-            if command.get('command') == 'recherche_tournois_update':
+            if command.get("command") == "recherche_tournois_update":
                 result_command = command
                 logger.info("Found recherche_tournois_update command!")
                 break
 
-        if result_command and result_command.get('results'):
-            results = result_command['results']
-            items = results.get('items', [])
-            total_results = results.get('nb_results', 0)
+        if result_command and result_command.get("results"):
+            results = result_command["results"]
+            items = results.get("items", [])
+            total_results = results.get("nb_results", 0)
 
             logger.info(f"Processing {len(items)} tournaments from results")
 
             for item in items:
                 # Parse dates - they're already in the correct format
-                date_debut = item.get('dateDebut')
-                date_fin = item.get('dateFin')
-                date_validation = item.get('dateValidation')
+                date_debut = item.get("dateDebut")
+                date_fin = item.get("dateFin")
+                date_validation = item.get("dateValidation")
 
                 # Build the tournament object to match Swift FederalTournament structure
                 tournament = {
-                    "id": str(item.get('id', '')),
-                    "millesime": item.get('millesime'),
-                    "libelle": item.get('libelle'),
-                    "tmc": item.get('tmc'),
-                    "tarifAdulteChampionnat": item.get('tarifAdulteChampionnat'),
-                    "type": item.get('type'),
-                    "ageReel": item.get('ageReel'),
-                    "naturesTerrains": item.get('naturesTerrains', []),
-                    "idsArbitres": item.get('idsArbitres', []),
-                    "tarifJeuneChampionnat": item.get('tarifJeuneChampionnat'),
-                    "international": item.get('international'),
-                    "inscriptionEnLigne": item.get('inscriptionEnLigne'),
-                    "categorieTournoi": item.get('categorieTournoi'),
-                    "prixLot": item.get('prixLot'),
-                    "paiementEnLigne": item.get('paiementEnLigne'),
-                    "reductionAdherentJeune": item.get('reductionAdherentJeune'),
-                    "reductionAdherentAdulte": item.get('reductionAdherentAdulte'),
-                    "paiementEnLigneObligatoire": item.get('paiementEnLigneObligatoire'),
-                    "villeEngagement": item.get('villeEngagement'),
-                    "senior": item.get('senior'),
-                    "veteran": item.get('veteran'),
-                    "inscriptionEnLigneEnCours": item.get('inscriptionEnLigneEnCours'),
-                    "avecResultatPublie": item.get('avecResultatPublie'),
-                    "code": item.get('code'),
-                    "categorieAge": item.get('categorieAge'),
-                    "codeComite": item.get('codeComite'),
-                    "installations": item.get('installations', []),
-                    "reductionEpreuveSupplementaireJeune": item.get('reductionEpreuveSupplementaireJeune'),
-                    "reductionEpreuveSupplementaireAdulte": item.get('reductionEpreuveSupplementaireAdulte'),
-                    "nomComite": item.get('nomComite'),
-                    "naturesEpreuves": item.get('naturesEpreuves'),
-                    "jeune": item.get('jeune'),
-                    "courrielEngagement": item.get('courrielEngagement'),
-                    "nomClub": item.get('nomClub'),
-                    "installation": item.get('installation'),
-                    "categorieAgeMax": item.get('categorieAgeMax'),
-                    "tournoiInterne": item.get('tournoiInterne'),
-                    "nomLigue": item.get('nomLigue'),
-                    "nomEngagement": item.get('nomEngagement'),
-                    "codeLigue": item.get('codeLigue'),
-                    "modeleDeBalle": item.get('modeleDeBalle'),
-                    "jugeArbitre": item.get('jugeArbitre'),
-                    "adresse2Engagement": item.get('adresse2Engagement'),
-                    "epreuves": item.get('epreuves'),
+                    "id": str(item.get("id", "")),
+                    "millesime": item.get("millesime"),
+                    "libelle": item.get("libelle"),
+                    "tmc": item.get("tmc"),
+                    "tarifAdulteChampionnat": item.get("tarifAdulteChampionnat"),
+                    "type": item.get("type"),
+                    "ageReel": item.get("ageReel"),
+                    "naturesTerrains": item.get("naturesTerrains", []),
+                    "idsArbitres": item.get("idsArbitres", []),
+                    "tarifJeuneChampionnat": item.get("tarifJeuneChampionnat"),
+                    "international": item.get("international"),
+                    "inscriptionEnLigne": item.get("inscriptionEnLigne"),
+                    "categorieTournoi": item.get("categorieTournoi"),
+                    "prixLot": item.get("prixLot"),
+                    "paiementEnLigne": item.get("paiementEnLigne"),
+                    "reductionAdherentJeune": item.get("reductionAdherentJeune"),
+                    "reductionAdherentAdulte": item.get("reductionAdherentAdulte"),
+                    "paiementEnLigneObligatoire": item.get(
+                        "paiementEnLigneObligatoire"
+                    ),
+                    "villeEngagement": item.get("villeEngagement"),
+                    "senior": item.get("senior"),
+                    "veteran": item.get("veteran"),
+                    "inscriptionEnLigneEnCours": item.get("inscriptionEnLigneEnCours"),
+                    "avecResultatPublie": item.get("avecResultatPublie"),
+                    "code": item.get("code"),
+                    "categorieAge": item.get("categorieAge"),
+                    "codeComite": item.get("codeComite"),
+                    "installations": item.get("installations", []),
+                    "reductionEpreuveSupplementaireJeune": item.get(
+                        "reductionEpreuveSupplementaireJeune"
+                    ),
+                    "reductionEpreuveSupplementaireAdulte": item.get(
+                        "reductionEpreuveSupplementaireAdulte"
+                    ),
+                    "nomComite": item.get("nomComite"),
+                    "naturesEpreuves": item.get("naturesEpreuves"),
+                    "jeune": item.get("jeune"),
+                    "courrielEngagement": item.get("courrielEngagement"),
+                    "nomClub": item.get("nomClub"),
+                    "installation": item.get("installation"),
+                    "categorieAgeMax": item.get("categorieAgeMax"),
+                    "tournoiInterne": item.get("tournoiInterne"),
+                    "nomLigue": item.get("nomLigue"),
+                    "nomEngagement": item.get("nomEngagement"),
+                    "codeLigue": item.get("codeLigue"),
+                    "modeleDeBalle": item.get("modeleDeBalle"),
+                    "jugeArbitre": item.get("jugeArbitre"),
+                    "adresse2Engagement": item.get("adresse2Engagement"),
+                    "epreuves": item.get("epreuves"),
                     "dateDebut": date_debut,
-                    "serie": item.get('serie'),
+                    "serie": item.get("serie"),
                     "dateFin": date_fin,
                     "dateValidation": date_validation,
-                    "codePostalEngagement": item.get('codePostalEngagement'),
-                    "codeClub": item.get('codeClub'),
-                    "prixEspece": item.get('prixEspece'),
+                    "codePostalEngagement": item.get("codePostalEngagement"),
+                    "codeClub": item.get("codeClub"),
+                    "prixEspece": item.get("prixEspece"),
                     "japPhoneNumber": None,  # Will be populated by separate umpire call
-
                     # Additional fields from the response
-                    "adresse1Engagement": item.get('adresse1Engagement'),
-                    "originalId": item.get('originalId'),
-                    "familleTournoi": item.get('familleTournoi', []),
-                    "isTournoi": item.get('isTournoi'),
-                    "natureWithCatAge": item.get('natureWithCatAge')
+                    "adresse1Engagement": item.get("adresse1Engagement"),
+                    "originalId": item.get("originalId"),
+                    "familleTournoi": item.get("familleTournoi", []),
+                    "isTournoi": item.get("isTournoi"),
+                    "natureWithCatAge": item.get("natureWithCatAge"),
                 }
 
                 tournaments.append(tournament)
 
-            logger.info(f"Successfully parsed {len(tournaments)} tournaments from response")
+            logger.info(
+                f"Successfully parsed {len(tournaments)} tournaments from response"
+            )
             return {
-                'tournaments': tournaments,
-                'total_results': total_results,
-                'current_count': len(tournaments)
+                "tournaments": tournaments,
+                "total_results": total_results,
+                "current_count": len(tournaments),
             }
         else:
             logger.error("No recherche_tournois_update command found in AJAX response")
-            return {'tournaments': [], 'total_results': 0, 'current_count': 0}
+            return {"tournaments": [], "total_results": 0, "current_count": 0}
 
     except Exception as e:
         logger.error(f"Error parsing AJAX response: {e}")
         return None
 
-def scrape_fft_all_tournaments(sorting_option=None, page=0, start_date=None, end_date=None,
-                              city='', distance=15, categories=None, levels=None,
-                              lat=None, lng=None, ages=None, tournament_types=None,
-                              national_cup=False):
+
+def scrape_fft_all_tournaments(
+    sorting_option=None,
+    page=0,
+    start_date=None,
+    end_date=None,
+    city="",
+    distance=15,
+    categories=None,
+    levels=None,
+    lat=None,
+    lng=None,
+    ages=None,
+    tournament_types=None,
+    national_cup=False,
+):
     """
     Scrapes FFT tournaments using Playwright with detailed debugging
     Based exactly on the working scrape_fft_club_tournaments function
@@ -341,16 +374,18 @@ def scrape_fft_all_tournaments(sorting_option=None, page=0, start_date=None, end
             browser = p.chromium.launch(headless=True)
             page_obj = browser.new_page()
 
-            page_obj.set_extra_http_headers({
-                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
-            })
+            page_obj.set_extra_http_headers(
+                {
+                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
+                }
+            )
 
             # Navigate to FFT
             target_url = "https://tenup.fft.fr/recherche/tournois"
             logger.info(f"Navigating to: {target_url}")
 
             page_obj.goto(target_url)
-            # page_obj.wait_for_timeout(7000)
+            page_obj.wait_for_timeout(7000)
 
             current_url = page_obj.url
             logger.info(f"Current URL: {current_url}")
@@ -367,7 +402,7 @@ def scrape_fft_all_tournaments(sorting_option=None, page=0, start_date=None, end
                 browser.close()
                 return None
 
-            form_build_id = form_input.get_attribute('value')
+            form_build_id = form_input.get_attribute("value")
             logger.info(f"Extracted form_build_id: {form_build_id}")
 
             # Build parameters - EXACT same pattern as club function
@@ -486,8 +521,8 @@ def scrape_fft_all_tournaments(sorting_option=None, page=0, start_date=None, end
             logger.info(f"AJAX Response Status: {result.get('status')}")
             logger.info(f"AJAX Response Success: {result.get('success')}")
 
-            if result.get('success'):
-                response_text = result.get('responseText', '')
+            if result.get("success"):
+                response_text = result.get("responseText", "")
                 logger.info(f"Raw Response Length: {len(response_text)}")
 
                 try:
@@ -525,13 +560,13 @@ def get_umpire_data(tournament_id):
             browser = p.chromium.launch(
                 headless=True,
                 args=[
-                    '--no-sandbox',
-                    '--disable-dev-shm-usage',
-                    '--disable-images',  # Don't load images
-                    '--disable-javascript',  # Disable JS for faster loading
-                    '--disable-plugins',
-                    '--disable-extensions'
-                ]
+                    "--no-sandbox",
+                    "--disable-dev-shm-usage",
+                    "--disable-images",  # Don't load images
+                    "--disable-javascript",  # Disable JS for faster loading
+                    "--disable-plugins",
+                    "--disable-extensions",
+                ],
             )
             page = browser.new_page()
 
@@ -553,7 +588,9 @@ def get_umpire_data(tournament_id):
                 html_content = page.content()
 
                 # Extract name
-                name_pattern = r'tournoi-detail-page-inscription-responsable-title">\s*([^<]+)\s*<'
+                name_pattern = (
+                    r'tournoi-detail-page-inscription-responsable-title">\s*([^<]+)\s*<'
+                )
                 name_match = re.search(name_pattern, html_content)
                 name = name_match.group(1).strip() if name_match else None
 
@@ -569,7 +606,9 @@ def get_umpire_data(tournament_id):
 
                 browser.close()
 
-                logger.info(f"Extracted umpire data: name={name}, email={email}, phone={phone}")
+                logger.info(
+                    f"Extracted umpire data: name={name}, email={email}, phone={phone}"
+                )
                 return name, email, phone
 
             except Exception as page_error:
@@ -581,6 +620,7 @@ def get_umpire_data(tournament_id):
         logger.error(f"Error in umpire data extraction: {e}")
         return None, None, None
 
+
 def _get_umpire_data_requests_fallback(tournament_id):
     """
     Fallback method using requests (may hit Queue-It)
@@ -591,7 +631,7 @@ def _get_umpire_data_requests_fallback(tournament_id):
         url = f"https://tenup.fft.fr/tournoi/{tournament_id}"
 
         headers = {
-            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15'
+            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
         }
 
         response = requests.get(url, headers=headers, timeout=30)
@@ -603,7 +643,9 @@ def _get_umpire_data_requests_fallback(tournament_id):
         html_content = response.text
 
         # Extract using regex (original method)
-        name_pattern = r'tournoi-detail-page-inscription-responsable-title">\s*([^<]+)\s*<'
+        name_pattern = (
+            r'tournoi-detail-page-inscription-responsable-title">\s*([^<]+)\s*<'
+        )
         name_match = re.search(name_pattern, html_content)
         name = name_match.group(1).strip() if name_match else None
 
@@ -615,15 +657,31 @@ def _get_umpire_data_requests_fallback(tournament_id):
         phone_match = re.search(phone_pattern, html_content)
         phone = phone_match.group(1).strip() if phone_match else None
 
-        logger.info(f"Extracted umpire data (requests): name={name}, email={email}, phone={phone}")
+        logger.info(
+            f"Extracted umpire data (requests): name={name}, email={email}, phone={phone}"
+        )
         return name, email, phone
 
     except Exception as e:
         logger.error(f"Error getting umpire data with requests: {e}")
         return None, None, None
 
-def _scrape_single_page(sorting_option, page, start_date, end_date, city, distance,
-                       categories, levels, lat, lng, ages, tournament_types, national_cup):
+
+def _scrape_single_page(
+    sorting_option,
+    page,
+    start_date,
+    end_date,
+    city,
+    distance,
+    categories,
+    levels,
+    lat,
+    lng,
+    ages,
+    tournament_types,
+    national_cup,
+):
     """
     Helper function to scrape a single page of tournaments
     """
@@ -640,14 +698,25 @@ def _scrape_single_page(sorting_option, page, start_date, end_date, city, distan
         lng=lng,
         ages=ages,
         tournament_types=tournament_types,
-        national_cup=national_cup
+        national_cup=national_cup,
     )
 
 
-def scrape_fft_all_tournaments_concurrent(sorting_option=None, start_date=None, end_date=None,
-                                        city='', distance=15, categories=None, levels=None,
-                                        lat=None, lng=None, ages=None, tournament_types=None,
-                                        national_cup=False, max_workers=5):
+def scrape_fft_all_tournaments_concurrent(
+    sorting_option=None,
+    start_date=None,
+    end_date=None,
+    city="",
+    distance=15,
+    categories=None,
+    levels=None,
+    lat=None,
+    lng=None,
+    ages=None,
+    tournament_types=None,
+    national_cup=False,
+    max_workers=5,
+):
     """
     Scrapes all remaining pages of FFT tournaments concurrently (pages 1 to end)
     This assumes page 0 was already fetched by the client
@@ -668,21 +737,26 @@ def scrape_fft_all_tournaments_concurrent(sorting_option=None, start_date=None,
         lng=lng,
         ages=ages,
         tournament_types=tournament_types,
-        national_cup=national_cup
+        national_cup=national_cup,
     )
 
     if not first_page_result:
         logger.error("Failed to get first page results for pagination info")
         return None
 
-    total_results = first_page_result.get('total_results', 0)
-    first_page_tournaments = first_page_result.get('tournaments', [])
+    total_results = first_page_result.get("total_results", 0)
+    first_page_tournaments = first_page_result.get("tournaments", [])
     results_per_page = len(first_page_tournaments)
 
     logger.info(f"Total results: {total_results}, Results per page: {results_per_page}")
 
     if total_results == 0:
-        return {'tournaments': [], 'total_results': 0, 'current_count': 0, 'pages_scraped': 0}
+        return {
+            "tournaments": [],
+            "total_results": 0,
+            "current_count": 0,
+            "pages_scraped": 0,
+        }
 
     # Calculate number of pages needed
     if results_per_page > 0:
@@ -694,7 +768,12 @@ def scrape_fft_all_tournaments_concurrent(sorting_option=None, start_date=None,
 
     # If only one page total, return empty since page 0 was already handled
     if total_pages <= 1:
-        return {'tournaments': [], 'total_results': total_results, 'current_count': 0, 'pages_scraped': 0}
+        return {
+            "tournaments": [],
+            "total_results": total_results,
+            "current_count": 0,
+            "pages_scraped": 0,
+        }
 
     # Scrape all remaining pages concurrently (pages 1 to total_pages-1)
     all_tournaments = []
@@ -705,8 +784,19 @@ def scrape_fft_all_tournaments_concurrent(sorting_option=None, start_date=None,
         for page in range(1, total_pages):
             future = executor.submit(
                 _scrape_single_page,
-                sorting_option, page, start_date, end_date, city, distance,
-                categories, levels, lat, lng, ages, tournament_types, national_cup
+                sorting_option,
+                page,
+                start_date,
+                end_date,
+                city,
+                distance,
+                categories,
+                levels,
+                lat,
+                lng,
+                ages,
+                tournament_types,
+                national_cup,
             )
             futures.append((page, future))
 
@@ -714,24 +804,30 @@ def scrape_fft_all_tournaments_concurrent(sorting_option=None, start_date=None,
         for page, future in futures:
             try:
                 result = future.result(timeout=60)  # 60 second timeout per page
-                if result and result.get('tournaments'):
-                    tournaments = result.get('tournaments', [])
+                if result and result.get("tournaments"):
+                    tournaments = result.get("tournaments", [])
                     all_tournaments.extend(tournaments)
-                    logger.info(f"Page {page} completed: {len(tournaments)} tournaments")
+                    logger.info(
+                        f"Page {page} completed: {len(tournaments)} tournaments"
+                    )
                 else:
                     logger.warning(f"Page {page} returned no results")
             except Exception as e:
                 logger.error(f"Error processing page {page}: {e}")
 
-    logger.info(f"Concurrent scraping completed: {len(all_tournaments)} tournaments from {total_pages-1} remaining pages")
+    logger.info(
+        f"Concurrent scraping completed: {len(all_tournaments)} tournaments from {total_pages - 1} remaining pages"
+    )
 
     return {
-        'tournaments': all_tournaments,
-        'total_results': total_results,
-        'current_count': len(all_tournaments),
-        'pages_scraped': total_pages - 1  # Excluding page 0 which was handled separately
+        "tournaments": all_tournaments,
+        "total_results": total_results,
+        "current_count": len(all_tournaments),
+        "pages_scraped": total_pages
+        - 1,  # Excluding page 0 which was handled separately
     }
 
+
 def _parse_clubs_ajax_response(json_data):
     """
     Parse the clubs AJAX response to match Swift FederalClubResponse structure
@@ -746,19 +842,24 @@ def _parse_clubs_ajax_response(json_data):
         # Try to extract clubs data from different possible response structures
         if isinstance(json_data, dict):
             # Pattern 1: Direct club_markers array
-            if 'club_markers' in json_data:
-                clubs_data = json_data['club_markers']
-                total_results = json_data.get('nombreResultat', len(clubs_data))
+            if "club_markers" in json_data:
+                clubs_data = json_data["club_markers"]
+                total_results = json_data.get("nombreResultat", len(clubs_data))
 
             # Pattern 2: Results wrapper
-            elif 'results' in json_data:
-                results = json_data['results']
-                clubs_data = results.get('clubs', results.get('items', results.get('club_markers', [])))
-                total_results = results.get('nombreResultat', results.get('total', results.get('nb_results', len(clubs_data))))
+            elif "results" in json_data:
+                results = json_data["results"]
+                clubs_data = results.get(
+                    "clubs", results.get("items", results.get("club_markers", []))
+                )
+                total_results = results.get(
+                    "nombreResultat",
+                    results.get("total", results.get("nb_results", len(clubs_data))),
+                )
 
             # Pattern 3: Direct array in response
-            elif 'data' in json_data:
-                clubs_data = json_data['data']
+            elif "data" in json_data:
+                clubs_data = json_data["data"]
                 total_results = len(clubs_data)
 
             # Pattern 4: Response is the clubs array directly
@@ -780,49 +881,55 @@ def _parse_clubs_ajax_response(json_data):
             if isinstance(item, dict):
                 # Extract pratiques array
                 pratiques = []
-                if 'pratiques' in item:
-                    pratiques = item['pratiques']
-                elif 'practices' in item:
-                    pratiques = item['practices']
+                if "pratiques" in item:
+                    pratiques = item["pratiques"]
+                elif "practices" in item:
+                    pratiques = item["practices"]
                 else:
                     # Default to PADEL if not specified
                     pratiques = ["PADEL"]
 
                 # Ensure pratiques are uppercase strings
-                pratiques = [p.upper() if isinstance(p, str) else str(p).upper() for p in pratiques]
+                pratiques = [
+                    p.upper() if isinstance(p, str) else str(p).upper()
+                    for p in pratiques
+                ]
 
                 club_marker = {
-                    "nom": item.get('nom', item.get('name', '')),
-                    "clubId": str(item.get('clubId', item.get('id', item.get('code', '')))),
-                    "ville": item.get('ville', item.get('city', '')),
-                    "distance": str(item.get('distance', '0')),
-                    "terrainPratiqueLibelle": item.get('terrainPratiqueLibelle', item.get('courtsInfo', '')),
+                    "nom": item.get("nom", item.get("name", "")),
+                    "clubId": str(
+                        item.get("clubId", item.get("id", item.get("code", "")))
+                    ),
+                    "ville": item.get("ville", item.get("city", "")),
+                    "distance": str(item.get("distance", "0")),
+                    "terrainPratiqueLibelle": item.get(
+                        "terrainPratiqueLibelle", item.get("courtsInfo", "")
+                    ),
                     "pratiques": pratiques,
-                    "lat": float(item.get('lat', item.get('latitude', 0.0))),
-                    "lng": float(item.get('lng', item.get('longitude', 0.0)))
+                    "lat": float(item.get("lat", item.get("latitude", 0.0))),
+                    "lng": float(item.get("lng", item.get("longitude", 0.0))),
                 }
                 club_markers.append(club_marker)
 
-        logger.info(f"Successfully parsed {len(club_markers)} club markers from response")
+        logger.info(
+            f"Successfully parsed {len(club_markers)} club markers from response"
+        )
 
         # Return the response in the format expected by Swift FederalClubResponse
         return {
             "typeRecherche": "clubs",
             "nombreResultat": total_results,
-            "club_markers": club_markers
+            "club_markers": club_markers,
         }
 
     except Exception as e:
         logger.error(f"Error parsing clubs AJAX response: {e}")
-        return {
-            "typeRecherche": "clubs",
-            "nombreResultat": 0,
-            "club_markers": []
-        }
+        return {"typeRecherche": "clubs", "nombreResultat": 0, "club_markers": []}
 
 
-def scrape_federal_clubs(country=None, city='', latitude=None, longitude=None,
-                        radius=15, max_workers=5):
+def scrape_federal_clubs(
+    country=None, city="", latitude=None, longitude=None, radius=15, max_workers=5
+):
     """
     Scrapes FFT federal clubs by extracting data from the HTML response
     """
@@ -833,16 +940,19 @@ def scrape_federal_clubs(country=None, city='', latitude=None, longitude=None,
             browser = p.chromium.launch(headless=True)
             page_obj = browser.new_page()
 
-            page_obj.set_extra_http_headers({
-                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
-            })
+            page_obj.set_extra_http_headers(
+                {
+                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
+                }
+            )
 
             # Clean up city name - remove zip code and extra info
             clean_city = city
             if city:
                 import re
-                clean_city = re.sub(r'[,\s]*\d{5}.*$', '', city).strip()
-                clean_city = clean_city.rstrip(',').strip()
+
+                clean_city = re.sub(r"[,\s]*\d{5}.*$", "", city).strip()
+                clean_city = clean_city.rstrip(",").strip()
 
             logger.info(f"Cleaned city name: '{city}' -> '{clean_city}'")
 
@@ -856,7 +966,7 @@ def scrape_federal_clubs(country=None, city='', latitude=None, longitude=None,
             page_obj.goto(results_url)
 
             # Wait for the page to load
-            page_obj.wait_for_timeout(3000)
+            page_obj.wait_for_timeout(7000)
 
             # Check if we're in queue
             if "queue-it.net" in page_obj.url.lower():
@@ -865,7 +975,7 @@ def scrape_federal_clubs(country=None, city='', latitude=None, longitude=None,
                 return {
                     "typeRecherche": "clubs",
                     "nombreResultat": 0,
-                    "club_markers": []
+                    "club_markers": [],
                 }
 
             # Use JavaScript to extract the data directly from the page
@@ -904,45 +1014,45 @@ def scrape_federal_clubs(country=None, city='', latitude=None, longitude=None,
 
             browser.close()
 
-            if result.get('success'):
-                type_recherche = result.get('typeRecherche', 'club')
-                total = result.get('total', 0)
-                resultat = result.get('resultat', [])
+            if result.get("success"):
+                type_recherche = result.get("typeRecherche", "club")
+                total = result.get("total", 0)
+                resultat = result.get("resultat", [])
 
                 logger.info(f"Successfully extracted {total} clubs")
 
                 # Convert resultat to club_markers format
                 club_markers = []
                 for club in resultat:
-                    club_markers.append({
-                        "nom": club.get('nom', ''),
-                        "clubId": club.get('clubId', ''),
-                        "ville": club.get('ville', ''),
-                        "distance": club.get('distance', ''),
-                        "terrainPratiqueLibelle": club.get('terrainPratiqueLibelle', ''),
-                        "pratiques": club.get('pratiques', []),
-                        "lat": club.get('lat', 0.0),
-                        "lng": club.get('lng', 0.0)
-                    })
+                    club_markers.append(
+                        {
+                            "nom": club.get("nom", ""),
+                            "clubId": club.get("clubId", ""),
+                            "ville": club.get("ville", ""),
+                            "distance": club.get("distance", ""),
+                            "terrainPratiqueLibelle": club.get(
+                                "terrainPratiqueLibelle", ""
+                            ),
+                            "pratiques": club.get("pratiques", []),
+                            "lat": club.get("lat", 0.0),
+                            "lng": club.get("lng", 0.0),
+                        }
+                    )
 
                 return {
                     "typeRecherche": type_recherche,
                     "nombreResultat": total,
-                    "club_markers": club_markers
+                    "club_markers": club_markers,
                 }
             else:
                 logger.error(f"Failed to extract data: {result.get('error')}")
                 return {
                     "typeRecherche": "clubs",
                     "nombreResultat": 0,
-                    "club_markers": []
+                    "club_markers": [],
                 }
 
     except Exception as e:
         logger.error(f"Error in federal clubs scraping: {e}")
         logger.error(f"Traceback: {traceback.format_exc()}")
-        return {
-            "typeRecherche": "clubs",
-            "nombreResultat": 0,
-            "club_markers": []
-        }
+        return {"typeRecherche": "clubs", "nombreResultat": 0, "club_markers": []}

From 7d997fdb7d865c233adb9bc153f940ffbf46449a Mon Sep 17 00:00:00 2001
From: Razmig Sarkissian <razmig@padelclub.app>
Date: Thu, 25 Sep 2025 10:42:09 +0200
Subject: [PATCH 2/4] Disable tournament page waits; shorten clubs wait to 3s

---
 api/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/api/utils.py b/api/utils.py
index ad78e5c..50ae798 100644
--- a/api/utils.py
+++ b/api/utils.py
@@ -46,7 +46,7 @@ def scrape_fft_club_tournaments(
             logger.info(f"Navigating to: {target_url}")
 
             page_obj.goto(target_url)
-            page_obj.wait_for_timeout(7000)
+            # page_obj.wait_for_timeout(7000)
 
             current_url = page_obj.url
             logger.info(f"Current URL: {current_url}")
@@ -385,7 +385,7 @@ def scrape_fft_all_tournaments(
             logger.info(f"Navigating to: {target_url}")
 
             page_obj.goto(target_url)
-            page_obj.wait_for_timeout(7000)
+            # page_obj.wait_for_timeout(7000)
 
             current_url = page_obj.url
             logger.info(f"Current URL: {current_url}")
@@ -966,7 +966,7 @@ def scrape_federal_clubs(
             page_obj.goto(results_url)
 
             # Wait for the page to load
-            page_obj.wait_for_timeout(7000)
+            page_obj.wait_for_timeout(3000)
 
             # Check if we're in queue
             if "queue-it.net" in page_obj.url.lower():

From 34d8fac0d53b934e0bcd6c62df8fef2c7809845f Mon Sep 17 00:00:00 2001
From: Razmig Sarkissian <razmig@padelclub.app>
Date: Thu, 25 Sep 2025 10:49:25 +0200
Subject: [PATCH 3/4] Refactor FFT tournament scraping with Queue-It fallback

---
 api/utils.py | 417 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 242 insertions(+), 175 deletions(-)

diff --git a/api/utils.py b/api/utils.py
index 50ae798..8fcc960 100644
--- a/api/utils.py
+++ b/api/utils.py
@@ -364,189 +364,77 @@ def scrape_fft_all_tournaments(
     national_cup=False,
 ):
     """
-    Scrapes FFT tournaments using Playwright with detailed debugging
-    Based exactly on the working scrape_fft_club_tournaments function
+    Scrapes FFT tournaments with Queue-It fallback to club-based approach
     """
-    logger.info(f"Starting Playwright scraping for city: {city}")
+    logger.info(f"Starting tournament scraping for city: {city}")
 
+    # First try the original direct approach
     try:
-        with sync_playwright() as p:
-            browser = p.chromium.launch(headless=True)
-            page_obj = browser.new_page()
-
-            page_obj.set_extra_http_headers(
-                {
-                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
-                }
-            )
-
-            # Navigate to FFT
-            target_url = "https://tenup.fft.fr/recherche/tournois"
-            logger.info(f"Navigating to: {target_url}")
-
-            page_obj.goto(target_url)
-            # page_obj.wait_for_timeout(7000)
-
-            current_url = page_obj.url
-            logger.info(f"Current URL: {current_url}")
-
-            if "queue-it.net" in current_url.lower():
-                logger.warning("Still in Queue-It")
-                browser.close()
-                return None
-
-            # Extract form_build_id
-            form_input = page_obj.query_selector('input[name="form_build_id"]')
-            if not form_input:
-                logger.error("Could not find form_build_id")
-                browser.close()
-                return None
-
-            form_build_id = form_input.get_attribute("value")
-            logger.info(f"Extracted form_build_id: {form_build_id}")
-
-            # Build parameters - EXACT same pattern as club function
-            date_component = ""
-            if start_date and end_date:
-                date_component = f"&date[start]={start_date}&date[end]={end_date}"
-            elif start_date:
-                try:
-                    start_dt = datetime.strptime(start_date, "%d/%m/%y")
-                    end_dt = start_dt + timedelta(days=90)
-                    date_component = f"&date[start]={start_date}&date[end]={end_dt.strftime('%d/%m/%y')}"
-                except ValueError:
-                    logger.warning(f"Invalid date format: {start_date}")
-
-            # Build filter parameters
-            filter_params = ""
-
-            # Add categories filter
-            if categories:
-                logger.info(f"Adding categories filter: {categories}")
-                for category in categories:
-                    filter_params += f"&epreuve[{category}]={category}"
-
-            # Add levels filter
-            if levels:
-                logger.info(f"Adding levels filter: {levels}")
-                for level in levels:
-                    filter_params += f"&categorie_tournoi[{level}]={level}"
-
-            # Add ages filter
-            if ages:
-                logger.info(f"Adding ages filter: {ages}")
-                for age in ages:
-                    filter_params += f"&categorie_age[{age}]={age}"
-
-            # Add types filter
-            if tournament_types:
-                logger.info(f"Adding types filter: {tournament_types}")
-                for t_type in tournament_types:
-                    capitalized_type = t_type.capitalize()
-                    filter_params += f"&type[{capitalized_type}]={capitalized_type}"
-
-            # Add national cup filter
-            if national_cup:
-                logger.info("Adding national cup filter")
-                filter_params += "&tournoi_npc=1"
-
-            # Fix the sorting parameter
-            if sorting_option:
-                sort_param = f"&sort={sorting_option}"
-            else:
-                sort_param = "&sort=dateDebut+asc"
-
-            # Build city parameters with distance and location
-            if city and city.strip():
-                city_name_encoded = city.strip().replace(" ", "+")
-
-                # Start with the working base parameters
-                base_params = f"recherche_type=ville&ville[autocomplete][value_container][value_field]={city_name_encoded}&ville[autocomplete][value_container][label_field]={city_name_encoded}"
-
-                # Add distance parameter
-                distance_param = f"&ville[distance][value_field]={int(distance)}"
-
-                # Add lat/lng if provided
-                location_params = ""
-                if lat and lng:
-                    location_params = f"&ville[autocomplete][value_container][lat_field]={lat}&ville[autocomplete][value_container][lng_field]={lng}"
-
-                # Combine all parameters including filters
-                params = f"{base_params}{location_params}{distance_param}&pratique=PADEL{date_component}&page={page}{sort_param}&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page"
-            else:
-                # Default to ligue search if no city provided
-                params = f"recherche_type=ligue&pratique=PADEL{date_component}{filter_params}&page={page}&sort={sorting_option or 'dateDebut+asc'}&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page"
-
-            logger.info(f"AJAX Parameters: {params}")
-
-            # Make AJAX request and capture the full response - EXACT same as club function
-            ajax_script = f"""
-            async () => {{
-                try {{
-                    const response = await fetch('https://tenup.fft.fr/system/ajax', {{
-                        method: 'POST',
-                        headers: {{
-                            'Accept': 'application/json, text/javascript, */*; q=0.01',
-                            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
-                            'X-Requested-With': 'XMLHttpRequest',
-                            'Origin': 'https://tenup.fft.fr',
-                            'Referer': 'https://tenup.fft.fr/recherche/tournois'
-                        }},
-                        body: `{params}`
-                    }});
-
-                    const status = response.status;
-                    const responseText = await response.text();
-
-                    return {{
-                        success: response.ok,
-                        status: status,
-                        responseText: responseText
-                    }};
-                }} catch (error) {{
-                    return {{
-                        success: false,
-                        error: error.message
-                    }};
-                }}
-            }}
-            """
-
-            logger.info("Making AJAX request...")
-            result = page_obj.evaluate(ajax_script)
-
-            browser.close()
-
-            # Print the full response for debugging - EXACT same as club function
-            logger.info(f"AJAX Response Status: {result.get('status')}")
-            logger.info(f"AJAX Response Success: {result.get('success')}")
-
-            if result.get("success"):
-                response_text = result.get("responseText", "")
-                logger.info(f"Raw Response Length: {len(response_text)}")
+        logger.info("Attempting direct location-based search...")
+        result = scrape_fft_all_tournaments_original(
+            sorting_option=sorting_option,
+            page=page,
+            start_date=start_date,
+            end_date=end_date,
+            city=city,
+            distance=distance,
+            categories=categories,
+            levels=levels,
+            lat=lat,
+            lng=lng,
+            ages=ages,
+            tournament_types=tournament_types,
+            national_cup=national_cup,
+        )
 
-                try:
-                    # Try to parse as JSON
-                    json_data = json.loads(response_text)
-                    logger.info(f"JSON Response Type: {type(json_data)}")
+        if result is not None:
+            logger.info("Direct search successful")
+            return result
 
-                    # Now try to parse it - EXACT same as club function
-                    parsed_result = _parse_ajax_response(json_data)
+    except Exception as e:
+        logger.warning(f"Direct search failed: {e}")
+
+    # Fallback to club-based approach
+    if city:  # Only use fallback if we have a city to search for clubs
+        logger.info("Falling back to club-based search...")
+        return scrape_fft_all_tournaments_via_clubs(
+            sorting_option=sorting_option,
+            page=page,
+            start_date=start_date,
+            end_date=end_date,
+            city=city,
+            distance=distance,
+            categories=categories,
+            levels=levels,
+            lat=lat,
+            lng=lng,
+            ages=ages,
+            tournament_types=tournament_types,
+            national_cup=national_cup,
+        )
 
-                    return parsed_result
+    logger.error("Both direct and club-based approaches failed")
+    return None
 
-                except json.JSONDecodeError as json_error:
-                    logger.error(f"JSON Parse Error: {json_error}")
-                    logger.error(f"Response text: {response_text}")
-                    return None
-            else:
-                logger.error(f"AJAX request failed: {result.get('error')}")
-                return None
 
-    except Exception as e:
-        logger.error(f"Error in Playwright scraping: {e}")
-        logger.error(f"Traceback: {traceback.format_exc()}")
-        return None
+# Rename the original function
+def scrape_fft_all_tournaments_original(
+    sorting_option=None,
+    page=0,
+    start_date=None,
+    end_date=None,
+    city="",
+    distance=15,
+    categories=None,
+    levels=None,
+    lat=None,
+    lng=None,
+    ages=None,
+    tournament_types=None,
+    national_cup=False,
+):
+    # [Your existing direct scraping code here]
+    pass
 
 
 def get_umpire_data(tournament_id):
@@ -1056,3 +944,182 @@ def scrape_federal_clubs(
         logger.error(f"Error in federal clubs scraping: {e}")
         logger.error(f"Traceback: {traceback.format_exc()}")
         return {"typeRecherche": "clubs", "nombreResultat": 0, "club_markers": []}
+
+
+def scrape_fft_all_tournaments_via_clubs(
+    sorting_option=None,
+    page=0,
+    start_date=None,
+    end_date=None,
+    city="",
+    distance=15,
+    categories=None,
+    levels=None,
+    lat=None,
+    lng=None,
+    ages=None,
+    tournament_types=None,
+    national_cup=False,
+):
+    """
+    Get tournaments by location using the working club-based approach
+    1. Get clubs in the area using scrape_federal_clubs
+    2. Get tournaments for each club using scrape_fft_club_tournaments
+    3. Aggregate and filter results
+    """
+    logger.info(
+        f"Starting hybrid club-based tournament scraping for city: {city}, distance: {distance}km"
+    )
+
+    try:
+        # Step 1: Get clubs in the area
+        logger.info("Step 1: Getting clubs in the area...")
+        clubs_result = scrape_federal_clubs(
+            country="fr", city=city, latitude=lat, longitude=lng, radius=distance
+        )
+
+        if not clubs_result or clubs_result.get("nombreResultat", 0) == 0:
+            logger.warning(f"No clubs found for city: {city}")
+            return {"tournaments": [], "total_results": 0, "current_count": 0}
+
+        clubs = clubs_result.get("club_markers", [])
+        logger.info(f"Found {len(clubs)} clubs in {city} area")
+
+        # Step 2: Get tournaments for each club
+        all_tournaments = []
+        processed_clubs = 0
+        max_clubs = 50  # Limit to prevent too many requests
+
+        for club in clubs[:max_clubs]:
+            club_code = club.get("codeClub")
+            club_name = club.get("nomClub")
+
+            if not club_code or not club_name:
+                continue
+
+            logger.info(f"Getting tournaments for club: {club_name} ({club_code})")
+
+            try:
+                # Get tournaments for this club
+                club_result = scrape_fft_club_tournaments(
+                    club_code=club_code,
+                    club_name=club_name,
+                    start_date=start_date,
+                    end_date=end_date,
+                    page=0,  # Always get first page for each club
+                )
+
+                if club_result and club_result.get("tournaments"):
+                    tournaments = club_result["tournaments"]
+
+                    # Apply additional filters that might not be handled by club search
+                    filtered_tournaments = []
+                    for tournament in tournaments:
+                        # Apply category filter
+                        if categories:
+                            tournament_category = tournament.get("categorieTournoi", "")
+                            if not any(
+                                cat.lower() in tournament_category.lower()
+                                for cat in categories
+                            ):
+                                continue
+
+                        # Apply level filter
+                        if levels:
+                            tournament_level = tournament.get("niveau", "")
+                            if not any(
+                                level.lower() in tournament_level.lower()
+                                for level in levels
+                            ):
+                                continue
+
+                        # Apply age filter
+                        if ages:
+                            tournament_ages = tournament.get("categorieAge", "")
+                            if not any(
+                                age.lower() in tournament_ages.lower() for age in ages
+                            ):
+                                continue
+
+                        # Apply type filter
+                        if tournament_types:
+                            tournament_type = tournament.get("type", "")
+                            if not any(
+                                t_type.lower() in tournament_type.lower()
+                                for t_type in tournament_types
+                            ):
+                                continue
+
+                        # Apply national cup filter
+                        if national_cup:
+                            is_national_cup = tournament.get("tournoi_npc", False)
+                            if not is_national_cup:
+                                continue
+
+                        filtered_tournaments.append(tournament)
+
+                    all_tournaments.extend(filtered_tournaments)
+                    logger.info(
+                        f"Added {len(filtered_tournaments)} tournaments from {club_name}"
+                    )
+
+                processed_clubs += 1
+
+                # Add small delay to be respectful
+                import time
+
+                time.sleep(0.5)
+
+            except Exception as club_error:
+                logger.warning(
+                    f"Error getting tournaments for club {club_name}: {club_error}"
+                )
+                continue
+
+        logger.info(
+            f"Processed {processed_clubs} clubs, found {len(all_tournaments)} total tournaments"
+        )
+
+        # Step 3: Remove duplicates (tournaments might appear in multiple club searches)
+        unique_tournaments = []
+        seen_tournament_ids = set()
+
+        for tournament in all_tournaments:
+            tournament_id = tournament.get("id")
+            if tournament_id and tournament_id not in seen_tournament_ids:
+                unique_tournaments.append(tournament)
+                seen_tournament_ids.add(tournament_id)
+
+        logger.info(
+            f"After deduplication: {len(unique_tournaments)} unique tournaments"
+        )
+
+        # Step 4: Sort tournaments
+        if sorting_option == "dateDebut+asc" or not sorting_option:
+            unique_tournaments.sort(
+                key=lambda x: x.get("dateDebut", {}).get("date", "")
+            )
+        elif sorting_option == "dateDebut+desc":
+            unique_tournaments.sort(
+                key=lambda x: x.get("dateDebut", {}).get("date", ""), reverse=True
+            )
+        # Note: Distance sorting would require additional calculation
+
+        # Step 5: Handle pagination
+        tournaments_per_page = 20
+        start_idx = page * tournaments_per_page
+        end_idx = start_idx + tournaments_per_page
+
+        page_tournaments = unique_tournaments[start_idx:end_idx]
+
+        return {
+            "tournaments": page_tournaments,
+            "total_results": len(unique_tournaments),
+            "current_count": len(page_tournaments),
+            "method": "club_based_hybrid",
+        }
+
+    except Exception as e:
+        logger.error(f"Error in hybrid club-based scraping: {e}")
+        logger.error(f"Traceback: {traceback.format_exc()}")
+        return None

From a7cbf4c6a66d8d29ecc064423d9a5fc3b0b12764 Mon Sep 17 00:00:00 2001
From: Razmig Sarkissian <razmig@padelclub.app>
Date: Thu, 25 Sep 2025 10:53:55 +0200
Subject: [PATCH 4/4] Revert "Refactor FFT tournament scraping with Queue-It
 fallback"

---
 api/utils.py | 417 +++++++++++++++++++++------------------------------
 1 file changed, 175 insertions(+), 242 deletions(-)

diff --git a/api/utils.py b/api/utils.py
index 8fcc960..50ae798 100644
--- a/api/utils.py
+++ b/api/utils.py
@@ -364,77 +364,189 @@ def scrape_fft_all_tournaments(
     national_cup=False,
 ):
     """
-    Scrapes FFT tournaments with Queue-It fallback to club-based approach
+    Scrape one page of FFT padel tournaments with Playwright via an in-page AJAX POST.
+    Returns the _parse_ajax_response() result, or None (Queue-It, no form id, any error).
     """
-    logger.info(f"Starting tournament scraping for city: {city}")
+    logger.info(f"Starting Playwright scraping for city: {city}")
 
-    # First try the original direct approach
     try:
-        logger.info("Attempting direct location-based search...")
-        result = scrape_fft_all_tournaments_original(
-            sorting_option=sorting_option,
-            page=page,
-            start_date=start_date,
-            end_date=end_date,
-            city=city,
-            distance=distance,
-            categories=categories,
-            levels=levels,
-            lat=lat,
-            lng=lng,
-            ages=ages,
-            tournament_types=tournament_types,
-            national_cup=national_cup,
-        )
+        with sync_playwright() as p:
+            browser = p.chromium.launch(headless=True)
+            page_obj = browser.new_page()
 
-        if result is not None:
-            logger.info("Direct search successful")
-            return result
+            page_obj.set_extra_http_headers(
+                {
+                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
+                }
+            )
 
-    except Exception as e:
-        logger.warning(f"Direct search failed: {e}")
-
-    # Fallback to club-based approach
-    if city:  # Only use fallback if we have a city to search for clubs
-        logger.info("Falling back to club-based search...")
-        return scrape_fft_all_tournaments_via_clubs(
-            sorting_option=sorting_option,
-            page=page,
-            start_date=start_date,
-            end_date=end_date,
-            city=city,
-            distance=distance,
-            categories=categories,
-            levels=levels,
-            lat=lat,
-            lng=lng,
-            ages=ages,
-            tournament_types=tournament_types,
-            national_cup=national_cup,
-        )
+            # Navigate to FFT
+            target_url = "https://tenup.fft.fr/recherche/tournois"
+            logger.info(f"Navigating to: {target_url}")
 
-    logger.error("Both direct and club-based approaches failed")
-    return None
+            page_obj.goto(target_url)
+            # The final URL tells us whether we were redirected to a Queue-It page.
 
+            current_url = page_obj.url
+            logger.info(f"Current URL: {current_url}")
 
-# Rename the original function
-def scrape_fft_all_tournaments_original(
-    sorting_option=None,
-    page=0,
-    start_date=None,
-    end_date=None,
-    city="",
-    distance=15,
-    categories=None,
-    levels=None,
-    lat=None,
-    lng=None,
-    ages=None,
-    tournament_types=None,
-    national_cup=False,
-):
-    # [Your existing direct scraping code here]
-    pass
+            if "queue-it.net" in current_url.lower():
+                logger.warning("Still in Queue-It")
+                browser.close()
+                return None
+
+            # The form_build_id hidden input is sent back in the POST body below
+            form_input = page_obj.query_selector('input[name="form_build_id"]')
+            if not form_input:
+                logger.error("Could not find form_build_id")
+                browser.close()
+                return None
+
+            form_build_id = form_input.get_attribute("value")
+            logger.info(f"Extracted form_build_id: {form_build_id}")
+
+            # Date window: explicit range, or 90 days from start_date when no end is given
+            date_component = ""
+            if start_date and end_date:
+                date_component = f"&date[start]={start_date}&date[end]={end_date}"
+            elif start_date:
+                try:
+                    start_dt = datetime.strptime(start_date, "%d/%m/%y")
+                    end_dt = start_dt + timedelta(days=90)
+                    date_component = f"&date[start]={start_date}&date[end]={end_dt.strftime('%d/%m/%y')}"
+                except ValueError:
+                    logger.warning(f"Invalid date format: {start_date}")
+
+            # Build filter parameters
+            filter_params = ""
+
+            # Add categories filter
+            if categories:
+                logger.info(f"Adding categories filter: {categories}")
+                for category in categories:
+                    filter_params += f"&epreuve[{category}]={category}"
+
+            # Add levels filter
+            if levels:
+                logger.info(f"Adding levels filter: {levels}")
+                for level in levels:
+                    filter_params += f"&categorie_tournoi[{level}]={level}"
+
+            # Add ages filter
+            if ages:
+                logger.info(f"Adding ages filter: {ages}")
+                for age in ages:
+                    filter_params += f"&categorie_age[{age}]={age}"
+
+            # Add types filter
+            if tournament_types:
+                logger.info(f"Adding types filter: {tournament_types}")
+                for t_type in tournament_types:
+                    capitalized_type = t_type.capitalize()
+                    filter_params += f"&type[{capitalized_type}]={capitalized_type}"
+
+            # Add national cup filter
+            if national_cup:
+                logger.info("Adding national cup filter")
+                filter_params += "&tournoi_npc=1"
+
+            # Sorting defaults to ascending start date; used by both search modes below
+            if sorting_option:
+                sort_param = f"&sort={sorting_option}"
+            else:
+                sort_param = "&sort=dateDebut+asc"
+
+            # Build city parameters with distance and location
+            if city and city.strip():
+                city_name_encoded = city.strip().replace(" ", "+")
+
+                # Start with the working base parameters
+                base_params = f"recherche_type=ville&ville[autocomplete][value_container][value_field]={city_name_encoded}&ville[autocomplete][value_container][label_field]={city_name_encoded}"
+
+                # Add distance parameter
+                distance_param = f"&ville[distance][value_field]={int(distance)}"
+
+                # Add lat/lng if provided
+                location_params = ""
+                if lat and lng:
+                    location_params = f"&ville[autocomplete][value_container][lat_field]={lat}&ville[autocomplete][value_container][lng_field]={lng}"
+
+                # Combine all parameters, including the category/level/age/type/cup filters
+                params = f"{base_params}{location_params}{distance_param}&pratique=PADEL{date_component}{filter_params}&page={page}{sort_param}&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page"
+            else:
+                # Default to ligue search if no city provided
+                params = f"recherche_type=ligue&pratique=PADEL{date_component}{filter_params}&page={page}{sort_param}&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page"
+
+            logger.info(f"AJAX Parameters: {params}")
+
+            # POST from inside the page; params is an evaluate() argument, never spliced into the JS source
+            ajax_script = """
+            async (params) => {
+                try {
+                    const response = await fetch('https://tenup.fft.fr/system/ajax', {
+                        method: 'POST',
+                        headers: {
+                            'Accept': 'application/json, text/javascript, */*; q=0.01',
+                            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+                            'X-Requested-With': 'XMLHttpRequest',
+                            'Origin': 'https://tenup.fft.fr',
+                            'Referer': 'https://tenup.fft.fr/recherche/tournois'
+                        },
+                        body: params
+                    });
+
+                    const status = response.status;
+                    const responseText = await response.text();
+
+                    return {
+                        success: response.ok,
+                        status: status,
+                        responseText: responseText
+                    };
+                } catch (error) {
+                    return {
+                        success: false,
+                        error: error.message
+                    };
+                }
+            }
+            """
+
+            logger.info("Making AJAX request...")
+            result = page_obj.evaluate(ajax_script, params)
+
+            browser.close()
+
+            # Log the outcome of the in-page fetch (status is absent when fetch itself threw)
+            logger.info(f"AJAX Response Status: {result.get('status')}")
+            logger.info(f"AJAX Response Success: {result.get('success')}")
+
+            if result.get("success"):
+                response_text = result.get("responseText", "")
+                logger.info(f"Raw Response Length: {len(response_text)}")
+
+                try:
+                    # Try to parse as JSON
+                    json_data = json.loads(response_text)
+                    logger.info(f"JSON Response Type: {type(json_data)}")
+
+                    # Hand the decoded payload to _parse_ajax_response for extraction
+                    parsed_result = _parse_ajax_response(json_data)
+
+                    return parsed_result
+
+                except json.JSONDecodeError as json_error:
+                    logger.error(f"JSON Parse Error: {json_error}")
+                    logger.error(f"Response text: {response_text}")
+                    return None
+            else:
+                logger.error(f"AJAX request failed: {result.get('error')}")
+                return None
+
+    except Exception as e:
+        logger.error(f"Error in Playwright scraping: {e}")
+        logger.error(f"Traceback: {traceback.format_exc()}")
+        return None
 
 
 def get_umpire_data(tournament_id):
@@ -944,182 +1056,3 @@ def scrape_federal_clubs(
         logger.error(f"Error in federal clubs scraping: {e}")
         logger.error(f"Traceback: {traceback.format_exc()}")
         return {"typeRecherche": "clubs", "nombreResultat": 0, "club_markers": []}
-
-
-def scrape_fft_all_tournaments_via_clubs(
-    sorting_option=None,
-    page=0,
-    start_date=None,
-    end_date=None,
-    city="",
-    distance=15,
-    categories=None,
-    levels=None,
-    lat=None,
-    lng=None,
-    ages=None,
-    tournament_types=None,
-    national_cup=False,
-):
-    """
-    Get tournaments by location using the working club-based approach
-    1. Get clubs in the area using scrape_federal_clubs
-    2. Get tournaments for each club using scrape_fft_club_tournaments
-    3. Aggregate and filter results
-    """
-    logger.info(
-        f"Starting hybrid club-based tournament scraping for city: {city}, distance: {distance}km"
-    )
-
-    try:
-        # Step 1: Get clubs in the area
-        logger.info("Step 1: Getting clubs in the area...")
-        clubs_result = scrape_federal_clubs(
-            country="fr", city=city, latitude=lat, longitude=lng, radius=distance
-        )
-
-        if not clubs_result or clubs_result.get("nombreResultat", 0) == 0:
-            logger.warning(f"No clubs found for city: {city}")
-            return {"tournaments": [], "total_results": 0, "current_count": 0}
-
-        clubs = clubs_result.get("club_markers", [])
-        logger.info(f"Found {len(clubs)} clubs in {city} area")
-
-        # Step 2: Get tournaments for each club
-        all_tournaments = []
-        processed_clubs = 0
-        max_clubs = 50  # Limit to prevent too many requests
-
-        for club in clubs[:max_clubs]:
-            club_code = club.get("codeClub")
-            club_name = club.get("nomClub")
-
-            if not club_code or not club_name:
-                continue
-
-            logger.info(f"Getting tournaments for club: {club_name} ({club_code})")
-
-            try:
-                # Get tournaments for this club
-                club_result = scrape_fft_club_tournaments(
-                    club_code=club_code,
-                    club_name=club_name,
-                    start_date=start_date,
-                    end_date=end_date,
-                    page=0,  # Always get first page for each club
-                )
-
-                if club_result and club_result.get("tournaments"):
-                    tournaments = club_result["tournaments"]
-
-                    # Apply additional filters that might not be handled by club search
-                    filtered_tournaments = []
-                    for tournament in tournaments:
-                        # Apply category filter
-                        if categories:
-                            tournament_category = tournament.get("categorieTournoi", "")
-                            if not any(
-                                cat.lower() in tournament_category.lower()
-                                for cat in categories
-                            ):
-                                continue
-
-                        # Apply level filter
-                        if levels:
-                            tournament_level = tournament.get("niveau", "")
-                            if not any(
-                                level.lower() in tournament_level.lower()
-                                for level in levels
-                            ):
-                                continue
-
-                        # Apply age filter
-                        if ages:
-                            tournament_ages = tournament.get("categorieAge", "")
-                            if not any(
-                                age.lower() in tournament_ages.lower() for age in ages
-                            ):
-                                continue
-
-                        # Apply type filter
-                        if tournament_types:
-                            tournament_type = tournament.get("type", "")
-                            if not any(
-                                t_type.lower() in tournament_type.lower()
-                                for t_type in tournament_types
-                            ):
-                                continue
-
-                        # Apply national cup filter
-                        if national_cup:
-                            is_national_cup = tournament.get("tournoi_npc", False)
-                            if not is_national_cup:
-                                continue
-
-                        filtered_tournaments.append(tournament)
-
-                    all_tournaments.extend(filtered_tournaments)
-                    logger.info(
-                        f"Added {len(filtered_tournaments)} tournaments from {club_name}"
-                    )
-
-                processed_clubs += 1
-
-                # Add small delay to be respectful
-                import time
-
-                time.sleep(0.5)
-
-            except Exception as club_error:
-                logger.warning(
-                    f"Error getting tournaments for club {club_name}: {club_error}"
-                )
-                continue
-
-        logger.info(
-            f"Processed {processed_clubs} clubs, found {len(all_tournaments)} total tournaments"
-        )
-
-        # Step 3: Remove duplicates (tournaments might appear in multiple club searches)
-        unique_tournaments = []
-        seen_tournament_ids = set()
-
-        for tournament in all_tournaments:
-            tournament_id = tournament.get("id")
-            if tournament_id and tournament_id not in seen_tournament_ids:
-                unique_tournaments.append(tournament)
-                seen_tournament_ids.add(tournament_id)
-
-        logger.info(
-            f"After deduplication: {len(unique_tournaments)} unique tournaments"
-        )
-
-        # Step 4: Sort tournaments
-        if sorting_option == "dateDebut+asc" or not sorting_option:
-            unique_tournaments.sort(
-                key=lambda x: x.get("dateDebut", {}).get("date", "")
-            )
-        elif sorting_option == "dateDebut+desc":
-            unique_tournaments.sort(
-                key=lambda x: x.get("dateDebut", {}).get("date", ""), reverse=True
-            )
-        # Note: Distance sorting would require additional calculation
-
-        # Step 5: Handle pagination
-        tournaments_per_page = 20
-        start_idx = page * tournaments_per_page
-        end_idx = start_idx + tournaments_per_page
-
-        page_tournaments = unique_tournaments[start_idx:end_idx]
-
-        return {
-            "tournaments": page_tournaments,
-            "total_results": len(unique_tournaments),
-            "current_count": len(page_tournaments),
-            "method": "club_based_hybrid",
-        }
-
-    except Exception as e:
-        logger.error(f"Error in hybrid club-based scraping: {e}")
-        logger.error(f"Traceback: {traceback.format_exc()}")
-        return None