import json
import logging
import re
import time
import traceback
from datetime import datetime, timedelta
from urllib.parse import quote_plus

import requests
from playwright.sync_api import sync_playwright

logger = logging.getLogger(__name__)

# Browser identity sent with every request to tenup.fft.fr.
_USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
    "(KHTML, like Gecko) Version/18.5 Safari/605.1.15"
)
_SEARCH_PAGE_URL = "https://tenup.fft.fr/recherche/tournois"
_DEFAULT_SORT = "dateDebut+asc"
_DATE_FORMAT = "%d/%m/%y"
# Width of the search window used when only a start date is supplied.
_DEFAULT_WINDOW_DAYS = 90
# Trailing form fields shared by every tournament search request.
_FORM_SUFFIX = (
    "&form_id=recherche_tournois_form"
    "&_triggering_element_name=submit_page"
    "&_triggering_element_value=Submit+page"
)

# The request body is received as a function argument instead of being
# interpolated into the script text, so user-supplied values (club name,
# city, ...) can never break out of a JavaScript string literal.
_AJAX_SCRIPT = """
async (body) => {
    try {
        const response = await fetch('https://tenup.fft.fr/system/ajax', {
            method: 'POST',
            headers: {
                'Accept': 'application/json, text/javascript, */*; q=0.01',
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'X-Requested-With': 'XMLHttpRequest',
                'Origin': 'https://tenup.fft.fr',
                'Referer': 'https://tenup.fft.fr/recherche/tournois'
            },
            body: body
        });
        const status = response.status;
        const responseText = await response.text();
        return {
            success: response.ok,
            status: status,
            responseText: responseText
        };
    } catch (error) {
        return {
            success: false,
            error: error.message
        };
    }
}
"""

# Tournament fields copied verbatim from each result item, in output order.
# "id" is emitted first and "japPhoneNumber" sits between the two groups.
_TOURNAMENT_FIELDS_HEAD = (
    "millesime", "libelle", "tmc", "tarifAdulteChampionnat", "type",
    "ageReel", "naturesTerrains", "idsArbitres", "tarifJeuneChampionnat",
    "international", "inscriptionEnLigne", "categorieTournoi", "prixLot",
    "paiementEnLigne", "reductionAdherentJeune", "reductionAdherentAdulte",
    "paiementEnLigneObligatoire", "villeEngagement", "senior", "veteran",
    "inscriptionEnLigneEnCours", "avecResultatPublie", "code",
    "categorieAge", "codeComite", "installations",
    "reductionEpreuveSupplementaireJeune",
    "reductionEpreuveSupplementaireAdulte", "nomComite", "naturesEpreuves",
    "jeune", "courrielEngagement", "nomClub", "installation",
    "categorieAgeMax", "tournoiInterne", "nomLigue", "nomEngagement",
    "codeLigue", "modeleDeBalle", "jugeArbitre", "adresse2Engagement",
    "epreuves", "dateDebut", "serie", "dateFin", "dateValidation",
    "codePostalEngagement", "codeClub", "prixEspece",
)
_TOURNAMENT_FIELDS_TAIL = (
    "adresse1Engagement", "originalId", "familleTournoi", "isTournoi",
    "natureWithCatAge",
)
# Fields that fall back to an empty list (instead of None) when absent.
_LIST_FIELDS = frozenset(
    {"naturesTerrains", "idsArbitres", "installations", "familleTournoi"}
)


def check_version_smaller_than_1_1_12(version_str):
    """
    Tell whether a version string is strictly older than 1.1.12.

    The string may carry a trailing build marker, e.g. ``"1.1.12 (2)"``;
    only the first whitespace-separated token is compared. Components are
    compared numerically, left to right.

    Returns False for None, empty, whitespace-only or non-numeric input
    instead of raising.
    """
    if not version_str or not isinstance(version_str, str):
        return False

    tokens = version_str.split()
    if not tokens:
        # Whitespace-only input: nothing to compare.
        return False

    try:
        version_parts = [int(part) for part in tokens[0].split('.')]
    except ValueError:
        logger.warning("Unparseable version string: %s", version_str)
        return False

    return version_parts < [1, 1, 12]


def _encode(value, safe=""):
    """URL-encode a single form value (spaces become '+')."""
    return quote_plus(str(value), safe=safe)


def _is_provided(value):
    """True unless the value is None or an empty string (0 counts as provided)."""
    return value is not None and value != ""


def _build_date_component(start_date, end_date):
    """
    Build the ``&date[start]=...&date[end]=...`` fragment of a search request.

    With only a start date (expected as dd/mm/yy) the end date defaults to
    90 days later. Returns an empty string when no usable date is given.
    """
    if start_date and end_date:
        return (
            f"&date[start]={_encode(start_date, '/')}"
            f"&date[end]={_encode(end_date, '/')}"
        )

    if not start_date:
        return ""

    try:
        start_dt = datetime.strptime(start_date, _DATE_FORMAT)
    except ValueError:
        logger.warning("Invalid date format: %s", start_date)
        return ""

    end_dt = start_dt + timedelta(days=_DEFAULT_WINDOW_DAYS)
    return (
        f"&date[start]={_encode(start_date, '/')}"
        f"&date[end]={end_dt.strftime(_DATE_FORMAT)}"
    )


def _build_filter_params(categories, levels, ages, tournament_types, national_cup):
    """Build the optional filter fragment (events, levels, ages, types, cup)."""
    fragments = []

    if categories:
        logger.info("Adding categories filter: %s", categories)
        for category in categories:
            encoded = _encode(category)
            fragments.append(f"&epreuve[{encoded}]={encoded}")

    if levels:
        logger.info("Adding levels filter: %s", levels)
        for level in levels:
            encoded = _encode(level)
            fragments.append(f"&categorie_tournoi[{encoded}]={encoded}")

    if ages:
        logger.info("Adding ages filter: %s", ages)
        for age in ages:
            encoded = _encode(age)
            fragments.append(f"&categorie_age[{encoded}]={encoded}")

    if tournament_types:
        logger.info("Adding types filter: %s", tournament_types)
        for t_type in tournament_types:
            encoded = _encode(t_type.capitalize())
            fragments.append(f"&type[{encoded}]={encoded}")

    if national_cup:
        logger.info("Adding national cup filter")
        fragments.append("&tournoi_npc=1")

    return "".join(fragments)


def _handle_ajax_result(result):
    """Log the raw AJAX outcome and turn a successful response into parsed data."""
    logger.info("AJAX Response Status: %s", result.get('status'))
    logger.info("AJAX Response Success: %s", result.get('success'))

    if not result.get('success'):
        # A non-2xx answer carries no 'error' key; report the status instead.
        reason = result.get('error') or f"HTTP {result.get('status')}"
        logger.error("AJAX request failed: %s", reason)
        return None

    response_text = result.get('responseText', '')
    logger.info("Raw Response Length: %s", len(response_text))

    try:
        json_data = json.loads(response_text)
    except json.JSONDecodeError as json_error:
        logger.error("JSON Parse Error: %s", json_error)
        logger.error("Response text: %s", response_text)
        return None

    logger.info("JSON Response Type: %s", type(json_data))
    return _parse_ajax_response(json_data)


def _run_tournament_search(search_params):
    """
    Open the FFT search page, post a search through its AJAX endpoint and
    return the parsed result.

    ``search_params`` is the url-encoded form body up to (and including) the
    sort option; the form_build_id read from the page and the fixed trailing
    form fields are appended here.

    Returns the dict produced by ``_parse_ajax_response``, or None when the
    site is behind Queue-It, the form token is missing, or the request or
    its decoding fails. Exceptions raised by Playwright propagate to the
    caller.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page_obj = browser.new_page()
            page_obj.set_extra_http_headers({"User-Agent": _USER_AGENT})

            logger.info("Navigating to: %s", _SEARCH_PAGE_URL)
            page_obj.goto(_SEARCH_PAGE_URL)

            current_url = page_obj.url
            logger.info("Current URL: %s", current_url)

            if "queue-it.net" in current_url.lower():
                logger.warning("Still in Queue-It")
                return None

            # The search form embeds a token that must be echoed back.
            form_input = page_obj.query_selector('input[name="form_build_id"]')
            if not form_input:
                logger.error("Could not find form_build_id")
                return None

            form_build_id = form_input.get_attribute('value')
            logger.info("Extracted form_build_id: %s", form_build_id)

            params = f"{search_params}&form_build_id={form_build_id}{_FORM_SUFFIX}"
            logger.info("AJAX Parameters: %s", params)

            logger.info("Making AJAX request...")
            result = page_obj.evaluate(_AJAX_SCRIPT, params)
        finally:
            # Always release the browser, including on navigation errors.
            browser.close()

    return _handle_ajax_result(result)


def scrape_fft_club_tournaments(club_code, club_name, start_date=None, end_date=None, page=0):
    """
    Scrape one page of padel tournaments for a club from the FFT site.

    Args:
        club_code: Club identifier; spaces are removed before sending.
        club_name: Club label sent alongside the code.
        start_date: Optional start date, dd/mm/yy.
        end_date: Optional end date, dd/mm/yy. When omitted, the window is
            90 days from ``start_date``.
        page: Zero-based result page index.

    Returns:
        A dict with 'tournaments', 'total_results' and 'current_count', or
        None on any failure (errors are logged, never raised).
    """
    logger.info("Starting Playwright scraping for %s", club_name)

    try:
        date_component = _build_date_component(start_date, end_date)
        club_code_clean = _encode(club_code.replace(" ", ""))
        club_name_encoded = _encode(club_name)

        search_params = (
            "recherche_type=club"
            f"&club[autocomplete][value_container][value_field]={club_code_clean}"
            f"&club[autocomplete][value_container][label_field]={club_name_encoded}"
            f"&pratique=PADEL{date_component}"
            f"&page={page}&sort={_DEFAULT_SORT}"
        )
        return _run_tournament_search(search_params)

    except Exception as e:
        logger.error("Error in Playwright scraping: %s", e)
        logger.error("Traceback: %s", traceback.format_exc())
        return None


def scrape_fft_club_tournaments_all_pages(club_code, club_name, start_date=None, end_date=None):
    """
    Scrape every result page of a club's tournaments.

    Pages are fetched one by one with ``scrape_fft_club_tournaments`` until
    a page fails, comes back empty, or the number of collected tournaments
    reaches the total announced by the site.

    Returns:
        A dict with 'tournaments' (all collected items), 'total_results'
        and 'current_count' (both the collected count) and 'pages_scraped'
        (number of page requests issued).
    """
    logger.info("Starting complete tournament scraping for %s", club_name)

    all_tournaments = []
    page = 0

    while True:
        try:
            result = scrape_fft_club_tournaments(
                club_code=club_code,
                club_name=club_name,
                start_date=start_date,
                end_date=end_date,
                page=page,
            )
            logger.info("Page %s result: %s", page, result)

            if not result:
                logger.warning("No result for page %s", page)
                break

            tournaments = result.get('tournaments', [])
            logger.info("Page %s returned %s tournaments", page, len(tournaments))

            if not tournaments:
                logger.info("No tournaments on page %s, stopping", page)
                break

            all_tournaments.extend(tournaments)

            # Stop as soon as the announced total has been collected.
            total_results = result.get('total_results', 0)
            logger.info(
                "Total so far: %s, Target: %s", len(all_tournaments), total_results
            )
            if len(all_tournaments) >= total_results:
                logger.info("Got all tournaments, stopping")
                break

            page += 1
            logger.info("Moving to page %s", page)

        except Exception as e:
            logger.error("Error on page %s: %s", page, e)
            break

    logger.info(
        "Completed scraping: %s tournaments across %s pages",
        len(all_tournaments),
        page + 1,
    )

    return {
        'tournaments': all_tournaments,
        'total_results': len(all_tournaments),
        'current_count': len(all_tournaments),
        'pages_scraped': page + 1,
    }


def get_umpire_data(tournament_id):
    """
    Fetch a tournament page and extract the umpire's contact details.

    Returns:
        A ``(name, email, phone)`` tuple; each element is None when it
        cannot be found. ``(None, None, None)`` is returned when the page
        cannot be fetched or any error occurs.
    """
    logger.info("Getting umpire data for tournament %s", tournament_id)

    try:
        url = f"https://tenup.fft.fr/tournoi/{tournament_id}"
        response = requests.get(url, headers={'User-Agent': _USER_AGENT}, timeout=30)

        if response.status_code != 200:
            logger.error("Failed to fetch tournament page: %s", response.status_code)
            return None, None, None

        html_content = response.text

        # Extract name
        name_pattern = r'tournoi-detail-page-inscription-responsable-title">\s*([^<]+)\s*<'
        name_match = re.search(name_pattern, html_content)
        name = name_match.group(1).strip() if name_match else None

        # Extract email
        email_pattern = r'mailto:([^"]+)"'
        email_match = re.search(email_pattern, html_content)
        email = email_match.group(1) if email_match else None

        # Extract phone (five groups of two digits).
        # NOTE(review): in the file as received this literal spans three
        # physical lines; the line breaks are kept here as "\n". It may
        # originally have been wrapped in markup that was lost -- confirm
        # against the upstream source before relying on it.
        phone_pattern = '\n' r'\s*(\d{2}\s+\d{2}\s+\d{2}\s+\d{2}\s+\d{2})\s*' '\n'
        phone_match = re.search(phone_pattern, html_content)
        phone = phone_match.group(1).strip() if phone_match else None

        logger.info("Extracted umpire data: name=%s, email=%s, phone=%s", name, email, phone)
        return name, email, phone

    except Exception as e:
        logger.error("Error getting umpire data: %s", e)
        return None, None, None


def _build_tournament(item):
    """Map one raw result item to the flat tournament dict returned to callers."""
    tournament = {"id": str(item.get('id', ''))}
    for key in _TOURNAMENT_FIELDS_HEAD:
        tournament[key] = item.get(key, [] if key in _LIST_FIELDS else None)
    # Filled in later by a separate umpire lookup.
    tournament["japPhoneNumber"] = None
    for key in _TOURNAMENT_FIELDS_TAIL:
        tournament[key] = item.get(key, [] if key in _LIST_FIELDS else None)
    return tournament


def _parse_ajax_response(commands):
    """
    Extract tournament data from the list of AJAX response commands.

    Returns:
        - None when an 'alert' command is present (treated as maintenance
          mode) or when parsing raises;
        - a dict with empty 'tournaments' when no
          'recherche_tournois_update' command with results is found;
        - otherwise a dict with 'tournaments', 'total_results' (the
          'nb_results' value of the response) and 'current_count'.
    """
    try:
        # Any alert command is treated as maintenance mode.
        if any(command.get('command') == 'alert' for command in commands):
            logger.warning("Maintenance mode detected")
            return None

        result_command = next(
            (
                command for command in commands
                if command.get('command') == 'recherche_tournois_update'
            ),
            None,
        )
        if result_command is not None:
            logger.info("Found recherche_tournois_update command!")

        if not result_command or not result_command.get('results'):
            logger.error("No recherche_tournois_update command found in AJAX response")
            return {'tournaments': [], 'total_results': 0, 'current_count': 0}

        results = result_command['results']
        items = results.get('items') or []  # tolerate an explicit null
        total_results = results.get('nb_results', 0)

        logger.info("Processing %s tournaments from results", len(items))
        tournaments = [_build_tournament(item) for item in items]
        logger.info("Successfully parsed %s tournaments from response", len(tournaments))

        return {
            'tournaments': tournaments,
            'total_results': total_results,
            'current_count': len(tournaments),
        }

    except Exception as e:
        logger.error("Error parsing AJAX response: %s", e)
        return None


def scrape_fft_all_tournaments(sorting_option=None, page=0, start_date=None, end_date=None,
                               city='', distance=15, categories=None, levels=None,
                               lat=None, lng=None, ages=None, tournament_types=None,
                               national_cup=False):
    """
    Scrape one page of padel tournaments, searching around a city or,
    when no city is given, across the ligue.

    Args:
        sorting_option: Sort expression sent as-is (already url-encoded,
            e.g. ``"dateDebut+asc"``, which is also the default).
        page: Zero-based result page index.
        start_date: Optional start date, dd/mm/yy.
        end_date: Optional end date, dd/mm/yy. When omitted, the window is
            90 days from ``start_date``.
        city: City name; blank selects the ligue-wide search.
        distance: Search radius around the city, converted with ``int()``.
        categories, levels, ages, tournament_types: Optional iterables of
            filter values.
        lat, lng: Optional coordinates of the city; sent only when both
            are provided (0 is a valid value).
        national_cup: When true, adds the national cup filter.

    Returns:
        A dict with 'tournaments', 'total_results' and 'current_count', or
        None on any failure (errors are logged, never raised).
    """
    logger.info("Starting Playwright scraping for city: %s", city)

    try:
        date_component = _build_date_component(start_date, end_date)
        filter_params = _build_filter_params(
            categories, levels, ages, tournament_types, national_cup
        )
        sort_param = f"&sort={sorting_option or _DEFAULT_SORT}"

        if city and city.strip():
            city_name_encoded = _encode(city.strip())

            location_params = ""
            if _is_provided(lat) and _is_provided(lng):
                location_params = (
                    f"&ville[autocomplete][value_container][lat_field]={_encode(lat)}"
                    f"&ville[autocomplete][value_container][lng_field]={_encode(lng)}"
                )

            # Filters are included here as well, so a city search honours
            # the same filters as the ligue search.
            search_params = (
                "recherche_type=ville"
                f"&ville[autocomplete][value_container][value_field]={city_name_encoded}"
                f"&ville[autocomplete][value_container][label_field]={city_name_encoded}"
                f"{location_params}"
                f"&ville[distance][value_field]={int(distance)}"
                f"&pratique=PADEL{date_component}{filter_params}"
                f"&page={page}{sort_param}"
            )
        else:
            # Default to ligue search if no city provided
            search_params = (
                f"recherche_type=ligue&pratique=PADEL{date_component}{filter_params}"
                f"&page={page}{sort_param}"
            )

        return _run_tournament_search(search_params)

    except Exception as e:
        logger.error("Error in Playwright scraping: %s", e)
        logger.error("Traceback: %s", traceback.format_exc())
        return None