"""API utilities: app-version gate and FFT (tenup.fft.fr) padel tournament scraping."""
# Reconstructed post-patch content of api/utils.py from commit 590a652e
# ("Update utils.py", Razmig Sarkissian, 2025-07-09).

import json
import logging
import re
import time
import traceback
from datetime import datetime, timedelta
from urllib.parse import urlencode

# The scraping dependencies are optional at import time so that the pure
# helpers in this module remain usable without them. The functions that need
# them log an error and return their usual failure value instead.
try:
    import requests
except ImportError:
    requests = None

try:
    from playwright.sync_api import sync_playwright
except ImportError:
    sync_playwright = None

logger = logging.getLogger(__name__)

_MIN_VERSION = [1, 1, 12]

_SEARCH_PAGE_URL = "https://tenup.fft.fr/recherche/tournois"
_USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
    "(KHTML, like Gecko) Version/18.5 Safari/605.1.15"
)
_PAGE_SETTLE_MS = 7000          # fixed wait after navigation before reading the page
_DEFAULT_WINDOW_DAYS = 90       # end date offset when only a start date is given
_DATE_FORMAT = "%d/%m/%y"

# The request body is passed in as the function argument rather than being
# interpolated into the script text, so no input can alter the JavaScript.
_AJAX_SCRIPT = """
async (body) => {
    try {
        const response = await fetch('https://tenup.fft.fr/system/ajax', {
            method: 'POST',
            headers: {
                'Accept': 'application/json, text/javascript, */*; q=0.01',
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'X-Requested-With': 'XMLHttpRequest',
                'Origin': 'https://tenup.fft.fr',
                'Referer': 'https://tenup.fft.fr/recherche/tournois'
            },
            body: body
        });

        const status = response.status;
        const responseText = await response.text();

        return {
            success: response.ok,
            status: status,
            responseText: responseText
        };
    } catch (error) {
        return {
            success: false,
            error: error.message
        };
    }
}
"""

_UMPIRE_NAME_RE = re.compile(
    r'tournoi-detail-page-inscription-responsable-title">\s*([^<]+)\s*<'
)
_UMPIRE_EMAIL_RE = re.compile(r'mailto:([^"]+)"')
# NOTE(review): in the copy of the file this was rebuilt from, the phone
# pattern looked as if its surrounding markup anchors had been stripped.
# Confirm against the upstream source before relying on it.
_UMPIRE_PHONE_RE = re.compile(r'\s*(\d{2}\s+\d{2}\s+\d{2}\s+\d{2}\s+\d{2})\s*')

# Keys copied from each AJAX result item, in output order.
_TOURNAMENT_FIELDS = (
    "id", "millesime", "libelle", "tmc", "tarifAdulteChampionnat", "type",
    "ageReel", "naturesTerrains", "idsArbitres", "tarifJeuneChampionnat",
    "international", "inscriptionEnLigne", "categorieTournoi", "prixLot",
    "paiementEnLigne", "reductionAdherentJeune", "reductionAdherentAdulte",
    "paiementEnLigneObligatoire", "villeEngagement", "senior", "veteran",
    "inscriptionEnLigneEnCours", "avecResultatPublie", "code", "categorieAge",
    "codeComite", "installations", "reductionEpreuveSupplementaireJeune",
    "reductionEpreuveSupplementaireAdulte", "nomComite", "naturesEpreuves",
    "jeune", "courrielEngagement", "nomClub", "installation",
    "categorieAgeMax", "tournoiInterne", "nomLigue", "nomEngagement",
    "codeLigue", "modeleDeBalle", "jugeArbitre", "adresse2Engagement",
    "epreuves", "dateDebut", "serie", "dateFin", "dateValidation",
    "codePostalEngagement", "codeClub", "prixEspece", "japPhoneNumber",
    "adresse1Engagement", "originalId", "familleTournoi", "isTournoi",
    "natureWithCatAge",
)
# Fields that default to an empty list (instead of None) when absent.
_LIST_DEFAULT_FIELDS = frozenset(
    {"naturesTerrains", "idsArbitres", "installations", "familleTournoi"}
)


def check_version_smaller_than_1_1_12(version_str):
    """Return True when *version_str* denotes a version older than 1.1.12.

    The version may carry a trailing build marker, e.g. ``"1.1.12 (2)"``; only
    the first whitespace-separated token is compared. ``None``, empty and
    whitespace-only input yield ``False``.

    Raises:
        ValueError: if a dot-separated component is not an integer.
    """
    tokens = version_str.split() if version_str else []
    if not tokens:
        return False
    version_parts = [int(part) for part in tokens[0].split('.')]
    return version_parts < _MIN_VERSION


def _date_fields(start_date, end_date):
    """Return the ``date[start]``/``date[end]`` form pairs for a search.

    With only a start date, the end date defaults to 90 days later. A start
    date that does not parse as dd/mm/yy is logged and yields no date filter.
    """
    if start_date and end_date:
        return [("date[start]", start_date), ("date[end]", end_date)]
    if not start_date:
        return []
    try:
        start_dt = datetime.strptime(start_date, _DATE_FORMAT)
    except ValueError:
        logger.warning(f"Invalid date format: {start_date}")
        return []
    end_dt = start_dt + timedelta(days=_DEFAULT_WINDOW_DAYS)
    return [("date[start]", start_date), ("date[end]", end_dt.strftime(_DATE_FORMAT))]


def _encode_search_form(search_fields, start_date, end_date, page, form_build_id):
    """Return the URL-encoded body of the tournament search form."""
    fields = list(search_fields)
    fields.append(("pratique", "PADEL"))
    fields.extend(_date_fields(start_date, end_date))
    fields.extend([
        ("page", page),
        ("sort", "dateDebut asc"),
        ("form_build_id", form_build_id),
        ("form_id", "recherche_tournois_form"),
        ("_triggering_element_name", "submit_page"),
        ("_triggering_element_value", "Submit page"),
    ])
    # Brackets and slashes stay literal so field names and dd/mm/yy dates are
    # sent unescaped; every other reserved character is percent-encoded.
    return urlencode(fields, safe="[]/")


def _interpret_ajax_result(result):
    """Turn the dict returned by the in-page fetch into a parsed search result."""
    logger.info(f"AJAX Response Status: {result.get('status')}")
    logger.info(f"AJAX Response Success: {result.get('success')}")

    if not result.get('success'):
        # A non-2xx response carries no 'error' key, so report the status.
        reason = result.get('error') or f"HTTP {result.get('status')}"
        logger.error(f"AJAX request failed: {reason}")
        return None

    response_text = result.get('responseText', '')
    logger.info(f"Raw Response Length: {len(response_text)}")
    try:
        json_data = json.loads(response_text)
    except json.JSONDecodeError as json_error:
        logger.error(f"JSON Parse Error: {json_error}")
        # Truncated: an error page here can be very large.
        logger.error(f"Response text (first 500 chars): {response_text[:500]}")
        return None

    logger.info(f"JSON Response Type: {type(json_data)}")
    return _parse_ajax_response(json_data)


def _run_tournament_search(search_fields, start_date, end_date, page):
    """Load the search page in headless Chromium, post the search, parse the reply.

    Returns the dict produced by ``_parse_ajax_response``, or ``None`` on any
    failure (missing dependency, queue-it.net redirect, missing form token,
    failed request, unparseable reply, or unexpected exception).
    """
    if sync_playwright is None:
        logger.error("playwright is not installed; cannot scrape FFT tournaments")
        return None

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page_obj = browser.new_page()
                page_obj.set_extra_http_headers({"User-Agent": _USER_AGENT})

                logger.info(f"Navigating to: {_SEARCH_PAGE_URL}")
                page_obj.goto(_SEARCH_PAGE_URL)
                page_obj.wait_for_timeout(_PAGE_SETTLE_MS)

                current_url = page_obj.url
                logger.info(f"Current URL: {current_url}")
                if "queue-it.net" in current_url.lower():
                    logger.warning("Still in Queue-It")
                    return None

                # The form token is read from the loaded page and sent back
                # with the AJAX request.
                form_input = page_obj.query_selector('input[name="form_build_id"]')
                if not form_input:
                    logger.error("Could not find form_build_id")
                    return None
                form_build_id = form_input.get_attribute('value')
                logger.info(f"Extracted form_build_id: {form_build_id}")

                params = _encode_search_form(
                    search_fields, start_date, end_date, page, form_build_id
                )
                logger.info(f"AJAX Parameters: {params}")

                logger.info("Making AJAX request...")
                result = page_obj.evaluate(_AJAX_SCRIPT, params)
            finally:
                # Close the browser on every path, including exceptions.
                browser.close()

        return _interpret_ajax_result(result)

    except Exception as e:
        logger.error(f"Error in Playwright scraping: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        return None


def scrape_fft_club_tournaments(club_code, club_name, start_date=None, end_date=None, page=0):
    """Scrape one result page of padel tournaments for a club.

    Args:
        club_code: club code; spaces are removed before sending.
        club_name: club label sent alongside the code.
        start_date: optional start date, dd/mm/yy.
        end_date: optional end date, dd/mm/yy; defaults to start + 90 days
            when only ``start_date`` is given.
        page: zero-based result page.

    Returns:
        ``{'tournaments': [...], 'total_results': int, 'current_count': int}``
        or ``None`` on failure.
    """
    logger.info(f"Starting Playwright scraping for {club_name}")

    if not isinstance(club_code, str) or not isinstance(club_name, str):
        logger.error("club_code and club_name must be strings")
        return None

    search_fields = [
        ("recherche_type", "club"),
        ("club[autocomplete][value_container][value_field]", club_code.replace(" ", "")),
        ("club[autocomplete][value_container][label_field]", club_name),
    ]
    return _run_tournament_search(search_fields, start_date, end_date, page)


def scrape_fft_club_tournaments_all_pages(club_code, club_name, start_date=None, end_date=None):
    """Scrape every result page of padel tournaments for a club.

    Pages are fetched one at a time (one second apart) until a page fails,
    a page is empty, or the number collected reaches the total reported by
    the server.

    Returns:
        A dict with ``tournaments``, ``total_results`` and ``current_count``
        (both the number actually collected) and ``pages_scraped`` (the number
        of pages that contributed tournaments).
    """
    logger.info(f"Starting complete tournament scraping for {club_name}")

    all_tournaments = []
    page = 0
    pages_scraped = 0

    while True:
        try:
            result = scrape_fft_club_tournaments(
                club_code=club_code,
                club_name=club_name,
                start_date=start_date,
                end_date=end_date,
                page=page
            )
            # Full payload only at debug level; it can be very large.
            logger.debug(f"Page {page} result: {result}")

            if not result:
                logger.warning(f"No result for page {page}")
                break

            tournaments = result.get('tournaments', [])
            logger.info(f"Page {page} returned {len(tournaments)} tournaments")
            if not tournaments:
                logger.info(f"No tournaments on page {page}, stopping")
                break

            all_tournaments.extend(tournaments)
            pages_scraped += 1

            total_results = result.get('total_results', 0)
            logger.info(f"Total so far: {len(all_tournaments)}, Target: {total_results}")
            if len(all_tournaments) >= total_results:
                logger.info("Got all tournaments, stopping")
                break

            page += 1
            logger.info(f"Moving to page {page}")
            time.sleep(1)  # Rate limiting

        except Exception as e:
            logger.error(f"Error on page {page}: {e}")
            break

    logger.info(
        f"Completed scraping: {len(all_tournaments)} tournaments across {pages_scraped} pages"
    )

    return {
        'tournaments': all_tournaments,
        'total_results': len(all_tournaments),
        'current_count': len(all_tournaments),
        'pages_scraped': pages_scraped
    }


def get_umpire_data(tournament_id):
    """Fetch a tournament page and extract the umpire's contact details.

    Returns:
        ``(name, email, phone)``; each element is ``None`` when not found,
        and all three are ``None`` when the page cannot be fetched.
    """
    logger.info(f"Getting umpire data for tournament {tournament_id}")

    if requests is None:
        logger.error("requests is not installed; cannot fetch umpire data")
        return None, None, None

    try:
        url = f"https://tenup.fft.fr/tournoi/{tournament_id}"
        response = requests.get(url, headers={'User-Agent': _USER_AGENT}, timeout=30)

        if response.status_code != 200:
            logger.error(f"Failed to fetch tournament page: {response.status_code}")
            return None, None, None

        html_content = response.text

        name_match = _UMPIRE_NAME_RE.search(html_content)
        name = name_match.group(1).strip() if name_match else None

        # Takes the first mailto: link found in the page.
        email_match = _UMPIRE_EMAIL_RE.search(html_content)
        email = email_match.group(1) if email_match else None

        phone_match = _UMPIRE_PHONE_RE.search(html_content)
        phone = phone_match.group(1).strip() if phone_match else None

        logger.info(f"Extracted umpire data: name={name}, email={email}, phone={phone}")
        return name, email, phone

    except Exception as e:
        logger.error(f"Error getting umpire data: {e}")
        return None, None, None


def _build_tournament(item):
    """Map one AJAX result item onto the tournament dict returned to callers."""
    tournament = {}
    for field in _TOURNAMENT_FIELDS:
        if field in _LIST_DEFAULT_FIELDS:
            tournament[field] = item.get(field, [])
        else:
            tournament[field] = item.get(field)
    tournament["id"] = str(item.get('id', ''))
    # Always None here; the original author notes it is filled by a separate
    # umpire call.
    tournament["japPhoneNumber"] = None
    return tournament


def _parse_ajax_response(commands):
    """Extract tournament data from the list of AJAX response commands.

    The output layout is intended to match the Swift ``FederalTournament``
    struct (per the original author).

    Returns:
        ``None`` if any command is an ``alert`` (treated as maintenance mode)
        or if parsing raises; otherwise a dict with ``tournaments``,
        ``total_results`` and ``current_count``. The dict is empty-valued when
        no ``recherche_tournois_update`` command with results is present.
    """
    try:
        for command in commands:
            if command.get('command') == 'alert':
                logger.warning("Maintenance mode detected")
                return None

        result_command = next(
            (c for c in commands if c.get('command') == 'recherche_tournois_update'),
            None,
        )
        if result_command:
            logger.info("Found recherche_tournois_update command!")

        if not (result_command and result_command.get('results')):
            logger.error("No recherche_tournois_update command found in AJAX response")
            return {'tournaments': [], 'total_results': 0, 'current_count': 0}

        results = result_command['results']
        items = results.get('items') or []
        total_results = results.get('nb_results', 0)
        logger.info(f"Processing {len(items)} tournaments from results")

        tournaments = [_build_tournament(item) for item in items]

        logger.info(f"Successfully parsed {len(tournaments)} tournaments from response")
        return {
            'tournaments': tournaments,
            'total_results': total_results,
            'current_count': len(tournaments)
        }

    except Exception as e:
        logger.error(f"Error parsing AJAX response: {e}")
        return None


def scrape_fft_all_tournaments(sorting_option=None, page=0, start_date=None, end_date=None,
                               city='', distance=15, categories=None, levels=None,
                               lat=None, lng=None, ages=None, tournament_types=None,
                               national_cup=False):
    """Scrape one result page of padel tournaments around a city.

    With a non-blank ``city`` the search is of type ``ville`` within
    ``distance`` of it (``lat``/``lng`` are sent when both are supplied);
    otherwise a ``ligue`` search with no location is issued.

    ``sorting_option``, ``categories``, ``levels``, ``ages``,
    ``tournament_types`` and ``national_cup`` are accepted but not applied to
    the request; results are always sorted by ``dateDebut asc``.

    Returns:
        ``{'tournaments': [...], 'total_results': int, 'current_count': int}``
        or ``None`` on failure (including a ``distance`` that is not an int).
    """
    logger.info(f"Starting Playwright scraping for city: {city}")

    city_name = city.strip() if isinstance(city, str) else ""
    if city_name:
        try:
            radius = int(distance)
        except (TypeError, ValueError):
            logger.error(f"Invalid distance: {distance!r}")
            return None

        search_fields = [
            ("recherche_type", "ville"),
            ("ville[autocomplete][value_container][value_field]", city_name),
            ("ville[autocomplete][value_container][label_field]", city_name),
        ]
        # Compare against None/"" rather than truthiness: 0 is a valid coordinate.
        if lat not in (None, "") and lng not in (None, ""):
            search_fields.append(("ville[autocomplete][value_container][lat_field]", lat))
            search_fields.append(("ville[autocomplete][value_container][lng_field]", lng))
        search_fields.append(("ville[distance][value_field]", radius))
    else:
        # Default to ligue search if no city provided
        search_fields = [("recherche_type", "ligue")]

    return _run_tournament_search(search_fields, start_date, end_date, page)