def _build_fft_tournament_search_body(
    form_build_id,
    sorting_option=None,
    page=0,
    start_date=None,
    end_date=None,
    city="",
    distance=15,
    categories=None,
    levels=None,
    lat=None,
    lng=None,
    ages=None,
    tournament_types=None,
    national_cup=False,
):
    """Build the form-encoded body posted to the FFT tournament search endpoint.

    Pure string construction (plus logging); no network access.

    Args:
        form_build_id: Value of the ``form_build_id`` input read from the
            search page; sent back verbatim.
        sorting_option: Value for ``sort=``; sent verbatim, so it must already
            be form-encoded (e.g. ``"dateDebut+asc"``, which is the default).
        page: Value for ``page=``.
        start_date, end_date: Sent verbatim as ``date[start]`` / ``date[end]``.
            When only ``start_date`` is given it is parsed as ``%d/%m/%y`` and
            the end date defaults to 90 days later; if parsing fails no date
            filter is sent.
        city: City name. A non-blank value selects ``recherche_type=ville``;
            otherwise ``recherche_type=ligue`` is used.
        distance: Search radius, converted with ``int()`` (presumably
            kilometres - confirm against callers). Only used with a city.
        categories: Iterable of values sent as ``epreuve[<v>]=<v>``.
        levels: Iterable of values sent as ``categorie_tournoi[<v>]=<v>``.
        lat, lng: Optional coordinates, only sent with a city and only when
            both are provided.
        ages: Iterable of values sent as ``categorie_age[<v>]=<v>``.
        tournament_types: Iterable of strings, each ``.capitalize()``-d and
            sent as ``type[<V>]=<V>``.
        national_cup: When truthy, adds ``tournoi_npc=1``.

    Returns:
        The request body as a single ``str``.
    """
    # Function-scope import: the file's import block is not visible from here.
    from urllib.parse import quote_plus

    # --- date window -------------------------------------------------------
    date_component = ""
    if start_date and end_date:
        date_component = f"&date[start]={start_date}&date[end]={end_date}"
    elif start_date:
        try:
            start_dt = datetime.strptime(start_date, "%d/%m/%y")
        except ValueError:
            logger.warning(f"Invalid date format: {start_date}")
        else:
            end_dt = start_dt + timedelta(days=90)
            date_component = (
                f"&date[start]={start_date}"
                f"&date[end]={end_dt.strftime('%d/%m/%y')}"
            )

    # --- filters -----------------------------------------------------------
    filter_parts = []
    if categories:
        logger.info(f"Adding categories filter: {categories}")
        filter_parts.extend(f"&epreuve[{c}]={c}" for c in categories)
    if levels:
        logger.info(f"Adding levels filter: {levels}")
        filter_parts.extend(f"&categorie_tournoi[{lv}]={lv}" for lv in levels)
    if ages:
        logger.info(f"Adding ages filter: {ages}")
        filter_parts.extend(f"&categorie_age[{a}]={a}" for a in ages)
    if tournament_types:
        logger.info(f"Adding types filter: {tournament_types}")
        for t_type in tournament_types:
            capitalized_type = t_type.capitalize()
            filter_parts.append(f"&type[{capitalized_type}]={capitalized_type}")
    if national_cup:
        logger.info("Adding national cup filter")
        filter_parts.append("&tournoi_npc=1")
    filter_params = "".join(filter_parts)

    # Single source of truth for the sort default (used by both search modes).
    sort_param = f"&sort={sorting_option or 'dateDebut+asc'}"

    # --- search scope: around a city, or league-wide -----------------------
    city_name = city.strip() if city else ""
    if city_name:
        # quote_plus keeps the previous "space -> +" behaviour and additionally
        # escapes characters ("&", "=", "#", accents, ...) that would otherwise
        # corrupt the form body.
        city_name_encoded = quote_plus(city_name)
        search_scope = (
            "recherche_type=ville"
            f"&ville[autocomplete][value_container][value_field]={city_name_encoded}"
            f"&ville[autocomplete][value_container][label_field]={city_name_encoded}"
        )
        # Explicit None/"" checks so that a coordinate of 0 is not discarded.
        if lat not in (None, "") and lng not in (None, ""):
            search_scope += (
                f"&ville[autocomplete][value_container][lat_field]={lat}"
                f"&ville[autocomplete][value_container][lng_field]={lng}"
            )
        search_scope += f"&ville[distance][value_field]={int(distance)}"
    else:
        # Default to ligue search if no city provided
        search_scope = "recherche_type=ligue"

    # filter_params is included for BOTH scopes; it used to be silently
    # dropped from city searches.
    return (
        f"{search_scope}&pratique=PADEL{date_component}{filter_params}"
        f"&page={page}{sort_param}"
        f"&form_build_id={form_build_id}"
        "&form_id=recherche_tournois_form"
        "&_triggering_element_name=submit_page"
        "&_triggering_element_value=Submit+page"
    )


# NOTE(review): parameter order/defaults below mirror the sibling tournament
# scrapers' signatures - confirm against the file before merging.
def scrape_fft_all_tournaments(
    sorting_option=None,
    page=0,
    start_date=None,
    end_date=None,
    city="",
    distance=15,
    categories=None,
    levels=None,
    lat=None,
    lng=None,
    ages=None,
    tournament_types=None,
    national_cup=False,
):
    """
    Scrape FFT padel tournaments through a headless Chromium (Playwright).

    Opens https://tenup.fft.fr/recherche/tournois, reads the page's
    ``form_build_id``, then POSTs the search form to
    https://tenup.fft.fr/system/ajax from inside the page and hands the decoded
    JSON to ``_parse_ajax_response``.

    See ``_build_fft_tournament_search_body`` for how each argument is mapped
    onto the request body.

    Returns:
        Whatever ``_parse_ajax_response`` returns for the decoded JSON, or
        ``None`` when the browser lands on a ``queue-it.net`` URL, the
        ``form_build_id`` input is missing, the AJAX call is unsuccessful, the
        response is not valid JSON, or any exception is raised (it is logged,
        not propagated).
    """
    logger.info(f"Starting Playwright scraping for city: {city}")

    # The body is handed to the page function as an argument rather than
    # interpolated into the script, so its content can never break (or inject
    # into) the JavaScript source.
    ajax_script = """
    async (body) => {
        try {
            const response = await fetch('https://tenup.fft.fr/system/ajax', {
                method: 'POST',
                headers: {
                    'Accept': 'application/json, text/javascript, */*; q=0.01',
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                    'X-Requested-With': 'XMLHttpRequest',
                    'Origin': 'https://tenup.fft.fr',
                    'Referer': 'https://tenup.fft.fr/recherche/tournois'
                },
                body: body
            });

            const status = response.status;
            const responseText = await response.text();

            return {
                success: response.ok,
                status: status,
                responseText: responseText
            };
        } catch (error) {
            return {
                success: false,
                error: error.message
            };
        }
    }
    """

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            # One finally instead of a close() on every exit path, so the
            # browser is also released when something below raises.
            try:
                page_obj = browser.new_page()

                page_obj.set_extra_http_headers(
                    {
                        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
                    }
                )

                # Navigate to FFT
                target_url = "https://tenup.fft.fr/recherche/tournois"
                logger.info(f"Navigating to: {target_url}")
                page_obj.goto(target_url)

                current_url = page_obj.url
                logger.info(f"Current URL: {current_url}")

                if "queue-it.net" in current_url.lower():
                    logger.warning("Still in Queue-It")
                    return None

                # The form_build_id token must be echoed back in the request.
                form_input = page_obj.query_selector('input[name="form_build_id"]')
                if not form_input:
                    logger.error("Could not find form_build_id")
                    return None

                form_build_id = form_input.get_attribute("value")
                logger.info(f"Extracted form_build_id: {form_build_id}")

                params = _build_fft_tournament_search_body(
                    form_build_id,
                    sorting_option=sorting_option,
                    page=page,
                    start_date=start_date,
                    end_date=end_date,
                    city=city,
                    distance=distance,
                    categories=categories,
                    levels=levels,
                    lat=lat,
                    lng=lng,
                    ages=ages,
                    tournament_types=tournament_types,
                    national_cup=national_cup,
                )
                logger.info(f"AJAX Parameters: {params}")

                logger.info("Making AJAX request...")
                result = page_obj.evaluate(ajax_script, params)
            finally:
                browser.close()

        logger.info(f"AJAX Response Status: {result.get('status')}")
        logger.info(f"AJAX Response Success: {result.get('success')}")

        if not result.get("success"):
            logger.error(f"AJAX request failed: {result.get('error')}")
            return None

        response_text = result.get("responseText", "")
        logger.info(f"Raw Response Length: {len(response_text)}")

        try:
            json_data = json.loads(response_text)
        except json.JSONDecodeError as json_error:
            logger.error(f"JSON Parse Error: {json_error}")
            logger.error(f"Response text: {response_text}")
            return None

        logger.info(f"JSON Response Type: {type(json_data)}")
        return _parse_ajax_response(json_data)

    except Exception as e:
        # Top-level boundary: callers expect None on any failure.
        logger.error(f"Error in Playwright scraping: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        return None
"clubs", "nombreResultat": 0, "club_markers": []} - - -def scrape_fft_all_tournaments_via_clubs( - sorting_option=None, - page=0, - start_date=None, - end_date=None, - city="", - distance=15, - categories=None, - levels=None, - lat=None, - lng=None, - ages=None, - tournament_types=None, - national_cup=False, -): - """ - Get tournaments by location using the working club-based approach - 1. Get clubs in the area using scrape_federal_clubs - 2. Get tournaments for each club using scrape_fft_club_tournaments - 3. Aggregate and filter results - """ - logger.info( - f"Starting hybrid club-based tournament scraping for city: {city}, distance: {distance}km" - ) - - try: - # Step 1: Get clubs in the area - logger.info("Step 1: Getting clubs in the area...") - clubs_result = scrape_federal_clubs( - country="fr", city=city, latitude=lat, longitude=lng, radius=distance - ) - - if not clubs_result or clubs_result.get("nombreResultat", 0) == 0: - logger.warning(f"No clubs found for city: {city}") - return {"tournaments": [], "total_results": 0, "current_count": 0} - - clubs = clubs_result.get("club_markers", []) - logger.info(f"Found {len(clubs)} clubs in {city} area") - - # Step 2: Get tournaments for each club - all_tournaments = [] - processed_clubs = 0 - max_clubs = 50 # Limit to prevent too many requests - - for club in clubs[:max_clubs]: - club_code = club.get("codeClub") - club_name = club.get("nomClub") - - if not club_code or not club_name: - continue - - logger.info(f"Getting tournaments for club: {club_name} ({club_code})") - - try: - # Get tournaments for this club - club_result = scrape_fft_club_tournaments( - club_code=club_code, - club_name=club_name, - start_date=start_date, - end_date=end_date, - page=0, # Always get first page for each club - ) - - if club_result and club_result.get("tournaments"): - tournaments = club_result["tournaments"] - - # Apply additional filters that might not be handled by club search - filtered_tournaments = [] - for tournament in 
tournaments: - # Apply category filter - if categories: - tournament_category = tournament.get("categorieTournoi", "") - if not any( - cat.lower() in tournament_category.lower() - for cat in categories - ): - continue - - # Apply level filter - if levels: - tournament_level = tournament.get("niveau", "") - if not any( - level.lower() in tournament_level.lower() - for level in levels - ): - continue - - # Apply age filter - if ages: - tournament_ages = tournament.get("categorieAge", "") - if not any( - age.lower() in tournament_ages.lower() for age in ages - ): - continue - - # Apply type filter - if tournament_types: - tournament_type = tournament.get("type", "") - if not any( - t_type.lower() in tournament_type.lower() - for t_type in tournament_types - ): - continue - - # Apply national cup filter - if national_cup: - is_national_cup = tournament.get("tournoi_npc", False) - if not is_national_cup: - continue - - filtered_tournaments.append(tournament) - - all_tournaments.extend(filtered_tournaments) - logger.info( - f"Added {len(filtered_tournaments)} tournaments from {club_name}" - ) - - processed_clubs += 1 - - # Add small delay to be respectful - import time - - time.sleep(0.5) - - except Exception as club_error: - logger.warning( - f"Error getting tournaments for club {club_name}: {club_error}" - ) - continue - - logger.info( - f"Processed {processed_clubs} clubs, found {len(all_tournaments)} total tournaments" - ) - - # Step 3: Remove duplicates (tournaments might appear in multiple club searches) - unique_tournaments = [] - seen_tournament_ids = set() - - for tournament in all_tournaments: - tournament_id = tournament.get("id") - if tournament_id and tournament_id not in seen_tournament_ids: - unique_tournaments.append(tournament) - seen_tournament_ids.add(tournament_id) - - logger.info( - f"After deduplication: {len(unique_tournaments)} unique tournaments" - ) - - # Step 4: Sort tournaments - if sorting_option == "dateDebut+asc" or not sorting_option: - 
unique_tournaments.sort( - key=lambda x: x.get("dateDebut", {}).get("date", "") - ) - elif sorting_option == "dateDebut+desc": - unique_tournaments.sort( - key=lambda x: x.get("dateDebut", {}).get("date", ""), reverse=True - ) - # Note: Distance sorting would require additional calculation - - # Step 5: Handle pagination - tournaments_per_page = 20 - start_idx = page * tournaments_per_page - end_idx = start_idx + tournaments_per_page - - page_tournaments = unique_tournaments[start_idx:end_idx] - - return { - "tournaments": page_tournaments, - "total_results": len(unique_tournaments), - "current_count": len(page_tournaments), - "method": "club_based_hybrid", - } - - except Exception as e: - logger.error(f"Error in hybrid club-based scraping: {e}") - logger.error(f"Traceback: {traceback.format_exc()}") - return None