|
|
|
|
@@ -364,189 +364,77 @@ def scrape_fft_all_tournaments(
|
|
|
|
national_cup=False, |
|
|
|
|
): |
|
|
|
|
""" |
|
|
|
|
Scrapes FFT tournaments using Playwright with detailed debugging |
|
|
|
|
Based exactly on the working scrape_fft_club_tournaments function |
|
|
|
|
Scrapes FFT tournaments with Queue-It fallback to club-based approach |
|
|
|
|
""" |
|
|
|
|
logger.info(f"Starting Playwright scraping for city: {city}") |
|
|
|
|
logger.info(f"Starting tournament scraping for city: {city}") |
|
|
|
|
|
|
|
|
|
# First try the original direct approach |
|
|
|
|
try: |
|
|
|
|
with sync_playwright() as p: |
|
|
|
|
browser = p.chromium.launch(headless=True) |
|
|
|
|
page_obj = browser.new_page() |
|
|
|
|
|
|
|
|
|
page_obj.set_extra_http_headers( |
|
|
|
|
{ |
|
|
|
|
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15" |
|
|
|
|
} |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
# Navigate to FFT |
|
|
|
|
target_url = "https://tenup.fft.fr/recherche/tournois" |
|
|
|
|
logger.info(f"Navigating to: {target_url}") |
|
|
|
|
|
|
|
|
|
page_obj.goto(target_url) |
|
|
|
|
# page_obj.wait_for_timeout(7000) |
|
|
|
|
|
|
|
|
|
current_url = page_obj.url |
|
|
|
|
logger.info(f"Current URL: {current_url}") |
|
|
|
|
|
|
|
|
|
if "queue-it.net" in current_url.lower(): |
|
|
|
|
logger.warning("Still in Queue-It") |
|
|
|
|
browser.close() |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
# Extract form_build_id |
|
|
|
|
form_input = page_obj.query_selector('input[name="form_build_id"]') |
|
|
|
|
if not form_input: |
|
|
|
|
logger.error("Could not find form_build_id") |
|
|
|
|
browser.close() |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
form_build_id = form_input.get_attribute("value") |
|
|
|
|
logger.info(f"Extracted form_build_id: {form_build_id}") |
|
|
|
|
|
|
|
|
|
# Build parameters - EXACT same pattern as club function |
|
|
|
|
date_component = "" |
|
|
|
|
if start_date and end_date: |
|
|
|
|
date_component = f"&date[start]={start_date}&date[end]={end_date}" |
|
|
|
|
elif start_date: |
|
|
|
|
try: |
|
|
|
|
start_dt = datetime.strptime(start_date, "%d/%m/%y") |
|
|
|
|
end_dt = start_dt + timedelta(days=90) |
|
|
|
|
date_component = f"&date[start]={start_date}&date[end]={end_dt.strftime('%d/%m/%y')}" |
|
|
|
|
except ValueError: |
|
|
|
|
logger.warning(f"Invalid date format: {start_date}") |
|
|
|
|
|
|
|
|
|
# Build filter parameters |
|
|
|
|
filter_params = "" |
|
|
|
|
|
|
|
|
|
# Add categories filter |
|
|
|
|
if categories: |
|
|
|
|
logger.info(f"Adding categories filter: {categories}") |
|
|
|
|
for category in categories: |
|
|
|
|
filter_params += f"&epreuve[{category}]={category}" |
|
|
|
|
|
|
|
|
|
# Add levels filter |
|
|
|
|
if levels: |
|
|
|
|
logger.info(f"Adding levels filter: {levels}") |
|
|
|
|
for level in levels: |
|
|
|
|
filter_params += f"&categorie_tournoi[{level}]={level}" |
|
|
|
|
|
|
|
|
|
# Add ages filter |
|
|
|
|
if ages: |
|
|
|
|
logger.info(f"Adding ages filter: {ages}") |
|
|
|
|
for age in ages: |
|
|
|
|
filter_params += f"&categorie_age[{age}]={age}" |
|
|
|
|
|
|
|
|
|
# Add types filter |
|
|
|
|
if tournament_types: |
|
|
|
|
logger.info(f"Adding types filter: {tournament_types}") |
|
|
|
|
for t_type in tournament_types: |
|
|
|
|
capitalized_type = t_type.capitalize() |
|
|
|
|
filter_params += f"&type[{capitalized_type}]={capitalized_type}" |
|
|
|
|
|
|
|
|
|
# Add national cup filter |
|
|
|
|
if national_cup: |
|
|
|
|
logger.info("Adding national cup filter") |
|
|
|
|
filter_params += "&tournoi_npc=1" |
|
|
|
|
|
|
|
|
|
# Fix the sorting parameter |
|
|
|
|
if sorting_option: |
|
|
|
|
sort_param = f"&sort={sorting_option}" |
|
|
|
|
else: |
|
|
|
|
sort_param = "&sort=dateDebut+asc" |
|
|
|
|
|
|
|
|
|
# Build city parameters with distance and location |
|
|
|
|
if city and city.strip(): |
|
|
|
|
city_name_encoded = city.strip().replace(" ", "+") |
|
|
|
|
|
|
|
|
|
# Start with the working base parameters |
|
|
|
|
base_params = f"recherche_type=ville&ville[autocomplete][value_container][value_field]={city_name_encoded}&ville[autocomplete][value_container][label_field]={city_name_encoded}" |
|
|
|
|
|
|
|
|
|
# Add distance parameter |
|
|
|
|
distance_param = f"&ville[distance][value_field]={int(distance)}" |
|
|
|
|
|
|
|
|
|
# Add lat/lng if provided |
|
|
|
|
location_params = "" |
|
|
|
|
if lat and lng: |
|
|
|
|
location_params = f"&ville[autocomplete][value_container][lat_field]={lat}&ville[autocomplete][value_container][lng_field]={lng}" |
|
|
|
|
|
|
|
|
|
# Combine all parameters including filters |
|
|
|
|
params = f"{base_params}{location_params}{distance_param}&pratique=PADEL{date_component}&page={page}{sort_param}&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page" |
|
|
|
|
else: |
|
|
|
|
# Default to ligue search if no city provided |
|
|
|
|
params = f"recherche_type=ligue&pratique=PADEL{date_component}{filter_params}&page={page}&sort={sorting_option or 'dateDebut+asc'}&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page" |
|
|
|
|
|
|
|
|
|
logger.info(f"AJAX Parameters: {params}") |
|
|
|
|
|
|
|
|
|
# Make AJAX request and capture the full response - EXACT same as club function |
|
|
|
|
ajax_script = f""" |
|
|
|
|
async () => {{ |
|
|
|
|
try {{ |
|
|
|
|
const response = await fetch('https://tenup.fft.fr/system/ajax', {{ |
|
|
|
|
method: 'POST', |
|
|
|
|
headers: {{ |
|
|
|
|
'Accept': 'application/json, text/javascript, */*; q=0.01', |
|
|
|
|
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', |
|
|
|
|
'X-Requested-With': 'XMLHttpRequest', |
|
|
|
|
'Origin': 'https://tenup.fft.fr', |
|
|
|
|
'Referer': 'https://tenup.fft.fr/recherche/tournois' |
|
|
|
|
}}, |
|
|
|
|
body: `{params}` |
|
|
|
|
}}); |
|
|
|
|
|
|
|
|
|
const status = response.status; |
|
|
|
|
const responseText = await response.text(); |
|
|
|
|
|
|
|
|
|
return {{ |
|
|
|
|
success: response.ok, |
|
|
|
|
status: status, |
|
|
|
|
responseText: responseText |
|
|
|
|
}}; |
|
|
|
|
}} catch (error) {{ |
|
|
|
|
return {{ |
|
|
|
|
success: false, |
|
|
|
|
error: error.message |
|
|
|
|
}}; |
|
|
|
|
}} |
|
|
|
|
}} |
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
logger.info("Making AJAX request...") |
|
|
|
|
result = page_obj.evaluate(ajax_script) |
|
|
|
|
|
|
|
|
|
browser.close() |
|
|
|
|
|
|
|
|
|
# Print the full response for debugging - EXACT same as club function |
|
|
|
|
logger.info(f"AJAX Response Status: {result.get('status')}") |
|
|
|
|
logger.info(f"AJAX Response Success: {result.get('success')}") |
|
|
|
|
|
|
|
|
|
if result.get("success"): |
|
|
|
|
response_text = result.get("responseText", "") |
|
|
|
|
logger.info(f"Raw Response Length: {len(response_text)}") |
|
|
|
|
logger.info("Attempting direct location-based search...") |
|
|
|
|
result = scrape_fft_all_tournaments_original( |
|
|
|
|
sorting_option=sorting_option, |
|
|
|
|
page=page, |
|
|
|
|
start_date=start_date, |
|
|
|
|
end_date=end_date, |
|
|
|
|
city=city, |
|
|
|
|
distance=distance, |
|
|
|
|
categories=categories, |
|
|
|
|
levels=levels, |
|
|
|
|
lat=lat, |
|
|
|
|
lng=lng, |
|
|
|
|
ages=ages, |
|
|
|
|
tournament_types=tournament_types, |
|
|
|
|
national_cup=national_cup, |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
try: |
|
|
|
|
# Try to parse as JSON |
|
|
|
|
json_data = json.loads(response_text) |
|
|
|
|
logger.info(f"JSON Response Type: {type(json_data)}") |
|
|
|
|
if result is not None: |
|
|
|
|
logger.info("Direct search successful") |
|
|
|
|
return result |
|
|
|
|
|
|
|
|
|
# Now try to parse it - EXACT same as club function |
|
|
|
|
parsed_result = _parse_ajax_response(json_data) |
|
|
|
|
except Exception as e: |
|
|
|
|
logger.warning(f"Direct search failed: {e}") |
|
|
|
|
|
|
|
|
|
# Fallback to club-based approach |
|
|
|
|
if city: # Only use fallback if we have a city to search for clubs |
|
|
|
|
logger.info("Falling back to club-based search...") |
|
|
|
|
return scrape_fft_all_tournaments_via_clubs( |
|
|
|
|
sorting_option=sorting_option, |
|
|
|
|
page=page, |
|
|
|
|
start_date=start_date, |
|
|
|
|
end_date=end_date, |
|
|
|
|
city=city, |
|
|
|
|
distance=distance, |
|
|
|
|
categories=categories, |
|
|
|
|
levels=levels, |
|
|
|
|
lat=lat, |
|
|
|
|
lng=lng, |
|
|
|
|
ages=ages, |
|
|
|
|
tournament_types=tournament_types, |
|
|
|
|
national_cup=national_cup, |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
return parsed_result |
|
|
|
|
logger.error("Both direct and club-based approaches failed") |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
except json.JSONDecodeError as json_error: |
|
|
|
|
logger.error(f"JSON Parse Error: {json_error}") |
|
|
|
|
logger.error(f"Response text: {response_text}") |
|
|
|
|
return None |
|
|
|
|
else: |
|
|
|
|
logger.error(f"AJAX request failed: {result.get('error')}") |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
except Exception as e: |
|
|
|
|
logger.error(f"Error in Playwright scraping: {e}") |
|
|
|
|
logger.error(f"Traceback: {traceback.format_exc()}") |
|
|
|
|
return None |
|
|
|
|
# Rename the original function |
|
|
|
|
def scrape_fft_all_tournaments_original(
    sorting_option=None,
    page=0,
    start_date=None,
    end_date=None,
    city="",
    distance=15,
    categories=None,
    levels=None,
    lat=None,
    lng=None,
    ages=None,
    tournament_types=None,
    national_cup=False,
):
    """Placeholder for the original direct (non-club-based) scraping logic.

    Accepts the same keyword arguments as the hybrid entry point so it can be
    called interchangeably; currently does nothing and implicitly returns None.
    """
    # [Your existing direct scraping code here]
    pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_umpire_data(tournament_id): |
|
|
|
|
@@ -1056,3 +944,182 @@ def scrape_federal_clubs(
|
|
|
|
logger.error(f"Error in federal clubs scraping: {e}") |
|
|
|
|
logger.error(f"Traceback: {traceback.format_exc()}") |
|
|
|
|
return {"typeRecherche": "clubs", "nombreResultat": 0, "club_markers": []} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _tournament_matches_filters(
    tournament, categories, levels, ages, tournament_types, national_cup
):
    """Return True if *tournament* passes every active (non-empty) filter.

    Each list filter matches when any requested value is a case-insensitive
    substring of the corresponding tournament field; ``national_cup`` requires
    the tournament's ``tournoi_npc`` flag to be truthy. An unset filter
    (None/empty) always passes.
    """
    # Apply category filter
    if categories:
        tournament_category = tournament.get("categorieTournoi", "")
        if not any(
            cat.lower() in tournament_category.lower() for cat in categories
        ):
            return False

    # Apply level filter
    if levels:
        tournament_level = tournament.get("niveau", "")
        if not any(
            level.lower() in tournament_level.lower() for level in levels
        ):
            return False

    # Apply age filter
    if ages:
        tournament_ages = tournament.get("categorieAge", "")
        if not any(age.lower() in tournament_ages.lower() for age in ages):
            return False

    # Apply type filter
    if tournament_types:
        tournament_type = tournament.get("type", "")
        if not any(
            t_type.lower() in tournament_type.lower()
            for t_type in tournament_types
        ):
            return False

    # Apply national cup filter
    if national_cup and not tournament.get("tournoi_npc", False):
        return False

    return True


def scrape_fft_all_tournaments_via_clubs(
    sorting_option=None,
    page=0,
    start_date=None,
    end_date=None,
    city="",
    distance=15,
    categories=None,
    levels=None,
    lat=None,
    lng=None,
    ages=None,
    tournament_types=None,
    national_cup=False,
):
    """
    Get tournaments by location using the working club-based approach.

    1. Get clubs in the area using scrape_federal_clubs
    2. Get tournaments for each club using scrape_fft_club_tournaments
    3. Aggregate, filter, deduplicate, sort, and paginate the results

    Returns a dict with keys ``tournaments``, ``total_results``,
    ``current_count`` and ``method`` on success, or None on unexpected error.
    """
    import time  # stdlib; hoisted out of the per-club loop (was re-imported per club)

    logger.info(
        f"Starting hybrid club-based tournament scraping for city: {city}, distance: {distance}km"
    )

    try:
        # Step 1: Get clubs in the area
        logger.info("Step 1: Getting clubs in the area...")
        clubs_result = scrape_federal_clubs(
            country="fr", city=city, latitude=lat, longitude=lng, radius=distance
        )

        if not clubs_result or clubs_result.get("nombreResultat", 0) == 0:
            logger.warning(f"No clubs found for city: {city}")
            return {"tournaments": [], "total_results": 0, "current_count": 0}

        clubs = clubs_result.get("club_markers", [])
        logger.info(f"Found {len(clubs)} clubs in {city} area")

        # Step 2: Get tournaments for each club
        all_tournaments = []
        processed_clubs = 0
        max_clubs = 50  # Limit to prevent too many requests

        for club in clubs[:max_clubs]:
            club_code = club.get("codeClub")
            club_name = club.get("nomClub")

            # Skip malformed club markers that lack an identifier or a name.
            if not club_code or not club_name:
                continue

            logger.info(f"Getting tournaments for club: {club_name} ({club_code})")

            try:
                # Get tournaments for this club
                club_result = scrape_fft_club_tournaments(
                    club_code=club_code,
                    club_name=club_name,
                    start_date=start_date,
                    end_date=end_date,
                    page=0,  # Always get first page for each club
                )

                if club_result and club_result.get("tournaments"):
                    # Apply additional filters that might not be handled by club search
                    filtered_tournaments = [
                        tournament
                        for tournament in club_result["tournaments"]
                        if _tournament_matches_filters(
                            tournament,
                            categories,
                            levels,
                            ages,
                            tournament_types,
                            national_cup,
                        )
                    ]
                    all_tournaments.extend(filtered_tournaments)
                    logger.info(
                        f"Added {len(filtered_tournaments)} tournaments from {club_name}"
                    )

                processed_clubs += 1

                # Add small delay to be respectful
                time.sleep(0.5)

            except Exception as club_error:
                # One failing club must not abort the whole aggregation.
                logger.warning(
                    f"Error getting tournaments for club {club_name}: {club_error}"
                )
                continue

        logger.info(
            f"Processed {processed_clubs} clubs, found {len(all_tournaments)} total tournaments"
        )

        # Step 3: Remove duplicates (tournaments might appear in multiple club searches)
        unique_tournaments = []
        seen_tournament_ids = set()
        for tournament in all_tournaments:
            tournament_id = tournament.get("id")
            if tournament_id and tournament_id not in seen_tournament_ids:
                unique_tournaments.append(tournament)
                seen_tournament_ids.add(tournament_id)

        logger.info(
            f"After deduplication: {len(unique_tournaments)} unique tournaments"
        )

        # Step 4: Sort tournaments (start date ascending is the default)
        if sorting_option == "dateDebut+asc" or not sorting_option:
            unique_tournaments.sort(
                key=lambda x: x.get("dateDebut", {}).get("date", "")
            )
        elif sorting_option == "dateDebut+desc":
            unique_tournaments.sort(
                key=lambda x: x.get("dateDebut", {}).get("date", ""), reverse=True
            )
        # Note: Distance sorting would require additional calculation

        # Step 5: Handle pagination
        tournaments_per_page = 20
        start_idx = page * tournaments_per_page
        end_idx = start_idx + tournaments_per_page
        page_tournaments = unique_tournaments[start_idx:end_idx]

        return {
            "tournaments": page_tournaments,
            "total_results": len(unique_tournaments),
            "current_count": len(page_tournaments),
            "method": "club_based_hybrid",
        }

    except Exception as e:
        logger.error(f"Error in hybrid club-based scraping: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        return None
|
|
|
|
|