|
|
|
@ -30,7 +30,7 @@ def scrape_fft_club_tournaments( |
|
|
|
""" |
|
|
|
""" |
|
|
|
Scrapes FFT tournaments using Playwright with detailed debugging |
|
|
|
Scrapes FFT tournaments using Playwright with detailed debugging |
|
|
|
""" |
|
|
|
""" |
|
|
|
logger.info(f"Starting Playwright scraping for {club_name}") |
|
|
|
# logger.info(f"Starting Playwright scraping for {club_name}") |
|
|
|
try: |
|
|
|
try: |
|
|
|
with sync_playwright() as p: |
|
|
|
with sync_playwright() as p: |
|
|
|
browser = get_browser_for_environment(p) |
|
|
|
browser = get_browser_for_environment(p) |
|
|
|
@ -44,13 +44,13 @@ def scrape_fft_club_tournaments( |
|
|
|
|
|
|
|
|
|
|
|
# Navigate to FFT |
|
|
|
# Navigate to FFT |
|
|
|
target_url = "https://tenup.fft.fr/recherche/tournois" |
|
|
|
target_url = "https://tenup.fft.fr/recherche/tournois" |
|
|
|
# logger.info(f"Navigating to: {target_url}") |
|
|
|
# # logger.info(f"Navigating to: {target_url}") |
|
|
|
|
|
|
|
|
|
|
|
page_obj.goto(target_url) |
|
|
|
page_obj.goto(target_url) |
|
|
|
page_obj.wait_for_timeout(2000) |
|
|
|
page_obj.wait_for_timeout(2000) |
|
|
|
|
|
|
|
|
|
|
|
current_url = page_obj.url |
|
|
|
current_url = page_obj.url |
|
|
|
logger.info(f"Current URL: {current_url}") |
|
|
|
# logger.info(f"Current URL: {current_url}") |
|
|
|
|
|
|
|
|
|
|
|
if "queue-it.net" in current_url.lower(): |
|
|
|
if "queue-it.net" in current_url.lower(): |
|
|
|
logger.warning("Still in Queue-It") |
|
|
|
logger.warning("Still in Queue-It") |
|
|
|
@ -65,7 +65,7 @@ def scrape_fft_club_tournaments( |
|
|
|
return None |
|
|
|
return None |
|
|
|
|
|
|
|
|
|
|
|
form_build_id = form_input.get_attribute("value") |
|
|
|
form_build_id = form_input.get_attribute("value") |
|
|
|
# logger.info(f"Extracted form_build_id: {form_build_id}") |
|
|
|
# # logger.info(f"Extracted form_build_id: {form_build_id}") |
|
|
|
|
|
|
|
|
|
|
|
# Build parameters |
|
|
|
# Build parameters |
|
|
|
date_component = "" |
|
|
|
date_component = "" |
|
|
|
@ -84,7 +84,7 @@ def scrape_fft_club_tournaments( |
|
|
|
|
|
|
|
|
|
|
|
params = f"recherche_type=club&club[autocomplete][value_container][value_field]={club_code_clean}&club[autocomplete][value_container][label_field]={club_name_encoded}&pratique=PADEL{date_component}&page={page}&sort=dateDebut+asc&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page" |
|
|
|
params = f"recherche_type=club&club[autocomplete][value_container][value_field]={club_code_clean}&club[autocomplete][value_container][label_field]={club_name_encoded}&pratique=PADEL{date_component}&page={page}&sort=dateDebut+asc&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page" |
|
|
|
|
|
|
|
|
|
|
|
# logger.info(f"AJAX Parameters: {params}") |
|
|
|
# # logger.info(f"AJAX Parameters: {params}") |
|
|
|
|
|
|
|
|
|
|
|
# Make AJAX request and capture the full response |
|
|
|
# Make AJAX request and capture the full response |
|
|
|
ajax_script = f""" |
|
|
|
ajax_script = f""" |
|
|
|
@ -119,29 +119,29 @@ def scrape_fft_club_tournaments( |
|
|
|
}} |
|
|
|
}} |
|
|
|
""" |
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
# logger.info("Making AJAX request...") |
|
|
|
# # logger.info("Making AJAX request...") |
|
|
|
result = page_obj.evaluate(ajax_script) |
|
|
|
result = page_obj.evaluate(ajax_script) |
|
|
|
|
|
|
|
|
|
|
|
browser.close() |
|
|
|
browser.close() |
|
|
|
|
|
|
|
|
|
|
|
# Print the full response for debugging |
|
|
|
# Print the full response for debugging |
|
|
|
# logger.info(f"AJAX Response Status: {result.get('status')}") |
|
|
|
# # logger.info(f"AJAX Response Status: {result.get('status')}") |
|
|
|
# logger.info(f"AJAX Response Success: {result.get('success')}") |
|
|
|
# # logger.info(f"AJAX Response Success: {result.get('success')}") |
|
|
|
|
|
|
|
|
|
|
|
if result.get("success"): |
|
|
|
if result.get("success"): |
|
|
|
response_text = result.get("responseText", "") |
|
|
|
response_text = result.get("responseText", "") |
|
|
|
# logger.info(f"Raw Response Length: {len(response_text)}") |
|
|
|
# # logger.info(f"Raw Response Length: {len(response_text)}") |
|
|
|
# logger.info(f"Raw Response (first 500 chars): {response_text[:500]}") |
|
|
|
# # logger.info(f"Raw Response (first 500 chars): {response_text[:500]}") |
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
|
try: |
|
|
|
# Try to parse as JSON |
|
|
|
# Try to parse as JSON |
|
|
|
json_data = json.loads(response_text) |
|
|
|
json_data = json.loads(response_text) |
|
|
|
# logger.info(f"JSON Response Type: {type(json_data)}") |
|
|
|
# # logger.info(f"JSON Response Type: {type(json_data)}") |
|
|
|
# logger.info(f"JSON Response: {json.dumps(json_data, indent=2, default=str)}") |
|
|
|
# # logger.info(f"JSON Response: {json.dumps(json_data, indent=2, default=str)}") |
|
|
|
|
|
|
|
|
|
|
|
# Now try to parse it |
|
|
|
# Now try to parse it |
|
|
|
parsed_result = _parse_ajax_response(json_data) |
|
|
|
parsed_result = _parse_ajax_response(json_data) |
|
|
|
# logger.info(f"Parsed Result: {parsed_result}") |
|
|
|
# # logger.info(f"Parsed Result: {parsed_result}") |
|
|
|
|
|
|
|
|
|
|
|
return parsed_result |
|
|
|
return parsed_result |
|
|
|
|
|
|
|
|
|
|
|
@ -165,7 +165,7 @@ def scrape_fft_club_tournaments_all_pages( |
|
|
|
""" |
|
|
|
""" |
|
|
|
Scrapes all pages of FFT tournaments for a specific club |
|
|
|
Scrapes all pages of FFT tournaments for a specific club |
|
|
|
""" |
|
|
|
""" |
|
|
|
# logger.info(f"Starting complete tournament scraping for {club_name}") |
|
|
|
# # logger.info(f"Starting complete tournament scraping for {club_name}") |
|
|
|
|
|
|
|
|
|
|
|
all_tournaments = [] |
|
|
|
all_tournaments = [] |
|
|
|
page = 0 |
|
|
|
page = 0 |
|
|
|
@ -182,42 +182,42 @@ def scrape_fft_club_tournaments_all_pages( |
|
|
|
) |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
# Debug: Log what we got |
|
|
|
# Debug: Log what we got |
|
|
|
logger.info(f"Page {page} result: {result}") |
|
|
|
# logger.info(f"Page {page} result: {result}") |
|
|
|
|
|
|
|
|
|
|
|
if not result: |
|
|
|
if not result: |
|
|
|
logger.warning(f"No result for page {page}") |
|
|
|
logger.warning(f"No result for page {page}") |
|
|
|
break |
|
|
|
break |
|
|
|
|
|
|
|
|
|
|
|
tournaments = result.get("tournaments", []) |
|
|
|
tournaments = result.get("tournaments", []) |
|
|
|
logger.info(f"Page {page} returned {len(tournaments)} tournaments") |
|
|
|
# logger.info(f"Page {page} returned {len(tournaments)} tournaments") |
|
|
|
|
|
|
|
|
|
|
|
if not tournaments: |
|
|
|
if not tournaments: |
|
|
|
logger.info(f"No tournaments on page {page}, stopping") |
|
|
|
# logger.info(f"No tournaments on page {page}, stopping") |
|
|
|
break |
|
|
|
break |
|
|
|
|
|
|
|
|
|
|
|
all_tournaments.extend(tournaments) |
|
|
|
all_tournaments.extend(tournaments) |
|
|
|
|
|
|
|
|
|
|
|
# Check if we have all results |
|
|
|
# Check if we have all results |
|
|
|
total_results = result.get("total_results", 0) |
|
|
|
total_results = result.get("total_results", 0) |
|
|
|
logger.info( |
|
|
|
# logger.info( |
|
|
|
f"Total so far: {len(all_tournaments)}, Target: {total_results}" |
|
|
|
# f"Total so far: {len(all_tournaments)}, Target: {total_results}" |
|
|
|
) |
|
|
|
# ) |
|
|
|
|
|
|
|
|
|
|
|
if len(all_tournaments) >= total_results: |
|
|
|
if len(all_tournaments) >= total_results: |
|
|
|
logger.info("Got all tournaments, stopping") |
|
|
|
# logger.info("Got all tournaments, stopping") |
|
|
|
break |
|
|
|
break |
|
|
|
|
|
|
|
|
|
|
|
page += 1 |
|
|
|
page += 1 |
|
|
|
logger.info(f"Moving to page {page}") |
|
|
|
# logger.info(f"Moving to page {page}") |
|
|
|
# time.sleep(1) # Rate limiting |
|
|
|
# time.sleep(1) # Rate limiting |
|
|
|
|
|
|
|
|
|
|
|
except Exception as e: |
|
|
|
except Exception as e: |
|
|
|
logger.error(f"Error on page {page}: {e}") |
|
|
|
logger.error(f"Error on page {page}: {e}") |
|
|
|
break |
|
|
|
break |
|
|
|
|
|
|
|
|
|
|
|
logger.info( |
|
|
|
# logger.info( |
|
|
|
f"Completed scraping: {len(all_tournaments)} tournaments across {page + 1} pages" |
|
|
|
# f"Completed scraping: {len(all_tournaments)} tournaments across {page + 1} pages" |
|
|
|
) |
|
|
|
# ) |
|
|
|
|
|
|
|
|
|
|
|
return { |
|
|
|
return { |
|
|
|
"tournaments": all_tournaments, |
|
|
|
"tournaments": all_tournaments, |
|
|
|
@ -246,7 +246,7 @@ def _parse_ajax_response(commands): |
|
|
|
for command in commands: |
|
|
|
for command in commands: |
|
|
|
if command.get("command") == "recherche_tournois_update": |
|
|
|
if command.get("command") == "recherche_tournois_update": |
|
|
|
result_command = command |
|
|
|
result_command = command |
|
|
|
# logger.info("Found recherche_tournois_update command!") |
|
|
|
# # logger.info("Found recherche_tournois_update command!") |
|
|
|
break |
|
|
|
break |
|
|
|
|
|
|
|
|
|
|
|
if result_command and result_command.get("results"): |
|
|
|
if result_command and result_command.get("results"): |
|
|
|
@ -254,7 +254,7 @@ def _parse_ajax_response(commands): |
|
|
|
items = results.get("items", []) |
|
|
|
items = results.get("items", []) |
|
|
|
total_results = results.get("nb_results", 0) |
|
|
|
total_results = results.get("nb_results", 0) |
|
|
|
|
|
|
|
|
|
|
|
# logger.info(f"Processing {len(items)} tournaments from results") |
|
|
|
# # logger.info(f"Processing {len(items)} tournaments from results") |
|
|
|
|
|
|
|
|
|
|
|
for item in items: |
|
|
|
for item in items: |
|
|
|
# Parse dates - they're already in the correct format |
|
|
|
# Parse dates - they're already in the correct format |
|
|
|
@ -332,7 +332,7 @@ def _parse_ajax_response(commands): |
|
|
|
|
|
|
|
|
|
|
|
tournaments.append(tournament) |
|
|
|
tournaments.append(tournament) |
|
|
|
|
|
|
|
|
|
|
|
# logger.info( |
|
|
|
# # logger.info( |
|
|
|
# f"Successfully parsed {len(tournaments)} tournaments from response" |
|
|
|
# f"Successfully parsed {len(tournaments)} tournaments from response" |
|
|
|
# ) |
|
|
|
# ) |
|
|
|
return { |
|
|
|
return { |
|
|
|
@ -368,7 +368,7 @@ def scrape_fft_all_tournaments( |
|
|
|
Scrapes FFT tournaments using Playwright with detailed debugging |
|
|
|
Scrapes FFT tournaments using Playwright with detailed debugging |
|
|
|
Based exactly on the working scrape_fft_club_tournaments function |
|
|
|
Based exactly on the working scrape_fft_club_tournaments function |
|
|
|
""" |
|
|
|
""" |
|
|
|
logger.info(f"Starting Playwright scraping for city: {city}") |
|
|
|
# logger.info(f"Starting Playwright scraping for city: {city}") |
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
|
try: |
|
|
|
with sync_playwright() as p: |
|
|
|
with sync_playwright() as p: |
|
|
|
@ -383,13 +383,13 @@ def scrape_fft_all_tournaments( |
|
|
|
|
|
|
|
|
|
|
|
# Navigate to FFT |
|
|
|
# Navigate to FFT |
|
|
|
target_url = "https://tenup.fft.fr/recherche/tournois" |
|
|
|
target_url = "https://tenup.fft.fr/recherche/tournois" |
|
|
|
# logger.info(f"Navigating to: {target_url}") |
|
|
|
# # logger.info(f"Navigating to: {target_url}") |
|
|
|
|
|
|
|
|
|
|
|
page_obj.goto(target_url) |
|
|
|
page_obj.goto(target_url) |
|
|
|
page_obj.wait_for_timeout(500) |
|
|
|
page_obj.wait_for_timeout(500) |
|
|
|
|
|
|
|
|
|
|
|
current_url = page_obj.url |
|
|
|
current_url = page_obj.url |
|
|
|
# logger.info(f"Current URL: {current_url}") |
|
|
|
# # logger.info(f"Current URL: {current_url}") |
|
|
|
|
|
|
|
|
|
|
|
if "queue-it.net" in current_url.lower(): |
|
|
|
if "queue-it.net" in current_url.lower(): |
|
|
|
# logger.warning("Still in Queue-It") |
|
|
|
# logger.warning("Still in Queue-It") |
|
|
|
@ -404,7 +404,7 @@ def scrape_fft_all_tournaments( |
|
|
|
return None |
|
|
|
return None |
|
|
|
|
|
|
|
|
|
|
|
form_build_id = form_input.get_attribute("value") |
|
|
|
form_build_id = form_input.get_attribute("value") |
|
|
|
# logger.info(f"Extracted form_build_id: {form_build_id}") |
|
|
|
# # logger.info(f"Extracted form_build_id: {form_build_id}") |
|
|
|
|
|
|
|
|
|
|
|
# Build parameters - EXACT same pattern as club function |
|
|
|
# Build parameters - EXACT same pattern as club function |
|
|
|
date_component = "" |
|
|
|
date_component = "" |
|
|
|
@ -423,32 +423,32 @@ def scrape_fft_all_tournaments( |
|
|
|
|
|
|
|
|
|
|
|
# Add categories filter |
|
|
|
# Add categories filter |
|
|
|
if categories: |
|
|
|
if categories: |
|
|
|
# logger.info(f"Adding categories filter: {categories}") |
|
|
|
# # logger.info(f"Adding categories filter: {categories}") |
|
|
|
for category in categories: |
|
|
|
for category in categories: |
|
|
|
filter_params += f"&epreuve[{category}]={category}" |
|
|
|
filter_params += f"&epreuve[{category}]={category}" |
|
|
|
|
|
|
|
|
|
|
|
# Add levels filter |
|
|
|
# Add levels filter |
|
|
|
if levels: |
|
|
|
if levels: |
|
|
|
# logger.info(f"Adding levels filter: {levels}") |
|
|
|
# # logger.info(f"Adding levels filter: {levels}") |
|
|
|
for level in levels: |
|
|
|
for level in levels: |
|
|
|
filter_params += f"&categorie_tournoi[{level}]={level}" |
|
|
|
filter_params += f"&categorie_tournoi[{level}]={level}" |
|
|
|
|
|
|
|
|
|
|
|
# Add ages filter |
|
|
|
# Add ages filter |
|
|
|
if ages: |
|
|
|
if ages: |
|
|
|
# logger.info(f"Adding ages filter: {ages}") |
|
|
|
# # logger.info(f"Adding ages filter: {ages}") |
|
|
|
for age in ages: |
|
|
|
for age in ages: |
|
|
|
filter_params += f"&categorie_age[{age}]={age}" |
|
|
|
filter_params += f"&categorie_age[{age}]={age}" |
|
|
|
|
|
|
|
|
|
|
|
# Add types filter |
|
|
|
# Add types filter |
|
|
|
if tournament_types: |
|
|
|
if tournament_types: |
|
|
|
# logger.info(f"Adding types filter: {tournament_types}") |
|
|
|
# # logger.info(f"Adding types filter: {tournament_types}") |
|
|
|
for t_type in tournament_types: |
|
|
|
for t_type in tournament_types: |
|
|
|
capitalized_type = t_type.capitalize() |
|
|
|
capitalized_type = t_type.capitalize() |
|
|
|
filter_params += f"&type[{capitalized_type}]={capitalized_type}" |
|
|
|
filter_params += f"&type[{capitalized_type}]={capitalized_type}" |
|
|
|
|
|
|
|
|
|
|
|
# Add national cup filter |
|
|
|
# Add national cup filter |
|
|
|
if national_cup: |
|
|
|
if national_cup: |
|
|
|
# logger.info("Adding national cup filter") |
|
|
|
# # logger.info("Adding national cup filter") |
|
|
|
filter_params += "&tournoi_npc=1" |
|
|
|
filter_params += "&tournoi_npc=1" |
|
|
|
|
|
|
|
|
|
|
|
# Fix the sorting parameter |
|
|
|
# Fix the sorting parameter |
|
|
|
@ -478,7 +478,7 @@ def scrape_fft_all_tournaments( |
|
|
|
# Default to ligue search if no city provided |
|
|
|
# Default to ligue search if no city provided |
|
|
|
params = f"recherche_type=ligue&pratique=PADEL{date_component}{filter_params}&page={page}&sort={sorting_option or 'dateDebut+asc'}&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page" |
|
|
|
params = f"recherche_type=ligue&pratique=PADEL{date_component}{filter_params}&page={page}&sort={sorting_option or 'dateDebut+asc'}&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page" |
|
|
|
|
|
|
|
|
|
|
|
# logger.info(f"AJAX Parameters: {params}") |
|
|
|
# # logger.info(f"AJAX Parameters: {params}") |
|
|
|
|
|
|
|
|
|
|
|
# Make AJAX request and capture the full response - EXACT same as club function |
|
|
|
# Make AJAX request and capture the full response - EXACT same as club function |
|
|
|
ajax_script = f""" |
|
|
|
ajax_script = f""" |
|
|
|
@ -513,23 +513,23 @@ def scrape_fft_all_tournaments( |
|
|
|
}} |
|
|
|
}} |
|
|
|
""" |
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
# logger.info("Making AJAX request...") |
|
|
|
# # logger.info("Making AJAX request...") |
|
|
|
result = page_obj.evaluate(ajax_script) |
|
|
|
result = page_obj.evaluate(ajax_script) |
|
|
|
|
|
|
|
|
|
|
|
browser.close() |
|
|
|
browser.close() |
|
|
|
|
|
|
|
|
|
|
|
# Print the full response for debugging - EXACT same as club function |
|
|
|
# Print the full response for debugging - EXACT same as club function |
|
|
|
# logger.info(f"AJAX Response Status: {result.get('status')}") |
|
|
|
# # logger.info(f"AJAX Response Status: {result.get('status')}") |
|
|
|
# logger.info(f"AJAX Response Success: {result.get('success')}") |
|
|
|
# # logger.info(f"AJAX Response Success: {result.get('success')}") |
|
|
|
|
|
|
|
|
|
|
|
if result.get("success"): |
|
|
|
if result.get("success"): |
|
|
|
response_text = result.get("responseText", "") |
|
|
|
response_text = result.get("responseText", "") |
|
|
|
# logger.info(f"Raw Response Length: {len(response_text)}") |
|
|
|
# # logger.info(f"Raw Response Length: {len(response_text)}") |
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
|
try: |
|
|
|
# Try to parse as JSON |
|
|
|
# Try to parse as JSON |
|
|
|
json_data = json.loads(response_text) |
|
|
|
json_data = json.loads(response_text) |
|
|
|
# logger.info(f"JSON Response Type: {type(json_data)}") |
|
|
|
# # logger.info(f"JSON Response Type: {type(json_data)}") |
|
|
|
|
|
|
|
|
|
|
|
# Now try to parse it - EXACT same as club function |
|
|
|
# Now try to parse it - EXACT same as club function |
|
|
|
parsed_result = _parse_ajax_response(json_data) |
|
|
|
parsed_result = _parse_ajax_response(json_data) |
|
|
|
@ -554,7 +554,7 @@ def get_umpire_data(tournament_id): |
|
|
|
""" |
|
|
|
""" |
|
|
|
Umpire data extraction with improved Queue-It handling |
|
|
|
Umpire data extraction with improved Queue-It handling |
|
|
|
""" |
|
|
|
""" |
|
|
|
# logger.info(f"Getting umpire data for tournament {tournament_id}") |
|
|
|
# # logger.info(f"Getting umpire data for tournament {tournament_id}") |
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
|
try: |
|
|
|
with sync_playwright() as p: |
|
|
|
with sync_playwright() as p: |
|
|
|
@ -563,7 +563,7 @@ def get_umpire_data(tournament_id): |
|
|
|
|
|
|
|
|
|
|
|
# Navigate to tournament page |
|
|
|
# Navigate to tournament page |
|
|
|
url = f"https://tenup.fft.fr/tournoi/{tournament_id}" |
|
|
|
url = f"https://tenup.fft.fr/tournoi/{tournament_id}" |
|
|
|
# logger.info(f"Navigating to tournament page: {url}") |
|
|
|
# # logger.info(f"Navigating to tournament page: {url}") |
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
|
try: |
|
|
|
# Navigate with reasonable timeout |
|
|
|
# Navigate with reasonable timeout |
|
|
|
@ -580,7 +580,7 @@ def get_umpire_data(tournament_id): |
|
|
|
check_interval = 1000 # 10 seconds |
|
|
|
check_interval = 1000 # 10 seconds |
|
|
|
elapsed_time = 0 |
|
|
|
elapsed_time = 0 |
|
|
|
|
|
|
|
|
|
|
|
# logger.info( |
|
|
|
# # logger.info( |
|
|
|
# f"Waiting in queue for umpire data (max {max_queue_wait / 1000}s)..." |
|
|
|
# f"Waiting in queue for umpire data (max {max_queue_wait / 1000}s)..." |
|
|
|
# ) |
|
|
|
# ) |
|
|
|
|
|
|
|
|
|
|
|
@ -588,7 +588,7 @@ def get_umpire_data(tournament_id): |
|
|
|
elapsed_time < max_queue_wait |
|
|
|
elapsed_time < max_queue_wait |
|
|
|
and "queue-it.net" in page.url.lower() |
|
|
|
and "queue-it.net" in page.url.lower() |
|
|
|
): |
|
|
|
): |
|
|
|
# logger.info( |
|
|
|
# # logger.info( |
|
|
|
# f"Umpire queue wait: {elapsed_time / 1000}s elapsed" |
|
|
|
# f"Umpire queue wait: {elapsed_time / 1000}s elapsed" |
|
|
|
# ) |
|
|
|
# ) |
|
|
|
page.wait_for_timeout(check_interval) |
|
|
|
page.wait_for_timeout(check_interval) |
|
|
|
@ -596,7 +596,7 @@ def get_umpire_data(tournament_id): |
|
|
|
|
|
|
|
|
|
|
|
# Check if we've been redirected |
|
|
|
# Check if we've been redirected |
|
|
|
if "queue-it.net" not in page.url.lower(): |
|
|
|
if "queue-it.net" not in page.url.lower(): |
|
|
|
# logger.info( |
|
|
|
# # logger.info( |
|
|
|
# "Successfully passed through queue for umpire data!" |
|
|
|
# "Successfully passed through queue for umpire data!" |
|
|
|
# ) |
|
|
|
# ) |
|
|
|
break |
|
|
|
break |
|
|
|
@ -625,7 +625,7 @@ def get_umpire_data(tournament_id): |
|
|
|
browser.close() |
|
|
|
browser.close() |
|
|
|
|
|
|
|
|
|
|
|
if name or email or phone: |
|
|
|
if name or email or phone: |
|
|
|
# logger.info( |
|
|
|
# # logger.info( |
|
|
|
# f"Successfully extracted umpire data: name={name}, email={email}, phone={phone}" |
|
|
|
# f"Successfully extracted umpire data: name={name}, email={email}, phone={phone}" |
|
|
|
# ) |
|
|
|
# ) |
|
|
|
return name, email, phone |
|
|
|
return name, email, phone |
|
|
|
@ -642,7 +642,7 @@ def get_umpire_data(tournament_id): |
|
|
|
browser.close() |
|
|
|
browser.close() |
|
|
|
|
|
|
|
|
|
|
|
# Try requests fallback |
|
|
|
# Try requests fallback |
|
|
|
logger.info("Trying requests fallback after Playwright error") |
|
|
|
# logger.info("Trying requests fallback after Playwright error") |
|
|
|
return _get_umpire_data_requests_fallback(tournament_id) |
|
|
|
return _get_umpire_data_requests_fallback(tournament_id) |
|
|
|
|
|
|
|
|
|
|
|
except Exception as e: |
|
|
|
except Exception as e: |
|
|
|
@ -760,7 +760,7 @@ def _get_umpire_data_requests_fallback(tournament_id): |
|
|
|
""" |
|
|
|
""" |
|
|
|
Enhanced fallback method using requests |
|
|
|
Enhanced fallback method using requests |
|
|
|
""" |
|
|
|
""" |
|
|
|
logger.info(f"Using requests fallback for tournament {tournament_id}") |
|
|
|
# logger.info(f"Using requests fallback for tournament {tournament_id}") |
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
|
try: |
|
|
|
headers = { |
|
|
|
headers = { |
|
|
|
@ -816,9 +816,9 @@ def _get_umpire_data_requests_fallback(tournament_id): |
|
|
|
except Exception as soup_error: |
|
|
|
except Exception as soup_error: |
|
|
|
logger.warning(f"BeautifulSoup parsing failed: {soup_error}") |
|
|
|
logger.warning(f"BeautifulSoup parsing failed: {soup_error}") |
|
|
|
|
|
|
|
|
|
|
|
logger.info( |
|
|
|
# logger.info( |
|
|
|
f"Requests fallback result: name={name}, email={email}, phone={phone}" |
|
|
|
# f"Requests fallback result: name={name}, email={email}, phone={phone}" |
|
|
|
) |
|
|
|
# ) |
|
|
|
return name, email, phone |
|
|
|
return name, email, phone |
|
|
|
|
|
|
|
|
|
|
|
except Exception as e: |
|
|
|
except Exception as e: |
|
|
|
@ -830,7 +830,7 @@ def _get_umpire_data_requests_fallback(tournament_id): |
|
|
|
""" |
|
|
|
""" |
|
|
|
Fallback method using requests (may hit Queue-It) |
|
|
|
Fallback method using requests (may hit Queue-It) |
|
|
|
""" |
|
|
|
""" |
|
|
|
logger.info(f"Using requests fallback for tournament {tournament_id}") |
|
|
|
# logger.info(f"Using requests fallback for tournament {tournament_id}") |
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
|
try: |
|
|
|
url = f"https://tenup.fft.fr/tournoi/{tournament_id}" |
|
|
|
url = f"https://tenup.fft.fr/tournoi/{tournament_id}" |
|
|
|
@ -862,9 +862,9 @@ def _get_umpire_data_requests_fallback(tournament_id): |
|
|
|
phone_match = re.search(phone_pattern, html_content) |
|
|
|
phone_match = re.search(phone_pattern, html_content) |
|
|
|
phone = phone_match.group(1).strip() if phone_match else None |
|
|
|
phone = phone_match.group(1).strip() if phone_match else None |
|
|
|
|
|
|
|
|
|
|
|
logger.info( |
|
|
|
# logger.info( |
|
|
|
f"Extracted umpire data (requests): name={name}, email={email}, phone={phone}" |
|
|
|
# f"Extracted umpire data (requests): name={name}, email={email}, phone={phone}" |
|
|
|
) |
|
|
|
# ) |
|
|
|
return name, email, phone |
|
|
|
return name, email, phone |
|
|
|
|
|
|
|
|
|
|
|
except Exception as e: |
|
|
|
except Exception as e: |
|
|
|
@ -926,7 +926,7 @@ def scrape_fft_all_tournaments_concurrent( |
|
|
|
Scrapes all remaining pages of FFT tournaments concurrently (pages 1 to end) |
|
|
|
Scrapes all remaining pages of FFT tournaments concurrently (pages 1 to end) |
|
|
|
This assumes page 0 was already fetched by the client |
|
|
|
This assumes page 0 was already fetched by the client |
|
|
|
""" |
|
|
|
""" |
|
|
|
logger.info(f"Starting concurrent scraping for remaining tournament pages") |
|
|
|
# logger.info(f"Starting concurrent scraping for remaining tournament pages") |
|
|
|
|
|
|
|
|
|
|
|
# First, get the first page to determine total results and pages |
|
|
|
# First, get the first page to determine total results and pages |
|
|
|
first_page_result = scrape_fft_all_tournaments( |
|
|
|
first_page_result = scrape_fft_all_tournaments( |
|
|
|
@ -953,7 +953,7 @@ def scrape_fft_all_tournaments_concurrent( |
|
|
|
first_page_tournaments = first_page_result.get("tournaments", []) |
|
|
|
first_page_tournaments = first_page_result.get("tournaments", []) |
|
|
|
results_per_page = len(first_page_tournaments) |
|
|
|
results_per_page = len(first_page_tournaments) |
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"Total results: {total_results}, Results per page: {results_per_page}") |
|
|
|
# logger.info(f"Total results: {total_results}, Results per page: {results_per_page}") |
|
|
|
|
|
|
|
|
|
|
|
if total_results == 0: |
|
|
|
if total_results == 0: |
|
|
|
return { |
|
|
|
return { |
|
|
|
@ -969,7 +969,7 @@ def scrape_fft_all_tournaments_concurrent( |
|
|
|
else: |
|
|
|
else: |
|
|
|
total_pages = 1 |
|
|
|
total_pages = 1 |
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"Total pages: {total_pages}") |
|
|
|
# logger.info(f"Total pages: {total_pages}") |
|
|
|
|
|
|
|
|
|
|
|
# If only one page total, return empty since page 0 was already handled |
|
|
|
# If only one page total, return empty since page 0 was already handled |
|
|
|
if total_pages <= 1: |
|
|
|
if total_pages <= 1: |
|
|
|
@ -1012,17 +1012,17 @@ def scrape_fft_all_tournaments_concurrent( |
|
|
|
if result and result.get("tournaments"): |
|
|
|
if result and result.get("tournaments"): |
|
|
|
tournaments = result.get("tournaments", []) |
|
|
|
tournaments = result.get("tournaments", []) |
|
|
|
all_tournaments.extend(tournaments) |
|
|
|
all_tournaments.extend(tournaments) |
|
|
|
logger.info( |
|
|
|
# logger.info( |
|
|
|
f"Page {page} completed: {len(tournaments)} tournaments" |
|
|
|
# f"Page {page} completed: {len(tournaments)} tournaments" |
|
|
|
) |
|
|
|
# ) |
|
|
|
else: |
|
|
|
else: |
|
|
|
logger.warning(f"Page {page} returned no results") |
|
|
|
logger.warning(f"Page {page} returned no results") |
|
|
|
except Exception as e: |
|
|
|
except Exception as e: |
|
|
|
logger.error(f"Error processing page {page}: {e}") |
|
|
|
logger.error(f"Error processing page {page}: {e}") |
|
|
|
|
|
|
|
|
|
|
|
logger.info( |
|
|
|
# logger.info( |
|
|
|
f"Concurrent scraping completed: {len(all_tournaments)} tournaments from {total_pages - 1} remaining pages" |
|
|
|
# f"Concurrent scraping completed: {len(all_tournaments)} tournaments from {total_pages - 1} remaining pages" |
|
|
|
) |
|
|
|
# ) |
|
|
|
|
|
|
|
|
|
|
|
return { |
|
|
|
return { |
|
|
|
"tournaments": all_tournaments, |
|
|
|
"tournaments": all_tournaments, |
|
|
|
@ -1039,7 +1039,7 @@ def _parse_clubs_ajax_response(json_data): |
|
|
|
""" |
|
|
|
""" |
|
|
|
try: |
|
|
|
try: |
|
|
|
# Log the raw response structure to understand what we're getting |
|
|
|
# Log the raw response structure to understand what we're getting |
|
|
|
# logger.info(f"Raw clubs response structure: {json_data}") |
|
|
|
# # logger.info(f"Raw clubs response structure: {json_data}") |
|
|
|
|
|
|
|
|
|
|
|
club_markers = [] |
|
|
|
club_markers = [] |
|
|
|
total_results = 0 |
|
|
|
total_results = 0 |
|
|
|
@ -1116,9 +1116,9 @@ def _parse_clubs_ajax_response(json_data): |
|
|
|
} |
|
|
|
} |
|
|
|
club_markers.append(club_marker) |
|
|
|
club_markers.append(club_marker) |
|
|
|
|
|
|
|
|
|
|
|
logger.info( |
|
|
|
# logger.info( |
|
|
|
f"Successfully parsed {len(club_markers)} club markers from response" |
|
|
|
# f"Successfully parsed {len(club_markers)} club markers from response" |
|
|
|
) |
|
|
|
# ) |
|
|
|
|
|
|
|
|
|
|
|
# Return the response in the format expected by Swift FederalClubResponse |
|
|
|
# Return the response in the format expected by Swift FederalClubResponse |
|
|
|
return { |
|
|
|
return { |
|
|
|
@ -1138,7 +1138,7 @@ def scrape_federal_clubs( |
|
|
|
""" |
|
|
|
""" |
|
|
|
Scrapes FFT federal clubs by extracting data from the HTML response |
|
|
|
Scrapes FFT federal clubs by extracting data from the HTML response |
|
|
|
""" |
|
|
|
""" |
|
|
|
logger.info(f"Starting federal clubs scraping for city: {city}, country: {country}") |
|
|
|
# logger.info(f"Starting federal clubs scraping for city: {city}, country: {country}") |
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
|
try: |
|
|
|
with sync_playwright() as p: |
|
|
|
with sync_playwright() as p: |
|
|
|
@ -1157,13 +1157,13 @@ def scrape_federal_clubs( |
|
|
|
clean_city = re.sub(r"[,\s]*\d{5}.*$", "", city).strip() |
|
|
|
clean_city = re.sub(r"[,\s]*\d{5}.*$", "", city).strip() |
|
|
|
clean_city = clean_city.rstrip(",").strip() |
|
|
|
clean_city = clean_city.rstrip(",").strip() |
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"Cleaned city name: '{city}' -> '{clean_city}'") |
|
|
|
# logger.info(f"Cleaned city name: '{city}' -> '{clean_city}'") |
|
|
|
|
|
|
|
|
|
|
|
# Build the results URL directly |
|
|
|
# Build the results URL directly |
|
|
|
params = f"ville={clean_city}&pratique=PADEL&distance={int(radius)}&country={country or 'fr'}" |
|
|
|
params = f"ville={clean_city}&pratique=PADEL&distance={int(radius)}&country={country or 'fr'}" |
|
|
|
results_url = f"https://tenup.fft.fr/recherche/clubs/resultats?{params}" |
|
|
|
results_url = f"https://tenup.fft.fr/recherche/clubs/resultats?{params}" |
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"Requesting results URL: {results_url}") |
|
|
|
# logger.info(f"Requesting results URL: {results_url}") |
|
|
|
|
|
|
|
|
|
|
|
# Navigate to the results page |
|
|
|
# Navigate to the results page |
|
|
|
page_obj.goto(results_url) |
|
|
|
page_obj.goto(results_url) |
|
|
|
@ -1222,7 +1222,7 @@ def scrape_federal_clubs( |
|
|
|
total = result.get("total", 0) |
|
|
|
total = result.get("total", 0) |
|
|
|
resultat = result.get("resultat", []) |
|
|
|
resultat = result.get("resultat", []) |
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"Successfully extracted {total} clubs") |
|
|
|
# logger.info(f"Successfully extracted {total} clubs") |
|
|
|
|
|
|
|
|
|
|
|
# Convert resultat to club_markers format |
|
|
|
# Convert resultat to club_markers format |
|
|
|
club_markers = [] |
|
|
|
club_markers = [] |
|
|
|
@ -1270,7 +1270,7 @@ def get_browser_for_environment(playwright_instance): |
|
|
|
is_development = getattr(settings, "DEBUG", False) |
|
|
|
is_development = getattr(settings, "DEBUG", False) |
|
|
|
|
|
|
|
|
|
|
|
if is_development: |
|
|
|
if is_development: |
|
|
|
# logger.info("Development environment detected - using Firefox") |
|
|
|
# # logger.info("Development environment detected - using Firefox") |
|
|
|
try: |
|
|
|
try: |
|
|
|
return playwright_instance.firefox.launch( |
|
|
|
return playwright_instance.firefox.launch( |
|
|
|
headless=True, args=["--no-sandbox"] |
|
|
|
headless=True, args=["--no-sandbox"] |
|
|
|
@ -1284,7 +1284,7 @@ def get_browser_for_environment(playwright_instance): |
|
|
|
args=["--no-sandbox", "--disable-dev-shm-usage", "--single-process"], |
|
|
|
args=["--no-sandbox", "--disable-dev-shm-usage", "--single-process"], |
|
|
|
) |
|
|
|
) |
|
|
|
else: |
|
|
|
else: |
|
|
|
logger.info("Production environment detected - using Chromium") |
|
|
|
# logger.info("Production environment detected - using Chromium") |
|
|
|
return playwright_instance.chromium.launch( |
|
|
|
return playwright_instance.chromium.launch( |
|
|
|
headless=True, args=["--no-sandbox", "--disable-dev-shm-usage"] |
|
|
|
headless=True, args=["--no-sandbox", "--disable-dev-shm-usage"] |
|
|
|
) |
|
|
|
) |
|
|
|
|