commit
285292ac55
@ -1,13 +1,557 @@ |
|||||||
|
import time |
||||||
|
import logging |
||||||
|
import requests |
||||||
|
import re |
||||||
|
from playwright.sync_api import sync_playwright |
||||||
|
from datetime import datetime, timedelta |
||||||
|
import json |
||||||
|
import traceback |
||||||
|
|
||||||
|
logger = logging.getLogger(__name__) |
||||||
|
|
||||||
def check_version_smaller_than_1_1_12(version_str):
    """Return True if *version_str* denotes a version older than 1.1.12.

    Accepts strings like "1.1.11" or "1.1.12 (2)" — the parenthesised build
    suffix is ignored.  Empty, blank, or malformed strings return False
    instead of raising.
    """
    # Remove the parentheses part if it exists, example of version: 1.1.12 (2)
    # str.split() on an empty/blank string yields [], so guard before [0]
    # (the original code raised IndexError on "" or "   " here).
    parts = version_str.split()
    if not parts:
        return False
    version_str = parts[0]

    try:
        # Split version into numeric components, e.g. "1.1.11" -> [1, 1, 11]
        version_parts = [int(x) for x in version_str.split('.')]
    except ValueError:
        # Malformed component (e.g. "1.x.3"): treat as not smaller.
        return False

    target_parts = [1, 1, 12]

    # Lexicographic list comparison matches per-component version ordering.
    return version_parts < target_parts
||||||
|
|
||||||
|
def scrape_fft_club_tournaments(club_code, club_name, start_date=None, end_date=None, page=0):
    """
    Scrapes FFT tournaments using Playwright with detailed debugging

    Loads the public tenup.fft.fr search page to obtain a fresh Drupal
    ``form_build_id``, then replays the site's own AJAX search request from
    inside the page context and hands the resulting JSON command list to
    ``_parse_ajax_response``.

    Args:
        club_code: FFT club identifier (spaces are stripped before use).
        club_name: Club label; spaces are encoded as '+' for the form post.
        start_date: Optional search-window start, 'DD/MM/YY' — assumed
            format, see strptime below.
        end_date: Optional window end; when only start_date is given a
            90-day window is derived from it.
        page: Zero-based result page to request.

    Returns:
        The dict produced by ``_parse_ajax_response``, or None on failure
        (Queue-It waiting room, missing form id, network or JSON error).
    """
    logger.info(f"Starting Playwright scraping for {club_name}")
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            page_obj = browser.new_page()

            # Present a regular desktop Safari UA so the traffic looks like
            # a normal browser session.
            page_obj.set_extra_http_headers({
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
            })

            # Navigate to FFT
            target_url = "https://tenup.fft.fr/recherche/tournois"
            logger.info(f"Navigating to: {target_url}")

            page_obj.goto(target_url)
            # page_obj.wait_for_timeout(7000)

            current_url = page_obj.url
            logger.info(f"Current URL: {current_url}")

            # FFT sometimes redirects traffic to a Queue-It waiting room;
            # nothing can be scraped from there.
            if "queue-it.net" in current_url.lower():
                logger.warning("Still in Queue-It")
                browser.close()
                return None

            # Extract form_build_id (Drupal's per-render form token; the
            # AJAX endpoint rejects posts without a valid one)
            form_input = page_obj.query_selector('input[name="form_build_id"]')
            if not form_input:
                logger.error("Could not find form_build_id")
                browser.close()
                return None

            form_build_id = form_input.get_attribute('value')
            logger.info(f"Extracted form_build_id: {form_build_id}")

            # Build parameters
            date_component = ""
            if start_date and end_date:
                date_component = f"&date[start]={start_date}&date[end]={end_date}"
            elif start_date:
                try:
                    # Only a start date: derive a 90-day search window.
                    start_dt = datetime.strptime(start_date, "%d/%m/%y")
                    end_dt = start_dt + timedelta(days=90)
                    date_component = f"&date[start]={start_date}&date[end]={end_dt.strftime('%d/%m/%y')}"
                except ValueError:
                    # Unparseable date: search without a date filter.
                    logger.warning(f"Invalid date format: {start_date}")

            club_name_encoded = club_name.replace(" ", "+")
            club_code_clean = club_code.replace(" ", "")

            params = f"recherche_type=club&club[autocomplete][value_container][value_field]={club_code_clean}&club[autocomplete][value_container][label_field]={club_name_encoded}&pratique=PADEL{date_component}&page={page}&sort=dateDebut+asc&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page"

            logger.info(f"AJAX Parameters: {params}")

            # Make AJAX request and capture the full response.  Running
            # fetch() inside the page keeps FFT's session cookies attached.
            ajax_script = f"""
            async () => {{
                try {{
                    const response = await fetch('https://tenup.fft.fr/system/ajax', {{
                        method: 'POST',
                        headers: {{
                            'Accept': 'application/json, text/javascript, */*; q=0.01',
                            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                            'X-Requested-With': 'XMLHttpRequest',
                            'Origin': 'https://tenup.fft.fr',
                            'Referer': 'https://tenup.fft.fr/recherche/tournois'
                        }},
                        body: `{params}`
                    }});

                    const status = response.status;
                    const responseText = await response.text();

                    return {{
                        success: response.ok,
                        status: status,
                        responseText: responseText
                    }};
                }} catch (error) {{
                    return {{
                        success: false,
                        error: error.message
                    }};
                }}
            }}
            """

            logger.info("Making AJAX request...")
            result = page_obj.evaluate(ajax_script)

            browser.close()

            # Print the full response for debugging
            logger.info(f"AJAX Response Status: {result.get('status')}")
            logger.info(f"AJAX Response Success: {result.get('success')}")

            if result.get('success'):
                response_text = result.get('responseText', '')
                logger.info(f"Raw Response Length: {len(response_text)}")
                # logger.info(f"Raw Response (first 500 chars): {response_text[:500]}")

                try:
                    # Try to parse as JSON
                    json_data = json.loads(response_text)
                    logger.info(f"JSON Response Type: {type(json_data)}")
                    # logger.info(f"JSON Response: {json.dumps(json_data, indent=2, default=str)}")

                    # Now try to parse it
                    parsed_result = _parse_ajax_response(json_data)
                    # logger.info(f"Parsed Result: {parsed_result}")

                    return parsed_result

                except json.JSONDecodeError as json_error:
                    logger.error(f"JSON Parse Error: {json_error}")
                    logger.error(f"Response text: {response_text}")
                    return None
            else:
                logger.error(f"AJAX request failed: {result.get('error')}")
                return None

    except Exception as e:
        logger.error(f"Error in Playwright scraping: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        return None
||||||
|
|
||||||
|
def scrape_fft_club_tournaments_all_pages(club_code, club_name, start_date=None, end_date=None):
    """
    Scrapes all pages of FFT tournaments for a specific club

    Repeatedly invokes scrape_fft_club_tournaments with an increasing page
    index until a page comes back empty, the server-reported total is
    reached, or an error occurs.  Returns a summary dict containing the
    accumulated tournament list.
    """
    logger.info(f"Starting complete tournament scraping for {club_name}")

    collected = []
    page_index = 0

    while True:
        try:
            # Delegate to the working single-page scraper.
            page_result = scrape_fft_club_tournaments(
                club_code=club_code,
                club_name=club_name,
                start_date=start_date,
                end_date=end_date,
                page=page_index,
            )

            # Debug: log the raw page result
            logger.info(f"Page {page_index} result: {page_result}")

            if not page_result:
                logger.warning(f"No result for page {page_index}")
                break

            batch = page_result.get('tournaments', [])
            logger.info(f"Page {page_index} returned {len(batch)} tournaments")

            if not batch:
                logger.info(f"No tournaments on page {page_index}, stopping")
                break

            collected.extend(batch)

            # Stop once the accumulated count reaches the reported total.
            expected_total = page_result.get('total_results', 0)
            logger.info(f"Total so far: {len(collected)}, Target: {expected_total}")

            if len(collected) >= expected_total:
                logger.info("Got all tournaments, stopping")
                break

            page_index += 1
            logger.info(f"Moving to page {page_index}")
            # time.sleep(1)  # Rate limiting

        except Exception as e:
            logger.error(f"Error on page {page_index}: {e}")
            break

    logger.info(f"Completed scraping: {len(collected)} tournaments across {page_index + 1} pages")

    return {
        'tournaments': collected,
        'total_results': len(collected),
        'current_count': len(collected),
        'pages_scraped': page_index + 1,
    }
||||||
|
|
||||||
|
def get_umpire_data(tournament_id):
    """
    Scrapes umpire data for a specific tournament

    Fetches the tournament detail page and extracts the umpire's contact
    details with regexes.  Returns a (name, email, phone) tuple; each
    element is None when the page cannot be fetched or the field is absent.
    """
    logger.info(f"Getting umpire data for tournament {tournament_id}")

    try:
        response = requests.get(
            f"https://tenup.fft.fr/tournoi/{tournament_id}",
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15'
            },
            timeout=30,
        )

        if response.status_code != 200:
            logger.error(f"Failed to fetch tournament page: {response.status_code}")
            return None, None, None

        html_content = response.text

        # The umpire name sits inside the "responsable" title element.
        name_match = re.search(
            r'tournoi-detail-page-inscription-responsable-title">\s*([^<]+)\s*<',
            html_content,
        )
        name = name_match.group(1).strip() if name_match else None

        # Email comes from the first mailto: link on the page.
        email_match = re.search(r'mailto:([^"]+)"', html_content)
        email = email_match.group(1) if email_match else None

        # Phone is rendered as five space-separated digit pairs.
        phone_match = re.search(
            r'<div class="details-bloc">\s*(\d{2}\s+\d{2}\s+\d{2}\s+\d{2}\s+\d{2})\s*</div>',
            html_content,
        )
        phone = phone_match.group(1).strip() if phone_match else None

        logger.info(f"Extracted umpire data: name={name}, email={email}, phone={phone}")
        return name, email, phone

    except Exception as e:
        logger.error(f"Error getting umpire data: {e}")
        return None, None, None
||||||
|
|
||||||
|
def _parse_ajax_response(commands):
    """
    Parse the AJAX response commands to extract tournament data
    Returns data in the exact format expected by Swift FederalTournament struct

    Returns a dict with 'tournaments', 'total_results' and 'current_count';
    the same dict with empty values when no update command is present; or
    None on maintenance mode / unexpected errors.
    """
    # Fields whose absence should yield an empty list rather than None.
    list_valued_fields = ('naturesTerrains', 'idsArbitres', 'installations', 'familleTournoi')

    # Complete field layout, in the exact insertion order the client expects.
    field_order = (
        'id', 'millesime', 'libelle', 'tmc', 'tarifAdulteChampionnat',
        'type', 'ageReel', 'naturesTerrains', 'idsArbitres',
        'tarifJeuneChampionnat', 'international', 'inscriptionEnLigne',
        'categorieTournoi', 'prixLot', 'paiementEnLigne',
        'reductionAdherentJeune', 'reductionAdherentAdulte',
        'paiementEnLigneObligatoire', 'villeEngagement', 'senior', 'veteran',
        'inscriptionEnLigneEnCours', 'avecResultatPublie', 'code',
        'categorieAge', 'codeComite', 'installations',
        'reductionEpreuveSupplementaireJeune',
        'reductionEpreuveSupplementaireAdulte', 'nomComite', 'naturesEpreuves',
        'jeune', 'courrielEngagement', 'nomClub', 'installation',
        'categorieAgeMax', 'tournoiInterne', 'nomLigue', 'nomEngagement',
        'codeLigue', 'modeleDeBalle', 'jugeArbitre', 'adresse2Engagement',
        'epreuves', 'dateDebut', 'serie', 'dateFin', 'dateValidation',
        'codePostalEngagement', 'codeClub', 'prixEspece', 'japPhoneNumber',
        'adresse1Engagement', 'originalId', 'familleTournoi', 'isTournoi',
        'natureWithCatAge',
    )

    tournaments = []

    try:
        # An 'alert' command signals FFT maintenance mode.
        if any(cmd.get('command') == 'alert' for cmd in commands):
            logger.warning("Maintenance mode detected")
            return None

        # Locate the command carrying the search results.
        update_command = next(
            (cmd for cmd in commands if cmd.get('command') == 'recherche_tournois_update'),
            None,
        )
        if update_command:
            logger.info("Found recherche_tournois_update command!")

        if update_command and update_command.get('results'):
            results = update_command['results']
            items = results.get('items', [])
            total_results = results.get('nb_results', 0)

            logger.info(f"Processing {len(items)} tournaments from results")

            for item in items:
                record = {}
                for field in field_order:
                    if field == 'id':
                        # The client expects the identifier as a string.
                        record[field] = str(item.get('id', ''))
                    elif field == 'japPhoneNumber':
                        # Will be populated by separate umpire call
                        record[field] = None
                    elif field in list_valued_fields:
                        record[field] = item.get(field, [])
                    else:
                        record[field] = item.get(field)
                tournaments.append(record)

            logger.info(f"Successfully parsed {len(tournaments)} tournaments from response")
            return {
                'tournaments': tournaments,
                'total_results': total_results,
                'current_count': len(tournaments),
            }
        else:
            logger.error("No recherche_tournois_update command found in AJAX response")
            return {'tournaments': [], 'total_results': 0, 'current_count': 0}

    except Exception as e:
        logger.error(f"Error parsing AJAX response: {e}")
        return None
||||||
|
|
||||||
|
def scrape_fft_all_tournaments(sorting_option=None, page=0, start_date=None, end_date=None,
                               city='', distance=15, categories=None, levels=None,
                               lat=None, lng=None, ages=None, tournament_types=None,
                               national_cup=False):
    """
    Scrapes FFT tournaments using Playwright with detailed debugging
    Based exactly on the working scrape_fft_club_tournaments function

    Args:
        sorting_option: FFT sort expression (e.g. 'dateDebut+asc'); falls
            back to date-ascending when None.
        page: Zero-based result page to request.
        start_date: Optional search-window start, 'DD/MM/YY'.
        end_date: Optional window end; with only start_date given, a
            90-day window is derived from it.
        city: City to search around; empty/blank falls back to a
            ligue-wide search.
        distance: Search radius in km around the city (truncated to int).
        categories, levels, ages, tournament_types: Optional lists used to
            build the FFT filter parameters.
        lat, lng: Optional coordinates refining the city search.
        national_cup: When True, restricts results to national cup events.

    Returns:
        The dict produced by _parse_ajax_response, or None on failure
        (Queue-It waiting room, missing form id, network or JSON error).
    """
    logger.info(f"Starting Playwright scraping for city: {city}")

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            page_obj = browser.new_page()

            # Present a regular desktop Safari UA.
            page_obj.set_extra_http_headers({
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
            })

            # Navigate to FFT
            target_url = "https://tenup.fft.fr/recherche/tournois"
            logger.info(f"Navigating to: {target_url}")

            page_obj.goto(target_url)
            # page_obj.wait_for_timeout(7000)

            current_url = page_obj.url
            logger.info(f"Current URL: {current_url}")

            # Queue-It waiting room: nothing to scrape.
            if "queue-it.net" in current_url.lower():
                logger.warning("Still in Queue-It")
                browser.close()
                return None

            # Extract form_build_id (Drupal's per-render form token)
            form_input = page_obj.query_selector('input[name="form_build_id"]')
            if not form_input:
                logger.error("Could not find form_build_id")
                browser.close()
                return None

            form_build_id = form_input.get_attribute('value')
            logger.info(f"Extracted form_build_id: {form_build_id}")

            # Build parameters - EXACT same pattern as club function
            date_component = ""
            if start_date and end_date:
                date_component = f"&date[start]={start_date}&date[end]={end_date}"
            elif start_date:
                try:
                    start_dt = datetime.strptime(start_date, "%d/%m/%y")
                    end_dt = start_dt + timedelta(days=90)
                    date_component = f"&date[start]={start_date}&date[end]={end_dt.strftime('%d/%m/%y')}"
                except ValueError:
                    logger.warning(f"Invalid date format: {start_date}")

            # Build filter parameters
            filter_params = ""

            # Add categories filter
            if categories:
                logger.info(f"Adding categories filter: {categories}")
                for category in categories:
                    filter_params += f"&epreuve[{category}]={category}"

            # Add levels filter
            if levels:
                logger.info(f"Adding levels filter: {levels}")
                for level in levels:
                    filter_params += f"&categorie_tournoi[{level}]={level}"

            # Add ages filter
            if ages:
                logger.info(f"Adding ages filter: {ages}")
                for age in ages:
                    filter_params += f"&categorie_age[{age}]={age}"

            # Add types filter
            if tournament_types:
                logger.info(f"Adding types filter: {tournament_types}")
                for t_type in tournament_types:
                    # FFT expects capitalized type labels.
                    capitalized_type = t_type.capitalize()
                    filter_params += f"&type[{capitalized_type}]={capitalized_type}"

            # Add national cup filter
            if national_cup:
                logger.info("Adding national cup filter")
                filter_params += "&tournoi_npc=1"

            # Fix the sorting parameter
            if sorting_option:
                sort_param = f"&sort={sorting_option}"
            else:
                sort_param = "&sort=dateDebut+asc"

            # Build city parameters with distance and location
            if city and city.strip():
                city_name_encoded = city.strip().replace(" ", "+")

                # Start with the working base parameters
                base_params = f"recherche_type=ville&ville[autocomplete][value_container][value_field]={city_name_encoded}&ville[autocomplete][value_container][label_field]={city_name_encoded}"

                # Add distance parameter
                distance_param = f"&ville[distance][value_field]={int(distance)}"

                # Add lat/lng if provided
                location_params = ""
                if lat and lng:
                    location_params = f"&ville[autocomplete][value_container][lat_field]={lat}&ville[autocomplete][value_container][lng_field]={lng}"

                # Combine all parameters including filters.
                # BUG FIX: filter_params was previously omitted from this
                # branch, so category/level/age/type/national-cup filters were
                # silently ignored for city searches (the ligue branch below
                # already included them).
                params = f"{base_params}{location_params}{distance_param}&pratique=PADEL{date_component}{filter_params}&page={page}{sort_param}&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page"
            else:
                # Default to ligue search if no city provided (sort_param
                # expands to the same string the inline expression built).
                params = f"recherche_type=ligue&pratique=PADEL{date_component}{filter_params}&page={page}{sort_param}&form_build_id={form_build_id}&form_id=recherche_tournois_form&_triggering_element_name=submit_page&_triggering_element_value=Submit+page"

            logger.info(f"AJAX Parameters: {params}")

            # Make AJAX request and capture the full response - EXACT same as club function
            ajax_script = f"""
            async () => {{
                try {{
                    const response = await fetch('https://tenup.fft.fr/system/ajax', {{
                        method: 'POST',
                        headers: {{
                            'Accept': 'application/json, text/javascript, */*; q=0.01',
                            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                            'X-Requested-With': 'XMLHttpRequest',
                            'Origin': 'https://tenup.fft.fr',
                            'Referer': 'https://tenup.fft.fr/recherche/tournois'
                        }},
                        body: `{params}`
                    }});

                    const status = response.status;
                    const responseText = await response.text();

                    return {{
                        success: response.ok,
                        status: status,
                        responseText: responseText
                    }};
                }} catch (error) {{
                    return {{
                        success: false,
                        error: error.message
                    }};
                }}
            }}
            """

            logger.info("Making AJAX request...")
            result = page_obj.evaluate(ajax_script)

            browser.close()

            # Print the full response for debugging - EXACT same as club function
            logger.info(f"AJAX Response Status: {result.get('status')}")
            logger.info(f"AJAX Response Success: {result.get('success')}")

            if result.get('success'):
                response_text = result.get('responseText', '')
                logger.info(f"Raw Response Length: {len(response_text)}")

                try:
                    # Try to parse as JSON
                    json_data = json.loads(response_text)
                    logger.info(f"JSON Response Type: {type(json_data)}")

                    # Now try to parse it - EXACT same as club function
                    parsed_result = _parse_ajax_response(json_data)

                    return parsed_result

                except json.JSONDecodeError as json_error:
                    logger.error(f"JSON Parse Error: {json_error}")
                    logger.error(f"Response text: {response_text}")
                    return None
            else:
                logger.error(f"AJAX request failed: {result.get('error')}")
                return None

    except Exception as e:
        logger.error(f"Error in Playwright scraping: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        return None
||||||
|
|||||||
@ -0,0 +1,222 @@ |
|||||||
|
from django.core.management.base import BaseCommand |
||||||
|
from datetime import datetime, timedelta |
||||||
|
import logging |
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Management command that exercises scrape_fft_all_tournaments with CLI filters."""

    help = 'Test FFT all tournaments scraping with various filters'

    def add_arguments(self, parser):
        # Search behaviour -------------------------------------------------
        parser.add_argument(
            '--sorting',
            type=str,
            default='dateDebut+asc',
            choices=['dateDebut+asc', 'dateDebut+desc', '_DISTANCE_'],
            help='Sorting option (default: dateDebut+asc)'
        )
        parser.add_argument(
            '--page',
            type=int,
            default=0,
            help='Page number to scrape (default: 0)'
        )
        # Location ---------------------------------------------------------
        parser.add_argument(
            '--city',
            type=str,
            default='',
            help='City to search around'
        )
        parser.add_argument(
            '--distance',
            type=float,
            default=15.0,
            help='Distance in km (default: 15)'
        )
        # Tournament filters (all optional, multi-valued) --------------------
        parser.add_argument(
            '--categories',
            nargs='*',
            default=[],
            help='Tournament categories to filter by'
        )
        parser.add_argument(
            '--levels',
            nargs='*',
            default=[],
            help='Tournament levels to filter by'
        )
        parser.add_argument(
            '--ages',
            nargs='*',
            default=[],
            help='Age categories to filter by'
        )
        parser.add_argument(
            '--types',
            nargs='*',
            default=[],
            help='Tournament types to filter by'
        )
        parser.add_argument(
            '--national-cup',
            action='store_true',
            help='Filter for national cup tournaments only'
        )
        parser.add_argument(
            '--lat',
            type=float,
            help='Latitude for location-based search'
        )
        parser.add_argument(
            '--lng',
            type=float,
            help='Longitude for location-based search'
        )
        # Date window --------------------------------------------------------
        parser.add_argument(
            '--days-ahead',
            type=int,
            default=90,
            help='How many days ahead to search (default: 90)'
        )
        parser.add_argument(
            '--start-date',
            type=str,
            help='Start date in DD/MM/YY format (overrides --days-ahead)'
        )
        parser.add_argument(
            '--end-date',
            type=str,
            help='End date in DD/MM/YY format (overrides --days-ahead)'
        )
        parser.add_argument(
            '--verbose',
            action='store_true',
            help='Enable verbose logging'
        )

    def handle(self, *args, **options):
        # Root-logger INFO output surfaces the scraper's progress logging.
        if options['verbose']:
            logging.basicConfig(level=logging.INFO)

        # Extract options
        sorting_option = options['sorting']
        page = options['page']
        city = options['city']
        distance = options['distance']
        categories = options['categories']
        levels = options['levels']
        ages = options['ages']
        tournament_types = options['types']
        national_cup = options['national_cup']
        lat = options['lat']
        lng = options['lng']
        verbose = options['verbose']

        # Calculate date range (explicit dates win over --days-ahead)
        if options['start_date'] and options['end_date']:
            start_date_str = options['start_date']
            end_date_str = options['end_date']
        else:
            start_date = datetime.now()
            end_date = start_date + timedelta(days=options['days_ahead'])
            start_date_str = start_date.strftime('%d/%m/%y')
            end_date_str = end_date.strftime('%d/%m/%y')

        # Echo the effective configuration before scraping.
        self.stdout.write(self.style.SUCCESS("=== FFT All Tournaments Scraper ==="))
        self.stdout.write(f"Sorting: {sorting_option}")
        self.stdout.write(f"Page: {page}")
        self.stdout.write(f"Date range: {start_date_str} to {end_date_str}")
        self.stdout.write(f"City: {city if city else 'Not specified'}")
        self.stdout.write(f"Distance: {distance} km")
        self.stdout.write(f"Categories: {categories if categories else 'All'}")
        self.stdout.write(f"Levels: {levels if levels else 'All'}")
        self.stdout.write(f"Ages: {ages if ages else 'All'}")
        self.stdout.write(f"Types: {tournament_types if tournament_types else 'All'}")
        self.stdout.write(f"National Cup: {'Yes' if national_cup else 'No'}")
        if lat and lng:
            self.stdout.write(f"Location: {lat}, {lng}")
        self.stdout.write(f"Method: Playwright (Chrome-free)")
        self.stdout.write("")

        try:
            # Imported lazily so the command module can load even when the
            # scraping dependencies are unavailable.
            from api.utils import scrape_fft_all_tournaments
            self.stdout.write("🚀 Testing general tournament scraping...")

            result = scrape_fft_all_tournaments(
                sorting_option=sorting_option,
                page=page,
                start_date=start_date_str,
                end_date=end_date_str,
                city=city,
                distance=distance,
                categories=categories,
                levels=levels,
                lat=lat,
                lng=lng,
                ages=ages,
                tournament_types=tournament_types,
                national_cup=national_cup
            )

            # Debug: Show what we got (only in verbose mode)
            if verbose:
                self.stdout.write(f"🔍 Raw result: {result}")

            if result:
                tournaments = result.get('tournaments', [])
                self.stdout.write(self.style.SUCCESS(f"✅ SUCCESS: {len(tournaments)} tournaments found"))

                if tournaments:
                    self.stdout.write("\n📝 Sample tournaments:")
                    # Show first 3 tournaments
                    for i, tournament in enumerate(tournaments[:3]):
                        self.stdout.write(f"\n Tournament {i+1}:")
                        self.stdout.write(f" ID: {tournament.get('id')}")
                        self.stdout.write(f" Name: {tournament.get('libelle')}")
                        self.stdout.write(f" Date: {tournament.get('dateDebut', {}).get('date', 'N/A')}")
                        self.stdout.write(f" Club: {tournament.get('nomClub', 'N/A')}")
                        self.stdout.write(f" City: {tournament.get('villeEngagement', 'N/A')}")
                        self.stdout.write(f" Category: {tournament.get('categorieTournoi', 'N/A')}")
                        self.stdout.write(f" Type: {tournament.get('type', 'N/A')}")
                        if tournament.get('jugeArbitre'):
                            self.stdout.write(f" Judge: {tournament.get('jugeArbitre', {}).get('nom', 'N/A')}")

                self.stdout.write(f"\n📊 Summary:")
                self.stdout.write(f" Total tournaments: {len(tournaments)}")
                self.stdout.write(f" Current page: {page}")
                self.stdout.write(f" Total results available: {result.get('total_results', 'Unknown')}")

                # Analysis of results
                if tournaments:
                    # NOTE(review): 'categories' below shadows the CLI option
                    # extracted above; harmless because the option is not read
                    # again past this point, but worth renaming.
                    cities = set()
                    clubs = set()
                    categories = set()
                    types = set()

                    for tournament in tournaments:
                        if tournament.get('villeEngagement'):
                            cities.add(tournament['villeEngagement'])
                        if tournament.get('nomClub'):
                            clubs.add(tournament['nomClub'])
                        if tournament.get('categorieTournoi'):
                            categories.add(tournament['categorieTournoi'])
                        if tournament.get('type'):
                            types.add(tournament['type'])

                    self.stdout.write(f"\n🔍 Analysis:")
                    self.stdout.write(f" Unique cities: {len(cities)}")
                    self.stdout.write(f" Unique clubs: {len(clubs)}")
                    self.stdout.write(f" Unique categories: {len(categories)}")
                    self.stdout.write(f" Unique types: {len(types)}")

                    if verbose:
                        self.stdout.write(f"\n Cities: {sorted(list(cities))[:10]}")  # Show first 10
                        self.stdout.write(f" Categories: {sorted(list(categories))}")
                        self.stdout.write(f" Types: {sorted(list(types))}")

            else:
                self.stdout.write(self.style.ERROR("❌ FAILED: No tournaments found"))

        except Exception as e:
            self.stdout.write(self.style.ERROR(f"❌ ERROR: {e}"))
            import traceback
            if verbose:
                self.stdout.write(traceback.format_exc())
||||||
@ -0,0 +1,103 @@ |
|||||||
|
from django.core.management.base import BaseCommand |
||||||
|
from datetime import datetime, timedelta |
||||||
|
import logging |
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Django management command that exercises the FFT tournament scraper.

    Runs either the single-page or the all-pages Playwright scraper from
    ``api.utils`` for one club over the next 90 days and prints a human
    readable summary to stdout. Intended for manual testing/debugging.
    """

    help = 'Test FFT tournament scraping with Playwright'

    def add_arguments(self, parser):
        """Register CLI options: club identity, paging mode and verbosity."""
        parser.add_argument(
            '--club-code',
            type=str,
            default='62130180',
            help='Club code for testing (default: 62130180)'
        )
        parser.add_argument(
            '--club-name',
            type=str,
            default='TENNIS SPORTING CLUB DE CASSIS',
            help='Club name for testing'
        )
        parser.add_argument(
            '--all-pages',
            action='store_true',
            help='Test all pages scraping'
        )
        parser.add_argument(
            '--verbose',
            action='store_true',
            help='Enable verbose logging'
        )

    def handle(self, *args, **options):
        """Run one scraping test and report results/summary to stdout.

        Any exception raised by the scraper is caught at this boundary and
        reported as an error line (full traceback only with --verbose).
        """
        if options['verbose']:
            # Surface the scraper's logger.info output on the console.
            logging.basicConfig(level=logging.INFO)

        club_code = options['club_code']
        club_name = options['club_name']
        all_pages = options['all_pages']
        verbose = options['verbose']

        # Scrape the next 90 days; the FFT site expects dd/mm/yy dates.
        start_date = datetime.now()
        end_date = start_date + timedelta(days=90)
        start_date_str = start_date.strftime('%d/%m/%y')
        end_date_str = end_date.strftime('%d/%m/%y')

        self.stdout.write(self.style.SUCCESS("=== FFT Tournament Scraper ==="))
        self.stdout.write(f"Club: {club_name} ({club_code})")
        self.stdout.write(f"Date range: {start_date_str} to {end_date_str}")
        self.stdout.write("Method: Playwright (Chrome-free)")
        self.stdout.write("")

        try:
            # Imported lazily so the command module loads even when the
            # scraping dependencies (Playwright) are not installed.
            if all_pages:
                from api.utils import scrape_fft_club_tournaments_all_pages
                self.stdout.write("🚀 Testing complete tournament scraping...")

                result = scrape_fft_club_tournaments_all_pages(
                    club_code=club_code,
                    club_name=club_name,
                    start_date=start_date_str,
                    end_date=end_date_str
                )
            else:
                from api.utils import scrape_fft_club_tournaments
                self.stdout.write("🚀 Testing single page scraping...")

                result = scrape_fft_club_tournaments(
                    club_code=club_code,
                    club_name=club_name,
                    start_date=start_date_str,
                    end_date=end_date_str,
                    page=0
                )

            # Debug: show the raw payload only when --verbose was given.
            if verbose:
                self.stdout.write(f"🔍 Raw result: {result}")

            if result:
                tournaments = result.get('tournaments', [])
                self.stdout.write(self.style.SUCCESS(f"✅ SUCCESS: {len(tournaments)} tournaments found"))

                if tournaments:
                    self.stdout.write("\n📝 Sample tournament:")
                    sample = tournaments[0]
                    self.stdout.write(f" ID: {sample.get('id')}")
                    self.stdout.write(f" Name: {sample.get('libelle')}")
                    # dateDebut/jugeArbitre are nested dicts in the FFT
                    # payload; default to {} so a missing key prints N/A.
                    self.stdout.write(f" Date: {sample.get('dateDebut', {}).get('date', 'N/A')}")
                    self.stdout.write(f" Judge: {sample.get('jugeArbitre', {}).get('nom', 'N/A')}")

                self.stdout.write("\n📊 Summary:")
                self.stdout.write(f" Total tournaments: {len(tournaments)}")
                self.stdout.write(f" Pages scraped: {result.get('pages_scraped', 1)}")

            else:
                self.stdout.write(self.style.ERROR("❌ FAILED: No tournaments found"))

        except Exception as e:
            # Boundary handler for a manual test command: report, don't crash.
            self.stdout.write(self.style.ERROR(f"❌ ERROR: {e}"))
            import traceback
            if verbose:
                self.stdout.write(traceback.format_exc())
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue