You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
557 lines
23 KiB
557 lines
23 KiB
import time
|
|
import logging
|
|
import requests
|
|
import re
|
|
from playwright.sync_api import sync_playwright
|
|
from datetime import datetime, timedelta
|
|
import json
|
|
import traceback
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
def check_version_smaller_than_1_1_12(version_str):
    """Return True when *version_str* denotes a version strictly below 1.1.12.

    The version may carry a trailing marker separated by whitespace, for
    example ``"1.1.12 (2)"``; only the first whitespace-separated token is
    compared.

    Args:
        version_str: Dotted numeric version string, optionally followed by
            extra text after whitespace.

    Returns:
        True if the version compares lower than ``1.1.12`` component by
        component, otherwise False. Empty, whitespace-only or ``None`` input
        yields False.

    Raises:
        ValueError: If a dotted component is not an integer.
    """
    # str.split() returns [] for an empty or blank string, so guard before
    # indexing instead of letting an IndexError escape.
    tokens = version_str.split() if version_str else []
    if not tokens:
        return False

    # Lists compare lexicographically, which matches version ordering here
    # (a shorter prefix such as 1.1 sorts below 1.1.12).
    version_parts = [int(part) for part in tokens[0].split('.')]
    return version_parts < [1, 1, 12]
|
|
|
|
def scrape_fft_club_tournaments(club_code, club_name, start_date=None, end_date=None, page=0):
    """
    Fetch one page of PADEL tournaments for a club from the FFT search form.

    A headless Chromium page is opened on the tournament search page to read
    the form's ``form_build_id``; the search itself is then POSTed from inside
    that page to ``/system/ajax`` and the JSON answer is handed to
    ``_parse_ajax_response``.

    Args:
        club_code: Club code; spaces are removed before sending.
        club_name: Club label sent alongside the code.
        start_date: Optional start date string. When given without
            ``end_date`` it must be in ``%d/%m/%y`` format and the end date
            defaults to 90 days later.
        end_date: Optional end date string, sent as-is.
        page: Zero-based result page index.

    Returns:
        Whatever ``_parse_ajax_response`` returns for the decoded response,
        or None when the site redirects to Queue-It, the form id cannot be
        found, the request fails, the body is not JSON, or any exception
        occurs.
    """
    # Function-scope import so this block does not depend on file-level edits.
    from urllib.parse import quote_plus

    logger.info(f"Starting Playwright scraping for {club_name}")
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page_obj = browser.new_page()
                page_obj.set_extra_http_headers({
                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
                })

                # Navigate to FFT
                target_url = "https://tenup.fft.fr/recherche/tournois"
                logger.info(f"Navigating to: {target_url}")
                page_obj.goto(target_url)

                current_url = page_obj.url
                logger.info(f"Current URL: {current_url}")

                if "queue-it.net" in current_url.lower():
                    logger.warning("Still in Queue-It")
                    return None

                # Extract form_build_id
                form_input = page_obj.query_selector('input[name="form_build_id"]')
                if not form_input:
                    logger.error("Could not find form_build_id")
                    return None

                form_build_id = form_input.get_attribute('value')
                logger.info(f"Extracted form_build_id: {form_build_id}")

                # Build parameters
                date_component = ""
                if start_date and end_date:
                    date_component = f"&date[start]={start_date}&date[end]={end_date}"
                elif start_date:
                    try:
                        start_dt = datetime.strptime(start_date, "%d/%m/%y")
                        end_dt = start_dt + timedelta(days=90)
                        date_component = f"&date[start]={start_date}&date[end]={end_dt.strftime('%d/%m/%y')}"
                    except ValueError:
                        logger.warning(f"Invalid date format: {start_date}")

                # Percent-encode the free-text values so characters such as
                # '&', '=' or '%' cannot corrupt the form body. Spaces still
                # become '+', as before.
                club_name_encoded = quote_plus(club_name)
                club_code_clean = quote_plus(club_code.replace(" ", ""))

                params = (
                    "recherche_type=club"
                    f"&club[autocomplete][value_container][value_field]={club_code_clean}"
                    f"&club[autocomplete][value_container][label_field]={club_name_encoded}"
                    f"&pratique=PADEL{date_component}"
                    f"&page={page}"
                    "&sort=dateDebut+asc"
                    f"&form_build_id={form_build_id}"
                    "&form_id=recherche_tournois_form"
                    "&_triggering_element_name=submit_page"
                    "&_triggering_element_value=Submit+page"
                )
                logger.info(f"AJAX Parameters: {params}")

                # The request body is passed to the page function as an
                # argument instead of being spliced into the JavaScript
                # source, so backticks, "${" or backslashes in the values
                # cannot alter the script.
                ajax_script = """
                async (body) => {
                    try {
                        const response = await fetch('https://tenup.fft.fr/system/ajax', {
                            method: 'POST',
                            headers: {
                                'Accept': 'application/json, text/javascript, */*; q=0.01',
                                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                                'X-Requested-With': 'XMLHttpRequest',
                                'Origin': 'https://tenup.fft.fr',
                                'Referer': 'https://tenup.fft.fr/recherche/tournois'
                            },
                            body: body
                        });

                        const status = response.status;
                        const responseText = await response.text();

                        return {
                            success: response.ok,
                            status: status,
                            responseText: responseText
                        };
                    } catch (error) {
                        return {
                            success: false,
                            error: error.message
                        };
                    }
                }
                """

                logger.info("Making AJAX request...")
                result = page_obj.evaluate(ajax_script, params)
            finally:
                # Single cleanup point for every exit path, including errors.
                browser.close()

        logger.info(f"AJAX Response Status: {result.get('status')}")
        logger.info(f"AJAX Response Success: {result.get('success')}")

        if not result.get('success'):
            logger.error(f"AJAX request failed: {result.get('error')}")
            return None

        response_text = result.get('responseText', '')
        logger.info(f"Raw Response Length: {len(response_text)}")

        try:
            json_data = json.loads(response_text)
        except json.JSONDecodeError as json_error:
            logger.error(f"JSON Parse Error: {json_error}")
            logger.error(f"Response text: {response_text}")
            return None

        logger.info(f"JSON Response Type: {type(json_data)}")
        return _parse_ajax_response(json_data)

    except Exception as e:
        # Best-effort scraper: callers receive None rather than an exception.
        logger.error(f"Error in Playwright scraping: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        return None
|
|
|
|
def scrape_fft_club_tournaments_all_pages(club_code, club_name, start_date=None, end_date=None):
    """
    Collect every result page of FFT tournaments for a specific club.

    Calls ``scrape_fft_club_tournaments`` for page 0, 1, 2, ... and stops when
    a page yields no result, a page contains no tournaments, the number of
    collected tournaments reaches the reported ``total_results``, or an
    exception is raised.

    Args:
        club_code: Club code forwarded to the single-page scraper.
        club_name: Club name forwarded to the single-page scraper.
        start_date: Optional start date forwarded unchanged.
        end_date: Optional end date forwarded unchanged.

    Returns:
        A dict with ``tournaments`` (all collected items), ``total_results``
        and ``current_count`` (both the number collected) and
        ``pages_scraped`` (the number of pages that actually contributed
        tournaments).
    """
    logger.info(f"Starting complete tournament scraping for {club_name}")

    all_tournaments = []
    page = 0
    # Counted separately from ``page`` so a failed or empty final request is
    # not reported as a scraped page.
    pages_scraped = 0

    while True:
        try:
            result = scrape_fft_club_tournaments(
                club_code=club_code,
                club_name=club_name,
                start_date=start_date,
                end_date=end_date,
                page=page,
            )

            # Full payload dump is diagnostic only, so keep it at debug level.
            logger.debug(f"Page {page} result: {result}")

            if not result:
                logger.warning(f"No result for page {page}")
                break

            tournaments = result.get('tournaments', [])
            logger.info(f"Page {page} returned {len(tournaments)} tournaments")

            if not tournaments:
                logger.info(f"No tournaments on page {page}, stopping")
                break

            all_tournaments.extend(tournaments)
            pages_scraped += 1

            # Treat a missing or null total as 0, which ends the loop here.
            total_results = result.get('total_results') or 0
            logger.info(f"Total so far: {len(all_tournaments)}, Target: {total_results}")

            if len(all_tournaments) >= total_results:
                logger.info("Got all tournaments, stopping")
                break

            page += 1
            logger.info(f"Moving to page {page}")

        except Exception as e:
            # Keep whatever was collected so far instead of failing the run.
            logger.error(f"Error on page {page}: {e}")
            break

    logger.info(f"Completed scraping: {len(all_tournaments)} tournaments across {pages_scraped} pages")

    return {
        'tournaments': all_tournaments,
        'total_results': len(all_tournaments),
        'current_count': len(all_tournaments),
        'pages_scraped': pages_scraped,
    }
|
|
|
|
def get_umpire_data(tournament_id):
    """
    Fetch a tournament page and pull the contact's name, email and phone.

    The page at ``https://tenup.fft.fr/tournoi/<tournament_id>`` is downloaded
    with ``requests`` and three regular expressions are run over the HTML.

    Args:
        tournament_id: Identifier appended to the tournament URL.

    Returns:
        A ``(name, email, phone)`` tuple. Each element is None when its
        pattern does not match; all three are None when the HTTP status is
        not 200 or any exception occurs.
    """
    logger.info(f"Getting umpire data for tournament {tournament_id}")

    try:
        response = requests.get(
            f"https://tenup.fft.fr/tournoi/{tournament_id}",
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15'
            },
            timeout=30,
        )

        if response.status_code != 200:
            logger.error(f"Failed to fetch tournament page: {response.status_code}")
            return None, None, None

        html_content = response.text

        def first_group(pattern):
            # First capture group of the first match in the page, or None.
            found = re.search(pattern, html_content)
            return found.group(1) if found else None

        # Name and phone are trimmed; the email is returned exactly as captured.
        raw_name = first_group(r'tournoi-detail-page-inscription-responsable-title">\s*([^<]+)\s*<')
        name = raw_name.strip() if raw_name is not None else None

        email = first_group(r'mailto:([^"]+)"')

        raw_phone = first_group(
            r'<div class="details-bloc">\s*(\d{2}\s+\d{2}\s+\d{2}\s+\d{2}\s+\d{2})\s*</div>'
        )
        phone = raw_phone.strip() if raw_phone is not None else None

        logger.info(f"Extracted umpire data: name={name}, email={email}, phone={phone}")
        return name, email, phone

    except Exception as e:
        logger.error(f"Error getting umpire data: {e}")
        return None, None, None
|
|
|
|
def _parse_ajax_response(commands):
    """
    Extract tournament records from the decoded AJAX command list.

    The output dicts are intended to match the Swift FederalTournament
    struct (per the original author's note).

    Args:
        commands: Iterable of command dicts decoded from the AJAX response.

    Returns:
        ``{'tournaments': [...], 'total_results': int, 'current_count': int}``
        built from the ``recherche_tournois_update`` command. The same shape
        with empty/zero values is returned when that command or its results
        are missing. None is returned when an ``alert`` command is present or
        when parsing raises.
    """
    # Keys copied straight from each item, in output order.
    leading_fields = (
        "millesime", "libelle", "tmc", "tarifAdulteChampionnat", "type",
        "ageReel", "naturesTerrains", "idsArbitres", "tarifJeuneChampionnat",
        "international", "inscriptionEnLigne", "categorieTournoi", "prixLot",
        "paiementEnLigne", "reductionAdherentJeune", "reductionAdherentAdulte",
        "paiementEnLigneObligatoire", "villeEngagement", "senior", "veteran",
        "inscriptionEnLigneEnCours", "avecResultatPublie", "code",
        "categorieAge", "codeComite", "installations",
        "reductionEpreuveSupplementaireJeune",
        "reductionEpreuveSupplementaireAdulte", "nomComite", "naturesEpreuves",
        "jeune", "courrielEngagement", "nomClub", "installation",
        "categorieAgeMax", "tournoiInterne", "nomLigue", "nomEngagement",
        "codeLigue", "modeleDeBalle", "jugeArbitre", "adresse2Engagement",
        "epreuves", "dateDebut", "serie", "dateFin", "dateValidation",
        "codePostalEngagement", "codeClub", "prixEspece",
    )
    # Additional fields from the response, emitted after japPhoneNumber.
    trailing_fields = (
        "adresse1Engagement", "originalId", "familleTournoi", "isTournoi",
        "natureWithCatAge",
    )
    # These keys fall back to an empty list instead of None when absent.
    list_default_fields = {
        "naturesTerrains", "idsArbitres", "installations", "familleTournoi",
    }

    def copy_fields(item, target, names):
        for field_name in names:
            if field_name in list_default_fields:
                target[field_name] = item.get(field_name, [])
            else:
                target[field_name] = item.get(field_name)

    tournaments = []

    try:
        # An alert command is treated as maintenance mode.
        if any(cmd.get('command') == 'alert' for cmd in commands):
            logger.warning("Maintenance mode detected")
            return None

        # First command carrying the search results, if any.
        result_command = next(
            (cmd for cmd in commands if cmd.get('command') == 'recherche_tournois_update'),
            None,
        )
        if result_command is not None:
            logger.info("Found recherche_tournois_update command!")

        if not (result_command and result_command.get('results')):
            logger.error("No recherche_tournois_update command found in AJAX response")
            return {'tournaments': [], 'total_results': 0, 'current_count': 0}

        results = result_command['results']
        items = results.get('items', [])
        total_results = results.get('nb_results', 0)

        logger.info(f"Processing {len(items)} tournaments from results")

        for item in items:
            tournament = {"id": str(item.get('id', ''))}
            copy_fields(item, tournament, leading_fields)
            # Will be populated by separate umpire call
            tournament["japPhoneNumber"] = None
            copy_fields(item, tournament, trailing_fields)
            tournaments.append(tournament)

        logger.info(f"Successfully parsed {len(tournaments)} tournaments from response")
        return {
            'tournaments': tournaments,
            'total_results': total_results,
            'current_count': len(tournaments),
        }

    except Exception as e:
        logger.error(f"Error parsing AJAX response: {e}")
        return None
|
|
|
|
def scrape_fft_all_tournaments(sorting_option=None, page=0, start_date=None, end_date=None,
                               city='', distance=15, categories=None, levels=None,
                               lat=None, lng=None, ages=None, tournament_types=None,
                               national_cup=False):
    """
    Fetch one page of PADEL tournaments from the FFT search, by city or ligue.

    Follows the same flow as ``scrape_fft_club_tournaments``: open the search
    page in headless Chromium, read ``form_build_id``, POST the search to
    ``/system/ajax`` from inside the page and parse the JSON answer with
    ``_parse_ajax_response``.

    Args:
        sorting_option: Value for the ``sort`` parameter; defaults to
            ``dateDebut+asc`` when falsy.
        page: Zero-based result page index.
        start_date: Optional start date string. When given without
            ``end_date`` it must be in ``%d/%m/%y`` format and the end date
            defaults to 90 days later.
        end_date: Optional end date string, sent as-is.
        city: City name; when non-blank a ``ville`` search is performed,
            otherwise a ``ligue`` search.
        distance: Search radius sent with a city search (converted with
            ``int``).
        categories: Optional iterable of ``epreuve`` filter values.
        levels: Optional iterable of ``categorie_tournoi`` filter values.
        lat: Optional latitude sent with a city search.
        lng: Optional longitude sent with a city search.
        ages: Optional iterable of ``categorie_age`` filter values.
        tournament_types: Optional iterable of ``type`` filter values; each
            is capitalized before sending.
        national_cup: When true, adds ``tournoi_npc=1``.

    Returns:
        Whatever ``_parse_ajax_response`` returns for the decoded response,
        or None when the site redirects to Queue-It, the form id cannot be
        found, the request fails, the body is not JSON, or any exception
        occurs.
    """
    # Function-scope import so this block does not depend on file-level edits.
    from urllib.parse import quote_plus

    logger.info(f"Starting Playwright scraping for city: {city}")

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page_obj = browser.new_page()
                page_obj.set_extra_http_headers({
                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
                })

                # Navigate to FFT
                target_url = "https://tenup.fft.fr/recherche/tournois"
                logger.info(f"Navigating to: {target_url}")
                page_obj.goto(target_url)

                current_url = page_obj.url
                logger.info(f"Current URL: {current_url}")

                if "queue-it.net" in current_url.lower():
                    logger.warning("Still in Queue-It")
                    return None

                # Extract form_build_id
                form_input = page_obj.query_selector('input[name="form_build_id"]')
                if not form_input:
                    logger.error("Could not find form_build_id")
                    return None

                form_build_id = form_input.get_attribute('value')
                logger.info(f"Extracted form_build_id: {form_build_id}")

                # Date range
                date_component = ""
                if start_date and end_date:
                    date_component = f"&date[start]={start_date}&date[end]={end_date}"
                elif start_date:
                    try:
                        start_dt = datetime.strptime(start_date, "%d/%m/%y")
                        end_dt = start_dt + timedelta(days=90)
                        date_component = f"&date[start]={start_date}&date[end]={end_dt.strftime('%d/%m/%y')}"
                    except ValueError:
                        logger.warning(f"Invalid date format: {start_date}")

                # Optional filters, appended in a fixed order
                filter_parts = []

                if categories:
                    logger.info(f"Adding categories filter: {categories}")
                    filter_parts.extend(f"&epreuve[{category}]={category}" for category in categories)

                if levels:
                    logger.info(f"Adding levels filter: {levels}")
                    filter_parts.extend(f"&categorie_tournoi[{level}]={level}" for level in levels)

                if ages:
                    logger.info(f"Adding ages filter: {ages}")
                    filter_parts.extend(f"&categorie_age[{age}]={age}" for age in ages)

                if tournament_types:
                    logger.info(f"Adding types filter: {tournament_types}")
                    for t_type in tournament_types:
                        capitalized_type = t_type.capitalize()
                        filter_parts.append(f"&type[{capitalized_type}]={capitalized_type}")

                if national_cup:
                    logger.info("Adding national cup filter")
                    filter_parts.append("&tournoi_npc=1")

                filter_params = "".join(filter_parts)

                sort_param = f"&sort={sorting_option}" if sorting_option else "&sort=dateDebut+asc"

                if city and city.strip():
                    # Percent-encode the city so '&', '=' or '%' cannot
                    # corrupt the form body. Spaces still become '+'.
                    city_name_encoded = quote_plus(city.strip())
                    search_prefix = (
                        "recherche_type=ville"
                        f"&ville[autocomplete][value_container][value_field]={city_name_encoded}"
                        f"&ville[autocomplete][value_container][label_field]={city_name_encoded}"
                    )

                    # Compare against None/"" rather than truthiness so a
                    # coordinate of 0 is still sent.
                    if lat not in (None, "") and lng not in (None, ""):
                        search_prefix += (
                            f"&ville[autocomplete][value_container][lat_field]={lat}"
                            f"&ville[autocomplete][value_container][lng_field]={lng}"
                        )

                    search_prefix += f"&ville[distance][value_field]={int(distance)}"
                else:
                    # Default to ligue search if no city provided
                    search_prefix = "recherche_type=ligue"

                # The filters are now sent for city searches too; previously
                # they were built but only added to the ligue request.
                params = (
                    f"{search_prefix}"
                    f"&pratique=PADEL{date_component}{filter_params}"
                    f"&page={page}{sort_param}"
                    f"&form_build_id={form_build_id}"
                    "&form_id=recherche_tournois_form"
                    "&_triggering_element_name=submit_page"
                    "&_triggering_element_value=Submit+page"
                )
                logger.info(f"AJAX Parameters: {params}")

                # The request body is passed to the page function as an
                # argument instead of being spliced into the JavaScript
                # source, so backticks, "${" or backslashes in the values
                # cannot alter the script.
                ajax_script = """
                async (body) => {
                    try {
                        const response = await fetch('https://tenup.fft.fr/system/ajax', {
                            method: 'POST',
                            headers: {
                                'Accept': 'application/json, text/javascript, */*; q=0.01',
                                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                                'X-Requested-With': 'XMLHttpRequest',
                                'Origin': 'https://tenup.fft.fr',
                                'Referer': 'https://tenup.fft.fr/recherche/tournois'
                            },
                            body: body
                        });

                        const status = response.status;
                        const responseText = await response.text();

                        return {
                            success: response.ok,
                            status: status,
                            responseText: responseText
                        };
                    } catch (error) {
                        return {
                            success: false,
                            error: error.message
                        };
                    }
                }
                """

                logger.info("Making AJAX request...")
                result = page_obj.evaluate(ajax_script, params)
            finally:
                # Single cleanup point for every exit path, including errors.
                browser.close()

        logger.info(f"AJAX Response Status: {result.get('status')}")
        logger.info(f"AJAX Response Success: {result.get('success')}")

        if not result.get('success'):
            logger.error(f"AJAX request failed: {result.get('error')}")
            return None

        response_text = result.get('responseText', '')
        logger.info(f"Raw Response Length: {len(response_text)}")

        try:
            json_data = json.loads(response_text)
        except json.JSONDecodeError as json_error:
            logger.error(f"JSON Parse Error: {json_error}")
            logger.error(f"Response text: {response_text}")
            return None

        logger.info(f"JSON Response Type: {type(json_data)}")
        return _parse_ajax_response(json_data)

    except Exception as e:
        # Best-effort scraper: callers receive None rather than an exception.
        logger.error(f"Error in Playwright scraping: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        return None
|
|
|