Laurent 2 months ago
commit 5754a655bc
  1. 484
      api/utils.py

@ -9,19 +9,23 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
logger = logging.getLogger(__name__)
def check_version_smaller_than_1_1_12(version_str):
    """Return True if *version_str* is strictly smaller than version 1.1.12.

    Accepts plain dotted versions ("1.1.11") as well as versions carrying a
    trailing build tag, e.g. "1.1.12 (2)" — everything after the first
    whitespace is ignored.

    Returns False for empty/None input or for a version whose components are
    not all integers (we conservatively treat unparseable versions as not
    smaller).
    """
    if not version_str:
        return False
    # Keep only the dotted part, dropping e.g. a trailing " (2)" build tag.
    dotted = version_str.split()[0]
    try:
        version_parts = [int(component) for component in dotted.split(".")]
    except ValueError:
        # Non-numeric component (e.g. "1.1.12-beta") — cannot compare reliably.
        return False
    # List comparison is lexicographic, which matches numeric version ordering
    # here; a shorter equal prefix (e.g. [1, 1]) correctly compares as smaller.
    return version_parts < [1, 1, 12]
def scrape_fft_club_tournaments(club_code, club_name, start_date=None, end_date=None, page=0):
def scrape_fft_club_tournaments(
club_code, club_name, start_date=None, end_date=None, page=0
):
"""
Scrapes FFT tournaments using Playwright with detailed debugging
"""
@ -31,9 +35,11 @@ def scrape_fft_club_tournaments(club_code, club_name, start_date=None, end_date=
browser = p.chromium.launch(headless=True)
page_obj = browser.new_page()
page_obj.set_extra_http_headers({
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
})
page_obj.set_extra_http_headers(
{
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
}
)
# Navigate to FFT
target_url = "https://tenup.fft.fr/recherche/tournois"
@ -57,7 +63,7 @@ def scrape_fft_club_tournaments(club_code, club_name, start_date=None, end_date=
browser.close()
return None
form_build_id = form_input.get_attribute('value')
form_build_id = form_input.get_attribute("value")
logger.info(f"Extracted form_build_id: {form_build_id}")
# Build parameters
@ -121,8 +127,8 @@ def scrape_fft_club_tournaments(club_code, club_name, start_date=None, end_date=
logger.info(f"AJAX Response Status: {result.get('status')}")
logger.info(f"AJAX Response Success: {result.get('success')}")
if result.get('success'):
response_text = result.get('responseText', '')
if result.get("success"):
response_text = result.get("responseText", "")
logger.info(f"Raw Response Length: {len(response_text)}")
# logger.info(f"Raw Response (first 500 chars): {response_text[:500]}")
@ -151,7 +157,10 @@ def scrape_fft_club_tournaments(club_code, club_name, start_date=None, end_date=
logger.error(f"Traceback: {traceback.format_exc()}")
return None
def scrape_fft_club_tournaments_all_pages(club_code, club_name, start_date=None, end_date=None):
def scrape_fft_club_tournaments_all_pages(
club_code, club_name, start_date=None, end_date=None
):
"""
Scrapes all pages of FFT tournaments for a specific club
"""
@ -168,7 +177,7 @@ def scrape_fft_club_tournaments_all_pages(club_code, club_name, start_date=None,
club_name=club_name,
start_date=start_date,
end_date=end_date,
page=page
page=page,
)
# Debug: Log what we got
@ -178,7 +187,7 @@ def scrape_fft_club_tournaments_all_pages(club_code, club_name, start_date=None,
logger.warning(f"No result for page {page}")
break
tournaments = result.get('tournaments', [])
tournaments = result.get("tournaments", [])
logger.info(f"Page {page} returned {len(tournaments)} tournaments")
if not tournaments:
@ -188,8 +197,10 @@ def scrape_fft_club_tournaments_all_pages(club_code, club_name, start_date=None,
all_tournaments.extend(tournaments)
# Check if we have all results
total_results = result.get('total_results', 0)
logger.info(f"Total so far: {len(all_tournaments)}, Target: {total_results}")
total_results = result.get("total_results", 0)
logger.info(
f"Total so far: {len(all_tournaments)}, Target: {total_results}"
)
if len(all_tournaments) >= total_results:
logger.info("Got all tournaments, stopping")
@ -203,15 +214,18 @@ def scrape_fft_club_tournaments_all_pages(club_code, club_name, start_date=None,
logger.error(f"Error on page {page}: {e}")
break
logger.info(f"Completed scraping: {len(all_tournaments)} tournaments across {page + 1} pages")
logger.info(
f"Completed scraping: {len(all_tournaments)} tournaments across {page + 1} pages"
)
return {
'tournaments': all_tournaments,
'total_results': len(all_tournaments),
'current_count': len(all_tournaments),
'pages_scraped': page + 1
"tournaments": all_tournaments,
"total_results": len(all_tournaments),
"current_count": len(all_tournaments),
"pages_scraped": page + 1,
}
def _parse_ajax_response(commands):
"""
Parse the AJAX response commands to extract tournament data
@ -222,114 +236,133 @@ def _parse_ajax_response(commands):
try:
# Check for alert commands (maintenance mode)
for command in commands:
if command.get('command') == 'alert':
if command.get("command") == "alert":
logger.warning("Maintenance mode detected")
return None
# Find the command with results
result_command = None
for command in commands:
if command.get('command') == 'recherche_tournois_update':
if command.get("command") == "recherche_tournois_update":
result_command = command
logger.info("Found recherche_tournois_update command!")
break
if result_command and result_command.get('results'):
results = result_command['results']
items = results.get('items', [])
total_results = results.get('nb_results', 0)
if result_command and result_command.get("results"):
results = result_command["results"]
items = results.get("items", [])
total_results = results.get("nb_results", 0)
logger.info(f"Processing {len(items)} tournaments from results")
for item in items:
# Parse dates - they're already in the correct format
date_debut = item.get('dateDebut')
date_fin = item.get('dateFin')
date_validation = item.get('dateValidation')
date_debut = item.get("dateDebut")
date_fin = item.get("dateFin")
date_validation = item.get("dateValidation")
# Build the tournament object to match Swift FederalTournament structure
tournament = {
"id": str(item.get('id', '')),
"millesime": item.get('millesime'),
"libelle": item.get('libelle'),
"tmc": item.get('tmc'),
"tarifAdulteChampionnat": item.get('tarifAdulteChampionnat'),
"type": item.get('type'),
"ageReel": item.get('ageReel'),
"naturesTerrains": item.get('naturesTerrains', []),
"idsArbitres": item.get('idsArbitres', []),
"tarifJeuneChampionnat": item.get('tarifJeuneChampionnat'),
"international": item.get('international'),
"inscriptionEnLigne": item.get('inscriptionEnLigne'),
"categorieTournoi": item.get('categorieTournoi'),
"prixLot": item.get('prixLot'),
"paiementEnLigne": item.get('paiementEnLigne'),
"reductionAdherentJeune": item.get('reductionAdherentJeune'),
"reductionAdherentAdulte": item.get('reductionAdherentAdulte'),
"paiementEnLigneObligatoire": item.get('paiementEnLigneObligatoire'),
"villeEngagement": item.get('villeEngagement'),
"senior": item.get('senior'),
"veteran": item.get('veteran'),
"inscriptionEnLigneEnCours": item.get('inscriptionEnLigneEnCours'),
"avecResultatPublie": item.get('avecResultatPublie'),
"code": item.get('code'),
"categorieAge": item.get('categorieAge'),
"codeComite": item.get('codeComite'),
"installations": item.get('installations', []),
"reductionEpreuveSupplementaireJeune": item.get('reductionEpreuveSupplementaireJeune'),
"reductionEpreuveSupplementaireAdulte": item.get('reductionEpreuveSupplementaireAdulte'),
"nomComite": item.get('nomComite'),
"naturesEpreuves": item.get('naturesEpreuves'),
"jeune": item.get('jeune'),
"courrielEngagement": item.get('courrielEngagement'),
"nomClub": item.get('nomClub'),
"installation": item.get('installation'),
"categorieAgeMax": item.get('categorieAgeMax'),
"tournoiInterne": item.get('tournoiInterne'),
"nomLigue": item.get('nomLigue'),
"nomEngagement": item.get('nomEngagement'),
"codeLigue": item.get('codeLigue'),
"modeleDeBalle": item.get('modeleDeBalle'),
"jugeArbitre": item.get('jugeArbitre'),
"adresse2Engagement": item.get('adresse2Engagement'),
"epreuves": item.get('epreuves'),
"id": str(item.get("id", "")),
"millesime": item.get("millesime"),
"libelle": item.get("libelle"),
"tmc": item.get("tmc"),
"tarifAdulteChampionnat": item.get("tarifAdulteChampionnat"),
"type": item.get("type"),
"ageReel": item.get("ageReel"),
"naturesTerrains": item.get("naturesTerrains", []),
"idsArbitres": item.get("idsArbitres", []),
"tarifJeuneChampionnat": item.get("tarifJeuneChampionnat"),
"international": item.get("international"),
"inscriptionEnLigne": item.get("inscriptionEnLigne"),
"categorieTournoi": item.get("categorieTournoi"),
"prixLot": item.get("prixLot"),
"paiementEnLigne": item.get("paiementEnLigne"),
"reductionAdherentJeune": item.get("reductionAdherentJeune"),
"reductionAdherentAdulte": item.get("reductionAdherentAdulte"),
"paiementEnLigneObligatoire": item.get(
"paiementEnLigneObligatoire"
),
"villeEngagement": item.get("villeEngagement"),
"senior": item.get("senior"),
"veteran": item.get("veteran"),
"inscriptionEnLigneEnCours": item.get("inscriptionEnLigneEnCours"),
"avecResultatPublie": item.get("avecResultatPublie"),
"code": item.get("code"),
"categorieAge": item.get("categorieAge"),
"codeComite": item.get("codeComite"),
"installations": item.get("installations", []),
"reductionEpreuveSupplementaireJeune": item.get(
"reductionEpreuveSupplementaireJeune"
),
"reductionEpreuveSupplementaireAdulte": item.get(
"reductionEpreuveSupplementaireAdulte"
),
"nomComite": item.get("nomComite"),
"naturesEpreuves": item.get("naturesEpreuves"),
"jeune": item.get("jeune"),
"courrielEngagement": item.get("courrielEngagement"),
"nomClub": item.get("nomClub"),
"installation": item.get("installation"),
"categorieAgeMax": item.get("categorieAgeMax"),
"tournoiInterne": item.get("tournoiInterne"),
"nomLigue": item.get("nomLigue"),
"nomEngagement": item.get("nomEngagement"),
"codeLigue": item.get("codeLigue"),
"modeleDeBalle": item.get("modeleDeBalle"),
"jugeArbitre": item.get("jugeArbitre"),
"adresse2Engagement": item.get("adresse2Engagement"),
"epreuves": item.get("epreuves"),
"dateDebut": date_debut,
"serie": item.get('serie'),
"serie": item.get("serie"),
"dateFin": date_fin,
"dateValidation": date_validation,
"codePostalEngagement": item.get('codePostalEngagement'),
"codeClub": item.get('codeClub'),
"prixEspece": item.get('prixEspece'),
"codePostalEngagement": item.get("codePostalEngagement"),
"codeClub": item.get("codeClub"),
"prixEspece": item.get("prixEspece"),
"japPhoneNumber": None, # Will be populated by separate umpire call
# Additional fields from the response
"adresse1Engagement": item.get('adresse1Engagement'),
"originalId": item.get('originalId'),
"familleTournoi": item.get('familleTournoi', []),
"isTournoi": item.get('isTournoi'),
"natureWithCatAge": item.get('natureWithCatAge')
"adresse1Engagement": item.get("adresse1Engagement"),
"originalId": item.get("originalId"),
"familleTournoi": item.get("familleTournoi", []),
"isTournoi": item.get("isTournoi"),
"natureWithCatAge": item.get("natureWithCatAge"),
}
tournaments.append(tournament)
logger.info(f"Successfully parsed {len(tournaments)} tournaments from response")
logger.info(
f"Successfully parsed {len(tournaments)} tournaments from response"
)
return {
'tournaments': tournaments,
'total_results': total_results,
'current_count': len(tournaments)
"tournaments": tournaments,
"total_results": total_results,
"current_count": len(tournaments),
}
else:
logger.error("No recherche_tournois_update command found in AJAX response")
return {'tournaments': [], 'total_results': 0, 'current_count': 0}
return {"tournaments": [], "total_results": 0, "current_count": 0}
except Exception as e:
logger.error(f"Error parsing AJAX response: {e}")
return None
def scrape_fft_all_tournaments(sorting_option=None, page=0, start_date=None, end_date=None,
city='', distance=15, categories=None, levels=None,
lat=None, lng=None, ages=None, tournament_types=None,
national_cup=False):
def scrape_fft_all_tournaments(
sorting_option=None,
page=0,
start_date=None,
end_date=None,
city="",
distance=15,
categories=None,
levels=None,
lat=None,
lng=None,
ages=None,
tournament_types=None,
national_cup=False,
):
"""
Scrapes FFT tournaments using Playwright with detailed debugging
Based exactly on the working scrape_fft_club_tournaments function
@ -341,9 +374,11 @@ def scrape_fft_all_tournaments(sorting_option=None, page=0, start_date=None, end
browser = p.chromium.launch(headless=True)
page_obj = browser.new_page()
page_obj.set_extra_http_headers({
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
})
page_obj.set_extra_http_headers(
{
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
}
)
# Navigate to FFT
target_url = "https://tenup.fft.fr/recherche/tournois"
@ -367,7 +402,7 @@ def scrape_fft_all_tournaments(sorting_option=None, page=0, start_date=None, end
browser.close()
return None
form_build_id = form_input.get_attribute('value')
form_build_id = form_input.get_attribute("value")
logger.info(f"Extracted form_build_id: {form_build_id}")
# Build parameters - EXACT same pattern as club function
@ -486,8 +521,8 @@ def scrape_fft_all_tournaments(sorting_option=None, page=0, start_date=None, end
logger.info(f"AJAX Response Status: {result.get('status')}")
logger.info(f"AJAX Response Success: {result.get('success')}")
if result.get('success'):
response_text = result.get('responseText', '')
if result.get("success"):
response_text = result.get("responseText", "")
logger.info(f"Raw Response Length: {len(response_text)}")
try:
@ -525,13 +560,13 @@ def get_umpire_data(tournament_id):
browser = p.chromium.launch(
headless=True,
args=[
'--no-sandbox',
'--disable-dev-shm-usage',
'--disable-images', # Don't load images
'--disable-javascript', # Disable JS for faster loading
'--disable-plugins',
'--disable-extensions'
]
"--no-sandbox",
"--disable-dev-shm-usage",
"--disable-images", # Don't load images
"--disable-javascript", # Disable JS for faster loading
"--disable-plugins",
"--disable-extensions",
],
)
page = browser.new_page()
@ -553,7 +588,9 @@ def get_umpire_data(tournament_id):
html_content = page.content()
# Extract name
name_pattern = r'tournoi-detail-page-inscription-responsable-title">\s*([^<]+)\s*<'
name_pattern = (
r'tournoi-detail-page-inscription-responsable-title">\s*([^<]+)\s*<'
)
name_match = re.search(name_pattern, html_content)
name = name_match.group(1).strip() if name_match else None
@ -569,7 +606,9 @@ def get_umpire_data(tournament_id):
browser.close()
logger.info(f"Extracted umpire data: name={name}, email={email}, phone={phone}")
logger.info(
f"Extracted umpire data: name={name}, email={email}, phone={phone}"
)
return name, email, phone
except Exception as page_error:
@ -581,6 +620,7 @@ def get_umpire_data(tournament_id):
logger.error(f"Error in umpire data extraction: {e}")
return None, None, None
def _get_umpire_data_requests_fallback(tournament_id):
"""
Fallback method using requests (may hit Queue-It)
@ -591,7 +631,7 @@ def _get_umpire_data_requests_fallback(tournament_id):
url = f"https://tenup.fft.fr/tournoi/{tournament_id}"
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15'
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
}
response = requests.get(url, headers=headers, timeout=30)
@ -603,7 +643,9 @@ def _get_umpire_data_requests_fallback(tournament_id):
html_content = response.text
# Extract using regex (original method)
name_pattern = r'tournoi-detail-page-inscription-responsable-title">\s*([^<]+)\s*<'
name_pattern = (
r'tournoi-detail-page-inscription-responsable-title">\s*([^<]+)\s*<'
)
name_match = re.search(name_pattern, html_content)
name = name_match.group(1).strip() if name_match else None
@ -615,15 +657,31 @@ def _get_umpire_data_requests_fallback(tournament_id):
phone_match = re.search(phone_pattern, html_content)
phone = phone_match.group(1).strip() if phone_match else None
logger.info(f"Extracted umpire data (requests): name={name}, email={email}, phone={phone}")
logger.info(
f"Extracted umpire data (requests): name={name}, email={email}, phone={phone}"
)
return name, email, phone
except Exception as e:
logger.error(f"Error getting umpire data with requests: {e}")
return None, None, None
def _scrape_single_page(sorting_option, page, start_date, end_date, city, distance,
categories, levels, lat, lng, ages, tournament_types, national_cup):
def _scrape_single_page(
sorting_option,
page,
start_date,
end_date,
city,
distance,
categories,
levels,
lat,
lng,
ages,
tournament_types,
national_cup,
):
"""
Helper function to scrape a single page of tournaments
"""
@ -640,14 +698,25 @@ def _scrape_single_page(sorting_option, page, start_date, end_date, city, distan
lng=lng,
ages=ages,
tournament_types=tournament_types,
national_cup=national_cup
national_cup=national_cup,
)
def scrape_fft_all_tournaments_concurrent(sorting_option=None, start_date=None, end_date=None,
city='', distance=15, categories=None, levels=None,
lat=None, lng=None, ages=None, tournament_types=None,
national_cup=False, max_workers=5):
def scrape_fft_all_tournaments_concurrent(
sorting_option=None,
start_date=None,
end_date=None,
city="",
distance=15,
categories=None,
levels=None,
lat=None,
lng=None,
ages=None,
tournament_types=None,
national_cup=False,
max_workers=5,
):
"""
Scrapes all remaining pages of FFT tournaments concurrently (pages 1 to end)
This assumes page 0 was already fetched by the client
@ -668,21 +737,26 @@ def scrape_fft_all_tournaments_concurrent(sorting_option=None, start_date=None,
lng=lng,
ages=ages,
tournament_types=tournament_types,
national_cup=national_cup
national_cup=national_cup,
)
if not first_page_result:
logger.error("Failed to get first page results for pagination info")
return None
total_results = first_page_result.get('total_results', 0)
first_page_tournaments = first_page_result.get('tournaments', [])
total_results = first_page_result.get("total_results", 0)
first_page_tournaments = first_page_result.get("tournaments", [])
results_per_page = len(first_page_tournaments)
logger.info(f"Total results: {total_results}, Results per page: {results_per_page}")
if total_results == 0:
return {'tournaments': [], 'total_results': 0, 'current_count': 0, 'pages_scraped': 0}
return {
"tournaments": [],
"total_results": 0,
"current_count": 0,
"pages_scraped": 0,
}
# Calculate number of pages needed
if results_per_page > 0:
@ -694,7 +768,12 @@ def scrape_fft_all_tournaments_concurrent(sorting_option=None, start_date=None,
# If only one page total, return empty since page 0 was already handled
if total_pages <= 1:
return {'tournaments': [], 'total_results': total_results, 'current_count': 0, 'pages_scraped': 0}
return {
"tournaments": [],
"total_results": total_results,
"current_count": 0,
"pages_scraped": 0,
}
# Scrape all remaining pages concurrently (pages 1 to total_pages-1)
all_tournaments = []
@ -705,8 +784,19 @@ def scrape_fft_all_tournaments_concurrent(sorting_option=None, start_date=None,
for page in range(1, total_pages):
future = executor.submit(
_scrape_single_page,
sorting_option, page, start_date, end_date, city, distance,
categories, levels, lat, lng, ages, tournament_types, national_cup
sorting_option,
page,
start_date,
end_date,
city,
distance,
categories,
levels,
lat,
lng,
ages,
tournament_types,
national_cup,
)
futures.append((page, future))
@ -714,24 +804,30 @@ def scrape_fft_all_tournaments_concurrent(sorting_option=None, start_date=None,
for page, future in futures:
try:
result = future.result(timeout=60) # 60 second timeout per page
if result and result.get('tournaments'):
tournaments = result.get('tournaments', [])
if result and result.get("tournaments"):
tournaments = result.get("tournaments", [])
all_tournaments.extend(tournaments)
logger.info(f"Page {page} completed: {len(tournaments)} tournaments")
logger.info(
f"Page {page} completed: {len(tournaments)} tournaments"
)
else:
logger.warning(f"Page {page} returned no results")
except Exception as e:
logger.error(f"Error processing page {page}: {e}")
logger.info(f"Concurrent scraping completed: {len(all_tournaments)} tournaments from {total_pages-1} remaining pages")
logger.info(
f"Concurrent scraping completed: {len(all_tournaments)} tournaments from {total_pages - 1} remaining pages"
)
return {
'tournaments': all_tournaments,
'total_results': total_results,
'current_count': len(all_tournaments),
'pages_scraped': total_pages - 1 # Excluding page 0 which was handled separately
"tournaments": all_tournaments,
"total_results": total_results,
"current_count": len(all_tournaments),
"pages_scraped": total_pages
- 1, # Excluding page 0 which was handled separately
}
def _parse_clubs_ajax_response(json_data):
"""
Parse the clubs AJAX response to match Swift FederalClubResponse structure
@ -746,19 +842,24 @@ def _parse_clubs_ajax_response(json_data):
# Try to extract clubs data from different possible response structures
if isinstance(json_data, dict):
# Pattern 1: Direct club_markers array
if 'club_markers' in json_data:
clubs_data = json_data['club_markers']
total_results = json_data.get('nombreResultat', len(clubs_data))
if "club_markers" in json_data:
clubs_data = json_data["club_markers"]
total_results = json_data.get("nombreResultat", len(clubs_data))
# Pattern 2: Results wrapper
elif 'results' in json_data:
results = json_data['results']
clubs_data = results.get('clubs', results.get('items', results.get('club_markers', [])))
total_results = results.get('nombreResultat', results.get('total', results.get('nb_results', len(clubs_data))))
elif "results" in json_data:
results = json_data["results"]
clubs_data = results.get(
"clubs", results.get("items", results.get("club_markers", []))
)
total_results = results.get(
"nombreResultat",
results.get("total", results.get("nb_results", len(clubs_data))),
)
# Pattern 3: Direct array in response
elif 'data' in json_data:
clubs_data = json_data['data']
elif "data" in json_data:
clubs_data = json_data["data"]
total_results = len(clubs_data)
# Pattern 4: Response is the clubs array directly
@ -780,49 +881,55 @@ def _parse_clubs_ajax_response(json_data):
if isinstance(item, dict):
# Extract pratiques array
pratiques = []
if 'pratiques' in item:
pratiques = item['pratiques']
elif 'practices' in item:
pratiques = item['practices']
if "pratiques" in item:
pratiques = item["pratiques"]
elif "practices" in item:
pratiques = item["practices"]
else:
# Default to PADEL if not specified
pratiques = ["PADEL"]
# Ensure pratiques are uppercase strings
pratiques = [p.upper() if isinstance(p, str) else str(p).upper() for p in pratiques]
pratiques = [
p.upper() if isinstance(p, str) else str(p).upper()
for p in pratiques
]
club_marker = {
"nom": item.get('nom', item.get('name', '')),
"clubId": str(item.get('clubId', item.get('id', item.get('code', '')))),
"ville": item.get('ville', item.get('city', '')),
"distance": str(item.get('distance', '0')),
"terrainPratiqueLibelle": item.get('terrainPratiqueLibelle', item.get('courtsInfo', '')),
"nom": item.get("nom", item.get("name", "")),
"clubId": str(
item.get("clubId", item.get("id", item.get("code", "")))
),
"ville": item.get("ville", item.get("city", "")),
"distance": str(item.get("distance", "0")),
"terrainPratiqueLibelle": item.get(
"terrainPratiqueLibelle", item.get("courtsInfo", "")
),
"pratiques": pratiques,
"lat": float(item.get('lat', item.get('latitude', 0.0))),
"lng": float(item.get('lng', item.get('longitude', 0.0)))
"lat": float(item.get("lat", item.get("latitude", 0.0))),
"lng": float(item.get("lng", item.get("longitude", 0.0))),
}
club_markers.append(club_marker)
logger.info(f"Successfully parsed {len(club_markers)} club markers from response")
logger.info(
f"Successfully parsed {len(club_markers)} club markers from response"
)
# Return the response in the format expected by Swift FederalClubResponse
return {
"typeRecherche": "clubs",
"nombreResultat": total_results,
"club_markers": club_markers
"club_markers": club_markers,
}
except Exception as e:
logger.error(f"Error parsing clubs AJAX response: {e}")
return {
"typeRecherche": "clubs",
"nombreResultat": 0,
"club_markers": []
}
return {"typeRecherche": "clubs", "nombreResultat": 0, "club_markers": []}
def scrape_federal_clubs(country=None, city='', latitude=None, longitude=None,
radius=15, max_workers=5):
def scrape_federal_clubs(
country=None, city="", latitude=None, longitude=None, radius=15, max_workers=5
):
"""
Scrapes FFT federal clubs by extracting data from the HTML response
"""
@ -833,16 +940,19 @@ def scrape_federal_clubs(country=None, city='', latitude=None, longitude=None,
browser = p.chromium.launch(headless=True)
page_obj = browser.new_page()
page_obj.set_extra_http_headers({
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
})
page_obj.set_extra_http_headers(
{
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15"
}
)
# Clean up city name - remove zip code and extra info
clean_city = city
if city:
import re
clean_city = re.sub(r'[,\s]*\d{5}.*$', '', city).strip()
clean_city = clean_city.rstrip(',').strip()
clean_city = re.sub(r"[,\s]*\d{5}.*$", "", city).strip()
clean_city = clean_city.rstrip(",").strip()
logger.info(f"Cleaned city name: '{city}' -> '{clean_city}'")
@ -865,7 +975,7 @@ def scrape_federal_clubs(country=None, city='', latitude=None, longitude=None,
return {
"typeRecherche": "clubs",
"nombreResultat": 0,
"club_markers": []
"club_markers": [],
}
# Use JavaScript to extract the data directly from the page
@ -904,45 +1014,45 @@ def scrape_federal_clubs(country=None, city='', latitude=None, longitude=None,
browser.close()
if result.get('success'):
type_recherche = result.get('typeRecherche', 'club')
total = result.get('total', 0)
resultat = result.get('resultat', [])
if result.get("success"):
type_recherche = result.get("typeRecherche", "club")
total = result.get("total", 0)
resultat = result.get("resultat", [])
logger.info(f"Successfully extracted {total} clubs")
# Convert resultat to club_markers format
club_markers = []
for club in resultat:
club_markers.append({
"nom": club.get('nom', ''),
"clubId": club.get('clubId', ''),
"ville": club.get('ville', ''),
"distance": club.get('distance', ''),
"terrainPratiqueLibelle": club.get('terrainPratiqueLibelle', ''),
"pratiques": club.get('pratiques', []),
"lat": club.get('lat', 0.0),
"lng": club.get('lng', 0.0)
})
club_markers.append(
{
"nom": club.get("nom", ""),
"clubId": club.get("clubId", ""),
"ville": club.get("ville", ""),
"distance": club.get("distance", ""),
"terrainPratiqueLibelle": club.get(
"terrainPratiqueLibelle", ""
),
"pratiques": club.get("pratiques", []),
"lat": club.get("lat", 0.0),
"lng": club.get("lng", 0.0),
}
)
return {
"typeRecherche": type_recherche,
"nombreResultat": total,
"club_markers": club_markers
"club_markers": club_markers,
}
else:
logger.error(f"Failed to extract data: {result.get('error')}")
return {
"typeRecherche": "clubs",
"nombreResultat": 0,
"club_markers": []
"club_markers": [],
}
except Exception as e:
logger.error(f"Error in federal clubs scraping: {e}")
logger.error(f"Traceback: {traceback.format_exc()}")
return {
"typeRecherche": "clubs",
"nombreResultat": 0,
"club_markers": []
}
return {"typeRecherche": "clubs", "nombreResultat": 0, "club_markers": []}

Loading…
Cancel
Save