fix ratings july

apikeys
Razmig Sarkissian 4 months ago
parent 09620693e8
commit fe62c93671
  1. 138
      tournaments/admin_utils.py
  2. 513
      tournaments/management/commands/analyze_rankings.py
  3. 1622
      tournaments/static/rankings/CLASSEMENT-PADEL-DAMES-07-2025.csv
  4. 8368
      tournaments/static/rankings/CLASSEMENT-PADEL-MESSIEURS-07-2025.csv

@ -15,9 +15,9 @@ from django.middleware.csrf import get_token
import concurrent.futures
import os
from functools import partial
# Fallback gender code used when a ranking row carries no 'sexe' value.
default_sexe = 'F'

# Default homologation id.
# NOTE(review): presumably pre-fills the 'idHomologation' form field — confirm.
default_id_homologation = "82546485"

# SECURITY: a live session cookie (including JWT access/refresh tokens) used
# to be hard-coded here and committed. Credentials must never live in source
# control: they expire quickly, and anyone with repository access can replay
# them. The value is now read from the environment; an empty string means
# "no default", so the operator has to supply a session id explicitly.
# The previously committed session and tokens should be revoked.
default_session_id = os.environ.get("TOURNAMENTS_DEFAULT_SESSION_ID", "")
def calculate_age_from_birth_date(birth_date_str):
"""
@ -36,9 +36,10 @@ def calculate_age_from_birth_date(birth_date_str):
except (ValueError, TypeError):
return None
def find_best_license_match(license_results, player_age_sportif):
def find_best_license_match(license_results, player):
"""
Find the best matching license from multiple results using ageSportif comparison
Also filters out players without valid classement data
Args:
license_results: List of license data from API
@ -47,21 +48,90 @@ def find_best_license_match(license_results, player_age_sportif):
Returns:
Tuple of (best_match, match_info)
"""
# Get player's age from ranking data for duplicate matching
player_age_sportif = player.get('ageSportif')
rank = player.get('classement')
lastname = player.get('nom')
firstname = player.get('prenom')
if not license_results:
return None, {"reason": "no_results"}
if len(license_results) == 1:
return license_results[0], {"reason": "single_result", "age_match": "n/a"}
# First, filter out players without valid classement data
def has_valid_classement(license_data, rank):
    """Check that a license's ranking entry carries the expected rank.

    Args:
        license_data: One license dict from the API result list.
        rank: Rank the candidate must have to be accepted.

    Returns:
        True only when the license has a non-empty 'classement' entry whose
        'rang' is set and equal to ``rank``; False otherwise.
    """
    classement = license_data.get('classement', {})
    if not classement:
        # Missing, null or empty ranking data can never match.
        return False

    # Only 'rang' takes part in the decision. The other ranking fields
    # (dateFr, points, date) were read but never used, and calling
    # .strip() on a null 'dateFr' raised AttributeError.
    rang = classement.get('rang')
    return rang is not None and rang == rank
# Second filter: keep only licenses whose holder name matches the player
def has_valid_name(license_data, firstname, lastname):
    """Check that a license belongs to the expected person.

    Args:
        license_data: One license dict from the API result list.
        firstname: First name the license's 'prenom' must equal.
        lastname: Last name the license's 'nom' must equal.

    Returns:
        False when the license carries neither a first nor a last name;
        otherwise whether both names are exactly equal to the expected ones
        (the comparison is case- and whitespace-sensitive).
    """
    candidate_first = license_data.get('prenom', '')
    candidate_last = license_data.get('nom', '')
    if candidate_first or candidate_last:
        return candidate_first == firstname and candidate_last == lastname
    # A license with no name at all can never be confirmed.
    return False
# Filter license results to only include those with valid classement
valid_license_results = [
license_data for license_data in license_results
if has_valid_name(license_data, firstname, lastname)
if has_valid_classement(license_data, rank)
]
# If no valid results after filtering, return None
if not valid_license_results:
return None, {
"reason": "no_valid_classement",
"original_count": len(license_results),
"filtered_count": 0
}
# If only one valid result, return it
if len(valid_license_results) == 1:
return valid_license_results[0], {
"reason": "single_valid_result",
"original_count": len(license_results),
"filtered_count": 1,
"age_match": "n/a"
}
# If we don't have ageSportif from ranking, take the first match
# If we don't have ageSportif from ranking, take the first valid match
if player_age_sportif is None:
return license_results[0], {"reason": "no_age_data", "used_first_result": True}
return valid_license_results[0], {
"reason": "no_age_data_used_first_valid",
"original_count": len(license_results),
"filtered_count": len(valid_license_results),
"used_first_result": True
}
best_match = None
best_age_diff = float('inf')
match_details = []
best_match_count = 0
for i, license_data in enumerate(license_results):
for i, license_data in enumerate(valid_license_results):
birth_date_fr = license_data.get('dateNaissanceFr')
calculated_age = calculate_age_from_birth_date(birth_date_fr)
@ -70,36 +140,53 @@ def find_best_license_match(license_results, player_age_sportif):
"dateNaissanceFr": birth_date_fr,
"calculated_age": calculated_age,
"player_age_sportif": player_age_sportif,
"age_difference": None
"age_difference": None,
"license": license_data.get('licence'),
"classement": license_data.get('classement', {})
}
if calculated_age is not None:
age_diff = abs(calculated_age - player_age_sportif)
match_detail["age_difference"] = age_diff
if age_diff < best_age_diff:
if age_diff < best_age_diff and best_age_diff > 1 and age_diff < 2:
best_age_diff = age_diff
best_match = license_data
best_match_count = 1
elif age_diff <= best_age_diff:
best_match_count += 1
match_details.append(match_detail)
# If no match found with valid age, use first result
# If no match found with valid age, use first valid result
if best_match is None:
best_match = license_results[0]
match_info = {
"reason": "no_valid_ages",
"reason": "no_valid_ages_used_first_valid",
"original_count": len(license_results),
"filtered_count": len(valid_license_results),
"used_first_result": True,
"match_details": match_details
}
return valid_license_results[0], match_info
else:
if best_match_count == 1:
match_info = {
"reason": "age_matched",
"best_age_difference": best_age_diff,
"total_candidates": len(license_results),
"valid_candidates": len(valid_license_results),
"match_details": match_details
}
return best_match, match_info
else:
match_info = {
"reason": "multiple_matches",
"best_age_difference": best_age_diff,
"total_candidates": len(license_results),
"valid_candidates": len(valid_license_results),
"match_details": match_details
}
return None, match_info
@staff_member_required
def test_player_details_apis(request):
@ -1089,8 +1176,8 @@ def search_player_by_name(request):
if request.method == 'POST':
session_id = request.POST.get('sessionId', '').strip()
id_homologation = request.POST.get('idHomologation', '').strip()
nom = request.POST.get('nom', '').strip()
prenom = request.POST.get('prenom', '').strip()
nom = request.POST.get('nom', '')
prenom = request.POST.get('prenom', '')
if not session_id or not id_homologation:
messages.error(request, "sessionId and idHomologation are required.")
@ -1396,11 +1483,11 @@ def enrich_rankings_with_licenses(request):
return player, False, None
# Keep original case and accents, just clean up any extra whitespace
nom = raw_nom.strip()
prenom = raw_prenom.strip()
nom = raw_nom
prenom = raw_prenom
# Get player's gender
sexe = player.get('sexe', 'H') # Default to 'H' for male
sexe = player.get('sexe', default_sexe) # Default to 'H' for male
# Setup license lookup headers
license_headers = {
@ -1440,11 +1527,12 @@ def enrich_rankings_with_licenses(request):
presence_doublon = license_data['object'].get('presenceDoublon', False)
if liste_joueurs:
# Get player's age from ranking data for duplicate matching
player_age_sportif = player.get('ageSportif')
# Find the best matching license using age comparison
license_info, match_info = find_best_license_match(liste_joueurs, player_age_sportif)
license_info, match_info = find_best_license_match(liste_joueurs, player)
if license_info is None:
player['license_lookup_status'] = 'too_many_results'
player['presenceDoublon'] = presence_doublon
return player, False, f"Failed {nom} {prenom} {player['idCrm']} -> Too many results"
# Add all license data to player
player['licence'] = license_info.get('licence')
@ -1476,7 +1564,7 @@ def enrich_rankings_with_licenses(request):
else:
player['license_lookup_status'] = 'no_results'
player['presenceDoublon'] = presence_doublon
return player, False, f"Failed {nom} {prenom} {player['idCrm']} -> No results"
return player, False, f"Failed {nom} {prenom} {player['idCrm']} {presence_doublon} -> No results"
else:
player['license_lookup_status'] = 'no_data'
return player, False, f"Failed {nom} {prenom} {player['idCrm']} -> No data"
@ -1690,7 +1778,7 @@ def enrich_rankings_with_licenses(request):
<div class="form-group">
<label for="max_workers">Concurrent Workers (1-50):</label>
<input type="number" id="max_workers" name="max_workers" class="form-control" value="10" min="1" max="200">
<input type="number" id="max_workers" name="max_workers" class="form-control" value="100" min="1" max="200">
<small>Higher values = faster processing but more server load. Adjust based on server capacity.</small>
</div>

@ -20,10 +20,12 @@ class Command(BaseCommand):
parser.add_argument('--clubs', type=int, default=10, help='Number of top clubs to display')
parser.add_argument('--leagues', type=int, default=10, help='Number of top leagues to display')
parser.add_argument('--find-anonymous', action='store_true', help='Find and match anonymous players with previous month rankings')
parser.add_argument('--confidence', type=float, default=0.7, help='Confidence threshold for automatic matching (0-1)')
parser.add_argument('--confidence', type=int, default=7, help='Confidence threshold for automatic matching (0-1)')
parser.add_argument('--auto-match', action='store_true', help='Automatically match anonymous players when confidence is high')
parser.add_argument('--output', type=str, help='Save results to output file')
parser.add_argument('--verbose', action='store_true', help='Show detailed matching information')
parser.add_argument('--named-only', action='store_true', help='Process only anonymous players WITH names (missing license)')
parser.add_argument('--unnamed-only', action='store_true', help='Process only anonymous players WITHOUT names')
def handle(self, *args, **options):
# Base directory for rankings files
@ -53,7 +55,7 @@ class Command(BaseCommand):
# Generate statistics
if players:
self.generate_statistics(players, options)
# self.generate_statistics(players, options)
# Find anonymous players if requested
if options['find_anonymous']:
@ -243,10 +245,17 @@ class Command(BaseCommand):
self.stdout.write(self.style.SUCCESS(f'\nAnonymous players: {len(anonymous_players)} ({(len(anonymous_players) / total_players) * 100:.1f}%)'))
def is_anonymous_player(self, player):
    """Return True when a player record lacks a usable name or license.

    A value counts as missing when it is the placeholder 'N/A' or an empty
    string.

    Args:
        player: Player dict with 'name', 'first_name' and 'license' keys.

    Returns:
        True if the surname or first name is missing, or if the player is
        named but has no license; False when all three are present.
    """
    missing = ('N/A', '')

    # No usable surname or first name: the player cannot be identified.
    if player['name'] in missing or player['first_name'] in missing:
        return True

    # Named but unlicensed players also count as anonymous. The earlier
    # name-only return that was left ahead of this check made it unreachable.
    return player['license'] in missing
def player_exists_in_current_month(self, prev_player, current_players_indexes):
"""
@ -276,8 +285,11 @@ class Command(BaseCommand):
self.stdout.write("Building player indexes for fast lookup...")
start_time = datetime.now()
# Players to index (only non-anonymous)
players_to_index = [p for p in current_players if not self.is_anonymous_player(p)]
# Only index players that have BOTH name AND license
players_to_index = [p for p in current_players
if (p['license'] != 'N/A' and p['license'] != '') and
(p['name'] != 'N/A' and p['name'] != '') and
(p['first_name'] != 'N/A' and p['first_name'] != '')]
# Create license index
license_index = {}
@ -376,14 +388,59 @@ class Command(BaseCommand):
matched_count = 0
# Identify anonymous players
anonymous_players = [p for p in current_players if self.is_anonymous_player(p)]
if not anonymous_players:
all_anonymous_players = [p for p in current_players if self.is_anonymous_player(p)]
if not all_anonymous_players:
self.stdout.write(self.style.SUCCESS('No anonymous players found!'))
if return_count:
return 0
return
self.stdout.write(self.style.SUCCESS(f'\nFound {len(anonymous_players)} anonymous players. Looking for matches...'))
# Check for conflicting options
if options['named_only'] and options['unnamed_only']:
self.stderr.write(self.style.ERROR('Cannot use both --named-only and --unnamed-only options together'))
if return_count:
return 0
return
# Sort anonymous players by type
anonymous_players_with_names = []
anonymous_players_without_names = []
for player in all_anonymous_players:
if (player['name'] != 'N/A' and player['name'] != '' and
player['first_name'] != 'N/A' and player['first_name'] != ''):
anonymous_players_with_names.append(player)
else:
anonymous_players_without_names.append(player)
# Select which players to process based on options
if options['named_only']:
anonymous_players = anonymous_players_with_names
processing_type = "named anonymous players (with names but missing license)"
elif options['unnamed_only']:
anonymous_players = anonymous_players_without_names
processing_type = "unnamed anonymous players (missing names)"
else:
# Default behavior: process named players first, then unnamed
anonymous_players = anonymous_players_with_names + anonymous_players_without_names
processing_type = "all anonymous players (named first, then unnamed)"
if not anonymous_players:
if options['named_only']:
self.stdout.write(self.style.SUCCESS('No anonymous players with names found!'))
elif options['unnamed_only']:
self.stdout.write(self.style.SUCCESS('No anonymous players without names found!'))
if return_count:
return 0
return
# Display summary
self.stdout.write(self.style.SUCCESS(f'\nProcessing {processing_type}'))
self.stdout.write(f'Anonymous players breakdown:')
self.stdout.write(f' Total found: {len(all_anonymous_players)}')
self.stdout.write(f' With names: {len(anonymous_players_with_names)}')
self.stdout.write(f' Without names: {len(anonymous_players_without_names)}')
self.stdout.write(f' Selected for processing: {len(anonymous_players)}')
# Find previous month file
prev_month_file = self.find_previous_month_file(current_metadata, rankings_dir)
@ -422,7 +479,19 @@ class Command(BaseCommand):
# Show progress
progress_counter += 1
if progress_counter % progress_interval == 0 or progress_counter == 1:
self.stdout.write(f' Processing anonymous player {progress_counter}/{len(anonymous_players)} ({(progress_counter/len(anonymous_players))*100:.1f}%)')
# Determine which type of player we're processing
if options['named_only']:
player_type = "named"
elif options['unnamed_only']:
player_type = "unnamed"
else:
# Default behavior: check if we're still processing named players
if progress_counter <= len(anonymous_players_with_names):
player_type = "named"
else:
player_type = "unnamed"
self.stdout.write(f' Processing {player_type} anonymous player {progress_counter}/{len(anonymous_players)} ({(progress_counter/len(anonymous_players))*100:.1f}%)')
potential_matches = self.find_potential_matches(anon_player, prev_players, current_players_indexes, options)
@ -442,7 +511,12 @@ class Command(BaseCommand):
progression = f", Progression: {anon_player['progression']}" if anon_player['progression'] != 'N/A' else ""
assimilation = f", Assimilation: {anon_player['assimilation']}" if anon_player['assimilation'] != 'N/A' else ""
self.stdout.write(f"\nAnonymous player: Rank {anon_player['rank']}, League: {anon_player['league']}{progression}{assimilation}")
# Show if this is a named or unnamed anonymous player
if (anon_player['name'] != 'N/A' and anon_player['name'] != '' and
anon_player['first_name'] != 'N/A' and anon_player['first_name'] != ''):
self.stdout.write(f"\nNamed anonymous player: {anon_player['name']} {anon_player['first_name']} - Rank {anon_player['rank']}, League: {anon_player['league']}{progression}{assimilation}")
else:
self.stdout.write(f"\nUnnamed anonymous player: Rank {anon_player['rank']}, League: {anon_player['league']}{progression}{assimilation}")
for i, match in enumerate(potential_matches[:3]): # Show top 3 matches
player = match['player']
@ -456,7 +530,11 @@ class Command(BaseCommand):
high_confidence_matches += 1
else:
if options['verbose']:
self.stdout.write(f"\nNo matches found for anonymous player: Rank {anon_player['rank']}, League: {anon_player['league']}")
if (anon_player['name'] != 'N/A' and anon_player['name'] != '' and
anon_player['first_name'] != 'N/A' and anon_player['first_name'] != ''):
self.stdout.write(f"\nNo matches found for named anonymous player: {anon_player['name']} {anon_player['first_name']} - Rank {anon_player['rank']}, League: {anon_player['league']}")
else:
self.stdout.write(f"\nNo matches found for unnamed anonymous player: Rank {anon_player['rank']}, League: {anon_player['league']}")
# Batch processing status update
if progress_counter % 100 == 0 and progress_counter > 0:
@ -472,7 +550,11 @@ class Command(BaseCommand):
# Summary
self.stdout.write(self.style.SUCCESS(f'\nMatching summary:'))
self.stdout.write(f'Total anonymous players: {len(anonymous_players)}')
self.stdout.write(f'Processing mode: {processing_type}')
self.stdout.write(f'Anonymous players processed: {len(anonymous_players)}')
if not options['named_only'] and not options['unnamed_only']:
self.stdout.write(f' Named: {len(anonymous_players_with_names)}')
self.stdout.write(f' Unnamed: {len(anonymous_players_without_names)}')
self.stdout.write(f'Players with potential matches: {matches_found}')
self.stdout.write(f'High confidence matches (≥{options["confidence"]}): {high_confidence_matches}')
self.stdout.write(f'Skipped players already in current month: {skipped_existing_players}')
@ -484,6 +566,7 @@ class Command(BaseCommand):
# Auto-match players if requested
if options['auto_match'] and matches_found > 0 and file_path:
# Note: We pass the selected anonymous_players for matching
matched_count = self.update_rankings_with_matches(file_path, anonymous_players, results,
options['confidence'], options)
elif options['auto_match'] and file_path is None:
@ -546,32 +629,40 @@ class Command(BaseCommand):
if options['verbose']:
self.stdout.write(f" Target previous rank: {prev_rank_from_progression} (current rank {anon_rank} + progression {prog_value})")
# Show anonymous player details
self.stdout.write("\n" + "="*80)
self.stdout.write(f"Looking for matches for anonymous player at rank {anon_player['rank']}:")
self.stdout.write(f" Points: {anon_player['points']}")
self.stdout.write(f" Assimilation: {anon_player['assimilation']}")
self.stdout.write(f" Tournaments: {anon_player['tournaments_played']}")
self.stdout.write(f" League: {anon_player['league']}")
if anon_player['name'] != 'N/A' and anon_player['first_name'] != 'N/A':
self.stdout.write(f" Name: {anon_player['name']} {anon_player['first_name']}")
self.stdout.write("-"*80)
for prev_player in prev_players:
# Skip anonymous players in previous month
if self.is_anonymous_player(prev_player):
continue
# Check if this player exists in current month with the same license
exists, existing_player = self.player_exists_in_current_month(prev_player, current_players_indexes)
if exists:
# If we found the exact same player (same license), skip them
if existing_player['license'] == prev_player['license']:
skipped_players += 1
continue
# If we found someone with the same name but different license, we can still consider this player
# Initialize match data
match_data = {
'player': prev_player,
'rank_match_type': None,
'rank_diff': None,
'has_league_match': False,
'has_assimilation_match': False,
'points_similarity': 0.0,
'match_reasons': [],
'confidence': 0.0
'confidence': 0
}
# Print candidate details
self.stdout.write(f"\nChecking candidate: {prev_player['name']} {prev_player['first_name']}")
self.stdout.write(f" Rank: {prev_player['rank']}")
self.stdout.write(f" Points: {prev_player['points']}")
self.stdout.write(f" Assimilation: {prev_player['assimilation']}")
self.stdout.write(f" Tournaments: {prev_player['tournaments_played']}")
self.stdout.write(f" League: {prev_player['league']}")
# Start building confidence score
confidence_details = []
# 1. PRIMARY MATCHER: Previous rank match
if prev_rank_from_progression is not None:
try:
@ -583,68 +674,142 @@ class Command(BaseCommand):
if rank_diff == 0:
match_data['rank_match_type'] = 'exact'
match_data['match_reasons'].append(f"exact previous rank match ({prev_rank_value})")
match_data['confidence'] = 0.7
elif rank_diff <= 3:
match_data['rank_match_type'] = 'close'
match_data['match_reasons'].append(f"close previous rank match ({prev_rank_value} vs {prev_rank_from_progression})")
match_data['confidence'] = 0.4
elif rank_diff <= 10:
match_data['rank_match_type'] = 'approximate'
match_data['match_reasons'].append(f"approximate previous rank match ({prev_rank_value} vs {prev_rank_from_progression})")
match_data['confidence'] = 0.2
match_data['confidence'] = 7
# Assimilation match
if anon_player['assimilation'] == prev_player['assimilation']:
match_data['confidence'] += 3
confidence_details.append(f"Assimilation match (+0.3)")
match_data['match_reasons'].append(f"same assimilation ({anon_player['assimilation']})")
# League match
if (anon_player['league'] == prev_player['league'] and
anon_player['league'] != 'N/A' and anon_player['league'] != ''):
match_data['confidence'] += 7
confidence_details.append(f"League match (+0.5)")
match_data['match_reasons'].append(f"same league ({anon_player['league']})")
# Tournament count comparison
try:
anon_tournaments = int(anon_player['tournaments_played'])
prev_tournaments = int(prev_player['tournaments_played'])
tournaments_diff = abs(anon_tournaments - prev_tournaments)
if tournaments_diff == 0:
match_data['confidence'] += 4
confidence_details.append(f"Tournaments unchanged (+0.2)")
match_data['match_reasons'].append(f"same tournaments played ({anon_tournaments})")
else:
# Calculate percentage difference
max_tournaments = max(anon_tournaments, prev_tournaments)
if max_tournaments > 0:
percentage_diff = (tournaments_diff / max_tournaments) * 100
if percentage_diff <= 10:
match_data['confidence'] += 3
confidence_details.append(f"Tournaments within 10% range (+0.15, diff: {percentage_diff:.1f}%)")
match_data['match_reasons'].append(f"tournaments played: prev={prev_tournaments}, current={anon_tournaments}")
elif percentage_diff <= 20:
match_data['confidence'] += 2
confidence_details.append(f"Tournaments within 20% range (+0.1, diff: {percentage_diff:.1f}%)")
match_data['match_reasons'].append(f"tournaments played: prev={prev_tournaments}, current={anon_tournaments}")
else:
confidence_details.append(f"Tournaments too different (diff: {percentage_diff:.1f}%)")
match_data['match_reasons'].append(f"tournaments played: prev={prev_tournaments}, current={anon_tournaments}")
else:
# Handle edge case where both values are 0
match_data['confidence'] += 4
confidence_details.append(f"Both have 0 tournaments (+0.2)")
match_data['match_reasons'].append(f"both have 0 tournaments played")
except ValueError:
pass
confidence_details.append("Could not compare tournaments played")
# 2. Points similarity (new check)
# Points comparison
try:
if anon_player['points'] != 'N/A' and prev_player['points'] != 'N/A':
anon_points = float(anon_player['points'])
prev_points = float(prev_player['points'])
points_diff = abs(anon_points - prev_points)
points_similarity = max(0, 1 - (points_diff / max(anon_points, prev_points)))
match_data['match_reasons'].append(f"points: prev={prev_points}, current={anon_points}, diff={points_diff}")
if points_similarity > 0.9:
match_data['points_similarity'] = points_similarity
match_data['match_reasons'].append(f"similar points ({prev_points} vs {anon_points})")
match_data['confidence'] += 0.2
except ValueError:
pass
if points_diff == 0:
match_data['confidence'] += 4
confidence_details.append(f"Points unchanged (+0.3)")
else:
# Calculate percentage difference
max_points = max(anon_points, prev_points)
if max_points > 0:
percentage_diff = (points_diff / max_points) * 100
if percentage_diff <= 10:
match_data['confidence'] += 3
confidence_details.append(f"Points within 10% range (+0.25, diff: {percentage_diff:.1f}%)")
elif percentage_diff <= 20:
match_data['confidence'] += 2
confidence_details.append(f"Points within 20% range (+0.15, diff: {percentage_diff:.1f}%)")
elif percentage_diff <= 30:
match_data['confidence'] += 1
confidence_details.append(f"Points within 30% range (+0.1, diff: {percentage_diff:.1f}%)")
else:
confidence_details.append(f"Points too different (diff: {percentage_diff:.1f}%)")
# 3. League match
if anon_player['league'] != 'N/A' and prev_player['league'] != 'N/A':
if anon_player['league'] == prev_player['league']:
match_data['has_league_match'] = True
match_data['match_reasons'].append("league match")
match_data['confidence'] += 0.25
except ValueError:
confidence_details.append("Could not compare points")
# 4. Assimilation match
if anon_player['assimilation'] != 'N/A' and prev_player['assimilation'] != 'N/A':
if anon_player['assimilation'] == prev_player['assimilation']:
match_data['has_assimilation_match'] = True
match_data['match_reasons'].append("assimilation match")
match_data['confidence'] += 0.1
elif rank_diff <= 3:
match_data['rank_match_type'] = 'close'
match_data['match_reasons'].append(f"close previous rank match ({prev_rank_value} vs {prev_rank_from_progression})")
match_data['confidence'] = 4
elif rank_diff <= 10:
match_data['rank_match_type'] = 'approximate'
match_data['match_reasons'].append(f"approximate previous rank match ({prev_rank_value} vs {prev_rank_from_progression})")
match_data['confidence'] = 2
except ValueError:
pass
# Only consider matches with minimum confidence
if match_data['confidence'] >= 0.1:
# Name match check
if (anon_player['name'] != 'N/A' and anon_player['name'] != '' and
anon_player['first_name'] != 'N/A' and anon_player['first_name'] != ''):
if (anon_player['name'].lower() == prev_player['name'].lower() and
anon_player['first_name'].lower() == prev_player['first_name'].lower()):
match_data['confidence'] += 25
confidence_details.append(f"Exact name match (+0.4)")
match_data['match_reasons'].append("exact name match")
# Birth year match
if (anon_player['birth_year'] != 'N/A' and anon_player['birth_year'] != '' and
prev_player['birth_year'] != 'N/A' and prev_player['birth_year'] != '' and
anon_player['birth_year'] == prev_player['birth_year']):
match_data['confidence'] += 1
confidence_details.append(f"Birth year match (+0.2)")
match_data['match_reasons'].append(f"same birth year ({anon_player['birth_year']})")
# Only consider matches with reasonable confidence
if match_data['confidence'] >= 10:
# Print confidence calculation details
self.stdout.write("\n Confidence calculation:")
for detail in confidence_details:
self.stdout.write(f" {detail}")
self.stdout.write(f" Total confidence: {match_data['confidence']:.2f}")
match_data['match_reasons'] = ", ".join(match_data['match_reasons'])
potential_matches.append(match_data)
self.stdout.write(" → Considered as potential match")
# else:
# self.stdout.write(" → Rejected (confidence too low)")
# Sort matches with updated criteria
def match_sort_key(match):
    """Build the composite sort key used to order potential matches.

    Args:
        match: A match dictionary. 'rank_match_type', 'has_league_match',
            'has_assimilation_match' and 'confidence' are read directly;
            'points_similarity' is optional and defaults to 0.

    Returns:
        The tuple (rank_score, points_score, league_value,
        assimilation_value, confidence). Tuples compare element by element,
        so the rank match type dominates and confidence only breaks ties
        left by the preceding criteria.
    """
    # Rank match types missing from the table score 0, i.e. below the
    # score of 1 given to an explicit None.
    rank_score = {
        'exact': 1000,
        'close': 100,
        'approximate': 10,
        None: 1,
    }.get(match['rank_match_type'], 0)
    # Scale the similarity by 100 and truncate to an integer.
    points_score = int(match.get('points_similarity', 0) * 100)
    league_value = 2 if match['has_league_match'] else 1
    assimilation_value = 2 if match['has_assimilation_match'] else 1
    # This function must stay free of side effects: it is used as the key
    # of a sort over the list of matches, and touching a list while it is
    # being sorted is undefined behaviour.
    return (rank_score, points_score, league_value, assimilation_value, match['confidence'])
# Summary of best matches
if potential_matches:
self.stdout.write("\nTop matches found:")
for i, match in enumerate(potential_matches[:3]): # Show top 3
self.stdout.write(f"\n{i+1}. {match['player']['name']} {match['player']['first_name']}")
self.stdout.write(f" Confidence: {match['confidence']:.2f}")
self.stdout.write(f" Reasons: {match['match_reasons']}")
else:
self.stdout.write("\nNo matches found with sufficient confidence.")
potential_matches.sort(key=match_sort_key, reverse=True)
return potential_matches
def save_results(self, results, output_path):
@ -679,7 +844,7 @@ class Command(BaseCommand):
Args:
file_path: Path to the current month's rankings file
anonymous_players: List of anonymous players
anonymous_players: List of anonymous players (filtered based on command options)
matches: List of match info dictionaries
confidence_threshold: Minimum confidence to apply auto-matching
options: Command options
@ -698,39 +863,42 @@ class Command(BaseCommand):
with open(file_path, 'r', encoding='utf-8') as f:
lines = f.readlines()
# Create a map of anonymous players by rank for faster lookup
anon_by_rank = {}
for player in anonymous_players:
if player['rank'] != 'N/A':
anon_by_rank[player['rank']] = player
# Create a set of players that should be updated
# Only include players that were in our filtered anonymous_players list AND have high confidence matches
players_to_update = set()
update_info = {}
# Track which players will be updated (use a dictionary to ensure only one update per anonymous player)
players_to_update = {}
for match_info in matches:
anon_player = match_info['anonymous_player']
best_match = match_info['best_match']
rank = anon_player['rank']
if best_match['confidence'] >= confidence_threshold and rank not in players_to_update:
# This match has high enough confidence to auto-apply
# Only add if we haven't already found a match for this rank
players_to_update[rank] = {
# Only update if this player was in our filtered list AND meets confidence threshold
if anon_player in anonymous_players and best_match['confidence'] >= confidence_threshold:
# Create a unique identifier for this player
player_id = f"{anon_player['rank']}_{anon_player['points']}_{anon_player['assimilation']}_{anon_player['tournaments_played']}_{anon_player['league']}"
# Add additional uniqueness based on name status
if (anon_player['name'] != 'N/A' and anon_player['name'] != '' and
anon_player['first_name'] != 'N/A' and anon_player['first_name'] != ''):
player_id += f"_{anon_player['name']}_{anon_player['first_name']}"
players_to_update.add(player_id)
update_info[player_id] = {
'anonymous_player': anon_player,
'match': best_match
}
if not players_to_update:
self.stdout.write("No players met the confidence threshold for auto-matching.")
return 0 # Return 0 because no players were updated
return 0
self.stdout.write(f"Found {len(players_to_update)} players to update.")
# Process the file line by line
updated_count = 0
updated_lines = []
already_updated_ranks = set() # Track which ranks we've already updated
# First, we need to find the data start line
# First, find the data start line
data_start_line = 0
for i, line in enumerate(lines):
if ';RANG;NOM;PRENOM;' in line:
@ -752,84 +920,55 @@ class Command(BaseCommand):
updated_lines.append(line)
continue
# Check if this is an anonymous player line
# Extract player data from the line
rank = values[1].strip() if len(values) > 1 else ''
name = values[2].strip() if len(values) > 2 else ''
first_name = values[3].strip() if len(values) > 3 else ''
# Skip if we've already updated this rank (prevent duplicates)
if rank in already_updated_ranks:
updated_lines.append(line)
continue
# CRITICAL CHECK: Only update if this is actually an anonymous player
# Check if player is anonymous (empty or missing name fields)
is_anonymous = not name or not first_name
if rank in players_to_update and is_anonymous:
# This is an anonymous player line with a match to apply
update_info = players_to_update[rank]
matched_player = update_info['match']['player']
# Log the current values for debugging
self.stdout.write(f"Updating anonymous player at rank {rank}. Current values: Name='{name}', First name='{first_name}'")
# Update this line with matched player info
# Basic information: name and first name
values[2] = matched_player['name'] # Last name
values[3] = matched_player['first_name'] # First name
# Update nationality if available
if matched_player['nationality'] != 'N/A' and len(values) > 4:
values[4] = matched_player['nationality']
# Update license if available
if matched_player['license'] != 'N/A' and len(values) > 5:
values[5] = matched_player['license']
# Additional fields:
# Club code (position 10)
if matched_player['club_code'] != 'N/A' and len(values) > 10:
values[10] = matched_player['club_code']
# Club name (position 11)
if matched_player['club'] != 'N/A' and len(values) > 11:
values[11] = matched_player['club']
# Birth year (position 14)
if matched_player['birth_year'] != 'N/A' and len(values) > 14:
values[14] = matched_player['birth_year']
# Reconstruct the line
updated_line = ';'.join(values) + '\n'
updated_lines.append(updated_line)
license_num = values[5].strip() if len(values) > 5 else ''
points = values[6].strip() if len(values) > 6 else ''
assimilation = values[7].strip() if len(values) > 7 else ''
tournaments = values[8].strip() if len(values) > 8 else ''
league = values[9].strip() if len(values) > 9 else ''
# Create player identifier for this line
line_player_id = f"{rank}_{points}_{assimilation}_{tournaments}_{league}"
# Add name info if present
if name and first_name and name != 'N/A' and first_name != 'N/A':
line_player_id += f"_{name}_{first_name}"
# Check if this player should be updated
if line_player_id in players_to_update:
# This player should be updated
match_info = update_info[line_player_id]
matched_player = match_info['match']['player']
# Update the line with matched player information
# Keep the existing rank and points, but update name and license
new_values = values.copy()
new_values[2] = matched_player['name'] # Name
new_values[3] = matched_player['first_name'] # First name
new_values[4] = matched_player['nationality']
new_values[5] = matched_player['license']
new_values[10] = matched_player['club_code']
new_values[11] = matched_player['club']
new_values[14] = matched_player['birth_year']
new_line = ';'.join(new_values) + '\n'
updated_lines.append(new_line)
updated_count += 1
# Mark this rank as updated to prevent duplicates
already_updated_ranks.add(rank)
self.stdout.write(f"Updated player rank {rank}: {matched_player['name']} {matched_player['first_name']}")
self.stdout.write(f"Updated player: {matched_player['name']} {matched_player['first_name']} (Rank: {rank})")
else:
# Not an anonymous player or no match to apply - keep the line unchanged
# This player should NOT be updated - keep the line exactly as is
updated_lines.append(line)
# If this is a non-anonymous player with a rank that was in our update list,
# log a warning that we skipped it
if rank in players_to_update and not is_anonymous:
self.stdout.write(self.style.WARNING(
f"WARNING: Skipped rank {rank} because it already contains a non-anonymous player: {name} {first_name}"
))
# Write the updated file
with open(file_path, 'w', encoding='utf-8') as f:
f.writelines(updated_lines)
self.stdout.write(self.style.SUCCESS(f"\nUpdated {updated_count} players in the rankings file."))
self.stdout.write(f"Original file backed up to: {backup_path}")
return updated_count # Return the count of updated players
self.stdout.write(self.style.SUCCESS(f"Successfully updated {updated_count} players in {file_path}"))
return updated_count
def iterative_match_anonymous_players(self, file_path, rankings_dir, options):
"""
@ -875,10 +1014,13 @@ class Command(BaseCommand):
anon_file = os.path.join(temp_dir, "anonymous_players.json")
prev_players_file = os.path.join(temp_dir, "prev_month_players.json")
matches_file = os.path.join(temp_dir, "matches.json")
print(os.path.join(temp_dir))
# Extract anonymous players and filter previous month players
self.stdout.write("Creating initial working files...")
filtered_data = self.create_filtered_working_files(current_players, prev_players, anon_file, prev_players_file)
filtered_data = self.create_filtered_working_files(
current_players, prev_players, anon_file, prev_players_file, options
)
anon_count = filtered_data['anon_count']
prev_count = filtered_data['prev_count']
@ -907,13 +1049,25 @@ class Command(BaseCommand):
# Update current players from the main file
current_players, _ = self.parse_rankings_file(file_path)
# Remove matched players from prev_players for next iteration
# Load the matches from the temp file to identify which prev players were used
if os.path.exists(matches_file):
with open(matches_file, 'r', encoding='utf-8') as f:
matches = json.load(f)
# Create a set of licenses that were matched
matched_licenses = set()
for match in matches:
matched_player = match['best_match']['player']
if matched_player['license'] != 'N/A' and matched_player['license']:
matched_licenses.add(matched_player['license'])
# Remove matched players from prev_players
prev_players = [p for p in prev_players if p['license'] not in matched_licenses]
# Update temp files for next iteration
filtered_data = self.create_filtered_working_files(current_players, prev_players, anon_file, prev_players_file)
anon_count = filtered_data['anon_count']
prev_count = filtered_data['prev_count']
filtered_data = self.create_filtered_working_files(current_players, prev_players, anon_file, prev_players_file, options)
self.stdout.write(f"Updated working files: {anon_count} anonymous players and {prev_count} eligible previous month players")
else:
self.stdout.write(self.style.SUCCESS(f"Iteration {iteration} complete: No new matches found"))
changes_made = False
@ -921,7 +1075,7 @@ class Command(BaseCommand):
iteration += 1
# Prevent infinite loops (optional safety check)
if iteration > 10: # Cap at 10 iterations maximum
if iteration > 1: # Cap at 10 iterations maximum
self.stdout.write(self.style.WARNING("Maximum iterations reached (10). Stopping process."))
break
@ -940,7 +1094,7 @@ class Command(BaseCommand):
improvement = ((initial_anonymous_count - final_anonymous_count) / initial_anonymous_count) * 100
self.stdout.write(f"Data completeness improved by {improvement:.1f}%")
def create_filtered_working_files(self, current_players, prev_players, anon_file, prev_players_file):
def create_filtered_working_files(self, current_players, prev_players, anon_file, prev_players_file, options):
"""
Create filtered working files:
1. anonymous_players.json - Contains only anonymous players from current month
@ -950,7 +1104,23 @@ class Command(BaseCommand):
"""
# Extract anonymous players from current month
anonymous_players = [p for p in current_players if self.is_anonymous_player(p)]
all_anonymous_players = [p for p in current_players if self.is_anonymous_player(p)]
# Filter based on named/unnamed options
if options['named_only']:
anonymous_players = [p for p in all_anonymous_players if (
p['name'] != 'N/A' and p['name'] != '' and
p['first_name'] != 'N/A' and p['first_name'] != ''
)]
self.stdout.write(self.style.SUCCESS(f"Filtering to only process named anonymous players ({len(anonymous_players)}/{len(all_anonymous_players)})"))
elif options['unnamed_only']:
anonymous_players = [p for p in all_anonymous_players if (
p['name'] == 'N/A' or p['name'] == '' or
p['first_name'] == 'N/A' or p['first_name'] == ''
)]
self.stdout.write(self.style.SUCCESS(f"Filtering to only process unnamed anonymous players ({len(anonymous_players)}/{len(all_anonymous_players)})"))
else:
anonymous_players = all_anonymous_players
# Create lookup for current non-anonymous players
current_players_lookup = {}
@ -1022,8 +1192,15 @@ class Command(BaseCommand):
for anon_player in anonymous_players:
potential_matches = self.find_potential_matches(anon_player, prev_players, current_players_indexes, options)
if potential_matches:
if len(potential_matches) == 1:
best_match = potential_matches[0] # Highest confidence match
elif len(potential_matches) > 1 and potential_matches[0]['confidence'] - potential_matches[1]['confidence'] > 2:
# print(potential_matches[0]['confidence'], potential_matches[1]['match_reasons'])
best_match = potential_matches[0] # Highest confidence match
else:
# for match in potential_matches:
# print(match['player']['name'], match['confidence'], match['match_reasons'])
continue
# Record the match info
match_info = {
'anonymous_player': anon_player,

Loading…
Cancel
Save