@ -20,10 +20,12 @@ class Command(BaseCommand):
parser . add_argument ( ' --clubs ' , type = int , default = 10 , help = ' Number of top clubs to display ' )
parser . add_argument ( ' --clubs ' , type = int , default = 10 , help = ' Number of top clubs to display ' )
parser . add_argument ( ' --leagues ' , type = int , default = 10 , help = ' Number of top leagues to display ' )
parser . add_argument ( ' --leagues ' , type = int , default = 10 , help = ' Number of top leagues to display ' )
parser . add_argument ( ' --find-anonymous ' , action = ' store_true ' , help = ' Find and match anonymous players with previous month rankings ' )
parser . add_argument ( ' --find-anonymous ' , action = ' store_true ' , help = ' Find and match anonymous players with previous month rankings ' )
parser . add_argument ( ' --confidence ' , type = floa t, default = 0. 7, help = ' Confidence threshold for automatic matching (0-1) ' )
parser . add_argument ( ' --confidence ' , type = in t, default = 7 , help = ' Confidence threshold for automatic matching (0-1) ' )
parser . add_argument ( ' --auto-match ' , action = ' store_true ' , help = ' Automatically match anonymous players when confidence is high ' )
parser . add_argument ( ' --auto-match ' , action = ' store_true ' , help = ' Automatically match anonymous players when confidence is high ' )
parser . add_argument ( ' --output ' , type = str , help = ' Save results to output file ' )
parser . add_argument ( ' --output ' , type = str , help = ' Save results to output file ' )
parser . add_argument ( ' --verbose ' , action = ' store_true ' , help = ' Show detailed matching information ' )
parser . add_argument ( ' --verbose ' , action = ' store_true ' , help = ' Show detailed matching information ' )
parser . add_argument ( ' --named-only ' , action = ' store_true ' , help = ' Process only anonymous players WITH names (missing license) ' )
parser . add_argument ( ' --unnamed-only ' , action = ' store_true ' , help = ' Process only anonymous players WITHOUT names ' )
def handle ( self , * args , * * options ) :
def handle ( self , * args , * * options ) :
# Base directory for rankings files
# Base directory for rankings files
@ -53,7 +55,7 @@ class Command(BaseCommand):
# Generate statistics
# Generate statistics
if players :
if players :
self . generate_statistics ( players , options )
# self.generate_statistics(players, options )
# Find anonymous players if requested
# Find anonymous players if requested
if options [ ' find_anonymous ' ] :
if options [ ' find_anonymous ' ] :
@ -243,10 +245,17 @@ class Command(BaseCommand):
self . stdout . write ( self . style . SUCCESS ( f ' \n Anonymous players: { len ( anonymous_players ) } ( { ( len ( anonymous_players ) / total_players ) * 100 : .1f } %) ' ) )
self . stdout . write ( self . style . SUCCESS ( f ' \n Anonymous players: { len ( anonymous_players ) } ( { ( len ( anonymous_players ) / total_players ) * 100 : .1f } %) ' ) )
def is_anonymous_player(self, player):
    """Return True when a player record lacks identifying data.

    A player is treated as anonymous when the last name, the first name
    or the license is missing. A field counts as missing when it is
    ``None``, blank, or the ``'N/A'`` placeholder; surrounding whitespace
    is ignored so padded values are classified the same way as their
    stripped form.

    Args:
        player: Mapping with at least the ``'name'``, ``'first_name'``
            and ``'license'`` keys.

    Returns:
        bool: True if the name data or the license is missing,
        False when all three fields carry a value.

    Raises:
        KeyError: If one of the three keys is absent from ``player``.
    """
    def _is_missing(value):
        # Normalise before comparing so ' N/A ' and '   ' are handled
        # like 'N/A' and ''.
        return value is None or str(value).strip() in ('', 'N/A')

    # No usable name data: anonymous regardless of the license.
    if _is_missing(player['name']) or _is_missing(player['first_name']):
        return True

    # A named player without a license is anonymous as well.
    return _is_missing(player['license'])
def player_exists_in_current_month ( self , prev_player , current_players_indexes ) :
def player_exists_in_current_month ( self , prev_player , current_players_indexes ) :
"""
"""
@ -276,8 +285,11 @@ class Command(BaseCommand):
self . stdout . write ( " Building player indexes for fast lookup... " )
self . stdout . write ( " Building player indexes for fast lookup... " )
start_time = datetime . now ( )
start_time = datetime . now ( )
# Players to index (only non-anonymous)
# Only index players that have BOTH name AND license
players_to_index = [ p for p in current_players if not self . is_anonymous_player ( p ) ]
players_to_index = [ p for p in current_players
if ( p [ ' license ' ] != ' N/A ' and p [ ' license ' ] != ' ' ) and
( p [ ' name ' ] != ' N/A ' and p [ ' name ' ] != ' ' ) and
( p [ ' first_name ' ] != ' N/A ' and p [ ' first_name ' ] != ' ' ) ]
# Create license index
# Create license index
license_index = { }
license_index = { }
@ -376,14 +388,59 @@ class Command(BaseCommand):
matched_count = 0
matched_count = 0
# Identify anonymous players
# Identify anonymous players
anonymous_players = [ p for p in current_players if self . is_anonymous_player ( p ) ]
all_a nonymous_players = [ p for p in current_players if self . is_anonymous_player ( p ) ]
if not anonymous_players :
if not all_a nonymous_players :
self . stdout . write ( self . style . SUCCESS ( ' No anonymous players found! ' ) )
self . stdout . write ( self . style . SUCCESS ( ' No anonymous players found! ' ) )
if return_count :
if return_count :
return 0
return 0
return
return
self . stdout . write ( self . style . SUCCESS ( f ' \n Found { len ( anonymous_players ) } anonymous players. Looking for matches... ' ) )
# Check for conflicting options
if options [ ' named_only ' ] and options [ ' unnamed_only ' ] :
self . stderr . write ( self . style . ERROR ( ' Cannot use both --named-only and --unnamed-only options together ' ) )
if return_count :
return 0
return
# Sort anonymous players by type
anonymous_players_with_names = [ ]
anonymous_players_without_names = [ ]
for player in all_anonymous_players :
if ( player [ ' name ' ] != ' N/A ' and player [ ' name ' ] != ' ' and
player [ ' first_name ' ] != ' N/A ' and player [ ' first_name ' ] != ' ' ) :
anonymous_players_with_names . append ( player )
else :
anonymous_players_without_names . append ( player )
# Select which players to process based on options
if options [ ' named_only ' ] :
anonymous_players = anonymous_players_with_names
processing_type = " named anonymous players (with names but missing license) "
elif options [ ' unnamed_only ' ] :
anonymous_players = anonymous_players_without_names
processing_type = " unnamed anonymous players (missing names) "
else :
# Default behavior: process named players first, then unnamed
anonymous_players = anonymous_players_with_names + anonymous_players_without_names
processing_type = " all anonymous players (named first, then unnamed) "
if not anonymous_players :
if options [ ' named_only ' ] :
self . stdout . write ( self . style . SUCCESS ( ' No anonymous players with names found! ' ) )
elif options [ ' unnamed_only ' ] :
self . stdout . write ( self . style . SUCCESS ( ' No anonymous players without names found! ' ) )
if return_count :
return 0
return
# Display summary
self . stdout . write ( self . style . SUCCESS ( f ' \n Processing { processing_type } ' ) )
self . stdout . write ( f ' Anonymous players breakdown: ' )
self . stdout . write ( f ' Total found: { len ( all_anonymous_players ) } ' )
self . stdout . write ( f ' With names: { len ( anonymous_players_with_names ) } ' )
self . stdout . write ( f ' Without names: { len ( anonymous_players_without_names ) } ' )
self . stdout . write ( f ' Selected for processing: { len ( anonymous_players ) } ' )
# Find previous month file
# Find previous month file
prev_month_file = self . find_previous_month_file ( current_metadata , rankings_dir )
prev_month_file = self . find_previous_month_file ( current_metadata , rankings_dir )
@ -422,7 +479,19 @@ class Command(BaseCommand):
# Show progress
# Show progress
progress_counter + = 1
progress_counter + = 1
if progress_counter % progress_interval == 0 or progress_counter == 1 :
if progress_counter % progress_interval == 0 or progress_counter == 1 :
self . stdout . write ( f ' Processing anonymous player { progress_counter } / { len ( anonymous_players ) } ( { ( progress_counter / len ( anonymous_players ) ) * 100 : .1f } %) ' )
# Determine which type of player we're processing
if options [ ' named_only ' ] :
player_type = " named "
elif options [ ' unnamed_only ' ] :
player_type = " unnamed "
else :
# Default behavior: check if we're still processing named players
if progress_counter < = len ( anonymous_players_with_names ) :
player_type = " named "
else :
player_type = " unnamed "
self . stdout . write ( f ' Processing { player_type } anonymous player { progress_counter } / { len ( anonymous_players ) } ( { ( progress_counter / len ( anonymous_players ) ) * 100 : .1f } %) ' )
potential_matches = self . find_potential_matches ( anon_player , prev_players , current_players_indexes , options )
potential_matches = self . find_potential_matches ( anon_player , prev_players , current_players_indexes , options )
@ -442,7 +511,12 @@ class Command(BaseCommand):
progression = f " , Progression: { anon_player [ ' progression ' ] } " if anon_player [ ' progression ' ] != ' N/A ' else " "
progression = f " , Progression: { anon_player [ ' progression ' ] } " if anon_player [ ' progression ' ] != ' N/A ' else " "
assimilation = f " , Assimilation: { anon_player [ ' assimilation ' ] } " if anon_player [ ' assimilation ' ] != ' N/A ' else " "
assimilation = f " , Assimilation: { anon_player [ ' assimilation ' ] } " if anon_player [ ' assimilation ' ] != ' N/A ' else " "
self . stdout . write ( f " \n Anonymous player: Rank { anon_player [ ' rank ' ] } , League: { anon_player [ ' league ' ] } { progression } { assimilation } " )
# Show if this is a named or unnamed anonymous player
if ( anon_player [ ' name ' ] != ' N/A ' and anon_player [ ' name ' ] != ' ' and
anon_player [ ' first_name ' ] != ' N/A ' and anon_player [ ' first_name ' ] != ' ' ) :
self . stdout . write ( f " \n Named anonymous player: { anon_player [ ' name ' ] } { anon_player [ ' first_name ' ] } - Rank { anon_player [ ' rank ' ] } , League: { anon_player [ ' league ' ] } { progression } { assimilation } " )
else :
self . stdout . write ( f " \n Unnamed anonymous player: Rank { anon_player [ ' rank ' ] } , League: { anon_player [ ' league ' ] } { progression } { assimilation } " )
for i , match in enumerate ( potential_matches [ : 3 ] ) : # Show top 3 matches
for i , match in enumerate ( potential_matches [ : 3 ] ) : # Show top 3 matches
player = match [ ' player ' ]
player = match [ ' player ' ]
@ -456,7 +530,11 @@ class Command(BaseCommand):
high_confidence_matches + = 1
high_confidence_matches + = 1
else :
else :
if options [ ' verbose ' ] :
if options [ ' verbose ' ] :
self . stdout . write ( f " \n No matches found for anonymous player: Rank { anon_player [ ' rank ' ] } , League: { anon_player [ ' league ' ] } " )
if ( anon_player [ ' name ' ] != ' N/A ' and anon_player [ ' name ' ] != ' ' and
anon_player [ ' first_name ' ] != ' N/A ' and anon_player [ ' first_name ' ] != ' ' ) :
self . stdout . write ( f " \n No matches found for named anonymous player: { anon_player [ ' name ' ] } { anon_player [ ' first_name ' ] } - Rank { anon_player [ ' rank ' ] } , League: { anon_player [ ' league ' ] } " )
else :
self . stdout . write ( f " \n No matches found for unnamed anonymous player: Rank { anon_player [ ' rank ' ] } , League: { anon_player [ ' league ' ] } " )
# Batch processing status update
# Batch processing status update
if progress_counter % 100 == 0 and progress_counter > 0 :
if progress_counter % 100 == 0 and progress_counter > 0 :
@ -472,7 +550,11 @@ class Command(BaseCommand):
# Summary
# Summary
self . stdout . write ( self . style . SUCCESS ( f ' \n Matching summary: ' ) )
self . stdout . write ( self . style . SUCCESS ( f ' \n Matching summary: ' ) )
self . stdout . write ( f ' Total anonymous players: { len ( anonymous_players ) } ' )
self . stdout . write ( f ' Processing mode: { processing_type } ' )
self . stdout . write ( f ' Anonymous players processed: { len ( anonymous_players ) } ' )
if not options [ ' named_only ' ] and not options [ ' unnamed_only ' ] :
self . stdout . write ( f ' Named: { len ( anonymous_players_with_names ) } ' )
self . stdout . write ( f ' Unnamed: { len ( anonymous_players_without_names ) } ' )
self . stdout . write ( f ' Players with potential matches: { matches_found } ' )
self . stdout . write ( f ' Players with potential matches: { matches_found } ' )
self . stdout . write ( f ' High confidence matches (≥ { options [ " confidence " ] } ): { high_confidence_matches } ' )
self . stdout . write ( f ' High confidence matches (≥ { options [ " confidence " ] } ): { high_confidence_matches } ' )
self . stdout . write ( f ' Skipped players already in current month: { skipped_existing_players } ' )
self . stdout . write ( f ' Skipped players already in current month: { skipped_existing_players } ' )
@ -484,6 +566,7 @@ class Command(BaseCommand):
# Auto-match players if requested
# Auto-match players if requested
if options [ ' auto_match ' ] and matches_found > 0 and file_path :
if options [ ' auto_match ' ] and matches_found > 0 and file_path :
# Note: We pass the selected anonymous_players for matching
matched_count = self . update_rankings_with_matches ( file_path , anonymous_players , results ,
matched_count = self . update_rankings_with_matches ( file_path , anonymous_players , results ,
options [ ' confidence ' ] , options )
options [ ' confidence ' ] , options )
elif options [ ' auto_match ' ] and file_path is None :
elif options [ ' auto_match ' ] and file_path is None :
@ -546,32 +629,40 @@ class Command(BaseCommand):
if options [ ' verbose ' ] :
if options [ ' verbose ' ] :
self . stdout . write ( f " Target previous rank: { prev_rank_from_progression } (current rank { anon_rank } + progression { prog_value } ) " )
self . stdout . write ( f " Target previous rank: { prev_rank_from_progression } (current rank { anon_rank } + progression { prog_value } ) " )
# Show anonymous player details
self . stdout . write ( " \n " + " = " * 80 )
self . stdout . write ( f " Looking for matches for anonymous player at rank { anon_player [ ' rank ' ] } : " )
self . stdout . write ( f " Points: { anon_player [ ' points ' ] } " )
self . stdout . write ( f " Assimilation: { anon_player [ ' assimilation ' ] } " )
self . stdout . write ( f " Tournaments: { anon_player [ ' tournaments_played ' ] } " )
self . stdout . write ( f " League: { anon_player [ ' league ' ] } " )
if anon_player [ ' name ' ] != ' N/A ' and anon_player [ ' first_name ' ] != ' N/A ' :
self . stdout . write ( f " Name: { anon_player [ ' name ' ] } { anon_player [ ' first_name ' ] } " )
self . stdout . write ( " - " * 80 )
for prev_player in prev_players :
for prev_player in prev_players :
# Skip anonymous players in previous month
# Skip anonymous players in previous month
if self . is_anonymous_player ( prev_player ) :
if self . is_anonymous_player ( prev_player ) :
continue
continue
# Check if this player exists in current month with the same license
exists , existing_player = self . player_exists_in_current_month ( prev_player , current_players_indexes )
if exists :
# If we found the exact same player (same license), skip them
if existing_player [ ' license ' ] == prev_player [ ' license ' ] :
skipped_players + = 1
continue
# If we found someone with the same name but different license, we can still consider this player
# Initialize match data
# Initialize match data
match_data = {
match_data = {
' player ' : prev_player ,
' player ' : prev_player ,
' rank_match_type ' : None ,
' rank_diff ' : None ,
' has_league_match ' : False ,
' has_assimilation_match ' : False ,
' points_similarity ' : 0.0 ,
' match_reasons ' : [ ] ,
' match_reasons ' : [ ] ,
' confidence ' : 0. 0
' confidence ' : 0
}
}
# Print candidate details
self . stdout . write ( f " \n Checking candidate: { prev_player [ ' name ' ] } { prev_player [ ' first_name ' ] } " )
self . stdout . write ( f " Rank: { prev_player [ ' rank ' ] } " )
self . stdout . write ( f " Points: { prev_player [ ' points ' ] } " )
self . stdout . write ( f " Assimilation: { prev_player [ ' assimilation ' ] } " )
self . stdout . write ( f " Tournaments: { prev_player [ ' tournaments_played ' ] } " )
self . stdout . write ( f " League: { prev_player [ ' league ' ] } " )
# Start building confidence score
confidence_details = [ ]
# 1. PRIMARY MATCHER: Previous rank match
# 1. PRIMARY MATCHER: Previous rank match
if prev_rank_from_progression is not None :
if prev_rank_from_progression is not None :
try :
try :
@ -583,68 +674,142 @@ class Command(BaseCommand):
if rank_diff == 0 :
if rank_diff == 0 :
match_data [ ' rank_match_type ' ] = ' exact '
match_data [ ' rank_match_type ' ] = ' exact '
match_data [ ' match_reasons ' ] . append ( f " exact previous rank match ( { prev_rank_value } ) " )
match_data [ ' match_reasons ' ] . append ( f " exact previous rank match ( { prev_rank_value } ) " )
match_data [ ' confidence ' ] = 0.7
match_data [ ' confidence ' ] = 7
# Assimilation match
if anon_player [ ' assimilation ' ] == prev_player [ ' assimilation ' ] :
match_data [ ' confidence ' ] + = 3
confidence_details . append ( f " Assimilation match (+0.3) " )
match_data [ ' match_reasons ' ] . append ( f " same assimilation ( { anon_player [ ' assimilation ' ] } ) " )
# League match
if ( anon_player [ ' league ' ] == prev_player [ ' league ' ] and
anon_player [ ' league ' ] != ' N/A ' and anon_player [ ' league ' ] != ' ' ) :
match_data [ ' confidence ' ] + = 7
confidence_details . append ( f " League match (+0.5) " )
match_data [ ' match_reasons ' ] . append ( f " same league ( { anon_player [ ' league ' ] } ) " )
# Tournament count comparison
try :
anon_tournaments = int ( anon_player [ ' tournaments_played ' ] )
prev_tournaments = int ( prev_player [ ' tournaments_played ' ] )
tournaments_diff = abs ( anon_tournaments - prev_tournaments )
if tournaments_diff == 0 :
match_data [ ' confidence ' ] + = 4
confidence_details . append ( f " Tournaments unchanged (+0.2) " )
match_data [ ' match_reasons ' ] . append ( f " same tournaments played ( { anon_tournaments } ) " )
else :
# Calculate percentage difference
max_tournaments = max ( anon_tournaments , prev_tournaments )
if max_tournaments > 0 :
percentage_diff = ( tournaments_diff / max_tournaments ) * 100
if percentage_diff < = 10 :
match_data [ ' confidence ' ] + = 3
confidence_details . append ( f " Tournaments within 10% range (+0.15, diff: { percentage_diff : .1f } %) " )
match_data [ ' match_reasons ' ] . append ( f " tournaments played: prev= { prev_tournaments } , current= { anon_tournaments } " )
elif percentage_diff < = 20 :
match_data [ ' confidence ' ] + = 2
confidence_details . append ( f " Tournaments within 20% range (+0.1, diff: { percentage_diff : .1f } %) " )
match_data [ ' match_reasons ' ] . append ( f " tournaments played: prev= { prev_tournaments } , current= { anon_tournaments } " )
else :
confidence_details . append ( f " Tournaments too different (diff: { percentage_diff : .1f } %) " )
match_data [ ' match_reasons ' ] . append ( f " tournaments played: prev= { prev_tournaments } , current= { anon_tournaments } " )
else :
# Handle edge case where both values are 0
match_data [ ' confidence ' ] + = 4
confidence_details . append ( f " Both have 0 tournaments (+0.2) " )
match_data [ ' match_reasons ' ] . append ( f " both have 0 tournaments played " )
except ValueError :
confidence_details . append ( " Could not compare tournaments played " )
# Points comparison
try :
anon_points = float ( anon_player [ ' points ' ] )
prev_points = float ( prev_player [ ' points ' ] )
points_diff = abs ( anon_points - prev_points )
match_data [ ' match_reasons ' ] . append ( f " points: prev= { prev_points } , current= { anon_points } , diff= { points_diff } " )
if points_diff == 0 :
match_data [ ' confidence ' ] + = 4
confidence_details . append ( f " Points unchanged (+0.3) " )
else :
# Calculate percentage difference
max_points = max ( anon_points , prev_points )
if max_points > 0 :
percentage_diff = ( points_diff / max_points ) * 100
if percentage_diff < = 10 :
match_data [ ' confidence ' ] + = 3
confidence_details . append ( f " Points within 10% range (+0.25, diff: { percentage_diff : .1f } %) " )
elif percentage_diff < = 20 :
match_data [ ' confidence ' ] + = 2
confidence_details . append ( f " Points within 20% range (+0.15, diff: { percentage_diff : .1f } %) " )
elif percentage_diff < = 30 :
match_data [ ' confidence ' ] + = 1
confidence_details . append ( f " Points within 30% range (+0.1, diff: { percentage_diff : .1f } %) " )
else :
confidence_details . append ( f " Points too different (diff: { percentage_diff : .1f } %) " )
except ValueError :
confidence_details . append ( " Could not compare points " )
elif rank_diff < = 3 :
elif rank_diff < = 3 :
match_data [ ' rank_match_type ' ] = ' close '
match_data [ ' rank_match_type ' ] = ' close '
match_data [ ' match_reasons ' ] . append ( f " close previous rank match ( { prev_rank_value } vs { prev_rank_from_progression } ) " )
match_data [ ' match_reasons ' ] . append ( f " close previous rank match ( { prev_rank_value } vs { prev_rank_from_progression } ) " )
match_data [ ' confidence ' ] = 0.4
match_data [ ' confidence ' ] = 4
elif rank_diff < = 10 :
elif rank_diff < = 10 :
match_data [ ' rank_match_type ' ] = ' approximate '
match_data [ ' rank_match_type ' ] = ' approximate '
match_data [ ' match_reasons ' ] . append ( f " approximate previous rank match ( { prev_rank_value } vs { prev_rank_from_progression } ) " )
match_data [ ' match_reasons ' ] . append ( f " approximate previous rank match ( { prev_rank_value } vs { prev_rank_from_progression } ) " )
match_data [ ' confidence ' ] = 0.2
match_data [ ' confidence ' ] = 2
except ValueError :
except ValueError :
pass
pass
# 2. Points similarity (new check)
# Name match check
try :
if ( anon_player [ ' name ' ] != ' N/A ' and anon_player [ ' name ' ] != ' ' and
if anon_player [ ' points ' ] != ' N/A ' and prev_player [ ' points ' ] != ' N/A ' :
anon_player [ ' first_name ' ] != ' N/A ' and anon_player [ ' first_name ' ] != ' ' ) :
anon_points = float ( anon_player [ ' points ' ] )
if ( anon_player [ ' name ' ] . lower ( ) == prev_player [ ' name ' ] . lower ( ) and
prev_points = float ( prev_player [ ' points ' ] )
anon_player [ ' first_name ' ] . lower ( ) == prev_player [ ' first_name ' ] . lower ( ) ) :
points_diff = abs ( anon_points - prev_points )
match_data [ ' confidence ' ] + = 25
points_similarity = max ( 0 , 1 - ( points_diff / max ( anon_points , prev_points ) ) )
confidence_details . append ( f " Exact name match (+0.4) " )
match_data [ ' match_reasons ' ] . append ( " exact name match " )
if points_similarity > 0.9 :
match_data [ ' points_similarity ' ] = points_similarity
# Birth year match
match_data [ ' match_reasons ' ] . append ( f " similar points ( { prev_points } vs { anon_points } ) " )
if ( anon_player [ ' birth_year ' ] != ' N/A ' and anon_player [ ' birth_year ' ] != ' ' and
match_data [ ' confidence ' ] + = 0.2
prev_player [ ' birth_year ' ] != ' N/A ' and prev_player [ ' birth_year ' ] != ' ' and
except ValueError :
anon_player [ ' birth_year ' ] == prev_player [ ' birth_year ' ] ) :
pass
match_data [ ' confidence ' ] + = 1
confidence_details . append ( f " Birth year match (+0.2) " )
# 3. League match
match_data [ ' match_reasons ' ] . append ( f " same birth year ( { anon_player [ ' birth_year ' ] } ) " )
if anon_player [ ' league ' ] != ' N/A ' and prev_player [ ' league ' ] != ' N/A ' :
if anon_player [ ' league ' ] == prev_player [ ' league ' ] :
# Only consider matches with reasonable confidence
match_data [ ' has_league_match ' ] = True
if match_data [ ' confidence ' ] > = 10 :
match_data [ ' match_reasons ' ] . append ( " league match " )
# Print confidence calculation details
match_data [ ' confidence ' ] + = 0.25
self . stdout . write ( " \n Confidence calculation: " )
for detail in confidence_details :
# 4. Assimilation match
self . stdout . write ( f " { detail } " )
if anon_player [ ' assimilation ' ] != ' N/A ' and prev_player [ ' assimilation ' ] != ' N/A ' :
self . stdout . write ( f " Total confidence: { match_data [ ' confidence ' ] : .2f } " )
if anon_player [ ' assimilation ' ] == prev_player [ ' assimilation ' ] :
match_data [ ' has_assimilation_match ' ] = True
match_data [ ' match_reasons ' ] . append ( " assimilation match " )
match_data [ ' confidence ' ] + = 0.1
# Only consider matches with minimum confidence
if match_data [ ' confidence ' ] > = 0.1 :
match_data [ ' match_reasons ' ] = " , " . join ( match_data [ ' match_reasons ' ] )
match_data [ ' match_reasons ' ] = " , " . join ( match_data [ ' match_reasons ' ] )
potential_matches . append ( match_data )
potential_matches . append ( match_data )
self . stdout . write ( " → Considered as potential match " )
# else:
# self.stdout.write(" → Rejected (confidence too low)")
# self.stdout.write("-"*40)
# Sort matches by confidence
potential_matches . sort ( key = lambda x : x [ ' confidence ' ] , reverse = True )
# Summary of best matches
if potential_matches :
self . stdout . write ( " \n Top matches found: " )
for i , match in enumerate ( potential_matches [ : 3 ] ) : # Show top 3
self . stdout . write ( f " \n { i + 1 } . { match [ ' player ' ] [ ' name ' ] } { match [ ' player ' ] [ ' first_name ' ] } " )
self . stdout . write ( f " Confidence: { match [ ' confidence ' ] : .2f } " )
self . stdout . write ( f " Reasons: { match [ ' match_reasons ' ] } " )
else :
self . stdout . write ( " \n No matches found with sufficient confidence. " )
# Sort matches with updated criteria
def match_sort_key ( match ) :
rank_score = {
' exact ' : 1000 ,
' close ' : 100 ,
' approximate ' : 10 ,
None : 1
} . get ( match [ ' rank_match_type ' ] , 0 )
points_score = int ( match . get ( ' points_similarity ' , 0 ) * 100 )
league_value = 2 if match [ ' has_league_match ' ] else 1
assimilation_value = 2 if match [ ' has_assimilation_match ' ] else 1
return ( rank_score , points_score , league_value , assimilation_value , match [ ' confidence ' ] )
potential_matches . sort ( key = match_sort_key , reverse = True )
return potential_matches
return potential_matches
def save_results ( self , results , output_path ) :
def save_results ( self , results , output_path ) :
@ -679,7 +844,7 @@ class Command(BaseCommand):
Args :
Args :
file_path : Path to the current month ' s rankings file
file_path : Path to the current month ' s rankings file
anonymous_players : List of anonymous players
anonymous_players : List of anonymous players ( filtered based on command options )
matches : List of match info dictionaries
matches : List of match info dictionaries
confidence_threshold : Minimum confidence to apply auto - matching
confidence_threshold : Minimum confidence to apply auto - matching
options : Command options
options : Command options
@ -698,39 +863,42 @@ class Command(BaseCommand):
with open ( file_path , ' r ' , encoding = ' utf-8 ' ) as f :
with open ( file_path , ' r ' , encoding = ' utf-8 ' ) as f :
lines = f . readlines ( )
lines = f . readlines ( )
# Create a map of anonymous players by rank for faster lookup
# Create a set of players that should be updated
anon_by_rank = { }
# Only include players that were in our filtered anonymous_players list AND have high confidence matches
for player in anonymous_players :
players_to_update = set ( )
if player [ ' rank ' ] != ' N/A ' :
update_info = { }
anon_by_rank [ player [ ' rank ' ] ] = player
# Track which players will be updated (use a dictionary to ensure only one update per anonymous player)
players_to_update = { }
for match_info in matches :
for match_info in matches :
anon_player = match_info [ ' anonymous_player ' ]
anon_player = match_info [ ' anonymous_player ' ]
best_match = match_info [ ' best_match ' ]
best_match = match_info [ ' best_match ' ]
rank = anon_player [ ' rank ' ]
if best_match [ ' confidence ' ] > = confidence_threshold and rank not in players_to_update :
# Only update if this player was in our filtered list AND meets confidence threshold
# This match has high enough confidence to auto-apply
if anon_player in anonymous_players and best_match [ ' confidence ' ] > = confidence_threshold :
# Only add if we haven't already found a match for this rank
# Create a unique identifier for this player
players_to_update [ rank ] = {
player_id = f " { anon_player [ ' rank ' ] } _ { anon_player [ ' points ' ] } _ { anon_player [ ' assimilation ' ] } _ { anon_player [ ' tournaments_played ' ] } _ { anon_player [ ' league ' ] } "
# Add additional uniqueness based on name status
if ( anon_player [ ' name ' ] != ' N/A ' and anon_player [ ' name ' ] != ' ' and
anon_player [ ' first_name ' ] != ' N/A ' and anon_player [ ' first_name ' ] != ' ' ) :
player_id + = f " _ { anon_player [ ' name ' ] } _ { anon_player [ ' first_name ' ] } "
players_to_update . add ( player_id )
update_info [ player_id ] = {
' anonymous_player ' : anon_player ,
' anonymous_player ' : anon_player ,
' match ' : best_match
' match ' : best_match
}
}
if not players_to_update :
if not players_to_update :
self . stdout . write ( " No players met the confidence threshold for auto-matching. " )
self . stdout . write ( " No players met the confidence threshold for auto-matching. " )
return 0 # Return 0 because no players were updated
return 0
self . stdout . write ( f " Found { len ( players_to_update ) } players to update. " )
self . stdout . write ( f " Found { len ( players_to_update ) } players to update. " )
# Process the file line by line
# Process the file line by line
updated_count = 0
updated_count = 0
updated_lines = [ ]
updated_lines = [ ]
already_updated_ranks = set ( ) # Track which ranks we've already updated
# First, we need to find the data start line
# First, find the data start line
data_start_line = 0
data_start_line = 0
for i , line in enumerate ( lines ) :
for i , line in enumerate ( lines ) :
if ' ;RANG;NOM;PRENOM; ' in line :
if ' ;RANG;NOM;PRENOM; ' in line :
@ -752,84 +920,55 @@ class Command(BaseCommand):
updated_lines . append ( line )
updated_lines . append ( line )
continue
continue
# Check if this is an anonymous player line
# Extract player data from the line
rank = values [ 1 ] . strip ( ) if len ( values ) > 1 else ' '
rank = values [ 1 ] . strip ( ) if len ( values ) > 1 else ' '
name = values [ 2 ] . strip ( ) if len ( values ) > 2 else ' '
name = values [ 2 ] . strip ( ) if len ( values ) > 2 else ' '
first_name = values [ 3 ] . strip ( ) if len ( values ) > 3 else ' '
first_name = values [ 3 ] . strip ( ) if len ( values ) > 3 else ' '
license_num = values [ 5 ] . strip ( ) if len ( values ) > 5 else ' '
# Skip if we've already updated this rank (prevent duplicates)
points = values [ 6 ] . strip ( ) if len ( values ) > 6 else ' '
if rank in already_updated_ranks :
assimilation = values [ 7 ] . strip ( ) if len ( values ) > 7 else ' '
updated_lines . append ( line )
tournaments = values [ 8 ] . strip ( ) if len ( values ) > 8 else ' '
continue
league = values [ 9 ] . strip ( ) if len ( values ) > 9 else ' '
# CRITICAL CHECK: Only update if this is actually an anonymous player
# Create player identifier for this line
# Check if player is anonymous (empty or missing name fields)
line_player_id = f " { rank } _ { points } _ { assimilation } _ { tournaments } _ { league } "
is_anonymous = not name or not first_name
# Add name info if present
if rank in players_to_update and is_anonymous :
if name and first_name and name != ' N/A ' and first_name != ' N/A ' :
# This is an anonymous player line with a match to apply
line_player_id + = f " _ { name } _ { first_name } "
update_info = players_to_update [ rank ]
matched_player = update_info [ ' match ' ] [ ' player ' ]
# Check if this player should be updated
if line_player_id in players_to_update :
# Log the current values for debugging
# This player should be updated
self . stdout . write ( f " Updating anonymous player at rank { rank } . Current values: Name= ' { name } ' , First name= ' { first_name } ' " )
match_info = update_info [ line_player_id ]
matched_player = match_info [ ' match ' ] [ ' player ' ]
# Update this line with matched player info
# Update the line with matched player information
# Basic information: name and first name
# Keep the existing rank and points, but update name and license
values [ 2 ] = matched_player [ ' name ' ] # Last name
new_values = values . copy ( )
values [ 3 ] = matched_player [ ' first_name ' ] # First name
new_values [ 2 ] = matched_player [ ' name ' ] # Name
new_values [ 3 ] = matched_player [ ' first_name ' ] # First name
# Update nationality if available
new_values [ 4 ] = matched_player [ ' nationality ' ]
if matched_player [ ' nationality ' ] != ' N/A ' and len ( values ) > 4 :
new_values [ 5 ] = matched_player [ ' license ' ]
values [ 4 ] = matched_player [ ' nationality ' ]
new_values [ 10 ] = matched_player [ ' club_code ' ]
new_values [ 11 ] = matched_player [ ' club ' ]
# Update license if available
new_values [ 14 ] = matched_player [ ' birth_year ' ]
if matched_player [ ' license ' ] != ' N/A ' and len ( values ) > 5 :
values [ 5 ] = matched_player [ ' license ' ]
new_line = ' ; ' . join ( new_values ) + ' \n '
updated_lines . append ( new_line )
# Additional fields:
# Club code (position 10)
if matched_player [ ' club_code ' ] != ' N/A ' and len ( values ) > 10 :
values [ 10 ] = matched_player [ ' club_code ' ]
# Club name (position 11)
if matched_player [ ' club ' ] != ' N/A ' and len ( values ) > 11 :
values [ 11 ] = matched_player [ ' club ' ]
# Birth year (position 14)
if matched_player [ ' birth_year ' ] != ' N/A ' and len ( values ) > 14 :
values [ 14 ] = matched_player [ ' birth_year ' ]
# Reconstruct the line
updated_line = ' ; ' . join ( values ) + ' \n '
updated_lines . append ( updated_line )
updated_count + = 1
updated_count + = 1
# Mark this rank as updated to prevent duplicates
self . stdout . write ( f " Updated player: { matched_player [ ' name ' ] } { matched_player [ ' first_name ' ] } (Rank: { rank } ) " )
already_updated_ranks . add ( rank )
self . stdout . write ( f " Updated player rank { rank } : { matched_player [ ' name ' ] } { matched_player [ ' first_name ' ] } " )
else :
else :
# Not an anonymous player or no match to apply - keep the line unchanged
# This player should NOT be updated - keep the line exactly as is
updated_lines . append ( line )
updated_lines . append ( line )
# If this is a non-anonymous player with a rank that was in our update list,
# log a warning that we skipped it
if rank in players_to_update and not is_anonymous :
self . stdout . write ( self . style . WARNING (
f " WARNING: Skipped rank { rank } because it already contains a non-anonymous player: { name } { first_name } "
) )
# Write the updated file
# Write the updated file
with open ( file_path , ' w ' , encoding = ' utf-8 ' ) as f :
with open ( file_path , ' w ' , encoding = ' utf-8 ' ) as f :
f . writelines ( updated_lines )
f . writelines ( updated_lines )
self . stdout . write ( self . style . SUCCESS ( f " \n Updated { updated_count } players in the rankings file. " ) )
self . stdout . write ( self . style . SUCCESS ( f " Successfully updated { updated_count } players in { file_path } " ) )
self . stdout . write ( f " Original file backed up to: { backup_path } " )
return updated_count
return updated_count # Return the count of updated players
def iterative_match_anonymous_players ( self , file_path , rankings_dir , options ) :
def iterative_match_anonymous_players ( self , file_path , rankings_dir , options ) :
"""
"""
@ -875,10 +1014,13 @@ class Command(BaseCommand):
anon_file = os . path . join ( temp_dir , " anonymous_players.json " )
anon_file = os . path . join ( temp_dir , " anonymous_players.json " )
prev_players_file = os . path . join ( temp_dir , " prev_month_players.json " )
prev_players_file = os . path . join ( temp_dir , " prev_month_players.json " )
matches_file = os . path . join ( temp_dir , " matches.json " )
matches_file = os . path . join ( temp_dir , " matches.json " )
print ( os . path . join ( temp_dir ) )
# Extract anonymous players and filter previous month players
# Extract anonymous players and filter previous month players
self . stdout . write ( " Creating initial working files... " )
self . stdout . write ( " Creating initial working files... " )
filtered_data = self . create_filtered_working_files ( current_players , prev_players , anon_file , prev_players_file )
filtered_data = self . create_filtered_working_files (
current_players , prev_players , anon_file , prev_players_file , options
)
anon_count = filtered_data [ ' anon_count ' ]
anon_count = filtered_data [ ' anon_count ' ]
prev_count = filtered_data [ ' prev_count ' ]
prev_count = filtered_data [ ' prev_count ' ]
@ -907,13 +1049,25 @@ class Command(BaseCommand):
# Update current players from the main file
# Update current players from the main file
current_players , _ = self . parse_rankings_file ( file_path )
current_players , _ = self . parse_rankings_file ( file_path )
# Remove matched players from prev_players for next iteration
# Load the matches from the temp file to identify which prev players were used
if os . path . exists ( matches_file ) :
with open ( matches_file , ' r ' , encoding = ' utf-8 ' ) as f :
matches = json . load ( f )
# Create a set of licenses that were matched
matched_licenses = set ( )
for match in matches :
matched_player = match [ ' best_match ' ] [ ' player ' ]
if matched_player [ ' license ' ] != ' N/A ' and matched_player [ ' license ' ] :
matched_licenses . add ( matched_player [ ' license ' ] )
# Remove matched players from prev_players
prev_players = [ p for p in prev_players if p [ ' license ' ] not in matched_licenses ]
# Update temp files for next iteration
# Update temp files for next iteration
filtered_data = self . create_filtered_working_files ( current_players , prev_players , anon_file , prev_players_file )
filtered_data = self . create_filtered_working_files ( current_players , prev_players , anon_file , prev_players_file , options )
anon_count = filtered_data [ ' anon_count ' ]
prev_count = filtered_data [ ' prev_count ' ]
self . stdout . write ( f " Updated working files: { anon_count } anonymous players and { prev_count } eligible previous month players " )
else :
self . stdout . write ( self . style . SUCCESS ( f " Iteration { iteration } complete: No new matches found " ) )
self . stdout . write ( self . style . SUCCESS ( f " Iteration { iteration } complete: No new matches found " ) )
changes_made = False
changes_made = False
@ -921,7 +1075,7 @@ class Command(BaseCommand):
iteration + = 1
iteration + = 1
# Prevent infinite loops (optional safety check)
# Prevent infinite loops (optional safety check)
if iteration > 10 : # Cap at 10 iterations maximum
if iteration > 1 : # Cap at 10 iterations maximum
self . stdout . write ( self . style . WARNING ( " Maximum iterations reached (10). Stopping process. " ) )
self . stdout . write ( self . style . WARNING ( " Maximum iterations reached (10). Stopping process. " ) )
break
break
@ -940,7 +1094,7 @@ class Command(BaseCommand):
improvement = ( ( initial_anonymous_count - final_anonymous_count ) / initial_anonymous_count ) * 100
improvement = ( ( initial_anonymous_count - final_anonymous_count ) / initial_anonymous_count ) * 100
self . stdout . write ( f " Data completeness improved by { improvement : .1f } % " )
self . stdout . write ( f " Data completeness improved by { improvement : .1f } % " )
def create_filtered_working_files ( self , current_players , prev_players , anon_file , prev_players_file ) :
def create_filtered_working_files ( self , current_players , prev_players , anon_file , prev_players_file , options ) :
"""
"""
Create filtered working files :
Create filtered working files :
1. anonymous_players . json - Contains only anonymous players from current month
1. anonymous_players . json - Contains only anonymous players from current month
@ -950,7 +1104,23 @@ class Command(BaseCommand):
"""
"""
# Extract anonymous players from current month
# Extract anonymous players from current month
anonymous_players = [ p for p in current_players if self . is_anonymous_player ( p ) ]
all_anonymous_players = [ p for p in current_players if self . is_anonymous_player ( p ) ]
# Filter based on named/unnamed options
if options [ ' named_only ' ] :
anonymous_players = [ p for p in all_anonymous_players if (
p [ ' name ' ] != ' N/A ' and p [ ' name ' ] != ' ' and
p [ ' first_name ' ] != ' N/A ' and p [ ' first_name ' ] != ' '
) ]
self . stdout . write ( self . style . SUCCESS ( f " Filtering to only process named anonymous players ( { len ( anonymous_players ) } / { len ( all_anonymous_players ) } ) " ) )
elif options [ ' unnamed_only ' ] :
anonymous_players = [ p for p in all_anonymous_players if (
p [ ' name ' ] == ' N/A ' or p [ ' name ' ] == ' ' or
p [ ' first_name ' ] == ' N/A ' or p [ ' first_name ' ] == ' '
) ]
self . stdout . write ( self . style . SUCCESS ( f " Filtering to only process unnamed anonymous players ( { len ( anonymous_players ) } / { len ( all_anonymous_players ) } ) " ) )
else :
anonymous_players = all_anonymous_players
# Create lookup for current non-anonymous players
# Create lookup for current non-anonymous players
current_players_lookup = { }
current_players_lookup = { }
@ -1022,8 +1192,15 @@ class Command(BaseCommand):
for anon_player in anonymous_players :
for anon_player in anonymous_players :
potential_matches = self . find_potential_matches ( anon_player , prev_players , current_players_indexes , options )
potential_matches = self . find_potential_matches ( anon_player , prev_players , current_players_indexes , options )
if potential_matches :
if potential_matches :
best_match = potential_matches [ 0 ] # Highest confidence match
if len ( potential_matches ) == 1 :
best_match = potential_matches [ 0 ] # Highest confidence match
elif len ( potential_matches ) > 1 and potential_matches [ 0 ] [ ' confidence ' ] - potential_matches [ 1 ] [ ' confidence ' ] > 2 :
# print(potential_matches[0]['confidence'], potential_matches[1]['match_reasons'])
best_match = potential_matches [ 0 ] # Highest confidence match
else :
# for match in potential_matches:
# print(match['player']['name'], match['confidence'], match['match_reasons'])
continue
# Record the match info
# Record the match info
match_info = {
match_info = {
' anonymous_player ' : anon_player ,
' anonymous_player ' : anon_player ,