//
//  SwiftParser.swift
//  Padel Tournament
//
//  Created by Razmig Sarkissian on 01/09/2023.
//

import Foundation
import PadelClubData

typealias LineIterator = AsyncLineSequence<URL.AsyncBytes>.AsyncIterator

/// A single parsed CSV line, exposing typed accessors for selected columns.
struct Line: Identifiable {
    let id: UUID = UUID()
    let date: Date
    let source: URL
    let lineNumber: Int
    let rawValue: String
    /// Raw column values in file order; empty columns are stored as `nil`.
    let data: [String?]

//    func updatePlayer(player: Player) {
//        player.clubName = data[11]
//        player.ligue = data[9]
//        player.country = data[4]
//        player.license = data[5]
//        player.inscriptionRank = rank
//        player.currentRank = rank
//        player.tournamentCount = Int16(tournamentCount)
//        player.points = points ?? 0
//        player.codeClub = data[10]
//        player.assimilation = assimilation
//    }

    /// The points value converted to `Double`, accepting a comma as decimal separator.
    var points: Double? {
        if let pointsValue {
            return Double(pointsValue.replacingOccurrences(of: ",", with: "."))
        }
        return nil
    }

    var pointsValue: String? {
        data[6]
    }

    var tournamentCountValue: String? {
        data[8]
    }

    /// Number of tournaments played, or 0 when the column is missing or not a number.
    var tournamentCount: Int64 {
        if let tournamentCountValue {
            return Int64(tournamentCountValue) ?? 0
        }
        return 0
    }

    var rankValue: String? {
        data[1]
    }

    /// The rank column as an integer, or 0 when missing or not a number.
    var rank: Int {
        if let rankValue {
            return Int(rankValue) ?? 0
        }
        return 0
    }

    var assimilation: String? {
        data[7]
    }
}

/// Lazily parses a separated-values file as an async sequence of `Line` values.
struct CSVParser: AsyncSequence, AsyncIteratorProtocol {
    typealias Element = Line

    let url: URL
    private var lineIterator: LineIterator
    private let separator: Character
    private let quoteCharacter: Character = "\""
    private var lineNumber = 0
    private let date: Date
    /// True when the file path contains the men's ("messieurs") source file name.
    let maleData: Bool

    init(url: URL, separator: Character = ";") {
        self.date = url.dateFromPath
        self.url = url
        self.separator = separator
        self.lineIterator = url.lines.makeAsyncIterator()
        self.maleData = url.path().contains(SourceFile.messieurs.rawValue)
    }

    /// Reads through the remaining lines and returns the last one, or `nil` if there is none.
    mutating func last() async throws -> Line? {
        var lastString: String?
        var currentString: String?
        var isOver: Bool = false
        while isOver == false {
            lastString = currentString
            currentString = try await lineIterator.next()
            if currentString == nil {
                isOver = true
            }
        }

        if let lastString {
            return Line(
                date: date,
                source: url,
                lineNumber: lineNumber,
                rawValue: lastString,
                data: split(line: lastString))
        }
        return nil
    }

    /// Returns the next line of the file parsed into columns, or `nil` at end of file.
    mutating func next() async throws -> Line? {
        if let string = try await lineIterator.next() {
            defer { lineNumber += 1 }
            return Line(
                date: date,
                source: url,
                lineNumber: lineNumber,
                rawValue: string,
                data: split(line: string)
            )
        }

        return nil
    }

    func makeAsyncIterator() -> CSVParser {
        return self
    }

    /// Splits a raw line on `separator`, ignoring separators inside quoted fields and
    /// dropping the quote characters themselves. Empty fields become `nil`.
    private func split(line: String) -> [String?] {
        var data = [String?]()
        var inQuote = false
        var currentString = ""

        for character in line {
            switch character {
            case quoteCharacter:
                inQuote = !inQuote
                continue

            case separator:
                if !inQuote {
                    data.append(currentString.isEmpty ? nil : currentString)
                    currentString = ""
                    continue
                }

            default:
                break
            }

            currentString.append(character)
        }

        data.append(currentString.isEmpty ? nil : currentString)

        return data
    }
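
    // Illustration (assumed input, not taken from a real export): with the default ";" separator,
    //     split(line: "7;\"PADEL CLUB;PARIS\";;1200,5")
    // returns ["7", "PADEL CLUB;PARIS", nil, "1200,5"]: the quotes are dropped, the separator
    // inside the quoted field is preserved, and the empty field becomes nil.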

    /// Splits the CSV file into multiple temporary CSV files, each containing `size` lines.
    /// Returns an array of new `CSVParser` instances pointing to these chunked files.
    func getChunkedParser(size: Int) async throws -> [CSVParser] {
        var chunkedParsers: [CSVParser] = []
        var currentChunk: [String] = []
        var iterator = self.makeAsyncIterator()
        var chunkIndex = 0

        while let line = try await iterator.next()?.rawValue {
            currentChunk.append(line)

            // When the chunk reaches the desired size, write it to a file.
            if currentChunk.count == size {
                let chunkURL = try writeChunkToFile(chunk: currentChunk, index: chunkIndex)
                chunkedParsers.append(CSVParser(url: chunkURL, separator: self.separator))
                chunkIndex += 1
                currentChunk.removeAll()
            }
        }

        // Handle remaining lines (if any).
        if !currentChunk.isEmpty {
            let chunkURL = try writeChunkToFile(chunk: currentChunk, index: chunkIndex)
            chunkedParsers.append(CSVParser(url: chunkURL, separator: self.separator))
        }

        return chunkedParsers
    }

    /// Writes a chunk of CSV lines to a temporary file and returns its URL.
    private func writeChunkToFile(chunk: [String], index: Int) throws -> URL {
        let tempDirectory = FileManager.default.temporaryDirectory
        let chunkURL = tempDirectory.appendingPathComponent("\(url.lastPathComponent)-\(index).csv")

        let chunkData = chunk.joined(separator: "\n")
        try chunkData.write(to: chunkURL, atomically: true, encoding: .utf8)

        return chunkURL
    }
}
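
// Minimal usage sketch: read one export row by row and print a few typed columns.
// `printRows` and `exportURL` are illustrative names, not existing app API.
func printRows(from exportURL: URL) async throws {
    let parser = CSVParser(url: exportURL)
    for try await line in parser {
        print(line.lineNumber, line.rank, line.points ?? 0)
    }
}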

/// Process all large CSV files concurrently and gather all mini CSVs.
func chunkAllSources(sources: [CSVParser], size: Int) async throws -> [CSVParser] {
    var allChunks: [CSVParser] = []

    await withTaskGroup(of: [CSVParser].self) { group in
        for source in sources {
            group.addTask {
                return (try? await source.getChunkedParser(size: size)) ?? []
            }
        }

        for await miniCSVs in group {
            allChunks.append(contentsOf: miniCSVs)
        }
    }

    return allChunks
}
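
// Sketch of driving the concurrent chunking above: one parser per source file, split into
// 500-line mini files. `chunkExports` and the 500-line size are illustrative assumptions.
func chunkExports(at urls: [URL]) async throws -> [CSVParser] {
    let sources = urls.map { CSVParser(url: $0) }
    return try await chunkAllSources(sources: sources, size: 500)
}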