You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
PadelClub/PadelClub/Utils/SwiftParser.swift

220 lines
6.2 KiB

//
// SwiftParser.swift
// Padel Tournament
//
// Created by Razmig Sarkissian on 01/09/2023.
//
import Foundation
import PadelClubData
typealias LineIterator = AsyncLineSequence<URL.AsyncBytes>.AsyncIterator
/// One row read from a CSV source, together with its already-split fields.
///
/// The typed accessors below read fixed column positions of `data`. Every lookup
/// is bounds-checked, so a row that has fewer columns than expected (blank line,
/// truncated line, header) yields `nil` / `0` instead of trapping.
struct Line: Identifiable {
    /// Unique identity generated when the value is created.
    let id: UUID = UUID()
    /// Date associated with the file this row was read from.
    let date: Date
    /// URL of the file this row was read from.
    let source: URL
    /// Position of the row inside `source`, as reported by the parser.
    let lineNumber: Int
    /// The unmodified text of the row.
    let rawValue: String
    /// The row split into fields; an empty field is stored as `nil`.
    let data: [String?]

    // Commented-out legacy mapping, kept for reference (not compiled).
    // func updatePlayer(player: Player) {
    // player.clubName = data[11]
    // player.ligue = data[9]
    // player.country = data[4]
    // player.license = data[5]
    // player.inscriptionRank = rank
    // player.currentRank = rank
    // player.tournamentCount = Int16(tournamentCount)
    // player.points = points ?? 0
    // player.codeClub = data[10]
    // player.assimilation = assimilation
    // }

    /// Points parsed from `pointsValue`, accepting a comma as decimal separator.
    /// `nil` when the field is missing or is not a number.
    var points: Double? {
        guard let pointsValue else { return nil }
        return Double(pointsValue.replacingOccurrences(of: ",", with: "."))
    }

    /// Raw text of column 6, or `nil` when the column is empty or absent.
    var pointsValue: String? {
        field(at: 6)
    }

    /// Raw text of column 8, or `nil` when the column is empty or absent.
    var tournamentCountValue: String? {
        field(at: 8)
    }

    /// Column 8 parsed as an integer; `0` when missing or not an integer.
    var tournamentCount: Int64 {
        guard let tournamentCountValue else { return 0 }
        return Int64(tournamentCountValue) ?? 0
    }

    /// Raw text of column 1, or `nil` when the column is empty or absent.
    var rankValue: String? {
        field(at: 1)
    }

    /// Column 1 parsed as an integer; `0` when missing or not an integer.
    var rank: Int {
        guard let rankValue else { return 0 }
        return Int(rankValue) ?? 0
    }

    /// Raw text of column 7, or `nil` when the column is empty or absent.
    var assimilation: String? {
        field(at: 7)
    }

    /// Returns the field at `index`, or `nil` when the row is too short or the
    /// field is empty. Plain subscripting would trap on short rows.
    private func field(at index: Int) -> String? {
        guard data.indices.contains(index) else { return nil }
        return data[index]
    }
}
/// Asynchronous, line-by-line reader for a delimited text file.
///
/// The parser is its own iterator: each call to `next()` reads one line from
/// `url` and returns it as a `Line` whose `data` holds the split fields.
struct CSVParser: AsyncSequence, AsyncIteratorProtocol {
    typealias Element = Line

    /// File being read.
    let url: URL
    private var lineIterator: LineIterator
    private let separator: Character
    private let quoteCharacter: Character = "\""
    /// Zero-based index of the next line `next()` will return.
    private var lineNumber = 0
    /// Date derived from the file path; copied into every produced `Line`.
    private let date: Date
    /// `true` when the file path contains `SourceFile.messieurs.rawValue`.
    let maleData: Bool

    /// Creates a parser for the file at `url`.
    /// - Parameters:
    ///   - url: File to read line by line.
    ///   - separator: Field delimiter; defaults to `;`.
    init(url: URL, separator: Character = ";") {
        self.date = url.dateFromPath
        self.url = url
        self.separator = separator
        self.lineIterator = url.lines.makeAsyncIterator()
        self.maleData = url.path().contains(SourceFile.messieurs.rawValue)
    }

    /// Consumes every remaining line and returns the final one.
    ///
    /// The returned `Line` carries the zero-based index of that line, counted
    /// consistently with `next()`. Returns `nil` when nothing was left to read.
    mutating func last() async throws -> Line? {
        var lastString: String?
        while let string = try await lineIterator.next() {
            lastString = string
            // Keep the counter in sync with what has been consumed so the
            // reported line number is not stale.
            lineNumber += 1
        }
        guard let lastString else { return nil }
        return makeLine(from: lastString, number: lineNumber - 1)
    }

    /// Reads the next line, or returns `nil` once the file is exhausted.
    mutating func next() async throws -> Line? {
        guard let string = try await lineIterator.next() else { return nil }
        defer { lineNumber += 1 }
        return makeLine(from: string, number: lineNumber)
    }

    /// The parser is its own iterator, so iteration works on a copy of `self`.
    func makeAsyncIterator() -> CSVParser {
        return self
    }

    /// Builds the `Line` for `string` found at index `number` of the file.
    private func makeLine(from string: String, number: Int) -> Line {
        Line(
            date: date,
            source: url,
            lineNumber: number,
            rawValue: string,
            data: split(line: string)
        )
    }

    /// Splits one line into fields on `separator`.
    ///
    /// Separators inside a quoted section are kept as field content, the
    /// enclosing quotes are removed, and a doubled quote inside a quoted section
    /// (`""`) yields one literal quote character. Empty fields become `nil`.
    private func split(line: String) -> [String?] {
        var fields: [String?] = []
        var field = ""
        var inQuote = false
        var index = line.startIndex

        while index < line.endIndex {
            let character = line[index]
            let nextIndex = line.index(after: index)

            if character == quoteCharacter {
                // A quote immediately followed by another quote, while already
                // inside quotes, is an escaped literal quote: keep one and skip both.
                if inQuote, nextIndex < line.endIndex, line[nextIndex] == quoteCharacter {
                    field.append(quoteCharacter)
                    index = line.index(after: nextIndex)
                    continue
                }
                inQuote.toggle()
            } else if character == separator, !inQuote {
                fields.append(field.isEmpty ? nil : field)
                field = ""
            } else {
                field.append(character)
            }
            index = nextIndex
        }

        // The last field has no trailing separator.
        fields.append(field.isEmpty ? nil : field)
        return fields
    }

    /// Splits the CSV file into multiple temporary CSV files, each containing `size` lines.
    /// Returns an array of new `CSVParser` instances pointing to these chunked files.
    ///
    /// The final chunk may hold fewer than `size` lines. A `size` of zero or less
    /// never matches the chunk length, so all lines end up in a single chunk.
    /// - Throws: Any error raised while reading the source or writing a chunk.
    func getChunkedParser(size: Int) async throws -> [CSVParser] {
        var chunkedParsers: [CSVParser] = []
        var currentChunk: [String] = []
        var iterator = makeAsyncIterator()
        var chunkIndex = 0

        while let line = try await iterator.next()?.rawValue {
            currentChunk.append(line)
            // When the chunk reaches the desired size, write it to a file
            if currentChunk.count == size {
                let chunkURL = try writeChunkToFile(chunk: currentChunk, index: chunkIndex)
                chunkedParsers.append(CSVParser(url: chunkURL, separator: separator))
                chunkIndex += 1
                currentChunk.removeAll(keepingCapacity: true)
            }
        }

        // Handle remaining lines (if any)
        if !currentChunk.isEmpty {
            let chunkURL = try writeChunkToFile(chunk: currentChunk, index: chunkIndex)
            chunkedParsers.append(CSVParser(url: chunkURL, separator: separator))
        }
        return chunkedParsers
    }

    /// Writes a chunk of CSV lines to a temporary file and returns its URL.
    ///
    /// The file is named `<original file name>-<index>.csv` inside the temporary
    /// directory, so the original file name stays part of the chunk's path.
    private func writeChunkToFile(chunk: [String], index: Int) throws -> URL {
        let tempDirectory = FileManager.default.temporaryDirectory
        let chunkURL = tempDirectory.appendingPathComponent("\(url.lastPathComponent)-\(index).csv")
        let chunkData = chunk.joined(separator: "\n")
        try chunkData.write(to: chunkURL, atomically: true, encoding: .utf8)
        return chunkURL
    }
}
/// Chunks every source concurrently and returns all resulting mini-CSV parsers.
///
/// Each source is split in its own child task. A source whose chunking fails
/// contributes no parsers; its error is discarded rather than rethrown. Results
/// are appended in the order the child tasks finish.
/// - Parameters:
///   - sources: Parsers for the large CSV files to split.
///   - size: Number of lines per chunk, forwarded to `getChunkedParser(size:)`.
/// - Returns: The parsers for every chunk that was produced.
func chunkAllSources(sources: [CSVParser], size: Int) async throws -> [CSVParser] {
    await withTaskGroup(of: [CSVParser].self, returning: [CSVParser].self) { group in
        for source in sources {
            group.addTask {
                do {
                    return try await source.getChunkedParser(size: size)
                } catch {
                    // Best effort: a failing source simply yields nothing.
                    return []
                }
            }
        }

        var collected: [CSVParser] = []
        for await parsers in group {
            collected += parsers
        }
        return collected
    }
}