Improve scraper to download multiple result pages

master
Laurent Morvillier 4 years ago
parent a57c0f13ec
commit 3a8ef77ef6
  1. 36
      TournamentStats.xcodeproj/xcuserdata/laurent.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist
  2. 63
      TournamentStats/scrapper/Downloader.swift

@ -40,9 +40,9 @@
filePath = "TournamentStats/scrapper/Downloader.swift" filePath = "TournamentStats/scrapper/Downloader.swift"
startingColumnNumber = "9223372036854775807" startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807"
startingLineNumber = "188" startingLineNumber = "203"
endingLineNumber = "188" endingLineNumber = "203"
landmarkName = "downloadWSOPResults(tournamentInfo:)" landmarkName = "downloadWSOPResults(url:date:currentTournament:)"
landmarkType = "7"> landmarkType = "7">
</BreakpointContent> </BreakpointContent>
</BreakpointProxy> </BreakpointProxy>
@ -56,9 +56,9 @@
filePath = "TournamentStats/scrapper/Downloader.swift" filePath = "TournamentStats/scrapper/Downloader.swift"
startingColumnNumber = "9223372036854775807" startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807"
startingLineNumber = "161" startingLineNumber = "177"
endingLineNumber = "161" endingLineNumber = "177"
landmarkName = "downloadWSOPResults(tournamentInfo:)" landmarkName = "downloadWSOPResults(url:date:currentTournament:)"
landmarkType = "7"> landmarkType = "7">
</BreakpointContent> </BreakpointContent>
</BreakpointProxy> </BreakpointProxy>
@ -72,9 +72,9 @@
filePath = "TournamentStats/scrapper/Downloader.swift" filePath = "TournamentStats/scrapper/Downloader.swift"
startingColumnNumber = "9223372036854775807" startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807"
startingLineNumber = "137" startingLineNumber = "153"
endingLineNumber = "137" endingLineNumber = "153"
landmarkName = "downloadWSOPResults(tournamentInfo:)" landmarkName = "downloadWSOPResults(url:date:currentTournament:)"
landmarkType = "7"> landmarkType = "7">
</BreakpointContent> </BreakpointContent>
</BreakpointProxy> </BreakpointProxy>
@ -88,9 +88,9 @@
filePath = "TournamentStats/scrapper/Downloader.swift" filePath = "TournamentStats/scrapper/Downloader.swift"
startingColumnNumber = "9223372036854775807" startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807"
startingLineNumber = "130" startingLineNumber = "146"
endingLineNumber = "130" endingLineNumber = "146"
landmarkName = "downloadWSOPResults(tournamentInfo:)" landmarkName = "downloadWSOPResults(url:date:currentTournament:)"
landmarkType = "7"> landmarkType = "7">
</BreakpointContent> </BreakpointContent>
</BreakpointProxy> </BreakpointProxy>
@ -104,9 +104,9 @@
filePath = "TournamentStats/scrapper/Downloader.swift" filePath = "TournamentStats/scrapper/Downloader.swift"
startingColumnNumber = "9223372036854775807" startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807"
startingLineNumber = "138" startingLineNumber = "154"
endingLineNumber = "138" endingLineNumber = "154"
landmarkName = "downloadWSOPResults(tournamentInfo:)" landmarkName = "downloadWSOPResults(url:date:currentTournament:)"
landmarkType = "7"> landmarkType = "7">
</BreakpointContent> </BreakpointContent>
</BreakpointProxy> </BreakpointProxy>
@ -120,9 +120,9 @@
filePath = "TournamentStats/scrapper/Downloader.swift" filePath = "TournamentStats/scrapper/Downloader.swift"
startingColumnNumber = "9223372036854775807" startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807"
startingLineNumber = "153" startingLineNumber = "169"
endingLineNumber = "153" endingLineNumber = "169"
landmarkName = "downloadWSOPResults(tournamentInfo:)" landmarkName = "downloadWSOPResults(url:date:currentTournament:)"
landmarkType = "7"> landmarkType = "7">
</BreakpointContent> </BreakpointContent>
</BreakpointProxy> </BreakpointProxy>

@ -18,13 +18,13 @@ class Downloader {
static func downloadWSOPResults() { static func downloadWSOPResults() {
let infos = [ let infos: [TS] = [
// TS(id: "19728", date: "02/10/2021"), // TS(id: "19728", date: "02/10/2021"),
// TS(id: "19729", date: "03/10/2021"), // TS(id: "19729", date: "03/10/2021"),
// TS(id: "19730", date: "02/10/2021"), // TS(id: "19730", date: "02/10/2021"),
// TS(id: "19732", date: "04/10/2021"), // TS(id: "19732", date: "04/10/2021"),
// TS(id: "19733", date: "05/10/2021"), // TS(id: "19733", date: "05/10/2021"),
// TS(id: "19731", date: "06/10/2021"), TS(id: "19731", date: "06/10/2021"),
// TS(id: "19734", date: "06/10/2021"), // TS(id: "19734", date: "06/10/2021"),
// TS(id: "19926", date: "06/10/2021"), // TS(id: "19926", date: "06/10/2021"),
// TS(id: "19927", date: "06/10/2021"), // TS(id: "19927", date: "06/10/2021"),
@ -40,7 +40,7 @@ class Downloader {
// TS(id: "19745", date: "12/10/2021"), // TS(id: "19745", date: "12/10/2021"),
// TS(id: "19747", date: "12/10/2021"), // TS(id: "19747", date: "12/10/2021"),
// TS(id: "19748", date: "12/10/2021"), // TS(id: "19748", date: "12/10/2021"),
TS(id: "19749", date: "13/10/2021"), // TS(id: "19749", date: "13/10/2021"),
] ]
for info in infos { for info in infos {
@ -65,6 +65,18 @@ class Downloader {
guard let url = URL(string: "https://www.wsop.com/tournaments/results/?aid=2&grid=4865&tid=\(tournamentInfo.id)&rr=5") else { guard let url = URL(string: "https://www.wsop.com/tournaments/results/?aid=2&grid=4865&tid=\(tournamentInfo.id)&rr=5") else {
return return
} }
let realm = try! Realm()
do {
try realm.write {
self.downloadWSOPResults(url: url, date: tournamentInfo.date)
}
} catch {
print("error = \(error)")
}
}
fileprivate static func downloadWSOPResults(url: URL, date: String, currentTournament: Tournament? = nil) {
let realm = try! Realm() let realm = try! Realm()
@ -82,24 +94,26 @@ class Downloader {
if let infos = infos { if let infos = infos {
try realm.write { var tournament: Tournament? = currentTournament
let tournament = realm.create(Tournament.self) if tournament == nil {
let tour = realm.create(Tournament.self)
tournament = tour
if let name = infos.slice(from: "<h1>", to: "</h1>") { if let name = infos.slice(from: "<h1>", to: "</h1>") {
tournament.name = name tour.name = name
tournament.date = Formatter.basicDate.date(from: tournamentInfo.date) ?? Date() tour.date = Formatter.basicDate.date(from: date) ?? Date()
if let strNumber = name.slice(from: "Event #", to: ":"), let number = nf.number(from: strNumber) { if let strNumber = name.slice(from: "Event #", to: ":"), let number = nf.number(from: strNumber) {
tournament.number = number.intValue tour.number = number.intValue
} }
if name.contains("6-Handed") { if name.contains("6-Handed") {
tournament.tableSize = 6 tour.tableSize = 6
} else if name.contains("8-Handed") { } else if name.contains("8-Handed") {
tournament.tableSize = 8 tour.tableSize = 8
} else if name.contains("Heads-Up") { } else if name.contains("Heads-Up") {
tournament.tableSize = 2 tour.tableSize = 2
} }
} else { } else {
@ -109,24 +123,26 @@ class Downloader {
if let stats = string.slice(from: "<ul id=\"eventstats\">", to: "<div id=\"nav-tabs\">") { if let stats = string.slice(from: "<ul id=\"eventstats\">", to: "<div id=\"nav-tabs\">") {
if let strBuyin = stats.slice(from: "Buy-in:</b> &#36;", to: "</li>"), let buyin = nf.number(from: strBuyin) { if let strBuyin = stats.slice(from: "Buy-in:</b> &#36;", to: "</li>"), let buyin = nf.number(from: strBuyin) {
tournament.buyin = buyin.intValue tour.buyin = buyin.intValue
} else { } else {
print("no buyin = \(stats)") print("no buyin = \(stats)")
} }
if let strprizepool = stats.slice(from: "Prizepool:</b> &#36;", to: " </li>"), let pp = nf.number(from: strprizepool) { if let strprizepool = stats.slice(from: "Prizepool:</b> &#36;", to: " </li>"), let pp = nf.number(from: strprizepool) {
tournament.prizepool = pp.doubleValue tour.prizepool = pp.doubleValue
} else { } else {
print("no prizepool = \(stats)") print("no prizepool = \(stats)")
} }
if let strentries = stats.slice(from: "Entries:</b> ", to: " </li>"), let entries = nf.number(from: strentries) { if let strentries = stats.slice(from: "Entries:</b> ", to: " </li>"), let entries = nf.number(from: strentries) {
tournament.entries = entries.intValue tour.entries = entries.intValue
} else { } else {
print("no entries = \(stats)") print("no entries = \(stats)")
} }
} }
}
if let strresultsblock = string.slice(from: "Country</li>", to: "<div id=\"PagingNav\"") { if let strresultsblock = string.slice(from: "Country</li>", to: "<div id=\"PagingNav\"") {
let strresults = strresultsblock.components(separatedBy: "<li class='place") let strresults = strresultsblock.components(separatedBy: "<li class='place")
@ -182,7 +198,6 @@ class Downloader {
if let countryraw if let countryraw
= cells[countryIndex].components(separatedBy: "\'>").last { = cells[countryIndex].components(separatedBy: "\'>").last {
country = String(countryraw.prefix(2)) country = String(countryraw.prefix(2))
// country = countrytmp.prefix(2)
} }
guard let rank = rank, let name = name, let earnings = earnings else { guard let rank = rank, let name = name, let earnings = earnings else {
@ -197,14 +212,28 @@ class Downloader {
result.earnings = earnings result.earnings = earnings
result.rank = rank result.rank = rank
tournament.results.append(result) tournament?.results.append(result)
}
if currentTournament == nil, let paging = string.slice(from: "<div id=\"PagingNav\"", to: "<div class=\"banner\">") {
let pages = paging.components(separatedBy: "<a href='")
if pages.count > 1 {
for i in 2..<pages.count { // don't do page 1 again
let components = pages[i].components(separatedBy: "'")
if let urlString = components.first,
let url = URL(string: "https://www.wsop.com\(urlString)") {
self.downloadWSOPResults(url: url, date: date, currentTournament: tournament)
}
}
}
} }
} else { } else {
print("Bad page strresultsblock") print("Bad page strresultsblock")
} }
}
} else { } else {
print("Missing name block") print("Missing name block")
} }

Loading…
Cancel
Save