Improve scraper to download multiple result pages

master
Laurent Morvillier 4 years ago
parent a57c0f13ec
commit 3a8ef77ef6
  1. 36
      TournamentStats.xcodeproj/xcuserdata/laurent.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist
  2. 63
      TournamentStats/scrapper/Downloader.swift

@ -40,9 +40,9 @@
filePath = "TournamentStats/scrapper/Downloader.swift"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
startingLineNumber = "188"
endingLineNumber = "188"
landmarkName = "downloadWSOPResults(tournamentInfo:)"
startingLineNumber = "203"
endingLineNumber = "203"
landmarkName = "downloadWSOPResults(url:date:currentTournament:)"
landmarkType = "7">
</BreakpointContent>
</BreakpointProxy>
@ -56,9 +56,9 @@
filePath = "TournamentStats/scrapper/Downloader.swift"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
startingLineNumber = "161"
endingLineNumber = "161"
landmarkName = "downloadWSOPResults(tournamentInfo:)"
startingLineNumber = "177"
endingLineNumber = "177"
landmarkName = "downloadWSOPResults(url:date:currentTournament:)"
landmarkType = "7">
</BreakpointContent>
</BreakpointProxy>
@ -72,9 +72,9 @@
filePath = "TournamentStats/scrapper/Downloader.swift"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
startingLineNumber = "137"
endingLineNumber = "137"
landmarkName = "downloadWSOPResults(tournamentInfo:)"
startingLineNumber = "153"
endingLineNumber = "153"
landmarkName = "downloadWSOPResults(url:date:currentTournament:)"
landmarkType = "7">
</BreakpointContent>
</BreakpointProxy>
@ -88,9 +88,9 @@
filePath = "TournamentStats/scrapper/Downloader.swift"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
startingLineNumber = "130"
endingLineNumber = "130"
landmarkName = "downloadWSOPResults(tournamentInfo:)"
startingLineNumber = "146"
endingLineNumber = "146"
landmarkName = "downloadWSOPResults(url:date:currentTournament:)"
landmarkType = "7">
</BreakpointContent>
</BreakpointProxy>
@ -104,9 +104,9 @@
filePath = "TournamentStats/scrapper/Downloader.swift"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
startingLineNumber = "138"
endingLineNumber = "138"
landmarkName = "downloadWSOPResults(tournamentInfo:)"
startingLineNumber = "154"
endingLineNumber = "154"
landmarkName = "downloadWSOPResults(url:date:currentTournament:)"
landmarkType = "7">
</BreakpointContent>
</BreakpointProxy>
@ -120,9 +120,9 @@
filePath = "TournamentStats/scrapper/Downloader.swift"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
startingLineNumber = "153"
endingLineNumber = "153"
landmarkName = "downloadWSOPResults(tournamentInfo:)"
startingLineNumber = "169"
endingLineNumber = "169"
landmarkName = "downloadWSOPResults(url:date:currentTournament:)"
landmarkType = "7">
</BreakpointContent>
</BreakpointProxy>

@ -18,13 +18,13 @@ class Downloader {
static func downloadWSOPResults() {
let infos = [
let infos: [TS] = [
// TS(id: "19728", date: "02/10/2021"),
// TS(id: "19729", date: "03/10/2021"),
// TS(id: "19730", date: "02/10/2021"),
// TS(id: "19732", date: "04/10/2021"),
// TS(id: "19733", date: "05/10/2021"),
// TS(id: "19731", date: "06/10/2021"),
TS(id: "19731", date: "06/10/2021"),
// TS(id: "19734", date: "06/10/2021"),
// TS(id: "19926", date: "06/10/2021"),
// TS(id: "19927", date: "06/10/2021"),
@ -40,7 +40,7 @@ class Downloader {
// TS(id: "19745", date: "12/10/2021"),
// TS(id: "19747", date: "12/10/2021"),
// TS(id: "19748", date: "12/10/2021"),
TS(id: "19749", date: "13/10/2021"),
// TS(id: "19749", date: "13/10/2021"),
]
for info in infos {
@ -65,6 +65,18 @@ class Downloader {
guard let url = URL(string: "https://www.wsop.com/tournaments/results/?aid=2&grid=4865&tid=\(tournamentInfo.id)&rr=5") else {
return
}
let realm = try! Realm()
do {
try realm.write {
self.downloadWSOPResults(url: url, date: tournamentInfo.date)
}
} catch {
print("error = \(error)")
}
}
fileprivate static func downloadWSOPResults(url: URL, date: String, currentTournament: Tournament? = nil) {
let realm = try! Realm()
@ -82,24 +94,26 @@ class Downloader {
if let infos = infos {
try realm.write {
var tournament: Tournament? = currentTournament
let tournament = realm.create(Tournament.self)
if tournament == nil {
let tour = realm.create(Tournament.self)
tournament = tour
if let name = infos.slice(from: "<h1>", to: "</h1>") {
tournament.name = name
tournament.date = Formatter.basicDate.date(from: tournamentInfo.date) ?? Date()
tour.name = name
tour.date = Formatter.basicDate.date(from: date) ?? Date()
if let strNumber = name.slice(from: "Event #", to: ":"), let number = nf.number(from: strNumber) {
tournament.number = number.intValue
tour.number = number.intValue
}
if name.contains("6-Handed") {
tournament.tableSize = 6
tour.tableSize = 6
} else if name.contains("8-Handed") {
tournament.tableSize = 8
tour.tableSize = 8
} else if name.contains("Heads-Up") {
tournament.tableSize = 2
tour.tableSize = 2
}
} else {
@ -109,24 +123,26 @@ class Downloader {
if let stats = string.slice(from: "<ul id=\"eventstats\">", to: "<div id=\"nav-tabs\">") {
if let strBuyin = stats.slice(from: "Buy-in:</b> &#36;", to: "</li>"), let buyin = nf.number(from: strBuyin) {
tournament.buyin = buyin.intValue
tour.buyin = buyin.intValue
} else {
print("no buyin = \(stats)")
}
if let strprizepool = stats.slice(from: "Prizepool:</b> &#36;", to: " </li>"), let pp = nf.number(from: strprizepool) {
tournament.prizepool = pp.doubleValue
tour.prizepool = pp.doubleValue
} else {
print("no prizepool = \(stats)")
}
if let strentries = stats.slice(from: "Entries:</b> ", to: " </li>"), let entries = nf.number(from: strentries) {
tournament.entries = entries.intValue
tour.entries = entries.intValue
} else {
print("no entries = \(stats)")
}
}
}
if let strresultsblock = string.slice(from: "Country</li>", to: "<div id=\"PagingNav\"") {
let strresults = strresultsblock.components(separatedBy: "<li class='place")
@ -182,7 +198,6 @@ class Downloader {
if let countryraw
= cells[countryIndex].components(separatedBy: "\'>").last {
country = String(countryraw.prefix(2))
// country = countrytmp.prefix(2)
}
guard let rank = rank, let name = name, let earnings = earnings else {
@ -197,14 +212,28 @@ class Downloader {
result.earnings = earnings
result.rank = rank
tournament.results.append(result)
tournament?.results.append(result)
}
if currentTournament == nil, let paging = string.slice(from: "<div id=\"PagingNav\"", to: "<div class=\"banner\">") {
let pages = paging.components(separatedBy: "<a href='")
if pages.count > 1 {
for i in 2..<pages.count { // don't do page 1 again
let components = pages[i].components(separatedBy: "'")
if let urlString = components.first,
let url = URL(string: "https://www.wsop.com\(urlString)") {
self.downloadWSOPResults(url: url, date: date, currentTournament: tournament)
}
}
}
}
} else {
print("Bad page strresultsblock")
}
}
} else {
print("Missing name block")
}

Loading…
Cancel
Save