Source code for mchartanalyzer.ultimateguitarstrategy
import urllib.parse
import re
import requests
from bs4 import BeautifulSoup
from .scrapestrategy import ScrapeStrategy
class UltimateGuitarStrategy(ScrapeStrategy):
    """
    Scrape strategy for chord charts hosted on ultimate-guitar.com.

    Builds artist listing URLs, collects the chord chart links found on
    those listings (following "next" pagination links), and extracts a
    song title from a parsed chart page.
    """

    # Matches the word "chords" (any case) together with any spaces
    # directly before it, so it can be stripped from a page heading.
    _TITLE_CHORDS_PATTERN = re.compile(r"[ ]*chords", re.IGNORECASE)

    def __init__(self):
        # NOTE(review): the base class initialiser is not invoked here;
        # confirm ScrapeStrategy needs no set-up of its own.
        self.siteDomain = "ultimate-guitar.com"
        # Base used to resolve relative pagination links in getSongUrls.
        self.siteDomainRoot = "http://ultimate-guitar.com"

    def _formatArtistName(self, artistName):
        """
        Converts an artist name to the form used in artist URLs:
        lower-cased, with spaces replaced by underscores.
        """
        return artistName.lower().replace(" ", "_")

    def getSourceName(self):
        """
        Gets the human-readable name of this chart source.
        """
        return "Ultimate Guitar"

    def getArtistUrl(self, artistName):
        """
        Gets the URL of the chord/tab listing page for the given artist.
        """
        formattedName = self._formatArtistName(artistName)
        return "https://www.ultimate-guitar.com/tabs/" + formattedName + "_chords_tabs.htm"

    def getSongUrls(self, artistUrl):
        """
        Gets the song URLs from an artist chord chart page.
        If there are multiple pages available, this method will call itself for the next available page.

        Returns a list of href values exactly as they appear in the page.
        Anchors without an href attribute are ignored.
        """
        # NOTE(review): no timeout is passed, so a stalled server blocks
        # this call indefinitely.
        resp = requests.get(artistUrl)
        soup = BeautifulSoup(resp.content, "html.parser")

        songUrls = []

        # Chart links contain "crd"; album listings ("album_crd") are not charts.
        for urlTag in soup.select("td a"):
            hrefContent = urlTag.get("href")
            if hrefContent is None:
                continue
            if "crd" in hrefContent and "album_crd" not in hrefContent:
                songUrls.append(hrefContent)

        # Pagination links carry the "ys" class; follow every one labelled "next".
        for urlTag in soup.select("td a.ys"):
            if "next" not in urlTag.get_text().lower():
                continue
            navLinkUrl = urlTag.get("href")
            if navLinkUrl is None:
                continue
            navLinkUrlAbs = urllib.parse.urljoin(self.siteDomainRoot, navLinkUrl)
            songUrls.extend(self.getSongUrls(navLinkUrlAbs))

        return songUrls

    def getSongUrlsForArtist(self, artistName):
        """
        Gets the chart URLs for a given artist.
        """
        return self.getSongUrls(self.getArtistUrl(artistName))

    def getSongTitle(self, bSoup):
        """
        Gets the song title from a parsed chart page.

        The title is the text of the first element matching
        ".t_header .t_title h1", with every occurrence of "chords"
        (case-insensitive, plus any spaces directly before it) removed.

        Raises IndexError if the page has no matching heading.
        """
        titleTag = bSoup.select(".t_header .t_title h1")[0]
        return self._TITLE_CHORDS_PATTERN.sub("", titleTag.get_text())