feat: Add Ruff linting, pre-commit hooks, and build scripts
- Configure Ruff for linting and formatting with pre-commit hooks
- Add Makefile with convenient commands for development workflow
- Create build and upload scripts for Gitea package registry
- Update README with documentation for new features
- Fix code quality issues identified by Ruff
- Add development dependencies (ruff, pre-commit) to pyproject.toml
- Update Python version requirement to >=3.9
- Add template for Gitea PyPI configuration
- Bump version to 0.3.0
- All tests passing and code properly formatted
This commit is contained in:
@@ -3,83 +3,95 @@
|
||||
Module to fetch song lyrics from paroles.net
|
||||
"""
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import argparse
|
||||
import re
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def get_song_lyrics(artist, song_title):
|
||||
"""
|
||||
Fetch song lyrics from paroles.net
|
||||
|
||||
|
||||
Args:
|
||||
artist (str): Name of the artist
|
||||
song_title (str): Title of the song
|
||||
|
||||
|
||||
Returns:
|
||||
str: Song lyrics or error message
|
||||
"""
|
||||
# Format the URL
|
||||
# Convert artist and song to lowercase and replace spaces with hyphens
|
||||
formatted_artist = artist.lower().replace(' ', '-').replace('$', 's').replace('&', 'and')
|
||||
formatted_song = song_title.lower().replace(' ', '-').replace('\'', '').replace('"', '')
|
||||
|
||||
formatted_artist = (
|
||||
artist.lower().replace(" ", "-").replace("$", "s").replace("&", "and")
|
||||
)
|
||||
formatted_song = (
|
||||
song_title.lower().replace(" ", "-").replace("'", "").replace('"', "")
|
||||
)
|
||||
|
||||
url = f"https://www.paroles.net/{formatted_artist}/paroles-{formatted_song}"
|
||||
|
||||
|
||||
try:
|
||||
# Set headers to mimic a browser request
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
# Send GET request
|
||||
response = requests.get(url, headers=headers)
|
||||
response.raise_for_status() # Raise exception for bad status codes
|
||||
|
||||
|
||||
# Parse HTML content
|
||||
soup = BeautifulSoup(response.content, 'html.parser')
|
||||
|
||||
soup = BeautifulSoup(response.content, "html.parser")
|
||||
|
||||
# Find the lyrics container
|
||||
# Looking for the div with class 'song-text'
|
||||
lyrics_div = soup.find('div', class_='song-text')
|
||||
|
||||
lyrics_div = soup.find("div", class_="song-text")
|
||||
|
||||
if not lyrics_div:
|
||||
return "Lyrics not found on the page"
|
||||
|
||||
|
||||
# Extract text content
|
||||
# Get all text from the div but preserve line breaks
|
||||
lyrics_parts = []
|
||||
for element in lyrics_div.descendants:
|
||||
if element.name == 'br':
|
||||
lyrics_parts.append('\n')
|
||||
elif element.string and element.string.strip():
|
||||
# Skip the heading that repeats the song info
|
||||
if 'Paroles de la chanson' not in element.string:
|
||||
lyrics_parts.append(element.string)
|
||||
|
||||
if element.name == "br":
|
||||
lyrics_parts.append("\n")
|
||||
elif (
|
||||
element.string
|
||||
and element.string.strip()
|
||||
and "Paroles de la chanson" not in element.string
|
||||
):
|
||||
lyrics_parts.append(element.string)
|
||||
|
||||
# Join the parts and clean up
|
||||
lyrics = ''.join(lyrics_parts).strip()
|
||||
|
||||
lyrics = "".join(lyrics_parts).strip()
|
||||
|
||||
# Clean up extra whitespace while preserving verse structure
|
||||
lines = lyrics.split('\n')
|
||||
lines = lyrics.split("\n")
|
||||
cleaned_lines = []
|
||||
for line in lines:
|
||||
stripped_line = line.strip()
|
||||
# Skip empty lines and ad content
|
||||
if stripped_line and not re.match(r'^(Content_\d+|.*Advertisement.*|\d+\s*)$', stripped_line):
|
||||
if stripped_line and not re.match(
|
||||
r"^(Content_\d+|.*Advertisement.*|\d+\s*)$", stripped_line
|
||||
):
|
||||
# Also remove inline ad markers
|
||||
cleaned_line = re.sub(r'^Content_\d+\s*', '', stripped_line)
|
||||
cleaned_line = re.sub(r"^Content_\d+\s*", "", stripped_line)
|
||||
if cleaned_line: # Only add non-empty lines
|
||||
cleaned_lines.append(cleaned_line)
|
||||
|
||||
lyrics = '\n'.join(cleaned_lines).strip()
|
||||
|
||||
|
||||
lyrics = "\n".join(cleaned_lines).strip()
|
||||
|
||||
if not lyrics:
|
||||
return "Could not extract lyrics from the page"
|
||||
|
||||
|
||||
return lyrics
|
||||
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
return f"Error fetching lyrics: {str(e)}"
|
||||
except Exception as e:
|
||||
@@ -89,48 +101,65 @@ def get_song_lyrics(artist, song_title):
|
||||
def search_song(artist, song_title, timeout=10):
    """
    Search for a song on paroles.net and return the first result

    Args:
        artist (str): Name of the artist
        song_title (str): Title of the song
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout, requests may block indefinitely.

    Returns:
        str: URL of the first search result or error message
    """
    # Local import keeps this block self-contained; urllib is stdlib.
    from urllib.parse import urljoin

    base_url = "https://www.paroles.net"

    # Format search URL
    search_query = f"{artist} {song_title}"
    search_url = f"{base_url}/recherche?q={requests.utils.quote(search_query)}"

    # Set headers to mimic a browser request
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        )
    }

    try:
        # Timeout errors are RequestException subclasses, so they are
        # reported through the same error-message path as other failures.
        response = requests.get(search_url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return f"Error searching for song: {str(e)}"

    soup = BeautifulSoup(response.content, "html.parser")

    # Find the first search result link
    first_result = soup.find("a", href=lambda x: x and "/paroles-" in x)
    if not first_result:
        return "No search results found"

    # urljoin leaves absolute hrefs untouched and resolves relative ones
    # against the site root, instead of blindly prefixing the host.
    return urljoin(f"{base_url}/", first_result["href"])
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Fetch song lyrics from paroles.net')
|
||||
parser.add_argument('query', help='Artist and song in format "ARTIST - SONG TITLE" or separate artist and song arguments')
|
||||
parser.add_argument('song', nargs='?', help='Song title (optional if using ARTIST - SONG format)')
|
||||
parser.add_argument('--search', action='store_true', help='Use search functionality instead of direct URL construction')
|
||||
|
||||
parser = argparse.ArgumentParser(description="Fetch song lyrics from paroles.net")
|
||||
parser.add_argument(
|
||||
"query",
|
||||
help=(
|
||||
"Artist and song in format 'ARTIST - SONG TITLE' or "
|
||||
"separate artist and song arguments"
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"song", nargs="?", help="Song title (optional if using ARTIST - SONG format)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--search",
|
||||
action="store_true",
|
||||
help="Use search functionality instead of direct URL construction",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
# Handle both input formats:
|
||||
# 1. Single argument: "ARTIST - SONG TITLE"
|
||||
# 2. Two arguments: ARTIST SONG_TITLE
|
||||
@@ -139,17 +168,19 @@ def main():
|
||||
if " - " in args.query:
|
||||
artist, song = args.query.split(" - ", 1)
|
||||
else:
|
||||
print("Error: Please provide artist and song in format 'ARTIST - SONG TITLE'")
|
||||
print(
|
||||
"Error: Please provide artist and song in format 'ARTIST - SONG TITLE'"
|
||||
)
|
||||
return
|
||||
else:
|
||||
# Two argument format: artist and song provided separately
|
||||
artist = args.query
|
||||
song = args.song
|
||||
|
||||
|
||||
# Strip any leading/trailing whitespace
|
||||
artist = artist.strip()
|
||||
song = song.strip()
|
||||
|
||||
|
||||
if args.search:
|
||||
# First search for the song to get the correct URL
|
||||
search_result = search_song(artist, song)
|
||||
@@ -167,4 +198,4 @@ def main():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user