Updated the CLI to accept arguments in two formats: (1) a single argument with a dash separator, "ARTIST - SONG"; (2) two separate arguments, ARTIST SONG. This makes it more convenient for users to input artist and song information. Also updated the README to document both usage formats.
170 lines
6.0 KiB
Python
170 lines
6.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Module to fetch song lyrics from paroles.net
|
|
"""
|
|
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
import argparse
|
|
import re
|
|
|
|
|
|
def get_song_lyrics(artist, song_title, timeout=10):
    """
    Fetch song lyrics from paroles.net

    The page URL is built directly from the artist and song title, the page
    is downloaded, and the text inside the ``div.song-text`` element is
    extracted and cleaned of ad markers.

    Args:
        artist (str): Name of the artist
        song_title (str): Title of the song
        timeout (float): Seconds to wait for the server before giving up

    Returns:
        str: Song lyrics or error message
    """
    # Build the URL slugs: lowercase, spaces become hyphens, and a few
    # characters are substituted ('$' -> 's', '&' -> 'and') or dropped (quotes).
    formatted_artist = artist.lower().replace(' ', '-').replace('$', 's').replace('&', 'and')
    formatted_song = song_title.lower().replace(' ', '-').replace('\'', '').replace('"', '')

    url = f"https://www.paroles.net/{formatted_artist}/paroles-{formatted_song}"

    try:
        # Set headers to mimic a browser request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        # Without a timeout a stalled connection would block forever; a
        # timeout raises a RequestException subclass handled below.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise exception for bad status codes

        soup = BeautifulSoup(response.content, 'html.parser')

        # The lyrics live in the div with class 'song-text'
        lyrics_div = soup.find('div', class_='song-text')
        if not lyrics_div:
            return "Lyrics not found on the page"

        lyrics_parts = []
        for element in lyrics_div.descendants:
            if isinstance(element, str):
                # Text node (bs4 text nodes subclass str). Only text nodes
                # are collected: a tag wrapping a single string exposes the
                # same text through .string, and its text node is visited
                # separately, so reading tags too would duplicate the text.
                # The heading that repeats the song info is skipped.
                if element.strip() and 'Paroles de la chanson' not in element:
                    lyrics_parts.append(str(element))
            elif getattr(element, 'name', None) == 'br':
                # <br> tags carry the line structure of the lyrics
                lyrics_parts.append('\n')

        lyrics = _clean_lyrics_text(''.join(lyrics_parts))

        if not lyrics:
            return "Could not extract lyrics from the page"

        return lyrics

    except requests.exceptions.RequestException as e:
        return f"Error fetching lyrics: {str(e)}"
    except Exception as e:
        # Deliberate catch-all: callers expect an error string, not a raise
        return f"Error parsing lyrics: {str(e)}"


def _clean_lyrics_text(raw_text):
    """Strip whitespace, blank lines and ad markers from extracted lyrics text."""
    cleaned_lines = []
    for line in raw_text.strip().split('\n'):
        stripped_line = line.strip()
        # Skip empty lines and lines that are only ad content or digits
        if not stripped_line or re.match(r'^(Content_\d+|.*Advertisement.*|\d+\s*)$', stripped_line):
            continue
        # Also remove ad markers prefixed to a real line
        cleaned_line = re.sub(r'^Content_\d+\s*', '', stripped_line)
        if cleaned_line:
            cleaned_lines.append(cleaned_line)
    return '\n'.join(cleaned_lines).strip()
|
|
|
|
|
|
def search_song(artist, song_title, timeout=10):
    """
    Search for a song on paroles.net and return the first result

    Args:
        artist (str): Name of the artist
        song_title (str): Title of the song
        timeout (float): Seconds to wait for the server before giving up

    Returns:
        str: URL of the first search result or error message
    """
    # Local import keeps this function self-contained
    from urllib.parse import urljoin

    base_url = "https://www.paroles.net"

    # Format search URL
    search_query = f"{artist} {song_title}"
    search_url = f"{base_url}/recherche?q={requests.utils.quote(search_query)}"

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        # Without a timeout a stalled connection would block forever; a
        # timeout raises a RequestException subclass handled below.
        response = requests.get(search_url, headers=headers, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # The first link whose href contains '/paroles-' is taken as the result
        first_result = soup.find('a', href=lambda x: x and '/paroles-' in x)

        if first_result:
            # urljoin handles relative, absolute and protocol-relative hrefs;
            # plain concatenation would corrupt an already-absolute link.
            return urljoin(base_url, first_result['href'])

        return "No search results found"

    except requests.exceptions.RequestException as e:
        return f"Error searching for song: {str(e)}"
|
|
|
|
|
|
def main(argv=None):
    """
    Command-line entry point: parse the arguments, fetch the lyrics, print them

    Two input formats are accepted:
      1. Single argument: "ARTIST - SONG TITLE"
      2. Two arguments: ARTIST SONG_TITLE

    Invalid input is reported through argparse, which prints the usage and
    the message to stderr and exits with status 2.

    Args:
        argv (list[str] | None): Arguments to parse; None lets argparse
            read them from sys.argv
    """
    parser = argparse.ArgumentParser(description='Fetch song lyrics from paroles.net')
    parser.add_argument('query', help='Artist and song in format "ARTIST - SONG TITLE" or separate artist and song arguments')
    parser.add_argument('song', nargs='?', help='Song title (optional if using ARTIST - SONG format)')
    parser.add_argument('--search', action='store_true', help='Use search functionality instead of direct URL construction')

    args = parser.parse_args(argv)

    if args.song is None:
        # Single argument format: split on the first " - "
        artist, separator, song = args.query.partition(" - ")
        if not separator:
            parser.error("Please provide artist and song in format 'ARTIST - SONG TITLE'")
    else:
        # Two argument format: artist and song provided separately
        artist, song = args.query, args.song

    # Strip any leading/trailing whitespace
    artist = artist.strip()
    song = song.strip()
    if not artist or not song:
        # e.g. " - Song" or an all-whitespace title would build a broken URL
        parser.error("Artist and song title must both be non-empty")

    if args.search:
        # First search for the song to get the correct URL
        search_result = search_song(artist, song)
        if not search_result.startswith("http"):
            print(search_result)  # Print error message
            return

        print(f"Found song at: {search_result}")

        # Use the slugs from the found URL so the search result is actually
        # honoured. get_song_lyrics builds ".../{artist}/paroles-{song}", so
        # passing the slugs back reproduces the found page address. If the
        # URL has an unexpected shape, fall back to the user's input.
        found = re.search(r'/([^/]+)/paroles-([^/?#]+)', search_result)
        if found:
            artist, song = found.groups()

    print(get_song_lyrics(artist, song))


if __name__ == "__main__":
    main()