#!/usr/bin/env python3
"""
Module to fetch song lyrics from paroles.net
"""

import argparse
import re
import sys
from urllib.parse import quote, urljoin

import requests
from bs4 import BeautifulSoup, Comment, NavigableString

BASE_URL = "https://www.paroles.net"

# Seconds to wait for the server; without a timeout requests.get() can block
# forever on an unresponsive host.
REQUEST_TIMEOUT = 10

# Headers that mimic a browser request (shared by every HTTP call below).
REQUEST_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )
}

# A line that consists only of an ad marker, an "Advertisement" label or a
# bare number is dropped entirely.
_AD_LINE_RE = re.compile(r"^(Content_\d+|.*Advertisement.*|\d+\s*)$")
# An ad marker glued to the start of an otherwise valid line is stripped.
_AD_PREFIX_RE = re.compile(r"^Content_\d+\s*")


def _build_lyrics_url(artist, song_title):
    """
    Build the direct lyrics URL for an artist / song pair

    Args:
        artist (str): Name of the artist
        song_title (str): Title of the song

    Returns:
        str: URL of the form BASE_URL/<artist>/paroles-<song>
    """
    # Convert artist and song to lowercase and replace spaces with hyphens
    formatted_artist = (
        artist.lower().replace(" ", "-").replace("$", "s").replace("&", "and")
    )
    formatted_song = (
        song_title.lower().replace(" ", "-").replace("'", "").replace('"', "")
    )
    # Percent-encode each segment so characters such as "/", "?" or "#" in the
    # user input cannot change the structure of the URL.
    return (
        f"{BASE_URL}/{quote(formatted_artist, safe='')}"
        f"/paroles-{quote(formatted_song, safe='')}"
    )


def _extract_raw_text(lyrics_div):
    """
    Collect the text of the lyrics container, turning <br> tags into newlines

    Only text nodes are collected. Reading ``.string`` on every descendant
    would emit text twice whenever it sits inside a tag with a single child
    (once for the tag, once for the text node itself).

    Args:
        lyrics_div (bs4.Tag): The lyrics container element

    Returns:
        str: Concatenated raw text, stripped at both ends
    """
    parts = []
    for node in lyrics_div.descendants:
        if getattr(node, "name", None) == "br":
            parts.append("\n")
            continue
        # Skip tags (their text nodes are visited separately) and HTML comments.
        if not isinstance(node, NavigableString) or isinstance(node, Comment):
            continue
        # Inline scripts/styles inside the container are not lyrics.
        parent = node.parent
        if parent is not None and parent.name in ("script", "style"):
            continue
        text = str(node)
        if text.strip() and "Paroles de la chanson" not in text:
            parts.append(text)
    return "".join(parts).strip()


def _clean_lyrics(raw_text):
    """
    Remove blank lines and ad markers from raw lyrics text

    Note that blank lines are dropped, so the result has one lyric line per
    output line with no empty separator lines.

    Args:
        raw_text (str): Text as returned by _extract_raw_text

    Returns:
        str: Cleaned lyrics (may be empty)
    """
    cleaned_lines = []
    for line in raw_text.split("\n"):
        stripped_line = line.strip()
        # Skip empty lines and ad content
        if not stripped_line or _AD_LINE_RE.match(stripped_line):
            continue
        # Also remove inline ad markers
        cleaned_line = _AD_PREFIX_RE.sub("", stripped_line)
        if cleaned_line:
            cleaned_lines.append(cleaned_line)
    return "\n".join(cleaned_lines).strip()


def _fetch_lyrics(url):
    """
    Download a lyrics page and extract the lyrics text

    Args:
        url (str): Full URL of the lyrics page

    Returns:
        str: Song lyrics or error message
    """
    try:
        response = requests.get(
            url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT
        )
        response.raise_for_status()  # Raise exception for bad status codes

        soup = BeautifulSoup(response.content, "html.parser")

        # The lyrics live in the div with class 'song-text'
        lyrics_div = soup.find("div", class_="song-text")
        if lyrics_div is None:
            return "Lyrics not found on the page"

        lyrics = _clean_lyrics(_extract_raw_text(lyrics_div))
        if not lyrics:
            return "Could not extract lyrics from the page"
        return lyrics

    except requests.exceptions.RequestException as e:
        return f"Error fetching lyrics: {str(e)}"
    except Exception as e:  # boundary: report parse failures as a message
        return f"Error parsing lyrics: {str(e)}"


def get_song_lyrics(artist, song_title):
    """
    Fetch song lyrics from paroles.net

    The page URL is constructed directly from the artist and song names.

    Args:
        artist (str): Name of the artist
        song_title (str): Title of the song

    Returns:
        str: Song lyrics or error message
    """
    return _fetch_lyrics(_build_lyrics_url(artist, song_title))


def search_song(artist, song_title):
    """
    Search for a song on paroles.net and return the first result

    Args:
        artist (str): Name of the artist
        song_title (str): Title of the song

    Returns:
        str: URL of the first search result or error message
    """
    search_query = f"{artist} {song_title}"
    search_url = f"{BASE_URL}/recherche?q={quote(search_query)}"

    try:
        response = requests.get(
            search_url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")

        # Find the first search result link
        first_result = soup.find("a", href=lambda x: x and "/paroles-" in x)
        if first_result is None:
            return "No search results found"

        # urljoin handles relative hrefs as well as already-absolute ones,
        # where plain string concatenation would produce a broken URL.
        return urljoin(f"{BASE_URL}/", first_result["href"])

    except requests.exceptions.RequestException as e:
        return f"Error searching for song: {str(e)}"


def main():
    """
    Command-line entry point

    Accepts either a single "ARTIST - SONG TITLE" argument or separate artist
    and song arguments, then prints the lyrics (or an error message).

    Returns:
        int: Process exit status (0 when a fetch was attempted, 1 on invalid
        input or a failed search)
    """
    parser = argparse.ArgumentParser(description="Fetch song lyrics from paroles.net")
    parser.add_argument(
        "query",
        help=(
            "Artist and song in format 'ARTIST - SONG TITLE' or "
            "separate artist and song arguments"
        ),
    )
    parser.add_argument(
        "song", nargs="?", help="Song title (optional if using ARTIST - SONG format)"
    )
    parser.add_argument(
        "--search",
        action="store_true",
        help="Use search functionality instead of direct URL construction",
    )

    args = parser.parse_args()

    # Handle both input formats:
    # 1. Single argument: "ARTIST - SONG TITLE"
    # 2. Two arguments: ARTIST SONG_TITLE
    if args.song is None:
        if " - " not in args.query:
            print(
                "Error: Please provide artist and song in format 'ARTIST - SONG TITLE'",
                file=sys.stderr,
            )
            return 1
        artist, song = args.query.split(" - ", 1)
    else:
        artist = args.query
        song = args.song

    # Strip any leading/trailing whitespace
    artist = artist.strip()
    song = song.strip()

    # An empty part would yield a malformed URL such as ".../ /paroles-".
    if not artist or not song:
        print("Error: Artist and song title must not be empty", file=sys.stderr)
        return 1

    if args.search:
        # First search for the song to get the correct URL
        search_result = search_song(artist, song)
        if not search_result.startswith("http"):
            print(search_result)  # Print error message
            return 1
        print(f"Found song at: {search_result}")
        # Fetch from the URL the search returned rather than a guessed one.
        print(_fetch_lyrics(search_result))
    else:
        print(get_song_lyrics(artist, song))

    return 0


if __name__ == "__main__":
    sys.exit(main())