- Python package to fetch song lyrics from paroles.net
- Web scraping functionality with requests and BeautifulSoup4
- Command-line interface for easy usage
- Comprehensive test suite with pytest
- GitLab CI configuration with uv support
- Package metadata and dependencies in pyproject.toml
- Documentation and usage instructions
99 lines
3.4 KiB
Python
99 lines
3.4 KiB
Python
"""
|
|
Test suite for paroles_net_scraper package
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
import pytest
|
|
from unittest.mock import patch, Mock
|
|
|
|
# Add the parent directory to the path so we can import the scraper
|
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
|
|
|
# Import from the package
|
|
from paroles_net_scraper import get_song_lyrics
|
|
|
|
|
|
def test_get_song_lyrics_success():
    """Check that lyric lines come back and the page heading does not."""
    # Canned page: a "song-text" container with a heading followed by
    # three lyric lines separated by <br> tags.
    page_html = """
    <html>
        <body>
            <div class="song-text">
                <h2>Paroles de la chanson Test Song par Test Artist</h2>
                <div>
                    This is the first line of the song<br>
                    This is the second line of the song<br>
                    This is the third line of the song<br>
                </div>
            </div>
        </body>
    </html>
    """

    # Stand-in for the HTTP response; raise_for_status() reports success.
    fake_response = Mock(content=page_html)
    fake_response.raise_for_status.return_value = None

    # Replace the HTTP call itself so the test never touches the network.
    with patch(
        'paroles_net_scraper.paroles_net_scraper.requests.get',
        return_value=fake_response,
    ):
        lyrics = get_song_lyrics("Test Artist", "Test Song")

    for expected_line in (
        "This is the first line of the song",
        "This is the second line of the song",
        "This is the third line of the song",
    ):
        assert expected_line in lyrics

    # The <h2> title belongs to the page, not to the lyrics.
    assert "Paroles de la chanson" not in lyrics
|
|
|
|
|
|
def test_get_song_lyrics_not_found():
    """Check the fallback message when the page has no lyrics div."""
    # Canned page whose only div is class "content", not "song-text".
    page_html = """
    <html>
        <body>
            <div class="content">
                <p>Song not found</p>
            </div>
        </body>
    </html>
    """

    # Stand-in for the HTTP response; raise_for_status() reports success.
    fake_response = Mock(content=page_html)
    fake_response.raise_for_status.return_value = None

    with patch(
        'paroles_net_scraper.paroles_net_scraper.requests.get',
        return_value=fake_response,
    ):
        outcome = get_song_lyrics("Non Existent", "Non Existent Song")

    assert outcome == "Lyrics not found on the page"
|
|
|
|
|
|
def test_get_song_lyrics_request_exception():
    """Check that a failing ``requests.get`` is reported in the returned text."""
    # Make the patched HTTP call raise instead of returning a response.
    network_failure = Exception("Network error")

    with patch(
        'paroles_net_scraper.paroles_net_scraper.requests.get',
        side_effect=network_failure,
    ):
        outcome = get_song_lyrics("Test Artist", "Test Song")

    # The function is expected to return an error string, not to raise.
    assert "Error parsing lyrics" in outcome
|
|
|
|
|
|
def test_url_formatting():
    """Check the URL and headers passed to ``requests.get``.

    The artist and title used here contain spaces and capital letters; the
    expected URL is lower-case with hyphens in place of the spaces.
    """
    # Minimal page: only the call arguments matter for this test.
    stub_response = Mock(content="<div class='song-text'></div>")
    stub_response.raise_for_status.return_value = None

    target_url = "https://www.paroles.net/ed-sheeran/paroles-shape-of-you"
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    with patch(
        'paroles_net_scraper.paroles_net_scraper.requests.get',
        return_value=stub_response,
    ) as fake_get:
        get_song_lyrics("Ed Sheeran", "Shape of You")

    # Exactly one request, aimed at the formatted URL with these headers.
    fake_get.assert_called_once_with(target_url, headers=browser_headers)
|
|
|
|
|
|
if __name__ == "__main__":
    # pytest.main() returns the run's exit code rather than exiting itself.
    # Pass it on so that running this file directly exits non-zero when a
    # test fails, instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__]))