def _dump_pretty(data) -> str:
    """Serialize *data* with the formatting used for every sanitizer result."""
    return json.dumps(data, ensure_ascii=False, indent=2, separators=(",", ": "))


def _strip_trailing_commas(text: str) -> str:
    """Remove commas that directly precede a closing ``]`` or ``}``.

    String literals are matched first and copied through untouched, so a
    value such as ``"x,]"`` keeps its comma.
    """
    return re.sub(
        r'"(?:\\.|[^"\\])*"|,(?=\s*[}\]])',
        lambda m: "" if m.group(0) == "," else m.group(0),
        text,
        flags=re.DOTALL,
    )


def sanitize_json_response(text: str) -> str:
    """Return a MyIce API response re-serialized as pretty-printed JSON.

    Well-formed input is parsed and dumped back as-is.  Malformed input goes
    through a best-effort repair:

    1. ``<number>|`` line-number prefixes are stripped from each line.
    2. The span from the first ``[`` to the last ``]`` is taken as the
       payload; if there is no such span the whole text is wrapped in
       ``[...]``.
    3. Trailing commas before ``]`` / ``}`` are removed (outside strings).
    4. The payload is parsed with ``strict=False`` so raw newlines, tabs and
       carriage returns inside string values are accepted and preserved.
    5. If that fails, remaining control characters are deleted and the parse
       is retried once.

    Args:
        text: Raw response body (or file content) expected to hold a JSON
            array.

    Returns:
        The JSON document as an indented string.  When nothing can be
        recovered the result is the empty array ``"[]"`` -- no exception is
        raised, so callers that need to detect data loss must check for it.
    """
    try:
        return _dump_pretty(json.loads(text))
    except json.JSONDecodeError:
        pass  # fall through to the repair path below

    # Strip line-number prefixes line by line, but keep the line breaks:
    # outside strings they are valid JSON whitespace, and inside strings they
    # are handled by strict=False.  Escaping or dropping them globally would
    # corrupt CRLF / tab-indented payloads.
    content = "\n".join(
        re.sub(r"^\s*\d+\|\s*", "", line) for line in text.split("\n")
    )

    start = content.find("[")
    end = content.rfind("]")
    if start != -1 and end > start:
        candidate = content[start : end + 1]
    else:
        # No recognizable array: try to interpret the text as array items.
        candidate = "[" + content + "]"

    candidate = _strip_trailing_commas(candidate)

    attempts = (
        candidate,
        # Last resort: drop control characters that are illegal even as
        # whitespace (everything below 0x20 except \t, \n and \r).
        re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", candidate),
    )
    for attempt in attempts:
        try:
            return _dump_pretty(json.loads(attempt, strict=False))
        except json.JSONDecodeError:
            continue

    # Deliberate best-effort fallback: always hand back valid JSON.
    return _dump_pretty([])
using our proven approach + sanitized_schedule = sanitize_json_response(schedule) if outfile: with outfile.open("w") as f: - f.write(schedule) + f.write(sanitized_schedule) else: - print(schedule) + print(sanitized_schedule) def os_open(file: str) -> None: @@ -299,8 +376,15 @@ def parse_schedule( """ Parse schedule.json to look for specific games or practices """ - with schedule_file.open("r") as f: - data = json.load(f) + try: + with schedule_file.open("r") as f: + data = json.load(f) + except json.JSONDecodeError: + # If JSON is malformed, try to sanitize it first + with schedule_file.open("r") as f: + content = f.read() + sanitized_content = sanitize_json_response(content) + data = json.loads(sanitized_content) # age_group filter if age_group: events = [x for x in data if x["agegroup"] == age_group] diff --git a/pyproject.toml b/pyproject.toml index 80568c1..aefd491 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,3 +37,6 @@ build-backend = "poetry.core.masonry.api" [projectscripts] myice = 'myice.myice:app' + +[tool.poetry.requires-plugins] +poetry-plugin-export = ">=1.8"