refactor: improve JSON handling and API reliability

- Add robust JSON sanitization function to handle malformed API responses
- Select club after login for proper session initialization
- Enhance API request headers for better compatibility
- Add JSON parsing fallback with sanitization for malformed files
- Add poetry plugin requirement for export functionality
This commit is contained in:
2025-08-14 12:50:13 +02:00
parent 03d33f2e03
commit 5b5b593a3b
2 changed files with 91 additions and 4 deletions

View File

@@ -22,6 +22,73 @@ from rl_ai_tools import utils # type: ignore
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:131.0) Gecko/20100101 Firefox/131.0"
# Matches a "cat -n"-style line-number prefix such as "   12| " at line start.
_LINE_NUMBER_PREFIX = re.compile(r"^\s*\d+\|\s*")

# Matches either a complete JSON string literal (group 1) or a comma that is
# directly followed by a closing bracket/brace (group 2 captures the closer).
# Matching string literals first keeps commas inside string values untouched.
_STRING_OR_TRAILING_COMMA = re.compile(
    r'("(?:\\.|[^"\\])*")|,(\s*[}\]])', re.DOTALL
)

# Control characters other than tab (\x09), newline (\x0a) and CR (\x0d).
_STRAY_CONTROL_CHARS = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")


def _dump_json(data):
    """Serialise *data* in the pretty-printed format used for all outputs."""
    return json.dumps(data, ensure_ascii=False, indent=2, separators=(",", ": "))


def _strip_trailing_commas(text):
    """Remove commas that directly precede ``}`` or ``]`` outside strings."""

    def _replace(match):
        string_literal = match.group(1)
        if string_literal is not None:
            # A string literal: return it unchanged so its content survives.
            return string_literal
        # A trailing comma: keep only the whitespace and closing bracket.
        return match.group(2)

    return _STRING_OR_TRAILING_COMMA.sub(_replace, text)


def sanitize_json_response(text):
    """Return *text* as pretty-printed JSON, repairing common malformations.

    The input is first parsed as-is. If that fails, the following repairs are
    applied before parsing again:

    1. ``cat -n``-style line-number prefixes (``"  12| "``) are stripped.
    2. The span from the first ``[`` to the last ``]`` is extracted; if no
       such span exists the whole content is wrapped in ``[...]``.
    3. Trailing commas before ``}`` / ``]`` are removed (outside strings).
    4. Raw control characters inside string values (newline, tab, carriage
       return, ...) are tolerated by parsing with ``strict=False``; they are
       re-emitted properly escaped. Whitespace between tokens is left alone,
       so CRLF line endings and tab indentation do not break parsing.
    5. As a last resort, remaining control characters (other than tab,
       newline and carriage return) are deleted and parsing is retried.

    Args:
        text: Raw response body or file content expected to contain JSON.

    Returns:
        A JSON document as ``str`` (2-space indent, non-ASCII preserved).
        If nothing can be recovered, the string ``"[]"`` is returned, so an
        empty array may mean "unparseable input" rather than "no entries".
    """
    try:
        # Fast path: already valid JSON, only normalise the formatting.
        return _dump_json(json.loads(text))
    except json.JSONDecodeError:
        pass

    # Re-join with "\n" (not "") so newlines inside string values are kept
    # and later escaped by json.dumps instead of being silently dropped.
    content = "\n".join(
        _LINE_NUMBER_PREFIX.sub("", line) for line in text.split("\n")
    )

    start = content.find("[")
    end = content.rfind("]")
    if start != -1 and end > start:
        array_content = content[start : end + 1]
    else:
        # No array delimiters found: try to treat the content as array items.
        array_content = "[" + content + "]"

    array_content = _strip_trailing_commas(array_content)

    candidates = (
        array_content,
        # More aggressive: drop stray control characters everywhere.
        _STRAY_CONTROL_CHARS.sub("", array_content),
    )
    for candidate in candidates:
        try:
            # strict=False accepts raw control characters inside strings only,
            # which avoids corrupting structural whitespace between tokens.
            return _dump_json(json.loads(candidate, strict=False))
        except json.JSONDecodeError:
            continue

    # Final fallback: a minimal valid JSON array.
    return _dump_json([])
app = typer.Typer(no_args_is_help=True)
session: requests.Session
userid: int
@@ -106,6 +173,9 @@ def do_login():
},
)
r.raise_for_status()
# Select the club we want. Keep the response so the status check below
# validates this request instead of re-checking the earlier login response.
r = session.get("https://app.myice.hockey/?cl=172", headers={"User-Agent": user_agent})
r.raise_for_status()
def get_userid():
@@ -166,9 +236,14 @@ def get_schedule(num_days: int) -> str:
headers={
"User-Agent": user_agent,
"Referer": "https://app.myice.hockey/players/clubschedule/",
"Accept": "application/json, text/javascript, */*; q=0.01",
"X-Requested-With": "XMLHttpRequest",
},
)
r.raise_for_status()
# Debug: Save raw response to file for analysis
# with open("raw_response.txt", "w") as f:
# f.write(r.text)
return r.text
@@ -220,11 +295,13 @@ def schedule(
Fetch schedule as json
"""
schedule = get_schedule(num_days)
# Sanitize the JSON response using our proven approach
sanitized_schedule = sanitize_json_response(schedule)
if outfile:
with outfile.open("w") as f:
f.write(schedule)
f.write(sanitized_schedule)
else:
print(schedule)
print(sanitized_schedule)
def os_open(file: str) -> None:
@@ -299,8 +376,15 @@ def parse_schedule(
"""
Parse schedule.json to look for specific games or practices
"""
with schedule_file.open("r") as f:
data = json.load(f)
try:
with schedule_file.open("r") as f:
data = json.load(f)
except json.JSONDecodeError:
# If JSON is malformed, try to sanitize it first
with schedule_file.open("r") as f:
content = f.read()
sanitized_content = sanitize_json_response(content)
data = json.loads(sanitized_content)
# age_group filter
if age_group:
events = [x for x in data if x["agegroup"] == age_group]

View File

@@ -37,3 +37,6 @@ build-backend = "poetry.core.masonry.api"
[projectscripts]
myice = 'myice.myice:app'
[tool.poetry.requires-plugins]
poetry-plugin-export = ">=1.8"