refactor: improve JSON handling and API reliability
- Add a robust JSON sanitization function to handle malformed API responses
- Select the club after login for proper session initialization
- Enhance API request headers for better compatibility
- Add a JSON parsing fallback with sanitization for malformed files
- Add the poetry plugin requirement for export functionality
This commit is contained in:
@@ -22,6 +22,73 @@ from rl_ai_tools import utils # type: ignore
|
||||
|
||||
# User-Agent header value (a desktop Firefox identification string) passed as
# the "User-Agent" header on the HTTP requests made by this module.
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:131.0) Gecko/20100101 Firefox/131.0"
|
||||
|
||||
|
||||
# Raw control characters that may not appear unescaped inside a JSON string,
# mapped to their JSON escape sequences.
_CONTROL_CHAR_ESCAPES = {"\n": "\\n", "\r": "\\r", "\t": "\\t"}


def _escape_control_chars_in_strings(text):
    """Escape raw newlines, carriage returns and tabs inside JSON string literals.

    Whitespace outside of string literals is left untouched: it is legal JSON
    there, and escaping it would corrupt the document structure.
    """
    pieces = []
    in_string = False
    escaped = False
    for ch in text:
        if not in_string:
            if ch == '"':
                in_string = True
        elif escaped:
            # The character after a backslash belongs to the escape sequence,
            # so an escaped quote must not terminate the string.
            escaped = False
        elif ch == "\\":
            escaped = True
        elif ch == '"':
            in_string = False
        else:
            ch = _CONTROL_CHAR_ESCAPES.get(ch, ch)
        pieces.append(ch)
    return "".join(pieces)


def _pretty_json(data):
    """Serialize *data* the way every exit path of the sanitizer formats it."""
    return json.dumps(data, ensure_ascii=False, indent=2, separators=(",", ": "))


def sanitize_json_response(text: str) -> str:
    """Return *text* as pretty-printed JSON, repairing common malformations.

    Valid JSON is simply re-serialized. Otherwise the text is repaired
    best-effort: leading ``<number>|`` line prefixes are stripped, the
    outermost ``[...]`` array is extracted (or the whole text is wrapped in
    one), raw newlines/carriage returns/tabs inside string literals are
    escaped, and trailing commas before ``}`` or ``]`` are removed.

    Args:
        text: Raw response body or file content expected to hold JSON.

    Returns:
        A JSON document formatted with a two-space indent. If the text cannot
        be repaired, the string ``"[]"`` is returned rather than raising.
    """
    try:
        # Fast path: the payload is already valid JSON.
        return _pretty_json(json.loads(text))
    except json.JSONDecodeError:
        pass

    # Strip "<number>|" prefixes line by line. The lines are re-joined with
    # "\n" so that newlines inside string values survive to be escaped below
    # instead of being silently dropped.
    content = "\n".join(
        re.sub(r"^\s*\d+\|\s*", "", line) for line in text.split("\n")
    )

    # Isolate the main JSON array; anything before "[" or after "]" is noise.
    start = content.find("[")
    end = content.rfind("]")
    if start != -1 and end > start:
        array_content = content[start : end + 1]
    else:
        # Fallback: try to reconstruct a valid array around the content.
        array_content = "[" + content + "]"

    # Escape control characters only inside string literals; doing it globally
    # would turn structural whitespace (CRLF line ends, tab indentation) into
    # stray backslash sequences and make the document unparseable.
    array_content = _escape_control_chars_in_strings(array_content)

    # Remove trailing commas before closing brackets/braces.
    array_content = re.sub(r",(\s*[}\]])", r"\1", array_content)

    try:
        return _pretty_json(json.loads(array_content))
    except json.JSONDecodeError:
        pass

    # More aggressive attempt: drop the remaining control characters
    # (everything below 0x20 except tab, newline and carriage return).
    array_content = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", array_content)
    try:
        return _pretty_json(json.loads(array_content))
    except json.JSONDecodeError:
        # Final fallback: a minimal valid JSON array.
        return _pretty_json([])
|
||||
|
||||
|
||||
# Typer command-line application object; with no_args_is_help=True the help
# text is shown when the program is invoked without any arguments.
app = typer.Typer(no_args_is_help=True)
|
||||
# Module-level HTTP session, declared here without a value; do_login() issues
# its requests through it.
session: requests.Session
|
||||
# Declared here without a value — presumably populated via get_userid();
# TODO confirm against the full file.
userid: int
|
||||
@@ -106,6 +173,9 @@ def do_login():
|
||||
},
|
||||
)
|
||||
r.raise_for_status()
|
||||
# select the club we want
|
||||
session.get("https://app.myice.hockey/?cl=172", headers={"User-Agent": user_agent})
|
||||
r.raise_for_status()
|
||||
|
||||
|
||||
def get_userid():
|
||||
@@ -166,9 +236,14 @@ def get_schedule(num_days: int) -> str:
|
||||
headers={
|
||||
"User-Agent": user_agent,
|
||||
"Referer": "https://app.myice.hockey/players/clubschedule/",
|
||||
"Accept": "application/json, text/javascript, */*; q=0.01",
|
||||
"X-Requested-With": "XMLHttpRequest",
|
||||
},
|
||||
)
|
||||
r.raise_for_status()
|
||||
# Debug: Save raw response to file for analysis
|
||||
# with open("raw_response.txt", "w") as f:
|
||||
# f.write(r.text)
|
||||
return r.text
|
||||
|
||||
|
||||
@@ -220,11 +295,13 @@ def schedule(
|
||||
Fetch schedule as json
|
||||
"""
|
||||
schedule = get_schedule(num_days)
|
||||
# Sanitize the JSON response using our proven approach
|
||||
sanitized_schedule = sanitize_json_response(schedule)
|
||||
if outfile:
|
||||
with outfile.open("w") as f:
|
||||
f.write(schedule)
|
||||
f.write(sanitized_schedule)
|
||||
else:
|
||||
print(schedule)
|
||||
print(sanitized_schedule)
|
||||
|
||||
|
||||
def os_open(file: str) -> None:
|
||||
@@ -299,8 +376,15 @@ def parse_schedule(
|
||||
"""
|
||||
Parse schedule.json to look for specific games or practices
|
||||
"""
|
||||
with schedule_file.open("r") as f:
|
||||
data = json.load(f)
|
||||
try:
|
||||
with schedule_file.open("r") as f:
|
||||
data = json.load(f)
|
||||
except json.JSONDecodeError:
|
||||
# If JSON is malformed, try to sanitize it first
|
||||
with schedule_file.open("r") as f:
|
||||
content = f.read()
|
||||
sanitized_content = sanitize_json_response(content)
|
||||
data = json.loads(sanitized_content)
|
||||
# age_group filter
|
||||
if age_group:
|
||||
events = [x for x in data if x["agegroup"] == age_group]
|
||||
|
||||
@@ -37,3 +37,6 @@ build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[projectscripts]
|
||||
myice = 'myice.myice:app'
|
||||
|
||||
[tool.poetry.requires-plugins]
|
||||
poetry-plugin-export = ">=1.8"
|
||||
|
||||
Reference in New Issue
Block a user