refactor: improve JSON handling and API reliability
- Add robust JSON sanitization function to handle malformed API responses
- Select club after login for proper session initialization
- Enhance API request headers for better compatibility
- Add JSON parsing fallback with sanitization for malformed files
- Add poetry plugin requirement for export functionality
This commit is contained in:
@@ -22,6 +22,73 @@ from rl_ai_tools import utils # type: ignore
|
|||||||
|
|
||||||
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:131.0) Gecko/20100101 Firefox/131.0"
|
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:131.0) Gecko/20100101 Firefox/131.0"
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_json_response(text):
    """Return *text* re-serialized as pretty-printed JSON, repairing it if needed.

    If *text* already parses as JSON it is simply re-dumped (2-space indent,
    non-ASCII characters kept as-is).  Otherwise a best-effort repair is
    attempted:

    1. Strip ``<digits>|`` line-number prefixes from the start of each line.
    2. Keep only the span from the first ``[`` to the last ``]``; when no such
       span exists, wrap the whole content in ``[...]``.
    3. Run a string-aware pass that escapes raw control characters inside
       string values, drops stray control characters between tokens, and
       removes trailing commas before ``}`` / ``]``.

    Args:
        text: The raw response body (or file content) to sanitize.

    Returns:
        A JSON document as a string.  When the input cannot be repaired, the
        empty array ``"[]"`` is returned instead of raising.
    """
    try:
        return _dump_json(json.loads(text))
    except json.JSONDecodeError:
        pass

    # Remove line-number prefixes line by line, then restore the newlines so
    # that line breaks inside string values can be escaped rather than lost.
    content = "\n".join(
        re.sub(r"^\s*\d+\|\s*", "", line) for line in text.split("\n")
    )

    # Locate the main array on the raw content, before the string-aware pass,
    # so unbalanced quotes in any leading junk cannot confuse string tracking.
    start = content.find("[")
    end = content.rfind("]")
    if start != -1 and end > start:
        candidate = content[start : end + 1]
    else:
        # Fallback: try to reconstruct a valid array around the content.
        candidate = "[" + content + "]"

    try:
        return _dump_json(json.loads(_repair_json_text(candidate)))
    except json.JSONDecodeError:
        # Deliberate best-effort: callers get a minimal valid JSON array.
        return _dump_json([])


# Short escapes JSON defines for control characters; any other control
# character inside a string is written as a \u00XX escape.
_CONTROL_ESCAPES = {
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
    "\b": "\\b",
    "\f": "\\f",
}

# The only characters JSON permits as whitespace between tokens.
_JSON_WHITESPACE = " \t\n\r"


def _dump_json(data):
    """Serialize *data* in the output format used by sanitize_json_response."""
    return json.dumps(data, ensure_ascii=False, indent=2, separators=(",", ": "))


def _repair_json_text(raw):
    """Fix control characters and trailing commas in *raw* in a single pass.

    The scan tracks whether it is inside a double-quoted string so that string
    contents are never altered beyond escaping raw control characters.
    """
    out = []
    in_string = False
    escaped = False
    for ch in raw:
        if in_string:
            if escaped:
                # Character following a backslash: copy it through untouched.
                escaped = False
                out.append(ch)
            elif ch == "\\":
                escaped = True
                out.append(ch)
            elif ch == '"':
                in_string = False
                out.append(ch)
            elif ch < " ":
                # Raw control characters are illegal inside JSON strings.
                out.append(_CONTROL_ESCAPES.get(ch) or f"\\u{ord(ch):04x}")
            else:
                out.append(ch)
            continue

        if ch == '"':
            in_string = True
            out.append(ch)
        elif ch in "}]":
            # Drop a trailing comma (plus any whitespace after it) that
            # directly precedes this closing bracket/brace.
            idx = len(out) - 1
            while idx >= 0 and out[idx] in _JSON_WHITESPACE:
                idx -= 1
            if idx >= 0 and out[idx] == ",":
                del out[idx]
            out.append(ch)
        elif ch < " " and ch not in _JSON_WHITESPACE:
            # Stray control character between tokens: not valid JSON, skip it.
            continue
        else:
            out.append(ch)
    return "".join(out)
|
||||||
|
|
||||||
|
|
||||||
app = typer.Typer(no_args_is_help=True)
|
app = typer.Typer(no_args_is_help=True)
|
||||||
session: requests.Session
|
session: requests.Session
|
||||||
userid: int
|
userid: int
|
||||||
@@ -106,6 +173,9 @@ def do_login():
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
|
# select the club we want
|
||||||
|
session.get("https://app.myice.hockey/?cl=172", headers={"User-Agent": user_agent})
|
||||||
|
r.raise_for_status()
|
||||||
|
|
||||||
|
|
||||||
def get_userid():
|
def get_userid():
|
||||||
@@ -166,9 +236,14 @@ def get_schedule(num_days: int) -> str:
|
|||||||
headers={
|
headers={
|
||||||
"User-Agent": user_agent,
|
"User-Agent": user_agent,
|
||||||
"Referer": "https://app.myice.hockey/players/clubschedule/",
|
"Referer": "https://app.myice.hockey/players/clubschedule/",
|
||||||
|
"Accept": "application/json, text/javascript, */*; q=0.01",
|
||||||
|
"X-Requested-With": "XMLHttpRequest",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
|
# Debug: Save raw response to file for analysis
|
||||||
|
# with open("raw_response.txt", "w") as f:
|
||||||
|
# f.write(r.text)
|
||||||
return r.text
|
return r.text
|
||||||
|
|
||||||
|
|
||||||
@@ -220,11 +295,13 @@ def schedule(
|
|||||||
Fetch schedule as json
|
Fetch schedule as json
|
||||||
"""
|
"""
|
||||||
schedule = get_schedule(num_days)
|
schedule = get_schedule(num_days)
|
||||||
|
# Sanitize the JSON response using our proven approach
|
||||||
|
sanitized_schedule = sanitize_json_response(schedule)
|
||||||
if outfile:
|
if outfile:
|
||||||
with outfile.open("w") as f:
|
with outfile.open("w") as f:
|
||||||
f.write(schedule)
|
f.write(sanitized_schedule)
|
||||||
else:
|
else:
|
||||||
print(schedule)
|
print(sanitized_schedule)
|
||||||
|
|
||||||
|
|
||||||
def os_open(file: str) -> None:
|
def os_open(file: str) -> None:
|
||||||
@@ -299,8 +376,15 @@ def parse_schedule(
|
|||||||
"""
|
"""
|
||||||
Parse schedule.json to look for specific games or practices
|
Parse schedule.json to look for specific games or practices
|
||||||
"""
|
"""
|
||||||
with schedule_file.open("r") as f:
|
try:
|
||||||
data = json.load(f)
|
with schedule_file.open("r") as f:
|
||||||
|
data = json.load(f)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
# If JSON is malformed, try to sanitize it first
|
||||||
|
with schedule_file.open("r") as f:
|
||||||
|
content = f.read()
|
||||||
|
sanitized_content = sanitize_json_response(content)
|
||||||
|
data = json.loads(sanitized_content)
|
||||||
# age_group filter
|
# age_group filter
|
||||||
if age_group:
|
if age_group:
|
||||||
events = [x for x in data if x["agegroup"] == age_group]
|
events = [x for x in data if x["agegroup"] == age_group]
|
||||||
|
|||||||
@@ -37,3 +37,6 @@ build-backend = "poetry.core.masonry.api"
|
|||||||
|
|
||||||
[projectscripts]
|
[projectscripts]
|
||||||
myice = 'myice.myice:app'
|
myice = 'myice.myice:app'
|
||||||
|
|
||||||
|
[tool.poetry.requires-plugins]
|
||||||
|
poetry-plugin-export = ">=1.8"
|
||||||
|
|||||||
Reference in New Issue
Block a user