"""Data loading utilities for charted.
Provides functions to load data from various file formats (CSV, JSON, TSV)
without requiring external dependencies like pandas.
"""
__all__ = ["load_data", "load_csv", "load_json"]
import csv
import json
from pathlib import Path
def load_data(
source: str | Path,
x_col: str | None = None,
y_col: str | None = None,
delimiter: str | None = None,
) -> tuple[list[str], list[float], list[str]]:
"""Load data from a file and return x_data, y_data, and labels.
Auto-detects file format based on extension (.csv, .tsv, .json).
Args:
source: Path to the data file.
x_col: Column name for x-axis data (required for CSV/TSV).
y_col: Column name for y-axis data (required for CSV/TSV).
delimiter: Field delimiter for CSV/TSV (auto-detected if None).
Returns:
Tuple of (x_data, y_data, labels) where:
- x_data: List of x-axis values (strings or numbers)
- y_data: List of y-axis values (floats)
- labels: List of series/label names
Raises:
FileNotFoundError: If the source file doesn't exist.
ValueError: If required columns are missing or data is invalid.
Example:
>>> # CSV with columns: Quarter, Revenue
>>> x, y, labels = load_data("sales.csv", x_col="Quarter", y_col="Revenue")
>>>
>>> # JSON array of numbers
>>> x, y, labels = load_data("data.json")
>>>
>>> # JSON object with data and labels
>>> x, y, labels = load_data("metrics.json")
"""
source = Path(source)
if not source.exists():
raise FileNotFoundError(f"Data file not found: {source}")
suffix = source.suffix.lower()
if suffix in (".csv", ".tsv"):
return _load_csv(source, x_col, y_col, delimiter)
elif suffix == ".json":
return _load_json(source)
else:
raise ValueError(f"Unsupported file format: {suffix}. Use .csv, .tsv, or .json")
def _load_csv(
path: Path,
x_col: str | None,
y_col: str | None,
delimiter: str | None,
) -> tuple[list[str], list[float], list[str]]:
"""Load data from a CSV or TSV file."""
if delimiter is None:
delimiter = "\t" if path.suffix == ".tsv" else ","
if x_col is None or y_col is None:
raise ValueError("x_col and y_col are required for CSV/TSV files")
x_data: list[str] = []
y_data: list[float] = []
labels: list[str] = []
with open(path, "r", newline="", encoding="utf-8") as f:
reader = csv.DictReader(f, delimiter=delimiter)
# Validate columns exist
if reader.fieldnames is None:
raise ValueError(f"Empty or invalid CSV file: {path}")
if x_col not in reader.fieldnames:
raise ValueError(
f"Column '{x_col}' not found in {path}. Available: {reader.fieldnames}"
)
if y_col not in reader.fieldnames:
raise ValueError(
f"Column '{y_col}' not found in {path}. Available: {reader.fieldnames}"
)
for row in reader:
x_data.append(row[x_col])
try:
y_data.append(float(row[y_col]))
except (ValueError, TypeError):
raise ValueError(
f"Invalid numeric value in column '{y_col}': {row[y_col]}"
)
# Use y_col name as series label
labels = [y_col]
return x_data, y_data, labels
def _load_json(path: Path) -> tuple[list[str], list[float], list[str]]:
    """Load one x/y series from a JSON file.

    Supported document shapes:
        - Array of numbers ``[1, 2, 3]``: x values are the string indices.
        - Array of objects ``[{"label": "Q1", "value": 100}, ...]``: the
          value key is the first of value/y/data/amount/count present in
          the first object, and the label key is the first of
          label/x/name/category present there. Objects without the label
          key fall back to their index.
        - Object ``{"data": [...], "labels": [...]}``, with an optional
          ``"title"`` used as the series label.
        - Flat object of numbers ``{"Q1": 100, "Q2": 200}``.

    Args:
        path: Path to the JSON file (read as UTF-8).

    Returns:
        Tuple of (x_data, y_data, labels). Unless a ``"title"`` is given
        in the data/labels shape, the label is the file name without its
        extension.

    Raises:
        ValueError: If the document has an unsupported structure, an
            object lacks the detected value key, or a value cannot be
            converted to float. Malformed JSON also surfaces as
            ValueError, since ``json.JSONDecodeError`` is a subclass.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    if isinstance(data, list):
        # Simple array of numbers: [1, 2, 3]
        if all(isinstance(x, (int, float)) for x in data):
            x_data = [str(i) for i in range(len(data))]
            y_data = [float(x) for x in data]
            return x_data, y_data, [path.stem]

        # Array of objects: [{"label": "Q1", "value": 100}, ...]
        # (an empty array was already handled by the branch above, so
        # data[0] is safe here)
        if all(isinstance(x, dict) for x in data):
            first = data[0]
            value_keys = ["value", "y", "data", "amount", "count"]
            label_keys = ["label", "x", "name", "category"]
            value_key = next((k for k in value_keys if k in first), None)
            label_key = next((k for k in label_keys if k in first), None)
            if value_key is None:
                raise ValueError(
                    f"No numeric value key found in JSON objects. "
                    f"Available keys: {list(first.keys())}"
                )

            if label_key is None:
                x_data = [str(i) for i in range(len(data))]
            else:
                x_data = [str(item.get(label_key, i)) for i, item in enumerate(data)]

            # Keys are detected from the first object only, so later
            # objects may lack the value key or hold a non-numeric value.
            # Report both as ValueError instead of leaking KeyError or
            # TypeError to the caller.
            try:
                y_data = [float(item[value_key]) for item in data]
            except KeyError:
                raise ValueError(
                    f"Key '{value_key}' is missing from one or more "
                    f"JSON objects in {path}"
                ) from None
            except (TypeError, ValueError) as err:
                raise ValueError(
                    f"Invalid numeric value for key '{value_key}' in {path}: {err}"
                ) from err
            return x_data, y_data, [path.stem]

    elif isinstance(data, dict):
        # Object with explicit data and labels:
        # {"data": [1,2,3], "labels": ["a","b","c"]}
        if "data" in data and "labels" in data:
            try:
                x_data = [str(x) for x in data["labels"]]
                y_data = [float(x) for x in data["data"]]
            except (TypeError, ValueError) as err:
                # Non-iterable entries or null/non-numeric values.
                raise ValueError(
                    f"Invalid 'data' or 'labels' entry in {path}: {err}"
                ) from err
            return x_data, y_data, [data.get("title", path.stem)]

        # Object with single series: {"Q1": 100, "Q2": 200}
        if all(isinstance(v, (int, float)) for v in data.values()):
            x_data = list(data.keys())
            y_data = [float(v) for v in data.values()]
            return x_data, y_data, [path.stem]

    raise ValueError(
        f"Unsupported JSON structure in {path}. "
        "Expected array of numbers, array of objects, "
        "or object with 'data' and 'labels'"
    )
def load_csv(
    path: str | Path,
    x_col: str,
    y_col: str,
    delimiter: str | None = None,
) -> tuple[list[str], list[float], list[str]]:
    """Load data from a CSV file.

    Convenience wrapper that forwards all arguments to ``load_data``.
    The file format is still selected by ``load_data`` from the file
    extension.

    Args:
        path: Path to the CSV file.
        x_col: Column name for x-axis.
        y_col: Column name for y-axis.
        delimiter: Field delimiter (comma by default, tab for .tsv).

    Returns:
        Tuple of (x_data, y_data, labels).

    Example:
        >>> x, y, labels = load_csv("sales.csv", x_col="Quarter", y_col="Revenue")
    """
    return load_data(path, x_col=x_col, y_col=y_col, delimiter=delimiter)
def load_json(path: str | Path) -> tuple[list[str], list[float], list[str]]:
    """Load data from a JSON file.

    Convenience wrapper that calls ``load_data`` with only the path.
    The file format is still selected by ``load_data`` from the file
    extension.

    Args:
        path: Path to the JSON file.

    Returns:
        Tuple of (x_data, y_data, labels).

    Example:
        >>> x, y, labels = load_json("sales.json")
    """
    return load_data(path)