Loading Data
This guide covers loading session data from various formats into slopit.
Quick Start
The simplest way to load data:
from slopit import load_session, load_sessions
# Single file
session = load_session("data/participant_001.json")
# Directory of files
sessions = load_sessions("data/")
slopit automatically detects the file format and uses the appropriate loader.
Supported Formats
Native slopit Format
The native format is a JSON file that directly matches the SlopitSession schema. Files are detected by the presence of schemaVersion and sessionId fields.
from slopit import load_session
session = load_session("data/session.json")
Expected file structure:
{
"schemaVersion": "1.0",
"sessionId": "abc123",
"participantId": "P001",
"platform": {"name": "jspsych", "version": "7.3.0"},
"environment": {...},
"timing": {...},
"trials": [...],
"globalEvents": {...}
}
JATOS Format
slopit can load data exported from JATOS (Just Another Tool for Online Studies). JATOS exports are detected by the presence of trial_type or "jsPsych" in the file content.
from slopit import load_session
# JATOS result file
session = load_session("jatos_results/study_result_123.txt")
JATOS exports can be:
- JSON array: [{trial1}, {trial2}, ...]
- Newline-delimited JSON: one trial object per line
The JATOS loader extracts participant and study IDs from common fields:
| Field | Source |
|---|---|
| participant_id | PROLIFIC_PID, workerId, participant_id, subject |
| study_id | STUDY_ID, study_id, experiment_id |
Behavioral data is extracted from the slopit field added by slopit adapters.
Loading Multiple Sessions
From a Directory
from slopit import load_sessions
# Load all files
sessions = load_sessions("data/")
# Filter by pattern
sessions = load_sessions("data/", pattern="participant_*.json")
From Multiple Directories
from slopit import load_sessions
all_sessions = []
for directory in ["data/batch1/", "data/batch2/", "data/batch3/"]:
sessions = load_sessions(directory)
all_sessions.extend(sessions)
print(f"Loaded {len(all_sessions)} sessions total")
Handling Errors
By default, load_sessions skips files that fail to parse. To handle errors explicitly:
from pathlib import Path
from slopit import load_session
sessions = []
errors = []
for path in Path("data/").glob("*.json"):
try:
session = load_session(path)
sessions.append(session)
except ValueError as e:
errors.append((path, str(e)))
except FileNotFoundError as e:
errors.append((path, str(e)))
print(f"Loaded {len(sessions)} sessions")
print(f"Errors: {len(errors)}")
Using Specific Loaders
For more control, use loaders directly:
from pathlib import Path
from slopit.io import NativeLoader, JATOSLoader
# Native format
native_loader = NativeLoader()
session = native_loader.load(Path("data/session.json"))
# JATOS format
jatos_loader = JATOSLoader()
session = jatos_loader.load(Path("jatos_results/result.txt"))
Checking Format Support
from pathlib import Path
from slopit.io import NativeLoader, JATOSLoader
path = Path("data/unknown.json")
if NativeLoader.can_load(path):
loader = NativeLoader()
elif JATOSLoader.can_load(path):
loader = JATOSLoader()
else:
raise ValueError(f"Unknown format: {path}")
session = loader.load(path)
Working with Session Data
Accessing Session Metadata
session = load_session("data/session.json")
print(f"Session ID: {session.session_id}")
print(f"Participant: {session.participant_id}")
print(f"Study: {session.study_id}")
print(f"Platform: {session.platform.name} {session.platform.version}")
print(f"Duration: {session.timing.duration / 1000:.1f} seconds")
Accessing Environment Info
env = session.environment
print(f"User Agent: {env.user_agent}")
print(f"Screen: {env.screen_resolution[0]}x{env.screen_resolution[1]}")
print(f"Viewport: {env.viewport_size[0]}x{env.viewport_size[1]}")
print(f"Timezone: {env.timezone}")
print(f"Language: {env.language}")
print(f"Touch: {env.touch_capable}")
Iterating Over Trials
for trial in session.trials:
print(f"\nTrial {trial.trial_index}: {trial.trial_type}")
# Response
if trial.response:
print(f" Response type: {trial.response.type}")
if trial.response.character_count:
print(f" Characters: {trial.response.character_count}")
# Timing
print(f" RT: {trial.rt}ms" if trial.rt else " RT: N/A")
# Behavioral data
if trial.behavioral:
ks = len(trial.behavioral.keystrokes)
focus = len(trial.behavioral.focus)
paste = len(trial.behavioral.paste)
print(f" Events: {ks} keystrokes, {focus} focus, {paste} paste")
Accessing Behavioral Data
for trial in session.trials:
if trial.behavioral is None:
continue
# Keystroke analysis
keydowns = [k for k in trial.behavioral.keystrokes if k.event == "keydown"]
print(f"Trial {trial.trial_index}: {len(keydowns)} keydowns")
# Calculate IKI
if len(keydowns) >= 2:
times = [k.time for k in keydowns]
ikis = [times[i+1] - times[i] for i in range(len(times)-1)]
mean_iki = sum(ikis) / len(ikis)
print(f" Mean IKI: {mean_iki:.1f}ms")
# Focus events
blurs = [f for f in trial.behavioral.focus if f.event == "blur"]
print(f" Blur events: {len(blurs)}")
# Paste events
for paste in trial.behavioral.paste:
print(f" Paste at {paste.time}ms: {paste.text_length} chars")
Data Validation
Validating Raw Data
from slopit.schemas import SlopitSession
raw_data = {
"schemaVersion": "1.0",
"sessionId": "test",
# ... other fields
}
try:
session = SlopitSession.model_validate(raw_data)
print("Data is valid")
except Exception as e:
print(f"Validation error: {e}")
Using the CLI
Validate files from the command line:
# Validate single file
slopit validate data/session.json
# Validate directory
slopit validate data/
Output:
✓ data/session_001.json: 1 session(s) valid
✓ data/session_002.json: 1 session(s) valid
✗ data/session_003.json: Invalid schema version
Summary: 2 valid, 1 invalid
Filtering Sessions
By Participant
sessions = load_sessions("data/")
# Filter to specific participants
participant_ids = {"P001", "P002", "P003"}
filtered = [s for s in sessions if s.participant_id in participant_ids]
By Trial Count
# Sessions with at least 5 trials
filtered = [s for s in sessions if len(s.trials) >= 5]
By Behavioral Data Availability
# Sessions with keystroke data
filtered = [
s for s in sessions
if any(t.behavioral and t.behavioral.keystrokes for t in s.trials)
]
By Date Range
from datetime import datetime
start = datetime(2024, 1, 1).timestamp() * 1000 # Unix ms
end = datetime(2024, 6, 30).timestamp() * 1000
filtered = [
s for s in sessions
if start <= s.timing.start_time <= end
]
Performance Tips
Memory Management
For large datasets, process sessions in batches:
from pathlib import Path
from slopit import load_session
from slopit.pipeline import AnalysisPipeline
pipeline = AnalysisPipeline([...])
# Process files one at a time
for path in Path("data/").glob("*.json"):
session = load_session(path)
result = pipeline.analyze([session])
# Process result...
# Session is garbage collected after each iteration
Parallel Loading
For I/O-bound loading, use concurrent processing:
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from slopit import load_session
paths = list(Path("data/").glob("*.json"))
with ThreadPoolExecutor(max_workers=4) as executor:
sessions = list(executor.map(load_session, paths))
print(f"Loaded {len(sessions)} sessions")
Troubleshooting
"Unable to determine file format"
The file does not match any known format. Check:
- Is it valid JSON?
- Does it have schemaVersion (native) or trial_type (JATOS)?
- Is the file encoding UTF-8?
"File not found"
Ensure the path is correct. Use absolute paths or paths relative to the current working directory.
"Validation error"
The file structure does not match the schema. Use model_validate with strict=False for more lenient parsing, or check the file contents against the schema documentation.